// NOTE(review): this file is damaged and must be restored from the original
// source before it can be built. What can be established from the text below:
//
//  * The whole program sits on ONE physical line that begins with "//", so a
//    C/C++/CUDA compiler treats all of it as a single comment.
//  * Four "#include" directives have no header name, the kernel's loop header
//    stops at "for(i=0;i" and is followed directly by ">>>(dev_A,dev_B,dev_C);",
//    and the line ends inside an unterminated block comment. Presumably every
//    span from a less-than sign to the next greater-than sign was stripped
//    during extraction -- confirm against the original file.
//  * Still readable: TILE (20), NT (200), MS (TILE*NT), the row-major index
//    macro IDX2(i,j), the Matrix struct (width, height, double* elements), and
//    the head of the kernel matrixMul(A, B, C), which derives
//    row = blockIdx.y*TILE + threadIdx.y and col = blockIdx.x*TILE + threadIdx.x
//    and initialises a double accumulator S to 0.0.
//  * Missing: the rest of the kernel body and all host code ahead of the
//    launch. Only the tail of the host code survives: the closing part of a
//    kernel launch taking (dev_A, dev_B, dev_C), a device-to-host cudaMemcpy of
//    C.width*C.height doubles wrapped in HANDLE_ERROR, and a printf of
//    ((double)clock() - start)/CLOCKS_PER_SEC.
//
// Points to check when restoring (hedged; the surrounding code is not visible):
//  * MS expands to TILE*NT without parentheses. IDX2 wraps it as (MS), so that
//    use is safe, but any other use next to a higher-precedence operator would
//    misparse; consider defining it as (TILE*NT).
//  * No error check is visible between the kernel launch and the cudaMemcpy;
//    launch-configuration errors would only surface if cudaGetLastError() is
//    called -- TODO confirm whether the original does so.
//  * No bounds guard on row/col is visible before the text breaks off. That is
//    only safe if the launch uses exactly NT x NT blocks of TILE x TILE threads
//    on MS x MS matrices -- TODO confirm against the lost launch configuration.
//  * The printed time is taken after the blocking cudaMemcpy, so it covers the
//    copy back to the host as well as the kernel; where "start" is sampled is
//    not visible here.
// Multiply two matrices A * B = C #include #include #include #include #include "MersenneTwister.h" #include "book.h" #define TILE 20 // tile size #define NT 200 // number of tiles #define MS TILE*NT //matrix size // row-major storing #define IDX2(i,j) (((i)*(MS))+(j)) // Matrices are stored in row-major order typedef struct { int width; int height; double* elements; } Matrix; __global__ void matrixMul(Matrix A, Matrix B, Matrix C){ int tidx=threadIdx.x; int tidy=threadIdx.y; int bidx=blockIdx.x; int bidy=blockIdx.y; int row = bidy*TILE+tidy; int col = bidx*TILE+tidx; int i; double S=0.0; for(i=0;i>>>(dev_A,dev_B,dev_C); HANDLE_ERROR(cudaMemcpy(C.elements, dev_C.elements, C.width*C.height*sizeof(double), cudaMemcpyDeviceToHost)); printf("Time to compute on GPU: %f seconds.\n ", ((double)clock() - start)/CLOCKS_PER_SEC ); /* for(i=0;i