// NOTE(review): the operands of the angle-bracket #include directives below are
// missing in this copy of the file; restore the original header names.
#include
#include
#include
#include
#include "MersenneTwister.h"
#include "book.h"
#include
#include
#include
#include

// Number of observations; also used as the bounds limit inside the kernels.
#define num 1000000

using namespace std;

/*
 * ResidCalc -- per-observation residual and "hat" value.
 *
 * Each thread handles the element at its flat 1D global index and writes
 *   resid[i] = y[i] - b0 - b1 * x[i]
 *   hat[i]   = a + 2*b*x[i] + c*x[i]^2
 *
 * x, y        : input arrays, read at index i
 * resid, hat  : output arrays, written at index i
 * a, b, c     : coefficients of the quadratic in x stored in hat
 *               (presumably entries of the inverse cross-product matrix, so
 *               hat[i] is the leverage of observation i -- confirm in main)
 * b0, b1      : intercept and slope used for the residual
 *
 * Both stores are inside the bounds check. Previously only the resid store
 * was guarded, so threads with tid >= num (present whenever the grid is
 * rounded up) read x and wrote hat past the end of the arrays.
 */
__global__ void ResidCalc( double *x, double *y, double *resid, double *hat,
                           double a, double b, double c, double b0, double b1)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;   // this thread handles the data at its thread id
    if (tid < num) {
        resid[tid] = y[tid] - b0-b1*x[tid];
        hat[tid]=a+2*b*x[tid]+c*x[tid]*x[tid];
    }
}

/*
 * tResid -- scales each residual using the error sum of squares and the
 * corresponding hat value:
 *
 *   deletedResid[i] = resid[i] /
 *       sqrt((SumSquareError * (1 - hat[i]) - resid[i]^2) / (num - 3))
 *
 * (presumably the studentized deleted residual for a two-parameter fit --
 * confirm against the host code).
 *
 * resid, hat      : input arrays, read at index i
 * deletedResid    : output array, written at index i
 * SumSquareError  : scalar passed in by the caller
 *
 * Each thread handles the element at its flat 1D global index; threads with
 * tid >= num do nothing.
 */
__global__ void tResid(double *resid, double *hat, double *deletedResid, double SumSquareError){
    int tid = threadIdx.x + blockIdx.x * blockDim.x;   // this thread handles the data at its thread id
    if (tid < num) {
        deletedResid[tid] = resid[tid]/sqrt((SumSquareError*(1-hat[tid])-resid[tid]*resid[tid])/(num-3));
    }
}

int main(){
    double *dev_x, *dev_y, *dev_resid, *dev_hat, *dev_deletedResid;
    cudaMalloc( (void**)&dev_x, num * sizeof(double) );
    cudaMalloc( (void**)&dev_y, num * sizeof(double) );
    cudaMalloc( (void**)&dev_resid, num * sizeof(double) );
    cudaMalloc( (void**)&dev_hat, num * sizeof(double) );
    cudaMalloc( (void**)&dev_deletedResid, num * sizeof(double) );
    /*Open the file generated by GPUClass_Generator.cpp */
    ifstream in_file("GPUnumbers.txt", ios::binary);
    /*Make sure it worked. */
    if(!in_file.is_open()){
        cout << "File not opened..."
<< endl; return 1; } /*Declare the number of observed x and y values, initialize things */ double *x,*y,*hat, *resid,invdet, sumx=0,sumy=0, sumSquare=0, sumCross=0, a, b, c, b0, b1; x = (double*) malloc((num+1)*sizeof(double)); y = (double*) malloc((num+1)*sizeof(double)); hat = (double*) malloc((num+1)*sizeof(double)); resid = (double*) malloc((num+1)*sizeof(double)); /*Load in the data */ for(int i=0; i> x[i]; in_file >> y[i]; } /*Calculate the sum of the x's, y's, x^2's, and x*y's which will be used repeatedly */ clock_t start=clock(); for(int i=0; i>>( dev_x, dev_y, dev_resid, dev_hat, a, b, c, b0, b1); cudaMemcpy( resid, dev_resid, num * sizeof(double), cudaMemcpyDeviceToHost ); cudaMemcpy( hat, dev_hat, num * sizeof(double), cudaMemcpyDeviceToHost ); /*Here's where we should split everything up, this gets the same terms as above, but with the i-th observation deleted */ double SumSquareError=0; for(int i=0; i>>( dev_resid, dev_hat, dev_deletedResid, SumSquareError); cudaMemcpy( deletedResid, dev_deletedResid, num * sizeof(double), cudaMemcpyDeviceToHost ); for(int i=0; i