bool GPUMatrixTest::performTest() { cout << "Creating 2x2 matrix" << endl; Matrix a(2, 2, generators::zero); a(0,1) = 2; a(1,0) = 4; cout << a; cout << "Creating 2x3 matrix" << endl; Matrix b(2, 3, generators::zero); b(0,0) = 3; b(1,0) = 5; b(0,1) = 7; b(1,2) = 6; cout << b; // Matrix * Matrix on CPU //const double correctResultMult[] = {10, 0, 12, 12, 28, 0}; Matrix c(2, 3); cout << "---" << endl << "Product on CPU:" << endl; a.multWithMatrix(b, &c); cout << c; // Matrix * Matrix on GPU GPUMatrix a_d(a); Matrix agpu(2,2, generators::zero); a_d.getMatrix(&agpu); cout << agpu << endl; //return true; GPUMatrix b_d(b); GPUMatrix c_d(c); a_d.multWithMatrix(b_d, &c_d); Matrix cgpu(2, 3, generators::zero); c_d.getMatrix(&cgpu); cout << "Product on GPU:" << endl << cgpu; srand(1); Matrix d(9, 10, generators::random); cout << "D = " << endl << d; GPUMatrix d_d(d); // Row sum GPUMatrix rsums_d(d.rows(), 1); // "col vector" d_d.rowSums(&rsums_d); Matrix rsums(d.rows(), 1); rsums_d.getMatrix(&rsums); cout << "Row sums of D: " << endl; cout << rsums << endl; for (unsigned int i = 0; i < d.rows(); ++i) { //cout << sums(i, 0) << " "; if (!epsilonCheck(rsums(i, 0), d.rowSum(i), 1e-6)) return false; } // Col sum GPUMatrix csums_d(1, d.cols()); // "row vector" d_d.colSums(&csums_d); Matrix csums(1, d.cols()); csums_d.getMatrix(&csums); cout << "Column sums of D: " << endl; cout << csums << endl; for (unsigned int i = 0; i < d.cols(); ++i) { //cout << sums(i, 0) << " "; if (!epsilonCheck(csums(0, i), d.colSum(i), 1e-6)) return false; } // Matrix + Matrix on GPU Matrix e(9, 10, generators::random); cout << "E = " << endl << e; GPUMatrix e_d(e); GPUMatrix f_d(9, 10); Matrix fgpu(9, 10, generators::zero); d_d.add(e_d, &f_d); f_d.getMatrix(&fgpu); cout << "Result D+E: " << endl << fgpu << endl; d_d.sub(e_d, &f_d); f_d.getMatrix(&fgpu); cout << "Result D-E: " << endl << fgpu << endl; d_d.elementWiseMult(e_d, &f_d); f_d.getMatrix(&fgpu); cout << "Result D.*E: " << endl << fgpu << endl; d_d.elementWiseDiv(e_d, &f_d); f_d.getMatrix(&fgpu); cout << "Result D./E: " << endl << fgpu << endl; d_d.elementWisePow(2.5, &f_d); f_d.getMatrix(&fgpu); cout << "Result D.^2.5: " << endl << fgpu << endl; // Scaling const double alpha = .5f; f_d.scale(alpha, 2, 4); f_d.getMatrix(&fgpu); cout << "Scale columns 2 to 4 by " << alpha << ":" << endl << fgpu << endl; // Zero of submatrix f_d.zero(2, 3, 7, 8); f_d.getMatrix(&fgpu); cout << "Set [2,3]->[7,8] to zero:" << endl << fgpu << endl; // Zero whole matrix f_d.zero(); f_d.getMatrix(&fgpu); cout << "Zero matrix:" << endl << fgpu << endl; // // Large matrix multiplication and verification against CPU gold standard // cout << "Matrix multiplication on CPU ... " << endl; int m = 999; int k = 199; int n = 1; Matrix left(m, k, generators::random); Matrix right(k, n, generators::unity); Matrix resultCPU(m, n); left.multWithMatrix(right, &resultCPU); cout << "Matrix multiplication on GPU ... " << endl; GPUMatrix leftGPU(left); GPUMatrix rightGPU(right); GPUMatrix resultGPU(resultCPU.rows(), resultCPU.cols()); Matrix resultGPUtransfer(resultCPU.rows(), resultCPU.cols()); leftGPU.multWithMatrix(rightGPU, &resultGPU); resultGPU.getMatrix(&resultGPUtransfer); //cout << resultGPUtransfer << endl; int nwarn = 0; for (unsigned int i = 0; i < resultCPU.rows(); ++i) { for (unsigned int j = 0; j < resultCPU.cols(); ++j) { if (abs(resultCPU(i, j) - resultGPUtransfer(i, j)) > 1e-3) { cout << "WARN " << i << " " << j << ": CPU = " << resultCPU(i, j) << "; GPU = " << resultGPUtransfer(i, j) << endl; nwarn++; if (nwarn > 50) return false; } } } return true; }
void double_tests (void) { av_alist a; double dr; dr = d_d(d1); fprintf(out,"->%g\n",dr); fflush(out); dr = 0.0; clear_traces(); av_start_double(a,d_d,&dr); av_double(a,d1); av_call(a); fprintf(out,"->%g\n",dr); fflush(out); dr = d_d2(d1,d2); fprintf(out,"->%g\n",dr); fflush(out); dr = 0.0; clear_traces(); av_start_double(a,d_d2,&dr); av_double(a,d1); av_double(a,d2); av_call(a); fprintf(out,"->%g\n",dr); fflush(out); dr = d_d4(d1,d2,d3,d4); fprintf(out,"->%g\n",dr); fflush(out); dr = 0.0; clear_traces(); av_start_double(a,d_d4,&dr); av_double(a,d1); av_double(a,d2); av_double(a,d3); av_double(a,d4); av_call(a); fprintf(out,"->%g\n",dr); fflush(out); dr = d_d8(d1,d2,d3,d4,d5,d6,d7,d8); fprintf(out,"->%g\n",dr); fflush(out); dr = 0.0; clear_traces(); av_start_double(a,d_d8,&dr); av_double(a,d1); av_double(a,d2); av_double(a,d3); av_double(a,d4); av_double(a,d5); av_double(a,d6); av_double(a,d7); av_double(a,d8); av_call(a); fprintf(out,"->%g\n",dr); fflush(out); dr = d_d16(d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15,d16); fprintf(out,"->%g\n",dr); fflush(out); dr = 0.0; clear_traces(); av_start_double(a,d_d16,&dr); av_double(a,d1); av_double(a,d2); av_double(a,d3); av_double(a,d4); av_double(a,d5); av_double(a,d6); av_double(a,d7); av_double(a,d8); av_double(a,d9); av_double(a,d10); av_double(a,d11); av_double(a,d12); av_double(a,d13); av_double(a,d14); av_double(a,d15); av_double(a,d16); av_call(a); fprintf(out,"->%g\n",dr); fflush(out); return; }