/**
 * Assemble a sparse matrix of type MatType for the given grid and benchmark
 * a host-side multiply with it.
 *
 * Two timers are registered in the externally declared `timers` collection
 * (if not already present): "<N> <matType> Assemble" and
 * "<N> <matType> Multiply", where N is grid.getNodeListSize(). The assemble
 * timer covers matrix construction plus assemble_LHS(); the multiply timer
 * covers benchmarkMultiplyHost().
 *
 * @param der   RBF-FD object forwarded to assemble_LHS().
 * @param grid  Grid supplying the node count and forwarded to assemble_LHS().
 */
void run_SpMV(RBFFD& der, Grid& grid)
{
    unsigned int N = grid.getNodeListSize();

    char test_name[256];
    char assemble_timer_name[256];
    char multiply_timer_name[256];

    // snprintf bounds every write to the destination buffer, so an
    // unexpectedly long matrix-type label is truncated instead of
    // overrunning the stack.
    snprintf(test_name, sizeof(test_name), "%u SpMV (%s -> %s)",
             N, matTypeStrings[matType], matTypeStrings[multType]);
    snprintf(assemble_timer_name, sizeof(assemble_timer_name), "%u %s Assemble",
             N, matTypeStrings[matType]);
    snprintf(multiply_timer_name, sizeof(multiply_timer_name), "%u %s Multiply",
             N, matTypeStrings[matType]);

    // Create the timers only on first use so repeated runs reuse them.
    if (!timers.contains(assemble_timer_name)) {
        timers[assemble_timer_name] = new EB::Timer(assemble_timer_name);
    }
    if (!timers.contains(multiply_timer_name)) {
        timers[multiply_timer_name] = new EB::Timer(multiply_timer_name);
    }

    std::cout << test_name << std::endl;

    // Assemble the matrix
    // ----------------------
    timers[assemble_timer_name]->start();
    // Automatic storage: the matrix is released on every exit path,
    // including an exception from assembly or the benchmark.
    MatType A(N);
    assemble_LHS(der, grid, A);
    timers[assemble_timer_name]->stop();

    // Multiply benchmark
    // ----------------------
    timers[multiply_timer_name]->start();
    benchmarkMultiplyHost<MatType>(A);
    timers[multiply_timer_name]->stop();
}
void gpuTest(RBFFD& der, Grid& grid, int primeGPU=0) { unsigned int N = grid.getStencilsSize(); unsigned int n = grid.getMaxStencilSize(); unsigned int nb_bnd = grid.getBoundaryIndicesSize(); unsigned int n_unknowns = 4 * N; // We subtract off the unknowns for the boundary unsigned int nrows = 4 * N - 4*nb_bnd; unsigned int ncols = 4 * N - 4*nb_bnd; unsigned int NNZ = 9*n*N+2*(4*N)+2*(3*N); char test_name[256]; char assemble_timer_name[256]; char copy_timer_name[512]; char test_timer_name[256]; if (primeGPU) { sprintf(test_name, "%u PRIMING THE GPU", N); sprintf(assemble_timer_name, "%u Primer Assemble", N); sprintf(copy_timer_name, "%u Primer Copy To VCL_CSR", N); sprintf(test_timer_name, "%u Primer GMRES test", N); } else { sprintf(test_name, "%u GMRES GPU (VCL_CSR)", N); sprintf(assemble_timer_name, "%u UBLAS_CSR Assemble", N); sprintf(copy_timer_name, "%u UBLAS_CSR Copy To VCL_CSR", N); sprintf(test_timer_name, "%u GPU GMRES test", N); } if (!timers.contains(assemble_timer_name)) { timers[assemble_timer_name] = new EB::Timer(assemble_timer_name); } if (!timers.contains(copy_timer_name)) { timers[copy_timer_name] = new EB::Timer(copy_timer_name); } if (!timers.contains(test_timer_name)) { timers[test_timer_name] = new EB::Timer(test_timer_name); } std::cout << test_name << std::endl; // ----- ASSEMBLE ----- timers[assemble_timer_name]->start(); // Compress system to remove boundary rows UBLAS_MAT_t* A = new UBLAS_MAT_t(nrows, ncols, NNZ); UBLAS_VEC_t* F = new UBLAS_VEC_t(nrows, 0); UBLAS_VEC_t* U_exact = new UBLAS_VEC_t(n_unknowns, 0); UBLAS_VEC_t* U_exact_compressed = new UBLAS_VEC_t(nrows, 0); assemble_System_Stokes(der, grid, *A, *F, *U_exact, *U_exact_compressed); timers[assemble_timer_name]->stop(); write_System(*A, *F, *U_exact); write_to_file(*U_exact_compressed, "output/U_exact_compressed.mtx"); UBLAS_VEC_t F_discrete = prod(*A, *U_exact_compressed); write_to_file(F_discrete, "output/F_discrete.mtx"); // ----- SOLVE ----- 
timers[copy_timer_name]->start(); VCL_MAT_t* A_gpu = new VCL_MAT_t(A->size1(), A->size2()); copy(*A, *A_gpu); VCL_VEC_t* F_gpu = new VCL_VEC_t(F->size()); VCL_VEC_t* U_exact_gpu = new VCL_VEC_t(U_exact_compressed->size()); VCL_VEC_t* U_approx_gpu = new VCL_VEC_t(F->size()); viennacl::copy(F->begin(), F->end(), F_gpu->begin()); viennacl::copy(U_exact_compressed->begin(), U_exact_compressed->end(), U_exact_gpu->begin()); timers[copy_timer_name]->stop(); timers[test_timer_name]->start(); // Use GMRES to solve A*u = F GMRES_Device(*A_gpu, *F_gpu, *U_exact_gpu, *U_approx_gpu, N, nb_bnd); timers[test_timer_name]->stop(); write_Solution(grid, *U_exact_compressed, *U_approx_gpu); // Cleanup delete(A); delete(A_gpu); delete(F); delete(U_exact); delete(U_exact_compressed); delete(F_gpu); delete(U_exact_gpu); delete(U_approx_gpu); }