/**
 * Assemble a MatType matrix for the given grid and benchmark a host-side
 * multiply on it.
 *
 * The matrix is constructed with N = grid.getNodeListSize() and filled by
 * assemble_LHS(); benchmarkMultiplyHost<MatType>() is then run on it. The two
 * phases are timed separately with entries of the global `timers` collection
 * named "<N> <type> Assemble" and "<N> <type> Multiply"; entries are created
 * the first time a name is seen and reused afterwards.
 *
 * @param der  RBFFD object forwarded to assemble_LHS().
 * @param grid Grid forwarded to assemble_LHS(); its node-list size is used as
 *             the matrix constructor argument and in the timer/test names.
 */
void run_SpMV(RBFFD& der, Grid& grid)
{
    const unsigned int N = grid.getNodeListSize();

    char test_name[256];
    char assemble_timer_name[256];
    char multiply_timer_name[256];

    // snprintf bounds every write to the destination buffer, so an
    // unexpectedly long type string truncates instead of overrunning it.
    snprintf(test_name, sizeof(test_name), "%u SpMV (%s -> %s)", N,
             matTypeStrings[matType], matTypeStrings[multType]);
    snprintf(assemble_timer_name, sizeof(assemble_timer_name), "%u %s Assemble", N,
             matTypeStrings[matType]);
    // NOTE(review): the multiply timer is labelled with matType although the
    // test name distinguishes matType from multType -- confirm this is intended.
    snprintf(multiply_timer_name, sizeof(multiply_timer_name), "%u %s Multiply", N,
             matTypeStrings[matType]);

    // Register each timer once; later calls with the same name reuse it.
    if (!timers.contains(assemble_timer_name)) {
        timers[assemble_timer_name] = new EB::Timer(assemble_timer_name);
    }
    if (!timers.contains(multiply_timer_name)) {
        timers[multiply_timer_name] = new EB::Timer(multiply_timer_name);
    }

    std::cout << test_name << std::endl;

    // Assemble the matrix
    // ----------------------
    timers[assemble_timer_name]->start();
    // Automatic storage: the matrix is released on every exit path, including
    // an exception thrown by assembly or by the benchmark.
    MatType A(N);
    assemble_LHS(der, grid, A);
    timers[assemble_timer_name]->stop();

    // Multiply benchmark
    // ----------------------
    timers[multiply_timer_name]->start();
    benchmarkMultiplyHost<MatType>(A);
    timers[multiply_timer_name]->stop();
}
int main(void) { bool writeIntermediate = true; bool primed = false; std::vector<std::string> grids; // grids.push_back("~/GRIDS/md/md005.00036"); //grids.push_back("~/GRIDS/md/md165.27556"); //grids.push_back("~/GRIDS/md/md031.01024"); grids.push_back("~/GRIDS/md/md089.08100"); #if 0 grids.push_back("~/GRIDS/md/md031.01024"); grids.push_back("~/GRIDS/md/md050.02601"); grids.push_back("~/GRIDS/md/md063.04096"); grids.push_back("~/GRIDS/md/md089.08100"); grids.push_back("~/GRIDS/md/md127.16384"); grids.push_back("~/GRIDS/md/md165.27556"); #endif #if 0 grids.push_back("~/GRIDS/geoff/scvtmesh_100k_nodes.ascii"); grids.push_back("~/GRIDS/geoff/scvtmesh_500k_nodes.ascii"); grids.push_back("~/GRIDS/geoff/scvtmesh_1m_nodes.ascii"); #endif //grids.push_back("~/GRIDS/geoff/scvtmesh_1m_nodes.ascii"); for (size_t i = 0; i < grids.size(); i++) { std::string& grid_name = grids[i]; std::string weight_timer_name = grid_name + " Calc Weights"; timers[weight_timer_name] = new EB::Timer(weight_timer_name.c_str()); // Get contours from rbfzone.blogspot.com to choose eps_c1 and eps_c2 based on stencil_size (n) #if 0 // Too ill-conditioned? Doesnt converge in GMRES + ILU0 unsigned int stencil_size = 40; double eps_c1 = 0.027; double eps_c2 = 0.274; #else unsigned int stencil_size = 31; double eps_c1 = 0.035; double eps_c2 = 0.1; #endif GridReader* grid = new GridReader(grid_name, 4); grid->setMaxStencilSize(stencil_size); // We do not read until generate is called: Grid::GridLoadErrType err = grid->loadFromFile(); if (err == Grid::NO_GRID_FILES) { grid->generate(); #if 1 // NOTE: We force at least one node in the domain to be a boundary. //----------------------------- // We will set the first node as a boundary/ground point. 
We know // the normal because we're on teh sphere centered at (0,0,0) for (unsigned int nodeIndex = 0; nodeIndex < 1; nodeIndex++) { NodeType& node = grid->getNode(nodeIndex); Vec3 nodeNormal = node - Vec3(0,0,0); grid->appendBoundaryIndex(nodeIndex, nodeNormal); } #endif //----------------------------- if (writeIntermediate) { grid->writeToFile(); } } std::cout << "Generate Stencils\n"; Grid::GridLoadErrType st_err = grid->loadStencilsFromFile(); if (st_err == Grid::NO_STENCIL_FILES) { // grid->generateStencils(Grid::ST_BRUTE_FORCE); #if 1 grid->generateStencils(Grid::ST_KDTREE); #else grid->setNSHashDims(50, 50,50); grid->generateStencils(Grid::ST_HASH); #endif if (writeIntermediate) { grid->writeToFile(); } } std::cout << "Generate RBFFD Weights\n"; timers[weight_timer_name]->start(); RBFFD der(RBFFD::LSFC | RBFFD::XSFC | RBFFD::YSFC | RBFFD::ZSFC, grid, 3, 0); //TODO: der.setWeightType(RBFFD::ContourSVD); der.setEpsilonByParameters(eps_c1, eps_c2); int der_err = der.loadAllWeightsFromFile(); if (der_err) { der.computeAllWeightsForAllStencils(); timers[weight_timer_name]->stop(); #if 0 if (writeIntermediate) { der.writeAllWeightsToFile(); } #endif } if (!primed) { std::cout << "\n\n"; cout << "Priming GPU with dummy operations (removes compile from benchmarks)\n"; gpuTest(der,*grid, 1); primed = true; std::cout << "\n\n"; } // No support for GMRES on the CPU yet. //cpuTest(der,*grid); gpuTest(der,*grid); delete(grid); } timers.printAll(); timers.writeToFile(); return EXIT_SUCCESS; }
void gpuTest(RBFFD& der, Grid& grid, int primeGPU=0) { unsigned int N = grid.getStencilsSize(); unsigned int n = grid.getMaxStencilSize(); unsigned int nb_bnd = grid.getBoundaryIndicesSize(); unsigned int n_unknowns = 4 * N; // We subtract off the unknowns for the boundary unsigned int nrows = 4 * N - 4*nb_bnd; unsigned int ncols = 4 * N - 4*nb_bnd; unsigned int NNZ = 9*n*N+2*(4*N)+2*(3*N); char test_name[256]; char assemble_timer_name[256]; char copy_timer_name[512]; char test_timer_name[256]; if (primeGPU) { sprintf(test_name, "%u PRIMING THE GPU", N); sprintf(assemble_timer_name, "%u Primer Assemble", N); sprintf(copy_timer_name, "%u Primer Copy To VCL_CSR", N); sprintf(test_timer_name, "%u Primer GMRES test", N); } else { sprintf(test_name, "%u GMRES GPU (VCL_CSR)", N); sprintf(assemble_timer_name, "%u UBLAS_CSR Assemble", N); sprintf(copy_timer_name, "%u UBLAS_CSR Copy To VCL_CSR", N); sprintf(test_timer_name, "%u GPU GMRES test", N); } if (!timers.contains(assemble_timer_name)) { timers[assemble_timer_name] = new EB::Timer(assemble_timer_name); } if (!timers.contains(copy_timer_name)) { timers[copy_timer_name] = new EB::Timer(copy_timer_name); } if (!timers.contains(test_timer_name)) { timers[test_timer_name] = new EB::Timer(test_timer_name); } std::cout << test_name << std::endl; // ----- ASSEMBLE ----- timers[assemble_timer_name]->start(); // Compress system to remove boundary rows UBLAS_MAT_t* A = new UBLAS_MAT_t(nrows, ncols, NNZ); UBLAS_VEC_t* F = new UBLAS_VEC_t(nrows, 0); UBLAS_VEC_t* U_exact = new UBLAS_VEC_t(n_unknowns, 0); UBLAS_VEC_t* U_exact_compressed = new UBLAS_VEC_t(nrows, 0); assemble_System_Stokes(der, grid, *A, *F, *U_exact, *U_exact_compressed); timers[assemble_timer_name]->stop(); write_System(*A, *F, *U_exact); write_to_file(*U_exact_compressed, "output/U_exact_compressed.mtx"); UBLAS_VEC_t F_discrete = prod(*A, *U_exact_compressed); write_to_file(F_discrete, "output/F_discrete.mtx"); // ----- SOLVE ----- 
timers[copy_timer_name]->start(); VCL_MAT_t* A_gpu = new VCL_MAT_t(A->size1(), A->size2()); copy(*A, *A_gpu); VCL_VEC_t* F_gpu = new VCL_VEC_t(F->size()); VCL_VEC_t* U_exact_gpu = new VCL_VEC_t(U_exact_compressed->size()); VCL_VEC_t* U_approx_gpu = new VCL_VEC_t(F->size()); viennacl::copy(F->begin(), F->end(), F_gpu->begin()); viennacl::copy(U_exact_compressed->begin(), U_exact_compressed->end(), U_exact_gpu->begin()); timers[copy_timer_name]->stop(); timers[test_timer_name]->start(); // Use GMRES to solve A*u = F GMRES_Device(*A_gpu, *F_gpu, *U_exact_gpu, *U_approx_gpu, N, nb_bnd); timers[test_timer_name]->stop(); write_Solution(grid, *U_exact_compressed, *U_approx_gpu); // Cleanup delete(A); delete(A_gpu); delete(F); delete(U_exact); delete(U_exact_compressed); delete(F_gpu); delete(U_exact_gpu); delete(U_approx_gpu); }
int main(void) { bool writeIntermediate = true; bool primed = false; std::vector<std::string> grids; #if 0 //grids.push_back("~/GRIDS/md/md005.00036"); grids.push_back("~/GRIDS/md/md031.01024"); grids.push_back("~/GRIDS/md/md050.02601"); grids.push_back("~/GRIDS/md/md063.04096"); grids.push_back("~/GRIDS/md/md089.08100"); grids.push_back("~/GRIDS/md/md127.16384"); grids.push_back("~/GRIDS/md/md165.27556"); #endif #if 0 grids.push_back("~/GRIDS/geoff/scvtimersesh_100k_nodes.ascii"); grids.push_back("~/GRIDS/geoff/scvtimersesh_500k_nodes.ascii"); grids.push_back("~/GRIDS/geoff/scvtimersesh_1m_nodes.ascii"); #endif //grids.push_back("~/GRIDS/geoff/scvtimersesh_1m_nodes.ascii"); grids.push_back("~/sphere_grids/md063.04096"); grids.push_back("~/sphere_grids/md079.06400"); grids.push_back("~/sphere_grids/md089.08100"); grids.push_back("~/sphere_grids/md100.10201"); grids.push_back("~/sphere_grids/md127.16384"); grids.push_back("~/sphere_grids/md141.20164"); grids.push_back("~/sphere_grids/md165.27556"); grids.push_back("~/sphere_grids/scvtmesh001.100000"); grids.push_back("~/sphere_grids/scvtmesh002.500000"); grids.push_back("~/sphere_grids/scvtmesh003.1000000"); for (size_t i = 0; i < grids.size(); i++) { std::string& grid_name = grids[i]; std::string weight_timer_name = grid_name + " Calc Weights"; timers[weight_timer_name] = new EB::Timer(weight_timer_name.c_str()); // Get contours from rbfzone.blogspot.com to choose eps_c1 and eps_c2 based on stencil_size (n) unsigned int stencil_size = 40; double eps_c1 = 0.027; double eps_c2 = 0.274; GridReader* grid = new GridReader(grid_name, 4); grid->setMaxStencilSize(stencil_size); // We do not read until generate is called: Grid::GridLoadErrType err = grid->loadFromFile(); if (err == Grid::NO_GRID_FILES) { grid->generate(); if (writeIntermediate) { grid->writeToFile(); } } std::cout << "Generate Stencils\n"; Grid::GridLoadErrType st_err = grid->loadStencilsFromFile(); if (st_err == Grid::NO_STENCIL_FILES) { // 
grid->generateStencils(Grid::ST_BRUTE_FORCE); #if 1 grid->generateStencils(Grid::ST_KDTREE); #else grid->setNSHashDims(50, 50,50); grid->generateStencils(Grid::ST_HASH); #endif if (writeIntermediate) { grid->writeToFile(); } } std::cout << "Generate RBFFD Weights\n"; timers[weight_timer_name]->start(); RBFFD der(RBFFD::LSFC | RBFFD::XSFC | RBFFD::YSFC | RBFFD::ZSFC, grid, 3, 0); der.setEpsilonByParameters(eps_c1, eps_c2); int der_err = der.loadAllWeightsFromFile(); if (der_err) { der.computeAllWeightsForAllStencils(); timers[weight_timer_name]->start(); if (writeIntermediate) { der.writeAllWeightsToFile(); } } if (!primed) { cout << "Priming GPU with dummy operations (removes compile from benchmarks)\n"; run_test<DUMMY, DUMMY>(der, *grid); primed = true; } cout << "Running Tests\n" << std::endl; { run_test<COO_CPU, COO_CPU>(der, *grid); run_test<COO_CPU, COO_GPU>(der, *grid); run_test<CSR_CPU, CSR_CPU>(der, *grid); run_test<CSR_CPU, CSR_GPU>(der, *grid); run_test<COO_CPU, CSR_GPU>(der, *grid); run_test<CSR_CPU, COO_GPU>(der, *grid); } delete(grid); } timers.printAll(); timers.writeToFile(); return EXIT_SUCCESS; }