/**
 * Fill the sparse matrix A with the negated LSFC stencil weights.
 *
 * For every node i in the grid, the weight array returned by
 * der.getStencilWeights(RBFFD::LSFC, i) is scattered into row i of A:
 * the j-th weight is negated and stored in the column named by the j-th
 * member of stencil i.
 *
 * @param der   Provides the per-stencil weight arrays.
 * @param grid  Provides the node count, the stencils and the maximum stencil size.
 * @param A     Matrix written in place through A[row][col]; entries that are
 *              not addressed by any stencil are left untouched.
 */
void assemble_LHS ( RBFFD& der, Grid& grid, STL_Sparse_Mat& A){
    const unsigned int N = grid.getNodeListSize();
    const unsigned int n = grid.getMaxStencilSize();

    for (unsigned int i = 0; i < N; i++) {
        StencilType& sten = grid.getStencil(i);
        double* lapl = der.getStencilWeights(RBFFD::LSFC, i);

        // A stencil can hold fewer members than the grid-wide maximum, so
        // bound the loop by the real stencil size (capped at the maximum)
        // instead of indexing sten[] and lapl[] past their end.
        const unsigned int sten_size =
            (sten.size() < n) ? static_cast<unsigned int>(sten.size()) : n;

        // Every stencil member gets an entry in row i.
        // NOTE(review): if the stencil contains node i itself this also
        // writes the diagonal -- confirm against the stencil generator.
        for (unsigned int j = 0; j < sten_size; j++) {
            A[i][sten[j]] = -lapl[j];
        }
    }
}
int main(int argc, char** argv) { ProjectSettings* settings = new ProjectSettings(argc, argv); int dim = settings->GetSettingAs<int>("DIMENSION", ProjectSettings::required); int nb_interior = settings->GetSettingAs<int>("NB_INTERIOR", ProjectSettings::required); int nb_inner_boundary = settings->GetSettingAs<int>("NB_INNER_BOUNDARY", ProjectSettings::optional, "0"); int nb_outer_boundary = settings->GetSettingAs<int>("NB_OUTER_BOUNDARY", ProjectSettings::optional, "0"); int nb_boundary = nb_inner_boundary + nb_outer_boundary; int nb_total = nb_interior + nb_boundary; if (dim > 3) { cout << "ERROR! Dim > 3 Not supported!" << endl; exit(EXIT_FAILURE); } double inner_r = settings->GetSettingAs<double>("INNER_RADIUS", ProjectSettings::optional, "0.5"); double outer_r = settings->GetSettingAs<double>("OUTER_RADIUS", ProjectSettings::optional, "1.0"); double inner_axis_major = settings->GetSettingAs<double>("INNER_AXIS_MAJOR", ProjectSettings::optional, "0."); double inner_axis_minor = settings->GetSettingAs<double>("INNER_AXIS_MINOR", ProjectSettings::optional, "0."); double outer_axis_major = settings->GetSettingAs<double>("OUTER_AXIS_MAJOR", ProjectSettings::optional, "0."); double outer_axis_minor = settings->GetSettingAs<double>("OUTER_AXIS_MINOR", ProjectSettings::optional, "0."); int ns_nx = settings->GetSettingAs<int>("NS_NB_X", ProjectSettings::optional, "10"); int ns_ny = settings->GetSettingAs<int>("NS_NB_Y", ProjectSettings::optional, "10"); int ns_nz = settings->GetSettingAs<int>("NS_NB_Z", ProjectSettings::optional, "10"); double minX = settings->GetSettingAs<double>("MIN_X", ProjectSettings::optional, "-1."); double maxX = settings->GetSettingAs<double>("MAX_X", ProjectSettings::optional, "1."); double minY = settings->GetSettingAs<double>("MIN_Y", ProjectSettings::optional, "-1."); double maxY = settings->GetSettingAs<double>("MAX_Y", ProjectSettings::optional, "1."); double minZ = settings->GetSettingAs<double>("MIN_Z", ProjectSettings::optional, "-1."); 
double maxZ = settings->GetSettingAs<double>("MAX_Z", ProjectSettings::optional, "1."); double debug = settings->GetSettingAs<int>("DEBUG", ProjectSettings::optional, "0"); // 0 = Dirichlet, 1 = neumann, 2 = robin int boundary_condition = settings->GetSettingAs<int>("BOUNDARY_CONDITION", ProjectSettings::optional, "0"); // 0 = discrete rhs, 1 = exact (test discrete compat condition) int use_discrete_rhs = settings->GetSettingAs<int>("USE_DISCRETE_RHS", ProjectSettings::optional, "0"); // 0 = assume non-uniform diffusion, 1 = assume uniform int use_uniform_diffusion = settings->GetSettingAs<int>("USE_UNIFORM_DIFFUSION", ProjectSettings::optional, "1"); int run_derivative_tests = settings->GetSettingAs<int>("RUN_DERIVATIVE_TESTS", ProjectSettings::optional, "1"); int stencil_size = settings->GetSettingAs<int>("STENCIL_SIZE", ProjectSettings::required); int use_gpu = settings->GetSettingAs<int>("USE_GPU", ProjectSettings::optional, "1"); int nb_samples = settings->GetSettingAs<int>("NB_CVT_SAMPLES", ProjectSettings::required); int it_max_interior = settings->GetSettingAs<int>("NB_CVT_ITERATIONS", ProjectSettings::required); // Generate a CVT with nx*ny*nz nodes, in 1, 2 or 3D with 0 locked boundary nodes, // 20000 samples per iteration for 30 iterations NestedEllipseCVT* grid; if (nb_boundary) { // Specify the exact number of nodes on the boundary grid = new NestedEllipseCVT(nb_total, nb_inner_boundary, nb_outer_boundary, dim, new UniformDensity(), 0, nb_samples, it_max_interior); } else { // Guess the number of nodes on the boundary (usually looks nicer) grid = new NestedEllipseCVT(nb_total, dim, new UniformDensity(), 0, nb_samples, it_max_interior); } grid->setExtents(minX, maxX, minY, maxY, minZ, maxZ); if (!inner_axis_minor) { grid->setInnerRadius(inner_r); grid->setOuterRadius(outer_r); } else { grid->setInnerAxes(inner_axis_major, inner_axis_minor); grid->setOuterAxes(outer_axis_major, outer_axis_minor); } grid->setDebug(debug); 
grid->setMaxStencilSize(stencil_size); grid->setNSHashDims(ns_nx, ns_ny, ns_nz); int writeIntermediate = 2; Grid::GridLoadErrType err = grid->loadFromFile(); if (err == Grid::NO_GRID_FILES) { printf("************** Generating new Grid **************\n"); grid->setSortBoundaryNodes(true); grid->generate(); if(writeIntermediate > 0) { grid->writeToFile(); } } if ((err == Grid::NO_GRID_FILES) || (err == Grid::NO_STENCIL_FILES)) { std::cout << "Generating stencils files\n"; grid->setNSHashDims(ns_nx, ns_ny, ns_nz); // grid->generateStencils(Grid::ST_BRUTE_FORCE); // grid->generateStencils(Grid::ST_KDTREE); grid->generateStencils(Grid::ST_HASH); if(writeIntermediate > 0) { grid->writeToFile(); } } // 0: 2D problem; 1: 3D problem ExactSolution* exact_poisson; if (dim == 3) { std::cout << "ERROR! 3D not verified yet! exiting..." << std::endl; exit(EXIT_FAILURE); // exact_poisson = new ExactNCARPoisson1(); // 3D problem is not verified yet } else { exact_poisson = new ExactNCARPoisson2(); // 2D problem works with uniform diffusion } RBFFD* der; #if 0 if (use_gpu) { der = new RBFFD_CL(RBFFD::X|RBFFD::Y|RBFFD::Z|RBFFD::LAPL,grid, dim); } else { der = new RBFFD(RBFFD::X|RBFFD::Y|RBFFD::Z|RBFFD::LAPL,grid, dim); } #endif std::cout << "Computing weights for DIM = " << dim << std::endl; // No support for ViennaCL generated weights yet. der = new RBFFD(RBFFD::X|RBFFD::Y|RBFFD::Z|RBFFD::LAPL,grid, dim); // Enable variable epsilon. Not verified to be perfected in the derivative calculation. 
// But it has improved the heat equation already int use_var_eps = settings->GetSettingAs<int>("USE_VAR_EPSILON", ProjectSettings::optional, "0"); if (use_var_eps) { double alpha = settings->GetSettingAs<double>("VAR_EPSILON_ALPHA", ProjectSettings::optional, "1.0"); double beta = settings->GetSettingAs<double>("VAR_EPSILON_BETA", ProjectSettings::optional, "1.0"); der->setVariableEpsilon(alpha, beta); } else { double epsilon = settings->GetSettingAs<double>("EPSILON", ProjectSettings::required); der->setEpsilon(epsilon); } der->computeAllWeightsForAllStencils(); if (run_derivative_tests) { std::cout << "Running Derivative Tests\n"; DerivativeTests* der_test = new DerivativeTests(dim, der, grid, true); if (use_gpu) { // Applies weights on both GPU and CPU and compares results for the first 10 stencils der_test->compareGPUandCPUDerivs(10); } // Test approximations to derivatives of functions f(x,y,z) = 0, x, y, xy, etc. etc. der_test->testAllFunctions(); // For now we can only test eigenvalues on an MPI size of 1 (we could distribute with Par-Eiegen solver) if (settings->GetSettingAs<int>("DERIVATIVE_EIGENVALUE_TEST", ProjectSettings::optional, "0")) { // FIXME: why does this happen? Perhaps because X Y and Z are unidirectional? 
// Test X and 4 eigenvalues are > 0 // Test Y and 30 are > 0 // Test Z and 36 are > 0 // NOTE: the 0 here implies we compute the eigenvalues but do not run the iterations of the random perturbation test der_test->testEigen(RBFFD::LAPL, 0); } } NCARPoisson1* poisson; // if (use_gpu) { if (true) { poisson = new NonUniformPoisson1_CL(exact_poisson, grid, der, 0, dim); } else { poisson = new NCARPoisson1(exact_poisson, grid, der, 0, dim); } poisson->setBoundaryCondition(boundary_condition); poisson->setUseDiscreteRHS(use_discrete_rhs); poisson->setUseUniformDiffusivity(use_uniform_diffusion); poisson->initialConditions(); poisson->solve(); delete(poisson); // delete(der); delete(grid); delete(settings); #if 0 Grid* grid2 = new Grid(); grid2->loadFromFile("initial_grid.ascii"); grid2->writeToFile("final_grid.ascii"); cout.flush(); #endif exit(EXIT_SUCCESS); }
int main(int argc, char** argv) { Communicator* comm_unit = new Communicator(argc, argv); ProjectSettings* settings = new ProjectSettings(argc, argv, comm_unit->getRank()); int dim = settings->GetSettingAs<int>("DIMENSION", ProjectSettings::required); int nx = settings->GetSettingAs<int>("NB_X", ProjectSettings::required); int ny = 1; int nz = 1; if (dim > 1) { ny = settings->GetSettingAs<int>("NB_Y", ProjectSettings::required); } if (dim > 2) { nz = settings->GetSettingAs<int> ("NB_Z", ProjectSettings::required); } if (dim > 3) { cout << "ERROR! Dim > 3 Not supported!" << endl; exit(EXIT_FAILURE); } double minX = settings->GetSettingAs<double>("MIN_X", ProjectSettings::optional, "-1."); double maxX = settings->GetSettingAs<double>("MAX_X", ProjectSettings::optional, "1."); double minY = settings->GetSettingAs<double>("MIN_Y", ProjectSettings::optional, "-1."); double maxY = settings->GetSettingAs<double>("MAX_Y", ProjectSettings::optional, "1."); double minZ = settings->GetSettingAs<double>("MIN_Z", ProjectSettings::optional, "-1."); double maxZ = settings->GetSettingAs<double>("MAX_Z", ProjectSettings::optional, "1."); double stencil_size = settings->GetSettingAs<int>("STENCIL_SIZE", ProjectSettings::required); int use_gpu = settings->GetSettingAs<int>("USE_GPU", ProjectSettings::optional, "1"); Grid* grid = NULL; if (dim == 1) { grid = new RegularGrid(nx, 1, minX, maxX, 0., 0.); } else if (dim == 2) { grid = new RegularGrid(nx, ny, minX, maxX, minY, maxY); } else if (dim == 3) { grid = new RegularGrid(nx, ny, nz, minX, maxX, minY, maxY, minZ, maxZ); } else { cout << "ERROR! Dim > 3 Not Supported!" 
<< endl; } grid->setSortBoundaryNodes(true); grid->generate(); grid->generateStencils(stencil_size, Grid::ST_BRUTE_FORCE); // nearest nb_points grid->writeToFile(); // 0: 2D problem; 1: 3D problem //ExactSolution* exact_heat_regulargrid = new ExactRegularGrid(dim, 1.0, 1.0); RBFFD* der; if (use_gpu) { der = new RBFFD_CL(RBFFD::X | RBFFD::Y | RBFFD::Z | RBFFD::LAPL, grid, dim); } else { der = new RBFFD(RBFFD::X | RBFFD::Y | RBFFD::Z | RBFFD::LAPL, grid, dim); } double epsilon = settings->GetSettingAs<double>("EPSILON"); der->setEpsilon(epsilon); printf("start computing weights\n"); //vector<StencilType>& stencil = grid->getStencils(); vector<NodeType>& rbf_centers = grid->getNodeList(); der->computeAllWeightsForAllStencils(); cout << "end computing weights" << endl; vector<double> u(rbf_centers.size(),1.); cout << "start computing derivative (on CPU)" << endl; vector<double> xderiv_cpu(rbf_centers.size()); vector<double> xderiv_gpu(rbf_centers.size()); vector<double> yderiv_cpu(rbf_centers.size()); vector<double> yderiv_gpu(rbf_centers.size()); vector<double> zderiv_cpu(rbf_centers.size()); vector<double> zderiv_gpu(rbf_centers.size()); vector<double> lderiv_cpu(rbf_centers.size()); vector<double> lderiv_gpu(rbf_centers.size()); // Verify that the CPU works // NOTE: we pass booleans at the end of the param list to indicate that // the function "u" is new (true) or same as previous calls (false). This // helps avoid overhead of passing "u" to the GPU. 
der->RBFFD::applyWeightsForDeriv(RBFFD::X, u, xderiv_cpu, true); der->RBFFD::applyWeightsForDeriv(RBFFD::Y, u, yderiv_cpu, false); // originally false der->RBFFD::applyWeightsForDeriv(RBFFD::Z, u, zderiv_cpu, false); // orig false der->RBFFD::applyWeightsForDeriv(RBFFD::LAPL, u, lderiv_cpu, false); // orig false der->applyWeightsForDeriv(RBFFD::X, u, xderiv_gpu, true); der->applyWeightsForDeriv(RBFFD::Y, u, yderiv_gpu, false); // orig false der->applyWeightsForDeriv(RBFFD::Z, u, zderiv_gpu, false); // orig: false der->applyWeightsForDeriv(RBFFD::LAPL, u, lderiv_gpu, false); // orig: false double max_diff = 0.; for (size_t i = 0; i < rbf_centers.size(); i++) { double xdiff = fabs(xderiv_gpu[i] - xderiv_cpu[i]); double ydiff = fabs(yderiv_gpu[i] - yderiv_cpu[i]); double zdiff = fabs(zderiv_gpu[i] - zderiv_cpu[i]); double ldiff = fabs(lderiv_gpu[i] - lderiv_cpu[i]); if (xdiff > max_diff) { max_diff = xdiff; } if (ydiff > max_diff) { max_diff = ydiff; } if (zdiff > max_diff) { max_diff = zdiff; } if (ldiff > max_diff) { max_diff = ldiff; } // std::cout << "cpu_x_deriv[" << i << "] - gpu_x_deriv[" << i << "] = " << xderiv_cpu[i] - xderiv_gpu[i] << std::endl; if (( xdiff > 1e-5) || ( ydiff > 1e-5) || ( zdiff > 1e-5) || ( ldiff > 1e-5)) { std::cout << "WARNING! 
SINGLE PRECISION GPU COULD NOT CALCULATE DERIVATIVE WELL ENOUGH!\n"; std::cout << "Test failed on " << i << std::endl; std::cout << "X: " << xderiv_gpu[i] - xderiv_cpu[i] << std:: endl; std::cout << "X: " << xderiv_gpu[i] << ", " << xderiv_cpu[i] << std:: endl; std::cout << "Y: " << yderiv_gpu[i] - yderiv_cpu[i] << std:: endl; std::cout << "Y: " << yderiv_gpu[i] << ", " << yderiv_cpu[i] << std:: endl; std::cout << "Z: " << zderiv_gpu[i] - zderiv_cpu[i] << std:: endl; std::cout << "Z: " << zderiv_gpu[i] << ", " << zderiv_cpu[i] << std:: endl; std::cout << "LAPL: " << lderiv_gpu[i] - lderiv_cpu[i] << std:: endl; exit(EXIT_FAILURE); } } std::cout << "Max difference between weights: " << max_diff << std::endl; std::cout << "CONGRATS! ALL DERIVATIVES WERE CALCULATED THE SAME IN OPENCL AND ON THE CPU\n"; // (WITH AN AVERAGE ERROR OF:" << avg_error << std::endl; // der->applyWeightsForDeriv(RBFFD::Y, u, yderiv); // der->applyWeightsForDeriv(RBFFD::LAPL, u, lapl_deriv); #if 0 if (settings->GetSettingAs<int>("RUN_DERIVATIVE_TESTS")) { RBFFDTests* der_test = new DerivativeTests(); der_test->testAllFunctions(*der, *grid); } #endif // delete(subdomain); delete(grid); delete(settings); cout.flush(); exit(EXIT_SUCCESS); }
int main(int argc, char** argv) { TimerList tm; tm["total"] = new Timer("[Main] Total runtime for this proc"); tm["grid"] = new Timer("[Main] Grid generation"); tm["stencils"] = new Timer("[Main] Stencil generation"); tm["settings"] = new Timer("[Main] Load settings"); tm["decompose"] = new Timer("[Main] Decompose domain"); tm["consolidate"] = new Timer("[Main] Consolidate subdomain solutions"); tm["updates"] = new Timer("[Main] Broadcast solution updates"); tm["send"] = new Timer("[Main] Send subdomains to other processors (master only)"); tm["receive"] = new Timer("[Main] Receive subdomain from master (clients only)"); tm["timestep"] = new Timer("[Main] Advance One Timestep"); tm["tests"] = new Timer("[Main] Test stencil weights"); tm["weights"] = new Timer("[Main] Compute all stencils weights"); tm["oneWeight"] = new Timer("[Main] Compute single stencil weights"); tm["heat_init"] = new Timer("[Main] Initialize heat"); // grid should only be valid instance for MASTER Grid* grid = NULL; Domain* subdomain; tm["total"]->start(); Communicator* comm_unit = new Communicator(argc, argv); cout << " Got Rank: " << comm_unit->getRank() << endl; cout << " Got Size: " << comm_unit->getSize() << endl; tm["settings"]->start(); ProjectSettings* settings = new ProjectSettings(argc, argv, comm_unit->getRank()); int dim = settings->GetSettingAs<int>("DIMENSION", ProjectSettings::required); //----------------- fillGlobalProjectSettings(dim, settings); //----------------- int max_num_iters = settings->GetSettingAs<int>("MAX_NUM_ITERS", ProjectSettings::required); double max_global_rel_error = settings->GetSettingAs<double>("MAX_GLOBAL_REL_ERROR", ProjectSettings::optional, "1e-1"); double max_local_rel_error = settings->GetSettingAs<double>("MAX_LOCAL_REL_ERROR", ProjectSettings::optional, "1e-1"); int use_gpu = settings->GetSettingAs<int>("USE_GPU", ProjectSettings::optional, "1"); int local_sol_dump_frequency = settings->GetSettingAs<int>("LOCAL_SOL_DUMP_FREQUENCY", 
ProjectSettings::optional, "100"); int global_sol_dump_frequency = settings->GetSettingAs<int>("GLOBAL_SOL_DUMP_FREQUENCY", ProjectSettings::optional, "200"); int prompt_to_continue = settings->GetSettingAs<int>("PROMPT_TO_CONTINUE", ProjectSettings::optional, "0"); int debug = settings->GetSettingAs<int>("DEBUG", ProjectSettings::optional, "0"); double start_time = settings->GetSettingAs<double>("START_TIME", ProjectSettings::optional, "0.0"); double end_time = settings->GetSettingAs<double>("END_TIME", ProjectSettings::optional, "1.0"); double dt = settings->GetSettingAs<double>("DT", ProjectSettings::optional, "1e-5"); int timescheme = settings->GetSettingAs<int>("TIME_SCHEME", ProjectSettings::optional, "1"); int weight_method = settings->GetSettingAs<int>("WEIGHT_METHOD", ProjectSettings::optional, "1"); int compute_eigenvalues = settings->GetSettingAs<int>("DERIVATIVE_EIGENVALUE_TEST", ProjectSettings::optional, "0"); int use_eigen_dt = settings->GetSettingAs<int>("USE_EIGEN_DT", ProjectSettings::optional, "1"); if (comm_unit->isMaster()) { int ns_nx = settings->GetSettingAs<int>("NS_NB_X", ProjectSettings::optional, "10"); int ns_ny = settings->GetSettingAs<int>("NS_NB_Y", ProjectSettings::optional, "10"); int ns_nz = settings->GetSettingAs<int>("NS_NB_Z", ProjectSettings::optional, "10"); int stencil_size = settings->GetSettingAs<int>("STENCIL_SIZE", ProjectSettings::required); tm["settings"]->stop(); grid = getGrid(dim); grid->setMaxStencilSize(stencil_size); Grid::GridLoadErrType err = grid->loadFromFile(); if (err == Grid::NO_GRID_FILES) { printf("************** Generating new Grid **************\n"); //grid->setSortBoundaryNodes(true); // grid->setSortBoundaryNodes(true); tm["grid"]->start(); grid->generate(); tm["grid"]->stop(); grid->writeToFile(); } if ((err == Grid::NO_GRID_FILES) || (err == Grid::NO_STENCIL_FILES)) { std::cout << "Generating stencils files\n"; tm["stencils"]->start(); grid->setNSHashDims(ns_nx, ns_ny, ns_nz); // 
grid->generateStencils(Grid::ST_BRUTE_FORCE); // DEFINTELY: exact grid->generateStencils(Grid::ST_KDTREE); // MIGHT BE: approximate // grid->generateStencils(Grid::ST_HASH); tm["stencils"]->stop(); grid->writeToFile(); tm.writeToFile("gridgen_timer_log"); } int x_subdivisions = comm_unit->getSize(); // reduce this to impact y dimension as well int y_subdivisions = (comm_unit->getSize() - x_subdivisions) + 1; // TODO: load subdomain from disk // Construct a new domain given a grid. // TODO: avoid filling sets Q, B, etc; just think of it as a copy constructor for a grid Domain* original_domain = new Domain(dim, grid, comm_unit->getSize()); // pre allocate pointers to all of the subdivisions std::vector<Domain*> subdomain_list(x_subdivisions*y_subdivisions); // allocate and fill in details on subdivisions std::cout << "Generating subdomains\n"; tm["decompose"]->start(); //original_domain->printVerboseDependencyGraph(); original_domain->generateDecomposition(subdomain_list, x_subdivisions, y_subdivisions); tm["decompose"]->stop(); tm["send"]->start(); subdomain = subdomain_list[0]; for (int i = 1; i < comm_unit->getSize(); i++) { std::cout << "Sending subdomain[" << i << "]\n"; comm_unit->sendObject(subdomain_list[i], i); } tm["send"]->stop(); printf("----------------------\nEND MASTER ONLY\n----------------------\n\n\n"); } else { tm["settings"]->stop(); cout << "MPI RANK " << comm_unit->getRank() << ": waiting to receive subdomain" << endl; tm["receive"]->start(); subdomain = new Domain(); // EMPTY object that will be filled by MPI comm_unit->receiveObject(subdomain, 0); // Receive from CPU (0) tm["receive"]->stop(); } comm_unit->barrier(); if (debug) { subdomain->printVerboseDependencyGraph(); subdomain->printNodeList("All Centers Needed by This Process"); printf("CHECKING STENCILS: "); for (int irbf = 0; irbf < (int)subdomain->getStencilsSize(); irbf++) { // printf("Stencil[%d] = ", irbf); StencilType& s = subdomain->getStencil(irbf); if (irbf == s[0]) { // 
printf("PASS\n"); // subdomain->printStencil(s, "S"); } else { printf("FAIL on stencil %d\n", irbf); exit(EXIT_FAILURE); } } printf("OK\n"); } RBFFD* der; if (use_gpu) { der = new RBFFD_CL(RBFFD::LAPL | RBFFD::X | RBFFD::Y | RBFFD::Z, subdomain, dim, comm_unit->getRank()); } else { der = new RBFFD(RBFFD::LAPL | RBFFD::X | RBFFD::Y | RBFFD::Z, subdomain, dim, comm_unit->getRank()); } int use_var_eps = settings->GetSettingAs<int>("USE_VAR_EPSILON", ProjectSettings::optional, "0"); if (use_var_eps) { double alpha = settings->GetSettingAs<double>("VAR_EPSILON_ALPHA", ProjectSettings::optional, "1.0"); double beta = settings->GetSettingAs<double>("VAR_EPSILON_BETA", ProjectSettings::optional, "1.0"); //der->setVariableEpsilon(subdomain->getStencilRadii(), subdomain->getStencils(), alpha, beta); der->setVariableEpsilon(alpha, beta); } else { double epsilon = settings->GetSettingAs<double>("EPSILON", ProjectSettings::required); der->setEpsilon(epsilon); } #if 0 der->setWeightType(RBFFD::ContourSVD); der->setWeightType(RBFFD::Direct); #else der->setWeightType((RBFFD::WeightType)weight_method); #endif // Try loading all the weight files int err = der->loadFromFile(RBFFD::X); err += der->loadFromFile(RBFFD::Y); err += der->loadFromFile(RBFFD::Z); err += der->loadFromFile(RBFFD::LAPL); if (err) { printf("start computing weights\n"); tm["weights"]->start(); // NOTE: good test for Direct vs Contour // Grid 11x11, vareps=0.05; Look at stencil 12. SHould have -100, 25, // 25, 25, 25 (i.e., -4,1,1,1,1) not sure why scaling is off. 
der->computeAllWeightsForAllStencils(); tm["weights"]->stop(); cout << "end computing weights" << endl; der->writeToFile(RBFFD::X); der->writeToFile(RBFFD::Y); der->writeToFile(RBFFD::Z); der->writeToFile(RBFFD::LAPL); cout << "end write weights to file" << endl; } if (settings->GetSettingAs<int>("RUN_DERIVATIVE_TESTS", ProjectSettings::optional, "1")) { bool weightsPreComputed = true; bool exitIfTestFailed = settings->GetSettingAs<int>("BREAK_ON_DERIVATIVE_TESTS", ProjectSettings::optional, "1"); tm["tests"]->start(); // The test class only computes weights if they havent been done already DerivativeTests* der_test = new DerivativeTests(dim, der, subdomain, weightsPreComputed); if (use_gpu) { // Applies weights on both GPU and CPU and compares results for the first 10 stencils der_test->compareGPUandCPUDerivs(10); } // Test approximations to derivatives of functions f(x,y,z) = 0, x, y, xy, etc. etc. der_test->testAllFunctions(exitIfTestFailed); // For now we can only test eigenvalues on an MPI size of 1 (we could distribute with Par-Eiegen solver) if (comm_unit->getSize() == 1) { if (compute_eigenvalues) { // FIXME: why does this happen? Perhaps because X Y and Z are unidirectional? // Test X and 4 eigenvalues are > 0 // Test Y and 30 are > 0 // Test Z and 36 are > 0 // NOTE: the 0 here implies we compute the eigenvalues but do not run the iterations of the random perturbation test der_test->testEigen(RBFFD::LAPL, 0); } } tm["tests"]->stop(); } // SOLVE HEAT EQUATION ExactSolution* exact = getExactSolution(dim); TimeDependentPDE* pde; tm["heat_init"]->start(); // We need to provide comm_unit to pass ghost node info #if 0 if (use_gpu) { pde = new HeatPDE(subdomain, der, comm_unit, true); } else #endif { // Implies initial conditions are generated // true here indicates the weights are computed. 
pde = new HeatPDE(subdomain, der, comm_unit, uniformDiffusion, true); } pde->setStartEndTime(start_time, end_time); pde->fillInitialConditions(exact); // Broadcast updates for timestep, initial conditions for ghost nodes, etc. tm["updates"]->start(); comm_unit->broadcastObjectUpdates(pde); comm_unit->barrier(); tm["updates"]->stop(); tm["heat_init"]->stop(); //TODO: pde->setRelErrTol(max_global_rel_error); // Setup a logging class that will monitor our iteration and dump intermediate files #if USE_VTK // TODO: update VtuPDEWriter for the new PDE classes PDEWriter* writer = new VtuPDEWriter(subdomain, pde, comm_unit, local_sol_dump_frequency, global_sol_dump_frequency); #else PDEWriter* writer = new PDEWriter(subdomain, pde, comm_unit, local_sol_dump_frequency, global_sol_dump_frequency); #endif // Test DT: // 1) get the minimum avg stencil radius (for stencil area--i.e., dx^2) double avgdx = 1000.; std::vector<StencilType>& sten = subdomain->getStencils(); for (size_t i=0; i < sten.size(); i++) { double dx = subdomain->getStencilRadius(i); if (dx < avgdx) { avgdx = dx; } } // Laplacian = d^2/dx^2 double sten_area = avgdx*avgdx; // Not sure where Gordon came up with this parameter. // for second centered difference and euler time we have nu = 0.5 double nu = 0.1; // dt <= nu/dx^2 // is valid for stability in some FD schemes. double max_dt = nu*(sten_area); printf("dt = %f (max_dt = %f; 0.5dx^2 = %f)\n", dt, max_dt, 0.5*sten_area); // This appears to be consistent with Chinchipatnam2006 (Thesis) // TODO: get more details on CFL for RBFFD // note: checking stability only works if we have all weights for all // nodes, so we dont do it in parallel if (compute_eigenvalues && (comm_unit->getSize() == 1)) { RBFFD::EigenvalueOutput eigs = der->getEigenvalues(); max_dt = 2. / eigs.max_neg_eig; printf("Suggested max_dt based on eigenvalues (2/lambda_max)= %f\n", max_dt); // CFL condition: if (dt > max_dt) { std::cout << "WARNING! 
your choice of timestep (" << dt << ") is TOO LARGE for to maintain stability of system. According to eigenvalues, it must be less than " << max_dt << std::endl; if (use_eigen_dt) { dt = max_dt; } else { //exit(EXIT_FAILURE); } } } std::cout << "[MAIN] ********* USING TIMESTEP dt=" << dt << " ********** " << std::endl; // subdomain->printCenterMemberships(subdomain->G, "G = " ); //subdomain->printBoundaryIndices("INDICES OF GLOBAL BOUNDARY NODES: "); int iter; int num_iters = (int) ((end_time - start_time) / dt); std::cout << "NUM_ITERS = " << num_iters << std::endl; for (iter = 0; iter < num_iters && iter < max_num_iters; iter++) { writer->update(iter); #if 0 char label[256]; sprintf(label, "LOCAL INPUT SOLUTION [local_indx (global_indx)] FOR ITERATION %d", iter); pde->printSolution(label); #endif tm["timestep"]->start(); pde->advance((TimeDependentPDE::TimeScheme)timescheme, dt); tm["timestep"]->stop(); // This just double checks that all procs have ghost node info. // pde->advance(..) should broadcast intermediate updates as needed, // but updated solution. 
tm["updates"]->start(); comm_unit->broadcastObjectUpdates(pde); comm_unit->barrier(); tm["updates"]->stop(); if (!(iter % local_sol_dump_frequency)) { std::cout << "\n*********** Rank " << comm_unit->getRank() << " Local Solution [ Iteration: " << iter << " (t = " << pde->getTime() << ") ] *************" << endl; pde->checkLocalError(exact, max_local_rel_error); } if (!(iter % global_sol_dump_frequency)) { tm["consolidate"]->start(); comm_unit->consolidateObjects(pde); comm_unit->barrier(); tm["consolidate"]->stop(); if (comm_unit->isMaster()) { std::cout << "\n*********** Global Solution [ Iteration: " << iter << " (t = " << pde->getTime() << ") ] *************" << endl; pde->checkGlobalError(exact, grid, max_global_rel_error); } } #if 0 sprintf(label, "LOCAL UPDATED SOLUTION [local_indx (global_indx)] AFTER %d ITERATIONS", iter+1); pde->printSolution(label); #endif // double nrm = pde->maxNorm(); if (prompt_to_continue && comm_unit->isMaster()) { std::string buf; cout << "Press [Enter] to continue" << std::endl; cin.get(); } } #if 1 printf("after heat\n"); // NOTE: all local subdomains have a U_G solution which is consolidated // into the MASTER process "global_U_G" solution. 
tm["consolidate"]->start(); comm_unit->consolidateObjects(pde); comm_unit->barrier(); tm["consolidate"]->stop(); // subdomain->writeGlobalSolutionToFile(-1); std::cout << "Checking Solution on Master\n"; if (comm_unit->getRank() == 0) { pde->writeGlobalGridAndSolutionToFile(grid->getNodeList(), (char*) "FINAL_SOLUTION.txt"); #if 0 // NOTE: the final solution is assembled, but we have to use the // GLOBAL node list instead of a local subdomain node list cout << "FINAL ITER: " << iter << endl; std::vector<double> final_sol(grid->getNodeListSize()); ifstream fin; fin.open("FINAL_SOLUTION.txt"); int count = 0; for (int count = 0; count < final_sol.size(); count++) { Vec3 node; double val; fin >> node[0] >> node[1] >> node[2] >> val; if (fin.good()) { final_sol[count] = val; // std::cout << "Read: " << node << ", " << final_sol[count] << std::endl; } } fin.close(); #endif std::cout << "============== Verifying Accuracy of Final Solution =============\n"; pde->checkGlobalError(exact, grid, max_global_rel_error); std::cout << "============== Solution Valid =============\n"; delete(grid); }
void assemble_System_Stokes( RBFFD& der, Grid& grid, UBLAS_MAT_t& A, UBLAS_VEC_t& F, UBLAS_VEC_t& U_exact, UBLAS_VEC_t& U_exact_compressed){ double eta = 1.; //double Ra = 1.e6; // We have different nb_stencils and nb_nodes when we parallelize. The subblocks might not be full unsigned int nb_stencils = grid.getStencilsSize(); // unsigned int nb_nodes = grid.getNodeListSize(); // unsigned int max_stencil_size = grid.getMaxStencilSize(); unsigned int N = nb_stencils; unsigned int nb_bnd = grid.getBoundaryIndicesSize(); // --------------------------------------------------- //------------- Fill the RHS of the System ------------- // This is our manufactured solution: SphericalHarmonic::Sph32 UU; SphericalHarmonic::Sph32105 VV; SphericalHarmonic::Sph32 WW; SphericalHarmonic::Sph32 PP; std::vector<NodeType>& nodes = grid.getNodeList(); //------------- Fill F ------------- // U for (unsigned int j = 0; j < nb_bnd; j++) { unsigned int row_ind = j + 0*(N); NodeType& node = nodes[j]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); U_exact(row_ind) = UU.eval(Xx, Yy, Zz); } for (unsigned int j = nb_bnd; j < N; j++) { unsigned int row_ind = j + 0*(N-nb_bnd); unsigned int uncompressed_row_ind = j + 0*(N); NodeType& node = nodes[j]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); U_exact(uncompressed_row_ind) = UU.eval(Xx,Yy,Zz); U_exact_compressed(row_ind-nb_bnd) = UU.eval(Xx,Yy,Zz); F(row_ind-nb_bnd) = -eta * UU.lapl(Xx,Yy,Zz) + PP.d_dx(Xx,Yy,Zz); } // V for (unsigned int j = 0; j < nb_bnd; j++) { unsigned int row_ind = j + 1*(N); NodeType& node = nodes[j]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); U_exact(row_ind) = VV.eval(Xx,Yy,Zz); } for (unsigned int j = nb_bnd; j < N; j++) { unsigned int row_ind = j + 1*(N-nb_bnd); unsigned int uncompressed_row_ind = j + 1*(N); NodeType& node = nodes[j]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); U_exact(uncompressed_row_ind) = VV.eval(Xx,Yy,Zz); 
U_exact_compressed(row_ind-nb_bnd) = VV.eval(Xx,Yy,Zz); F(row_ind-nb_bnd) = -eta * VV.lapl(Xx,Yy,Zz) + PP.d_dy(Xx,Yy,Zz); } // W for (unsigned int j = 0; j < nb_bnd; j++) { unsigned int row_ind = j + 2*(N); NodeType& node = nodes[j]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); U_exact(row_ind) = WW.eval(Xx,Yy,Zz); } for (unsigned int j = nb_bnd; j < N; j++) { unsigned int row_ind = j + 2*(N-nb_bnd); unsigned int uncompressed_row_ind = j + 2*(N); NodeType& node = nodes[j]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); U_exact(uncompressed_row_ind) = WW.eval(Xx,Yy,Zz); U_exact_compressed(row_ind-nb_bnd) = WW.eval(Xx,Yy,Zz); F(row_ind-nb_bnd) = -eta * WW.lapl(Xx,Yy,Zz) + PP.d_dz(Xx,Yy,Zz); } // P for (unsigned int j = 0; j < nb_bnd; j++) { unsigned int row_ind = j + 3*(N); NodeType& node = nodes[j]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); U_exact(row_ind) = PP.eval(Xx,Yy,Zz); } for (unsigned int j = nb_bnd; j < N; j++) { unsigned int row_ind = j + 3*(N-nb_bnd); unsigned int uncompressed_row_ind = j + 3*(N); NodeType& node = nodes[j]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); U_exact(uncompressed_row_ind) = PP.eval(Xx,Yy,Zz); U_exact_compressed(row_ind-nb_bnd) = PP.eval(Xx,Yy,Zz); F(row_ind-nb_bnd) = UU.d_dx(Xx,Yy,Zz) + VV.d_dy(Xx,Yy,Zz) + WW.d_dz(Xx,Yy,Zz); } // ----------------- Fill LHS -------------------- // // U (block) row for (unsigned int i = nb_bnd; i < N; i++) { StencilType& st = grid.getStencil(i); // System has form: // -lapl(U) + grad(P) = f // div(U) = 0 // TODO: change these to *SFC weights (when computed) double* ddx = der.getStencilWeights(RBFFD::XSFC, i); double* lapl = der.getStencilWeights(RBFFD::LSFC, i); unsigned int diag_row_ind = i + 0*(N-nb_bnd); for (unsigned int j = 0; j < st.size(); j++) { unsigned int diag_col_ind = st[j] + 0*(N-nb_bnd); if (st[j] < (int)nb_bnd) { // Need the exact solution at stencil node j NodeType& node = nodes[st[j]]; double Xx 
= node.x(); double Yy = node.y(); double Zz = node.z(); F[diag_row_ind-nb_bnd] -= -eta * UU.lapl(Xx, Yy, Zz); } else { A(diag_row_ind-nb_bnd, diag_col_ind-nb_bnd) = -eta * lapl[j]; } } for (unsigned int j = 0; j < st.size(); j++) { unsigned int diag_col_ind = st[j] + 3*(N-nb_bnd); if (st[j] < (int)nb_bnd) { // Need the exact solution at stencil node j NodeType& node = nodes[st[j]]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); F[diag_row_ind-nb_bnd] -= PP.d_dx(Xx, Yy, Zz); } else { A(diag_row_ind-nb_bnd, diag_col_ind-nb_bnd) = ddx[j]; } } } // V (block) row for (unsigned int i = nb_bnd; i < N; i++) { StencilType& st = grid.getStencil(i); double* ddy = der.getStencilWeights(RBFFD::YSFC, i); double* lapl = der.getStencilWeights(RBFFD::LSFC, i); unsigned int diag_row_ind = i + 1*(N-nb_bnd); for (unsigned int j = 0; j < st.size(); j++) { unsigned int diag_col_ind = st[j] + 1*(N-nb_bnd); if (st[j] < (int)nb_bnd) { // Need the exact solution at stencil node j NodeType& node = nodes[st[j]]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); F[diag_row_ind-nb_bnd] -= -eta * VV.lapl(Xx, Yy, Zz); } else { A(diag_row_ind-nb_bnd, diag_col_ind-nb_bnd) = -eta * lapl[j]; } } for (unsigned int j = 0; j < st.size(); j++) { unsigned int diag_col_ind = st[j] + 3*(N-nb_bnd); if (st[j] < (int)nb_bnd) { // Need the exact solution at stencil node j NodeType& node = nodes[st[j]]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); F[diag_row_ind-nb_bnd] -= PP.d_dy(Xx, Yy, Zz); } else { A(diag_row_ind-nb_bnd, diag_col_ind-nb_bnd) = ddy[j]; } } } // W (block) row for (unsigned int i = nb_bnd; i < N; i++) { StencilType& st = grid.getStencil(i); double* ddz = der.getStencilWeights(RBFFD::ZSFC, i); double* lapl = der.getStencilWeights(RBFFD::LSFC, i); unsigned int diag_row_ind = i + 2*(N-nb_bnd); for (unsigned int j = 0; j < st.size(); j++) { unsigned int diag_col_ind = st[j] + 2*(N-nb_bnd); if (st[j] < (int)nb_bnd) { NodeType& node = 
nodes[st[j]]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); F[diag_row_ind-nb_bnd] -= -eta * WW.lapl(Xx, Yy, Zz); } else { A(diag_row_ind-nb_bnd, diag_col_ind-nb_bnd) = -eta * lapl[j]; } } for (unsigned int j = 0; j < st.size(); j++) { unsigned int diag_col_ind = st[j] + 3*(N-nb_bnd); if (st[j] < (int)nb_bnd) { NodeType& node = nodes[st[j]]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); F[diag_row_ind-nb_bnd] -= PP.d_dz(Xx, Yy, Zz); } else { A(diag_row_ind-nb_bnd, diag_col_ind-nb_bnd) = ddz[j]; } } } // P (block) row for (unsigned int i = nb_bnd; i < N; i++) { StencilType& st = grid.getStencil(i); double* ddx = der.getStencilWeights(RBFFD::XSFC, i); double* ddy = der.getStencilWeights(RBFFD::YSFC, i); double* ddz = der.getStencilWeights(RBFFD::ZSFC, i); unsigned int diag_row_ind = i + 3*(N-nb_bnd); // ddx(U)-component for (unsigned int j = 0; j < st.size(); j++) { unsigned int diag_col_ind = st[j] + 0*(N-nb_bnd); if (st[j] < (int)nb_bnd) { NodeType& node = nodes[st[j]]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); F[diag_row_ind-nb_bnd] -= UU.d_dx(Xx, Yy, Zz); } else { A(diag_row_ind-nb_bnd, diag_col_ind-nb_bnd) = ddx[j]; } } // ddy(V)-component for (unsigned int j = 0; j < st.size(); j++) { unsigned int diag_col_ind = st[j] + 1*(N-nb_bnd); if (st[j] < (int)nb_bnd) { NodeType& node = nodes[st[j]]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); F[diag_row_ind-nb_bnd] -= VV.d_dx(Xx, Yy, Zz); } else { A(diag_row_ind-nb_bnd, diag_col_ind-nb_bnd) = ddy[j]; } } // ddz(W)-component for (unsigned int j = 0; j < st.size(); j++) { unsigned int diag_col_ind = st[j] + 2*(N-nb_bnd); if (st[j] < (int)nb_bnd) { NodeType& node = nodes[st[j]]; double Xx = node.x(); double Yy = node.y(); double Zz = node.z(); F[diag_row_ind-nb_bnd] -= WW.d_dx(Xx, Yy, Zz); } else { A(diag_row_ind-nb_bnd, diag_col_ind-nb_bnd) = ddz[j]; } } } }