// Performs a full solve with the PPCG solver void ppcg_driver(Chunk* chunks, Settings* settings, double rx, double ry, double* error) { int tt; double rro = 0.0; int num_ppcg_iters = 0; // Perform CG initialisation cg_init_driver(chunks, settings, rx, ry, &rro); // Iterate till convergence for(tt = 0; tt < settings->max_iters; ++tt) { // If we have already ran PPCG inner iterations, continue // If we are error switching, check the error // If not error switching, perform preset iterations // Perform enough iterations to converge eigenvalues bool is_switch_to_ppcg = (num_ppcg_iters) || (settings->error_switch ? (*error < settings->eps_lim) && (tt > CG_ITERS_FOR_EIGENVALUES) : (tt > settings->presteps) && (*error < ERROR_SWITCH_MAX)); if(!is_switch_to_ppcg) { // Perform a CG iteration cg_main_step_driver(chunks, settings, tt, &rro, error); } else { num_ppcg_iters++; // If first step perform initialisation if(num_ppcg_iters == 1) { // Initialise the eigenvalues and Chebyshev coefficients eigenvalue_driver_initialise(chunks, settings, tt); cheby_coef_driver( chunks, settings, settings->ppcg_inner_steps); ppcg_init_driver(chunks, settings, &rro); } ppcg_main_step_driver(chunks, settings, &rro, error); } halo_update_driver(chunks, settings, 1); if(fabs(*error) < settings->eps) break; } print_and_log(settings, "CG: \t\t\t%d iterations\n", tt-num_ppcg_iters+1); print_and_log(settings, "PPCG: \t\t\t%d iterations (%d inner iterations per)\n", num_ppcg_iters, settings->ppcg_inner_steps); }
// Allocates all of the field buffers void kernel_initialise( Settings* settings, int x, int y, KView* density0, KView* density, KView* energy0, KView* energy, KView* u, KView* u0, KView* p, KView* r, KView* mi, KView* w, KView* kx, KView* ky, KView* sd, KView* volume, KView* x_area, KView* y_area, KView* cell_x, KView* cell_y, KView* cell_dx, KView* cell_dy, KView* vertex_dx, KView* vertex_dy, KView* vertex_x, KView* vertex_y, KView* comms_buffer, Kokkos::View<double*>::HostMirror* host_comms_mirror, double** cg_alphas, double** cg_betas, double** cheby_alphas, double** cheby_betas) { print_and_log(settings, "Performing this solve with the Kokkos Functors %s solver\n", settings->solver_name); Kokkos::initialize(); new(density0) KView("density0", x*y); new(density) KView("density", x*y); new(energy0) KView("energy0", x*y); new(energy) KView("energy", x*y); new(u) KView("u", x*y); new(u0) KView("u0", x*y); new(p) KView("p", x*y); new(r) KView("r", x*y); new(mi) KView("mi", x*y); new(w) KView("w", x*y); new(kx) KView("kx", x*y); new(ky) KView("ky", x*y); new(sd) KView("sd", x*y); new(volume) KView("volume", x*y); new(x_area) KView("x_area", (x+1)*y); new(y_area) KView("y_area", x*(y+1)); new(cell_x) KView("cell_x", x); new(cell_y) KView("cell_y", y); new(cell_dx) KView("cell_dx", x); new(cell_dy) KView("cell_dy", y); new(vertex_dx) KView("vertex_dx", (x+1)); new(vertex_dy) KView("vertex_dy", (y+1)); new(vertex_x) KView("vertex_x", (x+1)); new(vertex_y) KView("vertex_y", (y+1)); new(comms_buffer) KView("comms_buffer", MAX(x, y)*settings->halo_depth); new(host_comms_mirror) KView::HostMirror(); *host_comms_mirror = Kokkos::create_mirror_view(*comms_buffer); allocate_buffer(cg_alphas, settings->max_iters, 1); allocate_buffer(cg_betas, settings->max_iters, 1); allocate_buffer(cheby_alphas, settings->max_iters, 1); allocate_buffer(cheby_betas, settings->max_iters, 1); }
// Allocates all of the field buffers void kernel_initialise( Settings* settings, int x, int y, double** density0, double** density, double** energy0, double** energy, double** u, double** u0, double** p, double** r, double** mi, double** w, double** kx, double** ky, double** sd, double** volume, double** x_area, double** y_area, double** cell_x, double** cell_y, double** cell_dx, double** cell_dy, double** vertex_dx, double** vertex_dy, double** vertex_x, double** vertex_y, double** cg_alphas, double** cg_betas, double** cheby_alphas, double** cheby_betas) { print_and_log(settings, "Performing this solve with the serial %s solver\n", settings->solver_name); allocate_buffer(density0, x, y); allocate_buffer(density, x, y); allocate_buffer(energy0, x, y); allocate_buffer(energy, x, y); allocate_buffer(u, x, y); allocate_buffer(u0, x, y); allocate_buffer(p, x, y); allocate_buffer(r, x, y); allocate_buffer(mi, x, y); allocate_buffer(w, x, y); allocate_buffer(kx, x, y); allocate_buffer(ky, x, y); allocate_buffer(sd, x, y); allocate_buffer(volume, x, y); allocate_buffer(x_area, x+1, y); allocate_buffer(y_area, x, y+1); allocate_buffer(cell_x, x, 1); allocate_buffer(cell_y, 1, y); allocate_buffer(cell_dx, x, 1); allocate_buffer(cell_dy, 1, y); allocate_buffer(vertex_dx, x+1, 1); allocate_buffer(vertex_dy, 1, y+1); allocate_buffer(vertex_x, x+1, 1); allocate_buffer(vertex_y, 1, y+1); allocate_buffer(cg_alphas, settings->max_iters, 1); allocate_buffer(cg_betas, settings->max_iters, 1); allocate_buffer(cheby_alphas, settings->max_iters, 1); allocate_buffer(cheby_betas, settings->max_iters, 1); }
// Performs a full solve with the Jacobi solver kernels void jacobi_driver( Chunk* chunks, Settings* settings, double rx, double ry, double* error) { jacobi_init_driver(chunks, settings, rx, ry); // Iterate till convergence int tt; for(tt = 0; tt < settings->max_iters; ++tt) { jacobi_main_step_driver(chunks, settings, tt, error); halo_update_driver(chunks, settings, 1); if(fabs(*error) < settings->eps) break; } print_and_log(settings, "Jacobi: \t\t%d iterations\n", tt); }
int main(int argc, char* argv[]) { int return_val = 0, err; // // Install signal handler to perform cleanup before exit. // signal(SIGINT, (sig_fn_ptr) sigcleanup); signal(SIGHUP, (sig_fn_ptr) sigcleanup); signal(SIGTERM, (sig_fn_ptr) sigcleanup); if (argc < 2) { printf("\n%s\nNo parameters supplied. Entering interactive mode.\n" "For help on command line params, use --help switch.\n" "Hit Control-C at any time to exit program.\n", copyright_banner); read_arguments(); } else { for (int i = 1; i < argc; ) { i += process_argument(argv[i], argv[i+1]); } } if (display_help) { print_usage(); return 0; } if (testcount < 0 || testcount > 10000) { fatal("\nError: Test count value %d is out of range (0 - 10000)\n\n", testcount); } if (verifypasses < 1 || verifypasses > 100) { fatal("\nError: Verification passes value %d is out of range (1 - 100)\n\n", verifypasses); } if (filesize_MB < 1 || filesize_MB > 16777216) { fatal("\nError: Test file size %d is out of range (1 - 16777216)\n\n", filesize_MB); } // // In many operating systems it is advantageous to do file I/O in units of pages, // using buffers which start on page boundaries. Therefore we shall make the buffer // size a multiple of the page size and a multiple of the unit of work (uint32_t), // while trying to keep it at least as large as the bufsize_Mbytes constant. // pagesize = getpagesize(); uint32_t base_size = pagesize * sizeof(uint32_t); bufsize = ((0x100000 * bufsize_Mbytes) / base_size) + 1; bufsize = bufsize * base_size; // // For the purposes of the rest of the program, the buffer size is the number of uint32_ts // in a buffer, not the number of bytes. // bufsize /= sizeof(uint32_t); // // Translate the file size from megabytes to uint32_ts. // filesize = (off_t) filesize_MB * (1048576 / sizeof(uint32_t)); // // Create the log file. // logfile = fopen(logfilename, "a"); if (logfile == NULL) { fatal("Error: Couldn't open log file (%s).\n", logfilename); } time_t t = time(NULL); fprintf(logfile, "\n============================================================\n" "%s\n" "Start time: %s\n", copyright_banner, ctime(&t)); fprintf(logfile, "Test parameters:\n\t" "Test file size is %d megabytes\n\t", filesize_MB); fprintf(logfile, "%d%s test iteration%s\n\t", testcount, (testcount == 0) ? (" (infinite loop)") : (""), (testcount == 1) ? ("") : ("s") ); fprintf(logfile, "%d verification pass%s per test iteration\n", verifypasses, (verifypasses == 1) ? ("") : ("es") ); // // Test loop. // for (uint32_t testnum = 1; (testnum <= testcount) || (testcount == 0); testnum++) { print_and_log("\nBeginning test iteration #%d; creating test file...", testnum); putchar('\n'); process_testfile(CREATE, testfilename, filesize, testnum); fprintf(logfile, " (done)\n"); for (uint32_t pass = 1; pass <= verifypasses; pass++) { printf("Verifying integrity of test file, pass #%d\n", pass); err = process_testfile(VERIFY, testfilename, filesize, testnum); fprintf(logfile, "Completed test #%d verification pass #%d (%s)\n", testnum, pass, (err) ? "FAIL" : "OK"); if (err) { printf("Warning, corruption detected! Check logfile for details.\n"); return_val = 1; if (halt_on_error) { print_and_log("\nHalting test.\n"); fclose(logfile); logfile = NULL; return 1; } } } } print_and_log("\nFinished testing.\n\n"); // if (remove(testfilename)) { // print_and_log("Warning: test file could not be deleted!\n\n"); // } fclose(logfile); return return_val; }