Пример #1
0
// Performs a full solve with the PPCG solver.
//
// The solve begins with CG iterations and, once the switch criterion inside
// the loop is met, performs a PPCG step on every remaining iteration.
//
//   chunks, settings  passed through to every driver call
//   rx, ry            passed through to cg_init_driver
//   error             read for the switch and convergence tests, and handed to
//                     the CG and PPCG step drivers
void ppcg_driver(Chunk* chunks, Settings* settings,
        double rx, double ry, double* error)
{
    int tt;
    double rro = 0.0;
    int num_ppcg_iters = 0;

    // Perform CG initialisation
    cg_init_driver(chunks, settings, rx, ry, &rro);

    // Iterate till convergence
    for(tt = 0; tt < settings->max_iters; ++tt)
    {
        // If we have already ran PPCG inner iterations, continue with PPCG
        bool is_switch_to_ppcg = (num_ppcg_iters != 0);

        if(!is_switch_to_ppcg)
        {
            // If we are error switching, check the error
            // If not error switching, perform preset iterations
            // Perform enough iterations to converge eigenvalues
            // In both branches the iteration count is tested first, so *error
            // is only consulted once that count has been exceeded
            is_switch_to_ppcg = settings->error_switch
                ? (tt > CG_ITERS_FOR_EIGENVALUES) && (*error < settings->eps_lim)
                : (tt > settings->presteps) && (*error < ERROR_SWITCH_MAX);
        }

        if(!is_switch_to_ppcg)
        {
            // Perform a CG iteration
            cg_main_step_driver(chunks, settings, tt, &rro, error);
        }
        else
        {
            num_ppcg_iters++;

            // If first step perform initialisation
            if(num_ppcg_iters == 1)
            {
                // Initialise the eigenvalues and Chebyshev coefficients
                eigenvalue_driver_initialise(chunks, settings, tt);
                cheby_coef_driver(
                       chunks, settings, settings->ppcg_inner_steps);

                ppcg_init_driver(chunks, settings, &rro);
            }

            ppcg_main_step_driver(chunks, settings, &rro, error);
        }

        halo_update_driver(chunks, settings, 1);

        if(fabs(*error) < settings->eps) break;
    }

    // A break leaves tt at the index of the last iteration performed, whereas
    // exhausting the loop leaves tt equal to the number of iterations performed
    const int total_iters = (tt < settings->max_iters) ? tt+1 : tt;

    print_and_log(settings, "CG: \t\t\t%d iterations\n",
            total_iters-num_ppcg_iters);
    print_and_log(settings,
            "PPCG: \t\t\t%d iterations (%d inner iterations per)\n",
            num_ppcg_iters, settings->ppcg_inner_steps);
}
Пример #2
0
// Allocates all of the field buffers
void kernel_initialise(
        Settings* settings, int x, int y, KView* density0, 
        KView* density, KView* energy0, KView* energy, KView* u, 
        KView* u0, KView* p, KView* r, KView* mi, 
        KView* w, KView* kx, KView* ky, KView* sd, 
        KView* volume, KView* x_area, KView* y_area, KView* cell_x, 
        KView* cell_y, KView* cell_dx, KView* cell_dy, KView* vertex_dx, 
        KView* vertex_dy, KView* vertex_x, KView* vertex_y, KView* comms_buffer,
        Kokkos::View<double*>::HostMirror* host_comms_mirror, 
        double** cg_alphas, double** cg_betas, double** cheby_alphas, 
        double** cheby_betas)
{
    print_and_log(settings,
            "Performing this solve with the Kokkos Functors %s solver\n",
            settings->solver_name);

    Kokkos::initialize();

    new(density0) KView("density0", x*y);
    new(density) KView("density", x*y);
    new(energy0) KView("energy0", x*y);
    new(energy) KView("energy", x*y);
    new(u) KView("u", x*y);
    new(u0) KView("u0", x*y);
    new(p) KView("p", x*y);
    new(r) KView("r", x*y);
    new(mi) KView("mi", x*y);
    new(w) KView("w", x*y);
    new(kx) KView("kx", x*y);
    new(ky) KView("ky", x*y);
    new(sd) KView("sd", x*y);
    new(volume) KView("volume", x*y);
    new(x_area) KView("x_area", (x+1)*y);
    new(y_area) KView("y_area", x*(y+1));
    new(cell_x) KView("cell_x", x);
    new(cell_y) KView("cell_y", y);
    new(cell_dx) KView("cell_dx", x);
    new(cell_dy) KView("cell_dy", y);
    new(vertex_dx) KView("vertex_dx", (x+1));
    new(vertex_dy) KView("vertex_dy", (y+1));
    new(vertex_x) KView("vertex_x", (x+1));
    new(vertex_y) KView("vertex_y", (y+1));

    new(comms_buffer) KView("comms_buffer", MAX(x, y)*settings->halo_depth);
    new(host_comms_mirror) KView::HostMirror(); 
    *host_comms_mirror = Kokkos::create_mirror_view(*comms_buffer);

    allocate_buffer(cg_alphas, settings->max_iters, 1);
    allocate_buffer(cg_betas, settings->max_iters, 1);
    allocate_buffer(cheby_alphas, settings->max_iters, 1);
    allocate_buffer(cheby_betas, settings->max_iters, 1);
}
Пример #3
0
// Allocates every field buffer and solver coefficient buffer for the serial
// solver, after logging which solver is in use.
//
// Each double** argument is handed to allocate_buffer together with the pair
// of extents given below; x and y supply the extents of the field buffers and
// settings->max_iters the length of the coefficient buffers.
void kernel_initialise(
        Settings* settings, int x, int y, double** density0, 
        double** density, double** energy0, double** energy, double** u, 
        double** u0, double** p, double** r, double** mi, 
        double** w, double** kx, double** ky, double** sd, 
        double** volume, double** x_area, double** y_area, double** cell_x, 
        double** cell_y, double** cell_dx, double** cell_dy, double** vertex_dx, 
        double** vertex_dy, double** vertex_x, double** vertex_y, 
        double** cg_alphas, double** cg_betas, double** cheby_alphas, 
        double** cheby_betas)
{
    int ii;

    // Buffers allocated with extents (x, y), listed in allocation order
    double** xy_buffers[] = {
        density0, density, energy0, energy, u, u0, p,
        r, mi, w, kx, ky, sd, volume
    };
    const int num_xy_buffers =
        (int)(sizeof(xy_buffers)/sizeof(xy_buffers[0]));

    // Buffers allocated with extents (settings->max_iters, 1)
    double** coefficient_buffers[] = {
        cg_alphas, cg_betas, cheby_alphas, cheby_betas
    };
    const int num_coefficient_buffers =
        (int)(sizeof(coefficient_buffers)/sizeof(coefficient_buffers[0]));

    print_and_log(settings,
            "Performing this solve with the serial %s solver\n",
            settings->solver_name);

    for(ii = 0; ii < num_xy_buffers; ++ii)
    {
        allocate_buffer(xy_buffers[ii], x, y);
    }

    // One extra entry along a single direction
    allocate_buffer(x_area, x+1, y);
    allocate_buffer(y_area, x, y+1);

    // Buffers that extend along one direction only
    allocate_buffer(cell_x, x, 1);
    allocate_buffer(cell_y, 1, y);
    allocate_buffer(cell_dx, x, 1);
    allocate_buffer(cell_dy, 1, y);
    allocate_buffer(vertex_dx, x+1, 1);
    allocate_buffer(vertex_dy, 1, y+1);
    allocate_buffer(vertex_x, x+1, 1);
    allocate_buffer(vertex_y, 1, y+1);

    for(ii = 0; ii < num_coefficient_buffers; ++ii)
    {
        allocate_buffer(coefficient_buffers[ii], settings->max_iters, 1);
    }
}
Пример #4
0
// Performs a full solve with the Jacobi solver kernels.
//
// Runs the Jacobi initialisation, then repeats a main step followed by a halo
// update until |*error| drops below settings->eps or settings->max_iters
// iterations have been performed, and finally logs the iteration count.
//
//   chunks, settings  passed through to every driver call
//   rx, ry            passed through to jacobi_init_driver
//   error             handed to the main step driver and read for the
//                     convergence test
void jacobi_driver(
        Chunk* chunks, Settings* settings, 
        double rx, double ry, double* error)
{
    jacobi_init_driver(chunks, settings, rx, ry);

    // Iterate till convergence
    int tt;
    for(tt = 0; tt < settings->max_iters; ++tt)
    {
        jacobi_main_step_driver(chunks, settings, tt, error);

        halo_update_driver(chunks, settings, 1);

        if(fabs(*error) < settings->eps) break;
    }

    // A break leaves tt at the index of the last iteration performed, whereas
    // exhausting the loop leaves tt equal to the number of iterations performed
    const int total_iters = (tt < settings->max_iters) ? tt+1 : tt;

    print_and_log(settings, "Jacobi: \t\t%d iterations\n", total_iters);
}
Пример #5
0
//
// Program entry point.
//
// Installs the cleanup signal handlers, obtains the test parameters (from the
// command line, or interactively when none are given), validates them, sizes
// the I/O buffer, opens the log file and then runs the create/verify test loop.
//
// Returns 0 when help was displayed or no verification pass reported an error,
// and 1 when at least one verification pass reported an error.
//
int main(int argc, char* argv[])
{
	int return_val = 0, err;

	//
	// Install signal handler to perform cleanup before exit.
	//
	signal(SIGINT, (sig_fn_ptr) sigcleanup);
	signal(SIGHUP, (sig_fn_ptr) sigcleanup);
	signal(SIGTERM, (sig_fn_ptr) sigcleanup);

	if (argc < 2) {
		printf("\n%s\nNo parameters supplied.  Entering interactive mode.\n"
			   "For help on command line params, use --help switch.\n"
			   "Hit Control-C at any time to exit program.\n", copyright_banner);
		read_arguments();
	} else {
		//
		// The loop advances by whatever process_argument() returns.  For the
		// last argument, argv[i+1] is argv[argc], which the language guarantees
		// to be a null pointer, so the access stays in bounds.
		//
		for (int i = 1; i < argc; ) {
			i += process_argument(argv[i], argv[i+1]);
		}
	}

	if (display_help) {
		print_usage();
		return 0;
	}

	//
	// Range-check the parameters before any of them is used.
	//
	if (testcount < 0 || testcount > 10000) {
		fatal("\nError: Test count value %d is out of range (0 - 10000)\n\n", testcount);
	}
	if (verifypasses < 1 || verifypasses > 100) {
		fatal("\nError: Verification passes value %d is out of range (1 - 100)\n\n", verifypasses);
	}
	if (filesize_MB < 1 || filesize_MB > 16777216) {
		fatal("\nError: Test file size %d is out of range (1 - 16777216)\n\n", filesize_MB);
	}

	//
	// In many operating systems it is advantageous to do file I/O in units of pages,
	// using buffers which start on page boundaries.  Therefore we shall make the buffer
	// size a multiple of the page size and a multiple of the unit of work (uint32_t),
	// while trying to keep it at least as large as the bufsize_Mbytes constant.
	//
	pagesize = getpagesize();
	uint32_t base_size = pagesize * sizeof(uint32_t);
	bufsize = ((0x100000 * bufsize_Mbytes) / base_size) + 1;
	bufsize = bufsize * base_size;

	//
	// For the purposes of the rest of the program, the buffer size is the number of uint32_ts
	// in a buffer, not the number of bytes.
	//
	bufsize /= sizeof(uint32_t);

	//
	// Translate the file size from megabytes to uint32_ts.  The cast to off_t
	// comes first so that the multiplication is carried out in off_t.
	//
	filesize = (off_t) filesize_MB * (1048576 / sizeof(uint32_t));

	//
	// Create the log file.
	//
	logfile = fopen(logfilename, "a");
	if (logfile == NULL) {
		fatal("Error: Couldn't open log file (%s).\n", logfilename);
	}

	time_t t = time(NULL);

	fprintf(logfile,
			"\n============================================================\n"
			"%s\n"
			"Start time: %s\n",
			copyright_banner,
			ctime(&t));
	fprintf(logfile,
			"Test parameters:\n\t"
			"Test file size is %d megabytes\n\t",
			filesize_MB);
	fprintf(logfile,
			"%d%s test iteration%s\n\t",
			testcount,
			(testcount == 0) ? (" (infinite loop)") : (""),
			(testcount == 1) ? ("") : ("s") );
	fprintf(logfile,
			"%d verification pass%s per test iteration\n",
			verifypasses,
			(verifypasses == 1) ? ("") : ("es") );

	//
	// Test loop.  A test count of zero means the loop never terminates on its own.
	// The loop counters are unsigned, so they are printed with %u.
	//
	for (uint32_t testnum = 1; (testnum <= testcount) || (testcount == 0); testnum++) {
		print_and_log("\nBeginning test iteration #%u; creating test file...", (unsigned) testnum);
		putchar('\n');

		//
		// NOTE(review): the result of the CREATE call is not examined, and
		// " (done)" is logged regardless - confirm that creation failures are
		// reported inside process_testfile().
		//
		process_testfile(CREATE, testfilename, filesize, testnum);

		fprintf(logfile, " (done)\n");

		for (uint32_t pass = 1; pass <= verifypasses; pass++) {
			printf("Verifying integrity of test file, pass #%u\n", (unsigned) pass);

			err = process_testfile(VERIFY, testfilename, filesize, testnum);

			fprintf(logfile, "Completed test #%u verification pass #%u (%s)\n",
					(unsigned) testnum, (unsigned) pass, (err) ? "FAIL" : "OK");

			if (err) {
				printf("Warning, corruption detected!  Check logfile for details.\n");
				return_val = 1;

				if (halt_on_error) {
					print_and_log("\nHalting test.\n");
					fclose(logfile);
					logfile = NULL;
					return 1;
				}
			}
		}
	}

	print_and_log("\nFinished testing.\n\n");
	//	if (remove(testfilename)) {
	//		print_and_log("Warning: test file could not be deleted!\n\n");
	//	}

	//
	// Clear the global after closing, as the halt path above does, so that no
	// stale FILE pointer is left behind.
	//
	fclose(logfile);
	logfile = NULL;

	return return_val;
}