/*
 * Invoke every init_* routine of this file once, in a fixed order.
 *
 * NOTE(review): judging by the name, each init_* call presumably fills one
 * slot of a setup table; the helpers are not visible here -- confirm.
 */
static void init_setup_tab(void)
{
	/* "wg" family */
	init_wg();
	init_wgs();
	init_wgt0();
	init_wgpt0();
	init_wgst0();
	init_wgspt0();

	/* "wgf" family */
	init_wgf();
	init_wgfs();
	init_wgft0();
	init_wgfpt0();
	init_wgfst0();
	init_wgfspt0();

	/* "t0" / "f" family */
	init_t0();
	init_f();
	init_ft0();

	/* "g" family */
	init_g();
	init_gs();
	init_gt0();
	init_gst0();

	/* "gf" family */
	init_gf();
	init_gfs();
	init_gft0();
	init_gfst0();
}
/*
 * Fetch the next variadic argument as a t_ullong and run it through the
 * update_format -> get_result -> init_f sequence.
 *
 * ap: pointer to the caller's va_list; one argument is consumed from it.
 * f:  format descriptor passed to update_format, get_result and init_f.
 *
 * NOTE(review): the argument is read as t_ullong. If callers pass a
 * default-promoted int here, the va_arg type does not match -- confirm
 * against the call sites.
 */
void	ft_print_c(va_list *ap, t_format *f)
{
	t_ullong	value;

	value = va_arg(*ap, t_ullong);
	update_format(f);
	get_result(value, f);
	init_f(f);
}
/*
 * Invoke every init_* routine of this file once, in a fixed order.
 * The trailing notes are the original author's description of each variant.
 */
static void init_setup_tab(void)
{
	init_wg();		/* pos + col */
	init_wgt0();		/* pos + col + tex0 */
	init_wgpt0();		/* pos + col + p-tex0 (?) */

	init_t0();		/* tex0 */
	init_g();		/* col */
	init_gt0();		/* col + tex */

	/* counterparts of the above carrying an extra "f" in the name */
	init_wgf();
	init_wgft0();
	init_wgfpt0();

	init_f();
	init_gf();
	init_gft0();
}
/*
 * Invoke every init_* routine of this file once, in a fixed order.
 *
 * NOTE(review): judging by the name, each init_* call presumably fills one
 * slot of a setup table; the helpers are not visible here -- confirm.
 */
static void init_setup_tab(void)
{
	/* "wg" family */
	init_wg();
	init_wgs();
	init_wgt0();
	init_wgt0t1();
	init_wgpt0();
	init_wgst0();
	init_wgst0t1();
	init_wgspt0();

	/* "wgf" family */
	init_wgf();
	init_wgfs();
	init_wgft0();
	init_wgft0t1();
	init_wgfpt0();
	init_wgfst0();
	init_wgfst0t1();
	init_wgfspt0();

	/* "t0" / "f" family */
	init_t0();
	init_t0t1();
	init_f();
	init_ft0();
	init_ft0t1();

	/* "g" family */
	init_g();
	init_gs();
	init_gt0();
	init_gt0t1();
	init_gst0();
	init_gst0t1();

	/* "gf" family */
	init_gf();
	init_gfs();
	init_gft0();
	init_gft0t1();
	init_gfst0();
	init_gfst0t1();

	/* Add proj texturing on t1 */
	init_wgpt0t1();
	init_wgspt0t1();
	init_wgfpt0t1();
	init_wgfspt0t1();
}
int main() { int enable_profiling = 0; #ifdef DO_TIMING enable_profiling = 1; #endif //print_platforms_devices(); cl_context ctx; cl_command_queue queue; create_context_on("NVIDIA", NULL, 0, &ctx, &queue, enable_profiling); // -------------------------------------------------------------------------- // load kernels // -------------------------------------------------------------------------- // read the cl file char buf[100]; sprintf(buf, "mg-kernel-ver%d.cl", VERSION); char *knl_text = read_file(buf); //get work group dimensions and gflop info. int wg_dims , wg_x, wg_y, wg_z, z_div, fetch_per_pt, flops_per_pt; if (sscanf(knl_text, "// workgroup: (%d,%d,%d) z_div:%d fetch_per_pt:%d flops_per_pt:%d", &wg_x, &wg_y, &wg_z, &z_div, &fetch_per_pt, &flops_per_pt) == 6) { wg_dims = 3; } else if (sscanf(knl_text, "// workgroup: (%d,%d) fetch_per_pt:%d flops_per_pt:%d", &wg_x, &wg_y, &fetch_per_pt, &flops_per_pt) == 4) { wg_dims = 2; wg_z = -1; z_div = -1; } else { perror("reading workgroup spec"); abort(); } #ifdef USE_DOUBLE char *compile_opt = "-DFTYPE=double"; #else char *compile_opt = "-DFTYPE=float"; #endif // creation of the kernel cl_kernel poisson_knl = kernel_from_string(ctx, knl_text, "fd_update", compile_opt); free(knl_text); // my compiler complains about this one. OJO!! // -------------------------------------------------------------------------- // set up grid // -------------------------------------------------------------------------- const unsigned points = POINTS; const ftype minus_bdry = -1, plus_bdry = 1; // We're dividing into (points-1) intervals. 
ftype dx = (plus_bdry-minus_bdry)/(points-1); // -------------------------------------------------------------------------- // allocate and initialize CPU memory // -------------------------------------------------------------------------- int use_alignment; unsigned dim_other = points; //if order 2 then 1 point extra on each side #ifdef USE_ALIGNMENT // adjusts dimension so that the next row starts in a number divisible by 16 unsigned dim_x = ((dim_other + 15) / 16) * 16; unsigned field_start = 0; use_alignment = 1; #else unsigned dim_x = dim_other; unsigned field_start = 0;// this one puts me right at the beginning use_alignment = 0; #endif // --------Allocate forcing uexact, r and u vectors ------------------------- const size_t field_size = 0+dim_x*dim_x*dim_x; // extra large to fit the 2^n constrain in GPU ftype *f = malloc(field_size*sizeof(ftype)); CHECK_SYS_ERROR(!f, "allocating f"); ftype *u = malloc (field_size*sizeof(ftype)); CHECK_SYS_ERROR(!u, "allocating u"); ftype *uexact = malloc (field_size*sizeof(ftype)); CHECK_SYS_ERROR(!uexact, "allocating uexact"); ftype *r = malloc(field_size * sizeof(ftype)); CHECK_SYS_ERROR(!r, "allocating residual r"); // -------------------------------------------------------------------------- // initialize // -------------------------------------------------------------------------- // zero out (necessary to initialize everything bec. 
I measure norms) for (size_t i = 0; i < field_size; ++i){ f[i] = 0; u[i] = 0; uexact[i] = 0; r[i] = 0; } // set up the forcing field init_f (points, f, dx, field_start, dim_x, dim_other, minus_bdry); // Initialize u with initial boundary conditions init_u ( points, u , minus_bdry, plus_bdry, dx, field_start, dim_x, dim_other); // Initialize the exact solution init_uexact(points, u, uexact, dx, field_size, field_start, dim_x, dim_other); // -------------------------------------------------------------------------- // Setup the v-cycles // -------------------------------------------------------------------------- unsigned n1, n2, n3, ncycles; n1 = 50; n2 = 60; n3 = 1; ncycles = 2; ftype *sweeps = malloc (ncycles*sizeof(ftype)); ftype *rnorm = malloc (ncycles*sizeof(ftype)); ftype *enorm = malloc (ncycles*sizeof(ftype)); ftype rtol = 1.0e-05; // Find the norm of the residual (choose your method) sweeps[0] =0; resid (r, f, u, dx, field_size, field_start, dim_x, dim_other); rnorm[0] = norm( r , field_size) * dx; U_error(u, uexact, r, field_size); enorm[0] = norm( r, field_size ) * dx; for(unsigned icycle = 1; icycle <= ncycles; icycle++){ mgv(f, u, dx, n1, n2, n3, field_size, points, use_alignment, dim_x, ctx, queue, poisson_knl, wg_dims , wg_x, wg_y, wg_z, z_div, fetch_per_pt, flops_per_pt); //update u through a v-cycle sweeps[icycle] = sweeps[icycle -1] + (4 * (n1 + n2)/3); resid (r, f, u, dx, field_size, field_start, dim_x, dim_other); rnorm[icycle] = norm( r, field_size ) * dx; U_error(u, uexact, r, field_size); enorm[icycle] = norm( r, field_size ) * dx; //cfacts = (rnorm(icycle)/rnorm(icycle - 1))^(1 / (n1 + n2)) not necessary //disp something here if I want to. 
//printf("norm of the cycle %f", enorm[icycle]); if(rnorm[icycle] <= rtol * rnorm[0]) break; } #ifdef DO_TIMING printf(" ftype:%d ver:%d align:%d pts:%d\tgflops:%.1f\tmcells:%.1f\tgbytes:%.1f [/sec]\tout_gflops:%.6f\n", (int) sizeof(ftype), VERSION, use_alignment, points, gflops_performed/seconds_taken, mcells_updated/seconds_taken, gbytes_accessed/seconds_taken, gflops_performed/tot_secs); #endif // -------------------------------------------------------------------------- // clean up // -------------------------------------------------------------------------- CALL_CL_GUARDED(clReleaseKernel, (poisson_knl)); CALL_CL_GUARDED(clReleaseCommandQueue, (queue)); CALL_CL_GUARDED(clReleaseContext, (ctx)); }
// Initialise this object by forwarding the fixed name "Lifter" to init_f().
void Lifter::init()
{
    init_f("Lifter");
}
// Initialise this object by forwarding the fixed name "Tracer" to init_f().
void Tracer::init()
{
    init_f("Tracer");
}