Exemplo n.º 1
0
/*
 * Invoke every init_* routine of this setup table, one after another.
 *
 * The routines are grouped below purely by the prefix of their name; the
 * call order is exactly the order the table has always been filled in.
 * NOTE(review): what each init_* routine registers is defined elsewhere
 * in the file -- confirm there before reordering anything.
 */
static void
init_setup_tab(void)
{
	/* wg* variants */
	init_wg();
	init_wgs();
	init_wgt0();
	init_wgpt0();
	init_wgst0();
	init_wgspt0();

	/* wgf* variants */
	init_wgf();
	init_wgfs();
	init_wgft0();
	init_wgfpt0();
	init_wgfst0();
	init_wgfspt0();

	/* t0 / f variants */
	init_t0();
	init_f();
	init_ft0();

	/* g* variants */
	init_g();
	init_gs();
	init_gt0();
	init_gst0();

	/* gf* variants */
	init_gf();
	init_gfs();
	init_gft0();
	init_gfst0();
}
Exemplo n.º 2
0
/*
 * Fetch the next variadic argument from *ap as a t_ullong, then hand the
 * format descriptor to update_format(), get_result() (together with the
 * fetched value) and finally init_f(), in that order.
 *
 * NOTE(review): the argument is read with va_arg(..., t_ullong). If callers
 * pass a char (promoted to int) for this conversion, the requested type
 * would not match what was actually passed -- confirm against the callers.
 */
void	ft_print_c(va_list *ap, t_format *f)
{
	t_ullong	value = va_arg(*ap, t_ullong);

	update_format(f);
	get_result(value, f);
	init_f(f);
}
Exemplo n.º 3
0
/*
 * Invoke every init_* routine of this setup table, one after another.
 *
 * The per-call notes on the first group are carried over from the original
 * author; the call order is unchanged.
 * NOTE(review): the init_* routines themselves are defined elsewhere in
 * the file -- confirm there before reordering anything.
 */
static void
init_setup_tab(void)
{
	init_wg();      /* pos + col */
	init_wgt0();    /* pos + col + tex0 */
	init_wgpt0();   /* pos + col + p-tex0 (?) */
	init_t0();      /* tex0 */
	init_g();       /* col */
	init_gt0();     /* col + tex */

	/* the same six combinations with an extra "f" in the name */
	init_wgf();
	init_wgft0();
	init_wgfpt0();
	init_f();
	init_gf();
	init_gft0();
}
Exemplo n.º 4
0
/*
 * Invoke every init_* routine of this setup table, one after another.
 *
 * The routines are grouped below purely by the prefix of their name; the
 * call order is exactly the original one.
 * NOTE(review): what each init_* routine registers is defined elsewhere
 * in the file -- confirm there before reordering anything.
 */
static void
init_setup_tab(void)
{
	/* wg* variants */
	init_wg();
	init_wgs();
	init_wgt0();
	init_wgt0t1();
	init_wgpt0();
	init_wgst0();
	init_wgst0t1();
	init_wgspt0();

	/* wgf* variants */
	init_wgf();
	init_wgfs();
	init_wgft0();
	init_wgft0t1();
	init_wgfpt0();
	init_wgfst0();
	init_wgfst0t1();
	init_wgfspt0();

	/* t0 / f variants */
	init_t0();
	init_t0t1();
	init_f();
	init_ft0();
	init_ft0t1();

	/* g* variants */
	init_g();
	init_gs();
	init_gt0();
	init_gt0t1();
	init_gst0();
	init_gst0t1();

	/* gf* variants */
	init_gf();
	init_gfs();
	init_gft0();
	init_gft0t1();
	init_gfst0();
	init_gfst0t1();

	/* Add proj texturing on t1 */
	init_wgpt0t1();
	init_wgspt0t1();
	init_wgfpt0t1();
	init_wgfspt0t1();
}
Exemplo n.º 5
0
/*
 * Driver: builds the OpenCL kernel for the selected VERSION, sets up the
 * grid fields on the host, runs up to `ncycles` calls to mgv() and records
 * the residual / error norms after each one, then releases all resources.
 *
 * Returns 0 on normal completion; aborts if the kernel file does not start
 * with a recognised "// workgroup:" header.
 */
int main(void)
{
  int enable_profiling = 0;
  #ifdef DO_TIMING
      enable_profiling = 1;
  #endif

  //print_platforms_devices();
  cl_context ctx;
  cl_command_queue queue;
  create_context_on("NVIDIA", NULL, 0, &ctx, &queue, enable_profiling);

  // --------------------------------------------------------------------------
  // load kernels
  // --------------------------------------------------------------------------
  // read the cl file

  char buf[100];
  // Bounded formatting; a truncated file name would silently load the wrong file.
  int name_len = snprintf(buf, sizeof buf, "mg-kernel-ver%d.cl", VERSION);
  if (name_len < 0 || (size_t) name_len >= sizeof buf)
  {
    fputs("kernel file name does not fit in buffer\n", stderr);
    abort();
  }
  char *knl_text = read_file(buf);
  // get work group dimensions and gflop info from the first line of the kernel file.
  int wg_dims , wg_x, wg_y, wg_z, z_div, fetch_per_pt, flops_per_pt;
  if (sscanf(knl_text, "// workgroup: (%d,%d,%d) z_div:%d fetch_per_pt:%d flops_per_pt:%d", 
        &wg_x, &wg_y, &wg_z, &z_div, &fetch_per_pt, &flops_per_pt) == 6)
  {
    wg_dims = 3;
  }
  else if (sscanf(knl_text, "// workgroup: (%d,%d) fetch_per_pt:%d flops_per_pt:%d",
        &wg_x, &wg_y, &fetch_per_pt, &flops_per_pt) == 4)
  {
    wg_dims = 2;
    // no z extent in the 2D header; -1 is stored in its place
    wg_z = -1;
    z_div = -1;
  }
  else
  {
    // NOTE: sscanf does not set errno on a matching failure, so the text
    // perror appends after the colon is not meaningful here.
    perror("reading workgroup spec");
    abort();
  }

  #ifdef USE_DOUBLE
  char *compile_opt = "-DFTYPE=double";
  #else
  char *compile_opt = "-DFTYPE=float";
  #endif

  // creation of the kernel
  cl_kernel poisson_knl = kernel_from_string(ctx, knl_text, "fd_update", compile_opt);
  free(knl_text); // my compiler complains about this one.  OJO!!
  // --------------------------------------------------------------------------
  // set up grid
  // --------------------------------------------------------------------------
  const unsigned points = POINTS;
  const ftype minus_bdry = -1, plus_bdry = 1;

  // We're dividing into (points-1) intervals.
  ftype dx = (plus_bdry-minus_bdry)/(points-1);

  // --------------------------------------------------------------------------
  // allocate and initialize CPU memory
  // --------------------------------------------------------------------------
  int use_alignment;
  unsigned dim_other = points; //if order 2 then 1 point extra on each side
  #ifdef USE_ALIGNMENT
  // rounds the row length up to the next multiple of 16
  unsigned dim_x = ((dim_other + 15) / 16) * 16; 
  unsigned field_start = 0;
  use_alignment = 1; 
  #else
  unsigned dim_x = dim_other;
  unsigned field_start = 0;// this one puts me right at the beginning
  use_alignment = 0;
  #endif
  // --------Allocate forcing uexact, r and u vectors -------------------------
  // widen before multiplying so the cube is computed in size_t, not unsigned
  const size_t field_size = (size_t) dim_x * dim_x * dim_x;
  ftype *f = malloc(field_size * sizeof *f);
  CHECK_SYS_ERROR(!f, "allocating f");
  ftype *u = malloc(field_size * sizeof *u);
  CHECK_SYS_ERROR(!u, "allocating u");  
  ftype *uexact = malloc(field_size * sizeof *uexact);
  CHECK_SYS_ERROR(!uexact, "allocating uexact");
  ftype *r = malloc(field_size * sizeof *r);
  CHECK_SYS_ERROR(!r, "allocating residual r");

  // --------------------------------------------------------------------------
  // initialize
  // --------------------------------------------------------------------------
  // zero out (necessary to initialize everything because norms are measured
  // over the whole field)
  for (size_t i = 0; i < field_size; ++i){
    f[i] = 0;
    u[i] = 0;
    uexact[i] = 0;
    r[i] = 0;
  }
  // set up the forcing field
  init_f (points, f, dx, field_start, dim_x, dim_other, minus_bdry);
  // Initialize u with initial boundary conditions
  init_u ( points, u , minus_bdry, plus_bdry, dx, field_start, dim_x, dim_other);
  // Initialize the exact solution
  init_uexact(points, u, uexact, dx, field_size, field_start, dim_x, dim_other);

  // --------------------------------------------------------------------------
  // Setup the v-cycles
  // --------------------------------------------------------------------------

  unsigned n1, n2, n3, ncycles;
  n1 = 50;
  n2 = 60;
  n3 = 1;
  ncycles = 2;
  // Slot 0 holds the state before any cycle and slots 1..ncycles hold the
  // state after each cycle, so every history array needs ncycles + 1 entries.
  const size_t history_len = (size_t) ncycles + 1;
  ftype *sweeps = malloc(history_len * sizeof *sweeps);
  CHECK_SYS_ERROR(!sweeps, "allocating sweeps");
  ftype *rnorm = malloc(history_len * sizeof *rnorm);
  CHECK_SYS_ERROR(!rnorm, "allocating rnorm");
  ftype *enorm = malloc(history_len * sizeof *enorm);
  CHECK_SYS_ERROR(!enorm, "allocating enorm");
  ftype rtol = 1.0e-05;

  // Find the norm of the residual (choose your method)
  sweeps[0] = 0;
  resid (r, f, u, dx, field_size, field_start, dim_x, dim_other);
  rnorm[0] = norm( r , field_size) * dx;
  // r is reused as scratch space for the error field
  U_error(u, uexact, r, field_size);
  enorm[0] = norm( r, field_size ) * dx;

  for (unsigned icycle = 1; icycle <= ncycles; icycle++){
    // update u through a v-cycle
    mgv(f, u, dx, n1, n2, n3, field_size, points, use_alignment, dim_x,
        ctx, queue, poisson_knl, wg_dims , wg_x, wg_y, wg_z, z_div,
        fetch_per_pt, flops_per_pt);
    sweeps[icycle] = sweeps[icycle -1] + (4 * (n1 + n2)/3);
    resid (r, f, u, dx, field_size, field_start, dim_x, dim_other);
    rnorm[icycle] = norm( r, field_size ) * dx;
    U_error(u, uexact, r, field_size);
    enorm[icycle] = norm( r, field_size ) * dx;
    //cfacts = (rnorm(icycle)/rnorm(icycle - 1))^(1 / (n1 + n2)) not necessary
    //disp something here if I want to.
    //printf("norm of the cycle %f", enorm[icycle]);
    // stop early once the residual has dropped by the relative tolerance
    if (rnorm[icycle] <= rtol * rnorm[0])
      break;
  }
  #ifdef DO_TIMING
  printf(" ftype:%d ver:%d align:%d pts:%u\tgflops:%.1f\tmcells:%.1f\tgbytes:%.1f [/sec]\tout_gflops:%.6f\n", (int) sizeof(ftype), VERSION, use_alignment, points, gflops_performed/seconds_taken, mcells_updated/seconds_taken, gbytes_accessed/seconds_taken, gflops_performed/tot_secs);
  #endif
  // --------------------------------------------------------------------------
  // clean up
  // --------------------------------------------------------------------------
  free(enorm);
  free(rnorm);
  free(sweeps);
  free(r);
  free(uexact);
  free(u);
  free(f);

  CALL_CL_GUARDED(clReleaseKernel, (poisson_knl));
  CALL_CL_GUARDED(clReleaseCommandQueue, (queue));
  CALL_CL_GUARDED(clReleaseContext, (ctx));

  return 0;
}
Exemplo n.º 6
0
// Initialise the Lifter by passing its name to init_f().
void Lifter::init()
{
    init_f("Lifter");
}
Exemplo n.º 7
0
// Initialise the Tracer by passing its name to init_f().
void Tracer::init()
{
  init_f("Tracer");
}