Exemple #1
0
void
Init_barracuda()
{
    id_times = rb_intern("times");
    id_new = rb_intern("new");
    id_to_s = rb_intern("to_s");
    id_data_type = rb_intern("data_type");
    id_buffer_data = rb_intern("buffer_data");

    rb_hTypes = rb_hash_new();
    rb_define_method(rb_mKernel, "Type", type_new, 1);
    types_hash_init();

    rb_mBarracuda = rb_define_module("Barracuda");
    rb_define_const(rb_mBarracuda, "VERSION",  rb_str_new2(VERSION_STRING));
    rb_define_const(rb_mBarracuda, "TYPES", rb_hTypes);

    rb_eProgramSyntaxError = rb_define_class_under(rb_mBarracuda, "SyntaxError", rb_eSyntaxError);
    rb_eOpenCLError = rb_define_class_under(rb_mBarracuda, "OpenCLError", rb_eStandardError);

    rb_cProgram = rb_define_class_under(rb_mBarracuda, "Program", rb_cObject);
    rb_define_alloc_func(rb_cProgram, program_s_allocate);
    rb_define_method(rb_cProgram, "initialize", program_initialize, -1);
    rb_define_method(rb_cProgram, "compile", program_compile, 1);
    rb_define_method(rb_cProgram, "method_missing", program_method_missing, -1);

    rb_cBuffer = rb_define_class_under(rb_mBarracuda, "Buffer", rb_cArray);
    rb_define_method(rb_cBuffer, "initialize", buffer_initialize, -1);
    rb_define_method(rb_cBuffer, "outvar", buffer_outvar, 0);
    rb_define_method(rb_cBuffer, "outvar?", buffer_is_outvar, 0);
    rb_define_method(rb_cBuffer, "mark_dirty", buffer_mark_dirty, 0);
    rb_define_method(rb_cBuffer, "dirty?", buffer_dirty, 0);

    rb_cType = rb_define_class_under(rb_mBarracuda, "Type", rb_cObject);
    rb_define_method(rb_cType, "initialize", type_initialize, 1);
    rb_define_method(rb_cType, "method_missing", type_method_missing, 1);
    rb_define_method(rb_cType, "object", type_object, 0);

    rb_define_method(rb_cArray, "outvar", array_to_outvar, 0);
    rb_define_method(rb_cObject, "to_type", object_to_type, 1);
    rb_define_method(rb_cFixnum, "to_type", fixnum_to_type, 1);
    rb_define_method(rb_cObject, "data_type", object_data_type_get, 0);
    rb_define_method(rb_cArray, "data_type", array_data_type_get, 0);
    rb_define_method(rb_cFixnum, "data_type", fixnum_data_type_get, 0);
    rb_define_method(rb_cBignum, "data_type", bignum_data_type_get, 0);
    rb_define_method(rb_cFloat, "data_type", float_data_type_get, 0);

    init_opencl();
}
Exemple #2
0
int main (int argc, char* const *argv)
{
  char filepath[MAXPATHLEN];
  filepath[0] = '\0';
  
  process_arguments(argc, argv, filepath);
  
  // Perform typical OpenCL setup in order to obtain a context and command
  // queue.
  init_opencl();
  
  // Check if the current architecture is compatible with the specified test options
  if (device_type == CL_DEVICE_TYPE_CPU)
  {
#if __LP64__
    if (is32bit)
      fprintf(stderr, "Warning: user specified the 'cpu32' option on the 64bit architecture.\n");
#else
    if (!is32bit)
      fprintf(stderr, "Warning: user specified the 'cpu64' option on the 32bit architecture.\n");
#endif
  }
  else if (device_type == CL_DEVICE_TYPE_GPU)
  {
    cl_int err;
    cl_uint address_bits = 0;
    err = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(address_bits),
      &address_bits, NULL);
    
    if (!is32bit && (address_bits == 32))
      fprintf(stderr, "Warning: user specified the 'gpu64' option on the 32bit architecture.\n");
    else if (is32bit && (address_bits == 64))
      fprintf(stderr, "Warning: user specified the 'gpu32' option on the 64bit architecture.\n");
  }
  
  // Obtain a CL program and kernel from our pre-compiled bitcode file and
  // test it by running the kernel on some test data.
  create_program_from_bitcode(filepath);
  
  // Close everything down.
  shutdown_opencl();
  
  return 0;
}
Exemple #3
0
static PyObject *
pl_allocate_memory(PyObject *self, PyObject *args)
{
    PyObject *result = NULL;

    char format[4];
    format[0] = IndexFormatUnit;
    format[1] = IndexFormatUnit;
    format[2] = IndexFormatUnit;
    format[3] = '\0';

    if (PyArg_ParseTuple(args, format, &natoms, &nres, &nchains))
    {
        create_atoms_array(&atoms, natoms);
        create_coords_array(&coords, natoms);
        create_distances_array(&distances, natoms);
        create_distances_array(&invdistances, natoms);
        create_bool_array(&atommask, natoms);
        create_distmask_array(&distmask, natoms);
        set_atom_coords(atoms, coords, natoms);

        create_residues_array(&residues, nres);

        create_chains_array(&chains, nchains);

        send_system_to_modules(atoms, coords, distances, invdistances, atommask, distmask, natoms, residues, nres, chains, nchains);

        #ifdef __USE_OPENCL_CODEPATH__
        init_opencl(&clcontext, &cldevices, &clqueue); 
        set_opencl_memory(&clcoords, &cldistmask, &cldistances, clcontext, coords, distmask, distances, natoms);
        compile_opencl_kernels(&clprogram, &cldistkernel, &cldistsqkernel, &clinvdistkernel, clcontext);
        send_opencl_to_modules(&clprogram, &cldistkernel, &cldistsqkernel, &clinvdistkernel, &clcontext, &clqueue, &clcoords, &cldistmask, &cldistances);
        #endif

        result = Py_BuildValue("i", 0);
    }

    return result;
}
Exemple #4
0
void run_opencl_test(use_gpu){
  
  init_opencl(use_gpu);
  load_cl_kernels(&clData);
  allocate_cl_buffers(&clData);
  transfer_buffers_to_gpu();
   
  flush_cl_queue();
   
  run_cl_advect_density(&clData, dt);
   
  flush_cl_queue();
   
  transfer_buffers_to_cpu();
   
  flush_cl_queue();
   
  
//  printf("dens[%d] = %3.2f\n",IX(16,3,0),g_dens[IX(16,3,0)]);
//  
//  if(g_dens[IX(16,3,0)] > 0.0f)
//  {
//    printf("Success!!\n");
//  }
//
//  for (int i = 0; i < clData.n; ++i)
//  {
//    if(i == 112) {
//      int j = i*clData.dn;
//      printf("debug_data1[%d] = %3.2f, %3.2f, %3.2f, %3.2f\n",i,clData.debug_data1[j], clData.debug_data1[j+1], clData.debug_data1[j+2], clData.debug_data1[j+3]);
//    }
//    
//  }
   
  cleanup_cl(&clData);

  
}
Exemple #5
0
int
main(int argc, char **argv)
{
	size_t g_work_size, l_work_size;
	cl_int error;

	if (argc != 2) {
		fprintf(stderr, "Usage: worms matchlist\n");
		exit(EXIT_FAILURE);
	}

	init_opencl();
	load_round_configs(argv[1]);
	prepare_job_scenario();

	/* Start this processing scenario. */
	g_work_size = NUM_STREAM_PROCS;
	l_work_size = NUM_STREAM_PROCS;
	error = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, &g_work_size,
					&l_work_size, 0, NULL,
					&kernel_completion);
	check_error("enquing kernel", error);

	unsigned int output_data[256];
	error = clEnqueueReadBuffer(cmd_queue, output_buf, true, 0, 1024,
					output_data, 1, &kernel_completion,
					NULL);
	unsigned int i;
	for (i = 0; i < 256; i++) {
		printf("%d,", output_data[i]);
		if ((i % 8) == 7)
			printf("\n");
	}

	exit(EXIT_SUCCESS);
}
Exemple #6
0
    ortho() :
      pump_factor_(4) 
    {

//         glVertexPointer(4, GL_FLOAT, 0, ptr);
        //glEnableClientState( GL_VERTEX_ARRAY );
//         glColorPointer(

        //std::ifstream is( "cryistal-castle-hidden-ramp.txt" );
//         std::ifstream is( "house1.txt" );
        //std::ifstream is( "cryistal-castle-tree-wave.txt" );

//         assert( is.good() );
//         height_fields_ = crystal_bits::load_crystal(is, pump_factor_);
//         std::cout << "hf: " << height_fields_.size() << "\n";
//         
//         
//         
//         scene_static_.init_solid(height_fields_);
//         
        
//         scene_static_.init_solid_from_crystal(is, pump_factor_);
        

//         scene_static_.init_planes();

//         uint64_t scene_hash = scene_static_.hash();
//         
//         try {
//             std::ifstream is( "ff.bin" );
//             
//             
//             light_static_ = light_static( is, scene_hash );
//         } catch( std::runtime_error x ) {
//             
//             std::cerr << "load failed. recreating. error:\n" << x.what() << std::endl;
//             
//             light_static_ = setup_formfactors(scene_static_.planes(), scene_static_.solid());    
//         }
//         
//         if( !false ) {
//             std::ofstream os( "ff.bin" );
//             light_static_.write(os, scene_hash);
//         }
//         
//         
//         light_dynamic_ = light_dynamic(scene_static_.planes().size() );
        
        CL_OpenGLWindowDescription desc;
        desc.set_size( CL_Size( 1024, 768 ), true );

        desc.set_depth_size(16);
        //std::cout << "depth: " << desc.get_depth_size();
        
        wnd_ = CL_DisplayWindow(desc);

        CL_GraphicContext_GL gc = wnd_.get_gc();
//      //CL_Mat4f proj = CL_Mat4f::ortho( 0, 1024, 0, 768, 100, -100 );


        gc.set_active();
#ifdef TEST_OPENCL
        try {
            init_opencl();
        } catch( cl::Error x ) {
            
//             std::array<void*, 256> bt;
//             //void *bt[256];
//             
//             size_t size = backtrace( bt.data(), bt.size() );
//             char **strings = backtrace_symbols( bt.data(), size );
//             std::cout << "backtrace: " << size << "\n";
//             for( size_t i = 0; i < size; ++i ) {
//                 std::cout << i << " " << strings[i] << "\n";
//             }
//             free( strings );
            
            std::cerr << "opencl initialization failed\ncall: " << x.what() << "\nerror code: " << cl_str_error( x.err() ) << "\n";            
            throw;
        }
#endif
      //  throw 0;
        
        glMatrixMode(GL_PROJECTION);                        //hello

        
        

        CL_Mat4f proj = CL_Mat4f::perspective( 60, 1.5, 2, 200 );
//      CL_Mat4f proj = CL_Mat4f::ortho( -20.0 * pump_factor_, 20.0 * pump_factor_, -15.0 * pump_factor_, 15.0 * pump_factor_, 0, 200 );
        //CL_Mat4f proj = CL_Mat4f::ortho( -40, 40, -30, 30, 0, 200 );


        glLoadMatrixf( proj.matrix );


        //CL_Texture tex(gc, 64, 64 );


        struct texel {
            GLubyte col[3];
            GLubyte alpha;

            texel() {
                col[0] = 128;
                col[1] = 128;
                col[2] = 128;
                alpha = 255;

            }
        };

        std::array<texel,64 * 64> tex_data;

        glLightModelf(GL_LIGHT_MODEL_LOCAL_VIEWER, 0.0);
//      glGenTextures(1, &texName);
//      glBindTexture(GL_TEXTURE_2D, texName);
//      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
//      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
//      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
//      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
//      glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 64, 64, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_data.data());


//      gc.set_map_mode(cl_user_projection);
//      gc.set_projection(proj);
//
//      gc.flush_batcher();
//      glMatrixMode(GL_PROJECTION);


        glMatrixMode(GL_MODELVIEW);


        glEnable(GL_DEPTH_TEST);
        glDepthMask(GL_TRUE);
        glDepthFunc(GL_LESS);
        
        glEnable(GL_TEXTURE_2D);
        glShadeModel(GL_FLAT);
    }
Exemple #7
0
void runTimings(int use_gpu){
  int ntrips = 10;
  char device_name[256];
  
  timestamp_type time1, time2;
  
  ////////////////////////////////////////////////////
  ///GPU TIMINGS
  ////////////////////////////////////////////////////
  
  init_opencl(use_gpu);
  load_cl_kernels(&clData);
  allocate_cl_buffers(&clData);
  
 
  print_device_info_from_queue(clData.queue);
  get_device_name_from_queue(clData.queue, device_name, 256);
  
  transfer_buffers_to_gpu();
  
  double advectionVelocityTimeGPU, advectionDensityTimeGPU, divergenceTimeGPU, projectJacobiTimeGPU, projectCGTimeGPU, pressureApplyTimeGPU;

    
  transfer_buffers_to_gpu();

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    run_cl_advect_velocity(&clData, dt);
  }
  flush_cl_queue();
  get_timestamp(&time2);
  advectionVelocityTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;



  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    run_cl_calculate_divergence(&clData, dt);
  }
  flush_cl_queue();
  get_timestamp(&time2);
  divergenceTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  transfer_buffers_to_cpu();
  flush_cl_queue();
  
  //This needs ntrips different divergence matrices to get accurate timings.
  //This is because by the time the second time it is called it will detect
  //the system is solved and exit after one matrix
  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    transfer_cl_float_buffer_from_device(&clData,clData.buf_pressure,g_pressure,clData.n,true);
    transfer_cl_float_buffer_from_device(&clData,clData.buf_divergence,g_divergence,clData.n,true);
    
    run_cl_cg_no_mtx(&clData,g_pressure, g_divergence,  g_cg_r, g_cg_d, g_cg_q, clData.n, 10, 0.0001f);
    flush_cl_queue();
    
    transfer_cl_float_buffer_to_device(&clData,clData.buf_pressure,g_pressure,clData.n,true);
  }
  flush_cl_queue();
  get_timestamp(&time2);
  projectCGTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;




  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    for(int i = 0; i < 20; ++i)
    {
      run_cl_pressure_solve(&clData, dt);
    }
  }
  flush_cl_queue();
  get_timestamp(&time2);
  projectJacobiTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;



  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    run_cl_pressure_apply(&clData, dt);
  }
  flush_cl_queue();
  get_timestamp(&time2);
  pressureApplyTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    run_cl_advect_density(&clData, dt);
  }
  flush_cl_queue();
  get_timestamp(&time2);
  advectionDensityTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"GPU","Advection Velocity",advectionVelocityTimeGPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/advectionVelocityTimeGPU);
  
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"GPU","Advection Density",advectionDensityTimeGPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/advectionDensityTimeGPU);
  
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"GPU", "Divergence",divergenceTimeGPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/divergenceTimeGPU);
  
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"GPU", "Projection Jacobi",projectJacobiTimeGPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/projectJacobiTimeGPU);
  
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t",device_name,NX,NY,NZ,"GPU", "Projection Conjugate Gradient",projectCGTimeGPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/projectCGTimeGPU);
  
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"GPU","Pressure Apply",pressureApplyTimeGPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/pressureApplyTimeGPU);
  

  cleanup_cl(&clData);
  
  
  
  
  ////////////////////////////////////////////////////
  ///CPU TIMINGS
  ////////////////////////////////////////////////////
  double advectionVelocityTimeCPU, advectionDensityTimeCPU, divergenceTimeCPU, projectJacobiTimeCPU, projectCGTimeCPU, pressureApplyTimeCPU;

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    advect_velocity_RK2(dt, g_u, g_v, g_w, g_u_prev, g_v_prev, g_w_prev);
  }
  get_timestamp(&time2);
  advectionVelocityTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;


  //project(dt,g_u,g_v, g_w, g_divergence, g_pressure, g_pressure_prev, g_laplacian_matrix,useCG);
  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    calculate_divergence(g_divergence, g_u, g_v, g_w, dt);
  }
  get_timestamp(&time2);
  divergenceTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;


  //This needs ntrips different divergence matrices to get accurate timings.
  //This is because by the time the second time it is called it will detect
  //the system is solved and exit after one matrix
  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    pressure_solve_cg_no_matrix(g_pressure, g_divergence, g_cg_r, g_cg_d, g_cg_q);
  }
  get_timestamp(&time2);
  projectCGTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    pressure_solve(g_pressure,g_pressure_prev, g_divergence, dt);
  }
  get_timestamp(&time2);
  projectJacobiTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    pressure_apply(g_u, g_v, g_w, g_pressure, dt);
  }
  get_timestamp(&time2);
  pressureApplyTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;


  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    advectRK2(dt,g_dens,g_dens_prev, g_u, g_v, g_w);
  }
  get_timestamp(&time2);
  advectionDensityTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;


  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"CPU","Advection Velocity",advectionVelocityTimeCPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/advectionVelocityTimeCPU);
  
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"CPU","Advection Density",advectionDensityTimeCPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/advectionDensityTimeCPU);
  
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"CPU","Divergence",divergenceTimeCPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/divergenceTimeCPU);
  
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"CPU","Projection Jacobi",projectJacobiTimeCPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/projectJacobiTimeCPU);
  
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"CPU","Projection Conjugate Gradient",projectCGTimeCPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/projectCGTimeCPU);
  
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,"CPU","Pressure Apply",pressureApplyTimeCPU);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/pressureApplyTimeCPU);
  
}
Exemple #8
0
int main ( int argc, char ** argv )
{
	// Parse command line options
    //
    int use_gpu = 1;
    int use_interop = 0;
    for(int i = 0; i < argc && argv; i++)
    {
        if(!argv[i])
            continue;
            
        if(strstr(argv[i], "cpu"))
            use_gpu = 0;        

        else if(strstr(argv[i], "gpu"))
            use_gpu = 1;

        else if(strstr(argv[i], "interop"))
            use_interop = 1;
    }

    printf("Parameter detect %s device (%s)\n",use_gpu==1?"GPU":"CPU",use_interop==1?"Share OpenGL":"Not Sharing OpenGL");

	OPENCL_SHARE_WITH_OPENGL = use_interop;

  	//testCG();
  	win_x = 512;
	win_y = 512;
  
  
	glutInit ( &argc, argv );
  
	open_glut_window ();
 
  //test_opencl_opengl_interop();
  
  
  dt = 0.1f;
  force = 10.0f;
  source = 10.0f;
	

	printf ( "\n\nHow to use this demo:\n\n" );
	printf ( "\t Add densities with the left mouse button\n" );
	printf ( "\t Add velocities with the left mouse button and dragging the mouse\n" );
	printf ( "\t Toggle density/velocity display with the 'v' key\n" );
	printf ( "\t Clear the simulation by pressing the 'x' key\n" );
  printf ( "\t switch poisson solvers from jacobi to conjugate gradient by pressing the 'c' key\n" );
  printf ( "\t switch advection scheme from RK2 to MacCormack by pressing the 'm' key\n" );
  printf ( "\t toggle vorticity confinement by pressing the 'o' key\n" );
  
	printf ( "\t Quit by pressing the 'q' key\n" );

	dvel = 0;

	step = 0;
	maccormack = 0;
	vorticity = 0;
  useCG = 0;
	
	if ( !allocate_data () ) exit ( 1 );
	clear_data ();
  

  
  //setupMatrix(g_laplacian_matrix);

//	FOR_EACH_FACE
//	{
//		//if(i < NX - NX*0.4 && i > NX*0.4 
//		//	&&
//		//   j < NY - NY*0.4 && j > NY*0.4 )
//		{
//			g_u_prev[IX(i,j,0)] =  -0.01 * cosf(3.14159 * 2.0 * i/NX);
//			g_v_prev[IX(i,j,0)] =  0.01 * sinf(3.14159 * 2.0 * j/NY);
//		}
//	}

#if RUN_TIMINGS
  runTimings(use_gpu);
  exit(0);
#endif
  
	copy_grid(g_u_prev, g_u);
	copy_grid(g_v_prev, g_v);
  
  g_dens_prev[IX(16,3,0)] = 10.0f;
  //g_u_prev[IX(16,3,0)] = 10.0f;
  
	/*
	calculate_divergence(g_divergence, g_u_prev, g_v_prev, g_w_prev, dt);
	pressure_solve(g_pressure,g_pressure_prev, g_divergence, dt);
	pressure_apply(g_u_prev, g_v_prev, g_w_prev, g_pressure, dt);
	//project(dt,g_u_prev,g_v_prev, g_w_prev, g_divergence, g_pressure, g_pressure_prev);
	SWAP(g_u_prev,g_u);
	SWAP(g_v_prev,g_v);
	SWAP(g_w_prev,g_w);

	if(!check_divergence(g_u_prev, g_v_prev, g_w_prev))
	{
		printf("Initial field wasn't divergence free!\n");
	}
	*/


//print_platforms_devices();
//  run_opencl_test(use_gpu);
  
//	run_tests();
   
  
#if USE_OPENCL
   init_opencl(use_gpu);
   load_cl_kernels(&clData);
   allocate_cl_buffers(&clData);
  
  
   transfer_buffers_to_gpu();
   
   flush_cl_queue();
#endif
   
   
  

	glutMainLoop ();

#if USE_OPENCL
   cleanup_cl(&clData);
#endif
  
	exit ( 0 );
}