/*
 * Driver for the "OpenCL Profile No Overhead in the Loop" sample.
 *
 * Creates the host buffers, sets up OpenCL, copies the data to the device,
 * then repeats update -> read back -> find_range for as long as the range
 * stays above LIMIT.  Every phase is bracketed by its own perf counter;
 * the counters are printed after cleanup and the host buffers are freed.
 */
int main (int argc, const char * argv[]) {
	float *host_in, *host_out;
	float spread = BIG_RANGE;	/* loop runs while this stays above LIMIT */

	printf("4. OpenCL Profile No Overhead in the Loop\n");

	/* ---- Initialization: perf counters first, then the host data ---- */
	init_all_perfs();
	create_data(&host_in, &host_out);
	start_perf_measurement(&total_perf);

	/* ---- OpenCL device / context / queue ---- */
	setup_cl(argc, argv, &opencl_device, &opencl_context, &opencl_queue);

	/* ---- Computation setup; the host-to-device copy is timed as "write" ---- */
	setup_cl_compute();
	start_perf_measurement(&write_perf);
	copy_data_to_device(host_in, host_out);
	stop_perf_measurement(&write_perf);

	/* ---- Main loop: one pass per iteration until the range is small enough ---- */
	while (spread > LIMIT) {
		/* Run the update step on the current in/out buffers. */
		start_perf_measurement(&update_perf);
		update_cl(get_in_buffer(), get_out_buffer());
		stop_perf_measurement(&update_perf);

		/* Fetch the result into the host output buffer. */
		start_perf_measurement(&read_perf);
		read_back_data(get_out_buffer(), host_out);
		stop_perf_measurement(&read_perf);

		/* Recompute the range that drives the loop condition. */
		start_perf_measurement(&range_perf);
		spread = find_range(host_out, SIZE*SIZE);
		stop_perf_measurement(&range_perf);

		++iterations;
		printf("Iteration %d, range=%f.\n", iterations, spread);
	}

	/* ---- Wait for the queue to finish, timed separately from cleanup ---- */
	start_perf_measurement(&finish_perf);
	clFinish(opencl_queue);
	stop_perf_measurement(&finish_perf);

	start_perf_measurement(&cleanup_perf);
	cleanup_cl();
	stop_perf_measurement(&cleanup_perf);

	/* ---- Report ---- */
	stop_perf_measurement(&total_perf);
	print_perfs();

	free(host_in);
	free(host_out);
}
Пример #2
0
/*
 * Smoke test for the OpenCL path.
 *
 * Initializes OpenCL, loads the kernels, allocates the device buffers,
 * uploads the host buffers, runs a single density-advection step, downloads
 * the buffers again and releases the OpenCL resources.  The queue is flushed
 * after the upload, after the kernel and after the download.
 *
 * use_gpu: forwarded unchanged to init_opencl().
 *
 * The parameter is now declared as `int`; the original relied on the
 * "implicit int" rule, which is no longer part of C since C99.
 */
void run_opencl_test(int use_gpu){

  init_opencl(use_gpu);
  load_cl_kernels(&clData);
  allocate_cl_buffers(&clData);
  transfer_buffers_to_gpu();

  flush_cl_queue();

  run_cl_advect_density(&clData, dt);

  flush_cl_queue();

  transfer_buffers_to_cpu();

  flush_cl_queue();

  // Disabled manual verification of the downloaded result, kept for debugging:
//  printf("dens[%d] = %3.2f\n",IX(16,3,0),g_dens[IX(16,3,0)]);
//
//  if(g_dens[IX(16,3,0)] > 0.0f)
//  {
//    printf("Success!!\n");
//  }
//
//  for (int i = 0; i < clData.n; ++i)
//  {
//    if(i == 112) {
//      int j = i*clData.dn;
//      printf("debug_data1[%d] = %3.2f, %3.2f, %3.2f, %3.2f\n",i,clData.debug_data1[j], clData.debug_data1[j+1], clData.debug_data1[j+2], clData.debug_data1[j+3]);
//    }
//
//  }

  cleanup_cl(&clData);
}
Пример #3
0
/*
 * Print one row of the timing table: device name, grid size, backend label
 * ("GPU"/"CPU"), stage name, the mean time per trip in seconds and the
 * resulting throughput in MegaCells/s (NX*NY*NZ cells per trip).
 */
static void print_timing_row(const char *device_name, const char *backend,
                             const char *stage, double seconds)
{
  printf("%s\t%dx%dx%d\t%s\t%s\t %3.6f\ts\t", device_name,NX,NY,NZ,backend,stage,seconds);
  printf("%.3f\tMegaCells/s\n",(NX*NY*NZ)*1e-6/seconds);
}

/*
 * Benchmark every simulation stage, first through the OpenCL path and then
 * through the host implementations, and print one table row per stage.
 *
 * Each stage is executed ntrips times between two timestamps and the mean
 * time per trip is reported.  For the OpenCL stages the queue is flushed
 * before the closing timestamp is taken.
 *
 * use_gpu: forwarded unchanged to init_opencl().
 *
 * The CPU rows are labelled with the OpenCL device name as well, exactly as
 * in the original output format.
 */
void runTimings(int use_gpu){
  const int ntrips = 10;
  const int jacobi_sweeps = 20;   /* pressure-solve kernel launches per trip */
  char device_name[256] = "";     /* stays a valid empty string if the query writes nothing */

  timestamp_type time1, time2;

  ////////////////////////////////////////////////////
  ///GPU TIMINGS
  ////////////////////////////////////////////////////

  init_opencl(use_gpu);
  load_cl_kernels(&clData);
  allocate_cl_buffers(&clData);

  print_device_info_from_queue(clData.queue);
  get_device_name_from_queue(clData.queue, device_name, sizeof device_name);

  // Upload the host buffers once.  The original issued this call twice in a
  // row with no host-side change in between; the duplicate was dropped.
  transfer_buffers_to_gpu();

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    run_cl_advect_velocity(&clData, dt);
  }
  flush_cl_queue();
  get_timestamp(&time2);
  double advectionVelocityTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    run_cl_calculate_divergence(&clData, dt);
  }
  flush_cl_queue();
  get_timestamp(&time2);
  double divergenceTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  transfer_buffers_to_cpu();
  flush_cl_queue();

  // This needs ntrips different divergence fields to give accurate timings:
  // from the second call on, the solver detects that the system is already
  // solved and exits early.
  // Each trip also includes the pressure/divergence download and the
  // pressure upload, so those transfers are part of the reported time.
  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    transfer_cl_float_buffer_from_device(&clData,clData.buf_pressure,g_pressure,clData.n,true);
    transfer_cl_float_buffer_from_device(&clData,clData.buf_divergence,g_divergence,clData.n,true);

    // NOTE(review): 10 and 0.0001f are presumably the iteration cap and the
    // convergence tolerance -- confirm against run_cl_cg_no_mtx.
    run_cl_cg_no_mtx(&clData,g_pressure, g_divergence,  g_cg_r, g_cg_d, g_cg_q, clData.n, 10, 0.0001f);
    flush_cl_queue();

    transfer_cl_float_buffer_to_device(&clData,clData.buf_pressure,g_pressure,clData.n,true);
  }
  flush_cl_queue();
  get_timestamp(&time2);
  double projectCGTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    // Distinct index name: the original reused `i` here and shadowed the
    // trip counter.
    for(int sweep = 0; sweep < jacobi_sweeps; ++sweep)
    {
      run_cl_pressure_solve(&clData, dt);
    }
  }
  flush_cl_queue();
  get_timestamp(&time2);
  double projectJacobiTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    run_cl_pressure_apply(&clData, dt);
  }
  flush_cl_queue();
  get_timestamp(&time2);
  double pressureApplyTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    run_cl_advect_density(&clData, dt);
  }
  flush_cl_queue();
  get_timestamp(&time2);
  double advectionDensityTimeGPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  print_timing_row(device_name, "GPU", "Advection Velocity", advectionVelocityTimeGPU);
  print_timing_row(device_name, "GPU", "Advection Density", advectionDensityTimeGPU);
  print_timing_row(device_name, "GPU", "Divergence", divergenceTimeGPU);
  print_timing_row(device_name, "GPU", "Projection Jacobi", projectJacobiTimeGPU);
  print_timing_row(device_name, "GPU", "Projection Conjugate Gradient", projectCGTimeGPU);
  print_timing_row(device_name, "GPU", "Pressure Apply", pressureApplyTimeGPU);

  cleanup_cl(&clData);

  ////////////////////////////////////////////////////
  ///CPU TIMINGS
  ////////////////////////////////////////////////////

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    advect_velocity_RK2(dt, g_u, g_v, g_w, g_u_prev, g_v_prev, g_w_prev);
  }
  get_timestamp(&time2);
  double advectionVelocityTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  //project(dt,g_u,g_v, g_w, g_divergence, g_pressure, g_pressure_prev, g_laplacian_matrix,useCG);
  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    calculate_divergence(g_divergence, g_u, g_v, g_w, dt);
  }
  get_timestamp(&time2);
  double divergenceTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  // Same caveat as for the OpenCL CG timing: after the first trip the system
  // is already solved, so later trips exit early.
  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    pressure_solve_cg_no_matrix(g_pressure, g_divergence, g_cg_r, g_cg_d, g_cg_q);
  }
  get_timestamp(&time2);
  double projectCGTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    pressure_solve(g_pressure,g_pressure_prev, g_divergence, dt);
  }
  get_timestamp(&time2);
  double projectJacobiTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    pressure_apply(g_u, g_v, g_w, g_pressure, dt);
  }
  get_timestamp(&time2);
  double pressureApplyTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  get_timestamp(&time1);
  for(int i = 0; i < ntrips; ++i)
  {
    advectRK2(dt,g_dens,g_dens_prev, g_u, g_v, g_w);
  }
  get_timestamp(&time2);
  double advectionDensityTimeCPU = timestamp_diff_in_seconds(time1,time2)/ntrips;

  print_timing_row(device_name, "CPU", "Advection Velocity", advectionVelocityTimeCPU);
  print_timing_row(device_name, "CPU", "Advection Density", advectionDensityTimeCPU);
  print_timing_row(device_name, "CPU", "Divergence", divergenceTimeCPU);
  print_timing_row(device_name, "CPU", "Projection Jacobi", projectJacobiTimeCPU);
  print_timing_row(device_name, "CPU", "Projection Conjugate Gradient", projectCGTimeCPU);
  print_timing_row(device_name, "CPU", "Pressure Apply", pressureApplyTimeCPU);

}
Пример #4
0
/*
 * Entry point of the interactive fluid demo.
 *
 * Recognized command-line words (matched as substrings of each argument):
 *   "cpu"     - select the CPU device (use_gpu = 0)
 *   "gpu"     - select the GPU device (use_gpu = 1, the default)
 *   "interop" - share the OpenCL context with OpenGL
 * Only the first matching word of an argument takes effect.
 *
 * After parsing, the function opens the GLUT window, sets the simulation
 * parameters, allocates and clears the grids, optionally runs the timing
 * suite (RUN_TIMINGS) or sets up OpenCL (USE_OPENCL), and enters the GLUT
 * main loop.
 */
int main ( int argc, char ** argv )
{
    // Parse command line options
    //
    int use_gpu = 1;
    int use_interop = 0;

    // Start at 1: argv[0] is the program name, and matching substrings in it
    // would let an executable path such as ".../cpu_build/demo" silently
    // switch the device.
    for(int i = 1; i < argc && argv; i++)
    {
        if(!argv[i])
            continue;

        if(strstr(argv[i], "cpu"))
            use_gpu = 0;

        else if(strstr(argv[i], "gpu"))
            use_gpu = 1;

        else if(strstr(argv[i], "interop"))
            use_interop = 1;
    }

    printf("Parameter detect %s device (%s)\n",use_gpu==1?"GPU":"CPU",use_interop==1?"Share OpenGL":"Not Sharing OpenGL");

    OPENCL_SHARE_WITH_OPENGL = use_interop;

    //testCG();
    win_x = 512;
    win_y = 512;

    glutInit ( &argc, argv );

    open_glut_window ();

    //test_opencl_opengl_interop();

    // Simulation parameters.
    dt = 0.1f;
    force = 10.0f;
    source = 10.0f;

    printf ( "\n\nHow to use this demo:\n\n" );
    printf ( "\t Add densities with the left mouse button\n" );
    printf ( "\t Add velocities with the left mouse button and dragging the mouse\n" );
    printf ( "\t Toggle density/velocity display with the 'v' key\n" );
    printf ( "\t Clear the simulation by pressing the 'x' key\n" );
    printf ( "\t switch poisson solvers from jacobi to conjugate gradient by pressing the 'c' key\n" );
    printf ( "\t switch advection scheme from RK2 to MacCormack by pressing the 'm' key\n" );
    printf ( "\t toggle vorticity confinement by pressing the 'o' key\n" );

    printf ( "\t Quit by pressing the 'q' key\n" );

    // Initial UI / solver toggles.
    dvel = 0;

    step = 0;
    maccormack = 0;
    vorticity = 0;
    useCG = 0;

    if ( !allocate_data () ) exit ( 1 );
    clear_data ();

    //setupMatrix(g_laplacian_matrix);

//	FOR_EACH_FACE
//	{
//		//if(i < NX - NX*0.4 && i > NX*0.4
//		//	&&
//		//   j < NY - NY*0.4 && j > NY*0.4 )
//		{
//			g_u_prev[IX(i,j,0)] =  -0.01 * cosf(3.14159 * 2.0 * i/NX);
//			g_v_prev[IX(i,j,0)] =  0.01 * sinf(3.14159 * 2.0 * j/NY);
//		}
//	}

#if RUN_TIMINGS
    // Benchmark-only build: run the timing suite and quit without opening
    // the interactive loop.
    runTimings(use_gpu);
    exit(0);
#endif

    // NOTE(review): only the u and v components are copied; there is no
    // matching call for g_w_prev/g_w -- confirm whether that is intended.
    copy_grid(g_u_prev, g_u);
    copy_grid(g_v_prev, g_v);

    // Seed a single density source cell.
    g_dens_prev[IX(16,3,0)] = 10.0f;
    //g_u_prev[IX(16,3,0)] = 10.0f;

    /*
    calculate_divergence(g_divergence, g_u_prev, g_v_prev, g_w_prev, dt);
    pressure_solve(g_pressure,g_pressure_prev, g_divergence, dt);
    pressure_apply(g_u_prev, g_v_prev, g_w_prev, g_pressure, dt);
    //project(dt,g_u_prev,g_v_prev, g_w_prev, g_divergence, g_pressure, g_pressure_prev);
    SWAP(g_u_prev,g_u);
    SWAP(g_v_prev,g_v);
    SWAP(g_w_prev,g_w);

    if(!check_divergence(g_u_prev, g_v_prev, g_w_prev))
    {
        printf("Initial field wasn't divergence free!\n");
    }
    */

//print_platforms_devices();
//  run_opencl_test(use_gpu);

//	run_tests();

#if USE_OPENCL
    init_opencl(use_gpu);
    load_cl_kernels(&clData);
    allocate_cl_buffers(&clData);

    transfer_buffers_to_gpu();

    flush_cl_queue();
#endif

    // NOTE(review): with classic GLUT, glutMainLoop() never returns, so the
    // cleanup below would only run with an implementation that lets the loop
    // exit (e.g. freeglut configured to return) -- confirm which is in use.
    glutMainLoop ();

#if USE_OPENCL
    cleanup_cl(&clData);
#endif

    exit ( 0 );
}