コード例 #1
0
ファイル: mvt.cpp プロジェクト: EwanC/OCL_Visualiser
int main(void) {
  DATA_TYPE *a;
  DATA_TYPE *x1;
  DATA_TYPE *x2;
  DATA_TYPE *x1_outputFromGpu;
  DATA_TYPE *x2_outputFromGpu;
  DATA_TYPE *y_1;
  DATA_TYPE *y_2;

  /////////////////////////
  size_t oldSizes[1] = { N };
  size_t newSizes[1];
  getNewSizes(oldSizes, NULL, newSizes, NULL, "mvt_kernel1", 1);
  N = newSizes[0];
  /////////////////////////

  a = (DATA_TYPE *)malloc(N * N * sizeof(DATA_TYPE));
  x1 = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));
  x2 = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));
  x1_outputFromGpu = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));
  x2_outputFromGpu = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));
  y_1 = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));
  y_2 = (DATA_TYPE *)malloc(N * sizeof(DATA_TYPE));

  init_arrays(a, x1, x2, y_1, y_2);

  platform = new Platform(PLATFORM_ID);
  context = platform->getContext();
  Device device = platform->getDevice(DEVICE_ID);
  Queue queue(*context,device,Queue::EnableProfiling); 
  
  cl_mem_init(a, x1, x2, y_1, y_2,queue);
  
  Program program(context,KERNEL_DIRECTORY KERNEL_FILE_NAME);
  if(!program.build(device)){
           std::cout << "Error building the program: \n";
           std::cout <<program.getBuildLog(device); 
  }
  kernel1=program.createKernel(kernel1Name.c_str());
  kernel2=program.createKernel(kernel2Name.c_str());
  cl_launch_kernel(queue);


  queue.readBuffer(*x1_mem_obj,N * sizeof(DATA_TYPE), x1_outputFromGpu);
  queue.readBuffer(*x2_mem_obj,N * sizeof(DATA_TYPE), x2_outputFromGpu);
  queue.finish();

  runMvt(a, x1, x2, y_1, y_2, x1_outputFromGpu,x2_outputFromGpu);
  cl_clean_up();

  free(a);
  free(x1);
  free(x2);
  free(x1_outputFromGpu);
  free(x2_outputFromGpu);
  free(y_1);
  free(y_2);

  return 0;
}
コード例 #2
0
ファイル: mvt.c プロジェクト: lnangong/polybenchGpu
/*
 * Driver for the HMPP version of the MVT benchmark.
 *
 * Initialises the input arrays, runs runMvt() once at the HMPP "mvt" callsite
 * (writing into the *_outputFromGpu arrays) and once as a plain call (writing
 * into x1/x2), prints the wall-clock time of each run to stderr, and finally
 * passes both result pairs to compareResults().
 *
 * NOTE(review): all arrays, including the N x N matrix, are non-static locals;
 * for large N this may exceed the available stack -- confirm the intended N.
 *
 * Always returns 0.
 */
int main()
{
	double t_start, t_end;
	
	DATA_TYPE a[N][N];
	DATA_TYPE x1[N];
	DATA_TYPE x1_outputFromGpu[N];
	DATA_TYPE x2[N];
	DATA_TYPE x2_outputFromGpu[N];
	DATA_TYPE y1[N];
	DATA_TYPE y2[N];


	//initialize the arrays for running on the CPU and GPU
    	init_array(a, x1, x1_outputFromGpu, x2, x2_outputFromGpu, y1, y2);

	// HMPP directives for the "mvt" codelet. Going by the directive names,
	// "allocate" presumably reserves device resources and "advancedload"
	// uploads the listed arguments ahead of the call -- confirm against the
	// HMPP manual.
	// NOTE(review): the names in args[...] presumably refer to the codelet's
	// parameter names rather than to the local variables here -- confirm.
	#pragma hmpp mvt allocate

	#pragma hmpp mvt advancedload, args[a,x1,x2,y1,y2]

	t_start = rtclock();
	
	//run the algorithm on the GPU
	// The callsite is marked asynchronous, so the timed region ends only after
	// the "synchronize" directive below.
	#pragma hmpp mvt callsite, args[x1,x2].advancedload=true, asynchronous
	runMvt(a, x1_outputFromGpu, x2_outputFromGpu, y1, y2); // parameters are initialized in decls.h and are initialized with init_array()

	#pragma hmpp mvt synchronize

	t_end = rtclock();
	fprintf(stderr, "GPU Runtime: %0.6lf\n", t_end - t_start);
	
	// Presumably fetches the results back to the host ("delegatedstore") and
	// frees the device resources ("release"); both happen outside the timed
	// region -- confirm against the HMPP manual.
	#pragma hmpp mvt delegatedstore, args[x1,x2]

	#pragma hmpp mvt release

	t_start = rtclock();
	
	//run the algorithm on the CPU
	runMvt(a, x1, x2, y1, y2);
	
	t_end = rtclock();
	fprintf(stderr, "CPU Runtime: %0.6lf\n", t_end - t_start);
	
	// Hand the plain-call results and the callsite results to compareResults().
	compareResults(x1, x1_outputFromGpu, x2, x2_outputFromGpu);

	return 0;
}
コード例 #3
0
ファイル: mvt.c プロジェクト: rcfsousa/Polybench_OpenMP
/*
 * Driver for the OpenCL version of the MVT benchmark with per-phase timing.
 *
 * Each phase (reading the .cl file, OpenCL initialisation, buffer creation,
 * host<->device transfers, program load, kernel launch, serial run, clean-up)
 * is timed with rtclock() and stored in the corresponding tmp_* variable; all
 * tmp_* values are printed at the end.
 *
 * When ALOCACAO_NORMAL is defined, the host arrays are allocated here with
 * malloc() and copied explicitly with clEnqueueWriteBuffer/ReadBuffer;
 * otherwise only x1 and x2 are allocated here.
 *
 * Returns 0 on success, 1 if a host allocation fails.
 */
int main(void)
{
#ifdef ALOCACAO_NORMAL
    printf(">>>>>>>>>Versao Offload<<<<<<<<<<<\n");
#else
    printf(">>>>>>>>>Versao Memoria Compartilhada<<<<<<<<<<<\n");
#endif
    double t_start, t_end;
    double t_start_init, t_end_init;
    double t_start_init_off, t_end_init_off;
    /* Running sum of all timed OpenCL phases.
     * NOTE(review): this value is accumulated but never printed here. */
    double total_kernel;
    int alloc_failed = 0;
#ifdef ALOCACAO_NORMAL
    int transfer_failed;
#endif

#ifdef ALOCACAO_NORMAL
    /* sizeof first, so the products are evaluated in size_t. */
    a = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * N * N);
    x1_outputFromGpu = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * N);
    x2_outputFromGpu = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * N);
    y_1 = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * N);
    y_2 = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * N);
    alloc_failed = (a == NULL || x1_outputFromGpu == NULL ||
                    x2_outputFromGpu == NULL || y_1 == NULL || y_2 == NULL);
#endif
    x1 = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * N);
    x2 = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * N);
    alloc_failed = alloc_failed || x1 == NULL || x2 == NULL;

    if (alloc_failed) {
        /* Release whatever was obtained (free(NULL) is a no-op) and stop. */
        fprintf(stderr, "Error allocating host buffers\n");
        free(x1);
        free(x2);
#ifdef ALOCACAO_NORMAL
        free(a);
        free(x1_outputFromGpu);
        free(x2_outputFromGpu);
        free(y_1);
        free(y_2);
#endif
        return 1;
    }

    t_start_init = rtclock();
    read_cl_file();
    t_end_init = rtclock();
    tmp_read_cl_file = t_end_init - t_start_init;
    total_kernel = t_end_init - t_start_init;

    t_start_init = rtclock();
#ifndef MALI
    cl_initialization();
#else
    cl_initialization_Mali();
#endif
    t_end_init = rtclock();
    tmp_cl_initialization = t_end_init - t_start_init;
    /* Accumulate (the original assigned here, discarding read_cl_file time). */
    total_kernel += t_end_init - t_start_init;

    t_start_init = rtclock();
    cl_mem_init();
    t_end_init = rtclock();
    tmp_cl_mem_init = t_end_init - t_start_init;
    total_kernel += t_end_init - t_start_init;

    /* init() is not timed here; tmp_init is presumably recorded inside it. */
    init();


    /*------------GPU---------------*/
    /* Start of the GPU timing section. */

#ifdef ALOCACAO_NORMAL
    /* Blocking host-to-device copies. Every call is checked, so a failure in
     * an early transfer is not hidden by a later successful one. */
    transfer_failed = 0;
    t_start_init_off = rtclock();
    errcode = clEnqueueWriteBuffer(clCommandQue, a_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * N * N, a, 0, NULL, NULL);
    transfer_failed |= (errcode != CL_SUCCESS);
    errcode = clEnqueueWriteBuffer(clCommandQue, x1_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * N, x1, 0, NULL, NULL);
    transfer_failed |= (errcode != CL_SUCCESS);
    errcode = clEnqueueWriteBuffer(clCommandQue, x2_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * N, x2, 0, NULL, NULL);
    transfer_failed |= (errcode != CL_SUCCESS);
    errcode = clEnqueueWriteBuffer(clCommandQue, y1_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * N, y_1, 0, NULL, NULL);
    transfer_failed |= (errcode != CL_SUCCESS);
    errcode = clEnqueueWriteBuffer(clCommandQue, y2_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * N, y_2, 0, NULL, NULL);
    transfer_failed |= (errcode != CL_SUCCESS);
    if(transfer_failed) printf("Error in writing buffers\n");
    t_end_init_off = rtclock();
    tmp_clEnqueueWriteBuffer += t_end_init_off - t_start_init_off;
    total_kernel += t_end_init_off - t_start_init_off;
#endif

    t_start_init = rtclock();
    cl_load_prog();
    t_end_init = rtclock();
    tmp_cl_load_prog = t_end_init - t_start_init;
    total_kernel += t_end_init - t_start_init;

    t_start_init = rtclock();
    cl_launch_kernel();
    t_end_init = rtclock();
    tmp_cl_launch_kernel += t_end_init - t_start_init;
    total_kernel += t_end_init - t_start_init;

#ifdef ALOCACAO_NORMAL
    /* Blocking device-to-host copies of both result vectors, each checked. */
    transfer_failed = 0;
    t_start_init_off = rtclock();
    errcode = clEnqueueReadBuffer(clCommandQue, x1_mem_obj, CL_TRUE, 0, N*sizeof(DATA_TYPE), x1_outputFromGpu, 0, NULL, NULL);
    transfer_failed |= (errcode != CL_SUCCESS);
    errcode = clEnqueueReadBuffer(clCommandQue, x2_mem_obj, CL_TRUE, 0, N*sizeof(DATA_TYPE), x2_outputFromGpu, 0, NULL, NULL);
    transfer_failed |= (errcode != CL_SUCCESS);
    if(transfer_failed) printf("Error in reading GPU mem\n");
    t_end_init_off = rtclock();
    tmp_clEnqueueReadBuffer += t_end_init_off - t_start_init_off;
    total_kernel += t_end_init_off - t_start_init_off;
#endif

    /*--------------CPU------------------*/

    t_start = rtclock();
    runMvt();
    t_end = rtclock();
    tmp_serial = t_end - t_start;


    compareResults(x1, x1_outputFromGpu, x2, x2_outputFromGpu);

    t_start_init_off = rtclock();
    cl_clean_up();
    t_end_init_off = rtclock();
    tmp_cl_clean_up += t_end_init_off - t_start_init_off;
    total_kernel += t_end_init_off - t_start_init_off;


    free(x1);
    free(x2);
#ifdef ALOCACAO_NORMAL
    free(a);
    free(x1_outputFromGpu);
    free(x2_outputFromGpu);
    free(y_1);
    free(y_2);
#endif

    printf("\n-------RESULTS-------\n");
    printf("Sizes N=%d\n\n", N);

    printf("read_cl_file -------------> %lf\n", tmp_read_cl_file);
    printf("cl_initialization --------> %lf\n", tmp_cl_initialization);
    printf("cl_mem_init --------------> %lf\n", tmp_cl_mem_init);
    printf("init ---------------------> %lf\n", tmp_init);
    printf("cl_load_prog -------------> %lf\n", tmp_cl_load_prog);
    printf("cl_launch_kernel ---------> %lf\n", tmp_cl_launch_kernel);   
    printf("serialExecution ----------> %lf\n", tmp_serial);
    printf("cl_clean_up --------------> %lf\n", tmp_cl_clean_up);
    printf("clEnqueueWriteBuffer -----> %lf\n", tmp_clEnqueueWriteBuffer);
    printf("clEnqueueReadBuffer-------> %lf\n", tmp_clEnqueueReadBuffer);
    printf("clEnqueueMapBuffer -------> %lf\n", tmp_clEnqueueMapBuffer);
    printf("clEnqueueUnmapMemObject --> %lf\n", tmp_clEnqueueUnmapMemObject);

    return 0;
}