Example #1
0
/*
 * Benchmark driver: runs the OpenCL pipeline (read kernel source, initialize
 * OpenCL, create buffers, build the program, launch the kernels), then the CPU
 * reference mm2_cpu(), compares both results and prints per-stage timings.
 *
 * Build-time switches:
 *   ALOCACAO_NORMAL - host arrays A, B, D and E_outputFromGpu are malloc'ed
 *                     here and copied to/from the device with
 *                     clEnqueueWriteBuffer / clEnqueueReadBuffer. When it is
 *                     undefined those four arrays are not allocated in this
 *                     function (presumably cl_mem_init() provides them through
 *                     buffer mapping -- confirm against that function).
 *   MALI            - selects cl_initialization_Mali() instead of
 *                     cl_initialization().
 *
 * Returns 0 on completion, 1 if a host allocation fails.
 */
int main(void)
{
#ifdef ALOCACAO_NORMAL
    printf(">>>>>>>>>Versao Offload<<<<<<<<<<<\n");
#else
    printf(">>>>>>>>>Versao Memoria Compartilhada<<<<<<<<<<<\n");
#endif
    double t_start, t_end;
    double t_start_init, t_end_init;
    double t_start_init_off, t_end_init_off;
    /* Sum of every GPU-path stage timed below; accumulated but not printed. */
    double total_kernel;
    int alloc_failed;
#ifdef ALOCACAO_NORMAL
    /* Only the explicit-copy build times host<->device transfers. */
    double t_offload_start, t_offload_end;
    int write_failed = 0;
#endif

    /* sizeof comes first so the element-count product is computed in size_t. */
#ifdef ALOCACAO_NORMAL
    A = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * NI * NK);
    B = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * NK * NJ);
    D = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * NJ * NL);
    E_outputFromGpu = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * NI * NL);
#endif

    C = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * NI * NJ);
    E = (DATA_TYPE*)malloc(sizeof(DATA_TYPE) * NI * NL);

    alloc_failed = (C == NULL || E == NULL);
#ifdef ALOCACAO_NORMAL
    alloc_failed = alloc_failed || A == NULL || B == NULL || D == NULL
                   || E_outputFromGpu == NULL;
#endif
    if (alloc_failed) {
        fprintf(stderr, "Error allocating host buffers\n");
        /* free(NULL) is a no-op, so releasing every pointer is safe. */
        free(C);
        free(E);
#ifdef ALOCACAO_NORMAL
        free(A);
        free(B);
        free(D);
        free(E_outputFromGpu);
#endif
        return 1;
    }

    t_start_init = rtclock();
    read_cl_file();
    t_end_init = rtclock();
    tmp_read_cl_file = t_end_init - t_start_init;
    total_kernel = t_end_init - t_start_init;

    t_start_init = rtclock();
#ifndef MALI
    cl_initialization();
#else
    cl_initialization_Mali();
#endif
    t_end_init = rtclock();
    tmp_cl_initialization = t_end_init - t_start_init;
    /* Accumulate: a plain assignment here would drop the read_cl_file time. */
    total_kernel += t_end_init - t_start_init;

    t_start_init = rtclock();
    cl_mem_init();
    t_end_init = rtclock();
    tmp_cl_mem_init = t_end_init - t_start_init;
    total_kernel += t_end_init - t_start_init;

    /* init() is not timed here; per the original note it times itself. */
    init();


    /* ------------ GPU --------------- */

#ifdef ALOCACAO_NORMAL
    /* Blocking host-to-device copies; each return code is checked so that a
       later successful call cannot hide an earlier failure. */
    t_offload_start = rtclock();
    errcode = clEnqueueWriteBuffer(clCommandQue, a_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NK, A, 0, NULL, NULL);
    if (errcode != CL_SUCCESS) write_failed = 1;
    errcode = clEnqueueWriteBuffer(clCommandQue, b_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NK * NJ, B, 0, NULL, NULL);
    if (errcode != CL_SUCCESS) write_failed = 1;
    errcode = clEnqueueWriteBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NJ, C, 0, NULL, NULL);
    if (errcode != CL_SUCCESS) write_failed = 1;
    errcode = clEnqueueWriteBuffer(clCommandQue, d_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NJ * NL, D, 0, NULL, NULL);
    if (errcode != CL_SUCCESS) write_failed = 1;
    errcode = clEnqueueWriteBuffer(clCommandQue, e_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, E, 0, NULL, NULL);
    if (errcode != CL_SUCCESS) write_failed = 1;
    if (write_failed) printf("Error in writing buffers\n");
    t_offload_end = rtclock();
    tmp_clEnqueueWriteBuffer += t_offload_end - t_offload_start;
    total_kernel += t_offload_end - t_offload_start;
#endif

    t_start_init = rtclock();
    cl_load_prog();
    t_end_init = rtclock();
    tmp_cl_load_prog = t_end_init - t_start_init;
    total_kernel += t_end_init - t_start_init;

    t_start_init = rtclock();
    cl_launch_kernel();
    t_end_init = rtclock();
    tmp_cl_launch_kernel += t_end_init - t_start_init;
    total_kernel += t_end_init - t_start_init;

#ifdef ALOCACAO_NORMAL
    /* Blocking device-to-host copy of the GPU result. */
    t_offload_start = rtclock();
    errcode = clEnqueueReadBuffer(clCommandQue, e_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, E_outputFromGpu, 0, NULL, NULL);
    if (errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");
    t_offload_end = rtclock();
    tmp_clEnqueueReadBuffer += t_offload_end - t_offload_start;
    total_kernel += t_offload_end - t_offload_start;
#endif

    /* ------------- CPU --------------- */

    t_start = rtclock();
    mm2_cpu(A, B, C, D, E);
    t_end = rtclock();
    tmp_serial = t_end - t_start;

    compareResults();


    t_start_init_off = rtclock();
    cl_clean_up();
    t_end_init_off = rtclock();
    tmp_cl_clean_up += t_end_init_off - t_start_init_off;
    total_kernel += t_end_init_off - t_start_init_off;


#ifdef ALOCACAO_NORMAL
    free(A);
    free(B);
    free(D);
    free(E_outputFromGpu);
#endif

    /* C and E are malloc'ed above in both build variants, so both are
       released unconditionally (C used to leak without ALOCACAO_NORMAL).
       NOTE(review): assumes cl_mem_init() does not rebind C -- confirm. */
    free(C);
    free(E);

    printf("\n-------RESULTS-------\n");
    printf("Sizes NI=%d, NJ=%d, NK=%d e NL=%d\n\n", NI, NJ, NK, NL);

    printf("read_cl_file -------------> %lf\n", tmp_read_cl_file);
    printf("cl_initialization --------> %lf\n", tmp_cl_initialization);
    printf("cl_mem_init --------------> %lf\n", tmp_cl_mem_init);
    printf("init ---------------------> %lf\n", tmp_init);
    printf("cl_load_prog -------------> %lf\n", tmp_cl_load_prog);
    printf("cl_launch_kernel ---------> %lf\n", tmp_cl_launch_kernel);   
    printf("serialExecution ----------> %lf\n", tmp_serial);
    printf("cl_clean_up --------------> %lf\n", tmp_cl_clean_up);
    printf("clEnqueueWriteBuffer -----> %lf\n", tmp_clEnqueueWriteBuffer);
    printf("clEnqueueReadBuffer-------> %lf\n", tmp_clEnqueueReadBuffer);
    printf("clEnqueueMapBuffer -------> %lf\n", tmp_clEnqueueMapBuffer);
    printf("clEnqueueUnmapMemObject --> %lf\n", tmp_clEnqueueUnmapMemObject);

    return 0;
}
Example #2
0
/*
 * Benchmark driver: initializes the input arrays, runs the OpenCL kernels,
 * reads the device result back into D_outputFromGpu and then either
 *   - RUN_ON_CPU defined: times mm2_cpu() and compares its output D with
 *     D_outputFromGpu, or
 *   - otherwise: passes print_array() of the GPU result to
 *     polybench_prevent_dce().
 *
 * argc/argv are accepted but not used. Always returns 0.
 */
int main(int argc, char *argv[])
{
	(void)argc;
	(void)argv;

	/* Retrieve problem size. */
	int ni = NI;
	int nj = NJ;
	int nk = NK;
	int nl = NL;

	/* Variable declaration/allocation. */
	DATA_TYPE alpha;
	DATA_TYPE beta;
	POLYBENCH_2D_ARRAY_DECL(tmp,DATA_TYPE,NI,NJ,ni,nj);
	POLYBENCH_2D_ARRAY_DECL(A,DATA_TYPE,NI,NK,ni,nk);
	POLYBENCH_2D_ARRAY_DECL(B,DATA_TYPE,NK,NJ,nk,nj);
	POLYBENCH_2D_ARRAY_DECL(C,DATA_TYPE,NL,NJ,nl,nj);
	POLYBENCH_2D_ARRAY_DECL(D,DATA_TYPE,NI,NL,ni,nl);
	POLYBENCH_2D_ARRAY_DECL(D_outputFromGpu,DATA_TYPE,NI,NL,ni,nl);
	
	/* Initialize array(s). */
  	init_array(ni, nj, nk, nl, &alpha, &beta, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(D));

	read_cl_file();
	cl_initialization();
	/* NOTE(review): init_array() above fills D, but D_outputFromGpu (never
	   initialized in this function) is what is handed to cl_mem_init(). If
	   cl_mem_init() uploads its last argument as the initial device contents,
	   the kernels start from indeterminate data -- confirm against
	   cl_mem_init() and the kernel source. */
	cl_mem_init(POLYBENCH_ARRAY(tmp), POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(D_outputFromGpu));
	cl_load_prog();

	cl_launch_kernel(ni, nj, nk, nl, alpha, beta);

	/* Blocking read of the device result into the host-side GPU output array. */
	errcode = clEnqueueReadBuffer(clCommandQue, dOutputFromGpu_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, POLYBENCH_ARRAY(D_outputFromGpu), 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");


	#ifdef RUN_ON_CPU

		/* Start timer. */
	  	polybench_start_instruments;

		mm2_cpu(ni, nj, nk, nl, alpha, beta, POLYBENCH_ARRAY(tmp), POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(D));
	
		/* Stop the timer before any console output so the printf below is
		   not counted as CPU kernel time; the printed order is unchanged. */
	  	polybench_stop_instruments;
		printf("CPU Time in seconds:\n");
	 	polybench_print_instruments;

		compareResults(ni, nl, POLYBENCH_ARRAY(D), POLYBENCH_ARRAY(D_outputFromGpu));

	#else //prevent dead code elimination

		polybench_prevent_dce(print_array(ni, nl, POLYBENCH_ARRAY(D_outputFromGpu)));

	#endif //RUN_ON_CPU


	cl_clean_up();

	POLYBENCH_FREE_ARRAY(tmp);
	POLYBENCH_FREE_ARRAY(A);
	POLYBENCH_FREE_ARRAY(B);
	POLYBENCH_FREE_ARRAY(C);
	POLYBENCH_FREE_ARRAY(D);
	POLYBENCH_FREE_ARRAY(D_outputFromGpu);

	return 0;
}