int main(void) { #ifdef ALOCACAO_NORMAL printf(">>>>>>>>>Versao Offload<<<<<<<<<<<\n"); #else printf(">>>>>>>>>Versao Memoria Compartilhada<<<<<<<<<<<\n"); #endif double t_start, t_end; double t_start_init, t_end_init; double t_offload_start, t_offload_end, t_start_init_off, t_end_init_off; double total_kernel; #ifdef ALOCACAO_NORMAL A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); D = (DATA_TYPE*)malloc(NJ*NL*sizeof(DATA_TYPE)); E_outputFromGpu = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE)); #endif C = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); E = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE)); t_start_init = rtclock(); read_cl_file(); t_end_init = rtclock(); tmp_read_cl_file = t_end_init - t_start_init; total_kernel = t_end_init - t_start_init; t_start_init = rtclock(); #ifndef MALI cl_initialization(); #else cl_initialization_Mali(); #endif t_end_init = rtclock(); tmp_cl_initialization = t_end_init - t_start_init; total_kernel = t_end_init - t_start_init; t_start_init = rtclock(); cl_mem_init(); t_end_init = rtclock(); tmp_cl_mem_init= t_end_init - t_start_init; total_kernel += t_end_init - t_start_init; //Está dentro da função a contagem init(); //------------GPU--------------- //Inicia tempo GPU #ifdef ALOCACAO_NORMAL t_offload_start = rtclock(); errcode = clEnqueueWriteBuffer(clCommandQue, a_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NK, A, 0, NULL, NULL); errcode = clEnqueueWriteBuffer(clCommandQue, b_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NK * NJ, B, 0, NULL, NULL); errcode = clEnqueueWriteBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NJ, C, 0, NULL, NULL); errcode = clEnqueueWriteBuffer(clCommandQue, d_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NJ * NL, D, 0, NULL, NULL); errcode = clEnqueueWriteBuffer(clCommandQue, e_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, E, 0, NULL, NULL); if(errcode != CL_SUCCESS)printf("Error in writing buffers\n"); t_offload_end = rtclock(); tmp_clEnqueueWriteBuffer += t_offload_end - t_offload_start; total_kernel+=t_offload_end - t_offload_start; #endif t_start_init = rtclock(); cl_load_prog(); t_end_init = rtclock(); tmp_cl_load_prog= t_end_init - t_start_init; total_kernel += t_end_init - t_start_init; t_start_init = rtclock(); cl_launch_kernel(); t_end_init = rtclock(); tmp_cl_launch_kernel += t_end_init - t_start_init; total_kernel += t_end_init - t_start_init; #ifdef ALOCACAO_NORMAL t_offload_start = rtclock(); errcode = clEnqueueReadBuffer(clCommandQue, e_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, E_outputFromGpu, 0, NULL, NULL); if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n"); t_offload_end = rtclock(); tmp_clEnqueueReadBuffer += t_offload_end - t_offload_start; total_kernel+=t_offload_end - t_offload_start; #endif //-------------CPU--------------- t_start = rtclock(); mm2_cpu(A, B, C, D, E); t_end = rtclock(); tmp_serial = t_end - t_start; compareResults(); t_start_init_off = rtclock(); cl_clean_up(); t_end_init_off = rtclock(); tmp_cl_clean_up+=t_end_init_off - t_start_init_off; total_kernel += t_end_init_off - t_start_init_off; #ifdef ALOCACAO_NORMAL free(C); free(A); free(B); free(D); free(E_outputFromGpu); #endif free(E); printf("\n-------RESULTS-------\n"); printf("Sizes NI=%d, NJ=%d, NK=%d e NL=%d\n\n", NI, NJ, NK, NL); printf("read_cl_file -------------> %lf\n", tmp_read_cl_file); printf("cl_initialization --------> %lf\n", tmp_cl_initialization); printf("cl_mem_init --------------> %lf\n", tmp_cl_mem_init); printf("init ---------------------> %lf\n", tmp_init); printf("cl_load_prog -------------> %lf\n", tmp_cl_load_prog); printf("cl_launch_kernel ---------> %lf\n", tmp_cl_launch_kernel); printf("serialExecution ----------> %lf\n", tmp_serial); printf("cl_clean_up --------------> %lf\n", tmp_cl_clean_up); printf("clEnqueueWriteBuffer -----> %lf\n", tmp_clEnqueueWriteBuffer); printf("clEnqueueReadBuffer-------> %lf\n", tmp_clEnqueueReadBuffer); printf("clEnqueueMapBuffer -------> %lf\n", tmp_clEnqueueMapBuffer); printf("clEnqueueUnmapMemObject --> %lf\n", tmp_clEnqueueUnmapMemObject); return 0; }
int main(int argc, char *argv[]) { /* Retrieve problem size. */ int ni = NI; int nj = NJ; int nk = NK; int nl = NL; /* Variable declaration/allocation. */ DATA_TYPE alpha; DATA_TYPE beta; POLYBENCH_2D_ARRAY_DECL(tmp,DATA_TYPE,NI,NJ,ni,nj); POLYBENCH_2D_ARRAY_DECL(A,DATA_TYPE,NI,NK,ni,nk); POLYBENCH_2D_ARRAY_DECL(B,DATA_TYPE,NK,NJ,nk,nj); POLYBENCH_2D_ARRAY_DECL(C,DATA_TYPE,NL,NJ,nl,nj); POLYBENCH_2D_ARRAY_DECL(D,DATA_TYPE,NI,NL,ni,nl); POLYBENCH_2D_ARRAY_DECL(D_outputFromGpu,DATA_TYPE,NI,NL,ni,nl); /* Initialize array(s). */ init_array(ni, nj, nk, nl, &alpha, &beta, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(D)); read_cl_file(); cl_initialization(); cl_mem_init(POLYBENCH_ARRAY(tmp), POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(D_outputFromGpu)); cl_load_prog(); cl_launch_kernel(ni, nj, nk, nl, alpha, beta); errcode = clEnqueueReadBuffer(clCommandQue, dOutputFromGpu_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, POLYBENCH_ARRAY(D_outputFromGpu), 0, NULL, NULL); if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n"); #ifdef RUN_ON_CPU /* Start timer. */ polybench_start_instruments; mm2_cpu(ni, nj, nk, nl, alpha, beta, POLYBENCH_ARRAY(tmp), POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(C), POLYBENCH_ARRAY(D)); /* Stop and print timer. */ printf("CPU Time in seconds:\n"); polybench_stop_instruments; polybench_print_instruments; compareResults(ni, nl, POLYBENCH_ARRAY(D), POLYBENCH_ARRAY(D_outputFromGpu)); #else //prevent dead code elimination polybench_prevent_dce(print_array(ni, nl, POLYBENCH_ARRAY(D_outputFromGpu))); #endif //RUN_ON_CPU cl_clean_up(); POLYBENCH_FREE_ARRAY(tmp); POLYBENCH_FREE_ARRAY(A); POLYBENCH_FREE_ARRAY(B); POLYBENCH_FREE_ARRAY(C); POLYBENCH_FREE_ARRAY(D); POLYBENCH_FREE_ARRAY(D_outputFromGpu); return 0; }