/*
 * Benchmark driver for the HMPP variant of conv3D.
 *
 * Runs conv3D twice on the same input A: once through the "conv" HMPP
 * callsite (result in B_outputFromGpu) and once as a plain call (result
 * in B). Each run is bracketed by rtclock() and its elapsed time is
 * written to stderr; the two results are then handed to compareResults().
 *
 * argc/argv are not used. Always returns 0.
 *
 * NOTE(review): the three NI x NJ x NK arrays are automatic variables;
 * for large problem sizes this presumably needs a generous stack limit.
 */
int main(int argc, char *argv[])
{
	double gpu_begin, gpu_end;
	double cpu_begin, cpu_end;

	DATA_TYPE A[NI][NJ][NK];               /* input volume */
	DATA_TYPE B[NI][NJ][NK];               /* CPU target results */
	DATA_TYPE B_outputFromGpu[NI][NJ][NK]; /* GPU exec results */

	/* Populate the input before any directive refers to it. */
	init(A);

	/*
	 * The directive sequence below is kept in its original order.
	 * NOTE(review): the names in args[...] appear to follow the HMPP
	 * codelet declaration, which is not visible here; do not rename.
	 */
	#pragma hmpp conv allocate
	#pragma hmpp conv advancedload, args[A;B]

	/* Timed region 1: asynchronous callsite followed by synchronize. */
	gpu_begin = rtclock();
	#pragma hmpp conv callsite, args[A;B].advancedload=true, asynchronous
	conv3D(A, B_outputFromGpu);
	#pragma hmpp conv synchronize
	gpu_end = rtclock();
	fprintf(stderr, "GPU Runtime: %0.6lfs\n", gpu_end - gpu_begin);

	/* The store-back and release happen outside the timed region. */
	#pragma hmpp conv delegatedstore, args[B]
	#pragma hmpp conv release

	/* Timed region 2: the same routine as an ordinary function call. */
	cpu_begin = rtclock();
	conv3D(A, B);
	cpu_end = rtclock();
	fprintf(stderr, "CPU Runtime: %0.6lfs\n", cpu_end - cpu_begin);

	/* Check the directive-driven result against the plain-call result. */
	compareResults(B, B_outputFromGpu);

	return 0;
}
int main(int argc, char *argv[]) { int ni = NI; int nj = NJ; int nk = NK; POLYBENCH_3D_ARRAY_DECL(A,DATA_TYPE,NI,NJ,NK,ni,nj,nk); POLYBENCH_3D_ARRAY_DECL(B,DATA_TYPE,NI,NJ,NK,ni,nj,nk); POLYBENCH_3D_ARRAY_DECL(B_outputFromGpu,DATA_TYPE,NI,NJ,NK,ni,nj,nk); init(ni, nj, nk, POLYBENCH_ARRAY(A)); read_cl_file(); cl_initialization(); cl_mem_init(POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B)); cl_load_prog(); cl_launch_kernel(ni, nj, nk); errcode = clEnqueueReadBuffer(clCommandQue, b_mem_obj, CL_TRUE, 0, NI * NJ * NK * sizeof(DATA_TYPE), POLYBENCH_ARRAY(B_outputFromGpu), 0, NULL, NULL); if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n"); #ifdef RUN_ON_CPU /* Start timer. */ polybench_start_instruments; conv3D(ni, nj, nk, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B)); /* Stop and print timer. */ printf("CPU Time in seconds:\n"); polybench_stop_instruments; polybench_print_instruments; compareResults(ni, nj, nk, POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(B_outputFromGpu)); #else //prevent dead code elimination polybench_prevent_dce(print_array(ni, nj, nk, POLYBENCH_ARRAY(B_outputFromGpu))); #endif //RUN_ON_CPU cl_clean_up(); POLYBENCH_FREE_ARRAY(A); POLYBENCH_FREE_ARRAY(B); POLYBENCH_FREE_ARRAY(B_outputFromGpu); return 0; }