示例#1
0
int main(int argc, char *argv[])
{
	double t_start, t_end;
	
	DATA_TYPE A[NI][NJ][NK];
	DATA_TYPE B[NI][NJ][NK];  // CPU target results
	DATA_TYPE B_outputFromGpu[NI][NJ][NK];  // GPU exec results

	//initialize the arrays
	init(A);

	#pragma hmpp conv allocate

	#pragma hmpp conv advancedload, args[A;B]
	
	// Run GPU code
	
	t_start = rtclock();
	
	#pragma hmpp conv callsite, args[A;B].advancedload=true, asynchronous
	conv3D(A, B_outputFromGpu);

	#pragma hmpp conv synchronize

    t_end = rtclock();
    fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
	
	#pragma hmpp conv delegatedstore, args[B]

	#pragma hmpp conv release

	t_start = rtclock();
	
	conv3D(A, B);
	
    t_end = rtclock();
    fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);
	
	compareResults(B, B_outputFromGpu);

	return 0;
}
示例#2
0
int main(int argc, char *argv[])
{	
	int ni = NI;
	int nj = NJ;
	int nk = NK;

	POLYBENCH_3D_ARRAY_DECL(A,DATA_TYPE,NI,NJ,NK,ni,nj,nk);
	POLYBENCH_3D_ARRAY_DECL(B,DATA_TYPE,NI,NJ,NK,ni,nj,nk);
	POLYBENCH_3D_ARRAY_DECL(B_outputFromGpu,DATA_TYPE,NI,NJ,NK,ni,nj,nk);

	init(ni, nj, nk, POLYBENCH_ARRAY(A));

	read_cl_file();
	cl_initialization();
	cl_mem_init(POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B));
	cl_load_prog();

	cl_launch_kernel(ni, nj, nk);

	errcode = clEnqueueReadBuffer(clCommandQue, b_mem_obj, CL_TRUE, 0, NI * NJ * NK * sizeof(DATA_TYPE), POLYBENCH_ARRAY(B_outputFromGpu), 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

	#ifdef RUN_ON_CPU

		/* Start timer. */
	  	polybench_start_instruments;

		conv3D(ni, nj, nk, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(B));
	
		/* Stop and print timer. */
		printf("CPU Time in seconds:\n");
	  	polybench_stop_instruments;
	 	polybench_print_instruments;

		compareResults(ni, nj, nk, POLYBENCH_ARRAY(B), POLYBENCH_ARRAY(B_outputFromGpu));

	#else //prevent dead code elimination

		polybench_prevent_dce(print_array(ni, nj, nk, POLYBENCH_ARRAY(B_outputFromGpu)));

	#endif //RUN_ON_CPU

	cl_clean_up();

	POLYBENCH_FREE_ARRAY(A);
	POLYBENCH_FREE_ARRAY(B);
	POLYBENCH_FREE_ARRAY(B_outputFromGpu);

	return 0;
}