int main(void) { int nx = NX; int ny = NY; POLYBENCH_2D_ARRAY_DECL(A,DATA_TYPE,NX,NY,nx,ny); POLYBENCH_1D_ARRAY_DECL(x,DATA_TYPE,NY,ny); POLYBENCH_1D_ARRAY_DECL(y,DATA_TYPE,NY,ny); POLYBENCH_1D_ARRAY_DECL(y_outputFromGpu,DATA_TYPE,NY,ny); POLYBENCH_1D_ARRAY_DECL(tmp,DATA_TYPE,NX,nx); init_array(nx, ny, POLYBENCH_ARRAY(x), POLYBENCH_ARRAY(A)); read_cl_file(); cl_initialization(); cl_mem_init(POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(x), POLYBENCH_ARRAY(y), POLYBENCH_ARRAY(tmp)); cl_load_prog(); cl_launch_kernel(nx, ny); errcode = clEnqueueReadBuffer(clCommandQue, y_mem_obj, CL_TRUE, 0, NY*sizeof(DATA_TYPE), POLYBENCH_ARRAY(y_outputFromGpu), 0, NULL, NULL); if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n"); #ifdef RUN_ON_CPU /* Start timer. */ polybench_start_instruments; atax_cpu(nx, ny, POLYBENCH_ARRAY(A), POLYBENCH_ARRAY(x), POLYBENCH_ARRAY(y), POLYBENCH_ARRAY(tmp)); /* Stop and print timer. */ printf("CPU Time in seconds:\n"); polybench_stop_instruments; polybench_print_instruments; compareResults(ny, POLYBENCH_ARRAY(y), POLYBENCH_ARRAY(y_outputFromGpu)); #else print_array(ny, POLYBENCH_ARRAY(y_outputFromGpu)); #endif //RUN_ON_CPU cl_clean_up(); POLYBENCH_FREE_ARRAY(A); POLYBENCH_FREE_ARRAY(x); POLYBENCH_FREE_ARRAY(y); POLYBENCH_FREE_ARRAY(y_outputFromGpu); POLYBENCH_FREE_ARRAY(tmp); return 0; }
int main(void) { DATA_TYPE *A; DATA_TYPE *y; DATA_TYPE *tmp; ///////////////////////// size_t oldSizes[1] = { NY }; size_t newSizes[1]; getNewSizes(oldSizes, NULL, newSizes, NULL, "atax_kernel2", 1); NY = newSizes[0]; ///////////////////////// A = (DATA_TYPE *)malloc(NX_DEFAULT * NY * sizeof(DATA_TYPE)); y = (DATA_TYPE *)malloc(NY * sizeof(DATA_TYPE)); tmp = (DATA_TYPE *)malloc(NX_DEFAULT * sizeof(DATA_TYPE)); init_array(tmp, A); platform = new Platform(PLATFORM_ID); context = platform->getContext(); Device device = platform->getDevice(DEVICE_ID); Queue queue(*context,device,Queue::EnableProfiling); cl_mem_init(A, y, tmp,queue); Program program(context,KERNEL_DIRECTORY KERNEL_FILE_NAME); if(!program.build(device)){ std::cout << "Error building the program: \n"; std::cout <<program.getBuildLog(device); } kernel2=program.createKernel(kernel2Name.c_str()); cl_launch_kernel(queue); queue.readBuffer(*y_mem_obj,NY * sizeof(DATA_TYPE), y); queue.finish(); atax_cpu(A, tmp, y); cl_clean_up(); free(A); free(y); free(tmp); return 0; }
int main(int argc, char** argv) { double t_start, t_end; DATA_TYPE* A; DATA_TYPE* x; DATA_TYPE* y; DATA_TYPE* y_outputFromGpu; DATA_TYPE* tmp; A = (DATA_TYPE*)malloc(NX*NY*sizeof(DATA_TYPE)); x = (DATA_TYPE*)malloc(NY*sizeof(DATA_TYPE)); y = (DATA_TYPE*)malloc(NY*sizeof(DATA_TYPE)); y_outputFromGpu = (DATA_TYPE*)malloc(NY*sizeof(DATA_TYPE)); tmp = (DATA_TYPE*)malloc(NX*sizeof(DATA_TYPE)); fprintf(stdout, "<< Matrix Transpose and Vector Multiplication >>\n"); init_array(x, A); t_start = rtclock(); atax_OMP(A, x, y_outputFromGpu, tmp); t_end = rtclock(); fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); t_start = rtclock(); atax_cpu(A, x, y, tmp); t_end = rtclock(); fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); compareResults(y, y_outputFromGpu); free(A); free(x); free(y); free(y_outputFromGpu); free(tmp); return 0; }