int main(int argc, char* argv[]) //int main(void) { double t_start, t_end; DATA_TYPE* A; DATA_TYPE* B; DATA_TYPE* C; DATA_TYPE* D; DATA_TYPE* E; DATA_TYPE* F; DATA_TYPE* G; DATA_TYPE* G_outputFromGpu; if(argc==2){ printf("arg 1 = %s\narg 2 = %s\n", argv[0], argv[1]); cpu_offset = atoi(argv[1]); } A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); C = (DATA_TYPE*)malloc(NJ*NM*sizeof(DATA_TYPE)); D = (DATA_TYPE*)malloc(NM*NL*sizeof(DATA_TYPE)); E = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); F = (DATA_TYPE*)malloc(NJ*NL*sizeof(DATA_TYPE)); G = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE)); G_outputFromGpu = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE)); int i; init_array(A, B, C, D); read_cl_file(); cl_initialization_fusion(); //cl_initialization(); cl_mem_init(A, B, C, D, E, F, G); cl_load_prog(); cl_launch_kernel(); errcode = clEnqueueReadBuffer(clCommandQue[0], g_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, G_outputFromGpu, 0, NULL, NULL); if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n"); t_start = rtclock(); mm3_cpu(A, B, C, D, E, F, G); t_end = rtclock(); fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); compareResults(G, G_outputFromGpu); cl_clean_up(); free(A); free(B); free(C); free(D); free(E); free(F); free(G); free(G_outputFromGpu); return 0; }
int main(int argc, char** argv) { double t_start, t_end; DATA_TYPE* A; DATA_TYPE* B; DATA_TYPE* C; DATA_TYPE* D; DATA_TYPE* E; DATA_TYPE* F; DATA_TYPE* G; DATA_TYPE* G_outputFromGpu; A = (DATA_TYPE*)malloc(NI*NK*sizeof(DATA_TYPE)); B = (DATA_TYPE*)malloc(NK*NJ*sizeof(DATA_TYPE)); C = (DATA_TYPE*)malloc(NJ*NM*sizeof(DATA_TYPE)); D = (DATA_TYPE*)malloc(NM*NL*sizeof(DATA_TYPE)); E = (DATA_TYPE*)malloc(NI*NJ*sizeof(DATA_TYPE)); F = (DATA_TYPE*)malloc(NJ*NL*sizeof(DATA_TYPE)); G = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE)); G_outputFromGpu = (DATA_TYPE*)malloc(NI*NL*sizeof(DATA_TYPE)); fprintf(stdout, "<< Linear Algebra: 3 Matrix Multiplications (E=A.B; F=C.D; G=E.F) >>\n"); init_array(A, B, C, D); t_start = rtclock(); mm3_OMP(A, B, C, D, E, F, G_outputFromGpu); t_end = rtclock(); fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start); t_start = rtclock(); mm3_cpu(A, B, C, D, E, F, G); t_end = rtclock(); fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); compareResults(G, G_outputFromGpu); free(A); free(B); free(C); free(D); free(E); free(F); free(G); free(G_outputFromGpu); return 0; }