/**
 * By-value convenience overload of syr2k.
 *
 * Builds a fresh result object that shares a's allocator, hands it to the
 * pointer-output overload of syr2k together with all other arguments, and
 * returns the filled-in result by value. The actual arithmetic is performed
 * entirely by the delegated overload.
 *
 * @param a      first input operand (also supplies the allocator)
 * @param b      second input operand
 * @param alpha  scalar forwarded unchanged to the delegated overload
 * @param beta   scalar forwarded unchanged to the delegated overload
 * @param uplo   triangle selector forwarded unchanged
 * @return the newly created result object
 */
T Linalg<T, H>::syr2k(
    const T &a,
    const T &b,
    const value_type &alpha,
    const value_type &beta,
    Uplo uplo)
{
    T result(a.allocator());

    // The pointer-taking overload writes its output into `result`.
    syr2k(a, b, &result, alpha, beta, uplo);

    return result;
}
void MTLmarks::DmatDmatRun(std::string benchmark) { if(benchmark == "dmatdmatadd"){ mtl_result = dmatdmatadd(size, steps); } else if(benchmark == "dmatdmatmult"){ mtl_result = dmatdmatmult(size, steps); } else if(benchmark == "cmajordmdmmult"){ mtl_result = cmajordmdmmult(size, steps); } else if(benchmark == "rmajordmdmmult"){ mtl_result = rmajordmdmmult(size, steps); } else if(benchmark == "nestedprod"){ mtl_result = nestedprod(size, steps); } else if(benchmark == "symm1"){ mtl_result = symm1(size, steps); } else if(benchmark == "symm1rect"){ mtl_result = symm1rect(size, steps); } else if(benchmark == "symm2"){ mtl_result = symm2(size, steps); } else if(benchmark == "syr2k"){ mtl_result = syr2k(size, steps); } else if(benchmark == "syr2krect"){ mtl_result = syr2krect(size, steps); } else if(benchmark == "syrk"){ mtl_result = syrk(size, steps); } else if(benchmark == "syrkrect"){ mtl_result = syrkrect(size, steps); } else if(benchmark == "custom"){ mtl_result = custom(size, steps); } else{ std::cerr << "MTLmarks benchmark does not exist." << std::endl; exit(1); } }
int main(void) { double t_start, t_end; DATA_TYPE* A; DATA_TYPE* B; DATA_TYPE* C; DATA_TYPE* C_outputFromGpu; A = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE)); B = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE)); C = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE)); C_outputFromGpu = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE)); init_arrays(A, B, C); read_cl_file(); cl_initialization(); cl_mem_init(A, B, C); cl_load_prog(); cl_launch_kernel(); errcode = clEnqueueReadBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, N*M*sizeof(DATA_TYPE), C_outputFromGpu, 0, NULL, NULL); if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n"); t_start = rtclock(); syr2k(A, B, C); t_end = rtclock(); fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start); compareResults(C, C_outputFromGpu); cl_clean_up(); free(A); free(B); free(C); free(C_outputFromGpu); return 0; }
/**
 * Mutable-reference overload of syr2k.
 *
 * Forwards to the overload that accepts the output operand as `const T&`
 * and strips the const qualifier from whatever reference that overload
 * returns, so the caller receives a `T&`.
 *
 * @param a      first input operand
 * @param b      second input operand
 * @param c      output operand, passed on as a const reference
 * @param alpha  scalar forwarded unchanged
 * @param beta   scalar forwarded unchanged
 * @param uplo   triangle selector forwarded unchanged
 * @return the delegated overload's result, viewed as non-const
 */
T& Linalg<T, H>::syr2k(
    const T &a,
    const T &b,
    T &c,
    const value_type &alpha,
    const value_type &beta,
    Uplo uplo)
{
    // A const view of c makes overload resolution pick the const-reference
    // overload rather than recursing into this one.
    const T &c_const_view = c;

    return const_cast< T& >(syr2k(a, b, c_const_view, alpha, beta, uplo));
}
int main(void) { /* Prepare ctuning vars */ long ct_repeat=0; long ct_repeat_max=1; double t_start, t_end; DATA_TYPE* A; DATA_TYPE* B; DATA_TYPE* C; DATA_TYPE* C_outputFromGpu; #ifdef OPENME openme_init(NULL,NULL,NULL,0); openme_callback("PROGRAM_START", NULL); #endif /* Run kernel. */ if (getenv("CT_REPEAT_MAIN")!=NULL) ct_repeat_max=atol(getenv("CT_REPEAT_MAIN")); A = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE)); B = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE)); C = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE)); C_outputFromGpu = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE)); srand(1); init_arrays(A, B, C); read_cl_file(); cl_initialization(); cl_mem_init(A, B, C); cl_load_prog(); #ifdef OPENME openme_callback("ACC_KERNEL_START", NULL); #endif for (ct_repeat=0; ct_repeat<ct_repeat_max; ct_repeat++) { cl_launch_kernel(); err_code = clEnqueueReadBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, N*M*sizeof(DATA_TYPE), C_outputFromGpu, 0, NULL, NULL); if(err_code != CL_SUCCESS) { printf("Error in reading GPU mem\n"); exit(1); } } #ifdef OPENME openme_callback("ACC_KERNEL_END", NULL); #endif srand(1); init_arrays(A, B, C); #ifdef OPENME openme_callback("KERNEL_START", NULL); #endif for (ct_repeat=0; ct_repeat<ct_repeat_max; ct_repeat++) { syr2k(A, B, C); } #ifdef OPENME openme_callback("KERNEL_END", NULL); #endif compareResults(C, C_outputFromGpu); cl_clean_up(); free(A); free(B); free(C); free(C_outputFromGpu); #ifdef OPENME openme_callback("PROGRAM_END", NULL); #endif return 0; }