void ocl_create_context_and_comm_queue(){ // Create a context to run OpenCL on the OCL-enabled Device cl_int err; #if BE_OCL_VERBOSE printf("--- Creating OpenCL context"); #endif device_context = clCreateContext(0, 1, &ocl_device, NULL, NULL, &err); ocl_error_check(OCL_CREATE_CONTEXT, err); // Get the list of OCL_ devices associated with this context size_t ParmDataBytes; clGetContextInfo(device_context, CL_CONTEXT_DEVICES, 0, NULL, &ParmDataBytes); cl_device_id* OCL_Devices = (cl_device_id*)malloc(ParmDataBytes); clGetContextInfo(device_context, CL_CONTEXT_DEVICES, ParmDataBytes, OCL_Devices, NULL); // Create a command-queue on the first OCL_ device #if BE_OCL_VERBOSE printf("--- Creating OpenCL command queue"); #endif device_comm_queue = clCreateCommandQueue(device_context, OCL_Devices[0], 0, &err); ocl_error_check(OCL_CREATE_COMMAND_QUEUE, err); free(OCL_Devices); }
void free_device_array(device_array* dest){ cl_int err; err = clReleaseMemObject(dest->data); ocl_error_check(OCL_REALEASE_MEM_OBJECT, err); err = clReleaseMemObject(dest->info_dev); ocl_error_check(OCL_REALEASE_MEM_OBJECT, err); free(dest->info); }
void ocl_real_arr_arr_sca(const char* kernel_name, modelica_real* src_1, modelica_real* src_2, modelica_real* dest, int size_){ cl_program OpenCLProgram; cl_kernel OpenCLfunction; clock_t c0, c1; cl_int err; cl_int inc1, inc2; inc1=1; inc2=1; size_t WorkSize[1] = {static_cast<size_t>(size_)}; size_t localWorkSize[1] = {32}; // one dimensional Range if (!device_comm_queue) ocl_initialize(); //This can be moved out. left here hoping that similar ops will be called //sequentialy. If we kept them in one .cl file we dont have to build again OpenCLProgram = ocl_build_p_from_src("real_ar_ar_sca.cl", true); OpenCLfunction = clCreateKernel(OpenCLProgram, kernel_name, &err); ocl_error_check(OCL_CREATE_KERNEL, err); cl_mem device_array_1 = ocl_alloc_init_real_arr(src_1, size_); cl_mem device_array_2 = ocl_alloc_init_real_arr(src_2, size_); cl_mem result = ocl_alloc_init_real_arr(NULL, 1); err = clSetKernelArg(OpenCLfunction, 0, sizeof(cl_int),(void*)&size_); err |= clSetKernelArg(OpenCLfunction, 1, sizeof(cl_mem), (void*)&device_array_1); err |= clSetKernelArg(OpenCLfunction, 2, sizeof(cl_int), (void*)&inc1); err |= clSetKernelArg(OpenCLfunction, 3, sizeof(cl_mem), (void*)&device_array_2); err |= clSetKernelArg(OpenCLfunction, 4, sizeof(cl_int), (void*)&inc2); err |= clSetKernelArg(OpenCLfunction, 5, sizeof(cl_mem), (void*)&result); err |= clSetKernelArg(OpenCLfunction, 6, sizeof(modelica_real)*localWorkSize[0], NULL); ocl_error_check(OCL_SET_KER_ARGS, err); c0 = clock(); err = clEnqueueNDRangeKernel(device_comm_queue, OpenCLfunction, 1, NULL, WorkSize, localWorkSize, 0, NULL, NULL); ocl_error_check(OCL_ENQUE_ND_RANGE_KERNEL, err); clFinish(device_comm_queue); c1 = clock(); printf ("\telapsed CPU CLOCKS: %f sec\n", (float) (c1-c0)/1000); ocl_copy_back_to_host_real(result, dest, 1); clReleaseMemObject(device_array_1); clReleaseMemObject(device_array_2); clReleaseMemObject(result); clReleaseKernel(OpenCLfunction); clReleaseProgram(OpenCLProgram); }
void ocl_real_matrix_matrix_matrix(const char* kernel_name, modelica_real* src_1, int M, modelica_real* src_2, int N, modelica_real* dest, int K){ cl_program OpenCLProgram; cl_kernel OpenCLfunction; clock_t c0, c1; cl_int err; size_t WorkSize[2] = {static_cast<size_t>(M), static_cast<size_t>(N)}; size_t localWorkSize[2] = {16,16}; if (!device_comm_queue){ printf("------------------------------Initizlizing---------------------\n"); ocl_initialize(); } //This can be moved out. left here hoping that similar ops will be called //sequentialy. If we kept them in one .cl file we dont have to build again OpenCLProgram = ocl_build_p_from_src("matrix.cl", true); OpenCLfunction = clCreateKernel(OpenCLProgram, kernel_name, &err); ocl_error_check(OCL_CREATE_KERNEL, err); cl_mem device_array_1 = ocl_alloc_init_real_arr(src_1, M*K); cl_mem device_array_2 = ocl_alloc_init_real_arr(src_2, K*N); cl_mem result = ocl_alloc_init_real_arr(NULL, M*N); err = clSetKernelArg(OpenCLfunction, 0, sizeof(cl_mem), (void*)&result); err |= clSetKernelArg(OpenCLfunction, 1, sizeof(cl_mem), (void*)&device_array_1); err |= clSetKernelArg(OpenCLfunction, 2, sizeof(cl_mem), (void*)&device_array_2); err |= clSetKernelArg(OpenCLfunction, 3, sizeof(cl_int), (void*)&K); err |= clSetKernelArg(OpenCLfunction, 4, sizeof(cl_int), (void*)&N); ocl_error_check(OCL_SET_KER_ARGS, err); c0 = clock(); err = clEnqueueNDRangeKernel(device_comm_queue, OpenCLfunction, 2, NULL, WorkSize, localWorkSize, 0, NULL, NULL); ocl_error_check(OCL_ENQUE_ND_RANGE_KERNEL, err); clFinish(device_comm_queue); c1 = clock(); printf ("\telapsed CPU CLOCKS: %f sec\n", (float) (c1-c0)/1000); ocl_copy_back_to_host_real(result, dest, M*N); clReleaseMemObject(device_array_1); clReleaseMemObject(device_array_2); clReleaseMemObject(result); clReleaseKernel(OpenCLfunction); clReleaseProgram(OpenCLProgram); }
cl_kernel ocl_create_kernel(cl_program program, const char* kernel_name){ if (!device_comm_queue) ocl_initialize(); cl_kernel kernel; cl_int err; kernel = clCreateKernel(program, kernel_name, &err); ocl_error_check(OCL_CREATE_KERNEL, err); return kernel; }
void ocl_execute_kernel(cl_kernel kernel){ cl_int err = 0; timeval t1, t2; double elapsedTime; gettimeofday(&t1, NULL); if (WORK_DIM == 0){ size_t GlobalSize[1] = {GLOBAL_SIZE[0]}; // one dimensional Range //automatic division to workgroups by OpenCL. err = clEnqueueNDRangeKernel(device_comm_queue, kernel, 1, NULL, GlobalSize, NULL, 0, NULL, NULL); } else if (WORK_DIM == 1){ size_t GlobalSize[1] = {GLOBAL_SIZE[0]}; // one dimensional Range size_t LocalSize[1] = {LOCAL_SIZE[0]}; // one dimensional Range err = clEnqueueNDRangeKernel(device_comm_queue, kernel, 1, NULL, GlobalSize, LocalSize, 0, NULL, NULL); } else if (WORK_DIM == 2){ size_t GlobalSize[2] = {GLOBAL_SIZE[0], GLOBAL_SIZE[1]}; // two dimensional Range size_t LocalSize[2] = {LOCAL_SIZE[0], LOCAL_SIZE[1]}; // two dimensional Range //printf("Setting 2 dimensional arrangment with local size x = %d, local size y = %d, global size x = %d, global size x = %d \n", //LocalSize[0], LocalSize[1], GlobalSize[0], GlobalSize[1]); err = clEnqueueNDRangeKernel(device_comm_queue, kernel, 2, NULL, GlobalSize, LocalSize, 0, NULL, NULL); } else if (WORK_DIM == 3){ size_t GlobalSize[3] = {GLOBAL_SIZE[0], GLOBAL_SIZE[1], GLOBAL_SIZE[2]}; // three dimensional Range size_t LocalSize[3] = {LOCAL_SIZE[0], LOCAL_SIZE[1], LOCAL_SIZE[2]}; // three dimensional Range err = clEnqueueNDRangeKernel(device_comm_queue, kernel, 3, NULL, GlobalSize, LocalSize, 0, NULL, NULL); } clFinish(device_comm_queue); ocl_error_check(OCL_ENQUE_ND_RANGE_KERNEL, err); gettimeofday(&t2, NULL); elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0; // sec to ms elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0; // us to ms printf ("\tKernel Execution : %lf ms\n", elapsedTime); if(err) exit(1); }
void ocl_set_local_kernel_arg(cl_kernel kernel, int arg_nr, size_t in_size){ cl_int err; // Allocate the memory in local space for the data err = clSetKernelArg(kernel, arg_nr, in_size, NULL); ocl_error_check(OCL_SET_KER_ARGS, err); if(err){ printf("Error: setting argument nr: %d. Local variable\n", arg_nr + 1); exit(1); } }
void ocl_set_kernel_arg(cl_kernel kernel, int arg_nr, modelica_real in_arg){ cl_int err; err = clSetKernelArg(kernel, arg_nr, sizeof(modelica_real),(void*)&in_arg); //#ifdef SHOW_ARG_SET_ERRORS ocl_error_check(OCL_SET_KER_ARGS, err); if(err){ printf("Error: setting argument nr: %d\n", arg_nr + 1); exit(1); } //#endif }
void ocl_set_kernel_args(cl_kernel kernel, int count, ...){ cl_int err; va_list arguments; va_start(arguments, count); for (int i = 0; i < count; i++) { cl_mem tmp = va_arg(arguments, cl_mem); err = clSetKernelArg(kernel, i, sizeof(cl_mem),(void*)&tmp); //#ifdef SHOW_ARG_SET_ERRORS ocl_error_check(OCL_SET_KER_ARGS, err); if(err){ printf("Error: setting argument nr: %d\n", i + 1); exit(1); } //#endif } va_end(arguments); }
// Main function // ********************************************************************* int main(int argc, char **argv) { if (!device_comm_queue) ocl_initialize(); const char* program_source; program_source = load_source_file(argv[1]); cl_program ocl_program = clCreateProgramWithSource(device_context, 1, (const char**)&program_source, NULL, NULL); printf("********** program created.\n"); // Build the program (OpenCL JIT compilation) char options[100]; const char* flags = "-g -w -I\""; const char* OMHOME = getenv("OPENMODELICAHOME"); const char* OMINCL = "/include/omc\""; const char* OMBIN = "/bin\""; if ( OMHOME != NULL ) { strcpy(options, flags); strcat(options, OMHOME); strcat(options, OMINCL); strcat(options, " -I\""); strcat(options, OMHOME); strcat(options, OMBIN); printf("Building OpenCL code with flags %s\n",options); cl_int err; err = clBuildProgram(ocl_program, 0, NULL, options, NULL, NULL); ocl_error_check(OCL_BUILD_PROGRAM, err); size_t size; clGetProgramBuildInfo(ocl_program, ocl_device, CL_PROGRAM_BUILD_LOG, // Get build log size 0, NULL, &size); char * log = (char*)malloc(size); clGetProgramBuildInfo(ocl_program,ocl_device,CL_PROGRAM_BUILD_LOG,size,log, NULL); printf("\t\tCL_PROGRAM_BUILD_LOG: \t%s\n", log); free(log); if(err){ printf("Errors detected in compilation of OpenCL code:\n"); exit(1); } else printf("Program built successfuly.\n"); //if no error create the binary clGetProgramInfo(ocl_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL); unsigned char * binary = (unsigned char*)malloc(size); printf("Size of program binary :\t%d\n",size); clGetProgramInfo(ocl_program, CL_PROGRAM_BINARIES, sizeof(size_t), &binary, NULL); printf("Program binary retrived.\n"); const char* binary_ext = ".bin"; char* binary_name = strcat(argv[1],binary_ext); printf("binary file name %s\n", binary_name); FILE * cache; cache = fopen(binary_name, "wb"); fwrite(binary, sizeof(char), size, cache); fclose(cache); //free(binary); err = 0; cl_program newprogram = clCreateProgramWithBinary(device_context, 1, &ocl_device, &size, (const unsigned char **)&binary, NULL, &err); if(!err) printf("Program created from binary\n"); else{ switch (err){ case CL_INVALID_CONTEXT: printf("Error building program:\n"); printf("CL_INVALID_CONTEXT \n"); break; case CL_INVALID_VALUE: printf("Error building program:\n"); printf("CL_INVALID_VALUE \n"); break; case CL_INVALID_DEVICE: printf("Error building program:\n"); printf("CL_INVALID_DEVICE \n"); break; case CL_INVALID_BINARY: printf("Error building program:\n"); printf("CL_INVALID_BINARY \n"); break; case CL_OUT_OF_HOST_MEMORY: printf("Error building program:\n"); printf("CL_OUT_OF_HOST_MEMORY \n"); break; } } return 0; } else { printf("Couldn't find OPENMODELICAHOME!\n"); exit(1); } ocl_clean_up(); return 0; }
void ocl_build_p_from_src(){ // Create OpenCL program with source code const char* program_source; program_source = load_source_file(omc_ocl_kernels_source); #if BE_OCL_VERBOSE printf("--- Creating OpenCL program"); #endif // omc_ocl_program declared in omc_ocl_util.h omc_ocl_program = clCreateProgramWithSource(device_context, 1, (const char**)&program_source, NULL, NULL); #if BE_OCL_VERBOSE printf("\t\t\t - OK.\n"); #endif free((void*)program_source); // Check for OpenModelica env variable. const char* OMHOME = getenv("OPENMODELICAHOME"); if ( OMHOME == NULL ) { printf("Couldn't find OPENMODELICAHOME!\n"); exit(1); } // Build the program (OpenCL JIT compilation). #if BE_OCL_VERBOSE printf("--- Building OpenCL program \n"); #endif char options[100]; const char* flags = "-I\""; const char* OMEXT = "/include/omc/c/\""; strcpy(options, flags); strcat(options, OMHOME); strcat(options, OMEXT); #if BE_OCL_VERBOSE printf("\t :Using flags %s\n",options); #endif // Build the OpenCL program. cl_int err = 0; err = clBuildProgram(omc_ocl_program, 0, NULL, options, NULL, NULL); ocl_error_check(OCL_BUILD_PROGRAM, err); // Get build log size. size_t size; clGetProgramBuildInfo(omc_ocl_program, ocl_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); // Get the build log. char * log = (char*)malloc(size); clGetProgramBuildInfo(omc_ocl_program,ocl_device,CL_PROGRAM_BUILD_LOG,size,log, NULL); if(err){ printf("Build failed: Errors detected in compilation of OpenCL code:\n"); printf("CL_PROGRAM_BUILD_LOG: \n%s\n", log); free(log); exit(1); } free(log); }