void ocl_real_arr_arr_sca(const char* kernel_name, modelica_real* src_1, modelica_real* src_2, modelica_real* dest, int size_){ cl_program OpenCLProgram; cl_kernel OpenCLfunction; clock_t c0, c1; cl_int err; cl_int inc1, inc2; inc1=1; inc2=1; size_t WorkSize[1] = {static_cast<size_t>(size_)}; size_t localWorkSize[1] = {32}; // one dimensional Range if (!device_comm_queue) ocl_initialize(); //This can be moved out. left here hoping that similar ops will be called //sequentialy. If we kept them in one .cl file we dont have to build again OpenCLProgram = ocl_build_p_from_src("real_ar_ar_sca.cl", true); OpenCLfunction = clCreateKernel(OpenCLProgram, kernel_name, &err); ocl_error_check(OCL_CREATE_KERNEL, err); cl_mem device_array_1 = ocl_alloc_init_real_arr(src_1, size_); cl_mem device_array_2 = ocl_alloc_init_real_arr(src_2, size_); cl_mem result = ocl_alloc_init_real_arr(NULL, 1); err = clSetKernelArg(OpenCLfunction, 0, sizeof(cl_int),(void*)&size_); err |= clSetKernelArg(OpenCLfunction, 1, sizeof(cl_mem), (void*)&device_array_1); err |= clSetKernelArg(OpenCLfunction, 2, sizeof(cl_int), (void*)&inc1); err |= clSetKernelArg(OpenCLfunction, 3, sizeof(cl_mem), (void*)&device_array_2); err |= clSetKernelArg(OpenCLfunction, 4, sizeof(cl_int), (void*)&inc2); err |= clSetKernelArg(OpenCLfunction, 5, sizeof(cl_mem), (void*)&result); err |= clSetKernelArg(OpenCLfunction, 6, sizeof(modelica_real)*localWorkSize[0], NULL); ocl_error_check(OCL_SET_KER_ARGS, err); c0 = clock(); err = clEnqueueNDRangeKernel(device_comm_queue, OpenCLfunction, 1, NULL, WorkSize, localWorkSize, 0, NULL, NULL); ocl_error_check(OCL_ENQUE_ND_RANGE_KERNEL, err); clFinish(device_comm_queue); c1 = clock(); printf ("\telapsed CPU CLOCKS: %f sec\n", (float) (c1-c0)/1000); ocl_copy_back_to_host_real(result, dest, 1); clReleaseMemObject(device_array_1); clReleaseMemObject(device_array_2); clReleaseMemObject(result); clReleaseKernel(OpenCLfunction); clReleaseProgram(OpenCLProgram); }
cl_kernel ocl_create_kernel(cl_program program, const char* kernel_name){ if (!device_comm_queue) ocl_initialize(); cl_kernel kernel; cl_int err; kernel = clCreateKernel(program, kernel_name, &err); ocl_error_check(OCL_CREATE_KERNEL, err); return kernel; }
void ocl_real_matrix_matrix_matrix(const char* kernel_name, modelica_real* src_1, int M, modelica_real* src_2, int N, modelica_real* dest, int K){ cl_program OpenCLProgram; cl_kernel OpenCLfunction; clock_t c0, c1; cl_int err; size_t WorkSize[2] = {static_cast<size_t>(M), static_cast<size_t>(N)}; size_t localWorkSize[2] = {16,16}; if (!device_comm_queue){ printf("------------------------------Initizlizing---------------------\n"); ocl_initialize(); } //This can be moved out. left here hoping that similar ops will be called //sequentialy. If we kept them in one .cl file we dont have to build again OpenCLProgram = ocl_build_p_from_src("matrix.cl", true); OpenCLfunction = clCreateKernel(OpenCLProgram, kernel_name, &err); ocl_error_check(OCL_CREATE_KERNEL, err); cl_mem device_array_1 = ocl_alloc_init_real_arr(src_1, M*K); cl_mem device_array_2 = ocl_alloc_init_real_arr(src_2, K*N); cl_mem result = ocl_alloc_init_real_arr(NULL, M*N); err = clSetKernelArg(OpenCLfunction, 0, sizeof(cl_mem), (void*)&result); err |= clSetKernelArg(OpenCLfunction, 1, sizeof(cl_mem), (void*)&device_array_1); err |= clSetKernelArg(OpenCLfunction, 2, sizeof(cl_mem), (void*)&device_array_2); err |= clSetKernelArg(OpenCLfunction, 3, sizeof(cl_int), (void*)&K); err |= clSetKernelArg(OpenCLfunction, 4, sizeof(cl_int), (void*)&N); ocl_error_check(OCL_SET_KER_ARGS, err); c0 = clock(); err = clEnqueueNDRangeKernel(device_comm_queue, OpenCLfunction, 2, NULL, WorkSize, localWorkSize, 0, NULL, NULL); ocl_error_check(OCL_ENQUE_ND_RANGE_KERNEL, err); clFinish(device_comm_queue); c1 = clock(); printf ("\telapsed CPU CLOCKS: %f sec\n", (float) (c1-c0)/1000); ocl_copy_back_to_host_real(result, dest, M*N); clReleaseMemObject(device_array_1); clReleaseMemObject(device_array_2); clReleaseMemObject(result); clReleaseKernel(OpenCLfunction); clReleaseProgram(OpenCLProgram); }
// Main function // ********************************************************************* int main(int argc, char **argv) { if (!device_comm_queue) ocl_initialize(); const char* program_source; program_source = load_source_file(argv[1]); cl_program ocl_program = clCreateProgramWithSource(device_context, 1, (const char**)&program_source, NULL, NULL); printf("********** program created.\n"); // Build the program (OpenCL JIT compilation) char options[100]; const char* flags = "-g -w -I\""; const char* OMHOME = getenv("OPENMODELICAHOME"); const char* OMINCL = "/include/omc\""; const char* OMBIN = "/bin\""; if ( OMHOME != NULL ) { strcpy(options, flags); strcat(options, OMHOME); strcat(options, OMINCL); strcat(options, " -I\""); strcat(options, OMHOME); strcat(options, OMBIN); printf("Building OpenCL code with flags %s\n",options); cl_int err; err = clBuildProgram(ocl_program, 0, NULL, options, NULL, NULL); ocl_error_check(OCL_BUILD_PROGRAM, err); size_t size; clGetProgramBuildInfo(ocl_program, ocl_device, CL_PROGRAM_BUILD_LOG, // Get build log size 0, NULL, &size); char * log = (char*)malloc(size); clGetProgramBuildInfo(ocl_program,ocl_device,CL_PROGRAM_BUILD_LOG,size,log, NULL); printf("\t\tCL_PROGRAM_BUILD_LOG: \t%s\n", log); free(log); if(err){ printf("Errors detected in compilation of OpenCL code:\n"); exit(1); } else printf("Program built successfuly.\n"); //if no error create the binary clGetProgramInfo(ocl_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL); unsigned char * binary = (unsigned char*)malloc(size); printf("Size of program binary :\t%d\n",size); clGetProgramInfo(ocl_program, CL_PROGRAM_BINARIES, sizeof(size_t), &binary, NULL); printf("Program binary retrived.\n"); const char* binary_ext = ".bin"; char* binary_name = strcat(argv[1],binary_ext); printf("binary file name %s\n", binary_name); FILE * cache; cache = fopen(binary_name, "wb"); fwrite(binary, sizeof(char), size, cache); fclose(cache); //free(binary); err = 0; cl_program newprogram = clCreateProgramWithBinary(device_context, 1, &ocl_device, &size, (const unsigned char **)&binary, NULL, &err); if(!err) printf("Program created from binary\n"); else{ switch (err){ case CL_INVALID_CONTEXT: printf("Error building program:\n"); printf("CL_INVALID_CONTEXT \n"); break; case CL_INVALID_VALUE: printf("Error building program:\n"); printf("CL_INVALID_VALUE \n"); break; case CL_INVALID_DEVICE: printf("Error building program:\n"); printf("CL_INVALID_DEVICE \n"); break; case CL_INVALID_BINARY: printf("Error building program:\n"); printf("CL_INVALID_BINARY \n"); break; case CL_OUT_OF_HOST_MEMORY: printf("Error building program:\n"); printf("CL_OUT_OF_HOST_MEMORY \n"); break; } } return 0; } else { printf("Couldn't find OPENMODELICAHOME!\n"); exit(1); } ocl_clean_up(); return 0; }