void Program::build(const std::string& options) { bool createdProgramsFromSource = false; if (_clPrograms.empty()) { createProgramsFromSource(); createdProgramsFromSource = true; } try { // build program for each device // TODO: how to build the program only for a subset of devices? for (auto& devicePtr : globalDeviceList) { _clPrograms[devicePtr->id()].build( std::vector<cl::Device>(1, devicePtr->clDevice()), options.c_str() ); } if (createdProgramsFromSource) { saveBinary(); } if (util::envVarValue("SKELCL_PRINT_BUILD_LOG") == "YES") { printBuildLog(); } } catch (cl::Error& err) { if (err.err() == CL_BUILD_PROGRAM_FAILURE) { LOG_ERROR(err); printBuildLog(); ABORT_WITH_ERROR(err); } else { ABORT_WITH_ERROR(err); } } }
void createKernel(const char* fileName,const char* kernelName){ size_t sourceSize; const char* kernelSourceCode = loadKernelSourceCode(fileName,&sourceSize); kernelProgramm = clCreateProgramWithSource(contextHandle,1,&kernelSourceCode,&sourceSize,&openCLErrorID); // printf("clCreateProgramWithSource: %d\n",openCLErrorID); size_t retSourceSize; openCLErrorID = clGetProgramInfo(kernelProgramm,CL_PROGRAM_SOURCE,NULL,NULL,&retSourceSize); char retSource[retSourceSize]; openCLErrorID = clGetProgramInfo(kernelProgramm,CL_PROGRAM_SOURCE,retSourceSize,retSource,NULL); printf("Kernel Source Code:\n--------------\n%s\n--------------\n",retSource); // compile an OpenCL Programm openCLErrorID = clBuildProgram(kernelProgramm,1,&deviceHandle,NULL,NULL,NULL); if(openCLErrorID != 0) { printBuildLog(kernelProgramm,deviceHandle); exit(0); } // printf("clBuildProgram: %d\n",openCLErrorID); // create an OpenCL Kernel Object kernel = clCreateKernel(kernelProgramm,kernelName,&openCLErrorID); // printf("clCreateKernel: %d\n",openCLErrorID); }
/**
 * Loads the OpenCL source file fn, creates cpProgram from it and builds it.
 *
 * Any program previously held in cpProgram is released first. The status of
 * program creation and of the build is printed to stdout using
 * ocl_wrapper->get_error(), and printBuildLog() is called after the build.
 *
 * @param fn  Path of the .cl source file to load.
 *
 * Returns early (after printing a message) when the file cannot be read.
 * Also returns early when clCreateProgramWithSource fails, so that the build
 * and the build-log query are never issued on an invalid program handle.
 */
void OpenCLExecuter::ocl_init_progam(string fn)
{
    // Read the OpenCL kernel in from the source file.
    int filelen = 0;
    char* cSourceCL = ocl_file_contents(fn.c_str(), &filelen);
    if (cSourceCL == NULL)
    {
        printf("ERROR: Unable to open %s for reading\n", fn.c_str());
        return;
    }
    size_t szKernelLength = (size_t)filelen; // Byte size of kernel code

    // Drop the program from an earlier call before creating a new one.
    if (cpProgram)
    {
        clReleaseProgram(cpProgram);
    }

    // Create the program.
    cl_int err = CL_SUCCESS;
    const char* sourceText = cSourceCL;
    cpProgram = clCreateProgramWithSource(ocl_wrapper->context, 1, &sourceText, &szKernelLength, &err);
    printf("OPENCL: clCreateProgramWithSource: %s\n", ocl_wrapper->get_error(err));
    if (err != CL_SUCCESS || cpProgram == NULL)
    {
        // Nothing to build; the failure has already been reported above.
        free(cSourceCL);
        return;
    }

    // Build the program (no device list given).
    err = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
    printf("OPENCL: clBuildProgram: %s\n", ocl_wrapper->get_error(err));

    // Print the build log regardless of the build result.
    printBuildLog(&cpProgram);

    free(cSourceCL);
}
// Host code int main(int argc, char** argv) { setbuf(stdout, NULL); printf("Simple vector addition\n"); initCL(); int N = 256; size_t size = N * sizeof(float); // Allocate input vectors h_A, h_B and h_C in host memory h_A = (float*)malloc(size); if (h_A == 0) Cleanup(); h_B = (float*)malloc(size); if (h_B == 0) Cleanup(); h_C = (float*)malloc(size); if (h_C == 0) Cleanup(); h_E = (float*)malloc(size); if (h_E == 0) Cleanup(); h_D = (float*)malloc(size); if (h_D == 0) Cleanup(); // Initialize input vectors RandomInit(h_A, N); RandomInit(h_B, N); RandomInit(h_D, N); // Allocate vectors in device memory // Copy vectors from host memory to device memory d_A = clCreateBuffer(contextHandle,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size, h_A, &openCLErrorID); d_B = clCreateBuffer(contextHandle,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size, h_B, &openCLErrorID); d_C = clCreateBuffer(contextHandle,CL_MEM_READ_WRITE, size, NULL, &openCLErrorID); d_D = clCreateBuffer(contextHandle,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,size, h_D, &openCLErrorID); d_E = clCreateBuffer(contextHandle,CL_MEM_READ_WRITE, size, NULL, &openCLErrorID); size_t sourceSize; const char* kernelSourceCode = loadKernelSourceCode("VecAdd.cl",&sourceSize); kernelProgramm = clCreateProgramWithSource(contextHandle,1,&kernelSourceCode,&sourceSize,&openCLErrorID); // printf("clCreateProgramWithSource: %d\n",openCLErrorID); size_t retSourceSize; openCLErrorID = clGetProgramInfo(kernelProgramm,CL_PROGRAM_SOURCE,NULL,NULL,&retSourceSize); char retSource[retSourceSize]; openCLErrorID = clGetProgramInfo(kernelProgramm,CL_PROGRAM_SOURCE,retSourceSize,retSource,NULL); printf("Kernel Source Code:\n--------------\n%s\n--------------\n",retSource); // compile an OpenCL Programm openCLErrorID = clBuildProgram(kernelProgramm,1,&deviceHandle,NULL,NULL,NULL); if(openCLErrorID != 0) printBuildLog(kernelProgramm,deviceHandle); // printf("clBuildProgram: %d\n",openCLErrorID); // create an OpenCL Kernel Object kernel = 
clCreateKernel(kernelProgramm,"VecAdd",&openCLErrorID); // printf("clCreateKernel: %d\n",openCLErrorID); openCLErrorID = clSetKernelArg(kernel,0,sizeof(cl_mem),&d_A); openCLErrorID = clSetKernelArg(kernel,1,sizeof(cl_mem),&d_B); openCLErrorID = clSetKernelArg(kernel,2,sizeof(cl_mem),&d_C); openCLErrorID = clSetKernelArg(kernel,3,sizeof(cl_mem),&d_D); openCLErrorID = clSetKernelArg(kernel,4,sizeof(cl_mem),&d_E); size_t globalWorkSize = 256; size_t localWorkSize = 1; // Invoke kernel openCLErrorID = clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0,NULL,NULL); // printf("clEnqueueNDRangeKernel: %d\n",openCLErrorID); // Copy result from device memory to host memory // h_C contains the result in host memory openCLErrorID = clEnqueueReadBuffer(commandQueue,d_C, CL_TRUE, 0, size, h_C, 0, NULL, NULL); // printf("clEnqueueReadBuffer C: %d\n",openCLErrorID); openCLErrorID = clEnqueueReadBuffer(commandQueue,d_E, CL_TRUE, 0, size, h_E, 0, NULL, NULL); // printf("clEnqueueReadBuffer E: %d\n",openCLErrorID); // Verify result // DONE: Print out E and verify the result. int i = 0; for (i = 0; i < N; ++i) { float sum = h_A[i] + h_B[i]; //printf("%f + %f = %f\n", h_A[i], h_B[i], h_C[i]); if (fabs(h_C[i] - sum) > 1e-5) break; float sumMult = h_A[i] + h_B[i] * h_D[i]; //printf("%f + %f * %f= %f\n", h_A[i], h_B[i],h_D[i], h_E[i]); if (fabs(h_E[i] - sumMult) > 1e-5) break; } printf("%s, i = %d\n", (i == N) ? "PASSED" : "FAILED",i); Cleanup(); }