CLWProgram::CLWProgram(cl_program program) : ReferenceCounter<cl_program, clRetainProgram, clReleaseProgram>(program) { cl_int status = CL_SUCCESS; cl_uint numKernels; status = clCreateKernelsInProgram(*this, 0, nullptr, &numKernels); ThrowIf(numKernels == 0, CL_BUILD_ERROR, "clCreateKernelsInProgram return 0 kernels"); ThrowIf(status != CL_SUCCESS, status, "clCreateKernelsInProgram failed"); std::vector<cl_kernel> kernels(numKernels); status = clCreateKernelsInProgram(*this, numKernels, &kernels[0], nullptr); ThrowIf(status != CL_SUCCESS, status, "clCreateKernelsInProgram failed"); std::for_each(kernels.begin(), kernels.end(), [this](cl_kernel k) { size_t size = 0; cl_int res; res = clGetKernelInfo(k, CL_KERNEL_FUNCTION_NAME, 0, nullptr, &size); ThrowIf(res != CL_SUCCESS, res, "clGetKernelInfo failed"); std::vector<char> temp(size); res = clGetKernelInfo(k, CL_KERNEL_FUNCTION_NAME, size, &temp[0], nullptr); ThrowIf(res != CL_SUCCESS, res, "clGetKernelInfo failed"); std::string funcName(temp.begin(), temp.end()-1); kernels_[funcName] = CLWKernel::Create(k); }); }
int main(int argc, char **argv) { cl_int err; const char *krn_src; cl_program empty, program; cl_context ctx; cl_device_id did; cl_command_queue queue; cl_uint num_krn; cl_kernel kernels[2]; poclu_get_any_device(&ctx, &did, &queue); TEST_ASSERT( ctx ); TEST_ASSERT( did ); TEST_ASSERT( queue ); /* Test creating a program from an empty source */ empty = clCreateProgramWithSource(ctx, 1, &empty_src, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); err = clBuildProgram(empty, 0, NULL, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN("clBuildProgram"); err = clCreateKernelsInProgram(empty, 0, NULL, &num_krn); CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram"); TEST_ASSERT(num_krn == 0); krn_src = poclu_read_file(SRCDIR "/tests/runtime/test_clCreateKernelsInProgram.cl"); TEST_ASSERT(krn_src); program = clCreateProgramWithSource(ctx, 1, &krn_src, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN("clBuildProgram"); err = clCreateKernelsInProgram(program, 0, NULL, &num_krn); CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram"); // test_clCreateKernelsInProgram.cl has two kernel functions. TEST_ASSERT(num_krn == 2); err = clCreateKernelsInProgram(program, 2, kernels, NULL); CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram"); // make sure the kernels were actually created // Note: nothing in the specification says which kernel function // is kernels[0], which is kernels[1]. For now assume pocl/LLVM // orders these deterministacally err = clEnqueueTask(queue, kernels[0], 0, NULL, NULL); CHECK_OPENCL_ERROR_IN("clEnqueueTask"); err = clFinish(queue); CHECK_OPENCL_ERROR_IN("clFinish"); err = clEnqueueTask(queue, kernels[1], 0, NULL, NULL); CHECK_OPENCL_ERROR_IN("clEnqueueTask"); err = clFinish(queue); CHECK_OPENCL_ERROR_IN("clFinish"); return EXIT_SUCCESS; }
PassRefPtr<WebCLKernelList> WebCLProgram::createKernelsInProgram(ExceptionState& es) { cl_int err = 0; cl_kernel* kernelBuf = NULL; cl_uint num = 0; if (m_cl_program == NULL) { printf("Error: Invalid program object\n"); es.throwWebCLException( WebCLException::INVALID_PROGRAM, WebCLException::invalidProgramMessage); return nullptr; } err = clCreateKernelsInProgram (m_cl_program, 0, NULL, &num); if (err != CL_SUCCESS) { printf("Error: clCreateKernelsInProgram \n"); WebCLException::throwException(err, es); return nullptr; } if(num == 0) { printf("Warning: createKernelsInProgram - Number of Kernels is 0 \n"); es.throwWebCLException( WebCLException::FAILURE, WebCLException::failureMessage); return nullptr; } kernelBuf = (cl_kernel*)malloc (sizeof(cl_kernel) * num); if (!kernelBuf) { return nullptr; } err = clCreateKernelsInProgram (m_cl_program, num, kernelBuf, NULL); if (err != CL_SUCCESS) { WebCLException::throwException(err, es); } else { RefPtr<WebCLKernelList> o = WebCLKernelList::create(kernelBuf, num, m_cl_context.get(), this); printf("WebCLKernelList Size = %d \n\n\n\n", num); m_num_kernels = num; return o; } return nullptr; }
cl_int WINAPI wine_clCreateKernelsInProgram(cl_program program, cl_uint num_kernels, cl_kernel * kernels, cl_uint * num_kernels_ret) { cl_int ret; TRACE("\n"); ret = clCreateKernelsInProgram(program, num_kernels, kernels, num_kernels_ret); return ret; }
int main(int argc, char **argv) { cl_int err; const char *krn_src; cl_program program; cl_context ctx; cl_device_id did; cl_command_queue queue; cl_uint num_krn; cl_kernel kernels[2]; poclu_get_any_device(&ctx, &did, &queue); assert( ctx ); assert( did ); assert( queue ); krn_src = poclu_read_file(SRCDIR "/tests/runtime/test_clCreateKernelsInProgram.cl"); assert(krn_src); program = clCreateProgramWithSource(ctx, 1, &krn_src, NULL, NULL); err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); assert(err == CL_SUCCESS); err = clCreateKernelsInProgram(program, 0, NULL, &num_krn); assert(err == CL_SUCCESS); // test_clCreateKernelsInProgram.cl has two kernel functions. assert(num_krn == 2); err = clCreateKernelsInProgram(program, 2, kernels, NULL); assert(err == CL_SUCCESS); // make sure the kernels were actually created // Note: nothing in the specification says which kernel function // is kernels[0], which is kernels[1]. For now assume pocl/LLVM // orders these deterministacally err = clEnqueueTask(queue, kernels[0], 0, NULL, NULL); assert(err == CL_SUCCESS); err = clEnqueueTask(queue, kernels[1], 0, NULL, NULL); assert(err == CL_SUCCESS); clFinish(queue); }
vector<Kernel> Program::createKernels() const { vector<Kernel> vec; cl_uint size; cl_int error; if((error = clCreateKernelsInProgram(_id, 0, nullptr, &size)) != CL_SUCCESS) { detail::reportError("Program::createKernels(): ", error); return vec; } vector<cl_kernel> buf(size); if(clCreateKernelsInProgram(_id, size, buf.data(), nullptr) != CL_SUCCESS) { detail::reportError("Program::createKernels(): ", error); return vec; } for(cl_uint i = 0; i < size; ++i) vec.push_back(Kernel(_ctx, buf[i])); return vec; }
cl_kernel createKernel( const char* source, cl_context context, const char* options, cl_int* error) { cl_int err; cl_device_id device; cl_program program; cl_kernel kernel; size_t logSize; char *log; err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device), &device, NULL); if (err != CL_SUCCESS) { if (error != NULL) { *error = err; } return NULL; } program = clCreateProgramWithSource(context, 1, &source, NULL, error); if (program == NULL) { return NULL; } err = clBuildProgram(program, 1, &device, options, NULL, NULL); if (err != CL_SUCCESS) { logSize = 0; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize); log = (char*)calloc(1, logSize + 1); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, log, NULL); printf("=== Build log ===\n%s\n", log); free(log); clReleaseProgram(program); if (error != NULL) { *error = err; } return NULL; } kernel = NULL; err = clCreateKernelsInProgram(program, 1, &kernel, NULL); clReleaseProgram(program); if (error != NULL) { *error = err; } return kernel; }
cl_int pl_load_code(PLContext *pl_ctx, PLCode *pl_code) { cl_program program; cl_int error; cl_int binary_status; program = clCreateProgramWithBinary(pl_ctx->ctx, 1, (const cl_device_id *)&pl_ctx->device_id, (const size_t *)&pl_code->len, (const u_char **)&pl_code->binary, &binary_status, &error); if (error != CL_SUCCESS) { return error; } error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (error != CL_SUCCESS) { clReleaseProgram(program); return error; } cl_uint kernel_count_ret; cl_kernel *kernels; if ((kernels = malloc(sizeof(cl_kernel) * pl_code->kernel_count)) == NULL) { clReleaseProgram(program); return CL_OUT_OF_HOST_MEMORY; } error = clCreateKernelsInProgram(program, pl_code->kernel_count, kernels, &kernel_count_ret); clReleaseProgram(program); if (error != CL_SUCCESS) { free(kernels); return error; } pl_ctx->kernel_count = kernel_count_ret; pl_ctx->kernels = kernels; return CL_SUCCESS; }
/// // main() for HelloWorld example // int main(int argc, char** argv) { cl_context context = 0; cl_command_queue commandQueue = 0; cl_program program = 0; cl_device_id device = 0; cl_kernel kernels[2] = { 0, 0 }; cl_mem memObjects[3] = { 0, 0, 0 }; cl_int errNum; // Create an OpenCL context on first available platform context = CreateContext(); if (context == NULL) { std::cerr << "Failed to create OpenCL context." << std::endl; return 1; } // Create a command-queue on the first device available // on the created context commandQueue = CreateCommandQueue(context, &device); if (commandQueue == NULL) { Cleanup(context, commandQueue, program, kernels, memObjects); return 1; } // Create OpenCL program from HelloWorld.cl kernel source program = CreateProgram(context, device, "simple.cl"); if (program == NULL) { Cleanup(context, commandQueue, program, kernels, memObjects); return 1; } // Create OpenCL kernel //clCreateKernel(program, "hello_kernel", NULL); cl_uint numberOfKernels = 0; errNum = clCreateKernelsInProgram(program, 0, NULL, &numberOfKernels ); if (errNum != CL_SUCCESS) { std::cerr << "Failed to get number of kernels" << std::endl; Cleanup(context, commandQueue, program, kernels, memObjects); return 1; } else { std::cout << "numberOfKernels is:" << numberOfKernels << std::endl; } assert(numberOfKernels == 2 && "number of kernels was not as expected"); errNum = clCreateKernelsInProgram(program, 2, kernels, NULL ); if (errNum != CL_SUCCESS) { std::cerr << "Failed to retrieve kernels" << std::endl; Cleanup(context, commandQueue, program, kernels, memObjects); return 1; } // Create memory objects that will be used as arguments to // kernels. First create host memory arrays that will be // used to store the arguments to the kernel float result[ARRAY_SIZE]; float a[ARRAY_SIZE]; float b[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) { a[i] = (float)i; b[i] = (float)(i * 2); } if (!CreateMemObjects(context, memObjects, a, b)) { Cleanup(context, commandQueue, program, kernels, memObjects); return 1; } for (int i = 0; i < numberOfKernels; ++i) { // Set the kernel arguments (result, a, b) errNum = clSetKernelArg(kernels[i], 0, sizeof(cl_mem), &memObjects[0]); errNum |= clSetKernelArg(kernels[i], 1, sizeof(cl_mem), &memObjects[1]); errNum |= clSetKernelArg(kernels[i], 2, sizeof(cl_mem), &memObjects[2]); if (errNum != CL_SUCCESS) { std::cerr << "Error setting kernels[" << i << "] arguments." << std::endl; Cleanup(context, commandQueue, program, kernels, memObjects); return 1; } } size_t globalWorkSize[1] = { ARRAY_SIZE }; size_t localWorkSize[1] = { 1 }; cl_event waitFor = NULL; for (int i = 0; i < numberOfKernels; ++i) { cl_uint numToWaitFor = 0; cl_event waitList[1] = { 0 }; cl_event* waitListP = 0; if (waitFor != NULL) { numToWaitFor = 1; waitList[0] = waitFor; waitListP = waitList; } // Queue the kernel up for execution across the array errNum = clEnqueueNDRangeKernel(commandQueue, kernels[i], 1, NULL, globalWorkSize, localWorkSize, numToWaitFor, waitListP, &waitFor); if (errNum != CL_SUCCESS) { std::cerr << "Error queuing kernel for execution." << std::endl; Cleanup(context, commandQueue, program, kernels, memObjects); return 1; } } // Read the output buffer back to the Host errNum = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, ARRAY_SIZE * sizeof(float), result, 0, NULL, NULL); if (errNum != CL_SUCCESS) { std::cerr << "Error reading result buffer." << std::endl; Cleanup(context, commandQueue, program, kernels, memObjects); return 1; } // Output the result buffer for (int i = 0; i < ARRAY_SIZE; i++) { std::cout << result[i] << " "; } std::cout << std::endl; std::cout << "Executed program succesfully." << std::endl; Cleanup(context, commandQueue, program, kernels, memObjects); return 0; }
Vector<RefPtr<WebCLKernel>> WebCLProgram::createKernelsInProgram(ExceptionState& es) { if (isReleased()) { es.throwWebCLException(WebCLException::INVALID_PROGRAM, WebCLException::invalidProgramMessage); return Vector<RefPtr<WebCLKernel>>(); } if (!m_isProgramBuilt) { es.throwWebCLException(WebCLException::INVALID_PROGRAM_EXECUTABLE, WebCLException::invalidProgramExecutableMessage); return Vector<RefPtr<WebCLKernel>>(); } cl_uint num = 0; cl_int err = clCreateKernelsInProgram(m_clProgram, 0, nullptr, &num); if (err != CL_SUCCESS) { WebCLException::throwException(err, es); return Vector<RefPtr<WebCLKernel>>(); } if (num == 0) { es.throwWebCLException(WebCLException::FAILURE, WebCLException::failureMessage); return Vector<RefPtr<WebCLKernel>>(); } cl_kernel* kernelBuf = (cl_kernel*)malloc (sizeof(cl_kernel) * num); if (!kernelBuf) { return Vector<RefPtr<WebCLKernel>>(); } err = clCreateKernelsInProgram(m_clProgram, num, kernelBuf, nullptr); if (err != CL_SUCCESS) { WebCLException::throwException(err, es); return Vector<RefPtr<WebCLKernel>>(); } Vector<char> kernelName; size_t bytesOfKernelName = 0; Vector<RefPtr<WebCLKernel>> m_kernelList; for (size_t i = 0 ; i < num; i++) { err = clGetKernelInfo(kernelBuf[i], CL_KERNEL_FUNCTION_NAME, 0, nullptr, &bytesOfKernelName); if (err != CL_SUCCESS) { continue; } kernelName.reserveCapacity(bytesOfKernelName); kernelName.resize(bytesOfKernelName); err = clGetKernelInfo(kernelBuf[i], CL_KERNEL_FUNCTION_NAME, bytesOfKernelName, kernelName.data(), 0); if (err != CL_SUCCESS) { continue; } RefPtr<WebCLKernel> kernel = WebCLKernel::create(kernelBuf[i], context(), this, static_cast<const char*>(kernelName.data())); if (kernel) m_kernelList.append(kernel); kernelName.clear(); bytesOfKernelName = 0; } return m_kernelList; }
cl_int GLCLDraw::BuildFromSource(cl_program *program, const char *p) { cl_int ret; size_t codeSize; char *logBuf; char compile_options[2048]; cl_bool endian_little; compile_options[0] = '\0'; codeSize = strlen(p); *program = clCreateProgramWithSource(context, 1, (const char **)&p, (const size_t *)&codeSize, &ret); XM7_DebugLog(XM7_LOG_INFO, "CL: Build Result=%d", ret); if(ret < CL_SUCCESS) { return ret; } // Compile from source //strncat(compile_options, "-cl-fast-relaxed-math ", sizeof(compile_options) - 1); if(clGetDeviceInfo(device_id[using_device], CL_DEVICE_ENDIAN_LITTLE, sizeof(cl_bool), &endian_little, NULL) == CL_SUCCESS){ if(endian_little == CL_TRUE) { strncat(compile_options, "-D_CL_KERNEL_LITTLE_ENDIAN=1 ", sizeof(compile_options) - 1); } else { // BIG strncat(compile_options, "-D_CL_KERNEL_LITTLE_ENDIAN=0 ", sizeof(compile_options) - 1); // Big endian } } else { strncat(compile_options, "-D_CL_KERNEL_LITTLE_ENDIAN=1 ", sizeof(compile_options) - 1); // Assume little endian } // build_callback = CL_LogProgramExecute; // ret = clBuildProgram(*program, 1, &device_id[using_device], compile_options, // build_callback, (void *)this); ret = clBuildProgram(*program, 1, &device_id[using_device], compile_options, NULL, NULL); XM7_DebugLog(XM7_LOG_INFO, "Compile Result=%d", ret); CL_LogProgramExecute(*program, (void *)this); if(ret != CL_SUCCESS) { // Printout error log. // clReleaseProgram(program); return ret; } ret = clCreateKernelsInProgram(*program, 1, kernels_array, &nkernels); if(ret < CL_SUCCESS) { XM7_DebugLog(XM7_LOG_INFO, "Unable to build CL kernel. Status=%d", ret); } else { char funcname[128]; int i = 0; size_t size; XM7_DebugLog(XM7_LOG_INFO, "Built %d CL kernel(s).", nkernels); #if 1 for(i = 0; i < nkernels; i++) { funcname[0] = '\0'; if(clGetKernelInfo(kernels_array[i], CL_KERNEL_FUNCTION_NAME, sizeof(funcname) / sizeof(char) - 1, funcname, size) == CL_SUCCESS){ XM7_DebugLog(XM7_LOG_INFO, "Kernel name:%s.", funcname); if((strncmp(funcname, "getvram8", strlen("getvram8")) == 0)) kernel_8colors = kernels_array[i]; if((strncmp(funcname, "getvram4096", strlen("getvram4096")) == 0)) kernel_4096colors = kernels_array[i]; if((strncmp(funcname, "getvram256k", strlen("getvram256k")) == 0)) kernel_256kcolors = kernels_array[i]; if((strncmp(funcname, "CreateTable", strlen("CreateTable")) == 0)) kernel_table = kernels_array[i]; if((strncmp(funcname, "CopyVram", strlen("CopyVram")) == 0)) kernel_copyvram = kernels_array[i]; } } #endif } return ret; }
int main(int argc, char** argv) { /* OpenCL 1.1 data structures */ cl_platform_id* platforms; cl_program program; cl_device_id device; cl_context context; /* OpenCL 1.1 scalar data types */ cl_uint numOfPlatforms; cl_int error; /* Get the number of platforms Remember that for each vendor's SDK installed on the computer, the number of available platform also increased. */ error = clGetPlatformIDs(0, NULL, &numOfPlatforms); if(error != CL_SUCCESS) { perror("Unable to find any OpenCL platforms"); exit(1); } platforms = (cl_platform_id*) alloca(sizeof(cl_platform_id) * numOfPlatforms); printf("Number of OpenCL platforms found: %d\n", numOfPlatforms); error = clGetPlatformIDs(numOfPlatforms, platforms, NULL); if(error != CL_SUCCESS) { perror("Unable to find any OpenCL platforms"); exit(1); } // Search for a CPU/GPU device through the installed platforms // Build a OpenCL program and do not run it. for(cl_uint i = 0; i < numOfPlatforms; i++ ) { // Get the GPU device error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 1, &device, NULL); if(error != CL_SUCCESS) { // Otherwise, get the CPU error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU, 1, &device, NULL); } if(error != CL_SUCCESS) { perror("Can't locate any OpenCL compliant device"); exit(1); } /* Create a context */ context = clCreateContext(NULL, 1, &device, NULL, NULL, &error); if(error != CL_SUCCESS) { perror("Can't create a valid OpenCL context"); exit(1); } /* Load the two source files into temporary datastores */ const char *file_names[] = {"simple.cl", "simple_2.cl"}; const int NUMBER_OF_FILES = 2; char* buffer[NUMBER_OF_FILES]; size_t sizes[NUMBER_OF_FILES]; loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes); /* Create the OpenCL program object */ program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error); if(error != CL_SUCCESS) { perror("Can't create the OpenCL program object"); exit(1); } /* Build OpenCL program object and dump the error message, if any */ char *program_log; const char options[] = "-cl-finite-math-only -cl-no-signed-zeros"; size_t log_size; //error = clBuildProgram(program, 1, &device, argv[1], NULL, NULL); // Uncomment the line below, comment the line above; re-build the program to use build options statically error = clBuildProgram(program, 1, &device, options, NULL, NULL); if(error != CL_SUCCESS) { // If there's an error whilst building the program, dump the log clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); program_log = (char*) malloc(log_size+1); program_log[log_size] = '\0'; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size+1, program_log, NULL); printf("\n=== ERROR ===\n\n%s\n=============\n", program_log); free(program_log); exit(1); } /* Query the program as to how many kernels were detected */ cl_uint numOfKernels; error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels); if (error != CL_SUCCESS) { perror("Unable to retrieve kernel count from program"); exit(1); } cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels); error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL); for(cl_uint i = 0; i < numOfKernels; i++) { char kernelName[32]; cl_uint argCnt; clGetKernelInfo(kernels[i], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL); clGetKernelInfo(kernels[i], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL); printf("Kernel name: %s with arity: %d\n", kernelName, argCnt); } /* Clean up */ for(cl_uint i = 0; i < numOfKernels; i++) { clReleaseKernel(kernels[i]); } for(i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); } clReleaseProgram(program); clReleaseContext(context); } }
Kernel* OpenCL::createKernel( std::string strKernelSource ) { int iErr = 0; size_t sKernelLength = strKernelSource.length(); const char* program_buffer = strKernelSource.c_str(); cl_program program = clCreateProgramWithSource( m_context, 1, (const char**)&program_buffer, &sKernelLength, &iErr ); if( iErr != CL_SUCCESS ) { Log::getLog( "GPUAbstractionLayer" ) << Log::EL_ERROR << "Unable to create the program from the given source: " << strKernelSource.substr( 0, strKernelSource.find( '\n' ) ) << Log::endl; return NULL; } /* Build program */ std::string strBuildParams; // if this is a debug build and we are on a CPU use the debug option! #ifdef DEBUG cl_device_type devType; clGetDeviceInfo( m_device, CL_DEVICE_TYPE, sizeof( cl_device_type ), &devType, NULL ); if( devType == CL_DEVICE_TYPE_CPU ) strBuildParams += " -g"; #else strBuildParams += " -cl-unsafe-math-optimizations -cl-mad-enable -cl-no-signed-zeros"; #endif #ifdef MAC strBuildParams += " -DMAC"; #endif iErr = clBuildProgram( program, 0, NULL, strBuildParams.c_str(), NULL, NULL ); if( iErr != CL_SUCCESS ) { char* program_log; size_t log_size; /* Find size of log and print to std output */ clGetProgramBuildInfo( program, m_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size ); program_log = (char*)malloc( log_size + 1 ); program_log[ log_size ] = '\0'; clGetProgramBuildInfo( program, m_device, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL ); Log::getLog( "GPUAbstractionLayer" ) << Log::EL_FATAL_ERROR << "Error compiling the source:\n" << program_log << "\n" << Log::endl; free( program_log ); return NULL; } #ifdef DEBUG else { char* program_log; size_t log_size; /* Find size of log and print to std output */ clGetProgramBuildInfo( program, m_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size ); program_log = (char*)malloc( log_size + 1 ); program_log[ log_size ] = '\0'; clGetProgramBuildInfo( program, m_device, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL ); Log::getLog( "GPUAbstractionLayer" ) << Log::EL_INFO << "Build log:\n" << program_log << "\n" << Log::endl; free( program_log ); } #endif cl_kernel kernel; iErr = clCreateKernelsInProgram( program, 1, &kernel, NULL ); if( iErr != CL_SUCCESS ) { Log::getLog( "GPUAbstractionLayer" ) << Log::EL_ERROR << "Unable to create a kernel from the given source code" << Log::endl; return NULL; } size_t wgSize; iErr = clGetKernelWorkGroupInfo( kernel, m_device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( size_t ), &wgSize, NULL ); if( iErr != CL_SUCCESS ) { Log::getLog( "GPUAbstractionLayer" ) << Log::EL_ERROR << "Failed to get kernel work group size (" << errorNumberToString( iErr ) << ")" << Log::endl; return NULL; } return new Kernel( m_commandQueue, kernel, std::min( (unsigned int)wgSize, m_uiMaxWorkGroupSize ) ); }
int main(int argc, char** argv) { /* OpenCL 1.1 data structures */ cl_platform_id* platforms; cl_program program; cl_device_id device; cl_context context; /* OpenCL 1.1 scalar data types */ cl_uint numOfPlatforms; cl_int error; /* Prepare an array of UserData via dynamic memory allocation */ UserData* ud_in = (UserData*) malloc( sizeof(UserData) * DATA_SIZE); // input to device UserData* ud_out = (UserData*) malloc( sizeof(UserData) * DATA_SIZE); // output from device for( int i = 0; i < DATA_SIZE; ++i) { (ud_in + i)->x = i; (ud_in + i)->y = i; (ud_in + i)->z = i; (ud_in + i)->w = 3 * i; } /* Get the number of platforms Remember that for each vendor's SDK installed on the computer, the number of available platform also increased. */ error = clGetPlatformIDs(0, NULL, &numOfPlatforms); if(error != CL_SUCCESS ) { perror("Unable to find any OpenCL platforms"); exit(1); } platforms = (cl_platform_id*) alloca(sizeof(cl_platform_id) * numOfPlatforms); printf("Number of OpenCL platforms found: %d\n", numOfPlatforms); error = clGetPlatformIDs(numOfPlatforms, platforms, NULL); if(error != CL_SUCCESS ) { perror("Unable to find any OpenCL platforms"); exit(1); } // Search for a CPU/GPU device through the installed platforms // Build a OpenCL program and do not run it. for(cl_uint i = 0; i < numOfPlatforms; i++ ) { // Get the GPU device error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 1, &device, NULL); if(error != CL_SUCCESS) { // Otherwise, get the CPU error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU, 1, &device, NULL); } if(error != CL_SUCCESS) { perror("Can't locate any OpenCL compliant device"); exit(1); } /* Create a context */ context = clCreateContext(NULL, 1, &device, NULL, NULL, &error); if(error != CL_SUCCESS) { perror("Can't create a valid OpenCL context"); exit(1); } /* Load the two source files into temporary datastores */ const char *file_names[] = {"user_test.cl"}; const int NUMBER_OF_FILES = 1; char* buffer[NUMBER_OF_FILES]; size_t sizes[NUMBER_OF_FILES]; loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes); /* Create the OpenCL program object */ program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error); if(error != CL_SUCCESS) { perror("Can't create the OpenCL program object"); exit(1); } /* Build OpenCL program object and dump the error message, if any */ char *program_log; size_t log_size; error = clBuildProgram(program, 1, &device, NULL, NULL, NULL); if(error != CL_SUCCESS) { // If there's an error whilst building the program, dump the log clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); program_log = (char*) malloc(log_size+1); program_log[log_size] = '\0'; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size+1, program_log, NULL); printf("\n=== ERROR ===\n\n%s\n=============\n", program_log); free(program_log); exit(1); } /* Query the program as to how many kernels were detected */ cl_uint numOfKernels; error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels); if (error != CL_SUCCESS) { perror("Unable to retrieve kernel count from program"); exit(1); } cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels); error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL); for(cl_uint i = 0; i < numOfKernels; i++) { char kernelName[32]; cl_uint argCnt; clGetKernelInfo(kernels[i], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL); clGetKernelInfo(kernels[i], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL); printf("Kernel name: %s with arity: %d\n", kernelName, argCnt); printf("About to create command queue and enqueue this kernel...\n"); /* Create a command queue */ cl_command_queue cQ = clCreateCommandQueue(context, device, 0, &error); if (error != CL_SUCCESS) { perror("Unable to create command-queue"); exit(1); } /* Create a OpenCL buffer object */ cl_mem UDObj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(UserData) * DATA_SIZE, ud_in, &error); if (error != CL_SUCCESS) { perror("Unable to create buffer object"); exit(1); } /* Let OpenCL know that the kernel is suppose to receive an argument */ error = clSetKernelArg(kernels[i], 0, sizeof(cl_mem), &UDObj); if (error != CL_SUCCESS) { perror("Unable to set buffer object as kernel argument"); exit(1); } /* Enqueue the kernel to the command queue */ error = clEnqueueTask(cQ, kernels[i], 0, NULL, NULL); if (error != CL_SUCCESS) { perror("Unable to enqueue task to command-queue"); exit(1); } printf("Task has been enqueued successfully!\n"); /* Enqueue the read-back from device to host */ error = clEnqueueReadBuffer(cQ, UDObj, CL_TRUE, // blocking read 0, // write from the start sizeof(UserData) * DATA_SIZE, // how much to copy ud_out, 0, NULL, NULL); if ( valuesOK(ud_in, ud_out) ) { printf("Check passed!\n"); } else printf("Check failed!\n"); /* Release the command queue */ clReleaseCommandQueue(cQ); clReleaseMemObject(UDObj); } /* Clean up */ for(cl_uint i = 0; i < numOfKernels; i++) { clReleaseKernel(kernels[i]); } for(i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); } clReleaseProgram(program); clReleaseContext(context); } free(ud_in); free(ud_out); }
int main(int argc, char** argv) { /* OpenCL 1.1 data structures */ cl_platform_id* platforms; cl_program program; cl_context context; /* OpenCL 1.1 scalar data types */ cl_uint numOfPlatforms; cl_int error; cl_float16* ud_in = (cl_float16*) malloc( sizeof(cl_float16) * DATA_SIZE); // input to device cl_float16* ud_out = (cl_float16*) malloc( sizeof(cl_float16) * DATA_SIZE); // output from device for( int i = 0; i < DATA_SIZE; ++i) { ud_in[i] = (cl_float16){ (float)i,(float)i,(float)i,(float)i, (float)i,(float)i,(float)i,(float)i, (float)i,(float)i,(float)i,(float)i, (float)i,(float)i,(float)i,(float)i }; } /* Get the number of platforms Remember that for each vendor's SDK installed on the computer, the number of available platform also increased. */ error = clGetPlatformIDs(0, NULL, &numOfPlatforms); if(error != CL_SUCCESS ) { perror("Unable to find any OpenCL platforms"); exit(1); } platforms = (cl_platform_id*) alloca(sizeof(cl_platform_id) * numOfPlatforms); printf("Number of OpenCL platforms found: %d\n", numOfPlatforms); error = clGetPlatformIDs(numOfPlatforms, platforms, NULL); if(error != CL_SUCCESS ) { perror("Unable to find any OpenCL platforms"); exit(1); } // Search for a CPU/GPU device through the installed platforms // Build a OpenCL program and do not run it. for(cl_uint i = 0; i < numOfPlatforms; i++ ) { cl_uint numOfDevices = 0; /* Determine how many devices are connected to your platform */ error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &numOfDevices); if (error != CL_SUCCESS ) { perror("Unable to obtain any OpenCL compliant device info"); exit(1); } cl_device_id* devices = (cl_device_id*) alloca(sizeof(cl_device_id) * numOfDevices); /* Load the information about your devices into the variable 'devices' */ error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numOfDevices, devices, NULL); if (error != CL_SUCCESS ) { perror("Unable to obtain any OpenCL compliant device info"); exit(1); } printf("Number of detected OpenCL devices: %d\n", numOfDevices); /* Create a context */ cl_context_properties ctx[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i], 0 }; context = clCreateContext(ctx, numOfDevices, devices, NULL, NULL, &error); if(error != CL_SUCCESS) { perror("Can't create a valid OpenCL context"); exit(1); } /* For each device, create a buffer and partition that data among the devices for compute! */ cl_mem inobj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float16) * DATA_SIZE, ud_in, &error); if(error != CL_SUCCESS) { perror("Can't create a buffer"); exit(1); } int offset = 0; for(int i = 0; i < numOfDevices; ++i, ++offset ) { /* Load the two source files into temporary datastores */ const char *file_names[] = {"vector_load.cl"}; const int NUMBER_OF_FILES = 1; char* buffer[NUMBER_OF_FILES]; size_t sizes[NUMBER_OF_FILES]; loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes); /* Create the OpenCL program object */ program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error); if(error != CL_SUCCESS) { perror("Can't create the OpenCL program object"); exit(1); } /* Build OpenCL program object and dump the error message, if any */ char *program_log; size_t log_size; error = clBuildProgram(program, 1, &devices[i], NULL, NULL, NULL); if(error != CL_SUCCESS) { // If there's an error whilst building the program, dump the log clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); program_log = (char*) malloc(log_size+1); program_log[log_size] = '\0'; clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, log_size+1, program_log, NULL); printf("\n=== ERROR ===\n\n%s\n=============\n", program_log); free(program_log); exit(1); } /* Query the program as to how many kernels were detected */ cl_uint numOfKernels; error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels); if (error != CL_SUCCESS) { perror("Unable to retrieve kernel count from program"); exit(1); } cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels); error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL); /* Loop thru each kernel and execute on device */ for(cl_uint j = 0; j < numOfKernels; j++) { char kernelName[32]; cl_uint argCnt; clGetKernelInfo(kernels[j], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL); clGetKernelInfo(kernels[j], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL); printf("Kernel name: %s with arity: %d\n", kernelName, argCnt); printf("About to create command queue and enqueue this kernel...\n"); /* Create a command queue */ cl_command_queue cQ = clCreateCommandQueue(context, devices[i], 0, &error); if (error != CL_SUCCESS) { perror("Unable to create command-queue"); exit(1); } /* Create a buffer and copy the data from the main buffer */ cl_mem outobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float16) * DATA_SIZE, 0, &error); if (error != CL_SUCCESS) { perror("Unable to create sub-buffer object"); exit(1); } /* Let OpenCL know that the kernel is suppose to receive an argument */ error = clSetKernelArg(kernels[j], 0, sizeof(cl_mem), &inobj); error = clSetKernelArg(kernels[j], 1, sizeof(cl_mem), &outobj); if (error != CL_SUCCESS) { perror("Unable to set buffer object in kernel"); exit(1); } /* Enqueue the kernel to the command queue */ size_t threadsPerGroup[] = {4}; size_t numOfGroups[] = { DATA_SIZE / threadsPerGroup[0] }; error = clEnqueueNDRangeKernel(cQ, kernels[j], 1, 0, numOfGroups, threadsPerGroup,0, NULL, NULL); if (error != CL_SUCCESS) { perror("Unable to enqueue task to command-queue"); exit(1); } printf("Task has been enqueued successfully!\n"); /* Enqueue the read-back from device to host */ error = clEnqueueReadBuffer(cQ, outobj, CL_TRUE, // blocking read 0, // read from the start sizeof(cl_float16)*DATA_SIZE, // how much to copy ud_out, 0, NULL, NULL); /* Check the returned data */ if ( valuesOK(ud_in, ud_out, DATA_SIZE) ) { printf("Check passed!\n"); } else printf("Check failed!\n"); /* Release the command queue */ clReleaseCommandQueue(cQ); clReleaseMemObject(outobj); } /* Clean up */ for(cl_uint i = 0; i < numOfKernels; i++) { clReleaseKernel(kernels[i]); } for(int i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); } clReleaseProgram(program); }// end of device loop and execution clReleaseMemObject(inobj); clReleaseContext(context); }// end of platform loop free(ud_in); free(ud_out); }
vx_status vxTargetInit(vx_target_t *target) { vx_status status = VX_ERROR_NO_RESOURCES; cl_int err = 0; vx_context context = target->base.context; cl_uint p, d, k; char *vx_incs = getenv("VX_CL_INCLUDE_DIR"); char *cl_dirs = getenv("VX_CL_SOURCE_DIR"); char cl_args[1024]; snprintf(cl_args, sizeof(cl_args), "-D VX_CL_KERNEL -I %s -I %s %s %s", (vx_incs?vx_incs:"C:\\Users\\Eric\\Desktop\\VS_OpenVX2\\example_multinode_graph\\cl_code"), cl_dirs, //#if !defined(__APPLE__) // "-D CL_USE_LUMINANCE", //#else "", //#endif #if defined(VX_INCLUDE_DIR) "-I "VX_INCLUDE_DIR" " #else " " #endif ); if (cl_dirs == NULL) { #ifdef VX_CL_SOURCE_DIR const char *sdir = VX_CL_SOURCE_DIR; int len = strlen(sdir); cl_dirs = malloc(len); strncpy(cl_dirs, sdir, len); #else return status; #endif } strncpy(target->name, name, VX_MAX_TARGET_NAME); target->priority = VX_TARGET_PRIORITY_OPENCL; context->num_platforms = CL_MAX_PLATFORMS; err = clGetPlatformIDs(CL_MAX_PLATFORMS, context->platforms, NULL); if (err != CL_SUCCESS) goto exit; for (p = 0; p < context->num_platforms; p++) { err = clGetDeviceIDs(context->platforms[p], CL_DEVICE_TYPE_ALL, 0, NULL, &context->num_devices[p]); err = clGetDeviceIDs(context->platforms[p], CL_DEVICE_TYPE_ALL, context->num_devices[p] > CL_MAX_DEVICES ? CL_MAX_DEVICES : context->num_devices[p], context->devices[p], NULL); if (err == CL_SUCCESS) { cl_context_properties props[] = { (cl_context_properties)CL_CONTEXT_PLATFORM, (cl_context_properties)context->platforms[p], (cl_context_properties)0, }; for (d = 0; d < context->num_devices[p]; d++) { char deviceName[64]; cl_bool compiler = CL_FALSE; cl_bool available = CL_FALSE; cl_bool image_support = CL_FALSE; err = clGetDeviceInfo(context->devices[p][d], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); CL_ERROR_MSG(err, "clGetDeviceInfo"); err = clGetDeviceInfo(context->devices[p][d], CL_DEVICE_COMPILER_AVAILABLE, sizeof(cl_bool), &compiler, NULL); CL_ERROR_MSG(err, "clGetDeviceInfo"); err = clGetDeviceInfo(context->devices[p][d], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL); CL_ERROR_MSG(err, "clGetDeviceInfo"); err = clGetDeviceInfo(context->devices[p][d], CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL); CL_ERROR_MSG(err, "clGetDeviceInfo"); VX_PRINT(VX_ZONE_INFO, "Device %s (compiler=%s) (available=%s) (images=%s)\n", deviceName, (compiler?"TRUE":"FALSE"), (available?"TRUE":"FALSE"), (image_support?"TRUE":"FALSE")); } context->global[p] = clCreateContext(props, context->num_devices[p], context->devices[p], vxcl_platform_notifier, target, &err); if (err != CL_SUCCESS) break; /* check for supported formats */ if (err == CL_SUCCESS) { cl_uint f,num_entries = 0u; cl_image_format *formats = NULL; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; cl_mem_object_type type = CL_MEM_OBJECT_IMAGE2D; err = clGetSupportedImageFormats(context->global[p], flags, type, 0, NULL, &num_entries); formats = (cl_image_format *)malloc(num_entries * sizeof(cl_image_format)); err = clGetSupportedImageFormats(context->global[p], flags, type, num_entries, formats, NULL); for (f = 0; f < num_entries; f++) { char order[256]; char datat[256]; #define CASE_STRINGERIZE2(value, string) case value: strcpy(string, #value); break switch(formats[f].image_channel_order) { CASE_STRINGERIZE2(CL_R, order); CASE_STRINGERIZE2(CL_A, order); CASE_STRINGERIZE2(CL_RG, order); CASE_STRINGERIZE2(CL_RA, order); CASE_STRINGERIZE2(CL_RGB, order); CASE_STRINGERIZE2(CL_RGBA, order); CASE_STRINGERIZE2(CL_BGRA, order); CASE_STRINGERIZE2(CL_ARGB, order); CASE_STRINGERIZE2(CL_INTENSITY, order); CASE_STRINGERIZE2(CL_LUMINANCE, order); CASE_STRINGERIZE2(CL_Rx, order); CASE_STRINGERIZE2(CL_RGx, order); CASE_STRINGERIZE2(CL_RGBx, order); #if defined(CL_VERSION_1_2) && defined(cl_khr_gl_depth_images) CASE_STRINGERIZE2(CL_DEPTH, order); CASE_STRINGERIZE2(CL_DEPTH_STENCIL, order); #if defined(__APPLE__) CASE_STRINGERIZE2(CL_1RGB_APPLE, order); CASE_STRINGERIZE2(CL_BGR1_APPLE, order); CASE_STRINGERIZE2(CL_SFIXED14_APPLE, order); CASE_STRINGERIZE2(CL_BIASED_HALF_APPLE, order); CASE_STRINGERIZE2(CL_YCbYCr_APPLE, order); CASE_STRINGERIZE2(CL_CbYCrY_APPLE, order); CASE_STRINGERIZE2(CL_ABGR_APPLE, order); #endif #endif default: sprintf(order, "%x", formats[f].image_channel_order); break; } switch(formats[f].image_channel_data_type) { CASE_STRINGERIZE2(CL_SNORM_INT8, datat); CASE_STRINGERIZE2(CL_SNORM_INT16, datat); CASE_STRINGERIZE2(CL_UNORM_INT8, datat); CASE_STRINGERIZE2(CL_UNORM_INT16, datat); CASE_STRINGERIZE2(CL_UNORM_SHORT_565, datat); CASE_STRINGERIZE2(CL_UNORM_SHORT_555, datat); CASE_STRINGERIZE2(CL_UNORM_INT_101010, datat); CASE_STRINGERIZE2(CL_SIGNED_INT8, datat); CASE_STRINGERIZE2(CL_SIGNED_INT16, datat); CASE_STRINGERIZE2(CL_SIGNED_INT32, datat); CASE_STRINGERIZE2(CL_UNSIGNED_INT8, datat); CASE_STRINGERIZE2(CL_UNSIGNED_INT16, datat); CASE_STRINGERIZE2(CL_UNSIGNED_INT32, datat); CASE_STRINGERIZE2(CL_HALF_FLOAT, datat); CASE_STRINGERIZE2(CL_FLOAT, datat); #if defined(CL_VERSION_2_0) CASE_STRINGERIZE2(CL_UNORM_INT24, datat); #endif default: sprintf(order, "%x", formats[f].image_channel_data_type); break; } VX_PRINT(VX_ZONE_INFO, "%s : %s\n", order, datat); } } /* create a queue for each device */ for (d = 0; d < context->num_devices[p]; d++) { context->queues[p][d] = clCreateCommandQueue(context->global[p], context->devices[p][d], CL_QUEUE_PROFILING_ENABLE, &err); if (err == CL_SUCCESS) { } } char abs_source_path[VX_CL_MAX_PATH]; /* for each kernel */ for (k = 0; k < num_cl_kernels; k++) { char *sources = NULL; size_t programSze = 0; /* load the source file */ VX_PRINT(VX_ZONE_INFO, "Joiner: %s\n", FILE_JOINER); VX_PRINT(VX_ZONE_INFO, "Path: %s\n", cl_dirs); VX_PRINT(VX_ZONE_INFO, "Kernel[%u] File: %s\n", k, cl_kernels[k]->sourcepath); VX_PRINT(VX_ZONE_INFO, "Kernel[%u] Name: %s\n", k, cl_kernels[k]->kernelname); VX_PRINT(VX_ZONE_INFO, "Kernel[%u] ID: %s\n", k, cl_kernels[k]->description.name); int cl_dirs_len = strlen(cl_dirs); int sourcepath_len = strlen(cl_kernels[k]->sourcepath); strncpy(abs_source_path, cl_dirs, cl_dirs_len); strncpy(&abs_source_path[cl_dirs_len], cl_kernels[k]->sourcepath, sourcepath_len); abs_source_path[cl_dirs_len+sourcepath_len] = '\0'; sources = clLoadSources(abs_source_path, &programSze); VX_PRINT(VX_ZONE_INFO, "clLoadSources programSze:%d\n", programSze); /* create a program with this source */ cl_kernels[k]->program[p] = clCreateProgramWithSource(context->global[p], 1, (const char **)&sources, &programSze, &err); if (err == CL_SUCCESS) { err = clBuildProgram((cl_program)cl_kernels[k]->program[p], 1, (const cl_device_id *)context->devices, (const char *)cl_args, NULL, NULL); if (err != CL_SUCCESS) { CL_BUILD_MSG(err, "Build Error"); if (err == CL_BUILD_PROGRAM_FAILURE) { char log[10][1024]; size_t logSize = 0; clGetProgramBuildInfo((cl_program)cl_kernels[k]->program[p], (cl_device_id)context->devices[p][0], CL_PROGRAM_BUILD_LOG, sizeof(log), log, &logSize); VX_PRINT(VX_ZONE_ERROR, "%s", log); } } else { cl_int k2 = 0; cl_build_status bstatus = 0; size_t bs = 0; err = clGetProgramBuildInfo(cl_kernels[k]->program[p], context->devices[p][0], CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &bstatus, &bs); VX_PRINT(VX_ZONE_INFO, "Status = %d (%d)\n", bstatus, err); /* get the cl_kernels from the program */ cl_kernels[k]->num_kernels[p] = 1; err = clCreateKernelsInProgram(cl_kernels[k]->program[p], 1, &cl_kernels[k]->kernels[p], NULL); VX_PRINT(VX_ZONE_INFO, "Found %u cl_kernels in %s (%d)\n", cl_kernels[k]->num_kernels[p], cl_kernels[k]->sourcepath, err); for (k2 = 0; (err == CL_SUCCESS) && (k2 < (cl_int)cl_kernels[k]->num_kernels[p]); k2++) { char kName[VX_MAX_KERNEL_NAME]; size_t size = 0; err = clGetKernelInfo(cl_kernels[k]->kernels[p], CL_KERNEL_FUNCTION_NAME, 0, NULL, &size); err = clGetKernelInfo(cl_kernels[k]->kernels[p], CL_KERNEL_FUNCTION_NAME, size, kName, NULL); VX_PRINT(VX_ZONE_INFO, "Kernel %s\n", kName); if (strncmp(kName, cl_kernels[k]->kernelname, VX_MAX_KERNEL_NAME) == 0) { vx_kernel_f kfunc = cl_kernels[k]->description.function; VX_PRINT(VX_ZONE_INFO, "Linked Kernel %s on target %s\n", cl_kernels[k]->kernelname, target->name); target->num_kernels++; target->base.context->num_kernels++; status = vxInitializeKernel(target->base.context, &target->kernels[k], cl_kernels[k]->description.enumeration, (kfunc == NULL ? vxclCallOpenCLKernel : kfunc), cl_kernels[k]->description.name, cl_kernels[k]->description.parameters, cl_kernels[k]->description.numParams, cl_kernels[k]->description.input_validate, cl_kernels[k]->description.output_validate, cl_kernels[k]->description.initialize, cl_kernels[k]->description.deinitialize); if (vxIsKernelUnique(&target->kernels[k]) == vx_true_e) { target->base.context->num_unique_kernels++; } else { VX_PRINT(VX_ZONE_KERNEL, "Kernel %s is NOT unqiue\n", target->kernels[k].name); } } } } } else { CL_ERROR_MSG(err, "Program"); } free(sources); } } } exit: if (err == CL_SUCCESS) { status = VX_SUCCESS; } else { status = VX_ERROR_NO_RESOURCES; } return status; }