int buildPointList_gpu(const oclMat& src, oclMat& list) { const int PIXELS_PER_THREAD = 16; int totalCount = 0; int err = CL_SUCCESS; cl_mem counter = clCreateBuffer(src.clCxt->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(int), &totalCount, &err); openCLSafeCall(err); const size_t blkSizeX = 32; const size_t blkSizeY = 4; size_t localThreads[3] = { blkSizeX, blkSizeY, 1 }; const int PIXELS_PER_BLOCK = blkSizeX * PIXELS_PER_THREAD; const size_t glbSizeX = src.cols % (PIXELS_PER_BLOCK) == 0 ? src.cols : MUL_UP(src.cols, PIXELS_PER_BLOCK); const size_t glbSizeY = src.rows % blkSizeY == 0 ? src.rows : MUL_UP(src.rows, blkSizeY); size_t globalThreads[3] = { glbSizeX, glbSizeY, 1 }; vector<pair<size_t , const void *> > args; args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&list.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&counter )); openCLExecuteKernel(src.clCxt, &imgproc_hough, "buildPointList", globalThreads, localThreads, args, -1, -1); openCLSafeCall(clEnqueueReadBuffer(src.clCxt->impl->clCmdQueue, counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL)); openCLSafeCall(clReleaseMemObject(counter)); return totalCount; }
cl_mem bindTexture(const oclMat &mat) { cl_mem texture; cl_image_format format; int err; int depth = mat.depth(); int channels = mat.channels(); switch(depth) { case CV_8U: format.image_channel_data_type = CL_UNSIGNED_INT8; break; case CV_32S: format.image_channel_data_type = CL_UNSIGNED_INT32; break; case CV_32F: format.image_channel_data_type = CL_FLOAT; break; default: throw std::exception(); break; } switch(channels) { case 1: format.image_channel_order = CL_R; break; case 3: format.image_channel_order = CL_RGB; break; case 4: format.image_channel_order = CL_RGBA; break; default: throw std::exception(); break; } #if CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = mat.cols; desc.image_height = mat.rows; desc.image_depth = 0; desc.image_array_size = 1; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.buffer = NULL; desc.num_mip_levels = 0; desc.num_samples = 0; texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); #else texture = clCreateImage2D( mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, mat.cols, mat.rows, 0, NULL, &err); #endif size_t origin[] = { 0, 0, 0 }; size_t region[] = { mat.cols, mat.rows, 1 }; cl_mem devData; if (mat.cols * mat.elemSize() != mat.step) { devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows * mat.elemSize(), NULL, NULL); const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1}; clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin, regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL); } else { devData = (cl_mem)mat.data; } clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0); if ((mat.cols * mat.elemSize() != mat.step)) { clFinish(mat.clCxt->impl->clCmdQueue); clReleaseMemObject(devData); } openCLSafeCall(err); return texture; }
cl_mem bindTexture(const oclMat &mat) { cl_mem texture; cl_image_format format; int err; int depth = mat.depth(); int channels = mat.oclchannels(); switch(depth) { case CV_8U: format.image_channel_data_type = CL_UNSIGNED_INT8; break; case CV_32S: format.image_channel_data_type = CL_UNSIGNED_INT32; break; case CV_32F: format.image_channel_data_type = CL_FLOAT; break; default: CV_Error(-1, "Image forma is not supported"); break; } switch(channels) { case 1: format.image_channel_order = CL_R; break; case 3: format.image_channel_order = CL_RGB; break; case 4: format.image_channel_order = CL_RGBA; break; default: CV_Error(-1, "Image format is not supported"); break; } #ifdef CL_VERSION_1_2 //this enables backwards portability to //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support if(Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2)) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = mat.cols; desc.image_height = mat.rows; desc.image_depth = 0; desc.image_array_size = 1; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.buffer = NULL; desc.num_mip_levels = 0; desc.num_samples = 0; texture = clCreateImage(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err); } else #endif { texture = clCreateImage2D( *(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, mat.cols, mat.rows, 0, NULL, &err); } size_t origin[] = { 0, 0, 0 }; size_t region[] = { mat.cols, mat.rows, 1 }; cl_mem devData; if (mat.cols * mat.elemSize() != mat.step) { devData = clCreateBuffer(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_ONLY, mat.cols * mat.rows * mat.elemSize(), NULL, NULL); const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1}; clEnqueueCopyBufferRect(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), (cl_mem)mat.data, devData, origin, origin, regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL); clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr()); } else { devData = (cl_mem)mat.data; } clEnqueueCopyBufferToImage(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), devData, texture, 0, origin, region, 0, NULL, 0); if ((mat.cols * mat.elemSize() != mat.step)) { clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr()); clReleaseMemObject(devData); } openCLSafeCall(err); return texture; }
cl_program getOrBuildProgram(const Context* ctx, const cv::ocl::ProgramEntry* source, const String& options) { cl_int status = 0; cl_program program = NULL; std::vector<char> binary; if (!enable_disk_cache || !readConfigurationFromFile(options, binary)) { program = clCreateProgramWithSource(getClContext(ctx), 1, (const char**)&source->programStr, NULL, &status); openCLVerifyCall(status); cl_device_id device = getClDeviceID(ctx); status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL); if(status == CL_SUCCESS) { if (enable_disk_cache) { size_t binarySize; openCLSafeCall(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, NULL)); std::vector<char> binary(binarySize); char* ptr = &binary[0]; openCLSafeCall(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char*), &ptr, NULL)); if (!writeConfigurationToFile(options, binary)) { std::cerr << "Can't write data to file: " << fileName_ << std::endl; } } } } else { cl_device_id device = getClDeviceID(ctx); size_t size = binary.size(); const char* ptr = &binary[0]; program = clCreateProgramWithBinary(getClContext(ctx), 1, &device, (const size_t *)&size, (const unsigned char **)&ptr, NULL, &status); openCLVerifyCall(status); status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL); } if(status != CL_SUCCESS) { if (status == CL_BUILD_PROGRAM_FAILURE || status == CL_INVALID_BUILD_OPTIONS) { size_t buildLogSize = 0; openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize)); std::vector<char> buildLog; buildLog.resize(buildLogSize); memset(&buildLog[0], 0, buildLogSize); openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[0], NULL)); std::cout << std::endl << "BUILD LOG: " << (source->name ? source->name : "dynamic program") << ": " << options << "\n"; std::cout << &buildLog[0] << std::endl; } openCLVerifyCall(status); } return program; }
cl_program getOrBuildProgram(const Context* ctx, const cv::ocl::ProgramEntry* source, const String& options) { cl_int status = 0; cl_program program = NULL; std::vector<char> binary; if (!enable_disk_cache || !readConfigurationFromFile(options, binary)) { program = clCreateProgramWithSource(getClContext(ctx), 1, (const char**)&source->programStr, NULL, &status); openCLVerifyCall(status); cl_device_id device = getClDeviceID(ctx); status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL); if(status == CL_SUCCESS) { if (enable_disk_cache) { size_t binarySize; openCLSafeCall(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, NULL)); std::vector<char> binary(binarySize); char* ptr = &binary[0]; openCLSafeCall(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char*), &ptr, NULL)); if (!writeConfigurationToFile(options, binary)) { std::cerr << "Can't write data to file: " << fileName_ << std::endl; } } } } else { cl_device_id device = getClDeviceID(ctx); size_t size = binary.size(); const char* ptr = &binary[0]; program = clCreateProgramWithBinary(getClContext(ctx), 1, &device, (const size_t *)&size, (const unsigned char **)&ptr, NULL, &status); openCLVerifyCall(status); status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL); } if(status != CL_SUCCESS) { if(status == CL_BUILD_PROGRAM_FAILURE) { cl_int logStatus; char *buildLog = NULL; size_t buildLogSize = 0; logStatus = clGetProgramBuildInfo(program, getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &buildLogSize); if(logStatus != CL_SUCCESS) std::cout << "Failed to build the program and get the build info." << std::endl; buildLog = new char[buildLogSize]; CV_DbgAssert(!!buildLog); memset(buildLog, 0, buildLogSize); openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); std::cout << "\nBUILD LOG: " << options << "\n"; std::cout << buildLog << std::endl; delete [] buildLog; } openCLVerifyCall(status); } return program; }