// Launches the "calcLut" OpenCL kernel from clahe_oclsrc.
//
// _src      : input array, bound to the kernel as a read-only buffer.
// _dst      : output array; (re)allocated here as a (tilesX * tilesY) x 256
//             CV_8UC1 matrix, i.e. one 256-entry row per tile.
// tilesX/Y  : tile grid dimensions; one 32x8 work-group is launched per tile.
// tileSize  : tile width/height, handed to the kernel as a two-int value.
// clipLimit, lutScale : forwarded to the kernel unchanged.
//
// Returns false when the kernel could not be created, otherwise the result
// of Kernel::run().
static bool calcLut(cv::InputArray _src, cv::OutputArray _dst,
                    const int tilesX, const int tilesY, const cv::Size tileSize,
                    const int clipLimit, const float lutScale)
{
    cv::ocl::Kernel kernel("calcLut", cv::ocl::imgproc::clahe_oclsrc);
    if (kernel.empty())
    {
        return false;
    }

    cv::UMat srcMat = _src.getUMat();
    _dst.create(tilesX * tilesY, 256, CV_8UC1);
    cv::UMat lutMat = _dst.getUMat();

    // Passed by value as a single 2 x int kernel argument.
    int tileDims[2] = { tileSize.width, tileSize.height };

    // One work-group of 32x8 work-items per tile.
    size_t workGroup[3] = { 32, 8, 1 };
    size_t workGrid[3] = { tilesX * workGroup[0], tilesY * workGroup[1], 1 };

    // Each set() returns the index of the next argument slot, so the order of
    // these calls fixes the kernel argument order.
    int argPos = 0;
    argPos = kernel.set(argPos, cv::ocl::KernelArg::ReadOnlyNoSize(srcMat));
    argPos = kernel.set(argPos, cv::ocl::KernelArg::WriteOnlyNoSize(lutMat));
    argPos = kernel.set(argPos, tileDims);
    argPos = kernel.set(argPos, tilesX);
    argPos = kernel.set(argPos, clipLimit);
    kernel.set(argPos, lutScale);

    return kernel.run(2, workGrid, workGroup, false);
}
// Launches the "transform" OpenCL kernel from clahe_oclsrc.
//
// _src      : input array, bound to the kernel as a read-only buffer.
// _dst      : output array; (re)allocated here with the size and type of _src.
// _lut      : lookup-table array, bound to the kernel as a read-only buffer.
// tilesX/Y  : tile grid dimensions, forwarded to the kernel.
// tileSize  : tile width/height, handed to the kernel as a two-int value.
//
// Returns false when the kernel could not be created, otherwise the result
// of Kernel::run().
static bool transform(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _lut,
                      const int tilesX, const int tilesY, const cv::Size & tileSize)
{
    cv::ocl::Kernel kernel("transform", cv::ocl::imgproc::clahe_oclsrc);
    if (kernel.empty())
    {
        return false;
    }

    cv::UMat srcMat = _src.getUMat();
    _dst.create(srcMat.size(), srcMat.type());
    cv::UMat dstMat = _dst.getUMat();
    cv::UMat lutMat = _lut.getUMat();

    // Passed by value as a single 2 x int kernel argument.
    int tileDims[2] = { tileSize.width, tileSize.height };

    // The global range covers src.cols x src.rows work-items, in 32x8 groups.
    size_t workGroup[3] = { 32, 8, 1 };
    size_t workGrid[3] = { static_cast<size_t>(srcMat.cols),
                           static_cast<size_t>(srcMat.rows),
                           1 };

    // Each set() returns the index of the next argument slot, so the order of
    // these calls fixes the kernel argument order.
    int argPos = 0;
    argPos = kernel.set(argPos, cv::ocl::KernelArg::ReadOnlyNoSize(srcMat));
    argPos = kernel.set(argPos, cv::ocl::KernelArg::WriteOnlyNoSize(dstMat));
    argPos = kernel.set(argPos, cv::ocl::KernelArg::ReadOnlyNoSize(lutMat));
    argPos = kernel.set(argPos, srcMat.cols);
    argPos = kernel.set(argPos, srcMat.rows);
    argPos = kernel.set(argPos, tileDims);
    argPos = kernel.set(argPos, tilesX);
    kernel.set(argPos, tilesY);

    return kernel.run(2, workGrid, workGroup, false);
}
static bool calcLut(cv::InputArray _src, cv::OutputArray _dst, const int tilesX, const int tilesY, const cv::Size tileSize, const int clipLimit, const float lutScale) { cv::ocl::Kernel _k("calcLut", cv::ocl::imgproc::clahe_oclsrc); bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU; cv::String opts; if(is_cpu) opts = "-D CPU "; else opts = cv::format("-D WAVE_SIZE=%d", _k.preferedWorkGroupSizeMultiple()); cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc, opts); if(k.empty()) return false; cv::UMat src = _src.getUMat(); _dst.create(tilesX * tilesY, 256, CV_8UC1); cv::UMat dst = _dst.getUMat(); int tile_size[2]; tile_size[0] = tileSize.width; tile_size[1] = tileSize.height; size_t localThreads[3] = { 32, 8, 1 }; size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 }; int idx = 0; idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src)); idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst)); idx = k.set(idx, tile_size); idx = k.set(idx, tilesX); idx = k.set(idx, clipLimit); idx = k.set(idx, lutScale); if (!k.run(2, globalThreads, localThreads, false)) return false; return true; }