static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, String kernelName) { CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols); std::vector<std::pair<size_t , const void *> > args; size_t localThreads[3] = { 16, 16, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; const char channelMap[] = { ' ', ' ', '2', '4', '4' }; std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]); oclMat m(Mat(1, 1, dst.type(), scalar)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&m.data )); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel )); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&mask.data )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.step )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.offset )); openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); }
void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName) { CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols); vector<pair<size_t , const void *> > args; cl_float4 val; val.s[0] = scalar.val[0]; val.s[1] = scalar.val[1]; val.s[2] = scalar.val[2]; val.s[3] = scalar.val[3]; size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3]; globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; globalThreads[2] = 1; if(dst.type() == CV_8UC1) { globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; } int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); args.push_back( make_pair( sizeof(cl_float4) , (void *)&val )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads, localThreads, args, dst.channels(), dst.depth()); }
/////////////////////////////////////////////////////////////////////////// //////////////////////////////// ConvertTo //////////////////////////////// /////////////////////////////////////////////////////////////////////////// static void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta) { std::string kernelName = "convert_to_S"; std::stringstream idxStr; idxStr << src.depth(); kernelName += idxStr.str(); float alpha_f = (float)alpha, beta_f = (float)beta; CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols); std::vector<std::pair<size_t , const void *> > args; size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3]; globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; globalThreads[2] = 1; int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize(); int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize(); if(dst.type() == CV_8UC1) { globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0]) / localThreads[0] * localThreads[0]; } args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data )); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel )); args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f )); args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f )); openCLExecuteKernel2(dst.clCxt , &operator_convertTo, 
kernelName, globalThreads, localThreads, args, dst.oclchannels(), dst.depth(), CLFLUSH); }
void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho_c, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, oclMat &u1, oclMat &u2, oclMat &error, float l_t, float theta, char calc_error) { Context* clCxt = I1wx.clCxt; size_t localThread[] = {32, 8, 1}; size_t globalThread[] = { I1wx.cols, I1wx.rows, 1 }; int I1wx_element_size = I1wx.elemSize(); int I1wx_step = I1wx.step/I1wx_element_size; int u1_element_size = u1.elemSize(); int u1_step = u1.step/u1_element_size; int u2_element_size = u2.elemSize(); int u2_step = u2.step/u2_element_size; int u1_offset_y = u1.offset/u1.step; int u1_offset_x = u1.offset%u1.step; u1_offset_x = u1_offset_x/u1.elemSize(); int u2_offset_y = u2.offset/u2.step; int u2_offset_x = u2.offset%u2.step; u2_offset_x = u2_offset_x/u2.elemSize(); String kernelName = "estimateUKernel"; vector< pair<size_t, const void *> > args; args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data)); args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.cols)); args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.rows)); args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx_step)); args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&rho_c.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step)); args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&error.data)); args.push_back( make_pair( sizeof(cl_float), (void*)&l_t)); args.push_back( make_pair( sizeof(cl_float), (void*)&theta)); args.push_back( make_pair( 
sizeof(cl_int), (void*)&u2_step)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); args.push_back( make_pair( sizeof(cl_char), (void*)&calc_error)); openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); }
// Creates a 2D OpenCL image and fills it from the buffer behind `mat`.
//
// mat      - source matrix; the image is (mat.step / mat.elemSize()) wide and
//            mat.rows high, and the copy starts at buffer offset 0.
// depth    - 0 selects CL_UNSIGNED_INT8, 5 selects CL_FLOAT.
// channels - 1, 3 or 4 select CL_R, CL_RGB or CL_RGBA.
//
// Returns the newly created image object. Errors from image creation and from
// the buffer-to-image copy are reported through openCLSafeCall.
static cl_mem bindTexture(const oclMat &mat, int depth, int channels)
{
    // Zero-initialised so that an unsupported depth/channel value reaches the
    // image-creation call as a well-defined (invalid) format instead of an
    // indeterminate one.
    cl_image_format format = { 0, 0 };
    int err = 0;

    if (depth == 0)
    {
        format.image_channel_data_type = CL_UNSIGNED_INT8;
    }
    else if (depth == 5)
    {
        format.image_channel_data_type = CL_FLOAT;
    }

    if (channels == 1)
    {
        format.image_channel_order = CL_R;
    }
    else if (channels == 3)
    {
        format.image_channel_order = CL_RGB;
    }
    else if (channels == 4)
    {
        format.image_channel_order = CL_RGBA;
    }

    cl_mem texture;
#ifdef CL_VERSION_1_2
    cl_image_desc desc;
    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
    desc.image_width = mat.step / mat.elemSize();
    desc.image_height = mat.rows;
    desc.image_depth = 0;  // integral field: use 0, not NULL
    desc.image_array_size = 1;
    desc.image_row_pitch = 0;
    desc.image_slice_pitch = 0;
    desc.buffer = NULL;
    desc.num_mip_levels = 0;
    desc.num_samples = 0;
    texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
    texture = clCreateImage2D(
                  mat.clCxt->impl->clContext,
                  CL_MEM_READ_WRITE,
                  &format,
                  mat.step / mat.elemSize(),
                  mat.rows,
                  0,
                  NULL,
                  &err);
#endif
    // Validate the creation result before the image is used.
    openCLSafeCall(err);

    size_t origin[] = { 0, 0, 0 };
    size_t region[] = { mat.step / mat.elemSize(), mat.rows, 1 };
    int copyStatus = clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture,
                                                0, origin, region, 0, NULL, 0);
    if (copyStatus != 0)
    {
        // Do not hand out (or leak) an image whose contents were never written.
        clReleaseMemObject(texture);
    }
    openCLSafeCall(copyStatus);

    return texture;
}
static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, String kernelName) { std::vector<std::pair<size_t , const void *> > args; size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); if (dst.type() == CV_8UC1) globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; const char channelMap[] = { ' ', ' ', '2', '4', '4' }; std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]); Mat mat(1, 1, dst.type(), scalar); #ifdef CL_VERSION_1_2 // this enables backwards portability to // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) && dst.offset == 0 && dst.cols == dst.wholecols) { const int sizeofMap[][7] = { { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) }, { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) }, { 0 , 0 , 0 , 0 , 0 , 0 , 0 }, { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) }, }; int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()]; clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, (void*)mat.data, sizeofGeneric, 0, dst.step * dst.rows, 0, NULL, NULL); } else #endif { oclMat m(mat); args.push_back( std::make_pair( sizeof(cl_mem) , (void*)&m.data )); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols )); args.push_back( std::make_pair( sizeof(cl_int) 
, (void *)&dst.rows )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel )); openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } }
void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut) { Context *clCxt = u1.clCxt; size_t localThread[] = {32, 8, 1}; size_t globalThread[] = { u1.cols, u1.rows, 1 }; int u1_element_size = u1.elemSize(); int u1_step = u1.step/u1_element_size; int u2_element_size = u2.elemSize(); int u2_step = u2.step/u2_element_size; int p11_element_size = p11.elemSize(); int p11_step = p11.step/p11_element_size; int u1_offset_y = u1.offset/u1.step; int u1_offset_x = u1.offset%u1.step; u1_offset_x = u1_offset_x/u1.elemSize(); int u2_offset_y = u2.offset/u2.step; int u2_offset_x = u2.offset%u2.step; u2_offset_x = u2_offset_x/u2.elemSize(); String kernelName = "estimateDualVariablesKernel"; vector< pair<size_t, const void *> > args; args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1.cols)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1.rows)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step)); args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data)); args.push_back( make_pair( sizeof(cl_int), (void*)&p11_step)); args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data)); args.push_back( make_pair( sizeof(cl_float), (void*)&taut)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_step)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); }
static void matmul_poly(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1, double degree1, bool flag) { Context *clCxt = Context::getContext(); String kernelName = "svm_poly"; int src_step = (int)src.step / src.elemSize(); int src2_step = (int)src2.step / src2.elemSize(); int dst_step = (int)dst.step / dst.elemSize(); int x = MIN(16, src_rows); int y = MIN(16, src2_cols); size_t localThreads[] = {x, y, 1}; size_t globalThreads[] = {src2_cols, src_rows, 1}; int width = var_count; char build_options[50]; if(flag) { sprintf(build_options, "-D ADDPOW"); } std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step)); args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src2.data)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_step)); args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&width)); float alpha = 0.0f, beta = 0.0f, degree = 0.0f; if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { alpha = (float)alpha1; beta = (float)beta1; degree = (float)degree1; args.push_back(std::make_pair(sizeof(cl_float), (void* )&alpha)); args.push_back(std::make_pair(sizeof(cl_float), (void* )&beta)); args.push_back(std::make_pair(sizeof(cl_float), (void* )°ree)); } else { args.push_back(std::make_pair(sizeof(cl_double), (void* )&alpha1)); args.push_back(std::make_pair(sizeof(cl_double), (void* )&beta1)); args.push_back(std::make_pair(sizeof(cl_double), (void* )°ree1)); } openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options); }
void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat ¢ers) { //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) //{ // CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double"); // return; //} Context *clCxt = src.clCxt; int labels_step = (int)(labels.step/labels.elemSize()); String kernelname = "distanceToCenters"; int threadNum = src.rows > 256 ? 256 : src.rows; size_t localThreads[3] = {1, threadNum, 1}; size_t globalThreads[3] = {1, src.rows, 1}; std::vector<std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_int), (void *)&labels_step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)¢ers.rows)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&labels.data)); args.push_back(std::make_pair(sizeof(cl_int), (void *)¢ers.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)¢ers.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dists.data)); openCLExecuteKernel(clCxt, &kmeans_kernel, kernelname, globalThreads, localThreads, args, -1, -1, NULL); }
static void copyTo(const oclMat &src, oclMat &m ) { CV_DbgAssert(!src.empty()); m.create(src.size(), src.type()); openCLCopyBuffer2D(src.clCxt, m.data, m.step, m.offset, src.data, src.step, src.cols * src.elemSize(), src.rows, src.offset); }
void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures) { Context* clCxt = Context::getContext(); size_t local_thread[] = {32, 8, 1}; size_t global_thread[] = {modesUsed.cols, modesUsed.rows, 1}; int weight_step = (int)(weight.step/weight.elemSize()); int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize()); int mean_step = (int)(mean.step/mean.elemSize()); int dst_step = (int)(dst.step/dst.elemSize()); int dst_y = (int)(dst.offset/dst.step); int dst_x = (int)(dst.offset%dst.step); dst_x = dst_x/(int)dst.elemSize(); String kernel_name = "getBackgroundImage2_kernel"; std::vector<std::pair<size_t, const void*> > args; char build_option[50]; if(cn == 1) { snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures); }else { snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures); } args.push_back(std::make_pair(sizeof(cl_mem), (void*)&modesUsed.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst.data)); args.push_back(std::make_pair(sizeof(cl_float), (void*)&c_TB)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_step)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_step)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_x)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_y)); openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option); }
static void lkSparse_run(oclMat &I, oclMat &J, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount, int level, /*dim3 block, */dim3 patch, Size winSize, int iters) { Context *clCxt = I.clCxt; int elemCntPerRow = I.step / I.elemSize(); std::string kernelName = "lkSparse"; size_t localThreads[3] = { 8, 8, 1 }; size_t globalThreads[3] = { 8 * ptcount, 8, 1}; int cn = I.oclchannels(); char calcErr; if (level == 0) { calcErr = 1; } else { calcErr = 0; } std::vector<std::pair<size_t , const void *> > args; cl_mem ITex = bindTexture(I); cl_mem JTex = bindTexture(J); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPts.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPts.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&level )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr )); try { openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), 
I.depth(), CLFLUSH); } catch(Exception&) { printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n"); releaseTexture(ITex); releaseTexture(JTex); ITex = (cl_mem)I.data; JTex = (cl_mem)J.data; localThreads[1] = globalThreads[1] = 32; args.insert( args.begin()+11, std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) ); openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); } }
static void lkSparse_run(oclMat &I, oclMat &J, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount, int level, /*dim3 block, */dim3 patch, Size winSize, int iters) { Context *clCxt = I.clCxt; int elemCntPerRow = I.step / I.elemSize(); String kernelName = "lkSparse"; bool isImageSupported = support_image2d(); size_t localThreads[3] = { 8, isImageSupported ? 8 : 32, 1 }; size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 8 : 32, 1}; int cn = I.oclchannels(); char calcErr; if (level == 0) { calcErr = 1; } else { calcErr = 0; } std::vector<std::pair<size_t , const void *> > args; cl_mem ITex = isImageSupported ? bindTexture(I) : (cl_mem)I.data; cl_mem JTex = isImageSupported ? bindTexture(J) : (cl_mem)J.data; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPts.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPts.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&level )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols )); if (!isImageSupported) args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) ); args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width )); args.push_back( std::make_pair( sizeof(cl_int), (void 
*)&winSize.height )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr )); if(isImageSupported) { openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); releaseTexture(ITex); releaseTexture(JTex); } else { openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); } }
static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var, int nmixtures, float varThreshold, float backgroundRatio) { Context* clCxt = Context::getContext(); size_t local_thread[] = {32, 8, 1}; size_t global_thread[] = {frame.cols, frame.rows, 1}; int frame_step = (int)(frame.step/frame.elemSize()); int fgmask_step = (int)(fgmask.step/fgmask.elemSize()); int weight_step = (int)(weight.step/weight.elemSize()); int mean_step = (int)(mean.step/mean.elemSize()); int var_step = (int)(var.step/var.elemSize()); int fgmask_offset_y = (int)(fgmask.offset/fgmask.step); int fgmask_offset_x = (int)(fgmask.offset%fgmask.step); fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize(); int frame_offset_y = (int)(frame.offset/frame.step); int frame_offset_x = (int)(frame.offset%frame.step); frame_offset_x = frame_offset_x/(int)frame.elemSize(); char build_option[50]; if(cn == 1) { snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures); }else { snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures); } String kernel_name = "mog_withoutLearning_kernel"; std::vector<std::pair<size_t, const void*> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step)); 
args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold)); args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x)); args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y)); openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option); }
static void lkSparse_run(oclMat &I, oclMat &J, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount, int level, /*dim3 block, */dim3 patch, Size winSize, int iters) { Context *clCxt = I.clCxt; int elemCntPerRow = I.step / I.elemSize(); String kernelName = "lkSparse"; bool isImageSupported = support_image2d(); size_t localThreads[3] = { 8, isImageSupported ? 8 : 32, 1 }; size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 8 : 32, 1}; int cn = I.oclchannels(); char calcErr = level==0?1:0; std::vector<std::pair<size_t , const void *> > args; cl_mem ITex = isImageSupported ? bindTexture(I) : (cl_mem)I.data; cl_mem JTex = isImageSupported ? bindTexture(J) : (cl_mem)J.data; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPts.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPts.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&level )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols )); if (!isImageSupported) args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) ); args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height )); args.push_back( std::make_pair( 
sizeof(cl_int), (void *)&iters )); args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr )); if(isImageSupported) { std::stringstream idxStr; idxStr << kernelName.c_str() << "_C" << I.oclchannels() << "_D" << I.depth(); cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str().c_str()); int wave_size = (int)queryWaveFrontSize(kernel); openCLSafeCall(clReleaseKernel(kernel)); static char opt[32] = {0}; sprintf(opt, " -D WAVE_SIZE=%d", wave_size); openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), opt, CLFLUSH); releaseTexture(ITex); releaseTexture(JTex); } else openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); }
void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &xmap, oclMat &ymap) { CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F); CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F); CV_Assert((T.size() == Size(3, 1) || T.size() == Size(1, 3)) && T.type() == CV_32F && T.isContinuous()); Mat K_Rinv = K * R.t(); CV_Assert(K_Rinv.isContinuous()); Mat KRT_mat(1, 12, CV_32FC1); // 9 + 3 KRT_mat(Range::all(), Range(0, 8)) = K_Rinv.reshape(1, 1); KRT_mat(Range::all(), Range(9, 11)) = T; oclMat KRT_oclMat(KRT_mat); // transfer K_Rinv and T into a single cl_mem xmap.create(dst_roi.size(), CV_32F); ymap.create(dst_roi.size(), CV_32F); int tl_u = dst_roi.tl().x; int tl_v = dst_roi.tl().y; int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize(); int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize(); std::vector< std::pair<size_t, const void *> > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KRT_mat.data)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset)); args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale)); size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 }; size_t localThreads[3] = { 32, 8, 1 }; openCLExecuteKernel(Context::getContext(), &build_warps, 
"buildWarpPlaneMaps", globalThreads, localThreads, args, -1, -1); }
/////////////////////////////////////////////////////////////////////////// ////////////////////////////////// CopyTo ///////////////////////////////// /////////////////////////////////////////////////////////////////////////// void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName) { CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols && src.rows == dst.rows && src.cols == dst.cols); vector<pair<size_t , const void *> > args; int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1}, {2, 2, 1, 1, 1, 1, 1}, {8, 8, 8, 8 , 4, 4, 4}, //vector length is undefined when channels = 3 {1, 1, 1, 1, 1, 1, 1} }; size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3]; int vector_length = vector_lengths[dst.channels() -1][dst.depth()]; int offset_cols = divUp(dst.offset, dst.elemSize()) & (vector_length - 1); int cols = vector_length == 1 ? divUp(dst.cols, vector_length) : divUp(dst.cols + offset_cols, vector_length); globalThreads[0] = divUp(cols, localThreads[0]) * localThreads[0]; globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1]; globalThreads[2] = 1; int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize(); int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize(); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , 
(void *)&mask.step )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads, localThreads, args, dst.channels(), dst.depth()); }
static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, int src2_cols, int var_count, double gamma1, bool flag) { Context *clCxt = Context::getContext(); String kernelName = "svm_rbf"; int width = var_count; int src_step = (int)src.step / src.elemSize(); int src_e_step = (int)src_e.step / src_e.elemSize(); int dst_step = (int)dst.step / dst.elemSize(); int x = MIN(16, src_rows); int y = MIN(16, src2_cols); size_t localThreads[] = {x, y, 1}; size_t globalThreads[] = {src2_cols, src_rows, 1}; char build_options[50]; if(flag) sprintf(build_options, "-D ADDEXP"); std::vector< std::pair<size_t, const void *> > args; args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step)); args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src_e.data)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_e_step)); args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols)); args.push_back(std::make_pair(sizeof(cl_int), (void* )&width)); float gamma = 0.0f; if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { gamma = (float)gamma1; args.push_back(std::make_pair(sizeof(cl_float), (void* )&gamma)); } else args.push_back(std::make_pair(sizeof(cl_double), (void* )&gamma1)); openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options); }
void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap) { CV_Assert(M.rows == 3 && M.cols == 3); CV_Assert(dsize.area() > 0); xmap.create(dsize, CV_32FC1); ymap.create(dsize, CV_32FC1); float coeffs[3 * 3]; Mat coeffsMat(3, 3, CV_32F, (void *)coeffs); if (inverse) M.convertTo(coeffsMat, coeffsMat.type()); else { cv::Mat iM; invert(M, iM); iM.convertTo(coeffsMat, coeffsMat.type()); } oclMat coeffsOclMat(coeffsMat.reshape(1, 1)); int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize(); int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize(); std::vector< std::pair<size_t, const void *> > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset)); size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 }; openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPerspectiveMaps", globalThreads, NULL, args, -1, -1); }
/////////////////////////////////////////////////////////////////////////// ////////////////////////////////// CopyTo ///////////////////////////////// /////////////////////////////////////////////////////////////////////////// static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName) { CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols && src.rows == dst.rows && src.cols == dst.cols && mask.type() == CV_8UC1); vector<pair<size_t , const void *> > args; std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"}, {"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"}, {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"}, {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"} }; char compile_option[32]; sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str()); size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3]; globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0]; globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1]; globalThreads[2] = 1; int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize(); int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize(); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void 
*)&dstoffset_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); }
// FIXME: // This function cannot sort arrays with duplicated keys static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan) { CV_Error(-1, "This function is incorrect at the moment."); Context * cxt = Context::getContext(); size_t globalThreads[3] = {vecSize, 1, 1}; std::vector< std::pair<size_t, const void *> > args; char build_opt_buf [100]; genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf); //local String kernelname = "selectionSortLocal"; #ifdef ANDROID int lds_size = cxt->getDeviceInfo().maxWorkGroupSize * keys.elemSize(); #else int lds_size = GROUP_SIZE * keys.elemSize(); #endif args.push_back(std::make_pair(sizeof(cl_mem), (void *)&keys.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&vals.data)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSize)); args.push_back(std::make_pair(lds_size, (void*)NULL)); #ifdef ANDROID openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf); #else size_t localThreads[3] = {GROUP_SIZE, 1, 1}; openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf); #endif //final kernelname = "selectionSortFinal"; args.pop_back(); #ifdef ANDROID openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf); #else openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf); #endif }
void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &xmap, oclMat &ymap) { CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F); CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F); Mat K_Rinv = K * R.t(); CV_Assert(K_Rinv.isContinuous()); oclMat KR_oclMat(K_Rinv.reshape(1, 1)); xmap.create(dst_roi.size(), CV_32F); ymap.create(dst_roi.size(), CV_32F); int tl_u = dst_roi.tl().x; int tl_v = dst_roi.tl().y; int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize(); int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize(); std::vector< std::pair<size_t, const void *> > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset)); args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale)); size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 }; size_t localThreads[3] = { 32, 8, 1 }; openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpCylindricalMaps", globalThreads, localThreads, args, -1, -1); }
void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio) { Context* clCxt = Context::getContext(); size_t local_thread[] = {32, 8, 1}; size_t global_thread[] = {(size_t)dst.cols, (size_t)dst.rows, 1}; int weight_step = (int)(weight.step/weight.elemSize()); int mean_step = (int)(mean.step/mean.elemSize()); int dst_step = (int)(dst.step/dst.elemSize()); char build_option[50]; if(cn == 1) { snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures); }else { snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures); } String kernel_name = "getBackgroundImage_kernel"; vector< pair<size_t, const void*> > args; args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data)); args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data)); args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data)); args.push_back(make_pair(sizeof(cl_int), (void*)&dst.rows)); args.push_back(make_pair(sizeof(cl_int), (void*)&dst.cols)); args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step)); args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step)); args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); args.push_back(make_pair(sizeof(cl_float), (void*)&backgroundRatio)); openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option); }
void cv::ocl::blendLinear(const oclMat &src1, const oclMat &src2, const oclMat &weights1, const oclMat &weights2, oclMat &dst) { CV_Assert(src1.depth() <= CV_32F); CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); CV_Assert(weights1.size() == weights2.size() && weights1.size() == src1.size() && weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1); dst.create(src1.size(), src1.type()); size_t globalSize[] = { (size_t)dst.cols, (size_t)dst.rows, 1}; size_t localSize[] = { 16, 16, 1 }; int depth = dst.depth(), ocn = dst.oclchannels(); int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize(); int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize(); int weight1_step = weights1.step / weights1.elemSize(), weight1_offset = weights1.offset / weights1.elemSize(); int weight2_step = weights2.step / weights2.elemSize(), weight2_offset = weights2.offset / weights2.elemSize(); int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); const char * const channelMap[] = { "", "", "2", "4", "4" }; const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; std::string buildOptions = format("-D T=%s%s -D convertToT=convert_%s%s%s -D FT=float%s -D convertToFT=convert_float%s", typeMap[depth], channelMap[ocn], typeMap[depth], channelMap[ocn], depth >= CV_32S ? 
"" : "_sat_rte", channelMap[ocn], channelMap[ocn]); vector< pair<size_t, const void *> > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step )); args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_step )); args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_step )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); openCLExecuteKernel(src1.clCxt, &blend_linear, "blendLinear", globalSize, localSize, args, -1, -1, buildOptions.c_str()); }
void ocl_tvl1flow::centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy) { Context *clCxt = src.clCxt; size_t localThreads[3] = {32, 8, 1}; size_t globalThreads[3] = {src.cols, src.rows, 1}; int srcElementSize = src.elemSize(); int src_step = src.step/srcElementSize; int dElememntSize = dx.elemSize(); int dx_step = dx.step/dElememntSize; String kernelName = "centeredGradientKernel"; vector< pair<size_t, const void *> > args; args.push_back( make_pair( sizeof(cl_mem), (void*)&src.data)); args.push_back( make_pair( sizeof(cl_int), (void*)&src.cols)); args.push_back( make_pair( sizeof(cl_int), (void*)&src.rows)); args.push_back( make_pair( sizeof(cl_int), (void*)&src_step)); args.push_back( make_pair( sizeof(cl_mem), (void*)&dx.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&dy.data)); args.push_back( make_pair( sizeof(cl_int), (void*)&dx_step)); openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThreads, localThreads, args, -1, -1); }
/**
 * Runs the "nonmaxSupression" kernel over the count_ candidate keypoints.
 *
 * A one-int device buffer, initialised to zero, is handed to the kernel as a
 * counter and read back after the launch.
 *
 * @param keypoints  Output buffer written by the kernel (device).
 * @return The counter value read back from the device.
 */
int cv::ocl::FAST_OCL::nonmaxSupressionOCL(oclMat& keypoints)
{
    size_t localThreads[3] = {256, 1, 1};
    size_t globalThreads[3] = {(size_t)count_, 1, 1};

    Context *clCxt = Context::getContext();
    String kernelName = "nonmaxSupression";
    std::vector< std::pair<size_t, const void *> > args;

    int counter = 0;
    int err = CL_SUCCESS;
    cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(),
                                      CL_MEM_COPY_HOST_PTR, sizeof(int),
                                      &counter, &err);
    // Do not launch with an invalid buffer handle if the allocation failed.
    openCLSafeCall(err);

    // Steps are passed to the kernel in elements.
    int kpLocStep = kpLoc_.step / kpLoc_.elemSize();
    int sStep = score_.step / score_.elemSize();
    int kStep = keypoints.step / keypoints.elemSize();

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&kpLoc_.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&score_.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&count_));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&sStep));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&kStep));

    try
    {
        openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1);

        // Blocking read: `counter` is valid as soon as the call returns.
        openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(),
                                           counterCL, CL_TRUE, 0, sizeof(int), &counter, 0, NULL, NULL));
    }
    catch (...)
    {
        // Release the counter buffer before propagating a launch/read failure.
        clReleaseMemObject(counterCL);
        throw;
    }

    openCLSafeCall(clReleaseMemObject(counterCL));

    return counter;
}
/**
 * Creates a 2D OpenCL image with the dimensions of `mat` and copies the
 * matrix contents into it.
 *
 * Supported depths are CV_8U, CV_32S and CV_32F; supported oclchannels()
 * values are 1, 3 and 4. Anything else is reported through CV_Error.
 *
 * When the rows of `mat` are padded (cols * elemSize != step) the data is
 * first repacked into a temporary tightly packed buffer.
 *
 * @param mat  Source matrix (device).
 * @return The new image object; no release is performed here, so the caller
 *         is responsible for it.
 */
cl_mem bindTexture(const oclMat &mat)
{
    cl_mem texture;
    cl_image_format format;
    int err;
    const int depth = mat.depth();
    const int channels = mat.oclchannels();

    switch(depth)
    {
    case CV_8U:
        format.image_channel_data_type = CL_UNSIGNED_INT8;
        break;
    case CV_32S:
        format.image_channel_data_type = CL_UNSIGNED_INT32;
        break;
    case CV_32F:
        format.image_channel_data_type = CL_FLOAT;
        break;
    default:
        CV_Error(-1, "Image format is not supported");
        break;
    }
    switch(channels)
    {
    case 1:
        format.image_channel_order = CL_R;
        break;
    case 3:
        format.image_channel_order = CL_RGB;
        break;
    case 4:
        format.image_channel_order = CL_RGBA;
        break;
    default:
        CV_Error(-1, "Image format is not supported");
        break;
    }

    cl_context context = (cl_context)mat.clCxt->oclContext();
    cl_command_queue queue = (cl_command_queue)mat.clCxt->oclCommandQueue();

#ifdef CL_VERSION_1_2
    //this enables backwards portability to
    //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
    if(Context::getContext()->supportsFeature(Context::CL_VER_1_2))
    {
        cl_image_desc desc;
        desc.image_type = CL_MEM_OBJECT_IMAGE2D;
        desc.image_width = mat.cols;
        desc.image_height = mat.rows;
        desc.image_depth = 0;
        desc.image_array_size = 1;
        desc.image_row_pitch = 0;
        desc.image_slice_pitch = 0;
        desc.buffer = NULL;
        desc.num_mip_levels = 0;
        desc.num_samples = 0;
        texture = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
    }
    else
#endif
    {
        texture = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, mat.cols, mat.rows, 0, NULL, &err);
    }
    // Report a failed image creation before the handle is used for any copy.
    openCLSafeCall(err);

    size_t origin[] = { 0, 0, 0 };
    size_t region[] = { (size_t)mat.cols, (size_t)mat.rows, 1 };

    // Padded rows cannot be copied to the image directly; repack them first.
    const bool needsRepack = (mat.cols * mat.elemSize() != mat.step);

    cl_mem devData;
    if (needsRepack)
    {
        devData = clCreateBuffer(context, CL_MEM_READ_ONLY,
                                 mat.cols * mat.rows * mat.elemSize(), NULL, &err);
        if (err != CL_SUCCESS)
        {
            clReleaseMemObject(texture);
            openCLSafeCall(err);
        }

        // NOTE(review): the copy starts at buffer origin 0 and does not apply
        // mat.offset — confirm this is intended for ROI matrices.
        const size_t regin[3] = {mat.cols * mat.elemSize(), (size_t)mat.rows, 1};
        err = clEnqueueCopyBufferRect(queue, (cl_mem)mat.data, devData, origin, origin,
                                      regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
        clFlush(queue);
        if (err != CL_SUCCESS)
        {
            clReleaseMemObject(devData);
            clReleaseMemObject(texture);
            openCLSafeCall(err);
        }
    }
    else
    {
        devData = (cl_mem)mat.data;
    }

    err = clEnqueueCopyBufferToImage(queue, devData, texture, 0, origin, region, 0, NULL, 0);

    if (needsRepack)
    {
        // The staging buffer is only needed for the copy enqueued above.
        clFlush(queue);
        clReleaseMemObject(devData);
    }

    if (err != CL_SUCCESS)
    {
        // Do not hand back an image whose contents were never written.
        clReleaseMemObject(texture);
        openCLSafeCall(err);
    }

    return texture;
}
/**
 * Computes image moments of a single-channel matrix on the OpenCL device.
 *
 * The "CvMoments" kernel writes ten partial sums per tile into dst_m (ten
 * consecutive rows per vertical block, one column per horizontal block);
 * they are accumulated on the host and completed by icvCompleteMomentState.
 *
 * @param src     Single-channel input (for image). In binary mode it is
 *                replaced by an 8-bit matrix, so the caller's object is modified.
 * @param binary  Selects binary mode; also forwarded to the kernel as 0/1.
 * @return The accumulated moments.
 */
Moments ocl_moments(oclMat& src, bool binary)
{
    CV_Assert(src.oclchannels() == 1);

    const bool doubleSupported = Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE);
    if(src.type() == CV_64FC1 && !doubleSupported)
    {
        CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
    }

    if(binary)
    {
        // NOTE(review): for a CV_8UC1 input `mask` stays empty and src8u is
        // created without an initial value — verify the intended semantics.
        oclMat mask;
        if(src.type() != CV_8UC1)
        {
            src.convertTo(mask, CV_8UC1);
        }
        oclMat src8u(src.size(), CV_8UC1);
        src8u.setTo(Scalar(255), mask);
        src = src8u;
    }

    const int TILE_SIZE = 256;

    CvMoments mom;
    memset(&mom, 0, sizeof(mom));

    cv::Size size = src.size();
    // Number of tiles in each direction, rounded up.
    int blockx = (size.width + TILE_SIZE - 1)/TILE_SIZE;
    int blocky = (size.height + TILE_SIZE - 1)/TILE_SIZE;

    // Partial sums are stored as double when the device supports it, float otherwise.
    oclMat dst_m;
    dst_m.create(blocky * 10, blockx, doubleSupported ? CV_64FC1 : CV_32FC1);

    int tile_height = TILE_SIZE;
    size_t localThreads[3] = {1, tile_height, 1};
    size_t globalThreads[3] = {blockx, size.height, 1};

    int src_step = (int)(src.step/src.elemSize());
    int dstm_step = (int)(dst_m.step/dst_m.elemSize());
    int binary_ = binary ? 1 : 0;

    std::vector<std::pair<size_t , const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstm_step ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&binary_));

    // The input type is selected at kernel build time.
    char builOption[128];
    if(binary || src.type() == CV_8UC1)
    {
        snprintf(builOption, 128, "-D CV_8UC1");
    }
    else if(src.type() == CV_16UC1)
    {
        snprintf(builOption, 128, "-D CV_16UC1");
    }
    else if(src.type() == CV_16SC1)
    {
        snprintf(builOption, 128, "-D CV_16SC1");
    }
    else if(src.type() == CV_32FC1)
    {
        snprintf(builOption, 128, "-D CV_32FC1");
    }
    else if(src.type() == CV_64FC1)
    {
        snprintf(builOption, 128, "-D CV_64FC1");
    }
    else
    {
        CV_Error( CV_StsUnsupportedFormat, "" );
    }

    openCLExecuteKernel(Context::getContext(), &moments, "CvMoments", globalThreads, localThreads, args, -1, -1, builOption);

    // Download the partial sums and accumulate them in double precision.
    Mat tmp(dst_m);
    tmp.convertTo(tmp, CV_64FC1);

    double tmp_m[10] = {0};
    for(int row = 0; row < tmp.rows; row += 10)
    {
        for(int col = 0; col < tmp.cols; col++)
        {
            for(int k = 0; k < 10; k++)
            {
                tmp_m[k] += tmp.at<double>(row + k, col);
            }
        }
    }

    mom.m00 = tmp_m[0];
    mom.m10 = tmp_m[1];
    mom.m01 = tmp_m[2];
    mom.m20 = tmp_m[3];
    mom.m11 = tmp_m[4];
    mom.m02 = tmp_m[5];
    mom.m30 = tmp_m[6];
    mom.m21 = tmp_m[7];
    mom.m12 = tmp_m[8];
    mom.m03 = tmp_m[9];
    icvCompleteMomentState( &mom );

    return mom;
}
void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho) { Context* clCxt = I0.clCxt; const bool isImgSupported = support_image2d(clCxt); CV_Assert(isImgSupported); int u1ElementSize = u1.elemSize(); int u1Step = u1.step/u1ElementSize; int u2ElementSize = u2.elemSize(); int u2Step = u2.step/u2ElementSize; int I0ElementSize = I0.elemSize(); int I0Step = I0.step/I0ElementSize; int I1w_element_size = I1w.elemSize(); int I1w_step = I1w.step/I1w_element_size; int u1_offset_y = u1.offset/u1.step; int u1_offset_x = u1.offset%u1.step; u1_offset_x = u1_offset_x/u1.elemSize(); int u2_offset_y = u2.offset/u2.step; int u2_offset_x = u2.offset%u2.step; u2_offset_x = u2_offset_x/u2.elemSize(); size_t localThread[] = {32, 8, 1}; size_t globalThread[] = { I0.cols, I0.rows, 1 }; cl_mem I1_tex; cl_mem I1x_tex; cl_mem I1y_tex; I1_tex = bindTexture(I1); I1x_tex = bindTexture(I1x); I1y_tex = bindTexture(I1y); String kernelName = "warpBackwardKernel"; vector< pair<size_t, const void *> > args; args.push_back( make_pair( sizeof(cl_mem), (void*)&I0.data)); args.push_back( make_pair( sizeof(cl_int), (void*)&I0Step)); args.push_back( make_pair( sizeof(cl_int), (void*)&I0.cols)); args.push_back( make_pair( sizeof(cl_int), (void*)&I0.rows)); args.push_back( make_pair( sizeof(cl_mem), (void*)&I1_tex)); args.push_back( make_pair( sizeof(cl_mem), (void*)&I1x_tex)); args.push_back( make_pair( sizeof(cl_mem), (void*)&I1y_tex)); args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1Step)); args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&I1w.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data)); args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data)); 
args.push_back( make_pair( sizeof(cl_mem), (void*)&rho.data)); args.push_back( make_pair( sizeof(cl_int), (void*)&I1w_step)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2Step)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x)); args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); releaseTexture(I1_tex); releaseTexture(I1x_tex); releaseTexture(I1y_tex); }
static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v, oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters) { Context *clCxt = I.clCxt; bool isImageSupported = clCxt->impl->devName.find("Intel(R) HD Graphics") == std::string::npos; int elemCntPerRow = I.step / I.elemSize(); std::string kernelName = "lkDense"; size_t localThreads[3] = { 16, 16, 1 }; size_t globalThreads[3] = { I.cols, I.rows, 1}; bool calcErr; if (err) { calcErr = true; } else { calcErr = false; } cl_mem ITex; cl_mem JTex; if (isImageSupported) { ITex = bindTexture(I); JTex = bindTexture(J); } else { ITex = (cl_mem)I.data; JTex = (cl_mem)J.data; } //int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2}; //const int patchWidth = 16 + 2 * halfWin.x; //const int patchHeight = 16 + 2 * halfWin.y; //size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int); std::vector<std::pair<size_t , const void *> > args; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&u.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&u.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&v.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&v.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevU.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevU.step )); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevV.data )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevV.step )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols )); //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&(*err).data )); //args.push_back( std::make_pair( sizeof(cl_int), (void *)&(*err).step )); if (!isImageSupported) { args.push_back( std::make_pair( sizeof(cl_int), (void 
*)&elemCntPerRow ) ); } args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height )); args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr )); if (isImageSupported) { openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); releaseTexture(ITex); releaseTexture(JTex); } else { //printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n"); openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); } }