static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, String kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
    std::vector<std::pair<size_t , const void *> > args;
    size_t localThreads[3] = { 16, 16, 1 };
    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
    int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();

    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    const char channelMap[] = { ' ', ' ', '2', '4', '4' };
    std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);

    oclMat m(Mat(1, 1, dst.type(), scalar));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&m.data ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.offset ));
    openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads,
                        localThreads, args, -1, -1, buildOptions.c_str());
}
void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
    vector<pair<size_t , const void *> > args;
    cl_float4 val;
    val.s[0] = scalar.val[0];
    val.s[1] = scalar.val[1];
    val.s[2] = scalar.val[2];
    val.s[3] = scalar.val[3];
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];
    globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
    globalThreads[2] = 1;
    if(dst.type() == CV_8UC1)
    {
        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    }
    int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
    args.push_back( make_pair( sizeof(cl_float4) , (void *)&val ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
    openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads,
                        localThreads, args, dst.channels(), dst.depth());
}
示例#3
0
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// Launches the "convert_to_S<src depth>" kernel from the operator_convertTo
// program to convert src into dst.
//
// src   - source matrix; must have the same rows/cols as dst (debug assert).
// dst   - destination matrix; its channel count and depth are passed to
//         openCLExecuteKernel2 alongside the kernel name.
// alpha - passed to the kernel as a float.
// beta  - passed to the kernel as a float.
static void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta)
{
    // Kernel name carries the source depth as a numeric suffix.
    std::string kernelName = "convert_to_S";
    std::stringstream idxStr;
    idxStr << src.depth();
    kernelName += idxStr.str();
    float alpha_f = (float)alpha, beta_f = (float)beta;
    CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);
    std::vector<std::pair<size_t , const void *> > args;
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];
    // Round the grid up to a whole number of work-groups in each dimension.
    globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
    globalThreads[2] = 1;
    // Steps and offsets divided by the respective element size.
    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
    if(dst.type() == CV_8UC1)
    {
        // Narrower grid in x for CV_8UC1: (cols + 4) / 4 items. Standard round-up
        // (+ group - 1) so that a count which is already a multiple of the group
        // width does not get an extra, entirely idle work-group.
        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    }
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f ));
    openCLExecuteKernel2(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
                         localThreads, args, dst.oclchannels(), dst.depth(), CLFLUSH);
}
示例#4
0
// Launches "estimateUKernel" from the tvl1flow program over a grid of
// I1wx.cols x I1wx.rows work-items (work-group size 32x8).
//
// All matrices are forwarded to the kernel as buffers. Only I1wx, u1 and u2
// additionally contribute a row step (in elements), and only u1 and u2
// contribute an (x, y) offset. l_t, theta and calc_error are passed through
// unchanged.
void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
    oclMat &rho_c, oclMat &p11, oclMat &p12,
    oclMat &p21, oclMat &p22, oclMat &u1,
    oclMat &u2, oclMat &error, float l_t, float theta, char calc_error)
{
    String kernelName = "estimateUKernel";
    Context* clCxt = I1wx.clCxt;

    // Launch geometry.
    size_t localThread[] = {32, 8, 1};
    size_t globalThread[3];
    globalThread[0] = I1wx.cols;
    globalThread[1] = I1wx.rows;
    globalThread[2] = 1;

    // Row steps divided by the element size.
    int i1wxElemBytes = I1wx.elemSize();
    int i1wxPitch = I1wx.step/i1wxElemBytes;

    int u1ElemBytes = u1.elemSize();
    int u1Pitch = u1.step/u1ElemBytes;

    int u2ElemBytes = u2.elemSize();
    int u2Pitch = u2.step/u2ElemBytes;

    // Split each offset into a row index (offset / step) and a column index
    // (remainder within the row, divided by the element size).
    int u1Row = u1.offset/u1.step;
    int u1ByteInRow = u1.offset%u1.step;
    int u1Col = u1ByteInRow/u1.elemSize();

    int u2Row = u2.offset/u2.step;
    int u2ByteInRow = u2.offset%u2.step;
    int u2Col = u2ByteInRow/u2.elemSize();

    // Arguments, in the order the kernel expects them.
    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data));
    args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.cols));
    args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.rows));
    args.push_back( make_pair( sizeof(cl_int), (void*)&i1wxPitch));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&rho_c.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u1Pitch));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&error.data));
    args.push_back( make_pair( sizeof(cl_float), (void*)&l_t));
    args.push_back( make_pair( sizeof(cl_float), (void*)&theta));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u2Pitch));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u1Col));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u1Row));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u2Col));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u2Row));
    args.push_back( make_pair( sizeof(cl_char), (void*)&calc_error));

    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
}
示例#5
0
// Creates a 2D OpenCL image sized (mat.step / mat.elemSize()) x mat.rows and
// copies mat's buffer into it.
//
// mat      - source matrix; its context, command queue and buffer are used.
// depth    - element depth code; only 0 (8-bit unsigned) and 5 (float) select
//            a channel data type here.
// channels - channel count; only 1, 3 and 4 select a channel order here.
//
// Returns the created image. Errors from image creation and from the copy are
// reported through openCLSafeCall.
static cl_mem bindTexture(const oclMat &mat, int depth, int channels)
{
    cl_mem texture;
    // Zero-initialised so that an unhandled depth/channel combination never hands
    // indeterminate values to OpenCL; such a format is expected to be rejected by
    // the image-creation call below rather than silently accepted.
    cl_image_format format = { 0, 0 };
    cl_int err = CL_SUCCESS;
    if(depth == 0)
    {
        format.image_channel_data_type = CL_UNSIGNED_INT8;
    }
    else if(depth == 5)
    {
        format.image_channel_data_type = CL_FLOAT;
    }
    if(channels == 1)
    {
        format.image_channel_order     = CL_R;
    }
    else if(channels == 3)
    {
        format.image_channel_order     = CL_RGB;
    }
    else if(channels == 4)
    {
        format.image_channel_order     = CL_RGBA;
    }
#ifdef CL_VERSION_1_2
    cl_image_desc desc;
    desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
    desc.image_width      = mat.step / mat.elemSize();
    desc.image_height     = mat.rows;
    desc.image_depth      = 0;   // a size, not a pointer
    desc.image_array_size = 1;
    desc.image_row_pitch  = 0;
    desc.image_slice_pitch = 0;
    desc.buffer           = NULL;
    desc.num_mip_levels   = 0;
    desc.num_samples      = 0;
    texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
    texture = clCreateImage2D(
                  mat.clCxt->impl->clContext,
                  CL_MEM_READ_WRITE,
                  &format,
                  mat.step / mat.elemSize(),
                  mat.rows,
                  0,
                  NULL,
                  &err);
#endif
    // Check the creation result before the image is used as a copy target.
    openCLSafeCall(err);

    size_t origin[] = { 0, 0, 0 };
    size_t region[] = { static_cast<size_t>(mat.step / mat.elemSize()), static_cast<size_t>(mat.rows), 1 };
    err = clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, NULL);
    if(err != CL_SUCCESS)
    {
        // The caller never receives the handle on failure, so drop it here.
        clReleaseMemObject(texture);
    }
    openCLSafeCall(err);

    return texture;
}
static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, String kernelName)
{
    std::vector<std::pair<size_t , const void *> > args;

    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
    int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();

    if (dst.type() == CV_8UC1)
        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];

    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    const char channelMap[] = { ' ', ' ', '2', '4', '4' };
    std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);

    Mat mat(1, 1, dst.type(), scalar);

#ifdef CL_VERSION_1_2
    // this enables backwards portability to
    // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
    if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) &&
        dst.offset == 0 && dst.cols == dst.wholecols)
    {
        const int sizeofMap[][7] =
            {
                { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double)  },
                { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) },
                { 0                , 0               , 0                 , 0                , 0              , 0                ,  0                 },
                { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) },
            };
        int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()];

        clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
                            (cl_mem)dst.data, (void*)mat.data, sizeofGeneric,
                            0, dst.step * dst.rows, 0, NULL, NULL);
    }
    else
#endif
    {
        oclMat m(mat);
        args.push_back( std::make_pair( sizeof(cl_mem) , (void*)&m.data ));
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));

        openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
            localThreads, args, -1, -1, buildOptions.c_str());
    }
}
示例#7
0
// Launches "estimateDualVariablesKernel" from the tvl1flow program over a grid
// of u1.cols x u1.rows work-items (work-group size 32x8).
//
// u1, u2, p11, p12, p21 and p22 are forwarded as buffers; u1, u2 and p11
// additionally contribute a row step (in elements), and u1 and u2 an (x, y)
// offset. taut is passed through unchanged.
void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut)
{
    String kernelName = "estimateDualVariablesKernel";
    Context *clCxt = u1.clCxt;

    // Launch geometry.
    size_t localThread[] = {32, 8, 1};
    size_t globalThread[3];
    globalThread[0] = u1.cols;
    globalThread[1] = u1.rows;
    globalThread[2] = 1;

    // Row steps divided by the element size.
    int u1ElemBytes = u1.elemSize();
    int u1Pitch = u1.step/u1ElemBytes;

    int u2ElemBytes = u2.elemSize();
    int u2Pitch = u2.step/u2ElemBytes;

    int p11ElemBytes = p11.elemSize();
    int p11Pitch = p11.step/p11ElemBytes;

    // Split each offset into a row index (offset / step) and a column index
    // (remainder within the row, divided by the element size).
    int u1Row = u1.offset/u1.step;
    int u1ByteInRow = u1.offset%u1.step;
    int u1Col = u1ByteInRow/u1.elemSize();

    int u2Row = u2.offset/u2.step;
    int u2ByteInRow = u2.offset%u2.step;
    int u2Col = u2ByteInRow/u2.elemSize();

    // Arguments, in the order the kernel expects them.
    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u1.cols));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u1.rows));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u1Pitch));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data));
    args.push_back( make_pair( sizeof(cl_int), (void*)&p11Pitch));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data));
    args.push_back( make_pair( sizeof(cl_float), (void*)&taut));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u2Pitch));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u1Col));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u1Row));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u2Col));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u2Row));

    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
}
示例#8
0
文件: svm.cpp 项目: Niyikiza/opencv
// Launches the "svm_poly" kernel from the svm program over a grid of
// src2_cols x src_rows work-items.
//
// src, src2, dst - forwarded as buffers together with their row steps
//                  (step divided by element size).
// src_rows       - grid height; also passed to the kernel.
// src2_cols      - grid width; also passed to the kernel.
// var_count      - passed to the kernel as its width argument.
// alpha1, beta1, degree1
//                - passed as doubles when the context reports
//                  FEATURE_CL_DOUBLE, otherwise converted to float.
// flag           - when true the program is built with "-D ADDPOW".
static void matmul_poly(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1, double degree1, bool flag)
{
    Context *clCxt = Context::getContext();
    String kernelName = "svm_poly";
    int src_step = (int)src.step / src.elemSize();
    int src2_step = (int)src2.step / src2.elemSize();
    int dst_step = (int)dst.step / dst.elemSize();
    // Work-group is at most 16x16, shrunk to the problem size when smaller.
    int x = MIN(16, src_rows);
    int y = MIN(16, src2_cols);
    // Explicit casts: int -> size_t inside braces is a narrowing conversion in C++11.
    size_t localThreads[] = {static_cast<size_t>(x), static_cast<size_t>(y), 1};
    size_t globalThreads[] = {static_cast<size_t>(src2_cols), static_cast<size_t>(src_rows), 1};
    int width = var_count;

    // Always a valid C string: the options used to be written only when flag was
    // set, leaving an uninitialised buffer to be read as a string otherwise.
    const char *build_options = flag ? "-D ADDPOW" : "";

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src2.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&width));

    // The float copies live at function scope because args stores their
    // addresses until the kernel is launched below.
    float alpha = 0.0f, beta = 0.0f, degree = 0.0f;
    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
    {
        alpha = (float)alpha1;
        beta = (float)beta1;
        degree = (float)degree1;
        args.push_back(std::make_pair(sizeof(cl_float), (void* )&alpha));
        args.push_back(std::make_pair(sizeof(cl_float), (void* )&beta));
        args.push_back(std::make_pair(sizeof(cl_float), (void* )&degree));
    }
    else
    {
        args.push_back(std::make_pair(sizeof(cl_double), (void* )&alpha1));
        args.push_back(std::make_pair(sizeof(cl_double), (void* )&beta1));
        args.push_back(std::make_pair(sizeof(cl_double), (void* )&degree1));
    }
    openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
示例#9
0
文件: kmeans.cpp 项目: ChrisWC/opencv
// Launches the "distanceToCenters" kernel from the kmeans_kernel program with
// one work-item per row of src.
//
// dists   - forwarded as the last buffer argument.
// labels  - forwarded as a buffer, together with its row step in elements.
// src     - forwarded as a buffer; its row count sizes the grid.
// centers - forwarded as a buffer, together with its rows and cols.
void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers)
{
    // A double-support check used to live here and is currently disabled:
    //if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
    //{
    //    CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
    //    return;
    //}

    String kernelname = "distanceToCenters";
    Context  *clCxt = src.clCxt;
    int labels_step = (int)(labels.step/labels.elemSize());

    // Work-group height is the row count, capped at 256.
    int threadNum = src.rows;
    if (threadNum > 256)
        threadNum = 256;

    size_t localThreads[3];
    localThreads[0] = 1;
    localThreads[1] = threadNum;
    localThreads[2] = 1;

    size_t globalThreads[3];
    globalThreads[0] = 1;
    globalThreads[1] = src.rows;
    globalThreads[2] = 1;

    // Arguments, in the order the kernel expects them.
    std::vector<std::pair<size_t, const void *> > args;
    args.push_back(std::make_pair(sizeof(cl_int), (void *)&labels_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers.rows));
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&labels.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void *)&centers.cols));
    args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&centers.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dists.data));

    openCLExecuteKernel(clCxt, &kmeans_kernel, kernelname, globalThreads, localThreads, args, -1, -1, NULL);
}
示例#10
0
// Copies src into m through openCLCopyBuffer2D.
//
// src - source matrix; must not be empty (debug assert).
// m   - destination; given src's size and type via create() before the copy.
static void copyTo(const oclMat &src, oclMat &m )
{
    CV_DbgAssert(!src.empty());

    m.create(src.size(), src.type());

    // Width of the copied region: column count times element size.
    const size_t rowBytes = src.cols * src.elemSize();

    openCLCopyBuffer2D(src.clCxt,
                       m.data, m.step, m.offset,
                       src.data, src.step, rowBytes, src.rows, src.offset);
}
示例#11
0
// Launches "getBackgroundImage2_kernel" from the bgfg_mog program over a grid
// of modesUsed.cols x modesUsed.rows work-items (work-group size 32x8).
//
// cn        - when 1, the program is additionally built with "-D CN1".
// modesUsed, weight, mean, dst
//           - forwarded as buffers with their row steps in elements; dst also
//             contributes an (x, y) offset.
// nmixtures - baked into the build options as NMIXTURES.
void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures)
{
    String kernel_name = "getBackgroundImage2_kernel";
    Context* clCxt = Context::getContext();

    // Build options: NMIXTURES always, CN1 only for cn == 1.
    char build_option[50];
    if(cn != 1)
    {
        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
    }
    else
    {
        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
    }

    // Launch geometry.
    size_t local_thread[] = {32, 8, 1};
    size_t global_thread[3];
    global_thread[0] = modesUsed.cols;
    global_thread[1] = modesUsed.rows;
    global_thread[2] = 1;

    // Row steps divided by the element size.
    int modesUsed_pitch = (int)(modesUsed.step/modesUsed.elemSize());
    int weight_pitch = (int)(weight.step/weight.elemSize());
    int mean_pitch = (int)(mean.step/mean.elemSize());
    int dst_pitch = (int)(dst.step/dst.elemSize());

    // dst offset split into row index and column index.
    int dst_row = (int)(dst.offset/dst.step);
    int dst_byte_in_row = (int)(dst.offset%dst.step);
    int dst_col = dst_byte_in_row/(int)dst.elemSize();

    // Arguments, in the order the kernel expects them.
    std::vector<std::pair<size_t, const void*> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&modesUsed.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst.data));
    args.push_back(std::make_pair(sizeof(cl_float), (void*)&c_TB));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.cols));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_pitch));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_pitch));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_pitch));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_pitch));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_col));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_row));

    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
}
示例#12
0
// Launches the "lkSparse" kernel for ptcount points.
//
// The image-based program (pyrlk) is tried first with I and J bound as
// textures. If that launch throws an Exception, the textures are released and
// the buffer-based program (pyrlk_no_image) is launched instead, with I's row
// length in elements inserted as an extra argument.
//
// I, J             - input images.
// prevPts, nextPts - forwarded as buffers together with their steps.
// status, err      - forwarded as buffers.
// ptcount          - number of points; the grid is 8 * ptcount wide.
// level            - passed to the kernel; level 0 sets the calcErr flag.
// patch, winSize, iters
//                  - passed through to the kernel.
static void lkSparse_run(oclMat &I, oclMat &J,
                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
{
    Context  *clCxt = I.clCxt;
    int elemCntPerRow = I.step / I.elemSize();
    std::string kernelName = "lkSparse";
    size_t localThreads[3]  = { 8, 8, 1 };
    // Explicit cast: int -> size_t inside braces is a narrowing conversion in C++11.
    size_t globalThreads[3] = { static_cast<size_t>(8 * ptcount), 8, 1};
    int cn = I.oclchannels();
    char calcErr = (level == 0) ? 1 : 0;

    // The step arguments are declared to the kernel as sizeof(cl_int) bytes, so
    // stage them in genuine cl_int variables instead of pointing at the matrix
    // members, whose declared width is not guaranteed to be exactly 4 bytes.
    cl_int prevPtsStep = static_cast<cl_int>(prevPts.step);
    cl_int nextPtsStep = static_cast<cl_int>(nextPts.step);

    std::vector<std::pair<size_t , const void *> > args;
    cl_mem ITex = bindTexture(I);
    cl_mem JTex = bindTexture(J);

    // args stores the addresses of ITex/JTex, so reassigning those variables in
    // the fallback path below retargets the first two kernel arguments.
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPtsStep ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPtsStep ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&level ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters ));
    args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr ));

    bool texturesStillBound = true;
    try
    {
        openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
    }
    catch(const Exception&)
    {
        printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
        releaseTexture(ITex);
        releaseTexture(JTex);
        texturesStillBound = false;
        ITex = (cl_mem)I.data;
        JTex = (cl_mem)J.data;
        localThreads[1] = globalThreads[1] = 32;
        // Slot 11 is right after I.cols: the buffer kernel takes the row length there.
        args.insert( args.begin()+11, std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
    }

    // The textures created above were previously never released when the image
    // kernel ran successfully.
    if (texturesStillBound)
    {
        releaseTexture(ITex);
        releaseTexture(JTex);
    }
}
示例#13
0
// Launches the "lkSparse" kernel for ptcount points, choosing between the
// image-based program (pyrlk) and the buffer-based one (pyrlk_no_image)
// according to support_image2d().
//
// I, J             - input images; bound as textures on the image path, passed
//                    as plain buffers otherwise.
// prevPts, nextPts - forwarded as buffers together with their steps.
// status, err      - forwarded as buffers.
// ptcount          - number of points; the grid is 8 * ptcount wide.
// level            - passed to the kernel; level 0 sets the calcErr flag.
// patch, winSize, iters
//                  - passed through to the kernel.
static void lkSparse_run(oclMat &I, oclMat &J,
                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
{
    Context  *clCxt = I.clCxt;
    int elemCntPerRow = I.step / I.elemSize();
    String kernelName = "lkSparse";
    bool isImageSupported = support_image2d();
    // Work-group height is 8 on the image path and 32 on the buffer path.
    // Typed as size_t up front: int -> size_t inside braces is a narrowing
    // conversion in C++11.
    const size_t groupHeight = isImageSupported ? 8 : 32;
    size_t localThreads[3]  = { 8, groupHeight, 1 };
    size_t globalThreads[3] = { static_cast<size_t>(8 * ptcount), groupHeight, 1};
    int cn = I.oclchannels();
    char calcErr;
    if (level == 0)
    {
        calcErr = 1;
    }
    else
    {
        calcErr = 0;
    }

    // The step arguments are declared to the kernel as sizeof(cl_int) bytes, so
    // stage them in genuine cl_int variables instead of pointing at the matrix
    // members, whose declared width is not guaranteed to be exactly 4 bytes.
    cl_int prevPtsStep = static_cast<cl_int>(prevPts.step);
    cl_int nextPtsStep = static_cast<cl_int>(nextPts.step);

    std::vector<std::pair<size_t , const void *> > args;

    cl_mem ITex = isImageSupported ? bindTexture(I) : (cl_mem)I.data;
    cl_mem JTex = isImageSupported ? bindTexture(J) : (cl_mem)J.data;

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPtsStep ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPtsStep ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&level ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols ));
    // Only the buffer kernel takes the row length in elements.
    if (!isImageSupported)
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters ));
    args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr ));

    if(isImageSupported)
    {
        openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
        // Textures exist only on this path, so they are released only here.
        releaseTexture(ITex);
        releaseTexture(JTex);
    }
    else
    {
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
    }
}
示例#14
0
static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var,
    int nmixtures, float varThreshold, float backgroundRatio)
{
    Context* clCxt = Context::getContext();

    size_t local_thread[] = {32, 8, 1};
    size_t global_thread[] = {frame.cols, frame.rows, 1};

    int frame_step = (int)(frame.step/frame.elemSize());
    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
    int weight_step = (int)(weight.step/weight.elemSize());
    int mean_step = (int)(mean.step/mean.elemSize());
    int var_step = (int)(var.step/var.elemSize());

    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();

    int frame_offset_y = (int)(frame.offset/frame.step);
    int frame_offset_x = (int)(frame.offset%frame.step);
    frame_offset_x = frame_offset_x/(int)frame.elemSize();

    char build_option[50];
    if(cn == 1)
    {
        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
    }else
    {
        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
    }

    String kernel_name = "mog_withoutLearning_kernel";
    std::vector<std::pair<size_t, const void*> > args;

    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));

    args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));

    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
}
示例#15
0
// Launches the "lkSparse" kernel for ptcount points, choosing between the
// image-based program (pyrlk) and the buffer-based one (pyrlk_no_image)
// according to support_image2d(). On the image path the program is built with
// a WAVE_SIZE define obtained from queryWaveFrontSize().
//
// I, J             - input images; bound as textures on the image path, passed
//                    as plain buffers otherwise.
// prevPts, nextPts - forwarded as buffers together with their steps.
// status, err      - forwarded as buffers.
// ptcount          - number of points; the grid is 8 * ptcount wide.
// level            - passed to the kernel; level 0 sets the calcErr flag.
// patch, winSize, iters
//                  - passed through to the kernel.
static void lkSparse_run(oclMat &I, oclMat &J,
                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
{
    Context  *clCxt = I.clCxt;
    int elemCntPerRow = I.step / I.elemSize();
    String kernelName = "lkSparse";
    bool isImageSupported = support_image2d();
    // Work-group height is 8 on the image path and 32 on the buffer path.
    // Typed as size_t up front: int -> size_t inside braces is a narrowing
    // conversion in C++11.
    const size_t groupHeight = isImageSupported ? 8 : 32;
    size_t localThreads[3]  = { 8, groupHeight, 1 };
    size_t globalThreads[3] = { static_cast<size_t>(8 * ptcount), groupHeight, 1};
    int cn = I.oclchannels();
    char calcErr = level==0?1:0;

    // The step arguments are declared to the kernel as sizeof(cl_int) bytes, so
    // stage them in genuine cl_int variables instead of pointing at the matrix
    // members, whose declared width is not guaranteed to be exactly 4 bytes.
    cl_int prevPtsStep = static_cast<cl_int>(prevPts.step);
    cl_int nextPtsStep = static_cast<cl_int>(nextPts.step);

    std::vector<std::pair<size_t , const void *> > args;

    cl_mem ITex = isImageSupported ? bindTexture(I) : (cl_mem)I.data;
    cl_mem JTex = isImageSupported ? bindTexture(J) : (cl_mem)J.data;

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPtsStep ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPtsStep ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&level ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols ));
    // Only the buffer kernel takes the row length in elements.
    if (!isImageSupported)
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters ));
    args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr ));

    if(isImageSupported)
    {
        // Fetch the concrete kernel ("<name>_C<channels>_D<depth>") once just to
        // query its wavefront size, then release it again.
        std::stringstream idxStr;
        idxStr << kernelName.c_str() << "_C" << I.oclchannels() << "_D" << I.depth();
        cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str().c_str());
        int wave_size = (int)queryWaveFrontSize(kernel);
        openCLSafeCall(clReleaseKernel(kernel));

        // NOTE(review): this buffer is static and rewritten on every call, so
        // concurrent callers would share it - confirm callers are serialized.
        static char opt[32] = {0};
        sprintf(opt, " -D WAVE_SIZE=%d", wave_size);

        openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), opt, CLFLUSH);
        // Textures exist only on this path, so they are released only here.
        releaseTexture(ITex);
        releaseTexture(JTex);
    }
    else
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
}
示例#16
0
/**
 * Fills @p xmap and @p ymap by launching the "buildWarpPlaneMaps" OpenCL kernel.
 *
 * @param dst_roi  Destination region. Its size becomes the size of both maps and
 *                 its top-left corner is passed to the kernel as (tl_u, tl_v).
 * @param K        3x3 CV_32F matrix.
 * @param R        3x3 CV_32F matrix.
 * @param T        Continuous CV_32F vector with three elements (3x1 or 1x3).
 * @param scale    Forwarded to the kernel unchanged.
 * @param xmap     Output map, (re)allocated as CV_32F of dst_roi.size().
 * @param ymap     Output map, (re)allocated as CV_32F of dst_roi.size().
 *
 * The first (unnamed) Size parameter is ignored.
 */
void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
                                 float scale, oclMat &xmap, oclMat &ymap)
{
    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
    CV_Assert((T.size() == Size(3, 1) || T.size() == Size(1, 3)) && T.type() == CV_32F && T.isContinuous());

    Mat K_Rinv = K * R.t();
    CV_Assert(K_Rinv.isContinuous());

    // Pack the nine coefficients of K * R.t() followed by the three components
    // of T into one 1x12 row so that a single device buffer carries both.
    // copyTo() into a named sub-matrix header of matching size/type writes into
    // KRT_mat's storage; plain assignment to a temporary header would only
    // rebind that temporary and leave KRT_mat untouched. Column ranges are
    // end-exclusive, hence [0, 9) and [9, 12).
    Mat KRT_mat(1, 12, CV_32FC1); // 9 + 3
    Mat krPart = KRT_mat.colRange(0, 9);
    Mat tPart  = KRT_mat.colRange(9, 12);
    K_Rinv.reshape(1, 1).copyTo(krPart);
    T.reshape(1, 1).copyTo(tPart);

    // transfer K_Rinv and T into a single cl_mem
    oclMat KRT_oclMat(KRT_mat);

    xmap.create(dst_roi.size(), CV_32F);
    ymap.create(dst_roi.size(), CV_32F);

    int tl_u = dst_roi.tl().x;
    int tl_v = dst_roi.tl().y;

    // Steps and offsets are handed to the kernel in elements, not bytes.
    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
    // The kernel argument must be the device buffer, not the host Mat pointer.
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KRT_oclMat.data));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));

    // One work-item per output pixel.
    size_t globalThreads[3] = { (size_t)xmap.cols, (size_t)xmap.rows, 1 };
    size_t localThreads[3]  = { 32, 8, 1 };

    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPlaneMaps", globalThreads, localThreads, args, -1, -1);
}
示例#17
0
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
                  src.rows == dst.rows && src.cols == dst.cols);

    vector<pair<size_t , const void *> > args;

    int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1},
        {2, 2, 1, 1, 1, 1, 1},
        {8, 8, 8, 8 , 4, 4, 4},      //vector length is undefined when channels = 3
        {1, 1, 1, 1, 1, 1, 1}
    };

    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];

    int vector_length = vector_lengths[dst.channels() -1][dst.depth()];
    int offset_cols = divUp(dst.offset, dst.elemSize()) & (vector_length - 1);
    int cols = vector_length == 1 ? divUp(dst.cols, vector_length) : divUp(dst.cols + offset_cols, vector_length);

    globalThreads[0] = divUp(cols, localThreads[0]) * localThreads[0];
    globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1];
    globalThreads[2] = 1;

    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();

    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));

    openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
                        localThreads, args, dst.channels(), dst.depth());
}
示例#18
0
文件: svm.cpp 项目: Niyikiza/opencv
/**
 * Launches the "svm_rbf" OpenCL kernel.
 *
 * @param src        First input buffer.
 * @param src_e      Second input buffer.
 * @param dst        Output buffer.
 * @param src_rows   Passed to the kernel; also the global work size in dimension 1.
 * @param src2_cols  Passed to the kernel; also the global work size in dimension 0.
 * @param var_count  Passed to the kernel as `width`.
 * @param gamma1     Passed as cl_double when the device supports doubles,
 *                   otherwise narrowed to cl_float.
 * @param flag       When true the kernel is built with "-D ADDEXP".
 */
static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, int src2_cols, int var_count, double gamma1, bool flag)
{
    Context *clCxt = Context::getContext();

    String kernelName = "svm_rbf";

    int width = var_count;
    // Steps are handed to the kernel in elements, not bytes.
    int src_step = (int)(src.step / src.elemSize());
    int src_e_step = (int)(src_e.step / src_e.elemSize());
    int dst_step = (int)(dst.step / dst.elemSize());

    int x = MIN(16, src_rows);
    int y = MIN(16, src2_cols);
    size_t localThreads[] = {(size_t)x, (size_t)y, 1};
    size_t globalThreads[] = {(size_t)src2_cols, (size_t)src_rows, 1};

    // Must start out as an empty string: when `flag` is false nothing else
    // writes to the buffer, and it is still passed on as the build options.
    char build_options[50] = "";
    if(flag)
        sprintf(build_options, "-D ADDEXP");

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src_e.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_e_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&width));

    // `gamma` lives at function scope because `args` stores its address until
    // the kernel is launched below.
    float gamma = 0.0f;
    if(!clCxt->supportsFeature(FEATURE_CL_DOUBLE))
    {
        gamma = (float)gamma1;
        args.push_back(std::make_pair(sizeof(cl_float), (void* )&gamma));
    }
    else
        args.push_back(std::make_pair(sizeof(cl_double), (void* )&gamma1));

    openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
示例#19
0
void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
{
    CV_Assert(M.rows == 3 && M.cols == 3);
    CV_Assert(dsize.area() > 0);

    xmap.create(dsize, CV_32FC1);
    ymap.create(dsize, CV_32FC1);

    float coeffs[3 * 3];
    Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);

    if (inverse)
        M.convertTo(coeffsMat, coeffsMat.type());
    else
    {
        cv::Mat iM;
        invert(M, iM);
        iM.convertTo(coeffsMat, coeffsMat.type());
    }

    oclMat coeffsOclMat(coeffsMat.reshape(1, 1));

    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));

    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };

    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPerspectiveMaps", globalThreads, NULL, args, -1, -1);
}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
                  src.rows == dst.rows && src.cols == dst.cols
                  && mask.type() == CV_8UC1);

    vector<pair<size_t , const void *> > args;

    std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"},
        {"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"},
        {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
        {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
    };
    char compile_option[32];
    sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];

    globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0];
    globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1];
    globalThreads[2] = 1;

    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();

    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));

    openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
                        localThreads, args, -1, -1, compile_option);
}
示例#21
0
// FIXME:
// This function cannot sort arrays with duplicated keys
// Selection-sort driver: launches "selectionSortLocal" followed by
// "selectionSortFinal" from kernel_sort_by_key over `vecSize` work-items.
//
// keys / vals     device buffers passed to both kernels
// vecSize         element count; also the global work size in dimension 0
// isGreaterThan   forwarded to genSortBuildOption() to produce the build options
//
// The function is currently disabled: the first statement raises an error.
// NOTE(review): presumably CV_Error throws, which would make everything after
// it unreachable -- confirm before relying on the rest of the body.
static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
{
    CV_Error(-1, "This function is incorrect at the moment.");
    Context * cxt = Context::getContext();

    size_t globalThreads[3] = {vecSize, 1, 1};

    std::vector< std::pair<size_t, const void *> > args;
    char build_opt_buf [100];
    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);

    //local
    String kernelname = "selectionSortLocal";
    // Byte size of the extra kernel argument pushed below with a NULL pointer
    // (presumably a local-memory scratch buffer -- confirm against the kernel).
    // On Android it is sized from the device's maximum work-group size,
    // elsewhere from the compile-time GROUP_SIZE.
#ifdef ANDROID
    int lds_size = cxt->getDeviceInfo().maxWorkGroupSize * keys.elemSize();
#else
    int lds_size = GROUP_SIZE * keys.elemSize();
#endif
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&keys.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&vals.data));
    // NOTE(review): vecSize is a size_t but is passed with sizeof(cl_int), so
    // only its first sizeof(cl_int) bytes reach the kernel -- verify this is intended.
    args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSize));
    args.push_back(std::make_pair(lds_size,       (void*)NULL));

    // Android launches without an explicit work-group size; other platforms
    // use GROUP_SIZE work-items per group.
#ifdef ANDROID
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
#else
    size_t localThreads[3] = {GROUP_SIZE, 1, 1};
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
#endif

    //final
    kernelname = "selectionSortFinal";
    // The second kernel takes the same arguments minus the trailing
    // NULL-pointer argument, so drop it before relaunching.
    args.pop_back();
#ifdef ANDROID
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
#else
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
#endif
}
示例#22
0
// Fills xmap/ymap (CV_32F, sized like dst_roi) by launching the
// "buildWarpCylindricalMaps" kernel. K and R must be 3x3 CV_32F; the nine
// coefficients of K * R.t() are uploaded as a single row. The first (unnamed)
// Size parameter is ignored.
void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
                                       oclMat &xmap, oclMat &ymap)
{
    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);

    Mat K_Rinv = K * R.t();
    CV_Assert(K_Rinv.isContinuous());

    // Flatten to 1x9 and upload to the device.
    oclMat KR_oclMat(K_Rinv.reshape(1, 1));

    const Size mapSize = dst_roi.size();
    xmap.create(mapSize, CV_32F);
    ymap.create(mapSize, CV_32F);

    // Top-left corner of the destination region.
    int tl_u = dst_roi.tl().x;
    int tl_v = dst_roi.tl().y;

    // Steps and offsets in elements rather than bytes.
    int xmap_step = xmap.step / xmap.elemSize();
    int xmap_offset = xmap.offset / xmap.elemSize();
    int ymap_step = ymap.step / ymap.elemSize();
    int ymap_offset = ymap.offset / ymap.elemSize();

    std::vector< std::pair<size_t, const void *> > kernelArgs;
    kernelArgs.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
    kernelArgs.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
    kernelArgs.push_back( std::make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
    kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
    kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
    kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
    kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
    kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
    kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
    kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
    kernelArgs.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
    kernelArgs.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));

    // One work-item per map element, 32x8 work-groups.
    size_t localThreads[3]  = { 32, 8, 1 };
    size_t globalThreads[3] = { (size_t)xmap.cols, (size_t)xmap.rows, 1 };

    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpCylindricalMaps", globalThreads, localThreads, kernelArgs, -1, -1);
}
示例#23
0
void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio)
{
    Context* clCxt = Context::getContext();

    size_t local_thread[] = {32, 8, 1};
    size_t global_thread[] = {(size_t)dst.cols, (size_t)dst.rows, 1};

    int weight_step = (int)(weight.step/weight.elemSize());
    int mean_step = (int)(mean.step/mean.elemSize());
    int dst_step = (int)(dst.step/dst.elemSize());

    char build_option[50];
    if(cn == 1)
    {
        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
    }else
    {
        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
    }

    String kernel_name = "getBackgroundImage_kernel";
    vector< pair<size_t, const void*> > args;

    args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data));
    args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data));
    args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data));

    args.push_back(make_pair(sizeof(cl_int), (void*)&dst.rows));
    args.push_back(make_pair(sizeof(cl_int), (void*)&dst.cols));

    args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step));
    args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step));
    args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step));

    args.push_back(make_pair(sizeof(cl_float), (void*)&backgroundRatio));

    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
}
示例#24
0
void cv::ocl::blendLinear(const oclMat &src1, const oclMat &src2, const oclMat &weights1, const oclMat &weights2,
                          oclMat &dst)
{
    CV_Assert(src1.depth() <= CV_32F);
    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
    CV_Assert(weights1.size() == weights2.size() && weights1.size() == src1.size() &&
              weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1);

    dst.create(src1.size(), src1.type());

    size_t globalSize[] = { (size_t)dst.cols, (size_t)dst.rows, 1};
    size_t localSize[] = { 16, 16, 1 };

    int depth = dst.depth(), ocn = dst.oclchannels();
    int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
    int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize();
    int weight1_step = weights1.step / weights1.elemSize(), weight1_offset = weights1.offset / weights1.elemSize();
    int weight2_step = weights2.step / weights2.elemSize(), weight2_offset = weights2.offset / weights2.elemSize();
    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();

    const char * const channelMap[] = { "", "", "2", "4", "4" };
    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    std::string buildOptions = format("-D T=%s%s -D convertToT=convert_%s%s%s -D FT=float%s -D convertToFT=convert_float%s",
                                      typeMap[depth], channelMap[ocn], typeMap[depth], channelMap[ocn],
                                      depth >= CV_32S ? "" : "_sat_rte", channelMap[ocn], channelMap[ocn]);

    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));

    openCLExecuteKernel(src1.clCxt, &blend_linear, "blendLinear", globalSize, localSize, args,
                        -1, -1, buildOptions.c_str());
}
示例#25
0
void ocl_tvl1flow::centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy)
{
    Context  *clCxt = src.clCxt;
    size_t localThreads[3] = {32, 8, 1};
    size_t globalThreads[3] = {src.cols, src.rows, 1};

    int srcElementSize = src.elemSize();
    int src_step = src.step/srcElementSize;

    int dElememntSize = dx.elemSize();
    int dx_step = dx.step/dElememntSize;

    String kernelName = "centeredGradientKernel";
    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void*)&src.data));
    args.push_back( make_pair( sizeof(cl_int), (void*)&src.cols));
    args.push_back( make_pair( sizeof(cl_int), (void*)&src.rows));
    args.push_back( make_pair( sizeof(cl_int), (void*)&src_step));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&dx.data));
    args.push_back( make_pair( sizeof(cl_mem), (void*)&dy.data));
    args.push_back( make_pair( sizeof(cl_int), (void*)&dx_step));
    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThreads, localThreads, args, -1, -1);

}
示例#26
0
/**
 * Launches the "nonmaxSupression" kernel from featdetect_fast over count_
 * work-items, using kpLoc_ and score_ as inputs and @p keypoints as output.
 *
 * @param keypoints  Device buffer handed to the kernel as its output.
 * @return The value of the device-side counter after the kernel has finished
 *         (read back with a blocking read).
 */
int cv::ocl::FAST_OCL::nonmaxSupressionOCL(oclMat& keypoints)
{
    size_t localThreads[3] = {256, 1, 1};
    size_t globalThreads[3] = {count_, 1, 1};

    Context *clCxt = Context::getContext();
    String kernelName = "nonmaxSupression";
    std::vector< std::pair<size_t, const void *> > args;

    // Device-side counter, initialised to zero from the host value.
    int counter = 0;
    int err = CL_SUCCESS;
    cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(),
                                      CL_MEM_COPY_HOST_PTR, sizeof(int),
                                      &counter, &err);
    // Fail here rather than passing an invalid handle on to the kernel launch.
    openCLSafeCall(err);

    // Steps in elements rather than bytes.
    int kpLocStep = kpLoc_.step / kpLoc_.elemSize();
    int sStep = score_.step / score_.elemSize();
    int kStep = keypoints.step / keypoints.elemSize();

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&kpLoc_.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&score_.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&count_));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&sStep));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&kStep));

    openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1);

    // Blocking read (CL_TRUE): `counter` is valid as soon as the call returns.
    openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(),
                                       counterCL, CL_TRUE, 0, sizeof(int), &counter, 0, NULL, NULL));
    openCLSafeCall(clReleaseMemObject(counterCL));

    return counter;
}
示例#27
0
        /**
         * Creates a 2D OpenCL image of mat.cols x mat.rows and copies the
         * contents of @p mat into it.
         *
         * Supported depths are CV_8U, CV_32S and CV_32F; supported OpenCL
         * channel counts are 1, 3 and 4. Anything else raises CV_Error.
         *
         * When the rows of @p mat are padded (cols * elemSize != step) the data
         * is first packed into a temporary tightly-laid-out buffer, because the
         * buffer-to-image copy takes no row pitch for its source.
         *
         * @return The new image object. Every OpenCL failure is reported
         *         through openCLSafeCall after the objects created here have
         *         been released.
         */
        cl_mem bindTexture(const oclMat &mat)
        {
            cl_mem texture;
            cl_image_format format;
            int err;
            int depth    = mat.depth();
            int channels = mat.oclchannels();

            switch(depth)
            {
            case CV_8U:
                format.image_channel_data_type = CL_UNSIGNED_INT8;
                break;
            case CV_32S:
                format.image_channel_data_type = CL_UNSIGNED_INT32;
                break;
            case CV_32F:
                format.image_channel_data_type = CL_FLOAT;
                break;
            default:
                CV_Error(-1, "Image format is not supported");
                break;
            }
            switch(channels)
            {
            case 1:
                format.image_channel_order     = CL_R;
                break;
            case 3:
                format.image_channel_order     = CL_RGB;
                break;
            case 4:
                format.image_channel_order     = CL_RGBA;
                break;
            default:
                CV_Error(-1, "Image format is not supported");
                break;
            }

            cl_context context = (cl_context)mat.clCxt->oclContext();
            cl_command_queue queue = (cl_command_queue)mat.clCxt->oclCommandQueue();

#ifdef CL_VERSION_1_2
            //this enables backwards portability to
            //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
            if(Context::getContext()->supportsFeature(Context::CL_VER_1_2))
            {
                cl_image_desc desc;
                desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
                desc.image_width      = mat.cols;
                desc.image_height     = mat.rows;
                desc.image_depth      = 0;
                desc.image_array_size = 1;
                desc.image_row_pitch  = 0;
                desc.image_slice_pitch = 0;
                desc.buffer           = NULL;
                desc.num_mip_levels   = 0;
                desc.num_samples      = 0;
                texture = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
            }
            else
#endif
            {
                texture = clCreateImage2D(
                    context,
                    CL_MEM_READ_WRITE,
                    &format,
                    mat.cols,
                    mat.rows,
                    0,
                    NULL,
                    &err);
            }
            // Check the creation result before the handle is used for any copy.
            openCLSafeCall(err);

            size_t origin[] = { 0, 0, 0 };
            size_t region[] = { (size_t)mat.cols, (size_t)mat.rows, 1 };

            // Rows are padded when the row step exceeds the packed row size.
            const bool isPadded = (mat.cols * mat.elemSize() != mat.step);

            cl_mem devData = (cl_mem)mat.data;
            if (isPadded)
            {
                // Pack the rows into a temporary tightly-laid-out buffer.
                devData = clCreateBuffer(context, CL_MEM_READ_ONLY, mat.cols * mat.rows
                    * mat.elemSize(), NULL, &err);
                if (err == CL_SUCCESS)
                {
                    const size_t regin[3] = {mat.cols * mat.elemSize(), (size_t)mat.rows, 1};
                    err = clEnqueueCopyBufferRect(queue, (cl_mem)mat.data, devData, origin, origin,
                        regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
                    clFlush(queue);
                }
            }

            if (err == CL_SUCCESS)
                err = clEnqueueCopyBufferToImage(queue, devData, texture, 0, origin, region, 0, NULL, NULL);

            // The temporary buffer belongs to this function; release it on every path.
            if (isPadded && devData != NULL)
            {
                clFlush(queue);
                clReleaseMemObject(devData);
            }

            if (err != CL_SUCCESS)
            {
                // Do not leak the image when the upload failed.
                clReleaseMemObject(texture);
                openCLSafeCall(err);
            }
            return texture;
        }
示例#28
0
        /**
         * Computes the moments of a single-channel image with the "CvMoments"
         * OpenCL kernel and sums the per-tile partial results on the host.
         *
         * @param src     Single-channel image. CV_64FC1 input requires device
         *                double support. When @p binary is true, @p src is
         *                REPLACED (it is taken by reference) with an 8-bit image
         *                holding 255 wherever the 8-bit conversion of the input
         *                is non-zero and 0 elsewhere.
         * @param binary  Selects binary mode; also passed to the kernel as 0/1.
         * @return Moments completed by icvCompleteMomentState().
         */
        Moments ocl_moments(oclMat& src, bool binary) //for image
        {
            CV_Assert(src.oclchannels() == 1);
            if(src.type() == CV_64FC1 && !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
            {
                CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
            }

            if(binary)
            {
                // The mask is the 8-bit version of the input. An input that is
                // already CV_8UC1 serves as its own mask; an empty mask would
                // make setTo() below paint every pixel.
                oclMat mask;
                if(src.type() != CV_8UC1)
                {
                    src.convertTo(mask, CV_8UC1);
                }
                else
                {
                    mask = src;
                }
                // Freshly created device memory is uninitialised, so clear it
                // before painting the masked pixels.
                oclMat src8u(src.size(), CV_8UC1);
                src8u.setTo(Scalar::all(0));
                src8u.setTo(Scalar(255), mask);
                src = src8u;
            }
            const int TILE_SIZE = 256;

            CvMoments mom;
            memset(&mom, 0, sizeof(mom));

            // Number of TILE_SIZE-wide / TILE_SIZE-high tiles covering the image.
            cv::Size size = src.size();
            int blockx, blocky;
            blockx = (size.width + TILE_SIZE - 1)/TILE_SIZE;
            blocky = (size.height + TILE_SIZE - 1)/TILE_SIZE;

            oclMat dst_m;
            int tile_height = TILE_SIZE;

            size_t localThreads[3]  = {1, (size_t)tile_height, 1};
            size_t globalThreads[3] = {(size_t)blockx, (size_t)size.height, 1};

            // Ten result rows per tile row; double precision when the device has it.
            if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
            {
                dst_m.create(blocky * 10, blockx, CV_64FC1);
            }else
            {
                dst_m.create(blocky * 10, blockx, CV_32FC1);
            }

            // Steps in elements rather than bytes.
            int src_step = (int)(src.step/src.elemSize());
            int dstm_step = (int)(dst_m.step/dst_m.elemSize());

            std::vector<std::pair<size_t , const void *> > args;
            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step ));
            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstm_step ));

            int binary_ = binary ? 1 : 0;
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&binary_));

            // The kernel is specialised on the input type through a -D define.
            char builOption[128];
            if(binary || src.type() == CV_8UC1)
            {
                snprintf(builOption, 128, "-D CV_8UC1");
            }else if(src.type() == CV_16UC1)
            {
                snprintf(builOption, 128, "-D CV_16UC1");
            }else if(src.type() == CV_16SC1)
            {
                snprintf(builOption, 128, "-D CV_16SC1");
            }else if(src.type() == CV_32FC1)
            {
                snprintf(builOption, 128, "-D CV_32FC1");
            }else if(src.type() == CV_64FC1)
            {
                snprintf(builOption, 128, "-D CV_64FC1");
            }else
            {
                CV_Error( CV_StsUnsupportedFormat, "" );
            }

            openCLExecuteKernel(Context::getContext(), &moments, "CvMoments", globalThreads, localThreads, args, -1, -1, builOption);

            // Download the partial results and accumulate them in double precision.
            Mat tmp(dst_m);
            tmp.convertTo(tmp, CV_64FC1);

            // Within every group of ten rows, row k holds the partial sums of
            // moment k (in the order m00, m10, m01, m20, m11, m02, m30, m21,
            // m12, m03 used below).
            double tmp_m[10] = {0};
            for(int j = 0; j < tmp.rows; j += 10)
            {
                for(int k = 0; k < 10; k++)
                {
                    const double *partial = tmp.ptr<double>(j + k);
                    for(int i = 0; i < tmp.cols; i++)
                    {
                        tmp_m[k] += partial[i];
                    }
                }
            }

            mom.m00 = tmp_m[0];
            mom.m10 = tmp_m[1];
            mom.m01 = tmp_m[2];
            mom.m20 = tmp_m[3];
            mom.m11 = tmp_m[4];
            mom.m02 = tmp_m[5];
            mom.m30 = tmp_m[6];
            mom.m21 = tmp_m[7];
            mom.m12 = tmp_m[8];
            mom.m03 = tmp_m[9];
            icvCompleteMomentState( &mom );
            return mom;
        }
示例#29
0
// Launches "warpBackwardKernel" from tvl1flow over an I0.cols x I0.rows grid.
// I1, I1x and I1y are bound as image objects for the duration of the launch
// and released afterwards; image support on the device is asserted.
void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho)
{
    Context* clCxt = I0.clCxt;
    const bool isImgSupported = support_image2d(clCxt);

    CV_Assert(isImgSupported);

    // Row steps in elements rather than bytes.
    int I0Step = (int)(I0.step / I0.elemSize());
    int u1Step = (int)(u1.step / u1.elemSize());
    int u2Step = (int)(u2.step / u2.elemSize());
    int I1w_step = (int)(I1w.step / I1w.elemSize());

    // Split the byte offsets of u1/u2 into a row index and a column index
    // (the column index is in elements).
    int u1_offset_y = (int)(u1.offset / u1.step);
    int u1_offset_x = (int)((u1.offset % u1.step) / u1.elemSize());
    int u2_offset_y = (int)(u2.offset / u2.step);
    int u2_offset_x = (int)((u2.offset % u2.step) / u2.elemSize());

    cl_mem I1_tex = bindTexture(I1);
    cl_mem I1x_tex = bindTexture(I1x);
    cl_mem I1y_tex = bindTexture(I1y);

    vector< pair<size_t, const void *> > kernelArgs;
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&I0.data));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void*)&I0Step));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void*)&I0.cols));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void*)&I0.rows));
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&I1_tex));
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&I1x_tex));
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&I1y_tex));
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void*)&u1Step));
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data));
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&I1w.data));
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data));
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data));
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data));
    kernelArgs.push_back( make_pair( sizeof(cl_mem), (void*)&rho.data));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void*)&I1w_step));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void*)&u2Step));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
    kernelArgs.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));

    // One work-item per pixel of I0, 32x8 work-groups.
    size_t localThread[] = {32, 8, 1};
    size_t globalThread[] = {(size_t)I0.cols, (size_t)I0.rows, 1};

    String kernelName = "warpBackwardKernel";
    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, kernelArgs, -1, -1);

    releaseTexture(I1_tex);
    releaseTexture(I1x_tex);
    releaseTexture(I1y_tex);
}
示例#30
0
// Enqueues the "lkDense" kernel on a grid of I.cols x I.rows work-items using
// 16x16 work-groups.
//
//   I, J                : source images. They are bound as textures when image
//                         objects are used; otherwise their raw data handles are
//                         passed to the kernel as-is.
//   u, v, prevU, prevV  : each is passed as a (data, step) pair of kernel arguments.
//   err                 : only tested against null to derive the calcErr flag;
//                         its buffer is not forwarded to the kernel here.
//   winSize             : width and height become two separate kernel arguments.
//   iters               : forwarded to the kernel unchanged.
static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
                 oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters)
{
    typedef std::pair<size_t, const void *> KernelArg;

    Context *ctx = I.clCxt;

    // Image objects are used unless the device name contains "Intel(R) HD Graphics".
    const bool useImages =
        ctx->impl->devName.find("Intel(R) HD Graphics") == std::string::npos;

    // Row pitch of I expressed in elements; only sent on the buffer-based path.
    int pitchInElems = I.step / I.elemSize();

    std::string kernelName = "lkDense";

    size_t local[3]  = { 16, 16, 1 };
    size_t global[3] = { I.cols, I.rows, 1};

    // The kernel receives this flag as a single cl_char-sized argument.
    bool calcErr = (err != 0);

    // Start from the raw buffers and switch to bound textures when available.
    cl_mem texI = (cl_mem)I.data;
    cl_mem texJ = (cl_mem)J.data;
    if (useImages)
    {
        texI = bindTexture(I);
        texJ = bindTexture(J);
    }

    // Argument order below must match the kernel's parameter list.
    std::vector<KernelArg> kernelArgs;

    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &texI));
    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &texJ));

    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &u.data));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &u.step));
    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &v.data));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &v.step));
    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &prevU.data));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &prevU.step));
    kernelArgs.push_back(KernelArg(sizeof(cl_mem), &prevV.data));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &prevV.step));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &I.rows));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &I.cols));

    // The buffer-based kernel variant takes one extra argument: the row pitch.
    if (!useImages)
    {
        kernelArgs.push_back(KernelArg(sizeof(cl_int), &pitchInElems));
    }

    kernelArgs.push_back(KernelArg(sizeof(cl_int), &winSize.width));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &winSize.height));
    kernelArgs.push_back(KernelArg(sizeof(cl_int), &iters));
    kernelArgs.push_back(KernelArg(sizeof(cl_char), &calcErr));

    if (!useImages)
    {
        // Fallback path: nothing was bound, so there is nothing to release.
        openCLExecuteKernel2(ctx, &pyrlk_no_image, kernelName, global, local, kernelArgs, I.oclchannels(), I.depth(), CLFLUSH);
        return;
    }

    openCLExecuteKernel2(ctx, &pyrlk, kernelName, global, local, kernelArgs, I.oclchannels(), I.depth(), CLFLUSH);

    // Release the textures bound above once the kernel has been submitted.
    releaseTexture(texI);
    releaseTexture(texJ);
}