示例#1
0
// Launches the "lkSparse" kernel for one pyramid level.
//
// I, J                       - the two images handed to the kernel; bound as textures when
//                              support_image2d() reports image support, otherwise passed as buffers.
// prevPts, nextPts, status,
// err                        - device buffers forwarded to the kernel unchanged.
// ptcount                    - number of points; the global size along x is 8 * ptcount.
// level                      - pyramid level; the kernel's calcErr flag is raised only for level 0.
// patch, winSize, iters      - forwarded to the kernel unchanged.
static void lkSparse_run(oclMat &I, oclMat &J,
                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
{
    Context  *clCxt = I.clCxt;
    int elemCntPerRow = I.step / I.elemSize();
    String kernelName = "lkSparse";
    bool isImageSupported = support_image2d();
    // The buffer-based kernel is launched with a taller work-group (32 rows instead of 8).
    size_t localThreads[3]  = { 8, isImageSupported ? 8 : 32, 1 };
    size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 8 : 32, 1};
    int cn = I.oclchannels();
    char calcErr = level==0?1:0;

    // The argument list stores addresses, so every value referenced below must stay alive
    // until the kernel has been enqueued.
    std::vector<std::pair<size_t , const void *> > args;

    cl_mem ITex = isImageSupported ? bindTexture(I) : (cl_mem)I.data;
    cl_mem JTex = isImageSupported ? bindTexture(J) : (cl_mem)J.data;

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPts.step ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPts.step ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&level ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols ));
    // Only the buffer-based kernel receives the row pitch (in elements).
    if (!isImageSupported)
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters ));
    args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr ));

    if(isImageSupported)
    {
        // Build the kernel once just to query its wavefront size, then release it again.
        std::stringstream idxStr;
        idxStr << kernelName.c_str() << "_C" << I.oclchannels() << "_D" << I.depth();
        cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str().c_str());
        int wave_size = (int)queryWaveFrontSize(kernel);
        openCLSafeCall(clReleaseKernel(kernel));

        // Plain local buffer: a function-local static here would be shared (and overwritten)
        // by concurrent callers. 32 bytes is enough for the prefix plus any int.
        char opt[32] = {0};
        sprintf(opt, " -D WAVE_SIZE=%d", wave_size);

        openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), opt, CLFLUSH);
        releaseTexture(ITex);
        releaseTexture(JTex);
    }
    else
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
}
// Runs the "convertC3C4" kernel with the raw buffer `src` as input and `dst` as output.
// The kernel's element type is chosen at build time through -D GENTYPE4=<depth name>4.
static void convert_C3C4(const cl_mem &src, oclMat &dst)
{
    // Indexed by dst.depth().
    const char * const depthNames[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    std::string buildOptions = format("-D GENTYPE4=%s4", depthNames[dst.depth()]);

    // These scalars are passed by address, so they must outlive the launch call.
    int dstStep_in_pixel = dst.step1() / dst.oclchannels();
    int pixel_end = dst.wholecols * dst.wholerows - 1;

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.wholecols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.wholerows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstStep_in_pixel));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));

    // One work-item per group of four pixels of the whole (un-cropped) matrix.
    size_t globalThreads[3] = { divUp(dst.wholecols * dst.wholerows, 4), 1, 1 };

#ifdef ANDROID
    // No explicit local size is passed on Android.
    size_t *localThreads = NULL;
#else
    size_t workGroup[3] = { 256, 1, 1 };
    size_t *localThreads = workGroup;
#endif

    openCLExecuteKernel(dst.clCxt, &convertC3C4, "convertC3C4", globalThreads, localThreads,
                        args, -1, -1, buildOptions.c_str());
}
示例#3
0
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
static void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta)
{
    std::string kernelName = "convert_to_S";
    std::stringstream idxStr;
    idxStr << src.depth();
    kernelName += idxStr.str();
    float alpha_f = (float)alpha, beta_f = (float)beta;
    CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);
    std::vector<std::pair<size_t , const void *> > args;
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];
    globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
    globalThreads[2] = 1;
    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
    if(dst.type() == CV_8UC1)
    {
        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
    }
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f ));
    openCLExecuteKernel2(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
                         localThreads, args, dst.oclchannels(), dst.depth(), CLFLUSH);
}
示例#4
0
// Converts src into dst with the depth taken from rtype (channel count always follows src).
// A negative rtype keeps the source type. When the depth is unchanged and alpha/beta are
// (within machine epsilon of) 1 and 0, the data is simply copied.
void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta )
{
    const double eps = std::numeric_limits<double>::epsilon();
    const bool noScale = fabs(alpha - 1) < eps && fabs(beta) < eps;

    if( rtype >= 0 )
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels());
    else
        rtype = src.type();

    const int sdepth = src.depth();
    const int ddepth = CV_MAT_DEPTH(rtype);

    if( noScale && sdepth == ddepth )
    {
        src.copyTo(dst);
        return;
    }

    // In-place conversion to a different depth: keep a header to the old data in `temp`
    // so that re-creating dst below does not invalidate the source.
    oclMat temp;
    const oclMat *psrc = &src;
    if( psrc == &dst && sdepth != ddepth )
    {
        temp = src;
        psrc = &temp;
    }

    dst.create( src.size(), rtype );
    convert_run_cus(*psrc, dst, alpha, beta);
}
示例#5
0
void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
                          oclMat &result)
{
    cv::ocl::Context *ctx = img1.clCxt;
    assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
    int channels = img1.oclchannels();
    int depth = img1.depth();
    int rows = img1.rows;
    int cols = img1.cols;
    int istep = img1.step1();
    int wstep = weights1.step1();
    size_t globalSize[] = {cols * channels / 4, rows, 1};
    size_t localSize[] = {256, 1, 1};

    vector< pair<size_t, const void *> > args;

    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
        std::string kernelName = "BlendLinear";

        openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
    }
}
示例#6
0
// Launches the "lkSparse" kernel for one pyramid level.
//
// The texture (image2d_t) kernel is tried first; if that launch throws, a warning is printed
// and the buffer-based kernel is launched instead with a 32-row work-group and the extra
// row-pitch argument. The textures created by bindTexture() are released on both paths.
//
// ptcount - number of points; the global size along x is 8 * ptcount.
// level   - pyramid level; the kernel's calcErr flag is raised only for level 0.
// All other parameters are forwarded to the kernel unchanged.
static void lkSparse_run(oclMat &I, oclMat &J,
                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
{
    Context  *clCxt = I.clCxt;
    int elemCntPerRow = I.step / I.elemSize();
    std::string kernelName = "lkSparse";
    size_t localThreads[3]  = { 8, 8, 1 };
    size_t globalThreads[3] = { 8 * ptcount, 8, 1};
    int cn = I.oclchannels();
    char calcErr = (level == 0) ? 1 : 0;

    // The argument list stores addresses; ITex/JTex are re-pointed in the fallback path
    // and the list picks up the new handles automatically.
    std::vector<std::pair<size_t , const void *> > args;
    cl_mem ITex = bindTexture(I);
    cl_mem JTex = bindTexture(J);

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPts.step ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPts.step ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&level ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters ));
    args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr ));

    try
    {
        openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
    }
    catch(Exception&)
    {
        printf("Warning: The image2d_t is not supported by the device. Using alternative method!\n");
        releaseTexture(ITex);
        releaseTexture(JTex);
        ITex = (cl_mem)I.data;
        JTex = (cl_mem)J.data;
        localThreads[1] = globalThreads[1] = 32;
        // The buffer kernel expects the row pitch right after I.cols (argument index 11).
        args.insert( args.begin()+11, std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
        return;
    }

    // Texture launch succeeded: the images created by bindTexture() are no longer needed.
    // Without this they were leaked on every successful call.
    releaseTexture(ITex);
    releaseTexture(JTex);
}
示例#7
0
// Fills backgroundImage from the current model by calling the device routine
// getBackgroundImage2_ocl. The output is (re)allocated with the stored frame size and type.
void cv::ocl::MOG2::getBackgroundImage(oclMat& backgroundImage) const
{
    backgroundImage.create(frameSize_, frameType_);

    const int channels = backgroundImage.oclchannels();
    cv::ocl::device::mog::getBackgroundImage2_ocl(channels, bgmodelUsedModes_, weight_, mean_,
                                                  backgroundImage, nmixtures_);
}
示例#8
0
// Launches the "lkSparse" kernel for one pyramid level, using the texture-based program
// when support_image2d() is true and the buffer-based program otherwise.
//
// ptcount - number of points; the global size along x is 8 * ptcount.
// level   - pyramid level; the kernel's calcErr flag is raised only for level 0.
// All other parameters are forwarded to the kernel unchanged.
static void lkSparse_run(oclMat &I, oclMat &J,
                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
{
    Context  *clCxt = I.clCxt;
    String kernelName = "lkSparse";

    const bool useImages = support_image2d();
    // Work-group height differs between the two kernels: 8 rows with images, 32 without.
    const size_t groupRows = useImages ? 8 : 32;
    size_t localThreads[3]  = { 8, groupRows, 1 };
    size_t globalThreads[3] = { 8 * ptcount, groupRows, 1};

    // Scalars below are passed by address and must stay alive until the launch.
    int elemCntPerRow = I.step / I.elemSize();
    int cn = I.oclchannels();
    char calcErr = (level == 0) ? 1 : 0;

    cl_mem ITex = useImages ? bindTexture(I) : (cl_mem)I.data;
    cl_mem JTex = useImages ? bindTexture(J) : (cl_mem)J.data;

    std::vector<std::pair<size_t , const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPts.step ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPts.step ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&level ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols ));
    // Only the buffer-based kernel takes the row pitch (in elements).
    if (!useImages)
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters ));
    args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr ));

    if (!useImages)
    {
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
        return;
    }

    openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
    // The textures were created by bindTexture() above and are owned by this call.
    releaseTexture(ITex);
    releaseTexture(JTex);
}
示例#9
0
文件: kmeans.cpp 项目: DevShah/18551
void cv::ocl::distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType)
{
    CV_Assert(src.cols * src.channels() == centers.cols * centers.channels());
    CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F);
    CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR);

    dists.create(src.rows, 1, CV_32FC1);
    labels.create(src.rows, 1, CV_32SC1);

    std::stringstream build_opt_ss;
    build_opt_ss << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST");

    int src_step = src.step / src.elemSize1();
    int centers_step = centers.step / centers.elemSize1();
    int feature_width = centers.cols * centers.oclchannels();
    int src_offset = src.offset / src.elemSize1();
    int centers_offset = centers.offset / centers.elemSize1();

    int all_dist_count = src.rows * centers.rows;
    oclMat all_dist(1, all_dist_count, CV_32FC1);

    vector<pair<size_t, const void *> > args;
    args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
    args.push_back(make_pair(sizeof(cl_mem), (void *)&centers.data));
    args.push_back(make_pair(sizeof(cl_mem), (void *)&all_dist.data));

    args.push_back(make_pair(sizeof(cl_int), (void *)&feature_width));
    args.push_back(make_pair(sizeof(cl_int), (void *)&src_step));
    args.push_back(make_pair(sizeof(cl_int), (void *)&centers_step));
    args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
    args.push_back(make_pair(sizeof(cl_int), (void *)&centers.rows));

    args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset));
    args.push_back(make_pair(sizeof(cl_int), (void *)&centers_offset));

    size_t globalThreads[3] = { all_dist_count, 1, 1 };

    openCLExecuteKernel(Context::getContext(), &kmeans_kernel,
                        "distanceToCenters", globalThreads, NULL, args, -1, -1, build_opt_ss.str().c_str());

    Mat all_dist_cpu;
    all_dist.download(all_dist_cpu);

    for (int i = 0; i < src.rows; ++i)
    {
        Point p;
        double minVal;

        Rect roi(i * centers.rows, 0, centers.rows, 1);
        Mat hdr(all_dist_cpu, roi);

        cv::minMaxLoc(hdr, &minVal, NULL, &p);

        dists.at<float>(i, 0) = static_cast<float>(minVal);
        labels.at<int>(i, 0) = p.x;
    }
}
示例#10
0
// Feeds one frame to the model and writes the foreground mask (CV_8UC1, frame-sized) to fgmask.
//
// The model is re-initialised when no frame has been processed yet, when learningRate >= 1,
// or when the frame size / channel count no longer matches the stored model.
// A negative learningRate (or the first frame after initialisation) selects the automatic
// rate 1 / min(2 * nframes_, history).
void cv::ocl::MOG2::operator()(const oclMat& frame, oclMat& fgmask, float learningRate)
{
    using namespace cv::ocl::device::mog;

    const int channels = frame.oclchannels();

    const bool needsInit = nframes_ == 0 || learningRate >= 1.0f ||
                           frame.size() != frameSize_ || channels != mean_.oclchannels();
    if (needsInit)
        initialize(frame.size(), frame.type());

    fgmask.create(frameSize_, CV_8UC1);
    fgmask.setTo(cv::Scalar::all(0));

    ++nframes_;
    if (!(learningRate >= 0.0f && nframes_ > 1))
        learningRate = 1.0f / std::min(2 * nframes_, history);
    CV_Assert(learningRate >= 0.0f);

    mog2_ocl(frame, channels, fgmask, bgmodelUsedModes_, weight_, variance_, mean_,
             learningRate, -learningRate * fCT, bShadowDetection, nmixtures_);
}
static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, String kernelName)
{
    std::vector<std::pair<size_t , const void *> > args;

    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
    int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();

    if (dst.type() == CV_8UC1)
        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];

    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    const char channelMap[] = { ' ', ' ', '2', '4', '4' };
    std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);

    Mat mat(1, 1, dst.type(), scalar);

#ifdef CL_VERSION_1_2
    // this enables backwards portability to
    // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
    if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) &&
        dst.offset == 0 && dst.cols == dst.wholecols)
    {
        const int sizeofMap[][7] =
            {
                { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double)  },
                { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) },
                { 0                , 0               , 0                 , 0                , 0              , 0                ,  0                 },
                { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) },
            };
        int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()];

        clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
                            (cl_mem)dst.data, (void*)mat.data, sizeofGeneric,
                            0, dst.step * dst.rows, 0, NULL, NULL);
    }
    else
#endif
    {
        oclMat m(mat);
        args.push_back( std::make_pair( sizeof(cl_mem) , (void*)&m.data ));
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));

        openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
            localThreads, args, -1, -1, buildOptions.c_str());
    }
}
示例#12
0
void cv::ocl::blendLinear(const oclMat &src1, const oclMat &src2, const oclMat &weights1, const oclMat &weights2,
                          oclMat &dst)
{
    CV_Assert(src1.depth() <= CV_32F);
    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
    CV_Assert(weights1.size() == weights2.size() && weights1.size() == src1.size() &&
              weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1);

    dst.create(src1.size(), src1.type());

    size_t globalSize[] = { (size_t)dst.cols, (size_t)dst.rows, 1};
    size_t localSize[] = { 16, 16, 1 };

    int depth = dst.depth(), ocn = dst.oclchannels();
    int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
    int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize();
    int weight1_step = weights1.step / weights1.elemSize(), weight1_offset = weights1.offset / weights1.elemSize();
    int weight2_step = weights2.step / weights2.elemSize(), weight2_offset = weights2.offset / weights2.elemSize();
    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();

    const char * const channelMap[] = { "", "", "2", "4", "4" };
    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    std::string buildOptions = format("-D T=%s%s -D convertToT=convert_%s%s%s -D FT=float%s -D convertToFT=convert_float%s",
                                      typeMap[depth], channelMap[ocn], typeMap[depth], channelMap[ocn],
                                      depth >= CV_32S ? "" : "_sat_rte", channelMap[ocn], channelMap[ocn]);

    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));

    openCLExecuteKernel(src1.clCxt, &blend_linear, "blendLinear", globalSize, localSize, args,
                        -1, -1, buildOptions.c_str());
}
////////////////////////////////////////////////////////////////////////
// convert_C4C3
// Runs the "convertC4C3" kernel with `src` as input and the raw buffer `dst` as output.
// The kernel's element type is chosen at build time through -D GENTYPE4=<depth name>4;
// an unrecognised depth raises CV_StsUnsupportedFormat.
static void convert_C4C3(const oclMat &src, cl_mem &dst)
{
    // Indexed by src.depth() (0..6).
    const char * const depthNames[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };

    const int depth = src.depth();
    if (depth < 0 || depth > 6)
        CV_Error(CV_StsUnsupportedFormat, "unknown depth");

    char compile_option[32];
    sprintf(compile_option, "-D GENTYPE4=%s4", depthNames[depth]);

    Context *clCxt = src.clCxt;
    string kernelName = "convertC4C3";

    // Passed by address below, so these must outlive the launch call.
    int srcStep_in_pixel = src.step1() / src.oclchannels();
    int pixel_end = src.wholecols * src.wholerows - 1;

    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel));
    args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end));

    // One work-item per four pixels, rounded up to whole 256-wide work-groups.
    size_t localThreads[3] = {256, 1, 1};
    size_t globalThreads[3] = {((src.wholecols * src.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1};

    openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
示例#14
0
            static void split(const oclMat &mat_src, oclMat *mat_dst)
            {
                CV_Assert(mat_dst);

                int depth = mat_src.depth();
                int num_channels = mat_src.oclchannels();
                Size size = mat_src.size();

                if(num_channels == 1)
                {
                    mat_src.copyTo(mat_dst[0]);
                    return;
                }

                int i;
                for(i = 0; i < num_channels; i++)
                    mat_dst[i].create(size, CV_MAKETYPE(depth, 1));

                split_vector_run(mat_src, mat_dst);
            }
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
                  src.rows == dst.rows && src.cols == dst.cols
                  && mask.type() == CV_8UC1);

    vector<pair<size_t , const void *> > args;

    std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"},
        {"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"},
        {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
        {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
    };
    char compile_option[32];
    sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];

    globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0];
    globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1];
    globalThreads[2] = 1;

    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();

    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));

    openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
                        localThreads, args, -1, -1, compile_option);
}
示例#16
0
static void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta)
{
    String kernelName = "convert_to";
    float alpha_f = alpha, beta_f = beta;
    int sdepth = src.depth(), ddepth = dst.depth();
    int sstep1 = (int)src.step1(), dstep1 = (int)dst.step1();
    int cols1 = src.cols * src.oclchannels();

    char buildOptions[150], convertString[50];
    const char * typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    sprintf(convertString, "convert_%s_sat_rte", typeMap[ddepth]);
    sprintf(buildOptions, "-D srcT=%s -D dstT=%s -D convertToDstType=%s", typeMap[sdepth],
            typeMap[ddepth], CV_32F == ddepth || ddepth == CV_64F ? "" : convertString);

    CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);
    std::vector<std::pair<size_t , const void *> > args;

    size_t localThreads[3] = { 16, 16, 1 };
    size_t globalThreads[3] = { divUp(cols1, localThreads[0]) * localThreads[0],
                                divUp(dst.rows, localThreads[1]) * localThreads[1], 1
                              };

    int doffset1 = dst.offset / dst.elemSize1();
    int soffset1 = src.offset / src.elemSize1();

    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols1 ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sstep1 ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&soffset1 ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstep1 ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&doffset1 ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f ));

    openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
                        localThreads, args, -1, -1, buildOptions);
}
示例#17
0
// Runs the "convertC4C3" kernel with `src` as input and the raw buffer `dst` as output.
// The kernel's element type is chosen at build time through -D GENTYPE4=<depth name>4.
static void convert_C4C3(const oclMat &src, cl_mem &dst)
{
    // Indexed by src.depth().
    const char * const depthNames[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    std::string buildOptions = format("-D GENTYPE4=%s4", depthNames[src.depth()]);

    // Passed by address below, so these must outlive the launch call.
    int pixel_end = src.wholecols * src.wholerows - 1;
    int srcStep_in_pixel = src.step1() / src.oclchannels();

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.wholecols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.wholerows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));

    // One work-item per group of four pixels of the whole (un-cropped) matrix.
    size_t localThreads[3] = { 256, 1, 1 };
    size_t globalThreads[3] = { divUp(src.wholecols * src.wholerows, 4), 1, 1};

    openCLExecuteKernel(src.clCxt, &convertC3C4, "convertC4C3", globalThreads, localThreads,
                        args, -1, -1, buildOptions.c_str());
}
示例#18
0
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err)
{
    if (prevPts.empty())
    {
        nextPts.release();
        status.release();
        //if (err) err->release();
        return;
    }

    derivLambda = std::min(std::max(derivLambda, 0.0), 1.0);

    iters = std::min(std::max(iters, 0), 100);

    const int cn = prevImg.oclchannels();

    dim3 block, patch;
    calcPatchSize(winSize, cn, block, patch, isDeviceArch11_);

    CV_Assert(derivLambda >= 0);
    CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2);
    CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
    CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
    CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);

    if (useInitialFlow)
        CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2);
    else
        ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);

    oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
    oclMat temp2 = nextPts.reshape(1);
    //oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
    multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f);
    //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);

    ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
    //status.setTo(Scalar::all(1));
    setTo(status, Scalar::all(1));

    bool errMat = false;
    if (!err)
    {
        err = new oclMat(1, prevPts.cols, CV_32FC1);
        errMat = true;
    }
    else
        ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
    //ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, err);

    // build the image pyramids.

    prevPyr_.resize(maxLevel + 1);
    nextPyr_.resize(maxLevel + 1);

    if (cn == 1 || cn == 4)
    {
        //prevImg.convertTo(prevPyr_[0], CV_32F);
        //nextImg.convertTo(nextPyr_[0], CV_32F);
        convertTo(prevImg, prevPyr_[0], CV_32F);
        convertTo(nextImg, nextPyr_[0], CV_32F);
    }
    else
    {
        //oclMat buf_;
        //      cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
        //      buf_.convertTo(prevPyr_[0], CV_32F);

        //      cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
        //      buf_.convertTo(nextPyr_[0], CV_32F);
    }

    for (int level = 1; level <= maxLevel; ++level)
    {
        pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]);
        pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]);
    }

    // dI/dx ~ Ix, dI/dy ~ Iy

    for (int level = maxLevel; level >= 0; level--)
    {
        lkSparse_run(prevPyr_[level], nextPyr_[level],
                     prevPts, nextPts, status, *err, getMinEigenVals, prevPts.cols,
                     level, /*block, */patch, winSize, iters);
    }

    clFinish(prevImg.clCxt->impl->clCmdQueue);

    if(errMat)
        delete err;
}
示例#19
0
            static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
            {

                if(!mat_src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_src.type() == CV_64F)
                {
                    CV_Error(Error::GpuNotSupported, "Selected device don't support double\r\n");
                    return;
                }

                Context  *clCxt = mat_src.clCxt;
                int channels = mat_src.oclchannels();
                int depth = mat_src.depth();

                String kernelName = "split_vector";

                int vector_lengths[4][7] = {{0, 0, 0, 0, 0, 0, 0},
                    {4, 4, 2, 2, 1, 1, 1},
                    {4, 4, 2, 2 , 1, 1, 1},
                    {4, 4, 2, 2, 1, 1, 1}
                };

                size_t vector_length = vector_lengths[channels - 1][mat_dst[0].depth()];

                int max_offset_cols = 0;
                for(int i = 0; i < channels; i++)
                {
                    int offset_cols = (mat_dst[i].offset / mat_dst[i].elemSize()) & (vector_length - 1);
                    if(max_offset_cols < offset_cols)
                        max_offset_cols = offset_cols;
                }

                int cols =  vector_length == 1 ? divUp(mat_src.cols, vector_length)
                            : divUp(mat_src.cols + max_offset_cols, vector_length);

                size_t localThreads[3]  = { 64, 4, 1 };
                size_t globalThreads[3] = { cols, mat_src.rows, 1 };

                int dst_step1 = mat_dst[0].cols * mat_dst[0].elemSize();
                std::vector<std::pair<size_t , const void *> > args;
                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src.data));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src.step));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src.offset));
                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_dst[0].data));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[0].step));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[0].offset));
                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_dst[1].data));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[1].step));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[1].offset));
                if(channels >= 3)
                {

                    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_dst[2].data));
                    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[2].step));
                    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[2].offset));
                }
                if(channels >= 4)
                {
                    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_dst[3].data));
                    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[3].step));
                    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst[3].offset));
                }

                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src.rows));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1));

                openCLExecuteKernel(clCxt, &split_mat, kernelName, globalThreads, localThreads, args, channels, depth);
            }
示例#20
0
// Launches the "pyrDown" kernel reading from `src` and writing into `dst`.
// Both matrices must share the same type, and CV_8S data is rejected.
static void pyrdown_run_cus(const oclMat &src, const oclMat &dst)
{
    CV_Assert(src.type() == dst.type());
    CV_Assert(src.depth() != CV_8S);

    // Kernel arguments: source buffer/step/size, then destination buffer/step/width.
    std::vector<std::pair<size_t , const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols));

    // The grid spans the source width and the destination height.
    size_t localThreads[3]  = { 256, 1, 1 };
    size_t globalThreads[3];
    globalThreads[0] = src.cols;
    globalThreads[1] = dst.rows;
    globalThreads[2] = 1;

    std::string kernelName = "pyrDown";
    openCLExecuteKernel2(src.clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth(), CLFLUSH);
}
示例#21
0
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// setTo ////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
//oclMat &operator = (const Scalar &s)
//{
//    //cout << "cv::ocl::oclMat::=" << endl;
//    setTo(s);
//    return *this;
//}
static void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, std::string kernelName)
{
    std::vector<std::pair<size_t , const void *> > args;

    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];
    globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
    globalThreads[2] = 1;
    int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
    if(dst.type() == CV_8UC1)
    {
        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    }
    char compile_option[32];
    union sc
    {
        cl_uchar4 uval;
        cl_char4  cval;
        cl_ushort4 usval;
        cl_short4 shval;
        cl_int4 ival;
        cl_float4 fval;
        cl_double4 dval;
    } val;
    switch(dst.depth())
    {
    case 0:
        val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
        val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
        val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
        val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=uchar");
            args.push_back( std::make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=uchar4");
            args.push_back( std::make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case 1:
        val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
        val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
        val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
        val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=char");
            args.push_back( std::make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=char4");
            args.push_back( std::make_pair( sizeof(cl_char4) , (void *)&val.cval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case 2:
        val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
        val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
        val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
        val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=ushort");
            args.push_back( std::make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=ushort4");
            args.push_back( std::make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case 3:
        val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
        val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
        val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
        val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=short");
            args.push_back( std::make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=short4");
            args.push_back( std::make_pair( sizeof(cl_short4) , (void *)&val.shval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case 4:
        val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
        val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
        val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
        val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=int");
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
            break;
        case 2:
            sprintf(compile_option, "-D GENTYPE=int2");
            cl_int2 i2val;
            i2val.s[0] = val.ival.s[0];
            i2val.s[1] = val.ival.s[1];
            args.push_back( std::make_pair( sizeof(cl_int2) , (void *)&i2val ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=int4");
            args.push_back( std::make_pair( sizeof(cl_int4) , (void *)&val.ival ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case 5:
        val.fval.s[0] = (float)scalar.val[0];
        val.fval.s[1] = (float)scalar.val[1];
        val.fval.s[2] = (float)scalar.val[2];
        val.fval.s[3] = (float)scalar.val[3];
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=float");
            args.push_back( std::make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=float4");
            args.push_back( std::make_pair( sizeof(cl_float4) , (void *)&val.fval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case 6:
        val.dval.s[0] = scalar.val[0];
        val.dval.s[1] = scalar.val[1];
        val.dval.s[2] = scalar.val[2];
        val.dval.s[3] = scalar.val[3];
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=double");
            args.push_back( std::make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=double4");
            args.push_back( std::make_pair( sizeof(cl_double4) , (void *)&val.dval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    default:
        CV_Error(CV_StsUnsupportedFormat, "unknown depth");
    }
#ifdef CL_VERSION_1_2
    if(dst.offset == 0 && dst.cols == dst.wholecols)
    {
        clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
    }
    else
    {
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
        openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads,
                             localThreads, args, -1, -1, compile_option, CLFLUSH);
    }
#else
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
    openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads,
                         localThreads, args, -1, -1, compile_option, CLFLUSH);
#endif
}
示例#22
0
        // Computes the spatial moments up to third order of a single-channel
        // image on the OpenCL device.
        //
        // The "CvMoments" kernel writes, for every tile, ten partial sums into
        // dst_m (ten consecutive rows per tile row); they are downloaded and
        // accumulated on the host, then icvCompleteMomentState fills in the rest.
        //
        //   src    : single-channel image. In binary mode it is REPLACED by an
        //            8-bit image holding 255 where the source is non-zero and 0
        //            elsewhere (the parameter is a non-const reference).
        //   binary : forwarded to the kernel as an int flag; also selects the
        //            8UC1 kernel build.
        Moments ocl_moments(oclMat& src, bool binary) //for image
        {
            CV_Assert(src.oclchannels() == 1);
            if(src.type() == CV_64FC1 && !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
            {
                CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
            }

            if(binary)
            {
                // An 8UC1 source is its own mask; other types are converted first.
                // (An empty mask would make setTo overwrite every pixel.)
                oclMat mask;
                if(src.type() != CV_8UC1)
                {
                    src.convertTo(mask, CV_8UC1);
                }
                else
                {
                    mask = src;
                }
                // A freshly allocated matrix is not initialised, so clear it
                // before painting the masked pixels.
                oclMat src8u(src.size(), CV_8UC1);
                src8u.setTo(Scalar(0));
                src8u.setTo(Scalar(255), mask);
                src = src8u;
            }
            const int TILE_SIZE = 256;

            CvMoments mom;
            memset(&mom, 0, sizeof(mom));

            // Number of TILE_SIZE x TILE_SIZE tiles in each direction.
            cv::Size size = src.size();
            int blockx, blocky;
            blockx = (size.width + TILE_SIZE - 1)/TILE_SIZE;
            blocky = (size.height + TILE_SIZE - 1)/TILE_SIZE;

            oclMat dst_m;
            int tile_height = TILE_SIZE;

            size_t localThreads[3]  = {1, (size_t)tile_height, 1};
            size_t globalThreads[3] = {(size_t)blockx, (size_t)size.height, 1};

            // Partial sums are kept in double when the device supports it.
            if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
            {
                dst_m.create(blocky * 10, blockx, CV_64FC1);
            }else
            {
                dst_m.create(blocky * 10, blockx, CV_32FC1);
            }

            int src_step = (int)(src.step/src.elemSize());
            int dstm_step = (int)(dst_m.step/dst_m.elemSize());

            std::vector<std::pair<size_t , const void *> > args;
            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step ));
            args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstm_step ));

            int binary_ = binary ? 1 : 0;
            args.push_back( std::make_pair( sizeof(cl_int) , (void *)&binary_));

            // Select the kernel variant matching the element type.
            char builOption[128];
            if(binary || src.type() == CV_8UC1)
            {
                snprintf(builOption, sizeof(builOption), "-D CV_8UC1");
            }else if(src.type() == CV_16UC1)
            {
                snprintf(builOption, sizeof(builOption), "-D CV_16UC1");
            }else if(src.type() == CV_16SC1)
            {
                snprintf(builOption, sizeof(builOption), "-D CV_16SC1");
            }else if(src.type() == CV_32FC1)
            {
                snprintf(builOption, sizeof(builOption), "-D CV_32FC1");
            }else if(src.type() == CV_64FC1)
            {
                snprintf(builOption, sizeof(builOption), "-D CV_64FC1");
            }else
            {
                CV_Error( CV_StsUnsupportedFormat, "" );
            }

            openCLExecuteKernel(Context::getContext(), &moments, "CvMoments", globalThreads, localThreads, args, -1, -1, builOption);

            // Download the per-tile results and reduce them in double precision.
            Mat tmp(dst_m);
            tmp.convertTo(tmp, CV_64FC1);

            double tmp_m[10] = {0};

            for(int j = 0; j < tmp.rows; j += 10)
            {
                for(int i = 0; i < tmp.cols; i++)
                {
                    for(int k = 0; k < 10; k++)
                    {
                        tmp_m[k] += tmp.at<double>(j + k, i);
                    }
                }
            }

            mom.m00 = tmp_m[0];
            mom.m10 = tmp_m[1];
            mom.m01 = tmp_m[2];
            mom.m20 = tmp_m[3];
            mom.m11 = tmp_m[4];
            mom.m02 = tmp_m[5];
            mom.m30 = tmp_m[6];
            mom.m21 = tmp_m[7];
            mom.m12 = tmp_m[8];
            mom.m03 = tmp_m[9];
            icvCompleteMomentState( &mom );
            return mom;
        }
示例#23
0
            static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
            {
                if(!mat_dst.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_dst.type() == CV_64F)
                {
                    CV_Error(Error::GpuNotSupported, "Selected device don't support double\r\n");
                    return;
                }

                Context  *clCxt = mat_dst.clCxt;
                int channels = mat_dst.oclchannels();
                int depth = mat_dst.depth();

                String kernelName = "merge_vector";

                int vector_lengths[4][7] = {{0, 0, 0, 0, 0, 0, 0},
                    {2, 2, 1, 1, 1, 1, 1},
                    {4, 4, 2, 2 , 1, 1, 1},
                    {1, 1, 1, 1, 1, 1, 1}
                };

                size_t vector_length = vector_lengths[channels - 1][depth];
                int offset_cols = (mat_dst.offset / mat_dst.elemSize()) & (vector_length - 1);
                int cols = divUp(mat_dst.cols + offset_cols, vector_length);

                size_t localThreads[3]  = { 64, 4, 1 };
                size_t globalThreads[3] = { cols, mat_dst.rows, 1 };

                int dst_step1 = mat_dst.cols * mat_dst.elemSize();
                std::vector<std::pair<size_t , const void *> > args;
                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_dst.data));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst.step));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst.offset));
                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src[0].data));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[0].step));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[0].offset));
                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src[1].data));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[1].step));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[1].offset));

                if(channels == 4)
                {
                    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src[2].data));
                    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
                    args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[2].offset));

                    if(n == 3)
                    {
                        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src[2].data));
                        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
                        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[2].offset));
                    }
                    else if( n == 4)
                    {
                        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mat_src[3].data));
                        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[3].step));
                        args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_src[3].offset));
                    }
                }

                args.push_back( std::make_pair( sizeof(cl_int), (void *)&mat_dst.rows));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
                args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1));

                openCLExecuteKernel(clCxt, &merge_mat, kernelName, globalThreads, localThreads, args, channels, depth);
            }
示例#24
0
// Runs one pyramid level of the sparse Lucas-Kanade kernel ("lkSparse").
//
// The image-based kernel (pyrlk) is used unless the platform name contains
// "NVIDIA" or "Intel"; in that case the raw buffers are passed to the
// pyrlk_no_image variant together with the row pitch in elements.
// The error output flag handed to the kernel is set only at level 0.
static void lkSparse_run(oclMat &I, oclMat &J,
                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
{
    Context  *clCxt = I.clCxt;

    // Query the platform name of the device in use.
    char platform[256] = {0};
    cl_platform_id pid;
    clGetDeviceInfo(clCxt->impl->devices, CL_DEVICE_PLATFORM, sizeof(pid), &pid, NULL);
    clGetPlatformInfo(pid, CL_PLATFORM_NAME, 256, platform, NULL);
    const std::string platformName = platform;
    const bool isImageSupported = platformName.find("NVIDIA") == std::string::npos
                                  && platformName.find("Intel") == std::string::npos;

    // Locals referenced by address in the argument list below.
    int elemCntPerRow = I.step / I.elemSize();
    int cn = I.oclchannels();
    char calcErr = (level == 0) ? 1 : 0;

    // One work-group per point; the group height depends on the kernel variant.
    const size_t groupHeight = isImageSupported ? 8 : 32;
    size_t localThreads[3]  = { 8, groupHeight, 1 };
    size_t globalThreads[3] = { (size_t)(8 * ptcount), groupHeight, 1 };

    cl_mem ITex = isImageSupported ? bindTexture(I, I.depth(), cn) : (cl_mem)I.data;
    cl_mem JTex = isImageSupported ? bindTexture(J, J.depth(), cn) : (cl_mem)J.data;

    std::vector<std::pair<size_t , const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPts.step ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPts.step ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&level ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols ));
    if (!isImageSupported)
    {
        // Only the buffer-based kernel takes the row pitch.
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
    }
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters ));
    args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr ));

    std::string kernelName = "lkSparse";
    if (!isImageSupported)
    {
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
        return;
    }

    openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);

    // The image objects were created above and are no longer needed.
    releaseTexture(ITex);
    releaseTexture(JTex);
}
static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
    vector<pair<size_t , const void *> > args;
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];
    globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
    globalThreads[2] = 1;
    int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
    char compile_option[32];
    union sc
    {
        cl_uchar4 uval;
        cl_char4  cval;
        cl_ushort4 usval;
        cl_short4 shval;
        cl_int4 ival;
        cl_float4 fval;
        cl_double4 dval;
    } val;
    switch(dst.depth())
    {
    case CV_8U:
        val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
        val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
        val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
        val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=uchar");
            args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=uchar4");
            args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case CV_8S:
        val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
        val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
        val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
        val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=char");
            args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=char4");
            args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case CV_16U:
        val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
        val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
        val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
        val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=ushort");
            args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=ushort4");
            args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case CV_16S:
        val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
        val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
        val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
        val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=short");
            args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=short4");
            args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case CV_32S:
        val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
        val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
        val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
        val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=int");
            args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=int4");
            args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case CV_32F:
        val.fval.s[0] = scalar.val[0];
        val.fval.s[1] = scalar.val[1];
        val.fval.s[2] = scalar.val[2];
        val.fval.s[3] = scalar.val[3];
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=float");
            args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=float4");
            args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    case CV_64F:
        val.dval.s[0] = scalar.val[0];
        val.dval.s[1] = scalar.val[1];
        val.dval.s[2] = scalar.val[2];
        val.dval.s[3] = scalar.val[3];
        switch(dst.oclchannels())
        {
        case 1:
            sprintf(compile_option, "-D GENTYPE=double");
            args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
            break;
        case 4:
            sprintf(compile_option, "-D GENTYPE=double4");
            args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
            break;
        default:
            CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
        }
        break;
    default:
        CV_Error(CV_StsUnsupportedFormat, "unknown depth");
    }
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
    openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads,
                        localThreads, args, -1, -1, compile_option);
}
示例#26
0
/**
 * Launches the "lkDense" OpenCL kernel for the image pair I / J.
 *
 * Kernel arguments, in order: the two images, then (data, step) for u, v,
 * prevU and prevV, then I.rows and I.cols, then - on the buffer path only -
 * the row pitch of I in elements, then winSize.width, winSize.height, iters
 * and the calcErr flag.
 *
 * Devices whose name contains "Intel(R) HD Graphics" are routed to the
 * plain-buffer program (pyrlk_no_image); every other device gets textures
 * created by bindTexture and the pyrlk program.
 *
 * @param I, J          images handed to the kernel as its first two arguments
 * @param u, v          buffers whose data pointer and step are forwarded
 * @param prevU, prevV  buffers whose data pointer and step are forwarded
 * @param err           only tested against null to derive calcErr; the buffer
 *                      itself is not passed to the kernel
 * @param winSize       window size forwarded as width / height
 * @param iters         iteration count forwarded to the kernel
 */
static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
                 oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters)
{
    typedef std::pair<size_t, const void *> KernelArg;

    Context *clCxt = I.clCxt;

    // Select the image2d path unless the device name matches Intel HD Graphics.
    const bool useImages =
        clCxt->impl->devName.find("Intel(R) HD Graphics") == std::string::npos;

    // Row pitch of I expressed in elements; consumed by the buffer kernel only.
    int rowPitchInElems = I.step / I.elemSize();

    std::string kernelName = "lkDense";

    // 16x16 work-groups over a grid of I.cols x I.rows work-items.
    size_t localThreads[3]  = { 16, 16, 1 };
    size_t globalThreads[3] = { static_cast<size_t>(I.cols), static_cast<size_t>(I.rows), 1 };

    // Passed to the kernel with sizeof(cl_char).
    bool calcErr = (err != 0);

    // Either freshly created textures or the raw device buffers.
    cl_mem ITex = useImages ? bindTexture(I) : (cl_mem)I.data;
    cl_mem JTex = useImages ? bindTexture(J) : (cl_mem)J.data;

    std::vector<KernelArg> args;

    args.push_back(KernelArg(sizeof(cl_mem), &ITex));
    args.push_back(KernelArg(sizeof(cl_mem), &JTex));

    args.push_back(KernelArg(sizeof(cl_mem), &u.data));
    args.push_back(KernelArg(sizeof(cl_int), &u.step));
    args.push_back(KernelArg(sizeof(cl_mem), &v.data));
    args.push_back(KernelArg(sizeof(cl_int), &v.step));
    args.push_back(KernelArg(sizeof(cl_mem), &prevU.data));
    args.push_back(KernelArg(sizeof(cl_int), &prevU.step));
    args.push_back(KernelArg(sizeof(cl_mem), &prevV.data));
    args.push_back(KernelArg(sizeof(cl_int), &prevV.step));

    args.push_back(KernelArg(sizeof(cl_int), &I.rows));
    args.push_back(KernelArg(sizeof(cl_int), &I.cols));

    if (!useImages)
        args.push_back(KernelArg(sizeof(cl_int), &rowPitchInElems));

    args.push_back(KernelArg(sizeof(cl_int), &winSize.width));
    args.push_back(KernelArg(sizeof(cl_int), &winSize.height));
    args.push_back(KernelArg(sizeof(cl_int), &iters));
    args.push_back(KernelArg(sizeof(cl_char), &calcErr));

    if (!useImages)
    {
        // Buffer fallback: nothing was allocated here, so there is nothing to release.
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
        return;
    }

    openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);

    // The textures were created by bindTexture above and are owned by this call.
    releaseTexture(ITex);
    releaseTexture(JTex);
}
示例#27
0
//////////////////////////////////////////////////////////////////////////////
////////////////////////////////// pyrDown ///////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
/**
 * Launches the "pyrDown" kernel of the pyr_down program.
 *
 * Kernel arguments, in order: src data, src step, src rows, src cols,
 * dst data, dst step, dst cols. The program is selected by the channel count
 * and depth of src.
 *
 * @param src  source matrix; its type must equal dst's and its depth must not be CV_8S
 * @param dst  destination matrix, expected to be allocated by the caller
 *             (this function only reads its data pointer, step, rows and cols)
 */
static void pyrdown_run(const oclMat &src, const oclMat &dst)
{
    typedef std::pair<size_t, const void *> KernelArg;

    CV_Assert(src.type() == dst.type());
    CV_Assert(src.depth() != CV_8S);

    Context *clCxt = src.clCxt;
    String kernelName = "pyrDown";

    // Groups of 256 work-items along x; the grid is source width by destination height.
    size_t localThreads[3]  = { 256, 1, 1 };
    size_t globalThreads[3] = { static_cast<size_t>(src.cols), static_cast<size_t>(dst.rows), 1 };

    std::vector<KernelArg> args;

    // Source description.
    args.push_back(KernelArg(sizeof(cl_mem), &src.data));
    args.push_back(KernelArg(sizeof(cl_int), &src.step));
    args.push_back(KernelArg(sizeof(cl_int), &src.rows));
    args.push_back(KernelArg(sizeof(cl_int), &src.cols));

    // Destination description.
    args.push_back(KernelArg(sizeof(cl_mem), &dst.data));
    args.push_back(KernelArg(sizeof(cl_int), &dst.step));
    args.push_back(KernelArg(sizeof(cl_int), &dst.cols));

    openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
示例#28
0
/**
 * Launches the "lkDense" OpenCL kernel for the image pair I / J.
 *
 * When support_image2d() reports true, I and J are uploaded as textures via
 * bindTexture and the pyrlk program is used; otherwise the raw device buffers
 * are passed to the pyrlk_no_image program, which additionally receives the
 * row pitch of I in elements.
 *
 * Kernel arguments, in order: the two images, (data, step) for u, v, prevU
 * and prevV, I.rows, I.cols, [row pitch - buffer path only], winSize.width,
 * winSize.height, iters, calcErr.
 *
 * @param I, J          images handed to the kernel as its first two arguments
 * @param u, v          buffers whose data pointer and step are forwarded
 * @param prevU, prevV  buffers whose data pointer and step are forwarded
 * @param err           only tested against null to derive calcErr; never dereferenced here
 * @param winSize       window size forwarded as width / height
 * @param iters         iteration count forwarded to the kernel
 */
static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
                 oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters)
{
    typedef std::pair<size_t, const void *> KernelArg;

    Context *clCxt = I.clCxt;
    const bool haveImage2d = support_image2d();
    int pitchInElems = I.step / I.elemSize();

    String kernelName = "lkDense";

    // One work-item per pixel of I, grouped 16x16.
    size_t localThreads[3]  = { 16, 16, 1 };
    size_t globalThreads[3] = { static_cast<size_t>(I.cols), static_cast<size_t>(I.rows), 1 };

    // Error computation is requested simply by supplying a non-null err pointer.
    bool calcErr = err ? true : false;

    // Textures when available, otherwise the matrices' own device buffers.
    cl_mem ITex = haveImage2d ? bindTexture(I) : (cl_mem)I.data;
    cl_mem JTex = haveImage2d ? bindTexture(J) : (cl_mem)J.data;

    std::vector<KernelArg> args;

    // Input images.
    args.push_back(KernelArg(sizeof(cl_mem), &ITex));
    args.push_back(KernelArg(sizeof(cl_mem), &JTex));

    // (data, step) pairs for u, v, prevU and prevV.
    args.push_back(KernelArg(sizeof(cl_mem), &u.data));
    args.push_back(KernelArg(sizeof(cl_int), &u.step));
    args.push_back(KernelArg(sizeof(cl_mem), &v.data));
    args.push_back(KernelArg(sizeof(cl_int), &v.step));
    args.push_back(KernelArg(sizeof(cl_mem), &prevU.data));
    args.push_back(KernelArg(sizeof(cl_int), &prevU.step));
    args.push_back(KernelArg(sizeof(cl_mem), &prevV.data));
    args.push_back(KernelArg(sizeof(cl_int), &prevV.step));

    // Image geometry.
    args.push_back(KernelArg(sizeof(cl_int), &I.rows));
    args.push_back(KernelArg(sizeof(cl_int), &I.cols));
    if (!haveImage2d)
        args.push_back(KernelArg(sizeof(cl_int), &pitchInElems));

    // Algorithm parameters.
    args.push_back(KernelArg(sizeof(cl_int), &winSize.width));
    args.push_back(KernelArg(sizeof(cl_int), &winSize.height));
    args.push_back(KernelArg(sizeof(cl_int), &iters));
    args.push_back(KernelArg(sizeof(cl_char), &calcErr));

    if (!haveImage2d)
    {
        openCLExecuteKernel2(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
        return;
    }

    openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);

    // Release the textures created by bindTexture above.
    releaseTexture(ITex);
    releaseTexture(JTex);
}
示例#29
0
        /**
         * Creates a 2D OpenCL image matching the size and pixel format of mat
         * and fills it from mat's device buffer.
         *
         * Accepted depths are CV_8U, CV_32S and CV_32F with 1, 3 or 4
         * channels; any other combination raises CV_Error. When the rows of
         * mat are padded (step != cols * elemSize) the data is first repacked
         * into a temporary tightly packed buffer, since the buffer-to-image
         * copy takes no source row pitch.
         *
         * Every OpenCL status is checked: image creation is validated before
         * the handle is used, and if the upload fails the image and the
         * temporary buffer are released before the error is reported through
         * openCLSafeCall.
         *
         * @param mat  device matrix to upload
         * @return the new image object; the caller is responsible for
         *         releasing it (e.g. with releaseTexture)
         */
        cl_mem bindTexture(const oclMat &mat)
        {
            cl_mem texture;
            cl_image_format format;
            int err;
            int depth    = mat.depth();
            int channels = mat.oclchannels();

            switch(depth)
            {
            case CV_8U:
                format.image_channel_data_type = CL_UNSIGNED_INT8;
                break;
            case CV_32S:
                format.image_channel_data_type = CL_UNSIGNED_INT32;
                break;
            case CV_32F:
                format.image_channel_data_type = CL_FLOAT;
                break;
            default:
                CV_Error(-1, "Image forma is not supported");
                break;
            }
            switch(channels)
            {
            case 1:
                format.image_channel_order     = CL_R;
                break;
            case 3:
                format.image_channel_order     = CL_RGB;
                break;
            case 4:
                format.image_channel_order     = CL_RGBA;
                break;
            default:
                CV_Error(-1, "Image format is not supported");
                break;
            }
#ifdef CL_VERSION_1_2
            //this enables backwards portability to
            //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
            if(Context::getContext()->supportsFeature(Context::CL_VER_1_2))
            {
                cl_image_desc desc;
                desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
                desc.image_width      = mat.cols;
                desc.image_height     = mat.rows;
                desc.image_depth      = 0;
                desc.image_array_size = 1;
                desc.image_row_pitch  = 0;
                desc.image_slice_pitch = 0;
                desc.buffer           = NULL;
                desc.num_mip_levels   = 0;
                desc.num_samples      = 0;
                texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
            }
            else
#endif
            {
                texture = clCreateImage2D(
                    (cl_context)mat.clCxt->oclContext(),
                    CL_MEM_READ_WRITE,
                    &format,
                    mat.cols,
                    mat.rows,
                    0,
                    NULL,
                    &err);
            }
            // Validate creation before the handle is handed to any other call;
            // on failure no image exists, so there is nothing to release yet.
            openCLSafeCall(err);

            cl_command_queue queue = (cl_command_queue)mat.clCxt->oclCommandQueue();

            // All byte counts are computed in size_t to avoid int overflow on large images.
            const size_t rowBytes = mat.cols * mat.elemSize();
            const bool   isPadded = rowBytes != mat.step;

            size_t origin[] = { 0, 0, 0 };
            size_t region[] = { static_cast<size_t>(mat.cols), static_cast<size_t>(mat.rows), 1 };

            cl_mem devData = (cl_mem)mat.data;
            if (isPadded)
            {
                // Repack the padded rows into a contiguous temporary buffer.
                devData = clCreateBuffer((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_ONLY,
                    rowBytes * mat.rows, NULL, &err);
                if (err == 0) // 0 is the OpenCL success status
                {
                    const size_t packedRegion[3] = { rowBytes, static_cast<size_t>(mat.rows), 1 };
                    err = clEnqueueCopyBufferRect(queue, (cl_mem)mat.data, devData, origin, origin,
                        packedRegion, mat.step, 0, rowBytes, 0, 0, NULL, NULL);
                    clFlush(queue);
                }
            }

            // Upload only if every preceding step succeeded.
            if (err == 0)
                err = clEnqueueCopyBufferToImage(queue, devData, texture, 0, origin, region, 0, NULL, NULL);

            // The temporary buffer (if it was actually created) is no longer needed.
            if (isPadded && devData)
            {
                clFlush(queue);
                clReleaseMemObject(devData);
            }

            // Do not leak the image when the upload could not be performed.
            if (err != 0)
                clReleaseMemObject(texture);

            openCLSafeCall(err);
            return texture;
        }
示例#30
0
/**
 * Resizes dst to hold one oclMat per channel of src and, when src has at
 * least one channel, delegates the actual work to split_merge::split.
 *
 * @param src  matrix to split
 * @param dst  output vector; always resized to src.oclchannels() entries
 */
void cv::ocl::split(const oclMat &src, std::vector<oclMat> &dst)
{
    const int planeCount = src.oclchannels();
    dst.resize(planeCount);

    // An empty vector has no first element to take the address of.
    if (planeCount <= 0)
        return;

    split_merge::split(src, &dst[0]);
}