Example #1
0
// Copies this matrix into m with a device-to-device 2D buffer copy.
// m is (re)created with this matrix's size and type before the copy.
// An empty source is rejected by a debug-only assertion.
void cv::ocl::oclMat::copyTo( oclMat &m ) const
{
    CV_DbgAssert(!this->empty());

    // Make the destination match the source geometry and element type.
    m.create(size(), type());

    // Only the payload bytes of each row are transferred.
    const size_t rowBytes = cols * elemSize();

    openCLCopyBuffer2D(clCxt,
                       m.data, m.step, m.offset,
                       data, step, rowBytes, rows, offset,
                       clMemcpyDeviceToDevice);
}
////////////////////////////////////////////////////////////////////////
// convert_C4C3
// Runs the "convertC4C3" kernel over the whole (non-ROI) extent of src,
// writing into the raw cl_mem buffer dst. The kernel is built with
// GENTYPE4 set to the 4-component vector type matching src.depth().
// Raises CV_StsUnsupportedFormat for depth codes outside 0..6.
static void convert_C4C3(const oclMat &src, cl_mem &dst)
{
    // Depth code (0..6) -> OpenCL 4-component vector type name.
    static const char *const gentype4[] =
    {
        "uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"
    };

    const int depthCode = src.depth();
    if (depthCode < 0 || depthCode > 6)
    {
        CV_Error(CV_StsUnsupportedFormat, "unknown depth");
    }

    char compile_option[32];
    sprintf(compile_option, "-D GENTYPE4=%s", gentype4[depthCode]);

    // Geometry handed to the kernel: row stride in pixels and the index of
    // the last pixel of the full underlying buffer.
    const int totalPixels = src.wholecols * src.wholerows;
    int strideInPixels = src.step1() / src.oclchannels();
    int lastPixel = totalPixels - 1;

    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&strideInPixels));
    args.push_back( make_pair( sizeof(cl_int), (void *)&lastPixel));

    // One work-item per group of four pixels, rounded up to whole
    // work-groups of 256.
    const int quads = (totalPixels + 3) / 4;
    size_t localThreads[3] = {256, 1, 1};
    size_t globalThreads[3] = {(size_t)((quads + 255) / 256 * 256), 1, 1};

    Context *clCxt = src.clCxt;
    string kernelName = "convertC4C3";
    openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
Example #3
0
// Launches "getBackgroundImage2_kernel" from the bgfg_mog program.
// The kernel is built with NMIXTURES=<nmixtures>, plus CN1 when cn == 1.
// One work-item is requested per element of modesUsed; dst's ROI origin is
// passed to the kernel as an (x, y) element offset.
void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures)
{
    Context* clCxt = Context::getContext();

    // Build flags depend only on the channel count and mixture count.
    char build_option[50];
    const char* optionFormat = (cn == 1) ? "-D CN1 -D NMIXTURES=%d" : "-D NMIXTURES=%d";
    snprintf(build_option, sizeof(build_option), optionFormat, nmixtures);

    // Row strides expressed in elements rather than bytes.
    int modesUsedStride = (int)(modesUsed.step/modesUsed.elemSize());
    int weightStride = (int)(weight.step/weight.elemSize());
    int meanStride = (int)(mean.step/mean.elemSize());
    int dstStride = (int)(dst.step/dst.elemSize());

    // Split dst's byte offset into a row index and an element column index.
    int dstOriginY = (int)(dst.offset/dst.step);
    int dstOriginX = (int)(dst.offset%dst.step) / (int)dst.elemSize();

    std::vector<std::pair<size_t, const void*> > args;

    // Buffers and the c_TB constant.
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&modesUsed.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst.data));
    args.push_back(std::make_pair(sizeof(cl_float), (void*)&c_TB));

    // Image extent.
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.cols));

    // Strides.
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsedStride));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weightStride));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&meanStride));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dstStride));

    // Destination ROI origin.
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dstOriginX));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&dstOriginY));

    size_t global_thread[] = {modesUsed.cols, modesUsed.rows, 1};
    size_t local_thread[] = {32, 8, 1};

    String kernel_name = "getBackgroundImage2_kernel";
    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
}
Example #4
0
// Convenience overload: detects keypoints on the device into the internal
// d_keypoints_ buffer, then downloads them into the host vector.
// Does nothing (keypoints is left untouched) when image is empty.
void cv::ocl::FAST_OCL::operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
{
    if (!image.empty())
    {
        // Device-side detection via the oclMat-output overload.
        this->operator()(image, mask, d_keypoints_);

        // Host-side conversion of the detected keypoints.
        downloadKeypoints(d_keypoints_, keypoints);
    }
}
Example #5
0
void cv::ocl::FAST_OCL::downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints)
{
    if (d_keypoints.empty())
        return;

    Mat h_keypoints(d_keypoints);
    convertKeypoints(h_keypoints, keypoints);
}
Example #6
0
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
{
    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

    std::string build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
                                        src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");

    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
}
Example #7
0
int cv::ocl::FAST_OCL::calcKeypointsOCL(const oclMat& img, const oclMat& mask, int maxKeypoints)
{
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3] = {divUp(img.cols - 6, localThreads[0]) * localThreads[0],
                               divUp(img.rows - 6, localThreads[1]) * localThreads[1],
                               1
                              };

    Context *clCxt = Context::getContext();
    String kernelName = (mask.empty()) ? "calcKeypoints" : "calcKeypointsWithMask";
    std::vector< std::pair<size_t, const void *> > args;

    int counter = 0;
    int err = CL_SUCCESS;
    cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(),
                                      CL_MEM_COPY_HOST_PTR, sizeof(int),
                                      &counter, &err);

    int kpLocStep = kpLoc_.step / kpLoc_.elemSize();
    int scoreStep = score_.step / score_.elemSize();
    int nms = (nonmaxSupression) ? 1 : 0;

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
    if (!mask.empty()) args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&kpLoc_.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&score_.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nms));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxKeypoints));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&threshold));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.cols));
    if (!mask.empty()) args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&scoreStep));

    openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1);

    openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(),
                                       counterCL, CL_TRUE, 0, sizeof(int), &counter, 0, NULL, NULL));
    openCLSafeCall(clReleaseMemObject(counterCL));

    return counter;
}
// knn match
// k-nearest-neighbour match of query descriptors against one train set.
// Output buffers are sized here: for k == 2 the indices and distances are
// packed as one row of 2-channel elements; otherwise they are nQuery x k
// matrices and allDist (nQuery x nTrain) is sized as well.
// trainIdx is pre-filled with -1 before dispatching the kernel.
// Returns immediately, touching nothing, if query or train is empty.
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx,
        oclMat &distance, oclMat &allDist, int k, const oclMat &mask)
{
    if (query.empty() || train.empty())
        return;

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.type() == query.type() && train.cols == query.cols);

    const int queryCount = query.rows;
    const int trainCount = train.rows;

    if (k != 2)
    {
        // General case: one row per query, k columns.
        ensureSizeIsEnough(queryCount, k, CV_32S, trainIdx);
        ensureSizeIsEnough(queryCount, k, CV_32F, distance);
        ensureSizeIsEnough(queryCount, trainCount, CV_32FC1, allDist);
    }
    else
    {
        // Two neighbours: packed as 2-channel elements in a single row.
        ensureSizeIsEnough(1, queryCount, CV_32SC2, trainIdx);
        ensureSizeIsEnough(1, queryCount, CV_32FC2, distance);
    }

    trainIdx.setTo(Scalar::all(-1));

    kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType);
}
Example #9
0
static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var,
    int nmixtures, float varThreshold, float backgroundRatio)
{
    Context* clCxt = Context::getContext();

    size_t local_thread[] = {32, 8, 1};
    size_t global_thread[] = {frame.cols, frame.rows, 1};

    int frame_step = (int)(frame.step/frame.elemSize());
    int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
    int weight_step = (int)(weight.step/weight.elemSize());
    int mean_step = (int)(mean.step/mean.elemSize());
    int var_step = (int)(var.step/var.elemSize());

    int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
    int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
    fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();

    int frame_offset_y = (int)(frame.offset/frame.step);
    int frame_offset_x = (int)(frame.offset%frame.step);
    frame_offset_x = frame_offset_x/(int)frame.elemSize();

    char build_option[50];
    if(cn == 1)
    {
        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
    }else
    {
        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
    }

    String kernel_name = "mog_withoutLearning_kernel";
    std::vector<std::pair<size_t, const void*> > args;

    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&frame.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&fgmask.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&weight.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&mean.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void*)&var.data));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));

    args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
    args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));

    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
    args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));

    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
}
Example #10
0
static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
                           const std::string & additionalOptions = std::string(),
                           const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
{
    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

    std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
    if (!additionalOptions.empty())
        build_options += additionalOptions;

    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    if (!data1.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data ));
    if (!data2.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data ));

   size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
Example #11
0
void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
                          oclMat &result)
{
    cv::ocl::Context *ctx = img1.clCxt;
    assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
    int channels = img1.oclchannels();
    int depth = img1.depth();
    int rows = img1.rows;
    int cols = img1.cols;
    int istep = img1.step1();
    int wstep = weights1.step1();
    size_t globalSize[] = {cols * channels / 4, rows, 1};
    size_t localSize[] = {256, 1, 1};

    vector< pair<size_t, const void *> > args;
    result.create(img1.size(), CV_MAKE_TYPE(depth,img1.channels()));
    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
        std::string kernelName = "BlendLinear";

        openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
    }
}
// Matches query descriptors against a collection of train sets.
// trainIdx, imgIdx and distance are each sized to a single row with one
// entry per query row before the dispatcher is invoked.
// Returns immediately if query or trainCollection is empty.
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx,
        oclMat &imgIdx, oclMat &distance, const oclMat &masks)
{
    if (query.empty() || trainCollection.empty())
    {
        return;
    }

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);

    // One result slot per query descriptor.
    const int queryCount = query.rows;
    ensureSizeIsEnough(1, queryCount, CV_32S, trainIdx);
    ensureSizeIsEnough(1, queryCount, CV_32S, imgIdx);
    ensureSizeIsEnough(1, queryCount, CV_32F, distance);

    // The collection's column count is passed as the number of train sets.
    matchDispatcher(query, &trainCollection, trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);
}
Example #13
0
            ////////////////////////////////////////////////////////////////////////////////////////////////////
            //////////////////////////////////////split/////////////////////////////////////////////////////////////
            //////////////////////////////////////////////////////////////////////////////////////////////////
            // Launches the "split_vector" kernel of the split_mat program to
            // split mat_src into the per-channel matrices mat_dst[0..channels-1].
            // The column count handed to the kernel is mat_src.cols divided
            // (rounding up) by a per-(channels, depth) factor from a table.
            // NOTE(review): the table row for a single channel is all zeros,
            // so this presumably must not be called with channels == 1 - confirm
            // against callers.
            void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst)
            {
                const int channels = mat_src.channels();
                const int depth = mat_src.depth();

                // Divisor applied to the column count, indexed by
                // [channels - 1][depth of the first destination].
                static const int columnDivisor[4][7] =
                {
                    {0, 0, 0, 0, 0, 0, 0},
                    {8, 8, 8, 8, 4, 4, 2},
                    {8, 8, 8, 8, 4, 4, 4},
                    {4, 4, 2, 2, 1, 1, 1}
                };

                size_t divisor = columnDivisor[channels - 1][mat_dst[0].depth()];
                int cols = divUp(mat_src.cols, divisor);

                size_t localThreads[3]  = { 64, 4, 1 };
                size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
                    divUp(mat_src.rows, localThreads[1]) * localThreads[1],
                    1
                };

                vector<pair<size_t , const void *> > args;

                // Source buffer and geometry.
                args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data));
                args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.step));
                args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.rows));
                args.push_back( make_pair( sizeof(cl_int), (void *)&cols));

                // The first two destinations are always passed.
                args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[0].data));
                args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[0].step));
                args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[1].data));
                args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[1].step));

                // Third and fourth destinations only when the source has them.
                if (channels >= 3)
                {
                    args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[2].data));
                    args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[2].step));
                }
                if (channels >= 4)
                {
                    args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst[3].data));
                    args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst[3].step));
                }

                Context *clCxt = mat_src.clCxt;
                string kernelName = "split_vector";
                openCLExecuteKernel(clCxt, &split_mat, kernelName, globalThreads, localThreads, args, channels, depth);
            }
Example #14
0
// Dense pyramidal Lucas-Kanade optical flow between prevImg and nextImg.
//
// prevImg, nextImg - input frames; must be CV_8UC1 and of equal size/type.
// u, v             - receive copies of the final flow buffers.
// err              - optional; when non-null it is created as CV_32FC1 with
//                    the input size and passed to the level-0 pass only.
//
// Requires maxLevel >= 0 and a window larger than 2x2 (all checked with
// CV_Assert). Blocks on clFinish for the command queue before returning.
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err)
{
    CV_Assert(prevImg.type() == CV_8UC1);
    CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
    CV_Assert(maxLevel >= 0);
    CV_Assert(winSize.width > 2 && winSize.height > 2);

    if (err)
        err->create(prevImg.size(), CV_32FC1);

    // One pyramid slot per level, level 0 being the input resolution.
    prevPyr_.resize(maxLevel + 1);
    nextPyr_.resize(maxLevel + 1);

    // Level 0: the previous frame is used as-is, the next frame is converted
    // to 32-bit float.
    prevPyr_[0] = prevImg;
    //nextImg.convertTo(nextPyr_[0], CV_32F);
    convertTo(nextImg, nextPyr_[0], CV_32F);

    // Build the remaining levels from the level above each.
    for (int level = 1; level <= maxLevel; ++level)
    {
        pyrDown_cus(prevPyr_[level - 1], prevPyr_[level]);
        pyrDown_cus(nextPyr_[level - 1], nextPyr_[level]);
    }

    // Two flow buffer pairs, both sized for the full-resolution image.
    ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
    ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
    ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
    ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
    // Only the pair at index 1 is zeroed here.
    //uPyr_[1].setTo(Scalar::all(0));
    //vPyr_[1].setTo(Scalar::all(0));
    setTo(uPyr_[1], Scalar::all(0));
    setTo(vPyr_[1], Scalar::all(0));

    Size winSize2i(winSize.width, winSize.height);

    // idx selects the buffer pair passed first to lkDense_run; idx2 is the
    // other pair.
    int idx = 0;

    // Walk the pyramid from the top level (maxLevel) down to level 0.
    for (int level = maxLevel; level >= 0; level--)
    {
        int idx2 = (idx + 1) & 1;

        // The error output is only requested on the final (level 0) pass.
        lkDense_run(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
                    level == 0 ? err : 0, winSize2i, iters);

        // Swap buffer roles for the next level; after the last pass idx is
        // left unchanged so it still names the pair copied out below.
        if (level > 0)
            idx = idx2;
    }

    //uPyr_[idx].copyTo(u);
    //vPyr_[idx].copyTo(v);
    copyTo(uPyr_[idx], u);
    copyTo(vPyr_[idx], v);

    // Wait for all queued work on this context's command queue to complete.
    clFinish(prevImg.clCxt->impl->clCmdQueue);
}
////////////////////////////////////////////////////////////////////////
// convert_C4C3
// Runs the "convertC4C3" kernel over the whole (non-ROI) extent of src,
// writing into the raw cl_mem buffer dst whose step is dstStep.
// One work-item is requested per source pixel, rounded up to whole
// work-groups of 256.
void convert_C4C3(const oclMat &src, cl_mem &dst, int dstStep)
{
    // Source row stride divided by the channel count.
    int srcStrideInPixels = src.step1() / src.channels();

    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&srcStrideInPixels));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));

    const int totalPixels = src.wholecols * src.wholerows;
    size_t localThreads[3] = {256, 1, 1};
    size_t globalThreads[3] = {(size_t)((totalPixels + 255) / 256 * 256), 1, 1};

    Context *clCxt = src.clCxt;
    string kernelName = "convertC4C3";

    // The last argument is derived from the per-channel element size.
    openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, src.elemSize1() >> 1);
}
Example #16
0
// Packs the train descriptor headers (and, when masks are supplied, the mask
// headers) into host matrices whose elements are sizeof(oclMat) bytes wide,
// then uploads them into trainCollection / maskCollection.
// Without masks, maskCollection is released. With masks, there must be one
// mask per train set, and each non-empty mask must be CV_8UC1 with as many
// columns as its train set has rows.
// Does nothing when the matcher holds no train descriptors.
void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks)
{
    if (empty())
        return;

    if (!masks.empty())
    {
        CV_Assert(masks.size() == trainDescCollection.size());

        const size_t setCount = trainDescCollection.size();

        Mat trainCollectionCPU(1, static_cast<int>(setCount), CV_8UC(sizeof(oclMat)));
        Mat maskCollectionCPU(1, static_cast<int>(setCount), CV_8UC(sizeof(oclMat)));

        oclMat *trainSlots = trainCollectionCPU.ptr<oclMat>();
        oclMat *maskSlots = maskCollectionCPU.ptr<oclMat>();

        for (size_t i = 0; i < setCount; ++i)
        {
            const oclMat &train = trainDescCollection[i];
            const oclMat &mask = masks[i];

            CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows));

            trainSlots[i] = train;
            maskSlots[i] = mask;
        }

        trainCollection.upload(trainCollectionCPU);
        maskCollection.upload(maskCollectionCPU);
        return;
    }

    // No masks: only the train headers are packed and uploaded.
    const size_t setCount = trainDescCollection.size();

    Mat trainCollectionCPU(1, static_cast<int>(setCount), CV_8UC(sizeof(oclMat)));
    oclMat *trainSlots = trainCollectionCPU.ptr<oclMat>();

    for (size_t i = 0; i < setCount; ++i)
        trainSlots[i] = trainDescCollection[i];

    trainCollection.upload(trainCollectionCPU);
    maskCollection.release();
}
Example #17
0
    // Detects keypoints across all octaves and writes them into `keypoints`.
    //
    // keypoints is sized to SURF_OCL::ROWS_COUNT x maxFeatures (CV_32FC1),
    // zeroed, filled by the per-octave stages, and finally has its column
    // count set to the number of features found (capped at maxFeatures).
    // The ANGLE_ROW is set to 90 for upright mode; otherwise findOrientation
    // is run on the keypoints.
    void detectKeypoints(oclMat &keypoints)
    {
        // create image pyramid buffers
        // different layers have same sized buffers, but they are sampled from gaussian kernel.
        ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.det);
        ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.trace);

        // Candidate buffer (one 4-int entry per candidate) and output matrix.
        ensureSizeIsEnough(1, maxCandidates, CV_32SC4, surf_.maxPosBuffer);
        ensureSizeIsEnough(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1, keypoints);
        keypoints.setTo(Scalar::all(0));

        for (int octave = 0; octave < surf_.nOctaves; ++octave)
        {
            // Each octave halves the layer dimensions.
            const int layer_rows = img_rows >> octave;
            const int layer_cols = img_cols >> octave;

            //loadOctaveConstants(octave, layer_rows, layer_cols);

            icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows);

            icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
                                     octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);

            // Read back the counter at index (1 + octave) and clamp it to the
            // capacity of the candidate buffer.
            unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
            maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));

            // Skip interpolation when this octave produced no candidates.
            if (maxCounter > 0)
            {
                icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter,
                                           keypoints, counters, octave, layer_rows, maxFeatures);
            }
        }
        // Counter index 0 is read as the total feature count, clamped to the
        // number of columns allocated for keypoints.
        unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
        featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));

        // Narrow the matrix header to the features actually found.
        keypoints.cols = featureCounter;

        if (surf_.upright)
            keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
        else
            findOrientation(keypoints);
    }
Example #18
0
// Launches the "svm_rbf" kernel of the svm program.
//
// src, src_e - input buffers; dst - output buffer.
// src_rows, src2_cols - launch extent: one work-item per (src2_cols, src_rows).
// var_count  - passed to the kernel as `width`.
// gamma1     - passed as cl_double when the context supports doubles,
//              otherwise narrowed to cl_float.
// flag       - when true the kernel is built with "-D ADDEXP"; when false it
//              is built with empty options.
static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, int src2_cols, int var_count, double gamma1, bool flag)
{

    Context *clCxt = Context::getContext();

    String kernelName = "svm_rbf";

    int width = var_count;
    // Row strides in elements.
    int src_step = (int)src.step / src.elemSize();
    int src_e_step = (int)src_e.step / src_e.elemSize();
    int dst_step = (int)dst.step / dst.elemSize();

    int x = MIN(16, src_rows);
    int y = MIN(16, src2_cols);
    size_t localThreads[] = {x, y, 1};
    size_t globalThreads[] = {src2_cols,  src_rows, 1};

    // Must start out as an empty string: the buffer is handed to
    // openCLExecuteKernel even when `flag` is false, and would otherwise be
    // read uninitialized.
    char build_options[50] = "";

    if(flag)
        sprintf(build_options, "-D ADDEXP");

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src_e.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_e_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&width));

    // gamma lives at function scope because args stores its address until
    // the kernel is launched.
    float gamma = 0.0f;
    if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
    {
        gamma = (float)gamma1;
        args.push_back(std::make_pair(sizeof(cl_float), (void* )&gamma));
    }
    else
        args.push_back(std::make_pair(sizeof(cl_double), (void* )&gamma1));

    openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
Example #19
0
void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
        const std::vector<oclMat> &masks, bool compactResult)
{
    if (k == 2)
    {
        oclMat trainCollection;
        oclMat maskCollection;

        makeGpuCollection(trainCollection, maskCollection, masks);

        oclMat trainIdx, imgIdx, distance;

        knnMatch2Collection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection);
        knnMatch2Download(trainIdx, imgIdx, distance, matches);
    }
    else
    {
        if (query.empty() || empty())
            return;

        std::vector< std::vector<DMatch> > curMatches;
        std::vector<DMatch> temp;
        temp.reserve(2 * k);

        matches.resize(query.rows);
        std::for_each(matches.begin(), matches.end(), std::bind2nd(std::mem_fun_ref(&std::vector<DMatch>::reserve), k));

        for (size_t imgIdx = 0, size = trainDescCollection.size(); imgIdx < size; ++imgIdx)
        {
            knnMatch(query, trainDescCollection[imgIdx], curMatches, k, masks.empty() ? oclMat() : masks[imgIdx]);

            for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
            {
                std::vector<DMatch> &localMatch = curMatches[queryIdx];
                std::vector<DMatch> &globalMatch = matches[queryIdx];

                for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));

                temp.clear();
                merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));

                globalMatch.clear();
                const size_t count = std::min((size_t)k, temp.size());
                copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch));
            }
        }

        if (compactResult)
        {
            std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(), std::mem_fun_ref(&std::vector<DMatch>::empty));
            matches.erase(new_end, matches.end());
        }
    }
}
Example #20
0
            static void split(const oclMat &mat_src, oclMat *mat_dst)
            {
                CV_Assert(mat_dst);

                int depth = mat_src.depth();
                int num_channels = mat_src.oclchannels();
                Size size = mat_src.size();

                if(num_channels == 1)
                {
                    mat_src.copyTo(mat_dst[0]);
                    return;
                }

                int i;
                for(i = 0; i < num_channels; i++)
                    mat_dst[i].create(size, CV_MAKETYPE(depth, 1));

                split_vector_run(mat_src, mat_dst);
            }
Example #21
0
// Builds the x/y remap tables for a plane warp by running the
// "buildWarpPlaneMaps" kernel.
//
// dst_roi - destination rectangle; its size sets the map size and its
//           top-left corner is passed to the kernel as (tl_u, tl_v).
// K, R    - 3x3 CV_32F matrices; the kernel receives K * R^T.
// T       - 3-element CV_32F vector (3x1 or 1x3), continuous.
// scale   - passed through to the kernel.
// map_x, map_y - outputs, created as CV_32F with dst_roi's size.
//
// K * R^T (9 floats) and T (3 floats) are packed into one 12-float row and
// uploaded as a single device buffer.
void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
                                 float scale, oclMat &map_x, oclMat &map_y)
{
    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
    CV_Assert((T.size() == Size(3, 1) || T.size() == Size(1, 3)) && T.type() == CV_32F && T.isContinuous());

    Mat K_Rinv = K * R.t();
    CV_Assert(K_Rinv.isContinuous());

    Mat KRT_mat(1, 12, CV_32FC1); // 9 + 3

    // Range is half-open, so columns [0, 9) hold K * R^T and [9, 12) hold T.
    // The data must be copied with copyTo: assigning a Mat to a sub-matrix
    // header only rebinds that header and leaves KRT_mat untouched.
    Mat KRT_K = KRT_mat(Range::all(), Range(0, 9));
    Mat KRT_T = KRT_mat(Range::all(), Range(9, 12));
    K_Rinv.reshape(1, 1).copyTo(KRT_K);
    T.reshape(1, 1).copyTo(KRT_T);

    // transfer K_Rinv and T into a single cl_mem
    oclMat KRT_oclMat(KRT_mat);

    map_x.create(dst_roi.size(), CV_32F);
    map_y.create(dst_roi.size(), CV_32F);

    int tl_u = dst_roi.tl().x;
    int tl_v = dst_roi.tl().y;

    Context *clCxt = Context::getContext();
    string kernelName = "buildWarpPlaneMaps";
    vector< pair<size_t, const void *> > args;

    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
    // Pass the device buffer, not the host matrix's data pointer.
    args.push_back( make_pair( sizeof(cl_mem), (void *)&KRT_oclMat.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
    args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
    args.push_back( make_pair( sizeof(cl_float), (void *)&scale));

    size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
    size_t localThreads[3]  = {32, 8, 1};
    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
}
Example #22
0
// Converts src into dst with optional scaling: dst = src * alpha + beta,
// performed by convert_run_cus.
//
// rtype - requested type; negative means "same as src". Only its depth is
//         used; the channel count always comes from src.
// When the depth is unchanged and alpha/beta are 1/0 (within epsilon) the
// call reduces to a plain copy.
void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta )
{
    //cout << "cv::ocl::oclMat::convertTo()" << endl;

    const double eps = std::numeric_limits<double>::epsilon();
    const bool noScale = fabs(alpha - 1) < eps && fabs(beta) < eps;

    rtype = (rtype < 0) ? src.type()
                        : CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels());

    const int sdepth = src.depth();
    const int ddepth = CV_MAT_DEPTH(rtype);

    // Same depth and identity scaling: nothing to convert.
    if( sdepth == ddepth && noScale )
    {
        src.copyTo(dst);
        return;
    }

    // For an in-place depth change, take a separate header of the source
    // first, because dst is about to be re-created.
    oclMat temp;
    const oclMat *psrc = &src;
    if( sdepth != ddepth && psrc == &dst )
    {
        temp = src;
        psrc = &temp;
    }

    dst.create( src.size(), rtype );
    convert_run_cus(*psrc, dst, alpha, beta);
}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
                  src.rows == dst.rows && src.cols == dst.cols
                  && mask.type() == CV_8UC1);

    vector<pair<size_t , const void *> > args;

    std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"},
        {"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"},
        {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
        {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
    };
    char compile_option[32];
    sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];

    globalThreads[0] = divUp(dst.cols, localThreads[0]) * localThreads[0];
    globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1];
    globalThreads[2] = 1;

    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();

    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));

    openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
                        localThreads, args, -1, -1, compile_option);
}
Example #24
0
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
static void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta)
{
    std::string kernelName = "convert_to_S";
    std::stringstream idxStr;
    idxStr << src.depth();
    kernelName += idxStr.str();
    float alpha_f = (float)alpha, beta_f = (float)beta;
    CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);
    std::vector<std::pair<size_t , const void *> > args;
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];
    globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
    globalThreads[2] = 1;
    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
    if(dst.type() == CV_8UC1)
    {
        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
    }
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f ));
    openCLExecuteKernel2(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
                         localThreads, args, dst.oclchannels(), dst.depth(), CLFLUSH);
}
Example #25
0
/**
 * Launches the "svm_linear" kernel of the svm program on the current context.
 *
 * @param src       first input matrix
 * @param src2      second input matrix
 * @param dst       output matrix
 * @param src_rows  passed to the kernel and used as the grid height
 * @param src2_cols passed to the kernel and used as the grid width
 * @param var_count passed to the kernel as its last integer argument
 * @param alpha1    scalar passed as double when the context reports
 *                  FEATURE_CL_DOUBLE, otherwise narrowed to float
 * @param beta1     scalar handled the same way as alpha1
 */
static void matmul_linear(oclMat & src, oclMat & src2, oclMat & dst, int src_rows, int src2_cols, int var_count, double alpha1, double beta1)
{
    Context *clCxt = Context::getContext();
    String kernelName = "svm_linear";

    // Row strides in elements. The whole quotient is narrowed to int rather
    // than only the numerator.
    int src_step = (int)(src.step / src.elemSize());
    int src2_step = (int)(src2.step / src2.elemSize());
    int dst_step = (int)(dst.step / dst.elemSize());

    // Explicit conversions: brace-initialising size_t from a non-constant int
    // is a narrowing conversion and is rejected by C++11 and later.
    // NOTE(review): the local x-size is derived from src_rows while the global
    // x-size is src2_cols (and vice versa for y). Left as in the original -
    // confirm whether this is intended.
    size_t localThreads[] = { static_cast<size_t>(MIN(16, src_rows)),
                              static_cast<size_t>(MIN(16, src2_cols)),
                              1 };
    size_t globalThreads[] = { static_cast<size_t>(src2_cols),
                               static_cast<size_t>(src_rows),
                               1 };

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&src2.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_step));
    args.push_back(std::make_pair(sizeof(cl_mem), (void* )&dst.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&dst_step));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src_rows));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&src2_cols));
    args.push_back(std::make_pair(sizeof(cl_int), (void* )&var_count));

    // Declared outside the branch so the pointers stored in args stay valid
    // until the kernel has been launched.
    float alpha = 0.0f, beta = 0.0f;
    if(!clCxt->supportsFeature(FEATURE_CL_DOUBLE))
    {
        // No double support reported: hand the scalars over as floats.
        alpha = (float)alpha1;
        beta = (float)beta1;
        args.push_back(std::make_pair(sizeof(cl_float), (void* )&alpha));
        args.push_back(std::make_pair(sizeof(cl_float), (void* )&beta));
    }
    else
    {
        args.push_back(std::make_pair(sizeof(cl_double), (void* )&alpha1));
        args.push_back(std::make_pair(sizeof(cl_double), (void* )&beta1));
    }
    openCLExecuteKernel(clCxt, &svm, kernelName, globalThreads, localThreads, args, -1, -1);
}
Example #26
0
// FIXME:
// This function cannot sort arrays with duplicated keys
//
// Selection sort of `vals` by `keys` in two kernel passes
// ("selectionSortLocal", then "selectionSortFinal") from the
// kernel_sort_by_key program. The function is currently disabled: its first
// statement raises an error, so the code after it is presumably never
// reached (assuming CV_Error does not return - confirm).
//
//   keys, vals     - buffers handed to both kernels
//   vecSize        - element count; also the global work size
//   isGreaterThan  - forwarded to genSortBuildOption to build the compile options
static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreaterThan)
{
    CV_Error(-1, "This function is incorrect at the moment.");
    Context * cxt = Context::getContext();

    size_t globalThreads[3] = {vecSize, 1, 1};

    std::vector< std::pair<size_t, const void *> > args;
    char build_opt_buf [100];
    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);

    // The kernels receive the element count as sizeof(cl_int) bytes. Pass a
    // real int instead of the first bytes of a size_t, which is only correct
    // when size_t is wider than int on a little-endian host.
    int vecSizeArg = static_cast<int>(vecSize);

    //local
    String kernelname = "selectionSortLocal";
    // Size in bytes of the local-memory argument: one key per work-item of a group.
#ifdef ANDROID
    int lds_size = cxt->getDeviceInfo().maxWorkGroupSize * keys.elemSize();
#else
    int lds_size = GROUP_SIZE * keys.elemSize();
#endif
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&keys.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&vals.data));
    args.push_back(std::make_pair(sizeof(cl_int), (void *)&vecSizeArg));
    // NULL pointer with a non-zero size: a size-only argument.
    args.push_back(std::make_pair(lds_size,       (void*)NULL));

#ifdef ANDROID
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
#else
    size_t localThreads[3] = {GROUP_SIZE, 1, 1};
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
#endif

    //final
    kernelname = "selectionSortFinal";
    // The final pass takes the same arguments minus the local-memory one.
    args.pop_back();
#ifdef ANDROID
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, NULL, args, -1, -1, build_opt_buf);
#else
    openCLExecuteKernel(cxt, &kernel_sort_by_key, kernelname, globalThreads, localThreads, args, -1, -1, build_opt_buf);
#endif
}
Example #27
0
    /**
     * Validates the inputs, sizes the feature/candidate budgets and prepares
     * the per-octave counters, the integral image and (when support_image2d()
     * reports it) the image bindings.
     *
     * @param surf detector whose nOctaves, nOctaveLayers and keypointsRatio are
     *             read and whose `sum` matrix receives the integral image
     * @param img  non-empty CV_8UC1 input image
     * @param mask empty, or CV_8UC1 with the same size as img. A non-empty
     *             mask is rejected with CV_StsBadFunc because masked
     *             detection is not implemented.
     */
    SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
        surf_(surf),
        img_cols(img.cols), img_rows(img.rows),
        use_mask(!mask.empty()), counters(oclMat()),
        imgTex(NULL), sumTex(NULL), maskSumTex(NULL), _img(img)
    {
        CV_Assert(!img.empty() && img.type() == CV_8UC1);
        CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
        CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);

        // The image must be at least calcSize(last octave, layer 0) in each dimension.
        const int min_size = calcSize(surf_.nOctaves - 1, 0);
        CV_Assert(img_rows - min_size >= 0);
        CV_Assert(img_cols - min_size >= 0);

        // After shifting down to the last octave there must still be room
        // left between the two margins.
        const int layer_rows = img_rows >> (surf_.nOctaves - 1);
        const int layer_cols = img_cols >> (surf_.nOctaves - 1);
        const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
        CV_Assert(layer_rows - 2 * min_margin > 0);
        CV_Assert(layer_cols - 2 * min_margin > 0);

        // Both budgets are capped at 65535.
        maxFeatures   = std::min(static_cast<int>(img.size().area() * surf.keypointsRatio), 65535);
        maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);

        CV_Assert(maxFeatures > 0);

        // Reject the unsupported masked mode before any buffer, integral image
        // or texture binding is created. Raising later would discard that work,
        // and since the constructor did not complete, the destructor would not
        // run for it.
        if (use_mask)
        {
            CV_Error(CV_StsBadFunc, "Masked SURF detector is not implemented yet");
            //!FIXME
            // temp fix for missing min overload
            //oclMat temp(mask.size(), mask.type());
            //temp.setTo(Scalar::all(1.0));
            ////cv::ocl::min(mask, temp, surf_.mask1);           ///////// disable this
            //integral(surf_.mask1, surf_.maskSum);
            //bindImgTex(surf_.maskSum, maskSumTex);
        }

        // One zero-initialised counter per octave plus one extra slot.
        counters.create(1, surf_.nOctaves + 1, CV_32SC1);
        counters.setTo(Scalar::all(0));

        integral(img, surf_.sum);
        if(support_image2d())
        {
            bindImgTex(img, imgTex);
            bindImgTex(surf_.sum, sumTex);
        }
        // maskSumTex keeps the NULL it was given in the initializer list.
    }
Example #28
0
static void convert_C4C3(const oclMat &src, cl_mem &dst)
{
    int srcStep_in_pixel = src.step1() / src.oclchannels();
    int pixel_end = src.wholecols * src.wholerows - 1;
    Context *clCxt = src.clCxt;

    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    std::string buildOptions = format("-D GENTYPE4=%s4", typeMap[src.depth()]);

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.wholecols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.wholerows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));

    size_t globalThreads[3] = { divUp(src.wholecols * src.wholerows, 4), 1, 1};
    size_t localThreads[3] = { 256, 1, 1 };

    openCLExecuteKernel(clCxt, &convertC3C4, "convertC4C3", globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
}
Example #29
0
void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio)
{
    Context* clCxt = Context::getContext();

    size_t local_thread[] = {32, 8, 1};
    size_t global_thread[] = {(size_t)dst.cols, (size_t)dst.rows, 1};

    int weight_step = (int)(weight.step/weight.elemSize());
    int mean_step = (int)(mean.step/mean.elemSize());
    int dst_step = (int)(dst.step/dst.elemSize());

    char build_option[50];
    if(cn == 1)
    {
        snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
    }else
    {
        snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
    }

    String kernel_name = "getBackgroundImage_kernel";
    vector< pair<size_t, const void*> > args;

    args.push_back(make_pair(sizeof(cl_mem), (void*)&weight.data));
    args.push_back(make_pair(sizeof(cl_mem), (void*)&mean.data));
    args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data));

    args.push_back(make_pair(sizeof(cl_int), (void*)&dst.rows));
    args.push_back(make_pair(sizeof(cl_int), (void*)&dst.cols));

    args.push_back(make_pair(sizeof(cl_int), (void*)&weight_step));
    args.push_back(make_pair(sizeof(cl_int), (void*)&mean_step));
    args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step));

    args.push_back(make_pair(sizeof(cl_float), (void*)&backgroundRatio));

    openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
}
Example #30
0
    /**
     * Validates the inputs, sizes the feature/candidate budgets and prepares
     * the per-octave counters, the image bindings and the integral image.
     *
     * @param surf detector whose nOctaves, nOctaveLayers and keypointsRatio are
     *             read and whose `sum` matrix receives the integral image
     * @param img  non-empty CV_8UC1 input image
     * @param mask empty, or CV_8UC1 with the same size as img. A non-empty
     *             mask is rejected because masked detection is not
     *             implemented. The error is raised through CV_Error, so it
     *             carries a message; cv::Exception derives from
     *             std::exception, so existing handlers still catch it.
     */
    SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
        surf_(surf),
        img_cols(img.cols), img_rows(img.rows),
        use_mask(!mask.empty()),
        imgTex(NULL), sumTex(NULL), maskSumTex(NULL)
    {
        CV_Assert(!img.empty() && img.type() == CV_8UC1);
        CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
        CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);

        // The image must be at least calcSize(last octave, layer 0) in each dimension.
        const int min_size = calcSize(surf_.nOctaves - 1, 0);
        CV_Assert(img_rows - min_size >= 0);
        CV_Assert(img_cols - min_size >= 0);

        // After shifting down to the last octave there must still be room
        // left between the two margins.
        const int layer_rows = img_rows >> (surf_.nOctaves - 1);
        const int layer_cols = img_cols >> (surf_.nOctaves - 1);
        const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
        CV_Assert(layer_rows - 2 * min_margin > 0);
        CV_Assert(layer_cols - 2 * min_margin > 0);

        // Both budgets are capped at 65535.
        maxFeatures   = std::min(static_cast<int>(img.size().area() * surf.keypointsRatio), 65535);
        maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);

        CV_Assert(maxFeatures > 0);

        // Reject the unsupported masked mode before any buffer, integral image
        // or texture binding is created. Raising later would discard that work,
        // and since the constructor did not complete, the destructor would not
        // run for it.
        if (use_mask)
        {
            CV_Error(CV_StsBadFunc, "Masked SURF detector is not implemented yet");
            //!FIXME
            // temp fix for missing min overload
            //oclMat temp(mask.size(), mask.type());
            //temp.setTo(Scalar::all(1.0));
            ////cv::ocl::min(mask, temp, surf_.mask1);           ///////// disable this
            //integral(surf_.mask1, surf_.maskSum);
            //bindImgTex(surf_.maskSum, maskSumTex);
        }

        // One zero-initialised counter per octave plus one extra slot.
        counters.create(1, surf_.nOctaves + 1, CV_32SC1);
        counters.setTo(Scalar::all(0));

        //loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));

        bindImgTex(img, imgTex);
        integral(img, surf_.sum); // the two argumented integral version is incorrect

        bindImgTex(surf_.sum, sumTex);
        // maskSumTex keeps the NULL it was given in the initializer list.
    }