Exemplo n.º 1
0
// radiusMatchSingle
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train,
        oclMat &trainIdx,   oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
{
    if (query.empty() || train.empty())
        return;

    // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
    int callType = query.depth();
    char cvFuncName[] = "radiusMatchSingle";
    if (callType != 5)
        CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");

    if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0
        || callType != 2 || callType != 4)))
    {
        CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
    }

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.type() == query.type() && train.cols == query.cols);
    CV_Assert(trainIdx.empty() || (trainIdx.rows == query.rows && trainIdx.size() == distance.size()));

    nMatches.create(1, query.rows, CV_32SC1);
    if (trainIdx.empty())
    {
        trainIdx.create(query.rows, std::max((train.rows/ 100), 10), CV_32SC1);
        distance.create(query.rows, std::max((train.rows/ 100), 10), CV_32FC1);
    }

    nMatches.setTo(Scalar::all(0));

    matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
exit:
    return;
}
Exemplo n.º 2
0
// Matches `query` against a collection of train matrices by forwarding to matchDispatcher().
// Returns immediately, leaving the outputs untouched, when `query` or `trainCollection` is empty.
//
//   query            must be single-channel; only depth code 5 (CV_32F) passes the checks below.
//   trainCollection  its data pointer is reinterpreted as an array of oclMat headers and its
//                    `cols` is passed as the element count (see the dispatcher call).
//   trainIdx         output; created as 1 x query.rows, CV_32S.
//   imgIdx           output; created as 1 x query.rows, CV_32S.
//   distance         output; created as 1 x query.rows, CV_32F.
//   masks            forwarded to matchDispatcher().
//
// Unsupported depths are reported with CV_ERROR, violated preconditions with CV_Assert.
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx,
        oclMat &imgIdx, oclMat &distance, const oclMat &masks)
{
    if (query.empty() || trainCollection.empty())
        return;

    // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
    int callType = query.depth();
    // cvFuncName and the `exit` label below are required by the legacy CV_ERROR macro.
    char cvFuncName[] = "matchCollection";
    // Depth code 5 is CV_32F: the OpenCL matcher currently accepts float queries only.
    if (callType != 5)
        CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");

    // Per-distance-type depth restrictions. The third clause rejects a depth that is NONE of
    // 0, 2, 4 (uchar, ushort, int); it has to be a conjunction - the previous `||` chain was
    // true for every depth and therefore did not express any restriction at all.
    if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) || (distType == 2 && (callType != 0
        && callType != 2 && callType != 4)))
    {
        CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
    }

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);

    // One result slot per query row.
    trainIdx.create(1, query.rows, CV_32S);
    imgIdx.create(1, query.rows, CV_32S);
    distance.create(1, query.rows, CV_32F);

    matchDispatcher(query, (const oclMat *)trainCollection.ptr(), trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);
exit:
    return;
}
Exemplo n.º 3
0
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
static void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta)
{
    String kernelName = "convert_to_S";
    std::stringstream idxStr;
    idxStr << src.depth();
    kernelName = kernelName + idxStr.str().c_str();
    float alpha_f = alpha, beta_f = beta;
    CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);
    std::vector<std::pair<size_t , const void *> > args;
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];
    globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
    globalThreads[2] = 1;
    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
    if(dst.type() == CV_8UC1)
    {
        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
    }
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f ));
    openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
                        localThreads, args, dst.oclchannels(), dst.depth());
}
Exemplo n.º 4
0
void cv::ocl::distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType)
{
    CV_Assert(src.cols * src.channels() == centers.cols * centers.channels());
    CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F);
    CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR);

    dists.create(src.rows, 1, CV_32FC1);
    labels.create(src.rows, 1, CV_32SC1);

    std::stringstream build_opt_ss;
    build_opt_ss << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST");

    int src_step = src.step / src.elemSize1();
    int centers_step = centers.step / centers.elemSize1();
    int feature_width = centers.cols * centers.oclchannels();
    int src_offset = src.offset / src.elemSize1();
    int centers_offset = centers.offset / centers.elemSize1();

    int all_dist_count = src.rows * centers.rows;
    oclMat all_dist(1, all_dist_count, CV_32FC1);

    vector<pair<size_t, const void *> > args;
    args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
    args.push_back(make_pair(sizeof(cl_mem), (void *)&centers.data));
    args.push_back(make_pair(sizeof(cl_mem), (void *)&all_dist.data));

    args.push_back(make_pair(sizeof(cl_int), (void *)&feature_width));
    args.push_back(make_pair(sizeof(cl_int), (void *)&src_step));
    args.push_back(make_pair(sizeof(cl_int), (void *)&centers_step));
    args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
    args.push_back(make_pair(sizeof(cl_int), (void *)&centers.rows));

    args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset));
    args.push_back(make_pair(sizeof(cl_int), (void *)&centers_offset));

    size_t globalThreads[3] = { all_dist_count, 1, 1 };

    openCLExecuteKernel(Context::getContext(), &kmeans_kernel,
                        "distanceToCenters", globalThreads, NULL, args, -1, -1, build_opt_ss.str().c_str());

    Mat all_dist_cpu;
    all_dist.download(all_dist_cpu);

    for (int i = 0; i < src.rows; ++i)
    {
        Point p;
        double minVal;

        Rect roi(i * centers.rows, 0, centers.rows, 1);
        Mat hdr(all_dist_cpu, roi);

        cv::minMaxLoc(hdr, &minVal, NULL, &p);

        dists.at<float>(i, 0) = static_cast<float>(minVal);
        labels.at<int>(i, 0) = p.x;
    }
}
Exemplo n.º 5
0
// Launches the "lkSparse" kernel from the pyrlk program for `ptcount` points.
//
//   I, J       images, bound to textures for the duration of the call.
//   prevPts    input points; nextPts / status / err are the kernel's output buffers.
//   ptcount    number of points; one 8x8 work-group is launched per point.
//   level      passed to the kernel; also enables error calculation when it is 0.
//   patch      x/y passed to the kernel.
//   winSize    width/height passed to the kernel.
//   iters      passed to the kernel.
//
// On a CPU device the program is built with " -D CPU"; otherwise with "-D WAVE_SIZE=<n>", where
// <n> is obtained from queryWaveFrontSize() on a temporary kernel object.
static void lkSparse_run(oclMat &I, oclMat &J,
                  const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount,
                  int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
{
    Context  *clCxt = I.clCxt;
    String kernelName = "lkSparse";
    size_t localThreads[3]  = { 8, 8, 1 };
    // Explicit cast: int -> size_t inside a braced initializer is a narrowing conversion.
    size_t globalThreads[3] = { static_cast<size_t>(8 * ptcount), 8, 1 };
    int cn = I.oclchannels();
    char calcErr = level==0?1:0;

    std::vector<std::pair<size_t , const void *> > args;

    cl_mem ITex = bindTexture(I);
    cl_mem JTex = bindTexture(J);

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ITex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&JTex ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&prevPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&prevPts.step ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&nextPts.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nextPts.step ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&status.data ));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&err.data ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&level ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.rows ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&I.cols ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.x ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patch.y ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&cn ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.width ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&winSize.height ));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&iters ));
    args.push_back( std::make_pair( sizeof(cl_char), (void *)&calcErr ));

    bool is_cpu = isCpuDevice();
    if (is_cpu)
    {
        openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, cn, I.depth(), " -D CPU");
    }
    else
    {
        // Fetch the channel/depth-specialised kernel only to query its wavefront size.
        std::stringstream idxStr;
        idxStr << kernelName << "_C" << cn << "_D" << I.depth();
        cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str());
        int wave_size = (int)queryWaveFrontSize(kernel);
        openCLSafeCall(clReleaseKernel(kernel));

        // Plain local buffer (previously function-static, i.e. shared between all calls).
        // 32 bytes hold the 13-character prefix, any formatted int and the terminator.
        char opt[32] = {0};
        sprintf(opt, "-D WAVE_SIZE=%d", wave_size);

        openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads,
                            args, cn, I.depth(), opt);
    }
    releaseTexture(ITex);
    releaseTexture(JTex);
}
// Fills `dst` with `scalar` (no mask).
//
// When the library is built with OpenCL 1.2 headers, the context reports CL_VER_1_2 support and
// `dst` has zero offset and spans the whole row width, the fill is done with
// clEnqueueFillBuffer. Otherwise the kernel `kernelName` from operator_setTo is launched with the
// scalar uploaded as a 1x1 matrix of dst's type.
static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, String kernelName)
{
    // The scalar converted to a single element of dst's type; used by both paths.
    Mat mat(1, 1, dst.type(), scalar);

#ifdef CL_VERSION_1_2
    // this enables backwards portability to
    // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
    if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) &&
        dst.offset == 0 && dst.cols == dst.wholecols)
    {
        // Pattern size in bytes, indexed by [oclchannels - 1][depth]; the all-zero row is the
        // slot for an oclchannels() value of 3.
        const int sizeofMap[][7] =
            {
                { sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double)  },
                { sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) },
                { 0                , 0               , 0                 , 0                , 0              , 0                ,  0                 },
                { sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) },
            };
        int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()];

        clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
                            (cl_mem)dst.data, (void*)mat.data, sizeofGeneric,
                            0, dst.step * dst.rows, 0, NULL, NULL);
    }
    else
#endif
    {
        std::vector<std::pair<size_t , const void *> > args;

        size_t localThreads[3] = {16, 16, 1};
        // Explicit casts: int -> size_t inside a braced initializer is a narrowing conversion.
        size_t globalThreads[3] = { static_cast<size_t>(dst.cols), static_cast<size_t>(dst.rows), 1 };
        int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();

        // CV_8UC1 uses a grid sized from cols / 4 + 1, rounded up to the work-group width.
        if (dst.type() == CV_8UC1)
            globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];

        // GENTYPE = element type name followed by the vector-width suffix (3 channels map to '4').
        const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
        const char channelMap[] = { ' ', ' ', '2', '4', '4' };
        std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);

        oclMat m(mat);
        args.push_back( std::make_pair( sizeof(cl_mem) , (void*)&m.data ));
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));

        openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
            localThreads, args, -1, -1, buildOptions.c_str());
    }
}
Exemplo n.º 7
0
void cv::ocl::blendLinear(const oclMat &src1, const oclMat &src2, const oclMat &weights1, const oclMat &weights2,
                          oclMat &dst)
{
    CV_Assert(src1.depth() <= CV_32F);
    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
    CV_Assert(weights1.size() == weights2.size() && weights1.size() == src1.size() &&
              weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1);

    dst.create(src1.size(), src1.type());

    size_t globalSize[] = { (size_t)dst.cols, (size_t)dst.rows, 1};
    size_t localSize[] = { 16, 16, 1 };

    int depth = dst.depth(), ocn = dst.oclchannels();
    int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
    int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize();
    int weight1_step = weights1.step / weights1.elemSize(), weight1_offset = weights1.offset / weights1.elemSize();
    int weight2_step = weights2.step / weights2.elemSize(), weight2_offset = weights2.offset / weights2.elemSize();
    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();

    const char * const channelMap[] = { "", "", "2", "4", "4" };
    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    std::string buildOptions = format("-D T=%s%s -D convertToT=convert_%s%s%s -D FT=float%s -D convertToFT=convert_float%s",
                                      typeMap[depth], channelMap[ocn], typeMap[depth], channelMap[ocn],
                                      depth >= CV_32S ? "" : "_sat_rte", channelMap[ocn], channelMap[ocn]);

    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight1_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&weight2_step ));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));

    openCLExecuteKernel(src1.clCxt, &blend_linear, "blendLinear", globalSize, localSize, args,
                        -1, -1, buildOptions.c_str());
}
Exemplo n.º 8
0
void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
                         const oclMat &trainIdx, const oclMat &distance, int distType)
{
    cv::ocl::Context *ctx = query.clCxt;
    size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
    size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
    const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
    int block_size = BLOCK_SIZE;
    int m_size = MAX_DESC_LEN;
    std::vector< std::pair<size_t, const void *> > args;

    char opt [OPT_SIZE] = "";
    sprintf(opt,
        "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
        T_ARR[query.depth()], distType, block_size, m_size);

    if(globalSize[0] != 0)
    {
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&query.data ));
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&train.data ));
        //args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data ));
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&distance.data ));
        args.push_back( std::make_pair( smemSize, (void *)NULL));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.rows ));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.cols ));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.rows ));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&train.cols ));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&query.step ));

        String kernelName = "BruteForceMatch_UnrollMatch";

        openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1, opt);
    }
}
Exemplo n.º 9
0
void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta )
{
    //cout << "cv::ocl::oclMat::convertTo()" << endl;

    bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
                   && fabs(beta) < std::numeric_limits<double>::epsilon();

    if( rtype < 0 )
        rtype = src.type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels());

    int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
    if( sdepth == ddepth && noScale )
    {
        src.copyTo(dst);
        return;
    }

    oclMat temp;
    const oclMat *psrc = &src;
    if( sdepth != ddepth && psrc == &dst )
        psrc = &(temp = src);

    dst.create( src.size(), rtype );
    convert_run_cus(*psrc, dst, alpha, beta);
}
Exemplo n.º 10
0
// knn match
//
// Prepares the output buffers for a k-nearest-neighbour match of `query` against `train` and
// forwards the work to kmatchDispatcher(). Returns immediately, leaving the outputs untouched,
// when either input is empty.
//
//   query, train  must be single-channel, of identical type and with the same number of columns.
//   trainIdx      output; sized below and pre-filled with -1.
//   distance      output; sized below.
//   allDist       scratch buffer; only sized when k != 2.
//   k             forwarded to kmatchDispatcher().
//   mask          forwarded to kmatchDispatcher().
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx,
        oclMat &distance, oclMat &allDist, int k, const oclMat &mask)
{
    if (query.empty() || train.empty())
        return;

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.type() == query.type() && train.cols == query.cols);

    const int queryCount = query.rows;
    const int trainCount = train.rows;

    if (k != 2)
    {
        // General case: one row per query with k columns, plus the full query-by-train
        // distance table.
        ensureSizeIsEnough(queryCount, k, CV_32S, trainIdx);
        ensureSizeIsEnough(queryCount, k, CV_32F, distance);
        ensureSizeIsEnough(queryCount, trainCount, CV_32FC1, allDist);
    }
    else
    {
        // k == 2: both results of a query are packed into one two-channel element.
        ensureSizeIsEnough(1, queryCount, CV_32SC2, trainIdx);
        ensureSizeIsEnough(1, queryCount, CV_32FC2, distance);
    }

    trainIdx.setTo(Scalar::all(-1));

    kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType);
}
Exemplo n.º 11
0
// Launches the cvt_color kernel `kernelName` with one work-item per pixel of `dst`.
//
//   src, dst           source and (already allocated) destination images.
//   bidx               passed to the kernel build as -D bidx.
//   kernelName         name of the kernel inside the cvt_color program.
//   additionalOptions  appended verbatim to the build options; no separator is inserted, so the
//                      caller has to supply any leading space.
//   data1, data2       optional extra buffers; each is appended as a kernel argument only when
//                      non-empty.
static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
                           const std::string & additionalOptions = std::string(),
                           const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
{
    // Offsets and steps are expressed in scalar (single-channel) elements.
    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

    std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
    if (!additionalOptions.empty())
        build_options += additionalOptions;

    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    if (!data1.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data ));
    if (!data2.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data ));

    // Explicit casts: int -> size_t inside a braced initializer is a narrowing conversion.
    size_t gt[3] = { static_cast<size_t>(dst.cols), static_cast<size_t>(dst.rows), 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
Exemplo n.º 12
0
static void convert_C3C4(const cl_mem &src, oclMat &dst)
{
    Context *clCxt = dst.clCxt;
    int pixel_end = dst.wholecols * dst.wholerows - 1;
    int dstStep_in_pixel = dst.step1() / dst.oclchannels();

    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    std::string buildOptions = format("-D GENTYPE4=%s4", typeMap[dst.depth()]);

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.wholecols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.wholerows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstStep_in_pixel));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));

    size_t globalThreads[3] = { divUp(dst.wholecols * dst.wholerows, 4), 1, 1 };

#ifdef ANDROID
    openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, NULL,
                        args, -1, -1, buildOptions.c_str());
#else
    size_t localThreads[3] = { 256, 1, 1 };
    openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, localThreads,
                        args, -1, -1, buildOptions.c_str());
#endif
}
Exemplo n.º 13
0
// Converts this matrix into `dst`, changing the depth to that of `rtype` and handing alpha/beta
// to convert_run().
//
//   dst    destination; (re)created with this matrix's size and the resolved type.
//   rtype  requested type; only its depth is used (the channel count always follows this
//          matrix). A negative value keeps the current type.
//   alpha, beta  scale and shift forwarded to convert_run().
//
// Raises Error::OpenCLDoubleNotSupported when the source or the REQUESTED destination depth is
// CV_64F and the context lacks FEATURE_CL_DOUBLE. When the depth is unchanged and alpha == 1,
// beta == 0 (within machine epsilon), this is a plain copyTo().
void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const
{
    // Resolve the target type before validating it: the depth `dst` happens to hold on entry
    // says nothing about the conversion being requested (dst may be empty or left over from an
    // earlier, unrelated use).
    if( rtype < 0 )
        rtype = type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());

    int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);

    if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) &&
            (sdepth == CV_64F || ddepth == CV_64F))
    {
        CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
        return;
    }

    bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
                   && fabs(beta) < std::numeric_limits<double>::epsilon();

    if( sdepth == ddepth && noScale )
    {
        copyTo(dst);
        return;
    }

    // If `dst` is this very object and the depth changes, dst.create() below would replace the
    // header being read from; keep a separate header to the source in that case.
    oclMat temp;
    const oclMat *psrc = this;
    if( sdepth != ddepth && psrc == &dst )
        psrc = &(temp = *this);

    dst.create( size(), rtype );
    convert_run(*psrc, dst, alpha, beta);
}
Exemplo n.º 14
0
// Launches the operator_setToM kernel `kernelName` to write `scalar` into `dst` under `mask`.
// The scalar is uploaded as a 1x1 matrix of dst's type. `dst` and `mask` are expected to have
// the same rows/cols (checked in debug builds only).
static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, String kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
    std::vector<std::pair<size_t , const void *> > args;
    size_t localThreads[3] = { 16, 16, 1 };
    // Explicit casts: int -> size_t inside a braced initializer is a narrowing conversion.
    size_t globalThreads[3] = { static_cast<size_t>(dst.cols), static_cast<size_t>(dst.rows), 1 };
    // dst step/offset are passed in whole elements; mask step/offset are passed as stored.
    int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();

    // GENTYPE = element type name followed by the vector-width suffix (3 channels map to '4').
    const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    const char channelMap[] = { ' ', ' ', '2', '4', '4' };
    std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);

    oclMat m(Mat(1, 1, dst.type(), scalar));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&m.data ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.offset ));
    openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads,
                        localThreads, args, -1, -1, buildOptions.c_str());
}
Exemplo n.º 15
0
// Allocates `dst` at half the size of `src` (each dimension rounded up) with src's type and
// runs pyrdown_run_cus() on the pair.
// Requires src depth <= CV_32F and at most 4 channels.
static void pyrDown_cus(const oclMat &src, oclMat &dst)
{
    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);

    // (n + 1) / 2 keeps the last row/column of odd-sized inputs.
    const int halfRows = (src.rows + 1) / 2;
    const int halfCols = (src.cols + 1) / 2;
    dst.create(halfRows, halfCols, src.type());

    pyrdown_run_cus(src, dst);
}
Exemplo n.º 16
0
void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
                          oclMat &result)
{
    cv::ocl::Context *ctx = img1.clCxt;
    assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
    int channels = img1.oclchannels();
    int depth = img1.depth();
    int rows = img1.rows;
    int cols = img1.cols;
    int istep = img1.step1();
    int wstep = weights1.step1();
    size_t globalSize[] = {cols * channels / 4, rows, 1};
    size_t localSize[] = {256, 1, 1};

    vector< pair<size_t, const void *> > args;

    if(globalSize[0] != 0)
    {
        args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
        args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
        std::string kernelName = "BlendLinear";

        openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
    }
}
Exemplo n.º 17
0
// Launches the cvt_color kernel `kernelName` with one work-item per pixel of `dst`.
//
//   src, dst    source and (already allocated) destination images.
//   bidx        passed to the kernel build as -D bidx.
//   greenbits   passed to the kernel build as -D greenbits.
//   kernelName  name of the kernel inside the cvt_color program.
static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
{
    std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d",
                                       src.depth(), greenbits, src.channels(), bidx);
    // Source offset/step are passed as stored; destination values are halved, i.e. expressed in
    // 2-byte units.
    int src_offset = (int)src.offset, src_step = (int)src.step;
    int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;

    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    // Explicit casts: int -> size_t inside a braced initializer is a narrowing conversion.
    size_t gt[3] = { static_cast<size_t>(dst.cols), static_cast<size_t>(dst.rows), 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
Exemplo n.º 18
0
// radiusMatchSingle
//
// Prepares the output buffers for a radius match of `query` against `train` and forwards the
// work to matchDispatcher(). Returns immediately, leaving the outputs untouched, when either
// input is empty.
//
//   query, train  must be single-channel, of identical type and with the same number of columns.
//   trainIdx      output; when empty on entry it is sized together with `distance`, otherwise
//                 it must have query.rows rows and the same size as `distance`.
//   distance      output; sized alongside `trainIdx`.
//   nMatches      output; sized to 1 x query.rows CV_32SC1 and zero-filled before dispatch.
//   maxDistance   forwarded to matchDispatcher().
//   mask          forwarded to matchDispatcher().
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train,
        oclMat &trainIdx,   oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
{
    if (query.empty() || train.empty())
        return;

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.type() == query.type() && train.cols == query.cols);
    CV_Assert(trainIdx.empty() || (trainIdx.rows == query.rows && trainIdx.size() == distance.size()));

    const int queryCount = query.rows;

    ensureSizeIsEnough(1, queryCount, CV_32SC1, nMatches);
    if (trainIdx.empty())
    {
        // Default capacity per query row: 1% of the train rows, but never fewer than 10 slots.
        const int matchCapacity = std::max((train.rows / 100), 10);
        ensureSizeIsEnough(queryCount, matchCapacity, CV_32SC1, trainIdx);
        ensureSizeIsEnough(queryCount, matchCapacity, CV_32FC1, distance);
    }

    nMatches.setTo(Scalar::all(0));

    matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
}
Exemplo n.º 19
0
// Launches the "RGB" kernel from cvt_color with one work-item per pixel of `dst`.
//
//   src, dst  source and (already allocated) destination images; their channel counts are
//             passed to the kernel build as scn / dcn.
//   reverse   selects the REVERSE build define when true, ORDER otherwise.
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
{
    // Offsets and steps are expressed in scalar (single-channel) elements.
    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

    std::string build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
                                        src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");

    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    // Explicit casts: int -> size_t inside a braced initializer is a narrowing conversion.
    size_t gt[3] = { static_cast<size_t>(dst.cols), static_cast<size_t>(dst.rows), 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
}
void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
    vector<pair<size_t , const void *> > args;
    cl_float4 val;
    val.s[0] = scalar.val[0];
    val.s[1] = scalar.val[1];
    val.s[2] = scalar.val[2];
    val.s[3] = scalar.val[3];
    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];
    globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
    globalThreads[2] = 1;
    if(dst.type() == CV_8UC1)
    {
        globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
    }
    int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
    args.push_back( make_pair( sizeof(cl_float4) , (void *)&val ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
    openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads,
                        localThreads, args, dst.channels(), dst.depth());
}
Exemplo n.º 21
0
            ////////////////////////////////////////////////////////////////////////////
            ////////////////////merge//////////////////////////////////////////////////
            ////////////////////////////////////////////////////////////////////////////
            void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst)
            {
                Context  *clCxt = mat_dst.clCxt;
                int channels = mat_dst.channels();
                int depth = mat_dst.depth();

                string kernelName = "merge_vector";

                int indexes[4][7] = {{0, 0, 0, 0, 0, 0, 0},
                    {4, 4, 2, 2, 1, 1, 1},
                    {4, 4, 2, 2 , 1, 1, 1},
                    {4, 4, 2, 2, 1, 1, 1}
                };

                size_t index = indexes[channels-1][mat_dst.depth()];
                int    cols = divUp(mat_dst.cols, index);
                size_t localThreads[3]  = { 64, 4, 1 };
                size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
                                            divUp(mat_dst.rows, localThreads[1]) * localThreads[1],
                                            1
                                          };

                vector<pair<size_t , const void *> > args;
                args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.rows));
                args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
                args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_dst.data));
                args.push_back( make_pair( sizeof(cl_int), (void *)&mat_dst.step));
                args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[0].data));
                args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[0].step));
                args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[1].data));
                args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[1].step));
                if(n >= 3)
                {
                    args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[2].data));
                    args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
                }
                if(n >= 4)
                {
                    args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[3].data));
                    args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[3].step));
                }

                openCLExecuteKernel(clCxt, &merge_mat, kernelName, globalThreads, localThreads, args, channels, depth);
            }
Exemplo n.º 22
0
void cv::ocl::pyrDown(const oclMat &src, oclMat &dst)
{
    int depth = src.depth(), channels = src.channels();
    CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_16S || depth == CV_32F);
    CV_Assert(channels == 1 || channels == 3 || channels == 4);

    dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());

    pyrdown_run(src, dst);
}
Exemplo n.º 23
0
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName)
{
    CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
                  src.rows == dst.rows && src.cols == dst.cols);

    vector<pair<size_t , const void *> > args;

    int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1},
        {2, 2, 1, 1, 1, 1, 1},
        {8, 8, 8, 8 , 4, 4, 4},      //vector length is undefined when channels = 3
        {1, 1, 1, 1, 1, 1, 1}
    };

    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3];

    int vector_length = vector_lengths[dst.channels() -1][dst.depth()];
    int offset_cols = divUp(dst.offset, dst.elemSize()) & (vector_length - 1);
    int cols = vector_length == 1 ? divUp(dst.cols, vector_length) : divUp(dst.cols + offset_cols, vector_length);

    globalThreads[0] = divUp(cols, localThreads[0]) * localThreads[0];
    globalThreads[1] = divUp(dst.rows, localThreads[1]) * localThreads[1];
    globalThreads[2] = 1;

    int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
    int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();

    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));

    openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
                        localThreads, args, dst.channels(), dst.depth());
}
Exemplo n.º 24
0
// knn match
// Allocates the output buffers for a k-nearest-neighbour match of query
// against train and hands the work to kmatchDispatcher.
//
//   query, train - descriptor matrices; must be single-channel, share type and
//                  column count, and (in this implementation) be float
//   trainIdx     - output indices, reset to -1 before dispatch
//   distance     - output distances
//   allDist      - scratch buffer, only (re)created when k != 2
//   k            - number of neighbours; k == 2 uses 1 x rows two-channel outputs,
//                  any other k uses rows x k single-channel outputs
//   mask         - forwarded to kmatchDispatcher unchanged
//
// Returns without touching the outputs when query or train is empty, or
// after CV_ERROR reports an unsupported depth.
void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx,
        oclMat &distance, oclMat &allDist, int k, const oclMat &mask)
{
    if (query.empty() || train.empty())
        return;

    // match1 doesn't support signed char type, match2 only support float, hamming support uchar, ushort and int
    int callType = query.depth();

    // CV_ERROR reads cvFuncName and jumps to the `exit` label below.
    char cvFuncName[] = "knnMatchSingle";
    if (callType != 5)
        CV_ERROR(CV_UNSUPPORTED_FORMAT_ERR, "BruteForceMatch OpenCL only support float type query!\n");

    // Per-distance depth restrictions. For distType 2 the depth has to be one
    // of 0, 2 or 4, i.e. it is unsupported when it differs from ALL of them;
    // the terms must therefore be joined with && (with || the test is always true).
    if ((distType == 0 && callType == 1 ) || (distType == 1 && callType != 5) ||
        (distType == 2 && (callType != 0 && callType != 2 && callType != 4)))
    {
        CV_ERROR(CV_UNSUPPORTED_DEPTH_ERR, "BruteForceMatch OpenCL only support float type query!\n");
    }

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.type() == query.type() && train.cols == query.cols);

    if (k == 2)
    {
        // Both neighbours of a query are packed into one two-channel element.
        trainIdx.create(1, query.rows, CV_32SC2);
        distance.create(1, query.rows, CV_32FC2);
    }
    else
    {
        trainIdx.create(query.rows, k, CV_32S);
        distance.create(query.rows, k, CV_32F);
        allDist.create(query.rows, train.rows, CV_32FC1);
    }

    trainIdx.setTo(Scalar::all(-1));

    kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType);
exit:
    return;
}
Exemplo n.º 25
0
// Prepares the output buffers for a radius match of query against the stored
// train collection.
//
// NOTE: the dispatch table is compiled out (#if 0) and the call that would
// perform the matching is commented out, so as written this function only
// validates its inputs, allocates trainIdx / imgIdx / distance when trainIdx
// is empty, and zeroes nMatches. The maxDistance parameter is unnamed and unused.
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
        oclMat &nMatches, float /*maxDistance*/, const std::vector<oclMat> &masks)
{
    if (query.empty() || empty())
        return;

    typedef void (*caller_t)(const oclMat & query, const oclMat * trains, int n, float maxDistance, const oclMat * masks,
                             const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance, const oclMat & nMatches);
#if 0
    static const caller_t callers[3][6] =
    {
        {
            ocl_matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
            ocl_matchL1_gpu<unsigned short>, matchL1_gpu<short>,
            ocl_matchL1_gpu<int>, matchL1_gpu<float>
        },
        {
            0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
            0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
            0/*matchL2_gpu<int>*/, ocl_matchL2_gpu<float>
        },
        {
            ocl_matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
            ocl_matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
            ocl_matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
        }
    };
#endif
    const int nQuery = query.rows;

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size()));

    // One match counter per query row.
    nMatches.create(1, nQuery, CV_32SC1);

    if (trainIdx.empty())
    {
        // Per-query capacity: 1% of the query count, but never fewer than 10 slots.
        const int slotsPerQuery = std::max((nQuery / 100), 10);

        trainIdx.create(nQuery, slotsPerQuery, CV_32SC1);
        imgIdx.create(nQuery, slotsPerQuery, CV_32SC1);
        distance.create(nQuery, slotsPerQuery, CV_32FC1);
    }

    nMatches.setTo(Scalar::all(0));

    //caller_t func = callers[distType][query.depth()];
    //CV_Assert(func != 0);

    // Local copies that the (disabled) call below would consume.
    std::vector<oclMat> trains_(trainDescCollection.begin(), trainDescCollection.end());
    std::vector<oclMat> masks_(masks.begin(), masks.end());

    /*  func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
          trainIdx, imgIdx, distance, nMatches));*/
}
Exemplo n.º 26
0
static void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta)
{
    String kernelName = "convert_to";
    float alpha_f = alpha, beta_f = beta;
    int sdepth = src.depth(), ddepth = dst.depth();
    int sstep1 = (int)src.step1(), dstep1 = (int)dst.step1();
    int cols1 = src.cols * src.oclchannels();

    char buildOptions[150], convertString[50];
    const char * typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
    sprintf(convertString, "convert_%s_sat_rte", typeMap[ddepth]);
    sprintf(buildOptions, "-D srcT=%s -D dstT=%s -D convertToDstType=%s", typeMap[sdepth],
            typeMap[ddepth], CV_32F == ddepth || ddepth == CV_64F ? "" : convertString);

    CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);
    std::vector<std::pair<size_t , const void *> > args;

    size_t localThreads[3] = { 16, 16, 1 };
    size_t globalThreads[3] = { divUp(cols1, localThreads[0]) * localThreads[0],
                                divUp(dst.rows, localThreads[1]) * localThreads[1], 1
                              };

    int doffset1 = dst.offset / dst.elemSize1();
    int soffset1 = src.offset / src.elemSize1();

    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols1 ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sstep1 ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&soffset1 ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstep1 ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&doffset1 ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f ));
    args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f ));

    openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
                        localThreads, args, -1, -1, buildOptions);
}
Exemplo n.º 27
0
// Matches query against a single train matrix: sizes trainIdx (CV_32S) and
// distance (CV_32F) to at least 1 x query.rows and delegates to matchDispatcher.
// Does nothing when either descriptor matrix is empty.
void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const oclMat &train,
        oclMat &trainIdx, oclMat &distance, const oclMat &mask)
{
    // Nothing to match.
    if (query.empty() || train.empty())
        return;

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
    CV_Assert(train.cols == query.cols && train.type() == query.type());

    // One result slot per query row.
    const int resultCount = query.rows;
    ensureSizeIsEnough(1, resultCount, CV_32S, trainIdx);
    ensureSizeIsEnough(1, resultCount, CV_32F, distance);

    matchDispatcher(query, train, mask, trainIdx, distance, distType);
}
////////////////////////////////////////////////////////////////////////
// convert_C4C3
// Launches the "convertC4C3" kernel over the whole underlying buffer of src
// (wholecols x wholerows), writing into the raw cl_mem dst.
//
//   src - source matrix; its depth selects the GENTYPE4 build define
//   dst - destination OpenCL buffer
//
// Raises CV_StsUnsupportedFormat via CV_Error for a depth outside 0..6.
static void convert_C4C3(const oclMat &src, cl_mem &dst)
{
    // Build option for each depth code, indexed by src.depth().
    static const char *const genType4Options[] =
    {
        "-D GENTYPE4=uchar4",
        "-D GENTYPE4=char4",
        "-D GENTYPE4=ushort4",
        "-D GENTYPE4=short4",
        "-D GENTYPE4=int4",
        "-D GENTYPE4=float4",
        "-D GENTYPE4=double4"
    };
    const int optionCount = (int)(sizeof(genType4Options) / sizeof(genType4Options[0]));

    const int depth = src.depth();
    if (depth < 0 || depth >= optionCount)
    {
        CV_Error(CV_StsUnsupportedFormat, "unknown depth");
        return; // never launch the kernel without a valid build option
    }
    const char *compile_option = genType4Options[depth];

    int srcStep_in_pixel = src.step1() / src.oclchannels();
    const int pixelCount = src.wholecols * src.wholerows;
    int pixel_end = pixelCount - 1;
    Context *clCxt = src.clCxt;

    // NOTE(review): the kernel "convertC4C3" is looked up in the program object
    // named convertC3C4 - presumably both kernels share one source; confirm.
    string kernelName = "convertC4C3";

    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel));
    args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end));

    // A quarter as many work-items as pixels (rounded up), padded to a multiple
    // of the 256-wide work-group. The cast is explicit because
    // brace-initialising a size_t from a non-constant int is a narrowing
    // conversion (ill-formed since C++11).
    size_t globalThreads[3] = { static_cast<size_t>(((pixelCount + 3) / 4 + 255) / 256 * 256), 1, 1 };
    size_t localThreads[3] = {256, 1, 1};

    openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
Exemplo n.º 29
0
static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
                           const std::string & additionalOptions = std::string(),
                           const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
{
    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
    int pixels_per_work_item = 1;

    if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
    {
        if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
            pixels_per_work_item =  4;
        else if (src.cols % 2 == 0)
            pixels_per_work_item =  2;
        else
            pixels_per_work_item =  1;
    }

    std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
    if (!additionalOptions.empty())
        build_options += additionalOptions;

    vector<pair<size_t , const void *> > args;
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    if (!data1.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data ));
    if (!data2.empty())
        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data ));

    size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
Exemplo n.º 30
0
void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx,
        oclMat &imgIdx, oclMat &distance, const oclMat &masks)
{
    if (query.empty() || trainCollection.empty())
        return;

    CV_Assert(query.channels() == 1 && query.depth() < CV_64F);

    const int nQuery = query.rows;

    ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
    ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx);
    ensureSizeIsEnough(1, nQuery, CV_32F, distance);

    matchDispatcher(query, &trainCollection, trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);

    return;
}