Example #1
CUDA_TEST_P(CalTech, HOG)
{
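    // Run the CUDA HOG pedestrian detector on the CalTech test image; with
    // LOG_CASCADE_STATISTIC defined, print and visualize the detected rectangles.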
    cv::cuda::GpuMat d_img(img);
    cv::Mat markedImage(img.clone());

    cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
    d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
    d_hog->setNumLevels(d_hog->getNumLevels() + 32);

    std::vector<cv::Rect> found_locations;
    d_hog->detectMultiScale(d_img, found_locations);

#if defined (LOG_CASCADE_STATISTIC)
    for (int i = 0; i < (int)found_locations.size(); i++)
    {
        cv::Rect r = found_locations[i];

        std::cout << r.x << " " << r.y  << " " << r.width << " " << r.height << std::endl;
        cv::rectangle(markedImage, r, CV_RGB(255, 0, 0));
    }

    cv::imshow("Res", markedImage);
    cv::waitKey();
#endif
}
Example #2
    void test(const cv::Mat& img) 
    {
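        // Run GPU HOG detection with the default people detector on the original
        // image, then on half-size and double-size copies, dumping or comparing
        // the block histograms and detected locations after each pass.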
        cv::gpu::GpuMat d_img(img);

        gamma_correction = false;
        setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
        //cpu detector may be updated soon
        //hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());

        std::vector<cv::Point> locations;

        // Test detect
        detect(d_img, locations, 0);

#ifdef DUMP
        dump(block_hists, locations);
#else
        compare(cv::Mat(block_hists), locations);
#endif

        // Test detect on smaller image
        cv::Mat img2;
        cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2)); 
        detect(cv::gpu::GpuMat(img2), locations, 0);

#ifdef DUMP
        dump(block_hists, locations);
#else
        compare(cv::Mat(block_hists), locations);
#endif

        // Test detect on larger image
        cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2)); 
        detect(cv::gpu::GpuMat(img2), locations, 0);
        
#ifdef DUMP
        dump(block_hists, locations);
#else
        compare(cv::Mat(block_hists), locations);
#endif
    }
Example #3
GPU_TEST_P(CalTech, HOG)
{
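    // Run the GPU HOG pedestrian detector on the CalTech test image; with
    // LOG_CASCADE_STATISTIC defined, print and visualize the detected rectangles.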
    cv::gpu::GpuMat d_img(img);
    cv::Mat markedImage(img.clone());

    cv::gpu::HOGDescriptor d_hog;
    d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
    d_hog.nlevels = d_hog.nlevels + 32;

    std::vector<cv::Rect> found_locations;
    d_hog.detectMultiScale(d_img, found_locations);

#if defined (LOG_CASCADE_STATISTIC)
    for (int i = 0; i < (int)found_locations.size(); i++)
    {
        cv::Rect r = found_locations[i];

        std::cout << r.x << " " << r.y  << " " << r.width << " " << r.height << std::endl;
        cv::rectangle(markedImage, r, CV_RGB(255, 0, 0));
    }

    cv::imshow("Res", markedImage); cv::waitKey();
#endif
}
Example #4
bool TestRectStdDev::process()
{
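    // Compute integral and squared-integral images on the GPU, evaluate the
    // per-window standard deviation with nppiStRectStdDev, repeat the same
    // pipeline on the host, and compare the results within a relative tolerance.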
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32s _normWidth = (Ncv32s)this->width - this->rect.x - this->rect.width + 1;
    Ncv32s _normHeight = (Ncv32s)this->height - this->rect.y - this->rect.height + 1;
    if (_normWidth <= 0 || _normHeight <= 0)
    {
        return true;
    }
    Ncv32u normWidth = (Ncv32u)_normWidth;
    Ncv32u normHeight = (Ncv32u)_normHeight;
    NcvSize32u szNormRoi(normWidth, normHeight);

    Ncv32u widthII = this->width + 1;
    Ncv32u heightII = this->height + 1;
    Ncv32u widthSII = this->width + 1;
    Ncv32u heightSII = this->height + 1;

    NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);

    NCVMatrixAlloc<Ncv32u> d_imgII(*this->allocatorGPU.get(), widthII, heightII);
    ncvAssertReturn(d_imgII.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32u> h_imgII(*this->allocatorCPU.get(), widthII, heightII);
    ncvAssertReturn(h_imgII.isMemAllocated(), false);

    NCVMatrixAlloc<Ncv64u> d_imgSII(*this->allocatorGPU.get(), widthSII, heightSII);
    ncvAssertReturn(d_imgSII.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> h_imgSII(*this->allocatorCPU.get(), widthSII, heightSII);
    ncvAssertReturn(h_imgSII.isMemAllocated(), false);

    NCVMatrixAlloc<Ncv32f> d_norm(*this->allocatorGPU.get(), normWidth, normHeight);
    ncvAssertReturn(d_norm.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32f> h_norm(*this->allocatorCPU.get(), normWidth, normHeight);
    ncvAssertReturn(h_norm.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32f> h_norm_d(*this->allocatorCPU.get(), normWidth, normHeight);
    ncvAssertReturn(h_norm_d.isMemAllocated(), false);

    Ncv32u bufSizeII, bufSizeSII;
    ncvStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSizeII, this->devProp);
    ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
    ncvStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &bufSizeSII, this->devProp);
    ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
    Ncv32u bufSize = bufSizeII > bufSizeSII ? bufSizeII : bufSizeSII;
    NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
    ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(),
                                       d_imgII.ptr(), d_imgII.pitch(),
                                       NcvSize32u(this->width, this->height),
                                       d_tmpBuf.ptr(), bufSize, this->devProp);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
                                          d_imgSII.ptr(), d_imgSII.pitch(),
                                          NcvSize32u(this->width, this->height),
                                          d_tmpBuf.ptr(), bufSize, this->devProp);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStRectStdDev_32f_C1R(d_imgII.ptr(), d_imgII.pitch(),
                                       d_imgSII.ptr(), d_imgSII.pitch(),
                                       d_norm.ptr(), d_norm.pitch(),
                                       szNormRoi, this->rect,
                                       this->scaleFactor,
                                       this->bTextureCache);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = d_norm.copySolid(h_norm_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStIntegral_8u32u_C1R_host(h_img.ptr(), h_img.pitch(),
                                          h_imgII.ptr(), h_imgII.pitch(),
                                          NcvSize32u(this->width, this->height));
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStSqrIntegral_8u64u_C1R_host(h_img.ptr(), h_img.pitch(),
                                             h_imgSII.ptr(), h_imgSII.pitch(),
                                             NcvSize32u(this->width, this->height));
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStRectStdDev_32f_C1R_host(h_imgII.ptr(), h_imgII.pitch(),
                                          h_imgSII.ptr(), h_imgSII.pitch(),
                                          h_norm.ptr(), h_norm.pitch(),
                                          szNormRoi, this->rect,
                                          this->scaleFactor);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    const Ncv64f relEPS = 0.005;
    for (Ncv32u i=0; bLoopVirgin && i < h_norm.height(); i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < h_norm.width(); j++)
        {
            Ncv64f absErr = fabs(h_norm.ptr()[h_norm.stride()*i+j] - h_norm_d.ptr()[h_norm_d.stride()*i+j]);
            Ncv64f relErr = absErr / h_norm.ptr()[h_norm.stride()*i+j];

            if (relErr > relEPS)
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }

    return rcode;
}
Example #5
bool TestHaarCascadeApplication::process()
{
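    // Load a Haar cascade, build the integral images and rect-stddev map on the
    // device, run the classifier cascade on both host and device at a single
    // scale, and verify that detection counts and pixel masks match bit-for-bit.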
#if defined(__APPLE__)
    return true;
#endif
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32u numStages, numNodes, numFeatures;

    ncvStat = ncvHaarGetClassifierSize(this->cascadeName, numStages, numNodes, numFeatures);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    NCVVectorAlloc<HaarStage64> h_HaarStages(*this->allocatorCPU.get(), numStages);
    ncvAssertReturn(h_HaarStages.isMemAllocated(), false);
    NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes(*this->allocatorCPU.get(), numNodes);
    ncvAssertReturn(h_HaarNodes.isMemAllocated(), false);
    NCVVectorAlloc<HaarFeature64> h_HaarFeatures(*this->allocatorCPU.get(), numFeatures);
    ncvAssertReturn(h_HaarFeatures.isMemAllocated(), false);

    NCVVectorAlloc<HaarStage64> d_HaarStages(*this->allocatorGPU.get(), numStages);
    ncvAssertReturn(d_HaarStages.isMemAllocated(), false);
    NCVVectorAlloc<HaarClassifierNode128> d_HaarNodes(*this->allocatorGPU.get(), numNodes);
    ncvAssertReturn(d_HaarNodes.isMemAllocated(), false);
    NCVVectorAlloc<HaarFeature64> d_HaarFeatures(*this->allocatorGPU.get(), numFeatures);
    ncvAssertReturn(d_HaarFeatures.isMemAllocated(), false);

    HaarClassifierCascadeDescriptor haar;
    haar.ClassifierSize.width = haar.ClassifierSize.height = 1;
    haar.bNeedsTiltedII = false;
    haar.NumClassifierRootNodes = numNodes;
    haar.NumClassifierTotalNodes = numNodes;
    haar.NumFeatures = numFeatures;
    haar.NumStages = numStages;

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN

    ncvStat = ncvHaarLoadFromFile_host(this->cascadeName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    ncvAssertReturn(NCV_SUCCESS == h_HaarStages.copySolid(d_HaarStages, 0), false);
    ncvAssertReturn(NCV_SUCCESS == h_HaarNodes.copySolid(d_HaarNodes, 0), false);
    ncvAssertReturn(NCV_SUCCESS == h_HaarFeatures.copySolid(d_HaarFeatures, 0), false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    NCV_SKIP_COND_END

    NcvSize32s srcRoi, srcIIRoi, searchRoi;
    srcRoi.width = this->width;
    srcRoi.height = this->height;
    srcIIRoi.width = srcRoi.width + 1;
    srcIIRoi.height = srcRoi.height + 1;
    searchRoi.width = srcIIRoi.width - haar.ClassifierSize.width;
    searchRoi.height = srcIIRoi.height - haar.ClassifierSize.height;
    if (searchRoi.width <= 0 || searchRoi.height <= 0)
    {
        return false;
    }
    NcvSize32u searchRoiU(searchRoi.width, searchRoi.height);

    NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);

    Ncv32u integralWidth = this->width + 1;
    Ncv32u integralHeight = this->height + 1;

    NCVMatrixAlloc<Ncv32u> d_integralImage(*this->allocatorGPU.get(), integralWidth, integralHeight);
    ncvAssertReturn(d_integralImage.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> d_sqIntegralImage(*this->allocatorGPU.get(), integralWidth, integralHeight);
    ncvAssertReturn(d_sqIntegralImage.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32u> h_integralImage(*this->allocatorCPU.get(), integralWidth, integralHeight);
    ncvAssertReturn(h_integralImage.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> h_sqIntegralImage(*this->allocatorCPU.get(), integralWidth, integralHeight);
    ncvAssertReturn(h_sqIntegralImage.isMemAllocated(), false);

    NCVMatrixAlloc<Ncv32f> d_rectStdDev(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_rectStdDev.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32u> d_pixelMask(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_pixelMask.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32f> h_rectStdDev(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_rectStdDev.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32u> h_pixelMask(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_pixelMask.isMemAllocated(), false);

    NCVVectorAlloc<NcvRect32u> d_hypotheses(*this->allocatorGPU.get(), this->width * this->height);
    ncvAssertReturn(d_hypotheses.isMemAllocated(), false);
    NCVVectorAlloc<NcvRect32u> h_hypotheses(*this->allocatorCPU.get(), this->width * this->height);
    ncvAssertReturn(h_hypotheses.isMemAllocated(), false);

    NCVStatus nppStat;
    Ncv32u szTmpBufIntegral, szTmpBufSqIntegral;
    nppStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &szTmpBufIntegral, this->devProp);
    ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
    nppStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &szTmpBufSqIntegral, this->devProp);
    ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
    NCVVectorAlloc<Ncv8u> d_tmpIIbuf(*this->allocatorGPU.get(), std::max(szTmpBufIntegral, szTmpBufSqIntegral));
    ncvAssertReturn(d_tmpIIbuf.isMemAllocated(), false);

    Ncv32u detectionsOnThisScale_d = 0;
    Ncv32u detectionsOnThisScale_h = 0;

    NCV_SKIP_COND_BEGIN

    ncvAssertReturn(this->src.fill(h_img), false);
    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    nppStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(),
                                       d_integralImage.ptr(), d_integralImage.pitch(),
                                       NcvSize32u(d_img.width(), d_img.height()),
                                       d_tmpIIbuf.ptr(), szTmpBufIntegral, this->devProp);
    ncvAssertReturn(nppStat == NPPST_SUCCESS, false);

    nppStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
                                          d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
                                          NcvSize32u(d_img.width(), d_img.height()),
                                          d_tmpIIbuf.ptr(), szTmpBufSqIntegral, this->devProp);
    ncvAssertReturn(nppStat == NPPST_SUCCESS, false);

    const NcvRect32u rect(
        HAAR_STDDEV_BORDER,
        HAAR_STDDEV_BORDER,
        haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER,
        haar.ClassifierSize.height - 2*HAAR_STDDEV_BORDER);
    nppStat = nppiStRectStdDev_32f_C1R(
        d_integralImage.ptr(), d_integralImage.pitch(),
        d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
        d_rectStdDev.ptr(), d_rectStdDev.pitch(),
        NcvSize32u(searchRoi.width, searchRoi.height), rect,
        1.0f, true);
    ncvAssertReturn(nppStat == NPPST_SUCCESS, false);

    ncvStat = d_integralImage.copySolid(h_integralImage, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    ncvStat = d_rectStdDev.copySolid(h_rectStdDev, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    for (Ncv32u i=0; i<searchRoiU.height; i++)
    {
        for (Ncv32u j=0; j<h_pixelMask.stride(); j++)
        {
            if (j<searchRoiU.width)
            {
                h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = (i << 16) | j;
            }
            else
            {
                h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = OBJDET_MASK_ELEMENT_INVALID_32U;
            }
        }
    }
    ncvAssertReturn(cudaSuccess == cudaStreamSynchronize(0), false);

#if !defined(__APPLE__)

#if defined(__GNUC__)
    //http://www.christian-seiler.de/projekte/fpmath/

    fpu_control_t fpu_oldcw, fpu_cw;
    _FPU_GETCW(fpu_oldcw); // store old cw
    fpu_cw = (fpu_oldcw & ~_FPU_EXTENDED & ~_FPU_DOUBLE & ~_FPU_SINGLE) | _FPU_SINGLE;
    _FPU_SETCW(fpu_cw);

    // calculations here
    ncvStat = ncvApplyHaarClassifierCascade_host(
        h_integralImage, h_rectStdDev, h_pixelMask,
        detectionsOnThisScale_h,
        haar, h_HaarStages, h_HaarNodes, h_HaarFeatures, false,
        searchRoiU, 1, 1.0f);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    _FPU_SETCW(fpu_oldcw); // restore old cw
#else
#ifndef _WIN64
    Ncv32u fpu_oldcw, fpu_cw;
    _controlfp_s(&fpu_cw, 0, 0);
    fpu_oldcw = fpu_cw;
    _controlfp_s(&fpu_cw, _PC_24, _MCW_PC);
#endif
    ncvStat = ncvApplyHaarClassifierCascade_host(
        h_integralImage, h_rectStdDev, h_pixelMask,
        detectionsOnThisScale_h,
        haar, h_HaarStages, h_HaarNodes, h_HaarFeatures, false,
        searchRoiU, 1, 1.0f);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
#ifndef _WIN64
    _controlfp_s(&fpu_cw, fpu_oldcw, _MCW_PC);
#endif
#endif

#endif
    NCV_SKIP_COND_END

    int devId;
    ncvAssertCUDAReturn(cudaGetDevice(&devId), false);
    cudaDeviceProp _devProp;
    ncvAssertCUDAReturn(cudaGetDeviceProperties(&_devProp, devId), false);

    ncvStat = ncvApplyHaarClassifierCascade_device(
        d_integralImage, d_rectStdDev, d_pixelMask,
        detectionsOnThisScale_d,
        haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,
        searchRoiU, 1, 1.0f,
        *this->allocatorGPU.get(), *this->allocatorCPU.get(),
        _devProp, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    NCVMatrixAlloc<Ncv32u> h_pixelMask_d(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_pixelMask_d.isMemAllocated(), false);

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN

    ncvStat = d_pixelMask.copySolid(h_pixelMask_d, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    if (detectionsOnThisScale_d != detectionsOnThisScale_h)
    {
        bLoopVirgin = false;
    }
    else
    {
        std::sort(h_pixelMask_d.ptr(), h_pixelMask_d.ptr() + detectionsOnThisScale_d);
        for (Ncv32u i=0; i<detectionsOnThisScale_d && bLoopVirgin; i++)
        {
            if (h_pixelMask.ptr()[i] != h_pixelMask_d.ptr()[i])
            {
                bLoopVirgin = false;
            }
        }
    }

    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }

    return rcode;
}
Example #6
bool TestTranspose<T>::process()
{
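    // Transpose the source matrix on the GPU and on the host (32-bit or 64-bit
    // elements) and compare the two results bit-for-bit.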
    NCVStatus ncvStat;
    bool rcode = false;

    NcvSize32u srcSize(this->width, this->height);

    NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);

    NCVMatrixAlloc<T> d_dst(*this->allocatorGPU.get(), this->height, this->width);
    ncvAssertReturn(d_dst.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_dst(*this->allocatorCPU.get(), this->height, this->width);
    ncvAssertReturn(h_dst.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_dst_d(*this->allocatorCPU.get(), this->height, this->width);
    ncvAssertReturn(h_dst_d.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);
    NCV_SKIP_COND_END

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_BEGIN
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = nppiStTranspose_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
                                          (Ncv32u *)d_dst.ptr(), d_dst.pitch(),
                                          NcvSize32u(this->width, this->height));
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        ncvStat = nppiStTranspose_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
                                        (Ncv64u *)d_dst.ptr(), d_dst.pitch(),
                                        NcvSize32u(this->width, this->height));
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect transpose test instance", false);
    }
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END
    ncvStat = d_dst.copySolid(h_dst_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    NCV_SKIP_COND_BEGIN
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = nppiStTranspose_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
                                               (Ncv32u *)h_dst.ptr(), h_dst.pitch(),
                                               NcvSize32u(this->width, this->height));
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        ncvStat = nppiStTranspose_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
                                               (Ncv64u *)h_dst.ptr(), h_dst.pitch(),
                                               NcvSize32u(this->width, this->height));
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
    }
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    //const Ncv64f relEPS = 0.005;
    for (Ncv32u i=0; bLoopVirgin && i < this->width; i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < this->height; j++)
        {
            if (h_dst.ptr()[h_dst.stride()*i+j] != h_dst_d.ptr()[h_dst_d.stride()*i+j])
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }

    return rcode;
}
Example #7
bool TestResize<T>::process()
{
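    // Decimate the source image by scaleFactor on the GPU and on the host and
    // compare the two downsampled results bit-for-bit.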
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32s smallWidth = this->width / this->scaleFactor;
    Ncv32s smallHeight = this->height / this->scaleFactor;
    if (smallWidth == 0 || smallHeight == 0)
    {
        return true;
    }

    NcvSize32u srcSize(this->width, this->height);

    NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);

    NCVMatrixAlloc<T> d_small(*this->allocatorGPU.get(), smallWidth, smallHeight);
    ncvAssertReturn(d_small.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_small(*this->allocatorCPU.get(), smallWidth, smallHeight);
    ncvAssertReturn(h_small.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_small_d(*this->allocatorCPU.get(), smallWidth, smallHeight);
    ncvAssertReturn(h_small_d.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);
    NCV_SKIP_COND_END

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_BEGIN
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = nppiStDecimate_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
                                         (Ncv32u *)d_small.ptr(), d_small.pitch(),
                                         srcSize, this->scaleFactor,
                                         this->bTextureCache);
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        ncvStat = nppiStDecimate_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
                                         (Ncv64u *)d_small.ptr(), d_small.pitch(),
                                         srcSize, this->scaleFactor,
                                         this->bTextureCache);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
    }
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END
    ncvStat = d_small.copySolid(h_small_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    NCV_SKIP_COND_BEGIN
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = nppiStDecimate_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
                                              (Ncv32u *)h_small.ptr(), h_small.pitch(),
                                              srcSize, this->scaleFactor);
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        ncvStat = nppiStDecimate_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
                                              (Ncv64u *)h_small.ptr(), h_small.pitch(),
                                              srcSize, this->scaleFactor);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
    }
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    //const Ncv64f relEPS = 0.005;
    for (Ncv32u i=0; bLoopVirgin && i < h_small.height(); i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < h_small.width(); j++)
        {
            if (h_small.ptr()[h_small.stride()*i+j] != h_small_d.ptr()[h_small_d.stride()*i+j])
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }

    return rcode;
}
Example #8
bool TestIntegralImage<T_in, T_out>::process()
{
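    // Build the integral image on the GPU and on the host (8u->32u or 32f->32f)
    // and compare the results element-wise (exactly for 8u, within a small
    // epsilon for 32f).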
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32u widthII = this->width + 1;
    Ncv32u heightII = this->height + 1;

    NCVMatrixAlloc<T_in> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T_in> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);
    NCVMatrixAlloc<T_out> d_imgII(*this->allocatorGPU.get(), widthII, heightII);
    ncvAssertReturn(d_imgII.isMemAllocated(), false);
    NCVMatrixAlloc<T_out> h_imgII(*this->allocatorCPU.get(), widthII, heightII);
    ncvAssertReturn(h_imgII.isMemAllocated(), false);
    NCVMatrixAlloc<T_out> h_imgII_d(*this->allocatorCPU.get(), widthII, heightII);
    ncvAssertReturn(h_imgII_d.isMemAllocated(), false);

    Ncv32u bufSize;
    if (sizeof(T_in) == sizeof(Ncv8u))
    {
        ncvStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
        ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
    }
    else if (sizeof(T_in) == sizeof(Ncv32f))
    {
        ncvStat = nppiStIntegralGetSize_32f32f(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
        ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
    }

    NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
    ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN

    ncvAssertReturn(this->src.fill(h_img), false);

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    if (sizeof(T_in) == sizeof(Ncv8u))
    {
        ncvStat = nppiStIntegral_8u32u_C1R((Ncv8u *)d_img.ptr(), d_img.pitch(),
                                           (Ncv32u *)d_imgII.ptr(), d_imgII.pitch(),
                                           NcvSize32u(this->width, this->height),
                                           d_tmpBuf.ptr(), bufSize, this->devProp);
        ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    }
    else if (sizeof(T_in) == sizeof(Ncv32f))
    {
        ncvStat = nppiStIntegral_32f32f_C1R((Ncv32f *)d_img.ptr(), d_img.pitch(),
                                            (Ncv32f *)d_imgII.ptr(), d_imgII.pitch(),
                                            NcvSize32u(this->width, this->height),
                                            d_tmpBuf.ptr(), bufSize, this->devProp);
        ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
    }

    ncvStat = d_imgII.copySolid(h_imgII_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    if (sizeof(T_in) == sizeof(Ncv8u))
    {
        ncvStat = nppiStIntegral_8u32u_C1R_host((Ncv8u *)h_img.ptr(), h_img.pitch(),
                                                (Ncv32u *)h_imgII.ptr(), h_imgII.pitch(),
                                                NcvSize32u(this->width, this->height));
        ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    }
    else if (sizeof(T_in) == sizeof(Ncv32f))
    {
        ncvStat = nppiStIntegral_32f32f_C1R_host((Ncv32f *)h_img.ptr(), h_img.pitch(),
                                                 (Ncv32f *)h_imgII.ptr(), h_imgII.pitch(),
                                                 NcvSize32u(this->width, this->height));
        ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
    }

    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    for (Ncv32u i=0; bLoopVirgin && i < h_img.height() + 1; i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < h_img.width() + 1; j++)
        {
            if (sizeof(T_in) == sizeof(Ncv8u))
            {
                if (h_imgII.ptr()[h_imgII.stride()*i+j] != h_imgII_d.ptr()[h_imgII_d.stride()*i+j])
                {
                    bLoopVirgin = false;
                }
            }
            else if (sizeof(T_in) == sizeof(Ncv32f))
            {
                if (fabsf((float)h_imgII.ptr()[h_imgII.stride()*i+j] - (float)h_imgII_d.ptr()[h_imgII_d.stride()*i+j]) > 0.01f)
                {
                    bLoopVirgin = false;
                }
            }
            else
            {
                ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }

    return rcode;
}
Example #9
bool TestDrawRects<T>::process()
{
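    // Generate random rectangles covering the input, draw them on the GPU and on
    // the host, and compare the two rendered images bit-for-bit.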
    NCVStatus ncvStat;
    bool rcode = false;

    NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img_d(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img_d.isMemAllocated(), false);

    NCVVectorAlloc<NcvRect32u> d_rects(*this->allocatorGPU.get(), this->numRects);
    ncvAssertReturn(d_rects.isMemAllocated(), false);
    NCVVectorAlloc<NcvRect32u> h_rects(*this->allocatorCPU.get(), this->numRects);
    ncvAssertReturn(h_rects.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);
    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    //fill vector of rectangles with random rects covering the input
    NCVVectorReuse<Ncv32u> h_rects_as32u(h_rects.getSegment());
    ncvAssertReturn(h_rects_as32u.isMemReused(), false);
    ncvAssertReturn(this->src32u.fill(h_rects_as32u), false);
    for (Ncv32u i=0; i<this->numRects; i++)
    {
        h_rects.ptr()[i].x = (Ncv32u)(((1.0 * h_rects.ptr()[i].x) / RAND_MAX) * (this->width-2));
        h_rects.ptr()[i].y = (Ncv32u)(((1.0 * h_rects.ptr()[i].y) / RAND_MAX) * (this->height-2));
        h_rects.ptr()[i].width = (Ncv32u)(((1.0 * h_rects.ptr()[i].width) / RAND_MAX) * (this->width+10 - h_rects.ptr()[i].x));
        h_rects.ptr()[i].height = (Ncv32u)(((1.0 * h_rects.ptr()[i].height) / RAND_MAX) * (this->height+10 - h_rects.ptr()[i].y));
    }
    ncvStat = h_rects.copySolid(d_rects, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = ncvDrawRects_32u_device((Ncv32u *)d_img.ptr(), d_img.stride(), this->width, this->height,
                                          (NcvRect32u *)d_rects.ptr(), this->numRects, this->color, 0);
    }
    else if (sizeof(T) == sizeof(Ncv8u))
    {
        ncvStat = ncvDrawRects_8u_device((Ncv8u *)d_img.ptr(), d_img.stride(), this->width, this->height,
                                         (NcvRect32u *)d_rects.ptr(), this->numRects, (Ncv8u)this->color, 0);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);
    }
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    NCV_SKIP_COND_END

    ncvStat = d_img.copySolid(h_img_d, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    NCV_SKIP_COND_BEGIN
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = ncvDrawRects_32u_host((Ncv32u *)h_img.ptr(), h_img.stride(), this->width, this->height,
                                        (NcvRect32u *)h_rects.ptr(), this->numRects, this->color);
    }
    else if (sizeof(T) == sizeof(Ncv8u))
    {
        ncvStat = ncvDrawRects_8u_host((Ncv8u *)h_img.ptr(), h_img.stride(), this->width, this->height,
                                       (NcvRect32u *)h_rects.ptr(), this->numRects, (Ncv8u)this->color);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);
    }
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    //const Ncv64f relEPS = 0.005;
    for (Ncv32u i=0; bLoopVirgin && i < h_img.height(); i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < h_img.width(); j++)
        {
            if (h_img.ptr()[h_img.stride()*i+j] != h_img_d.ptr()[h_img_d.stride()*i+j])
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }

    return rcode;
}
Example #10
bool TestIntegralImageSquared::process()
{
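    // Build the squared integral image on the GPU and on the host and compare
    // the results bit-for-bit.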
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32u widthSII = this->width + 1;
    Ncv32u heightSII = this->height + 1;

    NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> d_imgSII(*this->allocatorGPU.get(), widthSII, heightSII);
    ncvAssertReturn(d_imgSII.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> h_imgSII(*this->allocatorCPU.get(), widthSII, heightSII);
    ncvAssertReturn(h_imgSII.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> h_imgSII_d(*this->allocatorCPU.get(), widthSII, heightSII);
    ncvAssertReturn(h_imgSII_d.isMemAllocated(), false);

    Ncv32u bufSize;
    ncvStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
    ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
    NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
    ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN

    ncvAssertReturn(this->src.fill(h_img), false);

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
                                          d_imgSII.ptr(), d_imgSII.pitch(),
                                          NcvSize32u(this->width, this->height),
                                          d_tmpBuf.ptr(), bufSize, this->devProp);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = d_imgSII.copySolid(h_imgSII_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStSqrIntegral_8u64u_C1R_host(h_img.ptr(), h_img.pitch(),
                                               h_imgSII.ptr(), h_imgSII.pitch(),
                                               NcvSize32u(this->width, this->height));
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    for (Ncv32u i=0; bLoopVirgin && i < h_img.height() + 1; i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < h_img.width() + 1; j++)
        {
            if (h_imgSII.ptr()[h_imgSII.stride()*i+j] != h_imgSII_d.ptr()[h_imgSII_d.stride()*i+j])
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }

    return rcode;
}
Example #11
GPU_TEST_P(HOG, GetDescriptors)
{
    // Load image (e.g. train data, composed from windows)
    cv::Mat img_rgb = readImage("hog/train_data.png");
    ASSERT_FALSE(img_rgb.empty());

    // Convert to C4
    cv::Mat img;
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);

    cv::gpu::GpuMat d_img(img);

    // Convert train images into feature vectors (train table)
    cv::gpu::GpuMat descriptors, descriptors_by_cols;
    getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
    getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);

    // Check size of the result train table
    wins_per_img_x = 3;
    wins_per_img_y = 2;
    blocks_per_win_x = 7;
    blocks_per_win_y = 15;
    block_hist_size = 36;
    cv::Size descr_size_expected = cv::Size(blocks_per_win_x * blocks_per_win_y * block_hist_size,
                                            wins_per_img_x * wins_per_img_y);
    ASSERT_EQ(descr_size_expected, descriptors.size());

    // Check both formats of output descriptors are handled correctly
    cv::Mat dr(descriptors);
    cv::Mat dc(descriptors_by_cols);
    for (int i = 0; i < wins_per_img_x * wins_per_img_y; ++i)
    {
        const float* l = dr.rowRange(i, i + 1).ptr<float>();
        const float* r = dc.rowRange(i, i + 1).ptr<float>();
        for (int y = 0; y < blocks_per_win_y; ++y)
            for (int x = 0; x < blocks_per_win_x; ++x)
                for (int k = 0; k < block_hist_size; ++k)
                    ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
                              r[(x * blocks_per_win_y + y) * block_hist_size + k]);
    }

    /* Now we want to extract the same feature vectors, but from single images. NOTE: the results
    will be different due to border value interpolation. Using many small images is slower; however,
    we won't call getDescriptors and will use computeBlockHistograms instead. computeBlockHistograms
    works well, as can be checked in the gpu_hog sample. */

    img_rgb = readImage("hog/positive1.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    // Everything is fine with interpolation for left top subimage
    ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));

    img_rgb = readImage("hog/positive2.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));

    img_rgb = readImage("hog/negative1.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));

    img_rgb = readImage("hog/negative2.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));

    img_rgb = readImage("hog/positive3.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));

    img_rgb = readImage("hog/negative3.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
}