CUDA_TEST_P(CalTech, HOG)
{
    cv::cuda::GpuMat d_img(img);
    cv::Mat markedImage(img.clone());

    cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
    d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
    d_hog->setNumLevels(d_hog->getNumLevels() + 32);

    std::vector<cv::Rect> found_locations;
    d_hog->detectMultiScale(d_img, found_locations);

#if defined (LOG_CASCADE_STATISTIC)
    for (int i = 0; i < (int)found_locations.size(); i++)
    {
        cv::Rect r = found_locations[i];
        std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl;
        cv::rectangle(markedImage, r, CV_RGB(255, 0, 0));
    }
    cv::imshow("Res", markedImage);
    cv::waitKey();
#endif
}
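// A minimal standalone sketch of the cv::cuda::HOG pedestrian-detection API the
// test above exercises, assuming OpenCV 3.x built with the cudaobjdetect module
// (headers already included in this translation unit). The image path and the
// function name are hypothetical, for illustration only.
static void detectPeopleSketch()
{
    cv::Mat frame = cv::imread("pedestrians.png"); // hypothetical input image
    cv::Mat gray;
    cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY); // CUDA HOG expects 1- or 4-channel input

    cv::cuda::GpuMat d_frame(gray);                // upload to the device
    cv::Ptr<cv::cuda::HOG> hog = cv::cuda::HOG::create();
    hog->setSVMDetector(hog->getDefaultPeopleDetector());

    std::vector<cv::Rect> people;
    hog->detectMultiScale(d_frame, people);        // multi-scale sliding-window detection

    for (size_t i = 0; i < people.size(); ++i)
        cv::rectangle(frame, people[i], CV_RGB(0, 255, 0));
}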
void test(const cv::Mat& img)
{
    cv::gpu::GpuMat d_img(img);

    gamma_correction = false;
    setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
    //cpu detector may be updated soon
    //hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());

    std::vector<cv::Point> locations;

    // Test detect
    detect(d_img, locations, 0);

#ifdef DUMP
    dump(block_hists, locations);
#else
    compare(cv::Mat(block_hists), locations);
#endif

    // Test detect on a smaller image
    cv::Mat img2;
    cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2));
    detect(cv::gpu::GpuMat(img2), locations, 0);

#ifdef DUMP
    dump(block_hists, locations);
#else
    compare(cv::Mat(block_hists), locations);
#endif

    // Test detect on a larger image
    cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2));
    detect(cv::gpu::GpuMat(img2), locations, 0);

#ifdef DUMP
    dump(block_hists, locations);
#else
    compare(cv::Mat(block_hists), locations);
#endif
}
GPU_TEST_P(CalTech, HOG)
{
    cv::gpu::GpuMat d_img(img);
    cv::Mat markedImage(img.clone());

    cv::gpu::HOGDescriptor d_hog;
    d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
    d_hog.nlevels = d_hog.nlevels + 32;

    std::vector<cv::Rect> found_locations;
    d_hog.detectMultiScale(d_img, found_locations);

#if defined (LOG_CASCADE_STATISTIC)
    for (int i = 0; i < (int)found_locations.size(); i++)
    {
        cv::Rect r = found_locations[i];
        std::cout << r.x << " " << r.y << " " << r.width << " " << r.height << std::endl;
        cv::rectangle(markedImage, r, CV_RGB(255, 0, 0));
    }
    cv::imshow("Res", markedImage);
    cv::waitKey();
#endif
}
bool TestRectStdDev::process()
{
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32s _normWidth = (Ncv32s)this->width - this->rect.x - this->rect.width + 1;
    Ncv32s _normHeight = (Ncv32s)this->height - this->rect.y - this->rect.height + 1;
    if (_normWidth <= 0 || _normHeight <= 0)
    {
        return true;
    }
    Ncv32u normWidth = (Ncv32u)_normWidth;
    Ncv32u normHeight = (Ncv32u)_normHeight;
    NcvSize32u szNormRoi(normWidth, normHeight);

    Ncv32u widthII = this->width + 1;
    Ncv32u heightII = this->height + 1;
    Ncv32u widthSII = this->width + 1;
    Ncv32u heightSII = this->height + 1;

    NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);

    NCVMatrixAlloc<Ncv32u> d_imgII(*this->allocatorGPU.get(), widthII, heightII);
    ncvAssertReturn(d_imgII.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32u> h_imgII(*this->allocatorCPU.get(), widthII, heightII);
    ncvAssertReturn(h_imgII.isMemAllocated(), false);

    NCVMatrixAlloc<Ncv64u> d_imgSII(*this->allocatorGPU.get(), widthSII, heightSII);
    ncvAssertReturn(d_imgSII.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> h_imgSII(*this->allocatorCPU.get(), widthSII, heightSII);
    ncvAssertReturn(h_imgSII.isMemAllocated(), false);

    NCVMatrixAlloc<Ncv32f> d_norm(*this->allocatorGPU.get(), normWidth, normHeight);
    ncvAssertReturn(d_norm.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32f> h_norm(*this->allocatorCPU.get(), normWidth, normHeight);
    ncvAssertReturn(h_norm.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32f> h_norm_d(*this->allocatorCPU.get(), normWidth, normHeight);
    ncvAssertReturn(h_norm_d.isMemAllocated(), false);

    Ncv32u bufSizeII, bufSizeSII;
    ncvStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSizeII, this->devProp);
    ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
    ncvStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &bufSizeSII, this->devProp);
    ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
    Ncv32u bufSize = bufSizeII > bufSizeSII ? bufSizeII : bufSizeSII;
    NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
    ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(),
                                       d_imgII.ptr(), d_imgII.pitch(),
                                       NcvSize32u(this->width, this->height),
                                       d_tmpBuf.ptr(), bufSize, this->devProp);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
                                          d_imgSII.ptr(), d_imgSII.pitch(),
                                          NcvSize32u(this->width, this->height),
                                          d_tmpBuf.ptr(), bufSize, this->devProp);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStRectStdDev_32f_C1R(d_imgII.ptr(), d_imgII.pitch(),
                                       d_imgSII.ptr(), d_imgSII.pitch(),
                                       d_norm.ptr(), d_norm.pitch(),
                                       szNormRoi, this->rect,
                                       this->scaleFactor, this->bTextureCache);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = d_norm.copySolid(h_norm_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStIntegral_8u32u_C1R_host(h_img.ptr(), h_img.pitch(),
                                            h_imgII.ptr(), h_imgII.pitch(),
                                            NcvSize32u(this->width, this->height));
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStSqrIntegral_8u64u_C1R_host(h_img.ptr(), h_img.pitch(),
                                               h_imgSII.ptr(), h_imgSII.pitch(),
                                               NcvSize32u(this->width, this->height));
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStRectStdDev_32f_C1R_host(h_imgII.ptr(), h_imgII.pitch(),
                                            h_imgSII.ptr(), h_imgSII.pitch(),
                                            h_norm.ptr(), h_norm.pitch(),
                                            szNormRoi, this->rect, this->scaleFactor);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    const Ncv64f relEPS = 0.005;
    for (Ncv32u i=0; bLoopVirgin && i < h_norm.height(); i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < h_norm.width(); j++)
        {
            Ncv64f absErr = fabs(h_norm.ptr()[h_norm.stride()*i+j] -
                                 h_norm_d.ptr()[h_norm_d.stride()*i+j]);
            Ncv64f relErr = absErr / h_norm.ptr()[h_norm.stride()*i+j];
            if (relErr > relEPS)
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }
    return rcode;
}
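// A hedged reference sketch (an illustration, not the NPP_staging implementation)
// of the quantity nppiStRectStdDev_32f_C1R is validated against above: for each
// anchor pixel, the standard deviation over a window of n pixels is derived from
// the plain and squared integral images as sqrt(E[x^2] - E[x]^2). It ignores the
// extra area scale factor the NPP call applies; names and separate strides (in
// elements) are assumptions.
static float rectStdDevRef(const Ncv32u* ii, int iiStride,
                           const Ncv64u* sqii, int sqStride,
                           int x, int y, int w, int h)
{
    // Sum over the rectangle via four integral-image taps each.
    double sum = (double)ii[(y + h) * iiStride + (x + w)] - ii[y * iiStride + (x + w)]
               - ii[(y + h) * iiStride + x] + ii[y * iiStride + x];
    double sqsum = (double)sqii[(y + h) * sqStride + (x + w)] - sqii[y * sqStride + (x + w)]
                 - sqii[(y + h) * sqStride + x] + sqii[y * sqStride + x];
    double n = (double)w * h;
    double mean = sum / n;
    double variance = sqsum / n - mean * mean;
    return (float)sqrt(variance > 0.0 ? variance : 0.0); // clamp tiny negative round-off
}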
bool TestHaarCascadeApplication::process()
{
#if defined(__APPLE__)
    return true;
#endif
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32u numStages, numNodes, numFeatures;
    ncvStat = ncvHaarGetClassifierSize(this->cascadeName, numStages, numNodes, numFeatures);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    NCVVectorAlloc<HaarStage64> h_HaarStages(*this->allocatorCPU.get(), numStages);
    ncvAssertReturn(h_HaarStages.isMemAllocated(), false);
    NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes(*this->allocatorCPU.get(), numNodes);
    ncvAssertReturn(h_HaarNodes.isMemAllocated(), false);
    NCVVectorAlloc<HaarFeature64> h_HaarFeatures(*this->allocatorCPU.get(), numFeatures);
    ncvAssertReturn(h_HaarFeatures.isMemAllocated(), false);

    NCVVectorAlloc<HaarStage64> d_HaarStages(*this->allocatorGPU.get(), numStages);
    ncvAssertReturn(d_HaarStages.isMemAllocated(), false);
    NCVVectorAlloc<HaarClassifierNode128> d_HaarNodes(*this->allocatorGPU.get(), numNodes);
    ncvAssertReturn(d_HaarNodes.isMemAllocated(), false);
    NCVVectorAlloc<HaarFeature64> d_HaarFeatures(*this->allocatorGPU.get(), numFeatures);
    ncvAssertReturn(d_HaarFeatures.isMemAllocated(), false);

    HaarClassifierCascadeDescriptor haar;
    haar.ClassifierSize.width = haar.ClassifierSize.height = 1;
    haar.bNeedsTiltedII = false;
    haar.NumClassifierRootNodes = numNodes;
    haar.NumClassifierTotalNodes = numNodes;
    haar.NumFeatures = numFeatures;
    haar.NumStages = numStages;

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvStat = ncvHaarLoadFromFile_host(this->cascadeName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    ncvAssertReturn(NCV_SUCCESS == h_HaarStages.copySolid(d_HaarStages, 0), false);
    ncvAssertReturn(NCV_SUCCESS == h_HaarNodes.copySolid(d_HaarNodes, 0), false);
    ncvAssertReturn(NCV_SUCCESS == h_HaarFeatures.copySolid(d_HaarFeatures, 0), false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);
    NCV_SKIP_COND_END

    NcvSize32s srcRoi, srcIIRoi, searchRoi;
    srcRoi.width = this->width;
    srcRoi.height = this->height;
    srcIIRoi.width = srcRoi.width + 1;
    srcIIRoi.height = srcRoi.height + 1;
    searchRoi.width = srcIIRoi.width - haar.ClassifierSize.width;
    searchRoi.height = srcIIRoi.height - haar.ClassifierSize.height;
    if (searchRoi.width <= 0 || searchRoi.height <= 0)
    {
        return false;
    }
    NcvSize32u searchRoiU(searchRoi.width, searchRoi.height);

    NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);

    Ncv32u integralWidth = this->width + 1;
    Ncv32u integralHeight = this->height + 1;

    NCVMatrixAlloc<Ncv32u> d_integralImage(*this->allocatorGPU.get(), integralWidth, integralHeight);
    ncvAssertReturn(d_integralImage.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> d_sqIntegralImage(*this->allocatorGPU.get(), integralWidth, integralHeight);
    ncvAssertReturn(d_sqIntegralImage.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32u> h_integralImage(*this->allocatorCPU.get(), integralWidth, integralHeight);
    ncvAssertReturn(h_integralImage.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> h_sqIntegralImage(*this->allocatorCPU.get(), integralWidth, integralHeight);
    ncvAssertReturn(h_sqIntegralImage.isMemAllocated(), false);

    NCVMatrixAlloc<Ncv32f> d_rectStdDev(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_rectStdDev.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32u> d_pixelMask(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_pixelMask.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32f> h_rectStdDev(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_rectStdDev.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv32u> h_pixelMask(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_pixelMask.isMemAllocated(), false);

    NCVVectorAlloc<NcvRect32u> d_hypotheses(*this->allocatorGPU.get(), this->width * this->height);
    ncvAssertReturn(d_hypotheses.isMemAllocated(), false);
    NCVVectorAlloc<NcvRect32u> h_hypotheses(*this->allocatorCPU.get(), this->width * this->height);
    ncvAssertReturn(h_hypotheses.isMemAllocated(), false);

    NCVStatus nppStat;
    Ncv32u szTmpBufIntegral, szTmpBufSqIntegral;
    nppStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &szTmpBufIntegral, this->devProp);
    ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
    nppStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &szTmpBufSqIntegral, this->devProp);
    ncvAssertReturn(nppStat == NPPST_SUCCESS, false);
    NCVVectorAlloc<Ncv8u> d_tmpIIbuf(*this->allocatorGPU.get(), std::max(szTmpBufIntegral, szTmpBufSqIntegral));
    ncvAssertReturn(d_tmpIIbuf.isMemAllocated(), false);

    Ncv32u detectionsOnThisScale_d = 0;
    Ncv32u detectionsOnThisScale_h = 0;

    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);
    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    nppStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(),
                                       d_integralImage.ptr(), d_integralImage.pitch(),
                                       NcvSize32u(d_img.width(), d_img.height()),
                                       d_tmpIIbuf.ptr(), szTmpBufIntegral, this->devProp);
    ncvAssertReturn(nppStat == NPPST_SUCCESS, false);

    nppStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
                                          d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
                                          NcvSize32u(d_img.width(), d_img.height()),
                                          d_tmpIIbuf.ptr(), szTmpBufSqIntegral, this->devProp);
    ncvAssertReturn(nppStat == NPPST_SUCCESS, false);

    const NcvRect32u rect(
        HAAR_STDDEV_BORDER,
        HAAR_STDDEV_BORDER,
        haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER,
        haar.ClassifierSize.height - 2*HAAR_STDDEV_BORDER);
    nppStat = nppiStRectStdDev_32f_C1R(
        d_integralImage.ptr(), d_integralImage.pitch(),
        d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),
        d_rectStdDev.ptr(), d_rectStdDev.pitch(),
        NcvSize32u(searchRoi.width, searchRoi.height), rect,
        1.0f, true);
    ncvAssertReturn(nppStat == NPPST_SUCCESS, false);

    ncvStat = d_integralImage.copySolid(h_integralImage, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    ncvStat = d_rectStdDev.copySolid(h_rectStdDev, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    for (Ncv32u i=0; i<searchRoiU.height; i++)
    {
        for (Ncv32u j=0; j<h_pixelMask.stride(); j++)
        {
            if (j<searchRoiU.width)
            {
                h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = (i << 16) | j;
            }
            else
            {
                h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = OBJDET_MASK_ELEMENT_INVALID_32U;
            }
        }
    }
    ncvAssertReturn(cudaSuccess == cudaStreamSynchronize(0), false);

#if !defined(__APPLE__)
#if defined(__GNUC__)
    //http://www.christian-seiler.de/projekte/fpmath/
    fpu_control_t fpu_oldcw, fpu_cw;
    _FPU_GETCW(fpu_oldcw); // store old cw
    fpu_cw = (fpu_oldcw & ~_FPU_EXTENDED & ~_FPU_DOUBLE & ~_FPU_SINGLE) | _FPU_SINGLE;
    _FPU_SETCW(fpu_cw);

    // calculations here
    ncvStat = ncvApplyHaarClassifierCascade_host(
        h_integralImage, h_rectStdDev, h_pixelMask,
        detectionsOnThisScale_h,
        haar, h_HaarStages, h_HaarNodes, h_HaarFeatures, false,
        searchRoiU, 1, 1.0f);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    _FPU_SETCW(fpu_oldcw); // restore old cw
#else
#ifndef _WIN64
    Ncv32u fpu_oldcw, fpu_cw;
    _controlfp_s(&fpu_cw, 0, 0);
    fpu_oldcw = fpu_cw;
    _controlfp_s(&fpu_cw, _PC_24, _MCW_PC);
#endif
    ncvStat = ncvApplyHaarClassifierCascade_host(
        h_integralImage, h_rectStdDev, h_pixelMask,
        detectionsOnThisScale_h,
        haar, h_HaarStages, h_HaarNodes, h_HaarFeatures, false,
        searchRoiU, 1, 1.0f);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
#ifndef _WIN64
    _controlfp_s(&fpu_cw, fpu_oldcw, _MCW_PC);
#endif
#endif
#endif
    NCV_SKIP_COND_END

    int devId;
    ncvAssertCUDAReturn(cudaGetDevice(&devId), false);
    cudaDeviceProp _devProp;
    ncvAssertCUDAReturn(cudaGetDeviceProperties(&_devProp, devId), false);

    ncvStat = ncvApplyHaarClassifierCascade_device(
        d_integralImage, d_rectStdDev, d_pixelMask,
        detectionsOnThisScale_d,
        haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,
        searchRoiU, 1, 1.0f,
        *this->allocatorGPU.get(), *this->allocatorCPU.get(),
        _devProp, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    NCVMatrixAlloc<Ncv32u> h_pixelMask_d(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_pixelMask_d.isMemAllocated(), false);

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    ncvStat = d_pixelMask.copySolid(h_pixelMask_d, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);

    if (detectionsOnThisScale_d != detectionsOnThisScale_h)
    {
        bLoopVirgin = false;
    }
    else
    {
        std::sort(h_pixelMask_d.ptr(), h_pixelMask_d.ptr() + detectionsOnThisScale_d);
        for (Ncv32u i=0; i<detectionsOnThisScale_d && bLoopVirgin; i++)
        {
            if (h_pixelMask.ptr()[i] != h_pixelMask_d.ptr()[i])
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }
    return rcode;
}
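// The host reference above seeds each valid pixel-mask element with the pixel's
// coordinates packed as (i << 16) | j, which is what makes the sorted bit-to-bit
// comparison meaningful. A hedged helper sketch of that pack/unpack convention
// (the helper names are hypothetical, not part of the NCV API):
static inline Ncv32u packMaskElement(Ncv32u row, Ncv32u col)
{
    return (row << 16) | col; // row in the high 16 bits, column in the low 16
}

static inline void unpackMaskElement(Ncv32u elem, Ncv32u& row, Ncv32u& col)
{
    row = elem >> 16;
    col = elem & 0xFFFFu;
}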
template <class T>
bool TestTranspose<T>::process()
{
    NCVStatus ncvStat;
    bool rcode = false;

    NcvSize32u srcSize(this->width, this->height);

    NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);

    NCVMatrixAlloc<T> d_dst(*this->allocatorGPU.get(), this->height, this->width);
    ncvAssertReturn(d_dst.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_dst(*this->allocatorCPU.get(), this->height, this->width);
    ncvAssertReturn(h_dst.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_dst_d(*this->allocatorCPU.get(), this->height, this->width);
    ncvAssertReturn(h_dst_d.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);
    NCV_SKIP_COND_END

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    NCV_SKIP_COND_BEGIN
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = nppiStTranspose_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
                                          (Ncv32u *)d_dst.ptr(), d_dst.pitch(),
                                          NcvSize32u(this->width, this->height));
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        ncvStat = nppiStTranspose_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
                                          (Ncv64u *)d_dst.ptr(), d_dst.pitch(),
                                          NcvSize32u(this->width, this->height));
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect transpose test instance", false);
    }
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    ncvStat = d_dst.copySolid(h_dst_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    NCV_SKIP_COND_BEGIN
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = nppiStTranspose_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
                                               (Ncv32u *)h_dst.ptr(), h_dst.pitch(),
                                               NcvSize32u(this->width, this->height));
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        ncvStat = nppiStTranspose_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
                                               (Ncv64u *)h_dst.ptr(), h_dst.pitch(),
                                               NcvSize32u(this->width, this->height));
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect transpose test instance", false);
    }
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    for (Ncv32u i=0; bLoopVirgin && i < this->width; i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < this->height; j++)
        {
            if (h_dst.ptr()[h_dst.stride()*i+j] != h_dst_d.ptr()[h_dst_d.stride()*i+j])
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }
    return rcode;
}
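// A hedged host-side reference (an assumption for illustration, not the
// NPP_staging code) of the transpose the test validates: src is width x height,
// dst is height x width, strides are in elements.
template <class T>
static void transposeRef(const T* src, int srcStride, T* dst, int dstStride,
                         int width, int height)
{
    for (int y = 0; y < height; ++y)
        for (int x = 0; x < width; ++x)
            dst[x * dstStride + y] = src[y * srcStride + x]; // (x, y) -> (y, x)
}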
template <class T>
bool TestResize<T>::process()
{
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32s smallWidth = this->width / this->scaleFactor;
    Ncv32s smallHeight = this->height / this->scaleFactor;
    if (smallWidth == 0 || smallHeight == 0)
    {
        return true;
    }

    NcvSize32u srcSize(this->width, this->height);

    NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);

    NCVMatrixAlloc<T> d_small(*this->allocatorGPU.get(), smallWidth, smallHeight);
    ncvAssertReturn(d_small.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_small(*this->allocatorCPU.get(), smallWidth, smallHeight);
    ncvAssertReturn(h_small.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_small_d(*this->allocatorCPU.get(), smallWidth, smallHeight);
    ncvAssertReturn(h_small_d.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);
    NCV_SKIP_COND_END

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    NCV_SKIP_COND_BEGIN
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = nppiStDecimate_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
                                         (Ncv32u *)d_small.ptr(), d_small.pitch(),
                                         srcSize, this->scaleFactor, this->bTextureCache);
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        ncvStat = nppiStDecimate_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
                                         (Ncv64u *)d_small.ptr(), d_small.pitch(),
                                         srcSize, this->scaleFactor, this->bTextureCache);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
    }
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    ncvStat = d_small.copySolid(h_small_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    NCV_SKIP_COND_BEGIN
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = nppiStDecimate_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
                                              (Ncv32u *)h_small.ptr(), h_small.pitch(),
                                              srcSize, this->scaleFactor);
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        ncvStat = nppiStDecimate_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
                                              (Ncv64u *)h_small.ptr(), h_small.pitch(),
                                              srcSize, this->scaleFactor);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
    }
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    for (Ncv32u i=0; bLoopVirgin && i < h_small.height(); i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < h_small.width(); j++)
        {
            if (h_small.ptr()[h_small.stride()*i+j] != h_small_d.ptr()[h_small_d.stride()*i+j])
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }
    return rcode;
}
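// A hedged reference sketch of the decimation ("resize" by an integer factor)
// that nppiStDecimate validates above: one source pixel is sampled per output
// pixel, with no filtering. The exact sampling offset is an assumption here;
// names and element-unit strides are illustrative.
template <class T>
static void decimateRef(const T* src, int srcStride, T* dst, int dstStride,
                        int dstWidth, int dstHeight, int scaleFactor)
{
    for (int i = 0; i < dstHeight; ++i)
        for (int j = 0; j < dstWidth; ++j)
            dst[i * dstStride + j] = src[(i * scaleFactor) * srcStride + j * scaleFactor];
}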
template <class T_in, class T_out>
bool TestIntegralImage<T_in, T_out>::process()
{
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32u widthII = this->width + 1;
    Ncv32u heightII = this->height + 1;

    NCVMatrixAlloc<T_in> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T_in> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);

    NCVMatrixAlloc<T_out> d_imgII(*this->allocatorGPU.get(), widthII, heightII);
    ncvAssertReturn(d_imgII.isMemAllocated(), false);
    NCVMatrixAlloc<T_out> h_imgII(*this->allocatorCPU.get(), widthII, heightII);
    ncvAssertReturn(h_imgII.isMemAllocated(), false);
    NCVMatrixAlloc<T_out> h_imgII_d(*this->allocatorCPU.get(), widthII, heightII);
    ncvAssertReturn(h_imgII_d.isMemAllocated(), false);

    Ncv32u bufSize;
    if (sizeof(T_in) == sizeof(Ncv8u))
    {
        ncvStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
        ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
    }
    else if (sizeof(T_in) == sizeof(Ncv32f))
    {
        ncvStat = nppiStIntegralGetSize_32f32f(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
        ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
    }

    NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
    ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    if (sizeof(T_in) == sizeof(Ncv8u))
    {
        ncvStat = nppiStIntegral_8u32u_C1R((Ncv8u *)d_img.ptr(), d_img.pitch(),
                                           (Ncv32u *)d_imgII.ptr(), d_imgII.pitch(),
                                           NcvSize32u(this->width, this->height),
                                           d_tmpBuf.ptr(), bufSize, this->devProp);
        ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    }
    else if (sizeof(T_in) == sizeof(Ncv32f))
    {
        ncvStat = nppiStIntegral_32f32f_C1R((Ncv32f *)d_img.ptr(), d_img.pitch(),
                                            (Ncv32f *)d_imgII.ptr(), d_imgII.pitch(),
                                            NcvSize32u(this->width, this->height),
                                            d_tmpBuf.ptr(), bufSize, this->devProp);
        ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
    }

    ncvStat = d_imgII.copySolid(h_imgII_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    if (sizeof(T_in) == sizeof(Ncv8u))
    {
        ncvStat = nppiStIntegral_8u32u_C1R_host((Ncv8u *)h_img.ptr(), h_img.pitch(),
                                                (Ncv32u *)h_imgII.ptr(), h_imgII.pitch(),
                                                NcvSize32u(this->width, this->height));
        ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    }
    else if (sizeof(T_in) == sizeof(Ncv32f))
    {
        ncvStat = nppiStIntegral_32f32f_C1R_host((Ncv32f *)h_img.ptr(), h_img.pitch(),
                                                 (Ncv32f *)h_imgII.ptr(), h_imgII.pitch(),
                                                 NcvSize32u(this->width, this->height));
        ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
    }
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    for (Ncv32u i=0; bLoopVirgin && i < h_img.height() + 1; i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < h_img.width() + 1; j++)
        {
            if (sizeof(T_in) == sizeof(Ncv8u))
            {
                if (h_imgII.ptr()[h_imgII.stride()*i+j] != h_imgII_d.ptr()[h_imgII_d.stride()*i+j])
                {
                    bLoopVirgin = false;
                }
            }
            else if (sizeof(T_in) == sizeof(Ncv32f))
            {
                if (fabsf((float)h_imgII.ptr()[h_imgII.stride()*i+j] -
                          (float)h_imgII_d.ptr()[h_imgII_d.stride()*i+j]) > 0.01f)
                {
                    bLoopVirgin = false;
                }
            }
            else
            {
                ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }
    return rcode;
}
template <class T>
bool TestDrawRects<T>::process()
{
    NCVStatus ncvStat;
    bool rcode = false;

    NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img_d(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img_d.isMemAllocated(), false);

    NCVVectorAlloc<NcvRect32u> d_rects(*this->allocatorGPU.get(), this->numRects);
    ncvAssertReturn(d_rects.isMemAllocated(), false);
    NCVVectorAlloc<NcvRect32u> h_rects(*this->allocatorCPU.get(), this->numRects);
    ncvAssertReturn(h_rects.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);
    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    //fill the vector of rectangles with random rects covering the input
    NCVVectorReuse<Ncv32u> h_rects_as32u(h_rects.getSegment());
    ncvAssertReturn(h_rects_as32u.isMemReused(), false);
    ncvAssertReturn(this->src32u.fill(h_rects_as32u), false);
    for (Ncv32u i=0; i<this->numRects; i++)
    {
        h_rects.ptr()[i].x = (Ncv32u)(((1.0 * h_rects.ptr()[i].x) / RAND_MAX) * (this->width-2));
        h_rects.ptr()[i].y = (Ncv32u)(((1.0 * h_rects.ptr()[i].y) / RAND_MAX) * (this->height-2));
        h_rects.ptr()[i].width = (Ncv32u)(((1.0 * h_rects.ptr()[i].width) / RAND_MAX) * (this->width+10 - h_rects.ptr()[i].x));
        h_rects.ptr()[i].height = (Ncv32u)(((1.0 * h_rects.ptr()[i].height) / RAND_MAX) * (this->height+10 - h_rects.ptr()[i].y));
    }
    ncvStat = h_rects.copySolid(d_rects, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = ncvDrawRects_32u_device((Ncv32u *)d_img.ptr(), d_img.stride(),
                                          this->width, this->height,
                                          (NcvRect32u *)d_rects.ptr(), this->numRects,
                                          this->color, 0);
    }
    else if (sizeof(T) == sizeof(Ncv8u))
    {
        ncvStat = ncvDrawRects_8u_device((Ncv8u *)d_img.ptr(), d_img.stride(),
                                         this->width, this->height,
                                         (NcvRect32u *)d_rects.ptr(), this->numRects,
                                         (Ncv8u)this->color, 0);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);
    }
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    NCV_SKIP_COND_END

    ncvStat = d_img.copySolid(h_img_d, 0);
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    NCV_SKIP_COND_BEGIN
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = ncvDrawRects_32u_host((Ncv32u *)h_img.ptr(), h_img.stride(),
                                        this->width, this->height,
                                        (NcvRect32u *)h_rects.ptr(), this->numRects,
                                        this->color);
    }
    else if (sizeof(T) == sizeof(Ncv8u))
    {
        ncvStat = ncvDrawRects_8u_host((Ncv8u *)h_img.ptr(), h_img.stride(),
                                       this->width, this->height,
                                       (NcvRect32u *)h_rects.ptr(), this->numRects,
                                       (Ncv8u)this->color);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);
    }
    ncvAssertReturn(ncvStat == NCV_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    for (Ncv32u i=0; bLoopVirgin && i < h_img.height(); i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < h_img.width(); j++)
        {
            if (h_img.ptr()[h_img.stride()*i+j] != h_img_d.ptr()[h_img_d.stride()*i+j])
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }
    return rcode;
}
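// A hedged sketch of the rectangle rasterization the ncvDrawRects pair is
// compared on: writing only the one-pixel outline of each rect, clipped to the
// image bounds. This is an illustrative assumption about the drawing convention,
// not the NCV implementation; the stride is in elements.
template <class T>
static void drawRectRef(T* img, int stride, int width, int height,
                        const NcvRect32u& r, T color)
{
    if (r.width == 0 || r.height == 0)
        return;
    Ncv32u x1 = r.x + r.width - 1;
    Ncv32u y1 = r.y + r.height - 1;
    for (Ncv32u j = r.x; j <= x1 && j < (Ncv32u)width; ++j)
    {
        if (r.y < (Ncv32u)height) img[r.y * stride + j] = color; // top edge
        if (y1 < (Ncv32u)height)  img[y1 * stride + j] = color;  // bottom edge
    }
    for (Ncv32u i = r.y; i <= y1 && i < (Ncv32u)height; ++i)
    {
        if (r.x < (Ncv32u)width) img[i * stride + r.x] = color;  // left edge
        if (x1 < (Ncv32u)width)  img[i * stride + x1] = color;   // right edge
    }
}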
bool TestIntegralImageSquared::process()
{
    NCVStatus ncvStat;
    bool rcode = false;

    Ncv32u widthSII = this->width + 1;
    Ncv32u heightSII = this->height + 1;

    NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);

    NCVMatrixAlloc<Ncv64u> d_imgSII(*this->allocatorGPU.get(), widthSII, heightSII);
    ncvAssertReturn(d_imgSII.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> h_imgSII(*this->allocatorCPU.get(), widthSII, heightSII);
    ncvAssertReturn(h_imgSII.isMemAllocated(), false);
    NCVMatrixAlloc<Ncv64u> h_imgSII_d(*this->allocatorCPU.get(), widthSII, heightSII);
    ncvAssertReturn(h_imgSII_d.isMemAllocated(), false);

    Ncv32u bufSize;
    ncvStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
    ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);
    NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
    ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),
                                          d_imgSII.ptr(), d_imgSII.pitch(),
                                          NcvSize32u(this->width, this->height),
                                          d_tmpBuf.ptr(), bufSize, this->devProp);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = d_imgSII.copySolid(h_imgSII_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    ncvStat = nppiStSqrIntegral_8u64u_C1R_host(h_img.ptr(), h_img.pitch(),
                                               h_imgSII.ptr(), h_imgSII.pitch(),
                                               NcvSize32u(this->width, this->height));
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool bLoopVirgin = true;

    NCV_SKIP_COND_BEGIN
    for (Ncv32u i=0; bLoopVirgin && i < h_img.height() + 1; i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < h_img.width() + 1; j++)
        {
            if (h_imgSII.ptr()[h_imgSII.stride()*i+j] != h_imgSII_d.ptr()[h_imgSII_d.stride()*i+j])
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }
    return rcode;
}
GPU_TEST_P(HOG, GetDescriptors)
{
    // Load image (e.g. train data, composed from windows)
    cv::Mat img_rgb = readImage("hog/train_data.png");
    ASSERT_FALSE(img_rgb.empty());

    // Convert to C4
    cv::Mat img;
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);

    cv::gpu::GpuMat d_img(img);

    // Convert train images into feature vectors (train table)
    cv::gpu::GpuMat descriptors, descriptors_by_cols;
    getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
    getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);

    // Check the size of the resulting train table
    wins_per_img_x = 3;
    wins_per_img_y = 2;
    blocks_per_win_x = 7;
    blocks_per_win_y = 15;
    block_hist_size = 36;
    cv::Size descr_size_expected = cv::Size(blocks_per_win_x * blocks_per_win_y * block_hist_size,
                                            wins_per_img_x * wins_per_img_y);
    ASSERT_EQ(descr_size_expected, descriptors.size());

    // Check that both output descriptor formats are handled correctly
    cv::Mat dr(descriptors);
    cv::Mat dc(descriptors_by_cols);
    for (int i = 0; i < wins_per_img_x * wins_per_img_y; ++i)
    {
        const float* l = dr.rowRange(i, i + 1).ptr<float>();
        const float* r = dc.rowRange(i, i + 1).ptr<float>();
        for (int y = 0; y < blocks_per_win_y; ++y)
            for (int x = 0; x < blocks_per_win_x; ++x)
                for (int k = 0; k < block_hist_size; ++k)
                    ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
                              r[(x * blocks_per_win_y + y) * block_hist_size + k]);
    }

    /* Now we want to extract the same feature vectors, but from single images.
       NOTE: the results will be different, due to border value interpolation.
       Using many small images is slower, but we won't call getDescriptors and
       will use computeBlockHistograms instead. computeBlockHistograms works
       well; this can be checked in the gpu_hog sample. */

    img_rgb = readImage("hog/positive1.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    // Everything is fine with interpolation for the top-left subimage
    ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));

    img_rgb = readImage("hog/positive2.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));

    img_rgb = readImage("hog/negative1.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));

    img_rgb = readImage("hog/negative2.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));

    img_rgb = readImage("hog/positive3.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));

    img_rgb = readImage("hog/negative3.png");
    ASSERT_TRUE(!img_rgb.empty());
    cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    computeBlockHistograms(cv::gpu::GpuMat(img));
    compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
}
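// A hedged sketch (helper names are illustrative) of the index mapping the
// format check above asserts: each window descriptor holds blocks_per_win_x x
// blocks_per_win_y block histograms of block_hist_size bins, stored either
// row-by-row or column-by-column.
static int descrIndexRowByRow(int x, int y, int k,
                              int blocks_per_win_x, int block_hist_size)
{
    return (y * blocks_per_win_x + x) * block_hist_size + k; // blocks ordered by row
}

static int descrIndexColByCol(int x, int y, int k,
                              int blocks_per_win_y, int block_hist_size)
{
    return (x * blocks_per_win_y + y) * block_hist_size + k; // blocks ordered by column
}
// For every (x, y, k) the two layouts address the same histogram bin, which is
// exactly what the ASSERT_EQ over l[] and r[] verifies.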