bool TestTranspose<T>::process()
{
    // Cross-checks the device transpose primitive (nppiStTranspose_*_C1R)
    // against its _host reference: fill a width x height source, transpose
    // on both paths into height x width buffers, then compare bit-for-bit.
    NCVStatus ncvStat;
    bool rcode = false;

    NcvSize32u srcSize(this->width, this->height);

    // Source images (device + host) and three destinations:
    // d_dst = device result, h_dst = host reference,
    // h_dst_d = device result copied back for comparison.
    NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> d_dst(*this->allocatorGPU.get(), this->height, this->width);
    ncvAssertReturn(d_dst.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_dst(*this->allocatorCPU.get(), this->height, this->width);
    ncvAssertReturn(h_dst.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_dst_d(*this->allocatorCPU.get(), this->height, this->width);
    ncvAssertReturn(h_dst_d.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);
    NCV_SKIP_COND_END

    ncvStat = h_img.copySolid(d_img, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    NCV_SKIP_COND_BEGIN
    // Device transpose: element size selects the 32-bit or 64-bit kernel.
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = nppiStTranspose_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
                                          (Ncv32u *)d_dst.ptr(), d_dst.pitch(),
                                          srcSize);
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        ncvStat = nppiStTranspose_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
                                          (Ncv64u *)d_dst.ptr(), d_dst.pitch(),
                                          srcSize);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect transpose test instance", false);
    }
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    ncvStat = d_dst.copySolid(h_dst_d, 0);
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);

    NCV_SKIP_COND_BEGIN
    // Host (reference) transpose with the matching element width.
    if (sizeof(T) == sizeof(Ncv32u))
    {
        ncvStat = nppiStTranspose_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
                                               (Ncv32u *)h_dst.ptr(), h_dst.pitch(),
                                               srcSize);
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        ncvStat = nppiStTranspose_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
                                               (Ncv64u *)h_dst.ptr(), h_dst.pitch(),
                                               srcSize);
    }
    else
    {
        // Fixed copy-paste: message previously said "downsample" although
        // this is the transpose test.
        ncvAssertPrintReturn(false, "Incorrect transpose test instance", false);
    }
    ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    // Bit-to-bit check: destination is this->width rows of this->height
    // elements, hence the swapped loop bounds.
    bool bLoopVirgin = true;
    NCV_SKIP_COND_BEGIN
    //const Ncv64f relEPS = 0.005;
    for (Ncv32u i=0; bLoopVirgin && i < this->width; i++)
    {
        for (Ncv32u j=0; bLoopVirgin && j < this->height; j++)
        {
            if (h_dst.ptr()[h_dst.stride()*i+j] != h_dst_d.ptr()[h_dst_d.stride()*i+j])
            {
                bLoopVirgin = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (bLoopVirgin)
    {
        rcode = true;
    }
    return rcode;
}
bool TestIntegralImage<T_in, T_out>::process()
{
    // Runs the device integral-image primitive and its _host reference on
    // the same random source, then compares the two (width+1) x (height+1)
    // results: exactly for the integer flavor, with a small absolute
    // tolerance for the float flavor.
    NCVStatus status;
    bool result = false;

    Ncv32u iiWidth  = this->width + 1;
    Ncv32u iiHeight = this->height + 1;

    NCVMatrixAlloc<T_in> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T_in> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);
    // d_imgII = device result, h_imgII = host reference,
    // h_imgII_d = device result copied back for comparison.
    NCVMatrixAlloc<T_out> d_imgII(*this->allocatorGPU.get(), iiWidth, iiHeight);
    ncvAssertReturn(d_imgII.isMemAllocated(), false);
    NCVMatrixAlloc<T_out> h_imgII(*this->allocatorCPU.get(), iiWidth, iiHeight);
    ncvAssertReturn(h_imgII.isMemAllocated(), false);
    NCVMatrixAlloc<T_out> h_imgII_d(*this->allocatorCPU.get(), iiWidth, iiHeight);
    ncvAssertReturn(h_imgII_d.isMemAllocated(), false);

    // Ask the primitive how much device scratch memory it needs.
    Ncv32u bufSize;
    if (sizeof(T_in) == sizeof(Ncv8u))
    {
        status = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
        ncvAssertReturn(NPPST_SUCCESS == status, false);
    }
    else if (sizeof(T_in) == sizeof(Ncv32f))
    {
        status = nppiStIntegralGetSize_32f32f(NcvSize32u(this->width, this->height), &bufSize, this->devProp);
        ncvAssertReturn(NPPST_SUCCESS == status, false);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
    }
    NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);
    ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);

    status = h_img.copySolid(d_img, 0);
    ncvAssertReturn(status == NPPST_SUCCESS, false);

    // Device pass: input element size selects the 8u->32u or 32f->32f flavor.
    if (sizeof(T_in) == sizeof(Ncv8u))
    {
        status = nppiStIntegral_8u32u_C1R((Ncv8u *)d_img.ptr(), d_img.pitch(),
                                          (Ncv32u *)d_imgII.ptr(), d_imgII.pitch(),
                                          NcvSize32u(this->width, this->height),
                                          d_tmpBuf.ptr(), bufSize, this->devProp);
        ncvAssertReturn(status == NPPST_SUCCESS, false);
    }
    else if (sizeof(T_in) == sizeof(Ncv32f))
    {
        status = nppiStIntegral_32f32f_C1R((Ncv32f *)d_img.ptr(), d_img.pitch(),
                                           (Ncv32f *)d_imgII.ptr(), d_imgII.pitch(),
                                           NcvSize32u(this->width, this->height),
                                           d_tmpBuf.ptr(), bufSize, this->devProp);
        ncvAssertReturn(status == NPPST_SUCCESS, false);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
    }

    status = d_imgII.copySolid(h_imgII_d, 0);
    ncvAssertReturn(status == NPPST_SUCCESS, false);

    // Host reference pass.
    if (sizeof(T_in) == sizeof(Ncv8u))
    {
        status = nppiStIntegral_8u32u_C1R_host((Ncv8u *)h_img.ptr(), h_img.pitch(),
                                               (Ncv32u *)h_imgII.ptr(), h_imgII.pitch(),
                                               NcvSize32u(this->width, this->height));
        ncvAssertReturn(status == NPPST_SUCCESS, false);
    }
    else if (sizeof(T_in) == sizeof(Ncv32f))
    {
        status = nppiStIntegral_32f32f_C1R_host((Ncv32f *)h_img.ptr(), h_img.pitch(),
                                                (Ncv32f *)h_imgII.ptr(), h_imgII.pitch(),
                                                NcvSize32u(this->width, this->height));
        ncvAssertReturn(status == NPPST_SUCCESS, false);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
    }
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool mismatchFree = true;
    NCV_SKIP_COND_BEGIN
    for (Ncv32u row = 0; mismatchFree && row < h_img.height() + 1; row++)
    {
        for (Ncv32u col = 0; mismatchFree && col < h_img.width() + 1; col++)
        {
            if (sizeof(T_in) == sizeof(Ncv8u))
            {
                // Integer integral images must agree exactly.
                if (h_imgII.ptr()[h_imgII.stride()*row+col] != h_imgII_d.ptr()[h_imgII_d.stride()*row+col])
                {
                    mismatchFree = false;
                }
            }
            else if (sizeof(T_in) == sizeof(Ncv32f))
            {
                // Float accumulation order differs between paths; allow a
                // small absolute deviation.
                if (fabsf((float)h_imgII.ptr()[h_imgII.stride()*row+col] -
                          (float)h_imgII_d.ptr()[h_imgII_d.stride()*row+col]) > 0.01f)
                {
                    mismatchFree = false;
                }
            }
            else
            {
                ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);
            }
        }
    }
    NCV_SKIP_COND_END

    if (mismatchFree)
    {
        result = true;
    }
    return result;
}
bool TestResize<T>::process()
{
    // Verifies the device decimation primitive (nppiStDecimate_*_C1R)
    // against its _host reference: downsample the same source by
    // scaleFactor on both paths and compare the results bit-for-bit.
    NCVStatus status;
    bool result = false;

    Ncv32s dstWidth  = this->width / this->scaleFactor;
    Ncv32s dstHeight = this->height / this->scaleFactor;
    if (dstWidth == 0 || dstHeight == 0)
    {
        // Nothing to decimate at this scale — trivially passes.
        return true;
    }

    NcvSize32u srcSize(this->width, this->height);

    NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);
    // d_small = device result, h_small = host reference,
    // h_small_d = device result copied back for comparison.
    NCVMatrixAlloc<T> d_small(*this->allocatorGPU.get(), dstWidth, dstHeight);
    ncvAssertReturn(d_small.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_small(*this->allocatorCPU.get(), dstWidth, dstHeight);
    ncvAssertReturn(h_small.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_small_d(*this->allocatorCPU.get(), dstWidth, dstHeight);
    ncvAssertReturn(h_small_d.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);
    NCV_SKIP_COND_END

    status = h_img.copySolid(d_img, 0);
    ncvAssertReturn(status == NPPST_SUCCESS, false);

    NCV_SKIP_COND_BEGIN
    // Device decimation: element size selects the 32-bit or 64-bit kernel.
    if (sizeof(T) == sizeof(Ncv32u))
    {
        status = nppiStDecimate_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),
                                        (Ncv32u *)d_small.ptr(), d_small.pitch(),
                                        srcSize, this->scaleFactor,
                                        this->bTextureCache);
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        status = nppiStDecimate_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),
                                        (Ncv64u *)d_small.ptr(), d_small.pitch(),
                                        srcSize, this->scaleFactor,
                                        this->bTextureCache);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
    }
    ncvAssertReturn(status == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    status = d_small.copySolid(h_small_d, 0);
    ncvAssertReturn(status == NPPST_SUCCESS, false);

    NCV_SKIP_COND_BEGIN
    // Host (reference) decimation.
    if (sizeof(T) == sizeof(Ncv32u))
    {
        status = nppiStDecimate_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),
                                             (Ncv32u *)h_small.ptr(), h_small.pitch(),
                                             srcSize, this->scaleFactor);
    }
    else if (sizeof(T) == sizeof(Ncv64u))
    {
        status = nppiStDecimate_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),
                                             (Ncv64u *)h_small.ptr(), h_small.pitch(),
                                             srcSize, this->scaleFactor);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);
    }
    ncvAssertReturn(status == NPPST_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool mismatchFree = true;
    NCV_SKIP_COND_BEGIN
    //const Ncv64f relEPS = 0.005;
    for (Ncv32u row = 0; mismatchFree && row < h_small.height(); row++)
    {
        for (Ncv32u col = 0; mismatchFree && col < h_small.width(); col++)
        {
            if (h_small.ptr()[h_small.stride()*row+col] != h_small_d.ptr()[h_small_d.stride()*row+col])
            {
                mismatchFree = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (mismatchFree)
    {
        result = true;
    }
    return result;
}
bool TestDrawRects<T>::process()
{
    // Draws the same set of random rectangles on device and host copies of
    // one random image (ncvDrawRects_*_device vs ncvDrawRects_*_host) and
    // compares the two images bit-for-bit.
    NCVStatus status;
    bool result = false;

    NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);
    ncvAssertReturn(d_img.isMemAllocated(), false);
    NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img.isMemAllocated(), false);
    // h_img_d receives the device-drawn image for comparison with h_img.
    NCVMatrixAlloc<T> h_img_d(*this->allocatorCPU.get(), this->width, this->height);
    ncvAssertReturn(h_img_d.isMemAllocated(), false);

    NCVVectorAlloc<NcvRect32u> d_rects(*this->allocatorGPU.get(), this->numRects);
    ncvAssertReturn(d_rects.isMemAllocated(), false);
    NCVVectorAlloc<NcvRect32u> h_rects(*this->allocatorCPU.get(), this->numRects);
    ncvAssertReturn(h_rects.isMemAllocated(), false);

    NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());
    NCV_SKIP_COND_BEGIN
    ncvAssertReturn(this->src.fill(h_img), false);
    status = h_img.copySolid(d_img, 0);
    ncvAssertReturn(status == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    //fill vector of rectangles with random rects covering the input
    // Reinterpret the rect vector as raw 32-bit words so the generic
    // 32u random source can populate it.
    NCVVectorReuse<Ncv32u> h_rects_as32u(h_rects.getSegment());
    ncvAssertReturn(h_rects_as32u.isMemReused(), false);
    ncvAssertReturn(this->src32u.fill(h_rects_as32u), false);
    for (Ncv32u k = 0; k < this->numRects; k++)
    {
        // Map each random word into a plausible coordinate/extent: origin
        // inside the image, extent proportional to the remaining span
        // (the +10 allows rects that overrun the border).
        h_rects.ptr()[k].x = (Ncv32u)(((1.0 * h_rects.ptr()[k].x) / RAND_MAX) * (this->width-2));
        h_rects.ptr()[k].y = (Ncv32u)(((1.0 * h_rects.ptr()[k].y) / RAND_MAX) * (this->height-2));
        h_rects.ptr()[k].width = (Ncv32u)(((1.0 * h_rects.ptr()[k].width) / RAND_MAX) * (this->width+10 - h_rects.ptr()[k].x));
        h_rects.ptr()[k].height = (Ncv32u)(((1.0 * h_rects.ptr()[k].height) / RAND_MAX) * (this->height+10 - h_rects.ptr()[k].y));
    }
    status = h_rects.copySolid(d_rects, 0);
    ncvAssertReturn(status == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    // Device draw: element size selects the 32-bit or 8-bit routine.
    if (sizeof(T) == sizeof(Ncv32u))
    {
        status = ncvDrawRects_32u_device((Ncv32u *)d_img.ptr(), d_img.stride(),
                                         this->width, this->height,
                                         (NcvRect32u *)d_rects.ptr(),
                                         this->numRects, this->color, 0);
    }
    else if (sizeof(T) == sizeof(Ncv8u))
    {
        status = ncvDrawRects_8u_device((Ncv8u *)d_img.ptr(), d_img.stride(),
                                        this->width, this->height,
                                        (NcvRect32u *)d_rects.ptr(),
                                        this->numRects, (Ncv8u)this->color, 0);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);
    }
    ncvAssertReturn(status == NCV_SUCCESS, false);
    NCV_SKIP_COND_END

    status = d_img.copySolid(h_img_d, 0);
    ncvAssertReturn(status == NCV_SUCCESS, false);
    ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);

    NCV_SKIP_COND_BEGIN
    // Host (reference) draw over the original h_img in place.
    if (sizeof(T) == sizeof(Ncv32u))
    {
        status = ncvDrawRects_32u_host((Ncv32u *)h_img.ptr(), h_img.stride(),
                                       this->width, this->height,
                                       (NcvRect32u *)h_rects.ptr(),
                                       this->numRects, this->color);
    }
    else if (sizeof(T) == sizeof(Ncv8u))
    {
        status = ncvDrawRects_8u_host((Ncv8u *)h_img.ptr(), h_img.stride(),
                                      this->width, this->height,
                                      (NcvRect32u *)h_rects.ptr(),
                                      this->numRects, (Ncv8u)this->color);
    }
    else
    {
        ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);
    }
    ncvAssertReturn(status == NCV_SUCCESS, false);
    NCV_SKIP_COND_END

    //bit-to-bit check
    bool mismatchFree = true;
    NCV_SKIP_COND_BEGIN
    //const Ncv64f relEPS = 0.005;
    for (Ncv32u row = 0; mismatchFree && row < h_img.height(); row++)
    {
        for (Ncv32u col = 0; mismatchFree && col < h_img.width(); col++)
        {
            if (h_img.ptr()[h_img.stride()*row+col] != h_img_d.ptr()[h_img_d.stride()*row+col])
            {
                mismatchFree = false;
            }
        }
    }
    NCV_SKIP_COND_END

    if (mismatchFree)
    {
        result = true;
    }
    return result;
}