double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) { CV_Assert(src1.type() == CV_8UC1); CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal); static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; NppiSize sz; sz.width = src1.cols; sz.height = src1.rows; int funcIdx = normType >> 1; double retVal; DeviceBuffer dbuf; nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) ); cudaSafeCall( cudaDeviceSynchronize() ); dbuf.download(&retVal); return retVal; }
bool MemObject::allocate(DeviceInterface *device)
{
    // Ensure the per-device backing storage for this memory object exists.
    DeviceBuffer *buf = deviceBuffer(device);

    // An already-allocated buffer short-circuits to success; otherwise the
    // result is whatever the allocation attempt reports.
    return buf->allocated() || buf->allocate();
}
double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) { CV_Assert(src1.type() == CV_8UC1); CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); #if CUDA_VERSION < 5050 typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal); static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; #else typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer); typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize); static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R}; #endif NppiSize sz; sz.width = src1.cols; sz.height = src1.rows; int funcIdx = normType >> 1; double retVal; DeviceBuffer dbuf; #if CUDA_VERSION < 5050 nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) ); #else int bufSize; buf_size_funcs[funcIdx](sz, &bufSize); GpuMat buf(1, bufSize, CV_8UC1); nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) ); #endif cudaSafeCall( cudaDeviceSynchronize() ); dbuf.download(&retVal); return retVal; }
void copyFrom(DeviceBuffer<TYPE, DIM>& other)
{
    // The destination (this buffer) must be able to hold the source's
    // current data space.
    PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace()));

    // Enqueue a device-to-device copy task from `other` into this buffer.
    auto&& factory = Environment<>::get().Factory();
    factory.createTaskCopyDeviceToDevice(other, *this);
}
// Copies the contents of `other` into this buffer by enqueueing an
// asynchronous copy task with the global Factory singleton.
// NOTE(review): despite the generic name, this creates a *device-to-host*
// copy task — presumably `*this` is a host-side buffer; confirm against the
// enclosing class (not visible in this chunk).
void copyFrom(DeviceBuffer<TYPE, DIM>& other)
{
    // The destination must be large enough for the source's current data space.
    assert(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace()));
    Factory::getInstance().createTaskCopyDeviceToHost(other, *this);
}