__host__ void reduce(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream) { const dim3 block(Policy::block_size_x, Policy::block_size_y); const dim3 grid(divUp(cols, block.x * Policy::patch_size_x), divUp(rows, block.y * Policy::patch_size_y)); reduce<Reductor, Policy::block_size_x * Policy::block_size_y, Policy::patch_size_x, Policy::patch_size_y><<<grid, block, 0, stream>>>(src, result, mask, rows, cols); CV_CUDEV_SAFE_CALL( cudaGetLastError() ); if (stream == 0) CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); }
__host__ void histogram(const SrcPtr& src, ResType* hist, const MaskPtr& mask, int rows, int cols, cudaStream_t stream) { const dim3 block(Policy::block_size_x, Policy::block_size_y); const dim3 grid(divUp(rows, block.y)); const int BLOCK_SIZE = Policy::block_size_x * Policy::block_size_y; histogram<BIN_COUNT, BLOCK_SIZE><<<grid, block, 0, stream>>>(src, hist, mask, rows, cols); CV_CUDEV_SAFE_CALL( cudaGetLastError() ); if (stream == 0) CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); }
__host__ void reduceToRow(const SrcPtr& src, ResType* dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream) { const int BLOCK_SIZE_X = 16; const int BLOCK_SIZE_Y = 16; const dim3 block(BLOCK_SIZE_X, BLOCK_SIZE_Y); const dim3 grid(divUp(cols, block.x)); reduceToRow<Reductor, BLOCK_SIZE_X, BLOCK_SIZE_Y><<<grid, block, 0, stream>>>(src, dst, mask, rows, cols); CV_CUDEV_SAFE_CALL( cudaGetLastError() ); if (stream == 0) CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); }
__host__ explicit Texture(const GlobPtrSz<T>& mat, bool normalizedCoords = false, cudaTextureFilterMode filterMode = cudaFilterModePoint, cudaTextureAddressMode addressMode = cudaAddressModeClamp) { CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) ); rows = mat.rows; cols = mat.cols; cudaResourceDesc texRes; std::memset(&texRes, 0, sizeof(texRes)); texRes.resType = cudaResourceTypePitch2D; texRes.res.pitch2D.devPtr = mat.data; texRes.res.pitch2D.height = mat.rows; texRes.res.pitch2D.width = mat.cols; texRes.res.pitch2D.pitchInBytes = mat.step; texRes.res.pitch2D.desc = cudaCreateChannelDesc<T>(); cudaTextureDesc texDescr; std::memset(&texDescr, 0, sizeof(texDescr)); texDescr.addressMode[0] = addressMode; texDescr.addressMode[1] = addressMode; texDescr.addressMode[2] = addressMode; texDescr.filterMode = filterMode; texDescr.readMode = cudaReadModeElementType; texDescr.normalizedCoords = normalizedCoords; CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) ); }
__host__ void reduceToColumn(const SrcPtr& src, ResType* dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream) { const int BLOCK_SIZE_X = Policy::block_size_x; const int BLOCK_SIZE_Y = Policy::block_size_y; const int BLOCK_SIZE = BLOCK_SIZE_X * BLOCK_SIZE_Y; const dim3 block(BLOCK_SIZE); const dim3 grid(rows); reduceToColumn<Reductor, BLOCK_SIZE><<<grid, block, 0, stream>>>(src, dst, mask, cols); CV_CUDEV_SAFE_CALL( cudaGetLastError() ); if (stream == 0) CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); }