// Unary element-wise transform that processes `shift` adjacent elements per thread.
//
// For every element of `src_` accepted by `mask(y, x)`, stores `op(src)` into the
// same position of `dst_`. Elements rejected by the mask are left as they were.
//
// Thread mapping: y = threadIdx.y + blockIdx.y * blockDim.y selects the row and
// x = threadIdx.x + blockIdx.x * blockDim.x selects a pack of `shift` columns
// starting at x * shift. `shift`, `read_type` and `write_type` come from
// UnReadWriteTraits<T, D>.
//
// NOTE(review): the packed path reinterprets the row pointers as read_type /
// write_type; this presumably requires suitably aligned rows - confirm against
// UnReadWriteTraits and the callers.
__global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, const UnOp op)
{
    typedef typename UnReadWriteTraits<T, D>::read_type read_type;
    typedef typename UnReadWriteTraits<T, D>::write_type write_type;
    const int shift = UnReadWriteTraits<T, D>::shift;

    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int x_shifted = x * shift;

    // Threads mapped below the last row have nothing to do.
    if (y >= src_.rows)
        return;

    const T* src = src_.ptr(y);
    D* dst = dst_.ptr(y);

    if (x_shifted + shift - 1 < src_.cols)
    {
        // The whole pack lies inside the row: one wide load, one wide store.
        const read_type src_n_el = ((const read_type*)src)[x];

        // Seed the output pack with the current destination contents.
        // OpUnroller is handed the mask, so it may skip lanes the mask rejects
        // (its definition is not visible here); without this seed such lanes
        // would be written back as indeterminate values instead of being
        // preserved like in the scalar tail below.
        write_type dst_n_el = ((const write_type*)dst)[x];

        OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);

        ((write_type*)dst)[x] = dst_n_el;
    }
    else
    {
        // Partial pack at the end of the row: fall back to per-element work.
        // Threads whose pack starts at or beyond src_.cols execute zero iterations.
        for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
        {
            if (mask(y, real_x))
                dst[real_x] = op(src[real_x]);
        }
    }
}
// Unary element-wise transform, one element per thread.
//
// The thread at column blockDim.x * blockIdx.x + threadIdx.x and row
// blockDim.y * blockIdx.y + threadIdx.y writes op(src(row, col)) to
// dst(row, col) when the position is inside `src` and mask(row, col) is true.
// All other positions of `dst` are left untouched.
static __global__ void transformSimple(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, const UnOp op)
{
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;

    // Threads outside the image do no work (and never query the mask).
    if (col >= src.cols || row >= src.rows)
        return;

    // Masked-out elements keep their previous destination value.
    if (!mask(row, col))
        return;

    const T* srcRow = src.ptr(row);
    D* dstRow = dst.ptr(row);

    dstRow[col] = op(srcRow[col]);
}
// Binary element-wise transform, one element per thread.
//
// The thread at column blockDim.x * blockIdx.x + threadIdx.x and row
// blockDim.y * blockIdx.y + threadIdx.y writes
// op(src1(row, col), src2(row, col)) to dst(row, col) when the position is
// inside `src1` and mask(row, col) is true. Bounds are taken from `src1` only;
// `src2` and `dst` are indexed with the same coordinates.
static __global__ void transformSimple(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, const BinOp op)
{
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;

    // Threads outside the image do no work (and never query the mask).
    if (col >= src1.cols || row >= src1.rows)
        return;

    // Masked-out elements keep their previous destination value.
    if (!mask(row, col))
        return;

    const T1 lhs = src1.ptr(row)[col];
    const T2 rhs = src2.ptr(row)[col];

    dst.ptr(row)[col] = op(lhs, rhs);
}
// Binds `img` as a 2D texture to the texture reference registered under `name`.
//
// The reference is looked up with cudaGetTextureReference and the image is
// bound with cudaBindTexture2D using a channel descriptor derived from T;
// both runtime calls are wrapped in cudaSafeCall.
//
// The reference is obtained as a pointer to const, so this helper does not
// configure `normalized`, `filterMode` or `addressMode` (const_cast is
// disabled). Set those attributes through the constructor of 'class texture'
// instead.
template<class T> static inline void bindTexture(const char* name, const DevMem2D_<T>& img)
{
    const textureReference* texRef;
    cudaSafeCall( cudaGetTextureReference(&texRef, name) );

    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<T>();
    cudaSafeCall( cudaBindTexture2D(0, texRef, img.ptr(), &channelDesc, img.cols, img.rows, img.step) );
}
// Binary element-wise transform that processes `shift` adjacent elements per thread.
//
// For every element accepted by `mask(y, x)`, stores `op(src1, src2)` into the
// same position of `dst_`. Elements rejected by the mask are left as they were.
// Bounds are taken from `src1_` only; `src2_` and `dst_` are indexed with the
// same coordinates.
//
// Thread mapping: y = threadIdx.y + blockIdx.y * blockDim.y selects the row and
// x = threadIdx.x + blockIdx.x * blockDim.x selects a pack of `shift` columns
// starting at x * shift. `shift`, `read_type1`, `read_type2` and `write_type`
// come from BinReadWriteTraits<T1, T2, D>.
//
// NOTE(review): the packed path reinterprets the row pointers as read_type1 /
// read_type2 / write_type; this presumably requires suitably aligned rows -
// confirm against BinReadWriteTraits and the callers.
__global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_, const Mask mask, const BinOp op)
{
    typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
    typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
    typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;
    const int shift = BinReadWriteTraits<T1, T2, D>::shift;

    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int x_shifted = x * shift;

    // Threads mapped below the last row have nothing to do.
    if (y >= src1_.rows)
        return;

    const T1* src1 = src1_.ptr(y);
    const T2* src2 = src2_.ptr(y);
    D* dst = dst_.ptr(y);

    if (x_shifted + shift - 1 < src1_.cols)
    {
        // The whole pack lies inside the row: wide loads, one wide store.
        const read_type1 src1_n_el = ((const read_type1*)src1)[x];
        const read_type2 src2_n_el = ((const read_type2*)src2)[x];

        // Seed the output pack with the current destination contents.
        // OpUnroller is handed the mask, so it may skip lanes the mask rejects
        // (its definition is not visible here); without this seed such lanes
        // would be written back as indeterminate values instead of being
        // preserved like in the scalar tail below.
        write_type dst_n_el = ((const write_type*)dst)[x];

        OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);

        ((write_type*)dst)[x] = dst_n_el;
    }
    else
    {
        // Partial pack at the end of the row: fall back to per-element work.
        // Threads whose pack starts at or beyond src1_.cols execute zero iterations.
        for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
        {
            if (mask(y, real_x))
                dst[real_x] = op(src1[real_x], src2[real_x]);
        }
    }
}
// Binds `img` as a 2D texture to the already resolved texture reference `tex`.
//
// The channel descriptor is derived from the element type T, and the binding
// uses the image's data pointer, cols, rows and step. No offset output is
// requested (first argument of cudaBindTexture2D is 0). The runtime call is
// wrapped in cudaSafeCall.
template<class T> inline void bindTexture(const textureReference* tex, const DevMem2D_<T>& img)
{
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<T>();

    cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &channelDesc, img.cols, img.rows, img.step) );
}