/**
 * Plans, executes and destroys a batched complex-to-real transform from
 * `in` into `out` via the fftw_real_transform<Tr, Tc> wrapper.
 *
 * Tr, Tc and rank are not declared inside this block; they are expected to
 * be supplied by an enclosing template declaration that is not visible here.
 *
 * @param out        destination buffer; its strides give the output stride
 *                   and batch distance handed to the planner
 * @param oDataDims  dimensions converted (via computeDims) into the output
 *                   embed array
 * @param in         source buffer; its strides give the input stride and
 *                   batch distance handed to the planner
 * @param iDataDims  dimensions converted (via computeDims) into the input
 *                   embed array
 * @param odims      dimensions converted (via computeDims) into the
 *                   transform size; entries [rank, 4) are multiplied
 *                   together to obtain the batch count
 */
void fft_c2r(Param<Tr> out, const af::dim4 oDataDims, CParam<Tc> in,
             const af::dim4 iDataDims, const af::dim4 odims) {
    using transform_t = fftw_real_transform<Tr, Tc>;
    using ctype_t     = typename transform_t::ctype_t;
    using plan_t      = typename transform_t::plan_t;

    // Per-rank size / embedding arrays in the layout the planner expects.
    int fft_dims[rank];
    int embed_in[rank];
    int embed_out[rank];
    computeDims<rank>(fft_dims, odims);
    computeDims<rank>(embed_in, iDataDims);
    computeDims<rank>(embed_out, oDataDims);

    const af::dim4 in_strides  = in.strides();
    const af::dim4 out_strides = out.strides();

    // Every dimension past the transform rank is treated as a batch.
    int howmany = 1;
    for (int d = rank; d < 4; ++d) { howmany *= odims[d]; }

    transform_t engine;
    plan_t plan = engine.create(
        rank, fft_dims, howmany,
        (ctype_t *)in.get(), embed_in,
        static_cast<int>(in_strides[0]), static_cast<int>(in_strides[rank]),
        (Tr *)out.get(), embed_out,
        static_cast<int>(out_strides[0]), static_cast<int>(out_strides[rank]),
        FFTW_ESTIMATE);

    engine.execute(plan);
    engine.destroy(plan);
}
/**
 * Copies the contents of `src` into `dst`, honouring both arrays' strides.
 *
 * The leading dimensions that are densely packed in *both* arrays are
 * detected first; the index of the first dimension that is not is handed to
 * copy_go together with the starting dimension 3.
 *
 * T is not declared inside this block; it is expected to come from an
 * enclosing template declaration that is not visible here.
 *
 * @param dst  destination buffer descriptor
 * @param src  source buffer descriptor
 */
static void copy(Param<T> dst, CParam<T> src) {
    af::dim4 in_dims     = src.dims();
    af::dim4 out_dims    = dst.dims();
    af::dim4 in_strides  = src.strides();
    af::dim4 out_strides = dst.strides();

    T const *in_ptr = src.get();
    T *out_ptr      = dst.get();

    // Walk the dimensions from the innermost one while the stride of each
    // equals the running product of the preceding source extents in both
    // arrays. `first_strided` ends up as the number of such dimensions.
    int first_strided = 0;
    for (dim_t expected = 1;
         first_strided < 4 &&
         expected == in_strides[first_strided] &&
         expected == out_strides[first_strided];
         ++first_strided) {
        expected *= in_dims[first_strided];
    }

    // Stride-based traversal is only needed for the remaining dimensions.
    copy_go(out_ptr, out_strides, out_dims,
            in_ptr, in_strides, in_dims,
            3, first_strided);
}
void transform(Param<T> output, CParam<T> input, CParam<float> transform, const bool inverse, const bool perspective, af_interp_type method) { typedef typename af::dtype_traits<T>::base_type BT; typedef wtype_t<BT> WT; const af::dim4 idims = input.dims(); const af::dim4 odims = output.dims(); const af::dim4 tdims = transform.dims(); const af::dim4 tstrides = transform.strides(); const af::dim4 istrides = input.strides(); const af::dim4 ostrides = output.strides(); T *out = output.get(); const float *tf = transform.get(); int batch_size = 1; if (idims[2] != tdims[2]) batch_size = idims[2]; Interp2<T, WT, order> interp; for (int idw = 0; idw < (int)odims[3]; idw++) { dim_t out_offw = idw * ostrides[3]; dim_t in_offw = (idims[3] > 1) * idw * istrides[3]; dim_t tf_offw = (tdims[3] > 1) * idw * tstrides[3]; for (int idz = 0; idz < (int)odims[2]; idz += batch_size) { dim_t out_offzw = out_offw + idz * ostrides[2]; dim_t in_offzw = in_offw + (idims[2] > 1) * idz * istrides[2]; dim_t tf_offzw = tf_offw + (tdims[2] > 1) * idz * tstrides[2]; const float *tptr = tf + tf_offzw; float tmat[9]; calc_transform_inverse(tmat, tptr, inverse, perspective, perspective ? 9 : 6); for (int idy = 0; idy < (int)odims[1]; idy++) { for (int idx = 0; idx < (int)odims[0]; idx++) { WT xidi = idx * tmat[0] + idy * tmat[1] + tmat[2]; WT yidi = idx * tmat[3] + idy * tmat[4] + tmat[5]; if (perspective) { WT W = idx * tmat[6] + idy * tmat[7] + tmat[8]; xidi /= W; yidi /= W; } // FIXME: Nearest and lower do not do clamping, but other // methods do Make it consistent bool clamp = order != 1; bool condX = xidi >= -0.0001 && xidi < idims[0]; bool condY = yidi >= -0.0001 && yidi < idims[1]; int ooff = out_offzw + idy * ostrides[1] + idx; if (condX && condY) { interp(output, ooff, input, in_offzw, xidi, yidi, method, batch_size, clamp); } else { for (int n = 0; n < batch_size; n++) { out[ooff + n * ostrides[2]] = scalar<T>(0); } } } } } } }
void padBorders(Param<T> out, CParam<T> in, const dim4 lBoundPadSize, const dim4 uBoundPadSize, const af::borderType btype) { const dim4& oDims = out.dims(); const dim4& oStrs = out.strides(); const dim4& iDims = in.dims(); const dim4& iStrs = in.strides(); T const* const src = in.get(); T* dst = out.get(); const dim4 validRegEnds( oDims[0] - uBoundPadSize[0], oDims[1] - uBoundPadSize[1], oDims[2] - uBoundPadSize[2], oDims[3] - uBoundPadSize[3]); const bool isInputLinear = iStrs[0] == 1; /* * VALID REGION COPYING DOES * NOT NEED ANY BOUND CHECKS * */ for (dim_t l = lBoundPadSize[3]; l < validRegEnds[3]; ++l) { dim_t oLOff = oStrs[3] * l; dim_t iLOff = iStrs[3] * (l - lBoundPadSize[3]); for (dim_t k = lBoundPadSize[2]; k < validRegEnds[2]; ++k) { dim_t oKOff = oStrs[2] * k; dim_t iKOff = iStrs[2] * (k - lBoundPadSize[2]); for (dim_t j = lBoundPadSize[1]; j < validRegEnds[1]; ++j) { dim_t oJOff = oStrs[1] * j; dim_t iJOff = iStrs[1] * (j - lBoundPadSize[1]); if (isInputLinear) { T const* const sptr = src + iLOff + iKOff + iJOff; T* dptr = dst + oLOff + oKOff + oJOff + lBoundPadSize[0]; std::copy(sptr, sptr + iDims[0], dptr); } else { for (dim_t i = lBoundPadSize[0]; i < validRegEnds[0]; ++i) { dim_t oIOff = oStrs[0] * i; dim_t iIOff = iStrs[0] * (i - lBoundPadSize[0]); dst[oLOff + oKOff + oJOff + oIOff] = src[iLOff + iKOff + iJOff + iIOff]; } } } // second dimension loop } // third dimension loop } // fourth dimension loop // If we have to do zero padding, // just return as the output is filled with // zeros during allocation if (btype == AF_PAD_ZERO) return; /* * PADDED REGIONS NEED BOUND * CHECKS; FOLLOWING NESTED * LOOPS SHALL ONLY PROCESS * PADDED REGIONS AND SKIP REST * */ for (dim_t l = 0; l < oDims[3]; ++l) { bool skipL = (l >= lBoundPadSize[3] && l < validRegEnds[3]); dim_t oLOff = oStrs[3] * l; dim_t iLOff = iStrs[3] * idxByndEdge(l, lBoundPadSize[3], iDims[3], btype); for (dim_t k = 0; k < oDims[2]; ++k) { bool skipK = (k >= lBoundPadSize[2] && k < 
validRegEnds[2]); dim_t oKOff = oStrs[2] * k; dim_t iKOff = iStrs[2] * idxByndEdge(k, lBoundPadSize[2], iDims[2], btype); for (dim_t j = 0; j < oDims[1]; ++j) { bool skipJ = (j >= lBoundPadSize[1] && j < validRegEnds[1]); dim_t oJOff = oStrs[1] * j; dim_t iJOff = iStrs[1] * idxByndEdge(j, lBoundPadSize[1], iDims[1], btype); for (dim_t i = 0; i < oDims[0]; ++i) { bool skipI = (i >= lBoundPadSize[0] && i < validRegEnds[0]); if (skipI && skipJ && skipK && skipL) continue; dim_t oIOff = oStrs[0] * i; dim_t iIOff = iStrs[0] * idxByndEdge(i, lBoundPadSize[0], iDims[0], btype); dst[oLOff + oKOff + oJOff + oIOff] = src[iLOff + iKOff + iJOff + iIOff]; } // first dimension loop } // second dimension loop } // third dimension loop } // fourth dimension loop }