Ejemplo n.º 1
0
// Runs a batched complex-to-real FFT from `in` into `out` through the
// fftw_real_transform wrapper: a plan is created, executed once and destroyed.
//
// out        - destination buffer holding real (Tr) values
// oDataDims  - dims handed to computeDims to fill the output embedding array
// in         - source buffer holding complex (Tc) values
// iDataDims  - dims handed to computeDims to fill the input embedding array
// odims      - dims handed to computeDims to fill the transform-size array;
//              entries at index >= rank are multiplied together to form the
//              batch count
//
// NOTE(review): `Tc`, `Tr` and `rank` are not declared in this block;
// presumably they are template parameters declared just above it -- confirm.
void fft_c2r(Param<Tr> out, const af::dim4 oDataDims,
             CParam<Tc> in, const af::dim4 iDataDims,
             const af::dim4 odims)
{
    typedef fftw_real_transform<Tr, Tc> fftw_t;
    typedef typename fftw_t::ctype_t ctype_t;

    // Per-dimension integer arrays consumed by the plan; computeDims decides
    // their layout.
    int fft_dims[rank];
    int in_padded[rank];
    int out_padded[rank];

    computeDims<rank>(fft_dims, odims);
    computeDims<rank>(in_padded, iDataDims);
    computeDims<rank>(out_padded, oDataDims);

    const af::dim4 in_strides  = in.strides();
    const af::dim4 out_strides = out.strides();

    typename fftw_t::plan_t c2r_plan;
    fftw_t fftw;

    // Every dimension past the transform rank counts as a batch dimension.
    int howmany = 1;
    int d       = rank;
    while (d < 4) {
        howmany *= odims[d];
        ++d;
    }

    // Element stride comes from dimension 0; the distance between consecutive
    // batches comes from the stride of the first non-transformed dimension.
    c2r_plan = fftw.create(rank,
                           fft_dims,
                           static_cast<int>(howmany),
                           (ctype_t *)in.get(),
                           in_padded,
                           static_cast<int>(in_strides[0]),
                           static_cast<int>(in_strides[rank]),
                           (Tr *)out.get(),
                           out_padded,
                           static_cast<int>(out_strides[0]),
                           static_cast<int>(out_strides[rank]),
                           FFTW_ESTIMATE);

    fftw.execute(c2r_plan);
    fftw.destroy(c2r_plan);
}
Ejemplo n.º 2
0
    // Copies the contents of `src` into `dst`.
    //
    // First counts how many leading dimensions are densely packed in both
    // arrays (stride equal to the running product of the source dims), then
    // delegates to copy_go, passing that count so it knows where the packed
    // prefix ends.
    static void copy(Param<T> dst, CParam<T> src)
    {
        T const * src_ptr = src.get();
        T * dst_ptr       = dst.get();

        af::dim4 src_dims       = src.dims();
        af::dim4 dst_dims       = dst.dims();
        af::dim4 src_strides    = src.strides();
        af::dim4 dst_strides    = dst.strides();

        // Walk the dimensions from the innermost one outward while both
        // arrays still have the stride a packed layout would have.
        int packed_dims = 0;
        for (dim_t packed_stride = 1;
             packed_dims < 4
                 && packed_stride == src_strides[packed_dims]
                 && packed_stride == dst_strides[packed_dims];
             ++packed_dims) {
            packed_stride *= src_dims[packed_dims];
        }

        // Start at the outermost dimension (index 3); strides are only needed
        // for the dimensions beyond the packed prefix.
        copy_go(dst_ptr, dst_strides, dst_dims, src_ptr, src_strides, src_dims, 3, packed_dims);
    }
Ejemplo n.º 3
0
// Resamples `input` into `output` through a 2D coordinate transform,
// visiting every output element.
//
// output      - destination; its dims drive all four loops.
// input       - source data, sampled through the Interp2 functor.
// transform   - float matrix data; a pointer into it is handed to
//               calc_transform_inverse once per (idz, idw) step.
// inverse     - forwarded unchanged to calc_transform_inverse.
// perspective - when true, 9 matrix values are requested and the mapped
//               coordinates are divided by a third (W) component; otherwise
//               6 values are requested and no division happens.
// method      - interpolation method forwarded to Interp2.
//
// NOTE(review): `T` and `order` are not declared in this block; presumably
// they are template parameters declared just above it -- confirm.
void transform(Param<T> output, CParam<T> input, CParam<float> transform,
               const bool inverse, const bool perspective,
               af_interp_type method) {
    typedef typename af::dtype_traits<T>::base_type BT;
    typedef wtype_t<BT> WT;

    const af::dim4 idims    = input.dims();
    const af::dim4 odims    = output.dims();
    const af::dim4 tdims    = transform.dims();
    const af::dim4 tstrides = transform.strides();
    const af::dim4 istrides = input.strides();
    const af::dim4 ostrides = output.strides();

    T *out          = output.get();
    const float *tf = transform.get();

    // When the input and the transform disagree on the size of dimension 2,
    // all idims[2] slices are handled in a single step of the idz loop and
    // share one matrix; otherwise slices are processed one at a time.
    int batch_size = 1;
    if (idims[2] != tdims[2]) batch_size = idims[2];

    Interp2<T, WT, order> interp;
    for (int idw = 0; idw < (int)odims[3]; idw++) {
        // The (dim > 1) factor zeroes the offset for a size-1 dimension, so a
        // single input/transform slice is reused for every idw.
        dim_t out_offw = idw * ostrides[3];
        dim_t in_offw  = (idims[3] > 1) * idw * istrides[3];
        dim_t tf_offw  = (tdims[3] > 1) * idw * tstrides[3];

        for (int idz = 0; idz < (int)odims[2]; idz += batch_size) {
            // Same size-1 reuse rule as above, applied to dimension 2.
            dim_t out_offzw = out_offw + idz * ostrides[2];
            dim_t in_offzw  = in_offw + (idims[2] > 1) * idz * istrides[2];
            dim_t tf_offzw  = tf_offw + (tdims[2] > 1) * idz * tstrides[2];

            const float *tptr = tf + tf_offzw;

            // Matrix actually applied below; filled by calc_transform_inverse
            // from the data at tptr. Only tmat[0..5] are read unless
            // `perspective` is set.
            float tmat[9];
            calc_transform_inverse(tmat, tptr, inverse, perspective,
                                   perspective ? 9 : 6);

            for (int idy = 0; idy < (int)odims[1]; idy++) {
                for (int idx = 0; idx < (int)odims[0]; idx++) {
                    // Map the output index (idx, idy) to a source coordinate.
                    WT xidi = idx * tmat[0] + idy * tmat[1] + tmat[2];
                    WT yidi = idx * tmat[3] + idy * tmat[4] + tmat[5];

                    if (perspective) {
                        // Third row yields the divisor for both coordinates.
                        WT W = idx * tmat[6] + idy * tmat[7] + tmat[8];
                        xidi /= W;
                        yidi /= W;
                    }

                    // FIXME: whether clamping is requested depends on `order`
                    // (disabled when order == 1, enabled otherwise); make this
                    // consistent across interpolation methods.
                    bool clamp = order != 1;
                    // A source coordinate is accepted when it lies in
                    // [-0.0001, idims) along each axis.
                    bool condX = xidi >= -0.0001 && xidi < idims[0];
                    bool condY = yidi >= -0.0001 && yidi < idims[1];

                    // NOTE(review): idx is added without ostrides[0], so this
                    // assumes the output has unit stride along dimension 0 --
                    // confirm against callers.
                    int ooff = out_offzw + idy * ostrides[1] + idx;
                    if (condX && condY) {
                        interp(output, ooff, input, in_offzw, xidi, yidi,
                               method, batch_size, clamp);
                    } else {
                        // Out-of-range source coordinate: write zero to this
                        // position in each slice of the current batch.
                        for (int n = 0; n < batch_size; n++) {
                            out[ooff + n * ostrides[2]] = scalar<T>(0);
                        }
                    }
                }
            }
        }
    }
}
Ejemplo n.º 4
0
// Copies `in` into the interior of `out` and, unless `btype` is AF_PAD_ZERO,
// fills the remaining border elements of `out` with values read from `in` at
// the indices returned by idxByndEdge.
//
// out           - destination; for AF_PAD_ZERO the border is left untouched,
//                 so the caller must have zero-filled it beforehand.
// in            - source data.
// lBoundPadSize - per-dimension offset at which the input starts inside the
//                 output (leading padding).
// uBoundPadSize - per-dimension trailing padding; the interior ends at
//                 oDims - uBoundPadSize.
// btype         - border type; forwarded to idxByndEdge for non-zero padding.
//
// NOTE(review): `T` is not declared in this block; presumably a template
// parameter declared just above it -- confirm.
void padBorders(Param<T> out, CParam<T> in, const dim4 lBoundPadSize,
                const dim4 uBoundPadSize, const af::borderType btype) {
    const dim4& oDims = out.dims();
    const dim4& oStrs = out.strides();
    const dim4& iDims = in.dims();
    const dim4& iStrs = in.strides();

    T const* const src = in.get();
    T* dst             = out.get();

    const dim4 validRegEnds(
        oDims[0] - uBoundPadSize[0], oDims[1] - uBoundPadSize[1],
        oDims[2] - uBoundPadSize[2], oDims[3] - uBoundPadSize[3]);

    // The row-wise std::copy below reads AND writes contiguously, so it is
    // only valid when both arrays have unit stride along dimension 0. Checking
    // the input alone would scatter values to the wrong output positions for
    // an output whose first stride is not 1.
    const bool isRowContiguous = iStrs[0] == 1 && oStrs[0] == 1;

    /*
     * VALID REGION COPYING DOES
     * NOT NEED ANY BOUND CHECKS
     * */
    for (dim_t l = lBoundPadSize[3]; l < validRegEnds[3]; ++l) {
        dim_t oLOff = oStrs[3] * l;
        dim_t iLOff = iStrs[3] * (l - lBoundPadSize[3]);

        for (dim_t k = lBoundPadSize[2]; k < validRegEnds[2]; ++k) {
            dim_t oKOff = oStrs[2] * k;
            dim_t iKOff = iStrs[2] * (k - lBoundPadSize[2]);

            for (dim_t j = lBoundPadSize[1]; j < validRegEnds[1]; ++j) {
                dim_t oJOff = oStrs[1] * j;
                dim_t iJOff = iStrs[1] * (j - lBoundPadSize[1]);

                if (isRowContiguous) {
                    // Fast path: copy one whole input row (iDims[0] elements)
                    // into the output row, shifted by the leading padding.
                    T const* const sptr = src + iLOff + iKOff + iJOff;
                    T* dptr = dst + oLOff + oKOff + oJOff + lBoundPadSize[0];

                    std::copy(sptr, sptr + iDims[0], dptr);
                } else {
                    // Strided fallback: element-by-element copy honouring the
                    // dimension-0 stride of both arrays.
                    for (dim_t i = lBoundPadSize[0]; i < validRegEnds[0]; ++i) {
                        dim_t oIOff = oStrs[0] * i;
                        dim_t iIOff = iStrs[0] * (i - lBoundPadSize[0]);

                        dst[oLOff + oKOff + oJOff + oIOff] =
                            src[iLOff + iKOff + iJOff + iIOff];
                    }
                }
            }  // second dimension loop
        }      // third dimension loop
    }          // fourth dimension loop

    // If we have to do zero padding,
    // just return as the output is filled with
    // zeros during allocation
    if (btype == AF_PAD_ZERO) return;

    /*
     * PADDED REGIONS NEED BOUND
     * CHECKS; FOLLOWING NESTED
     * LOOPS SHALL ONLY PROCESS
     * PADDED REGIONS AND SKIP REST
     * */
    for (dim_t l = 0; l < oDims[3]; ++l) {
        // skipX is true when the index lies inside the valid region along
        // that dimension; an element is skipped only when all four are true.
        bool skipL  = (l >= lBoundPadSize[3] && l < validRegEnds[3]);
        dim_t oLOff = oStrs[3] * l;
        dim_t iLOff =
            iStrs[3] * idxByndEdge(l, lBoundPadSize[3], iDims[3], btype);
        for (dim_t k = 0; k < oDims[2]; ++k) {
            bool skipK  = (k >= lBoundPadSize[2] && k < validRegEnds[2]);
            dim_t oKOff = oStrs[2] * k;
            dim_t iKOff =
                iStrs[2] * idxByndEdge(k, lBoundPadSize[2], iDims[2], btype);
            for (dim_t j = 0; j < oDims[1]; ++j) {
                bool skipJ  = (j >= lBoundPadSize[1] && j < validRegEnds[1]);
                dim_t oJOff = oStrs[1] * j;
                dim_t iJOff = iStrs[1] *
                              idxByndEdge(j, lBoundPadSize[1], iDims[1], btype);
                for (dim_t i = 0; i < oDims[0]; ++i) {
                    bool skipI = (i >= lBoundPadSize[0] && i < validRegEnds[0]);
                    // Already written by the valid-region pass above.
                    if (skipI && skipJ && skipK && skipL) continue;

                    dim_t oIOff = oStrs[0] * i;
                    dim_t iIOff = iStrs[0] * idxByndEdge(i, lBoundPadSize[0],
                                                         iDims[0], btype);

                    dst[oLOff + oKOff + oJOff + oIOff] =
                        src[iLOff + iKOff + iJOff + iIOff];

                }  // first dimension loop
            }      // second dimension loop
        }          // third dimension loop
    }              // fourth dimension loop
}