/**
 * Second-order forward difference of `in` along dimension `dim`.
 *
 * For every coordinate p of `out` this stores
 *     in[p + 2e] + in[p] - in[p + e] - in[p + e]
 * where e is a unit step along `dim`.
 *
 * The loops cover the extent of `out`; nothing here checks that p + 2e is
 * still inside `in`, so the caller has to size `out` accordingly.
 *
 * @param out destination array
 * @param in  source array
 * @param dim dimension to step along; only the values 0..3 yield a step
 */
void diff2(Param<T> out, CParam<T> in, int const dim)
{
    af::dim4 const dims     = out.dims();
    af::dim4 const istrides = in.strides();
    af::dim4 const ostrides = out.strides();

    // Per-dimension step: 1 along `dim`, 0 everywhere else
    dim_t const step0 = (dim == 0);
    dim_t const step1 = (dim == 1);
    dim_t const step2 = (dim == 2);
    dim_t const step3 = (dim == 3);

    T const * const inPtr = in.get();
    T * const outPtr      = out.get();

    // TODO: Improve this
    for(dim_t l = 0; l < dims[3]; l++) {
        for(dim_t k = 0; k < dims[2]; k++) {
            for(dim_t j = 0; j < dims[1]; j++) {
                for(dim_t i = 0; i < dims[0]; i++) {
                    // Offsets stay in dim_t: narrowing them to int truncates
                    // as soon as an offset no longer fits in 32 bits.
                    dim_t const idx = getIdx(istrides, i, j, k, l);
                    dim_t const jdx = getIdx(istrides,
                            i + step0, j + step1,
                            k + step2, l + step3);
                    dim_t const kdx = getIdx(istrides,
                            i + 2 * step0, j + 2 * step1,
                            k + 2 * step2, l + 2 * step3);
                    dim_t const odx = getIdx(ostrides, i, j, k, l);

                    // Same operand order as before so floating-point results
                    // are unchanged
                    outPtr[odx] = inPtr[kdx] + inPtr[idx] - inPtr[jdx] - inPtr[jdx];
                }
            }
        }
    }
}
Exemple #2
0
// Out-of-place transpose of the first two dimensions.
//
// For every coordinate (row, col, z, w) of `output` the element read from
// `input` is the one at (col, row, z, w); dimensions 2 and 3 are walked
// unchanged, so each (z, w) slice is transposed independently.
//
// `conjugate` is declared outside this block (presumably a template
// parameter). When it is set each element goes through getConjugate() on
// its way to the destination.
void transpose(Param<T> output, CParam<T> input) {
    T *const dst       = output.get();
    T const *const src = input.get();

    const dim4 outDims    = output.dims();
    const dim4 outStrides = output.strides();
    const dim4 inStrides  = input.strides();

    for (dim_t w = 0; w < outDims[3]; ++w) {
        for (dim_t z = 0; z < outDims[2]; ++z) {
            // getIdx folds z and w into the offsets, so the base pointers
            // never need to be advanced per slice.
            for (dim_t col = 0; col < outDims[1]; ++col) {
                for (dim_t row = 0; row < outDims[0]; ++row) {
                    // The first two coordinates are swapped on the read side
                    const dim_t srcIdx = getIdx(inStrides, col, row, z, w);
                    const dim_t dstIdx = getIdx(outStrides, row, col, z, w);

                    if (!conjugate) {
                        dst[dstIdx] = src[srcIdx];
                    } else {
                        dst[dstIdx] = getConjugate(src[srcIdx]);
                    }
                }
            }
        }
    }
}
Exemple #3
0
// In-place transpose of the first two dimensions of `input`, repeated for
// every (k, l) slice of dimensions 2 and 3.
//
// Only the pairs (i, j) with i > j are visited; each visit exchanges the
// element with its mirror image across the diagonal. The index arithmetic
// only makes sense for square slices (dims[0] == dims[1]); nothing in this
// function checks that.
//
// `conjugate` is declared outside this block (presumably a template
// parameter). When it is set every element is also passed through
// getConjugate(). That includes the diagonal: those elements do not move,
// but a conjugate transpose still has to conjugate them, which the swap
// loop on its own never does.
void transpose_inplace(Param<T> input) {
    const dim4 idims    = input.dims();
    const dim4 istrides = input.strides();

    T *in = input.get();

    for (dim_t l = 0; l < idims[3]; ++l) {
        for (dim_t k = 0; k < idims[2]; ++k) {
            // The two outer loops walk the batch dimensions; getIdx takes
            // care of the per-slice offset.
            for (dim_t j = 0; j < idims[1]; ++j) {
                // Diagonal element: stays in place, but must still be
                // conjugated. The bound guards against reading past
                // dimension 0.
                if (conjugate && j < idims[0]) {
                    const dim_t dIdx = getIdx(istrides, j, j, k, l);
                    in[dIdx]         = getConjugate(in[dIdx]);
                }

                // Strictly off-diagonal pairs; std::swap exchanges each
                // element with its mirror on the other side of the diagonal.
                for (dim_t i = j + 1; i < idims[0]; ++i) {
                    const dim_t iIdx = getIdx(istrides, j, i, k, l);
                    const dim_t oIdx = getIdx(istrides, i, j, k, l);
                    if (conjugate) {
                        in[iIdx] = getConjugate(in[iIdx]);
                        in[oIdx] = getConjugate(in[oIdx]);
                    }
                    std::swap(in[iIdx], in[oIdx]);
                }
            }
        }
    }
}
Exemple #4
0
// Accumulates the windows stored in `in` back into the 2D planes of `out`.
//
// `d` is declared outside this block (presumably a template parameter).
// It selects which of the first two input dimensions enumerates the
// windows: the loop runs over in.dims()[d] and steps by in.strides()[d].
// The elements of one window are read at (y * wx + x), scaled by
// in.strides()[1] when d == 0.
//
// wx, wy : window extent along output dimensions 0 and 1
// sx, sy : step between consecutive windows
// px, py : padding subtracted from the window origin
//
// Values are added with +=, so `out` must already hold the desired initial
// contents. Window elements that land outside `out` are dropped.
void wrap_dim(Param<T> out, CParam<T> in, const dim_t wx, const dim_t wy,
              const dim_t sx, const dim_t sy, const dim_t px, const dim_t py) {
    const af::dim4 inDims     = in.dims();
    const af::dim4 outDims    = out.dims();
    const af::dim4 inStrides  = in.strides();
    const af::dim4 outStrides = out.strides();

    const T* const srcBase = in.get();
    T* const dstBase       = out.get();

    // Number of window positions along output dimension 0
    const dim_t windowsPerRow = (outDims[0] + 2 * px - wx) / sx + 1;

    for (dim_t w = 0; w < inDims[3]; w++) {
        for (dim_t z = 0; z < inDims[2]; z++) {
            const T* const srcSlice =
                srcBase + (w * inStrides[3] + z * inStrides[2]);
            T* const dstSlice =
                dstBase + (w * outStrides[3] + z * outStrides[2]);

            for (dim_t col = 0; col < inDims[d]; col++) {
                // Start of this window's data in the input
                const T* const srcWindow = srcSlice + col * inStrides[d];

                // Window origin in output coordinates, padding removed
                const dim_t originX = (col % windowsPerRow) * sx - px;
                const dim_t originY = (col / windowsPerRow) * sy - py;

                // Shortcut: when this holds, every element of the window is
                // inside the output and the per-element test is skipped
                const bool windowInside =
                    (originX >= 0 && originX + wx < outDims[0] &&
                     originY >= 0 && originY + wy < outDims[1]);

                for (dim_t y = 0; y < wy; y++) {
                    const dim_t outY = originY + y;

                    for (dim_t x = 0; x < wx; x++) {
                        const dim_t outX = originX + x;

                        const bool elementInside =
                            windowInside ||
                            (outX >= 0 && outX < outDims[0] && outY >= 0 &&
                             outY < outDims[1]);
                        if (!elementInside) continue;

                        dim_t srcOff = (y * wx + x);
                        if (d == 0) srcOff *= inStrides[1];

                        const dim_t dstOff =
                            (outY * outStrides[1] + outX * outStrides[0]);
                        // FIXME: When using threads, atomize this
                        dstSlice[dstOff] += srcWindow[srcOff];
                    }
                }
            }
        }
    }
}
// Bilateral filter over the first two dimensions of `in`; dimensions 2 and 3
// are treated as independent planes.
//
// For each pixel, every neighbour within `radius` (coordinates clamped to
// the plane) is weighted by
//     exp( (dx*dx + dy*dy) / (-2 * svar) + (center - val)^2 / (-2 * cvar) )
// and the output is the weighted sum divided by the sum of the weights.
//
// s_sigma : spatial sigma, limited to [0, 11.5]
// c_sigma : chromatic sigma, limited to >= 0
void bilateral(Param<OutT> out, CParam<InT> in, float const s_sigma, float const c_sigma)
{
    af::dim4 const inDims = in.dims();
    af::dim4 const inStr  = in.strides();
    af::dim4 const outStr = out.strides();

    // Limit both sigmas, then derive window radius and variances
    float const spatialSigma = std::min(11.5f, std::max(s_sigma, 0.f));
    float const colorSigma   = std::max(c_sigma, 0.f);
    dim_t const radius       = std::max((dim_t)(spatialSigma * 1.5f), (dim_t)1);
    float const svar         = spatialSigma * spatialSigma;
    float const cvar         = colorSigma * colorSigma;

    for (dim_t b3 = 0; b3 < inDims[3]; ++b3) {
        // Plane pointers restart at the beginning of each dim-3 slab and
        // are then advanced by one dim-2 stride per processed plane.
        OutT *dstPlane      = out.get() + b3 * outStr[3];
        InT const *srcPlane = in.get() + b3 * inStr[3];

        for (dim_t b2 = 0; b2 < inDims[2];
             ++b2, dstPlane += outStr[2], srcPlane += inStr[2]) {

            for (dim_t j = 0; j < inDims[1]; ++j) {
                for (dim_t i = 0; i < inDims[0]; ++i) {
                    OutT const center = (OutT)srcPlane[getIdx(inStr, i, j)];

                    OutT norm = 0.0;  // sum of weights
                    OutT res  = 0.0;  // sum of weighted values

                    for (dim_t dy = -radius; dy <= radius; ++dy) {
                        // Neighbour row, clamped to the plane
                        dim_t const ny = clamp(j + dy, dim_t(0), inDims[1] - 1);

                        for (dim_t dx = -radius; dx <= radius; ++dx) {
                            // Neighbour column, clamped to the plane
                            dim_t const nx = clamp(i + dx, dim_t(0), inDims[0] - 1);

                            OutT const val = (OutT)srcPlane[getIdx(inStr, nx, ny)];

                            // Exponents of the spatial and the range term
                            OutT const gauss_space = (dx*dx+dy*dy)/(-2.0*svar);
                            OutT const gauss_range = ((center-val)*(center-val))/(-2.0*cvar);
                            OutT const weight = std::exp(gauss_space+gauss_range);

                            norm += weight;
                            res += val*weight;
                        }
                    }

                    dstPlane[getIdx(outStr, i, j)] = res/norm;
                }
            }
        }
    }
}
Exemple #6
0
// Element-wise selection: out = cond ? a : b.
//
// `cond`, `a` and `b` are each compared against `out` dimension by
// dimension. Where an operand's extent differs from the output's, its index
// along that dimension is pinned to 0, so the operand is repeated along it.
//
// Along dimension 0 the element index is added directly to the offset; the
// dimension-0 strides of the four arrays are not consulted.
//
// All counters and offsets are dim_t, matching the dims/strides they are
// derived from. Keeping them in int truncated offsets once they no longer
// fit in 32 bits.
void select(Param<T> out, CParam<char> cond, CParam<T> a, CParam<T> b) {
    const af::dim4 adims    = a.dims();
    const af::dim4 astrides = a.strides();
    const af::dim4 bdims    = b.dims();
    const af::dim4 bstrides = b.strides();

    const af::dim4 cdims    = cond.dims();
    const af::dim4 cstrides = cond.strides();

    const af::dim4 odims    = out.dims();
    const af::dim4 ostrides = out.strides();

    // true where the operand has the same extent as the output
    const bool is_a_same[] = {adims[0] == odims[0], adims[1] == odims[1],
                              adims[2] == odims[2], adims[3] == odims[3]};

    const bool is_b_same[] = {bdims[0] == odims[0], bdims[1] == odims[1],
                              bdims[2] == odims[2], bdims[3] == odims[3]};

    const bool is_c_same[] = {cdims[0] == odims[0], cdims[1] == odims[1],
                              cdims[2] == odims[2], cdims[3] == odims[3]};

    const T *aptr    = a.get();
    const T *bptr    = b.get();
    T *optr          = out.get();
    const char *cptr = cond.get();

    for (dim_t l = 0; l < odims[3]; l++) {
        // A false is_*_same flag multiplies the term by 0
        const dim_t o_off3 = ostrides[3] * l;
        const dim_t a_off3 = astrides[3] * is_a_same[3] * l;
        const dim_t b_off3 = bstrides[3] * is_b_same[3] * l;
        const dim_t c_off3 = cstrides[3] * is_c_same[3] * l;

        for (dim_t k = 0; k < odims[2]; k++) {
            const dim_t o_off2 = ostrides[2] * k + o_off3;
            const dim_t a_off2 = astrides[2] * is_a_same[2] * k + a_off3;
            const dim_t b_off2 = bstrides[2] * is_b_same[2] * k + b_off3;
            const dim_t c_off2 = cstrides[2] * is_c_same[2] * k + c_off3;

            for (dim_t j = 0; j < odims[1]; j++) {
                const dim_t o_off1 = ostrides[1] * j + o_off2;
                const dim_t a_off1 = astrides[1] * is_a_same[1] * j + a_off2;
                const dim_t b_off1 = bstrides[1] * is_b_same[1] * j + b_off2;
                const dim_t c_off1 = cstrides[1] * is_c_same[1] * j + c_off2;

                for (dim_t i = 0; i < odims[0]; i++) {
                    const bool cval =
                        is_c_same[0] ? cptr[c_off1 + i] : cptr[c_off1];
                    const T aval =
                        is_a_same[0] ? aptr[a_off1 + i] : aptr[a_off1];
                    const T bval =
                        is_b_same[0] ? bptr[b_off1 + i] : bptr[b_off1];
                    optr[o_off1 + i] = cval ? aval : bval;
                }
            }
        }
    }
}
// Copies one diagonal of every 2D slice of `in` into the first dimension of
// the matching slice of `out`.
//
// num > 0 : reading starts `num` dimension-1 strides into the slice
// num <= 0: reading starts -num elements into the slice
//
// Entry i of the output is the input element at offset
// i * strides(1) + i from that start, or zero when i is not smaller than
// both of the first two input extents.
void diagExtract(Param<T> out, CParam<T> in, int const num)
{
    af::dim4 const outDims = out.dims();
    af::dim4 const inDims  = in.dims();

    // Offset of the first diagonal element within a slice
    int const start = (num > 0) ? (num * in.strides(1)) : (-num);

    int const nBatch3 = (int)outDims[3];
    int const nBatch2 = (int)outDims[2];
    int const length  = (int)outDims[0];

    for (int w = 0; w < nBatch3; w++) {
        for (int z = 0; z < nBatch2; z++) {
            const T *src = in.get() + w * in.strides(3) + z * in.strides(2) + start;
            T *dst       = out.get() + w * out.strides(3) + z * out.strides(2);

            for (int i = 0; i < length; i++) {
                bool const inside = (i < inDims[0] && i < inDims[1]);
                if (inside) {
                    // One step along the diagonal is one column stride plus
                    // one element
                    dst[i] = src[i * in.strides(1) + i];
                } else {
                    dst[i] = scalar<T>(0);
                }
            }
        }
    }
}
// 2D interpolation of `input` at the locations given by `xposition` and
// `yposition`; one output element is produced per location.
//
// Locations with x < 0, y < 0, or that fail x + 1 <= dims[0] or
// y + 1 <= dims[1], receive `offGrid`. All other locations are handed to
// the Interp2 functor.
//
// The position arrays are indexed with their own dim-2/dim-3 offsets only
// when the x-position array has an extent other than 1 in one of those
// dimensions; otherwise the same positions are reused for every slice.
//
// `order` is declared outside this block (presumably a template parameter).
void approx2(Param<InT> output, CParam<InT> input,
             CParam<LocT> xposition, CParam<LocT> yposition,
             float const offGrid, af_interp_type method)
{
    af::dim4 const outDims = output.dims();
    af::dim4 const inDims  = input.dims();
    af::dim4 const xDims   = xposition.dims();

    af::dim4 const outStr = output.strides();
    af::dim4 const inStr  = input.strides();
    af::dim4 const xStr   = xposition.strides();
    af::dim4 const yStr   = yposition.strides();

    InT *dst         = output.get();
    const LocT *xLoc = xposition.get();
    const LocT *yLoc = yposition.get();

    Interp2<InT, LocT, order> interp;

    bool const positionsPerSlice = (xDims[2] != 1 || xDims[3] != 1);

    // FIXME: Only cubic interpolation is doing clamping
    // We need to make it consistent across all methods
    // Not changing the behavior because tests will fail
    bool doClamp = (order == 3);

    for (dim_t w = 0; w < outDims[3]; w++) {
        for (dim_t z = 0; z < outDims[2]; z++) {
            dim_t const inSlice  = w * inStr[3] + z * inStr[2];
            dim_t const outSlice = w * outStr[3] + z * outStr[2];

            // Slice offsets into the position arrays; zero when the
            // positions are shared between slices
            dim_t const xSlice =
                positionsPerSlice ? (w * xStr[3] + z * xStr[2]) : 0;
            dim_t const ySlice =
                positionsPerSlice ? (w * yStr[3] + z * yStr[2]) : 0;

            for (dim_t row = 0; row < outDims[1]; row++) {
                dim_t const xRow   = xSlice + row * xStr[1];
                dim_t const yRow   = ySlice + row * yStr[1];
                dim_t const outRow = outSlice + row * outStr[1];

                for (dim_t col = 0; col < outDims[0]; col++) {
                    const LocT x = xLoc[xRow + col];
                    const LocT y = yLoc[yRow + col];

                    bool const outside = (x < 0 || inDims[0] < x + 1 ||
                                          y < 0 || inDims[1] < y + 1);
                    if (outside) {
                        dst[outRow + col] = scalar<InT>(offGrid);
                    } else {
                        interp(output, outRow + col, input, inSlice, x, y,
                               method, 1, doClamp);
                    }
                }
            }
        }
    }
}
// Builds one size x size matrix per column of `in`, with the column's values
// on the diagonal selected by `num` and zeros everywhere else.
//
// An element (i, j) is on the selected diagonal when i == j - num. For
// num > 0 it takes in[i], otherwise in[j].
//
// The number of matrices is in.dims(1), and the side length is out.dims(0).
// Successive matrices are out.strides(2) apart, and successive input columns
// are in.strides(1) apart.
void diagCreate(Param<T> out, CParam<T> in, int const num)
{
    int const nMatrices = in.dims(1);
    int const side      = out.dims(0);

    T const *src = in.get();
    T *dst       = out.get();

    for (int m = 0; m < nMatrices;
         m++, dst += out.strides(2), src += in.strides(1)) {
        for (int j = 0; j < side; j++) {
            for (int i = 0; i < side; i++) {
                T value = scalar<T>(0);

                bool const onDiagonal = (i == j - num);
                if (onDiagonal) {
                    if (num > 0) {
                        value = src[i];
                    } else {
                        value = src[j];
                    }
                }

                dst[i + j * out.strides(1)] = value;
            }
        }
    }
}
// Strided element-wise copy from `src` to `dst` with type conversion and
// scaling.
//
// Every element of `dst` is written. Coordinates that also exist in `src`
// receive OutT(src value) * OutT(factor); all remaining coordinates receive
// `default_value`.
void copyElemwise(Param<OutT> dst, CParam<InT> src, OutT default_value, double factor)
{
    af::dim4 const srcDims = src.dims();
    af::dim4 const dstDims = dst.dims();
    af::dim4 const srcStr  = src.strides();
    af::dim4 const dstStr  = dst.strides();

    InT const * const from = src.get();
    OutT * const to        = dst.get();

    // Per-dimension extent of the region present in both arrays
    dim_t const common0 = std::min(dstDims[0], srcDims[0]);
    dim_t const common1 = std::min(dstDims[1], srcDims[1]);
    dim_t const common2 = std::min(dstDims[2], srcDims[2]);
    dim_t const common3 = std::min(dstDims[3], srcDims[3]);

    for (dim_t l = 0; l < dstDims[3]; ++l) {
        bool const inSrc3    = (l < common3);
        dim_t const srcOff3  = l * srcStr[3];
        dim_t const dstOff3  = l * dstStr[3];

        for (dim_t k = 0; k < dstDims[2]; ++k) {
            bool const inSrc2   = inSrc3 && (k < common2);
            dim_t const srcOff2 = srcOff3 + k * srcStr[2];
            dim_t const dstOff2 = dstOff3 + k * dstStr[2];

            for (dim_t j = 0; j < dstDims[1]; ++j) {
                bool const inSrc1   = inSrc2 && (j < common1);
                dim_t const srcOff1 = srcOff2 + j * srcStr[1];
                dim_t const dstOff1 = dstOff2 + j * dstStr[1];

                for (dim_t i = 0; i < dstDims[0]; ++i) {
                    dim_t const dstIdx = dstOff1 + i * dstStr[0];

                    if (inSrc1 && i < common0) {
                        dim_t const srcIdx = srcOff1 + i * srcStr[0];
                        to[dstIdx] = OutT(from[srcIdx])*OutT(factor);
                    } else {
                        to[dstIdx] = default_value;
                    }
                }
            }
        }
    }
}
void join(const int dim, Param<T> out, const std::vector<CParam<T>> inputs)
{
    af::dim4 zero(0,0,0,0);
    af::dim4 d = zero;
    switch(dim) {
        case 0:
            join_append<T, T, 0>(out.get(), inputs[0].get(), zero,
                        out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
            for(int i = 1; i < n_arrays; i++) {
                d += inputs[i - 1].dims();
                join_append<T, T, 0>(out.get(), inputs[i].get(), calcOffset<0>(d),
                        out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
            }
            break;
        case 1:
            join_append<T, T, 1>(out.get(), inputs[0].get(), zero,
                        out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
            for(int i = 1; i < n_arrays; i++) {
                d += inputs[i - 1].dims();
                join_append<T, T, 1>(out.get(), inputs[i].get(), calcOffset<1>(d),
                        out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
            }
            break;
        case 2:
            join_append<T, T, 2>(out.get(), inputs[0].get(), zero,
                        out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
            for(int i = 1; i < n_arrays; i++) {
                d += inputs[i - 1].dims();
                join_append<T, T, 2>(out.get(), inputs[i].get(), calcOffset<2>(d),
                        out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
            }
            break;
        case 3:
            join_append<T, T, 3>(out.get(), inputs[0].get(), zero,
                        out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
            for(int i = 1; i < n_arrays; i++) {
                d += inputs[i - 1].dims();
                join_append<T, T, 3>(out.get(), inputs[i].get(), calcOffset<3>(d),
                        out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
            }
            break;
    }
}
Exemple #12
0
// Element-wise selection between an array and a scalar:
//     out = (flip ^ cond) ? a : b
// `flip` is declared outside this block (presumably a template parameter).
//
// `cond` and `a` are each compared against `out` dimension by dimension.
// Where an operand's extent differs from the output's, its index along that
// dimension is pinned to 0, so the operand is repeated along it.
//
// Along dimension 0 the element index is added directly to the offset; the
// dimension-0 strides are not consulted.
//
// All counters and offsets are dim_t, matching the dims/strides they are
// derived from. Keeping them in int truncated offsets once they no longer
// fit in 32 bits.
void select_scalar(Param<T> out, CParam<char> cond, CParam<T> a,
                   const double b) {
    const af::dim4 astrides = a.strides();
    const af::dim4 adims    = a.dims();
    const af::dim4 cstrides = cond.strides();
    const af::dim4 cdims    = cond.dims();

    const af::dim4 odims    = out.dims();
    const af::dim4 ostrides = out.strides();

    const T *aptr    = a.get();
    T *optr          = out.get();
    const char *cptr = cond.get();

    // true where the operand has the same extent as the output
    const bool is_a_same[] = {adims[0] == odims[0], adims[1] == odims[1],
                              adims[2] == odims[2], adims[3] == odims[3]};

    const bool is_c_same[] = {cdims[0] == odims[0], cdims[1] == odims[1],
                              cdims[2] == odims[2], cdims[3] == odims[3]};

    for (dim_t l = 0; l < odims[3]; l++) {
        // A false is_*_same flag multiplies the term by 0
        const dim_t o_off3 = ostrides[3] * l;
        const dim_t a_off3 = astrides[3] * is_a_same[3] * l;
        const dim_t c_off3 = cstrides[3] * is_c_same[3] * l;

        for (dim_t k = 0; k < odims[2]; k++) {
            const dim_t o_off2 = ostrides[2] * k + o_off3;
            const dim_t a_off2 = astrides[2] * is_a_same[2] * k + a_off3;
            const dim_t c_off2 = cstrides[2] * is_c_same[2] * k + c_off3;

            for (dim_t j = 0; j < odims[1]; j++) {
                const dim_t o_off1 = ostrides[1] * j + o_off2;
                const dim_t a_off1 = astrides[1] * is_a_same[1] * j + a_off2;
                const dim_t c_off1 = cstrides[1] * is_c_same[1] * j + c_off2;

                for (dim_t i = 0; i < odims[0]; i++) {
                    bool cval = is_c_same[0] ? cptr[c_off1 + i] : cptr[c_off1];
                    T aval    = is_a_same[0] ? aptr[a_off1 + i] : aptr[a_off1];
                    optr[o_off1 + i] = (flip ^ cval) ? aval : b;
                }
            }
        }
    }
}
Exemple #13
0
// Gathers elements of `input` into `out`, replacing the coordinate along
// dimension `dim` with a value taken from `indices`.
//
// For the selected dimension the output coordinate n is mapped to
// trimIndex(indices[n], input extent); along every other dimension the
// output coordinate is used unchanged. `indices` is read as a flat array,
// its strides are not consulted.
void lookup(Param<InT> out, CParam<InT> input, CParam<IndexT> indices,
            unsigned const dim) {
    const af::dim4 inDims     = input.dims();
    const af::dim4 outDims    = out.dims();
    const af::dim4 inStrides  = input.strides();
    const af::dim4 outStrides = out.strides();

    const InT *src      = input.get();
    const IndexT *picks = indices.get();
    InT *dst            = out.get();

    for (dim_t l = 0; l < outDims[3]; ++l) {
        const dim_t srcL =
            (dim == 3 ? trimIndex((dim_t)picks[l], inDims[3]) : l);
        const dim_t inOffL  = inStrides[3] * srcL;
        const dim_t outOffL = l * outStrides[3];

        for (dim_t k = 0; k < outDims[2]; ++k) {
            const dim_t srcK =
                (dim == 2 ? trimIndex((dim_t)picks[k], inDims[2]) : k);
            const dim_t inOffK  = inOffL + inStrides[2] * srcK;
            const dim_t outOffK = outOffL + k * outStrides[2];

            for (dim_t j = 0; j < outDims[1]; ++j) {
                const dim_t srcJ =
                    (dim == 1 ? trimIndex((dim_t)picks[j], inDims[1]) : j);
                const dim_t inOffJ  = inOffK + inStrides[1] * srcJ;
                const dim_t outOffJ = outOffK + j * outStrides[1];

                for (dim_t i = 0; i < outDims[0]; ++i) {
                    const dim_t srcI =
                        (dim == 0 ? trimIndex((dim_t)picks[i], inDims[0]) : i);

                    dst[outOffJ + i * outStrides[0]] =
                        src[inOffJ + inStrides[0] * srcI];
                }
            }
        }
    }
}
// Appends `first` and then `second` to `out` along dimension `dim`.
//
// `first` is placed at the origin, and `second` at calcOffset<dim>() of the
// dimensions of `first`. The two inputs may have different element types
// (Tx and Ty); both are written into an output of type Tx.
//
// Values of `dim` other than 0..3 leave `out` untouched.
void join(Param<Tx> out, const int dim, CParam<Tx> first, CParam<Ty> second)
{
    const af::dim4 origin(0, 0, 0, 0);

    const af::dim4 outDims    = out.dims();
    const af::dim4 firstDims  = first.dims();
    const af::dim4 secondDims = second.dims();

    Tx *dst         = out.get();
    const Tx *head  = first.get();
    const Ty *tail  = second.get();

    // The dimension is a template argument of the helpers, so every case
    // has to spell out its own instantiation.
    switch (dim) {
        case 0: {
            join_append<Tx, Tx, 0>(dst, head, origin,
                                   outDims, firstDims, out.strides(), first.strides());
            join_append<Tx, Ty, 0>(dst, tail, calcOffset<0>(firstDims),
                                   outDims, secondDims, out.strides(), second.strides());
        } break;
        case 1: {
            join_append<Tx, Tx, 1>(dst, head, origin,
                                   outDims, firstDims, out.strides(), first.strides());
            join_append<Tx, Ty, 1>(dst, tail, calcOffset<1>(firstDims),
                                   outDims, secondDims, out.strides(), second.strides());
        } break;
        case 2: {
            join_append<Tx, Tx, 2>(dst, head, origin,
                                   outDims, firstDims, out.strides(), first.strides());
            join_append<Tx, Ty, 2>(dst, tail, calcOffset<2>(firstDims),
                                   outDims, secondDims, out.strides(), second.strides());
        } break;
        case 3: {
            join_append<Tx, Tx, 3>(dst, head, origin,
                                   outDims, firstDims, out.strides(), first.strides());
            join_append<Tx, Ty, 3>(dst, tail, calcOffset<3>(firstDims),
                                   outDims, secondDims, out.strides(), second.strides());
        } break;
    }
}
Exemple #15
0
// Complex-to-real FFT of `in` into `out`, done with a single plan that is
// created, executed and destroyed inside this call.
//
// `rank` is declared outside this block (presumably a template parameter).
// The first `rank` dimensions are transformed; the product of the remaining
// extents of `odims` is passed on as the batch count.
//
// oDataDims / iDataDims : dimensions handed to computeDims for the output
//                         and input embedding arrays
// odims                 : dimensions handed to computeDims for the
//                         transform size
//
// NOTE(review): the argument order of transform.create looks like FFTW's
// "plan many" interface (rank, n, howmany, in, inembed, istride, idist,
// out, onembed, ostride, odist, flags) -- confirm against the wrapper.
void fft_c2r(Param<Tr> out, const af::dim4 oDataDims,
             CParam<Tc> in, const af::dim4 iDataDims,
             const af::dim4 odims)
{
    typedef typename fftw_real_transform<Tr, Tc>::ctype_t ctype_t;

    int transformDims[rank];
    int inEmbed[rank];
    int outEmbed[rank];

    computeDims<rank>(transformDims, odims);
    computeDims<rank>(inEmbed, iDataDims);
    computeDims<rank>(outEmbed, oDataDims);

    const af::dim4 inStrides  = in.strides();
    const af::dim4 outStrides = out.strides();

    // Every dimension from `rank` upwards contributes to the batch count
    int batch = 1;
    for (int d = rank; d < 4; d++) {
        batch *= odims[d];
    }

    fftw_real_transform<Tr, Tc> transform;

    typename fftw_real_transform<Tr, Tc>::plan_t plan =
        transform.create(rank,
                         transformDims,
                         (int)batch,
                         (ctype_t *)in.get(),
                         inEmbed, (int)inStrides[0],
                         (int)inStrides[rank],
                         (Tr *)out.get(),
                         outEmbed, (int)outStrides[0],
                         (int)outStrides[rank],
                         FFTW_ESTIMATE);

    transform.execute(plan);
    transform.destroy(plan);
}
    // Copies `src` into `dst`.
    //
    // First counts how many leading dimensions are laid out densely in both
    // arrays: dimension n qualifies while its stride, in source and
    // destination alike, equals the product of the source extents of all
    // lower dimensions. That count is handed to copy_go, which does the
    // actual copying starting from dimension 3.
    static void copy(Param<T> dst, CParam<T> src)
    {
        af::dim4 srcDims    = src.dims();
        af::dim4 dstDims    = dst.dims();
        af::dim4 srcStrides = src.strides();
        af::dim4 dstStrides = dst.strides();

        T const * from = src.get();
        T * to         = dst.get();

        // `dense` is the stride a dimension must have to count as linear
        int linearEnd = 0;
        for (dim_t dense = 1;
             linearEnd < 4
                && dense == srcStrides[linearEnd]
                && dense == dstStrides[linearEnd];
             ++linearEnd) {
            dense *= srcDims[linearEnd];
        }

        // Strides are only needed from `linearEnd` upwards
        copy_go(to, dstStrides, dstDims, from, srcStrides, srcDims, 3, linearEnd);
    }
Exemple #17
0
// In-place complex FFT of `in`, done with a single plan that is created,
// executed and destroyed inside this call. The same buffer, embedding and
// strides are passed as both input and output.
//
// `rank` and `direction` are declared outside this block (presumably
// template parameters). The first `rank` dimensions are transformed; the
// product of the remaining extents of `in` is passed on as the batch count.
// A true `direction` selects FFTW_FORWARD, a false one FFTW_BACKWARD.
//
// iDataDims : dimensions handed to computeDims for the embedding array
//
// NOTE(review): the argument order of transform.create looks like FFTW's
// "plan many" interface -- confirm against the wrapper.
void fft_inplace(Param<T> in, const af::dim4 iDataDims)
{
    typedef typename fftw_transform<T>::ctype_t ctype_t;

    const af::dim4 inDims    = in.dims();
    const af::dim4 inStrides = in.strides();

    int transformDims[rank];
    int embed[rank];

    computeDims<rank>(transformDims, inDims);
    computeDims<rank>(embed, iDataDims);

    // Every dimension from `rank` upwards contributes to the batch count
    int batch = 1;
    for (int d = rank; d < 4; d++) {
        batch *= inDims[d];
    }

    fftw_transform<T> transform;

    typename fftw_transform<T>::plan_t plan =
        transform.create(rank,
                         transformDims,
                         (int)batch,
                         (ctype_t *)in.get(),
                         embed, (int)inStrides[0],
                         (int)inStrides[rank],
                         (ctype_t *)in.get(),
                         embed, (int)inStrides[0],
                         (int)inStrides[rank],
                         direction ? FFTW_FORWARD : FFTW_BACKWARD,
                         FFTW_ESTIMATE);

    transform.execute(plan);
    transform.destroy(plan);
}
// Copies `in` into the interior of `out` and, unless zero padding was
// requested, fills the surrounding border from the input as well.
//
// lBoundPadSize : number of padding elements before the data, per dimension
// uBoundPadSize : number of padding elements after the data, per dimension
// btype         : border type; AF_PAD_ZERO returns right after the interior
//                 copy and relies on `out` already being zero-filled
//
// Pass 1 copies the interior without bound checks. When the input's
// dimension-0 stride is 1, whole rows are moved with std::copy to
// consecutive destination elements, so the output's dimension-0 stride is
// not consulted on that path.
// Pass 2 walks the whole output, skips the interior and reads each border
// element from the input coordinate returned by idxByndEdge().
void padBorders(Param<T> out, CParam<T> in, const dim4 lBoundPadSize,
                const dim4 uBoundPadSize, const af::borderType btype) {
    const dim4 outDims    = out.dims();
    const dim4 outStrides = out.strides();
    const dim4 inDims     = in.dims();
    const dim4 inStrides  = in.strides();

    T const* const srcBase = in.get();
    T* const dstBase       = out.get();

    // First output index past the interior, per dimension
    const dim4 interiorEnd(
        outDims[0] - uBoundPadSize[0], outDims[1] - uBoundPadSize[1],
        outDims[2] - uBoundPadSize[2], outDims[3] - uBoundPadSize[3]);
    const bool unitStrideInput = (inStrides[0] == 1);

    // ---- Pass 1: interior, no bound checks needed ----
    for (dim_t w = lBoundPadSize[3]; w < interiorEnd[3]; ++w) {
        const dim_t dstW = outStrides[3] * w;
        const dim_t srcW = inStrides[3] * (w - lBoundPadSize[3]);

        for (dim_t z = lBoundPadSize[2]; z < interiorEnd[2]; ++z) {
            const dim_t dstZ = outStrides[2] * z;
            const dim_t srcZ = inStrides[2] * (z - lBoundPadSize[2]);

            for (dim_t y = lBoundPadSize[1]; y < interiorEnd[1]; ++y) {
                const dim_t dstRow = dstW + dstZ + outStrides[1] * y;
                const dim_t srcRow =
                    srcW + srcZ + inStrides[1] * (y - lBoundPadSize[1]);

                if (!unitStrideInput) {
                    for (dim_t x = lBoundPadSize[0]; x < interiorEnd[0]; ++x) {
                        const dim_t dstX = outStrides[0] * x;
                        const dim_t srcX =
                            inStrides[0] * (x - lBoundPadSize[0]);

                        dstBase[dstRow + dstX] = srcBase[srcRow + srcX];
                    }
                } else {
                    // Whole row at once
                    T const* const rowBegin = srcBase + srcRow;
                    T* const rowDst = dstBase + dstRow + lBoundPadSize[0];

                    std::copy(rowBegin, rowBegin + inDims[0], rowDst);
                }
            }
        }
    }

    // Zero padding: nothing more to do, the output was filled with zeros
    // when it was allocated.
    if (btype == AF_PAD_ZERO) return;

    // ---- Pass 2: border only, source coordinate from idxByndEdge ----
    for (dim_t w = 0; w < outDims[3]; ++w) {
        const bool wInside = (w >= lBoundPadSize[3] && w < interiorEnd[3]);
        const dim_t dstW   = outStrides[3] * w;
        const dim_t srcW =
            inStrides[3] * idxByndEdge(w, lBoundPadSize[3], inDims[3], btype);

        for (dim_t z = 0; z < outDims[2]; ++z) {
            const bool zInside = (z >= lBoundPadSize[2] && z < interiorEnd[2]);
            const dim_t dstZ   = outStrides[2] * z;
            const dim_t srcZ =
                inStrides[2] * idxByndEdge(z, lBoundPadSize[2], inDims[2], btype);

            for (dim_t y = 0; y < outDims[1]; ++y) {
                const bool yInside =
                    (y >= lBoundPadSize[1] && y < interiorEnd[1]);
                const dim_t dstY = outStrides[1] * y;
                const dim_t srcY = inStrides[1] *
                                   idxByndEdge(y, lBoundPadSize[1], inDims[1], btype);

                for (dim_t x = 0; x < outDims[0]; ++x) {
                    const bool xInside =
                        (x >= lBoundPadSize[0] && x < interiorEnd[0]);

                    // Inside in all four dimensions: written by pass 1
                    if (xInside && yInside && zInside && wInside) continue;

                    const dim_t dstX = outStrides[0] * x;
                    const dim_t srcX = inStrides[0] * idxByndEdge(x, lBoundPadSize[0],
                                                                  inDims[0], btype);

                    dstBase[dstW + dstZ + dstY + dstX] =
                        srcBase[srcW + srcZ + srcY + srcX];
                }
            }
        }
    }
}
Exemple #19
0
// Resamples `input` into `output` through a per-slice coordinate transform.
//
// For every output pixel (idx, idy) the source location is
//     x = idx * m[0] + idy * m[1] + m[2]
//     y = idx * m[3] + idy * m[4] + m[5]
// where m is produced by calc_transform_inverse() from the matrix stored
// in `transform`. With `perspective` set, both are divided by
//     idx * m[6] + idy * m[7] + m[8].
// Locations with x in [-0.0001, dims[0]) and y in [-0.0001, dims[1]) are
// passed to the Interp2 functor; all others are written as zero.
//
// When the dim-2 extents of `input` and `transform` differ, all dim-2
// slices of the input are processed with one matrix as a single batch.
//
// `order` is declared outside this block (presumably a template parameter).
void transform(Param<T> output, CParam<T> input, CParam<float> transform,
               const bool inverse, const bool perspective,
               af_interp_type method) {
    typedef typename af::dtype_traits<T>::base_type BT;
    typedef wtype_t<BT> WT;

    const af::dim4 inDims     = input.dims();
    const af::dim4 outDims    = output.dims();
    const af::dim4 tfDims     = transform.dims();
    const af::dim4 inStrides  = input.strides();
    const af::dim4 outStrides = output.strides();
    const af::dim4 tfStrides  = transform.strides();

    T *dst                  = output.get();
    const float *matrixBase = transform.get();

    // Number of dim-2 slices handled per matrix
    int slicesPerMatrix = 1;
    if (inDims[2] != tfDims[2]) { slicesPerMatrix = inDims[2]; }

    // FIXME: Nearest and lower do not do clamping, but other
    // methods do Make it consistent
    bool doClamp = (order != 1);

    Interp2<T, WT, order> interp;

    const int outW = (int)outDims[3];
    const int outZ = (int)outDims[2];
    const int outY = (int)outDims[1];
    const int outX = (int)outDims[0];

    for (int idw = 0; idw < outW; idw++) {
        // An extent of 1 zeroes the term, so that array is reused for
        // every idw
        const dim_t outOffW = idw * outStrides[3];
        const dim_t inOffW  = (inDims[3] > 1) * idw * inStrides[3];
        const dim_t tfOffW  = (tfDims[3] > 1) * idw * tfStrides[3];

        for (int idz = 0; idz < outZ; idz += slicesPerMatrix) {
            const dim_t outOffZW = outOffW + idz * outStrides[2];
            const dim_t inOffZW  = inOffW + (inDims[2] > 1) * idz * inStrides[2];
            const dim_t tfOffZW  = tfOffW + (tfDims[2] > 1) * idz * tfStrides[2];

            const float *matrix = matrixBase + tfOffZW;

            float tmat[9];
            calc_transform_inverse(tmat, matrix, inverse, perspective,
                                   perspective ? 9 : 6);

            for (int idy = 0; idy < outY; idy++) {
                for (int idx = 0; idx < outX; idx++) {
                    WT xidi = idx * tmat[0] + idy * tmat[1] + tmat[2];
                    WT yidi = idx * tmat[3] + idy * tmat[4] + tmat[5];

                    if (perspective) {
                        WT W = idx * tmat[6] + idy * tmat[7] + tmat[8];
                        xidi /= W;
                        yidi /= W;
                    }

                    const bool xInside = xidi >= -0.0001 && xidi < inDims[0];
                    const bool yInside = yidi >= -0.0001 && yidi < inDims[1];

                    int ooff = outOffZW + idy * outStrides[1] + idx;
                    if (!(xInside && yInside)) {
                        // Outside the input: zero in every batched slice
                        for (int n = 0; n < slicesPerMatrix; n++) {
                            dst[ooff + n * outStrides[2]] = scalar<T>(0);
                        }
                    } else {
                        interp(output, ooff, input, inOffZW, xidi, yidi,
                               method, slicesPerMatrix, doClamp);
                    }
                }
            }
        }
    }
}