示例#1
0
    /**
     * Produces a copy of `in` whose first two dimensions are odim0 x odim1;
     * dimensions 2 and 3 are taken unchanged from the input.
     *
     * @param in     source array
     * @param odim0  requested size of output dimension 0
     * @param odim1  requested size of output dimension 1
     * @param method interpolation kernel to dispatch to
     * @return the newly created output array
     * @throws std::runtime_error when the input or the requested output has
     *         zero elements
     */
    Array<T> resize(const Array<T> &in, const dim_type odim0, const dim_type odim1,
                    const af_interp_type method)
    {
        // Double-precision element types are only usable when the active
        // device reports support for them.
        const bool wantsDouble = std::is_same<T, double>::value ||
                                 std::is_same<T, cdouble>::value;
        if (wantsDouble && !isDoubleSupported(getActiveDeviceId())) {
            OPENCL_NOT_SUPPORTED();
        }

        const af::dim4 srcDims = in.dims();
        af::dim4 dstDims(odim0, odim1, srcDims[2], srcDims[3]);

        const bool bothNonEmpty = srcDims.elements() != 0 && dstDims.elements() != 0;
        if (!bothNonEmpty) {
            throw std::runtime_error("Elements is 0");
        }

        Array<T> result = createEmptyArray<T>(dstDims);

        // Any other method falls through: no kernel runs and the array is
        // returned exactly as createEmptyArray produced it.
        if (method == AF_INTERP_NEAREST) {
            kernel::resize<T, AF_INTERP_NEAREST>(result, in);
        } else if (method == AF_INTERP_BILINEAR) {
            kernel::resize<T, AF_INTERP_BILINEAR>(result, in);
        }

        return result;
    }
示例#2
0
/**
 * Builds a new array from `input` by selecting, along dimension `dim`, the
 * positions listed in `indices`; all other dimensions are copied as-is.
 *
 * The output has the same dimensions as `input` except that dimension `dim`
 * has indices.elements() entries. Each index value is passed through
 * trimIndex() together with the input extent before it is used.
 *
 * @param input   array to read from
 * @param indices positions to pick along `dim`
 * @param dim     dimension (0..3) that is indexed
 * @return pointer to the array obtained from createEmptyArray
 */
Array<in_t>* arrayIndex(const Array<in_t> &input, const Array<idx_t> &indices, const unsigned dim)
{
    const dim4 srcDims    = input.dims();
    const dim4 srcStrides = input.strides();

    const in_t  *src = input.get();
    const idx_t *idx = indices.get();

    // Output shape: input shape with the indexed dimension replaced.
    dim4 dstDims(1);
    for (dim_type d = 0; d < 4; ++d) {
        if (d == int(dim)) {
            dstDims[d] = indices.elements();
        } else {
            dstDims[d] = srcDims[d];
        }
    }

    Array<in_t>* out = createEmptyArray<in_t>(dstDims);

    dim4 dstStrides = out->strides();

    in_t *dst = out->get();

    // Offsets are accumulated level by level (dim 3 outermost, dim 0
    // innermost). Only the indexed dimension is redirected through `idx`.
    for (dim_type w = 0; w < dstDims[3]; ++w) {
        dim_type srcW = w;
        if (dim == 3) {
            srcW = trimIndex(static_cast<dim_type>(idx[w]), srcDims[3]);
        }
        const dim_type srcOff3 = srcStrides[3] * srcW;
        const dim_type dstOff3 = w * dstStrides[3];

        for (dim_type z = 0; z < dstDims[2]; ++z) {
            dim_type srcZ = z;
            if (dim == 2) {
                srcZ = trimIndex(static_cast<dim_type>(idx[z]), srcDims[2]);
            }
            const dim_type srcOff32 = srcOff3 + srcStrides[2] * srcZ;
            const dim_type dstOff32 = dstOff3 + z * dstStrides[2];

            for (dim_type y = 0; y < dstDims[1]; ++y) {
                dim_type srcY = y;
                if (dim == 1) {
                    srcY = trimIndex(static_cast<dim_type>(idx[y]), srcDims[1]);
                }
                const dim_type srcOff321 = srcOff32 + srcStrides[1] * srcY;
                const dim_type dstOff321 = dstOff32 + y * dstStrides[1];

                for (dim_type x = 0; x < dstDims[0]; ++x) {
                    dim_type srcX = x;
                    if (dim == 0) {
                        srcX = trimIndex(static_cast<dim_type>(idx[x]), srcDims[0]);
                    }
                    const dim_type srcOff = srcOff321 + srcStrides[0] * srcX;
                    const dim_type dstOff = dstOff321 + x * dstStrides[0];

                    dst[dstOff] = src[srcOff];
                }
            }
        }
    }

    return out;
}
示例#3
0
    /**
     * Creates an array whose dimension i has the extent of input dimension
     * rdims[i], then hands the data rearrangement to kernel::reorder.
     *
     * @param in    source array
     * @param rdims for each output dimension, the input dimension it maps to
     * @return pointer to the array obtained from createEmptyArray
     */
    Array<T> *reorder(const Array<T> &in, const af::dim4 &rdims)
    {
        // Double-precision element types are only usable when the active
        // device reports support for them.
        const bool wantsDouble = std::is_same<T, double>::value ||
                                 std::is_same<T, cdouble>::value;
        if (wantsDouble && !isDoubleSupported(getActiveDeviceId())) {
            OPENCL_NOT_SUPPORTED();
        }

        const af::dim4 srcDims = in.dims();

        // Permute the extents according to rdims.
        af::dim4 dstDims(0);
        for (int axis = 0; axis < 4; ++axis) {
            dstDims[axis] = srcDims[rdims[axis]];
        }

        Array<T> *result = createEmptyArray<T>(dstDims);
        kernel::reorder<T>(*result, in, rdims.get());
        return result;
    }
示例#4
0
/**
 * Produces a copy of `in` whose first two dimensions are odim0 x odim1;
 * dimensions 2 and 3 are taken unchanged from the input.
 *
 * @param in     source array
 * @param odim0  requested size of output dimension 0
 * @param odim1  requested size of output dimension 1
 * @param method interpolation kernel to dispatch to (nearest, bilinear or
 *               lower); for any other value no kernel is run
 * @return the newly created output array
 */
Array<T> resize(const Array<T> &in, const dim_t odim0, const dim_t odim1,
                const af_interp_type method) {
    const af::dim4 srcDims = in.dims();
    af::dim4 dstDims(odim0, odim1, srcDims[2], srcDims[3]);

    Array<T> result = createEmptyArray<T>(dstDims);

    // Dispatch on the interpolation method; unhandled methods fall through
    // and return the array exactly as createEmptyArray produced it.
    if (method == AF_INTERP_NEAREST) {
        kernel::resize<T, AF_INTERP_NEAREST>(result, in);
    } else if (method == AF_INTERP_BILINEAR) {
        kernel::resize<T, AF_INTERP_BILINEAR>(result, in);
    } else if (method == AF_INTERP_LOWER) {
        kernel::resize<T, AF_INTERP_LOWER>(result, in);
    }

    return result;
}
示例#5
0
/**
 * Looks up entries of `input` along dimension `dim` at the positions given
 * by `indices`, delegating the actual work to kernel::lookup.
 *
 * The output has the same dimensions as `input` except that dimension `dim`
 * has indices.elements() entries.
 *
 * @param input   array to read from
 * @param indices positions to pick along `dim`
 * @param dim     dimension (0..3) that is indexed; for any other value no
 *                kernel is launched
 * @return the newly created output array
 */
Array<in_t> lookup(const Array<in_t> &input,
                   const Array<idx_t> &indices, const unsigned dim)
{
    const dim4 srcDims = input.dims();

    // Output shape: input shape with the indexed dimension replaced.
    dim4 dstDims(1);
    for (int axis = 0; axis < 4; ++axis) {
        if (axis == int(dim)) {
            dstDims[axis] = indices.elements();
        } else {
            dstDims[axis] = srcDims[axis];
        }
    }

    Array<in_t> result = createEmptyArray<in_t>(dstDims);

    dim_t rank = srcDims.ndims();

    // The indexed dimension is a compile-time parameter of the kernel, so
    // each supported value needs its own instantiation.
    if (dim == 0) {
        kernel::lookup<in_t, idx_t, 0>(result, input, indices, rank);
    } else if (dim == 1) {
        kernel::lookup<in_t, idx_t, 1>(result, input, indices, rank);
    } else if (dim == 2) {
        kernel::lookup<in_t, idx_t, 2>(result, input, indices, rank);
    } else if (dim == 3) {
        kernel::lookup<in_t, idx_t, 3>(result, input, indices, rank);
    }

    return result;
}
示例#6
0
// Convolves `signal` with `filter` using FFTs.
//
// Steps, as performed below:
//   1. Pack the signal and the zero-padded filter into one shared buffer.
//   2. Run a batched in-place forward FFT over that buffer (FFTW).
//   3. Multiply the signal and filter spectra (complexMultiply).
//   4. Run a batched in-place inverse FFT over the same buffer.
//   5. Copy/reorder the result into a freshly created output array.
//
// Parameters:
//   signal - input data
//   filter - convolution kernel
//   expand - when true, output extents are sd + fd - 1 (see the output
//            dimension computation below); when false they follow the signal
//   kind   - batch mode; ONE2ONE and ONE2MANY are handled explicitly, every
//            other value takes the remaining branches
//
// Returns the newly created output array.
//
// NOTE(review): `convT`, `isDouble`, `roundOut` and `baseDim` are not declared
// in this block; presumably they are template parameters of this function -
// confirm against the declaration.
Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
                     const bool expand, ConvolveBatchKind kind)
{
    const af::dim4 sd = signal.dims();
    const af::dim4 fd = filter.dims();

    // Product of the FFT lengths; handed to reorderOutput at the end.
    // NOTE(review): presumably used there to normalize the inverse transform
    // - confirm in reorderOutput.
    dim_t fftScale = 1;

    af::dim4 packed_dims;
    int fft_dims[baseDim];
    af::dim4 sig_tmp_dims, sig_tmp_strides;
    af::dim4 filter_tmp_dims, filter_tmp_strides;

    // Pack both signal and filter on same memory array, this will ensure
    // better use of the batched FFT interface (FFTW's plan_many is used below).
    //
    // Packed extents:
    //   k <  baseDim : next power of two of the expanded size sd[k]+fd[k]-1
    //   k == baseDim : sd[k] + fd[k], i.e. signal and filter entries side by side
    //   k >  baseDim : 1
    for (dim_t k = 0; k < 4; k++) {
        if (k < baseDim)
            packed_dims[k] = nextpow2((unsigned)(sd[k] + fd[k] - 1));
        else if (k == baseDim)
            packed_dims[k] = sd[k] + fd[k];
        else
            packed_dims[k] = 1;

        if (k < baseDim) {
            // fft_dims is filled in reverse dimension order, and dimension 0
            // is halved (two values are packed per complex element, see the
            // packing comment further down).
            fft_dims[baseDim-k-1] = (k == 0) ? packed_dims[k] / 2 : packed_dims[k];
            fftScale *= fft_dims[baseDim-k-1];
        }
    }

    Array<convT> packed = createEmptyArray<convT>(packed_dims);
    convT *packed_ptr = packed.get();

    const af::dim4 packed_strides = packed.strides();

    // Describe the signal and filter regions inside the packed buffer: the
    // transformed dimensions use the packed extents, the remaining ones keep
    // the extents of the respective input.
    sig_tmp_dims[0]    = filter_tmp_dims[0] = packed_dims[0];
    sig_tmp_strides[0] = filter_tmp_strides[0] = 1;

    for (dim_t k = 1; k < 4; k++) {
        if (k < baseDim) {
            sig_tmp_dims[k]    = packed_dims[k];
            filter_tmp_dims[k] = packed_dims[k];
        }
        else {
            sig_tmp_dims[k]    = sd[k];
            filter_tmp_dims[k] = fd[k];
        }

        sig_tmp_strides[k]    = sig_tmp_strides[k - 1] * sig_tmp_dims[k - 1];
        filter_tmp_strides[k] = filter_tmp_strides[k - 1] * filter_tmp_dims[k - 1];
    }

    // Calculate memory offsets for packed signal and filter: the filter
    // region starts immediately after the last element of the signal region.
    convT *sig_tmp_ptr    = packed_ptr;
    convT *filter_tmp_ptr = packed_ptr + sig_tmp_strides[3] * sig_tmp_dims[3];

    // Number of packed complex elements in dimension 0
    dim_t sig_half_d0 = divup(sd[0], 2);

    // Pack signal in a complex matrix where first dimension is half the input
    // (allows faster FFT computation) and pad array to a power of 2 with 0s
    packData<convT, T>(sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides, signal);

    // Pad filter array with 0s
    padArray<convT, T>(filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides, filter);

    // Compute forward FFT, in place (input and output pointers are the same).
    // The plan covers packed_dims[baseDim] transforms of rank baseDim, i.e.
    // all signal and filter entries in one batch. The distance between
    // consecutive transforms is packed_strides[baseDim] / 2.
    // NOTE(review): the division by 2 presumably converts a stride counted in
    // convT elements into complex elements - confirm what convT is.
    // The double and single precision branches differ only in the FFTW API
    // prefix (fftw_ vs fftwf_).
    if (isDouble) {
        fftw_plan plan = fftw_plan_many_dft(baseDim,
                                            fft_dims,
                                            packed_dims[baseDim],
                                            (fftw_complex*)packed.get(),
                                            NULL,
                                            packed_strides[0],
                                            packed_strides[baseDim] / 2,
                                            (fftw_complex*)packed.get(),
                                            NULL,
                                            packed_strides[0],
                                            packed_strides[baseDim] / 2,
                                            FFTW_FORWARD,
                                            FFTW_ESTIMATE);

        fftw_execute(plan);
        fftw_destroy_plan(plan);
    }
    else {
        fftwf_plan plan = fftwf_plan_many_dft(baseDim,
                                              fft_dims,
                                              packed_dims[baseDim],
                                              (fftwf_complex*)packed.get(),
                                              NULL,
                                              packed_strides[0],
                                              packed_strides[baseDim] / 2,
                                              (fftwf_complex*)packed.get(),
                                              NULL,
                                              packed_strides[0],
                                              packed_strides[baseDim] / 2,
                                              FFTW_FORWARD,
                                              FFTW_ESTIMATE);

        fftwf_execute(plan);
        fftwf_destroy_plan(plan);
    }

    // Multiply filter and signal FFT arrays.
    // For ONE2MANY the first argument group is the filter region, otherwise
    // it is the signal region; the same region is later read by
    // reorderOutput, so it appears to be the destination of the product.
    if (kind == ONE2MANY)
        complexMultiply<convT>(filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides,
                               sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides,
                               filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides,
                               kind);
    else
        complexMultiply<convT>(sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides,
                               sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides,
                               filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides,
                               kind);

    // Compute inverse FFT: same plan layout as the forward pass, only the
    // direction flag changes to FFTW_BACKWARD. The whole packed buffer is
    // transformed again, in place.
    if (isDouble) {
        fftw_plan plan = fftw_plan_many_dft(baseDim,
                                            fft_dims,
                                            packed_dims[baseDim],
                                            (fftw_complex*)packed.get(),
                                            NULL,
                                            packed_strides[0],
                                            packed_strides[baseDim] / 2,
                                            (fftw_complex*)packed.get(),
                                            NULL,
                                            packed_strides[0],
                                            packed_strides[baseDim] / 2,
                                            FFTW_BACKWARD,
                                            FFTW_ESTIMATE);

        fftw_execute(plan);
        fftw_destroy_plan(plan);
    }
    else {
        fftwf_plan plan = fftwf_plan_many_dft(baseDim,
                                              fft_dims,
                                              packed_dims[baseDim],
                                              (fftwf_complex*)packed.get(),
                                              NULL,
                                              packed_strides[0],
                                              packed_strides[baseDim] / 2,
                                              (fftwf_complex*)packed.get(),
                                              NULL,
                                              packed_strides[0],
                                              packed_strides[baseDim] / 2,
                                              FFTW_BACKWARD,
                                              FFTW_ESTIMATE);

        fftwf_execute(plan);
        fftwf_destroy_plan(plan);
    }

    // Compute output dimensions.
    //   expand == true :
    //     ONE2ONE / ONE2MANY : sd[d] + fd[d] - 1 for all four dimensions
    //     other kinds        : sd[d] + fd[d] - 1 below baseDim, sd[d] above
    //   expand == false:
    //     signal dimensions; for ONE2MANY the dimensions from baseDim upward
    //     are taken from the filter instead
    dim4 oDims(1);
    if (expand) {
        for(dim_t d=0; d<4; ++d) {
            if (kind==ONE2ONE || kind==ONE2MANY) {
                oDims[d] = sd[d]+fd[d]-1;
            } else {
                oDims[d] = (d<baseDim ? sd[d]+fd[d]-1 : sd[d]);
            }
        }
    } else {
        oDims = sd;
        if (kind==ONE2MANY) {
            for (dim_t i=baseDim; i<4; ++i)
                oDims[i] = fd[i];
        }
    }

    Array<T> out = createEmptyArray<T>(oDims);
    T* out_ptr = out.get();
    const af::dim4 out_dims = out.dims();
    const af::dim4 out_strides = out.strides();

    const af::dim4 filter_dims = filter.dims();

    // Reorder the output: read the product back from whichever region
    // complexMultiply was given as its first argument group (filter region
    // for ONE2MANY, signal region otherwise) and write it into `out`.
    if (kind == ONE2MANY) {
        reorderOutput<T, convT, roundOut>
            (out_ptr, out_dims, out_strides,
             filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides,
             filter_dims, sig_half_d0, baseDim, fftScale, expand);
    }
    else {
        reorderOutput<T, convT, roundOut>
            (out_ptr, out_dims, out_strides,
             sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides,
             filter_dims, sig_half_d0, baseDim, fftScale, expand);
    }

    return out;
}