Example #1
0
    // Allocates a zero-filled output of size (ox, oy, in.dims()[2], in.dims()[3])
    // and lets wrap_dim fill it from `in`.
    //
    // wx/wy, sx/sy and px/py are handed to wrap_dim unchanged (presumably window
    // size, stride and padding -- confirm against wrap_dim). is_column selects
    // the boolean template argument of wrap_dim.
    Array<T> wrap(const Array<T> &in,
                  const dim_t ox, const dim_t oy,
                  const dim_t wx, const dim_t wy,
                  const dim_t sx, const dim_t sy,
                  const dim_t px, const dim_t py,
                  const bool is_column)
    {
        af::dim4 inDims = in.dims();
        af::dim4 outDims(ox, oy, inDims[2], inDims[3]);

        // The result starts out as all zeros.
        Array<T> result = createValueArray<T>(outDims, scalar<T>(0));

        const T *src = in.get();
        T *dst       = result.get();

        af::dim4 inStrides  = in.strides();
        af::dim4 outStrides = result.strides();

        if (!is_column) {
            wrap_dim<T, false>(dst, src, outDims, inDims, outStrides, inStrides,
                               wx, wy, sx, sy, px, py);
        } else {
            wrap_dim<T, true >(dst, src, outDims, inDims, outStrides, inStrides,
                               wx, wy, sx, sy, px, py);
        }

        return result;
    }
Example #2
0
// Builds one histogram per 2-D slice of `in`.
//
// The result has dimensions (nbins, 1, in.dims()[2], in.dims()[3]) and starts
// at zero. An element v is assigned to bin (int)((v - minval) / width), where
// width = (maxval - minval) / nbins is held in a float; the bin is then clamped
// to [0, nbins - 1], so out-of-range values land in the first or last bin.
//
// NOTE(review): each slice is read as dims[0]*dims[1] consecutive elements,
// i.e. the first two dimensions are assumed to be stored back to back --
// confirm that callers only pass such inputs.
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval)
{
    const float binWidth = (maxval - minval)/(float)nbins;

    const dim4 inDims    = in.dims();
    dim4 inStrides       = in.strides();
    dim4 outDims         = dim4(nbins,1,inDims[2],inDims[3]);
    Array<outType> out   = createValueArray<outType>(outDims, outType(0));
    dim4 outStrides      = out.strides();

    const dim_t perSlice = inDims[0]*inDims[1];
    const int   lastBin  = (int)(nbins - 1);

    outType *counts      = out.get();
    const inType *values = in.get();

    for (dim_t batch3 = 0; batch3 < outDims[3]; ++batch3) {
        for (dim_t batch2 = 0; batch2 < outDims[2]; ++batch2) {
            for (dim_t e = 0; e < perSlice; ++e) {
                int bin = (int)((values[e] - minval) / binWidth);
                // Clamp into the valid bin range.
                if (bin < 0)       { bin = 0; }
                if (bin > lastBin) { bin = lastBin; }
                counts[bin]++;
            }
            // Step both pointers to the next slice.
            values += inStrides[2];
            counts += outStrides[2];
        }
    }

    return out;
}
Example #3
0
// Solves a linear system from an already LU-factorised matrix.
//
// A       factorised matrix, passed to magma_getrs_gpu as-is.
// pivot   pivot indices; copied into a host-side vector for MAGMA.
// b       right-hand side(s); copied, and the copy is overwritten and returned.
// options only forwarded to the CPU implementation on the offload path.
//
// When OpenCLCPUOffload() is true the whole call is delegated to
// cpu::solveLU.
//
// NOTE(review): the `info` status written by magma_getrs_gpu is not inspected.
Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
                 const Array<T> &b, const af_mat_prop options)
{
    if(OpenCLCPUOffload()) {
        return cpu::solveLU(A, pivot, b, options);
    }

    int N = A.dims()[0];
    int NRHS = b.dims()[1];

    // data() rather than &ipiv[0]: indexing element 0 of an empty vector
    // (N == 0) is undefined behaviour.
    std::vector<int> ipiv(N);
    copyData(ipiv.data(), pivot);

    // MAGMA overwrites the right-hand side, so work on a copy of b.
    Array< T > B = copyArray<T>(b);

    const cl::Buffer *A_buf = A.get();
    cl::Buffer *B_buf = B.get();

    int info = 0;
    magma_getrs_gpu<T>(MagmaNoTrans, N, NRHS,
                       (*A_buf)(), A.getOffset(), A.strides()[1],
                       ipiv.data(),
                       (*B_buf)(), B.getOffset(), B.strides()[1],
                       getQueue()(), &info);
    return B;
}
Example #4
0
// Writes the transpose of `input` into `output`.
//
// For every (i, j, k, l) inside output.dims(), output(i, j, k, l) receives
// input(j, i, k, l). When `conjugate` is true the value is passed through
// getConjugate first.
void transpose(Array<T> output, const Array<T> input)
{
    const dim4 outDims    = output.dims();
    const dim4 outStrides = output.strides();
    const dim4 inStrides  = input.strides();

    T *dst             = output.get();
    const T *const src = input.get();

    for (dim_t d3 = 0; d3 < outDims[3]; ++d3) {
        for (dim_t d2 = 0; d2 < outDims[2]; ++d2) {
            // d2 and d3 are the batch dimensions. getIdx folds them into the
            // element offset, so neither pointer is advanced between batches.
            for (dim_t col = 0; col < outDims[1]; ++col) {
                for (dim_t row = 0; row < outDims[0]; ++row) {
                    // Source is addressed with row/col swapped.
                    const dim_t from = getIdx(inStrides, col, row, d2, d3);
                    const dim_t to   = getIdx(outStrides, row, col, d2, d3);
                    if (conjugate) {
                        dst[to] = getConjugate(src[from]);
                    } else {
                        dst[to] = src[from];
                    }
                }
            }
        }
    }
}
Example #5
0
// Solves a linear system by LU-factorising a copy of `a` and back-substituting
// into a copy of `b`.
//
// Both inputs are copied because magma_getrf_gpu and magma_getrs_gpu overwrite
// their matrix arguments. The solved copy of `b` is returned.
//
// NOTE(review): the `info` status written by the two MAGMA calls is not
// inspected.
Array<T> generalSolve(const Array<T> &a, const Array<T> &b)
{

    dim4 iDims = a.dims();
    int M = iDims[0];
    int N = iDims[1];
    int MN = std::min(M, N);
    std::vector<int> ipiv(MN);

    Array<T> A = copyArray<T>(a);
    Array<T> B = copyArray<T>(b);

    // data() rather than &ipiv[0]: indexing element 0 of an empty vector
    // (MN == 0) is undefined behaviour.
    cl::Buffer *A_buf = A.get();
    int info = 0;
    magma_getrf_gpu<T>(M, N, (*A_buf)(), A.getOffset(), A.strides()[1],
                       ipiv.data(), getQueue()(), &info);

    cl::Buffer *B_buf = B.get();
    int K = B.dims()[1];
    magma_getrs_gpu<T>(MagmaNoTrans, M, K,
                       (*A_buf)(), A.getOffset(), A.strides()[1],
                       ipiv.data(),
                       (*B_buf)(), B.getOffset(), B.strides()[1],
                       getQueue()(), &info);
    return B;
}
Example #6
0
    // Creates, for every column of `in`, a square matrix that carries that
    // column on its num-th diagonal and zeros everywhere else.
    //
    // The matrix side is in.dims()[0] + |num|; in.dims()[1] is the batch count.
    // An element at (row, col) is on the diagonal when row == col - num.
    Array<T> diagCreate(const Array<T> &in, const int num)
    {
        const int side    = in.dims()[0] + std::abs(num);
        const int batches = in.dims()[1];
        Array<T> out = createEmptyArray<T>(dim4(side, side, batches));

        const T *src = in.get();
        T *dst = out.get();

        const dim4 outStrides = out.strides();
        const dim4 inStrides  = in.strides();

        for (int b = 0; b < batches; ++b) {
            for (int col = 0; col < side; ++col) {
                T *column = dst + col * outStrides[1];
                for (int row = 0; row < side; ++row) {
                    if (row != col - num) {
                        column[row] = scalar<T>(0);
                    } else if (num > 0) {
                        // Diagonal above the main one: the row is the input index.
                        column[row] = src[row];
                    } else {
                        // Main diagonal or below: the column is the input index.
                        column[row] = src[col];
                    }
                }
            }
            dst += outStrides[2];
            src += inStrides[1];
        }

        return out;
    }
Example #7
0
// 3-D morphological filter (dilation when IsDilation is true, erosion
// otherwise), applied independently to every volume along dimension 3.
//
// For each voxel the result starts as the voxel's own value and is combined
// (max for dilation, min for erosion) with every neighbour that lies inside
// the volume and whose mask entry is greater than zero. The mask is centred
// using window[d] / 2 along each dimension.
void morph3d(Array<T> out, Array<T> const in, Array<T> const mask)
{
    const af::dim4 dims       = in.dims();
    const af::dim4 window     = mask.dims();
    const dim_t half0         = window[0]/2;
    const dim_t half1         = window[1]/2;
    const dim_t half2         = window[2]/2;
    const af::dim4 inStrides  = in.strides();
    const af::dim4 fltStrides = mask.strides();
    const dim_t batches       = dims[3];
    const af::dim4 outStrides = out.strides();
    T* dst                    = out.get();
    const T* src              = in.get();
    const T* flt              = mask.get();

    for (dim_t b = 0; b < batches; ++b) {
        for (dim_t z = 0; z < dims[2]; ++z) {
            for (dim_t y = 0; y < dims[1]; ++y) {
                for (dim_t x = 0; x < dims[0]; ++x) {
                    T acc = src[ getIdx(inStrides, x, y, z) ];

                    // fz, fy, fx walk the 3rd, 2nd and 1st dimension of the mask
                    for (dim_t fz = 0; fz < window[2]; fz++) {
                        for (dim_t fy = 0; fy < window[1]; fy++) {
                            for (dim_t fx = 0; fx < window[0]; fx++) {
                                const dim_t nz = z+fz-half2;
                                const dim_t ny = y+fy-half1;
                                const dim_t nx = x+fx-half0;

                                const T weight = flt[ getIdx(fltStrides, fx, fy, fz) ];

                                const bool inside = nx>=0 && ny>=0 && nz>=0 &&
                                                    nx<dims[0] && ny<dims[1] && nz<dims[2];

                                // Skip masked-out taps and taps outside the volume.
                                if (!(weight > (T)0) || !inside) continue;

                                const T neighbour = src[ getIdx(inStrides, nx, ny, nz) ];
                                acc = IsDilation ? std::max(acc, neighbour)
                                                 : std::min(acc, neighbour);
                            } // mask 1st dimension
                        } // mask 2nd dimension
                    } // mask 3rd dimension

                    dst[ getIdx(outStrides, x, y, z) ] = acc;
                } // 1st dimension
            } // 2nd dimension
        } // 3rd dimension

        // Advance both pointers to the next batch entry.
        dst += outStrides[3];
        src += inStrides[3];
    }
}
Example #8
0
// Element-wise select: out = cond ? a : b, with per-dimension broadcasting.
//
// For each of cond, a and b, a dimension whose extent differs from the
// output's contributes no offset (its stride is multiplied by false), so the
// operand's first entry along that dimension is reused for every output
// index.
//
// Offsets are kept in the type that the stride arithmetic produces instead of
// being narrowed to int, so they do not overflow on arrays with more than
// INT_MAX elements. The loop counters themselves remain int.
void select(Array<T> out, const Array<char> cond, const Array<T> a, const Array<T> b)
{
    af::dim4 adims = a.dims();
    af::dim4 astrides = a.strides();
    af::dim4 bdims = b.dims();
    af::dim4 bstrides = b.strides();

    af::dim4 cdims = cond.dims();
    af::dim4 cstrides = cond.strides();

    af::dim4 odims = out.dims();
    af::dim4 ostrides = out.strides();

    // is_x_same[d]: operand x spans the full output extent along dimension d.
    bool is_a_same[] = {adims[0] == odims[0], adims[1] == odims[1],
        adims[2] == odims[2], adims[3] == odims[3]};

    bool is_b_same[] = {bdims[0] == odims[0], bdims[1] == odims[1],
        bdims[2] == odims[2], bdims[3] == odims[3]};

    bool is_c_same[] = {cdims[0] == odims[0], cdims[1] == odims[1],
        cdims[2] == odims[2], cdims[3] == odims[3]};

    const T *aptr = a.get();
    const T *bptr = b.get();
    T *optr = out.get();
    const char *cptr = cond.get();

    for (int l = 0; l < odims[3]; l++) {

        // `auto` keeps the full width of the stride products.
        const auto o_off3 = ostrides[3] * l;
        const auto a_off3 = astrides[3] * is_a_same[3] * l;
        const auto b_off3 = bstrides[3] * is_b_same[3] * l;
        const auto c_off3 = cstrides[3] * is_c_same[3] * l;

        for (int k = 0; k < odims[2]; k++) {

            const auto o_off2 = ostrides[2] * k + o_off3;
            const auto a_off2 = astrides[2] * is_a_same[2] * k + a_off3;
            const auto b_off2 = bstrides[2] * is_b_same[2] * k + b_off3;
            const auto c_off2 = cstrides[2] * is_c_same[2] * k + c_off3;

            for (int j = 0; j < odims[1]; j++) {

                const auto o_off1 = ostrides[1] * j + o_off2;
                const auto a_off1 = astrides[1] * is_a_same[1] * j + a_off2;
                const auto b_off1 = bstrides[1] * is_b_same[1] * j + b_off2;
                const auto c_off1 = cstrides[1] * is_c_same[1] * j + c_off2;

                for (int i = 0; i < odims[0]; i++) {

                    bool cval = is_c_same[0] ? cptr[c_off1 + i] : cptr[c_off1];
                    T    aval = is_a_same[0] ? aptr[a_off1 + i] : aptr[a_off1];
                    T    bval = is_b_same[0] ? bptr[b_off1 + i] : bptr[b_off1];
                    T    oval = cval ? aval : bval;
                    optr[o_off1 + i] = oval;
                }
            }
        }
    }
}
Example #9
0
// Accumulates one histogram per 2-D slice of `in` into `out`.
//
// An element v is assigned to bin (int)((v - minval) / step), where
// step = (maxval - minval) / nbins is held in a float; the bin is clamped to
// [0, nbins - 1]. Counts are added to whatever `out` already holds, so the
// function itself does not zero it.
//
// When IsLinear is true a slice is read as consecutive elements; otherwise
// the element offset is rebuilt from the row (i % dims[0]) and the column
// (i / dims[0]) using the input's dimension-1 stride.
void histogram(Array<OutT> out, Array<InT> const in,
               unsigned const nbins, double const minval, double const maxval)
{
    dim4 const outDims   = out.dims();
    float const step     = (maxval - minval)/(float)nbins;
    dim4 const inDims    = in.dims();
    dim4 const iStrides  = in.strides();
    dim4 const oStrides  = out.strides();
    dim_t const nElems   = inDims[0]*inDims[1];

    OutT *outData    = out.get();
    const InT* inData= in.get();

    for(dim_t b3 = 0; b3 < outDims[3]; b3++) {
        for(dim_t b2 = 0; b2 < outDims[2]; b2++) {
            for(dim_t i=0; i<nElems; i++) {
                // Keep the offset in dim_t: narrowing it to int would
                // truncate offsets beyond INT_MAX.
                dim_t const idx = IsLinear ? i : ((i % inDims[0]) + (i / inDims[0])*iStrides[1]);
                int bin = (int)((inData[idx] - minval) / step);
                bin = std::max(bin, 0);
                bin = std::min(bin, (int)(nbins - 1));
                outData[bin]++;
            }
            // Move on to the next slice.
            inData  += iStrides[2];
            outData += oStrides[2];
        }
    }
}
Example #10
0
    // Interpolates `in` at the positions given by pos0/pos1.
    //
    // The result takes its first two dimensions from pos0 and the remaining
    // ones from `in`. Only AF_INTERP_NEAREST and AF_INTERP_LINEAR are
    // dispatched; for any other method the freshly created (unfilled) array is
    // returned untouched. offGrid is forwarded to approx2_ unchanged.
    Array<Ty> *approx2(const Array<Ty> &in, const Array<Tp> &pos0, const Array<Tp> &pos1,
                       const af_interp_type method, const float offGrid)
    {
        af::dim4 resultDims = in.dims();
        resultDims[0] = pos0.dims()[0];
        resultDims[1] = pos0.dims()[1];

        // Output placeholder; contents are written by approx2_ below.
        Array<Ty> *result = createEmptyArray<Ty>(resultDims);

        if (method == AF_INTERP_NEAREST) {
            approx2_<Ty, Tp, AF_INTERP_NEAREST>
                    (result->get(), result->dims(), result->elements(),
                     in.get(), in.dims(), in.elements(),
                     pos0.get(), pos0.dims(), pos1.get(), pos1.dims(),
                     result->strides(), in.strides(), pos0.strides(), pos1.strides(),
                     offGrid);
        } else if (method == AF_INTERP_LINEAR) {
            approx2_<Ty, Tp, AF_INTERP_LINEAR>
                    (result->get(), result->dims(), result->elements(),
                     in.get(), in.dims(), in.elements(),
                     pos0.get(), pos0.dims(), pos1.get(), pos1.dims(),
                     result->strides(), in.strides(), pos0.strides(), pos1.strides(),
                     offGrid);
        }

        return result;
    }
Example #11
0
// 2-D morphological filter (dilation when isDilation is true, erosion
// otherwise), applied independently to every slice along dimensions 2 and 3.
//
// For each pixel the result starts as the pixel's own value and is combined
// (max for dilation, min for erosion) with every neighbour that lies inside
// the slice and whose mask entry is greater than zero. The mask is centred
// using window[d] / 2 along each dimension.
Array<T> morph(const Array<T> &in, const Array<T> &mask)
{
    const dim4 dims       = in.dims();
    const dim4 window     = mask.dims();
    const dim_t half0     = window[0]/2;
    const dim_t half1     = window[1]/2;
    const dim4 inStrides  = in.strides();
    const dim4 fltStrides = mask.strides();

    Array<T> out          = createEmptyArray<T>(dims);
    const dim4 outStrides = out.strides();

    T* dst                = out.get();
    const T* src          = in.get();
    const T* flt          = mask.get();

    for (dim_t batch3 = 0; batch3 < dims[3]; ++batch3) {
        for (dim_t batch2 = 0; batch2 < dims[2]; ++batch2) {
            for (dim_t y = 0; y < dims[1]; ++y) {
                for (dim_t x = 0; x < dims[0]; ++x) {
                    T acc = src[ getIdx(inStrides, x, y) ];

                    // fy, fx walk the 2nd and 1st dimension of the mask
                    for (dim_t fy = 0; fy < window[1]; fy++) {
                        for (dim_t fx = 0; fx < window[0]; fx++) {
                            const dim_t ny = y+fy-half1;
                            const dim_t nx = x+fx-half0;

                            const T weight = flt[ getIdx(fltStrides, fx, fy) ];

                            const bool inside = nx>=0 && ny>=0 && nx<dims[0] && ny<dims[1];

                            // Skip masked-out taps and taps outside the slice.
                            if (!(weight > (T)0) || !inside) continue;

                            const T neighbour = src[ getIdx(inStrides, nx, ny) ];
                            acc = isDilation ? std::max(acc, neighbour)
                                             : std::min(acc, neighbour);
                        } // mask 1st dimension
                    } // mask 2nd dimension

                    dst[ getIdx(outStrides, x, y) ] = acc;
                } // 1st dimension
            } // 2nd dimension

            // Advance both pointers to the next slice.
            dst += outStrides[2];
            src += inStrides[2];
        }
    }

    return out;
}
Example #12
0
// Accumulates the windows stored in `in` back into `out`.
//
// Windows are laid out along dimension `d` of `in`; window number `win` sits at
// grid position (win % nx, win / nx), where nx is the number of window
// positions along the first output dimension. Each window covers wx * wy
// elements, is placed with stride (sx, sy) and shifted by the padding (px, py).
// Elements that fall outside `out` are dropped; overlapping ones are summed,
// so `out` is expected to hold the initial values to add to.
void wrap_dim(Array<T> out, const Array<T> in, const dim_t wx, const dim_t wy,
              const dim_t sx, const dim_t sy, const dim_t px, const dim_t py)
{
    const T *src = in.get();
    T *dst       = out.get();

    af::dim4 inDims     = in.dims();
    af::dim4 outDims    = out.dims();
    af::dim4 inStrides  = in.strides();
    af::dim4 outStrides = out.strides();

    const dim_t windowsPerRow = (outDims[0] + 2 * px - wx) / sx + 1;

    for (dim_t b3 = 0; b3 < inDims[3]; ++b3) {
        for (dim_t b2 = 0; b2 < inDims[2]; ++b2) {

            const T *srcSlice = src + (b3 * inStrides[3]  + b2 * inStrides[2]);
            T *dstSlice       = dst + (b3 * outStrides[3] + b2 * outStrides[2]);

            for (dim_t win = 0; win < inDims[d]; ++win) {
                // Start of this window's data in the input
                const T *patch = srcSlice + win * inStrides[d];

                // Top-left corner of the window in output coordinates
                // (negative when it starts inside the padding)
                const dim_t x0 = (win % windowsPerRow) * sx - px;
                const dim_t y0 = (win / windowsPerRow) * sy - py;

                // Shortcut: the whole window lies inside the output
                const bool fullyInside = (x0 >= 0 && x0 + wx < outDims[0] &&
                                          y0 >= 0 && y0 + wy < outDims[1]);

                for (dim_t y = 0; y < wy; ++y) {
                    const dim_t oy = y0 + y;

                    for (dim_t x = 0; x < wx; ++x) {
                        const dim_t ox = x0 + x;

                        dim_t srcOff = (y * wx + x);
                        if (d == 0) srcOff *= inStrides[1];

                        const bool writable = fullyInside ||
                            (ox >= 0 && ox < outDims[0] && oy >= 0 && oy < outDims[1]);
                        if (!writable) continue;

                        // FIXME: When using threads, atomize this
                        dstSlice[oy * outStrides[1] + ox * outStrides[0]] += patch[srcOff];
                    }
                }
            }
        }
    }
}
Example #13
0
// Dense matrix product of lhs and rhs through clBLAS.
//
// optLhs / optRhs are converted with toClblasTranspose and decide which
// dimension of each operand supplies the rows/columns of the product. The
// result is an M x N array. When the selected column dimension of rhs is 1
// the product is computed with gemv, otherwise with gemm.
//
// Throws runtime_error("CLBLAS error: <code>") when the clBLAS call returns a
// non-zero status.
Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
                af_blas_transpose optLhs, af_blas_transpose optRhs)
{
    initBlas();
    clblasTranspose lOpts = toClblasTranspose(optLhs);
    clblasTranspose rOpts = toClblasTranspose(optRhs);

    // Which stored dimension acts as row / column after the optional transpose.
    int aRowDim = (lOpts == clblasNoTrans) ? 0 : 1;
    int aColDim = (lOpts == clblasNoTrans) ? 1 : 0;
    int bColDim = (rOpts == clblasNoTrans) ? 1 : 0;

    dim4 lDims = lhs.dims();
    dim4 rDims = rhs.dims();
    int M = lDims[aRowDim];
    int N = rDims[bColDim];
    int K = lDims[aColDim];

    //FIXME: Leaks on errors.
    Array<T> out = createEmptyArray<T>(af::dim4(M, N, 1, 1));
    auto alpha = scalar<T>(1);
    auto beta  = scalar<T>(0);

    dim4 lStrides = lhs.strides();
    dim4 rStrides = rhs.strides();
    clblasStatus err;
    cl::Event event;
    if(rDims[bColDim] == 1) {
        // Matrix-vector case.
        // NOTE(review): N is reassigned here but not read again afterwards.
        N = lDims[aColDim];
        gemv_func<T> gemv;
        // gemv receives the stored (untransposed) extents of lhs.
        err = gemv(
            clblasColumnMajor, lOpts,
            lDims[0], lDims[1],
            alpha,
            (*lhs.get())(),    lhs.getOffset(),   lStrides[1],
            (*rhs.get())(),    rhs.getOffset(),   rStrides[0],
            beta ,
            (*out.get())(),   out.getOffset(),             1,
            1, &getQueue()(), 0, nullptr, &event());
    } else {
        // General matrix-matrix case.
        gemm_func<T> gemm;
        err = gemm(
                clblasColumnMajor, lOpts, rOpts,
                M, N, K,
                alpha,
                (*lhs.get())(),    lhs.getOffset(),   lStrides[1],
                (*rhs.get())(),    rhs.getOffset(),   rStrides[1],
                beta,
                (*out.get())(),   out.getOffset(),  out.dims()[0],
                1, &getQueue()(), 0, nullptr, &event());

    }
    // Any non-zero clBLAS status is reported as an exception.
    if(err) {
        throw runtime_error(std::string("CLBLAS error: ") + std::to_string(err));
    }

    return out;
}
Example #14
0
// Transposes `in` in place, picking the conjugating or the plain kernel
// instantiation from the runtime flag `conjugate`.
void transpose_inplace(Array<T> &in, const bool conjugate)
{
    // The kernel works directly on the array's own storage.
    T* data = in.get();

    if (!conjugate) {
        transpose_inplace<T, false>(data, in.dims(), in.strides());
        return;
    }

    transpose_inplace<T, true >(data, in.dims(), in.strides());
}
Example #15
0
// Converts HSV data to RGB.
//
// Dimension 2 holds the three channels (H, S, V on input; R, G, B on output)
// and dimension 3 is the batch. The sector is (int)(H * 6) % 6; if that is
// negative no case matches and the pixel is written as (0, 0, 0).
//
// The output is addressed with its own strides rather than the input's. The
// output is a freshly created array, so the two layouts need not match (for
// example when `in` is a strided view); indexing it with the input strides
// would then write to the wrong locations.
Array<T> hsv2rgb(const Array<T>& in)
{
    const dim4 dims     = in.dims();
    const dim4 strides  = in.strides();
    Array<T> out        = createEmptyArray<T>(dims);
    const dim4 oStrides = out.strides();
    dim_type bCount     = dims[3];

    for(dim_type b=0; b<bCount; ++b) {
        const T* src = in.get() + b * strides[3];
        T* dst       = out.get() + b * oStrides[3];

        for(dim_type j=0; j<dims[1]; ++j) {
            // j steps along 2nd dimension
            dim_type ij = j * strides[1];
            dim_type oj = j * oStrides[1];

            for(dim_type i=0; i<dims[0]; ++i) {
                // i steps along 1st dimension
                dim_type hIdx = i * strides[0] + ij;
                dim_type sIdx = hIdx + strides[2];
                dim_type vIdx = sIdx + strides[2];

                dim_type rIdx = i * oStrides[0] + oj;
                dim_type gIdx = rIdx + oStrides[2];
                dim_type bIdx = gIdx + oStrides[2];

                T H = src[hIdx];
                T S = src[sIdx];
                T V = src[vIdx];

                T R, G, B;
                R = G = B = 0;

                // m is the hue sector, f the position inside that sector
                int   m = (int)(H * 6);
                T f = H * 6 - m;
                T p = V * (1 - S);
                T q = V * (1 - f * S);
                T t = V * (1 - (1 - f) * S);

                switch (m % 6) {
                    case 0: R = V, G = t, B = p; break;
                    case 1: R = q, G = V, B = p; break;
                    case 2: R = p, G = V, B = t; break;
                    case 3: R = p, G = q, B = V; break;
                    case 4: R = t, G = p, B = V; break;
                    case 5: R = V, G = p, B = q; break;
                }

                dst[rIdx] = R;
                dst[gIdx] = G;
                dst[bIdx] = B;
            }
        }
    }

    return out;
}
Example #16
0
// Converts RGB data to HSV.
//
// Dimension 2 holds the three channels (R, G, B on input; H, S, V on output)
// and dimension 3 is the batch. Hue is scaled by 1/6; saturation is 0 when
// the maximum channel is 0; value is the maximum channel.
Array<T> rgb2hsv(const Array<T>& in)
{
    const dim4 dims     = in.dims();
    const dim4 strides  = in.strides();
    Array<T> out        = createEmptyArray<T>(dims);
    dim4 outStrides     = out.strides();
    const dim_t batches = dims[3];

    for (dim_t b = 0; b < batches; ++b) {
        const T* src = in.get() + b * strides[3];
        T* dst       = out.get() + b * outStrides[3];

        for (dim_t col = 0; col < dims[1]; ++col) {
            const dim_t outCol = col * outStrides[1];
            const dim_t inCol  = col * strides[1];

            for (dim_t row = 0; row < dims[0]; ++row) {
                // Offsets of the three output channels for this pixel
                const dim_t hOut = row * outStrides[0] + outCol;
                const dim_t sOut = hOut + outStrides[2];
                const dim_t vOut = sOut + outStrides[2];

                // Offsets of the three input channels for this pixel
                const dim_t rIn = row * strides[0] + inCol;
                const dim_t gIn = rIn + strides[2];
                const dim_t bIn = gIn + strides[2];

                const T R = src[rIn];
                const T G = src[gIn];
                const T B = src[bIn];

                const T Cmax  = std::max(std::max(R, G), B);
                const T Cmin  = std::min(std::min(R, G), B);
                const T delta = Cmax-Cmin;

                T H = 0;

                if (Cmax!=Cmin) {
                    // Deliberately three independent tests: when several
                    // channels tie for the maximum, the last match wins.
                    if (Cmax==R) { H = (G-B)/delta + (G<B ? 6 : 0); }
                    if (Cmax==G) { H = (B-R)/delta + 2; }
                    if (Cmax==B) { H = (R-G)/delta + 4; }
                    H = H / 6.0f;
                }

                dst[hOut] = H;
                if (Cmax==0.0f) {
                    dst[sOut] = 0;
                } else {
                    dst[sOut] = delta/Cmax;
                }
                dst[vOut] = Cmax;
            }
        }
    }

    return out;
}
Example #17
0
// Dense matrix product of lhs and rhs through cuBLAS.
//
// optLhs / optRhs are converted with toCblasTranspose and decide which
// dimension of each operand supplies the rows/columns of the product. The
// result is an M x N array. When the selected column dimension of rhs is 1
// the product is computed with gemv, otherwise with gemm. Failures are
// handled by CUBLAS_CHECK.
Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
                af_mat_prop optLhs, af_mat_prop optRhs)
{
    cublasOperation_t lOpts = toCblasTranspose(optLhs);
    cublasOperation_t rOpts = toCblasTranspose(optRhs);

    // Which stored dimension acts as row / column after the optional transpose.
    int aRowDim = (lOpts == CUBLAS_OP_N) ? 0 : 1;
    int aColDim = (lOpts == CUBLAS_OP_N) ? 1 : 0;
    int bColDim = (rOpts == CUBLAS_OP_N) ? 1 : 0;

    dim4 lDims = lhs.dims();
    dim4 rDims = rhs.dims();
    int M = lDims[aRowDim];
    int N = rDims[bColDim];
    int K = lDims[aColDim];

    Array<T> out = createEmptyArray<T>(af::dim4(M, N, 1, 1));
    T alpha = scalar<T>(1);
    T beta  = scalar<T>(0);

    dim4 lStrides = lhs.strides();
    dim4 rStrides = rhs.strides();
    if(rDims[bColDim] == 1) {
        // Matrix-vector case: gemv receives the stored (untransposed)
        // extents of lhs. (A former reassignment of N here was never read
        // and has been dropped.)
        CUBLAS_CHECK(gemv_func<T>()(
                         getHandle(),
                         lOpts,
                         lDims[0],
                         lDims[1],
                         &alpha,
                         lhs.get(), lStrides[1],
                         rhs.get(), rStrides[0],
                         &beta,
                         out.get(), 1));
    } else {
        // General matrix-matrix case.
        CUBLAS_CHECK(gemm_func<T>()(
                         getHandle(),
                         lOpts,
                         rOpts,
                         M, N, K,
                         &alpha,
                         lhs.get(), lStrides[1],
                         rhs.get(), rStrides[1],
                         &beta,
                         out.get(),
                         out.dims()[0]));
    }

    return out;
}
Example #18
0
    // Indexed reduction of `in` along dimension `dim`.
    //
    // Reduced values are written to `out` and the matching positions to `loc`;
    // both must already be allocated by the caller. The work is done by
    // ireduce_dim<op, T, N>, with N chosen from in.ndims(). For an ndims value
    // outside 1..4 nothing is written.
    void ireduce(Array<T> &out, Array<uint> &loc,
                 const Array<T> &in, const int dim)
    {
        switch (in.ndims()) {
        case 1:
            ireduce_dim<op, T, 1>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;

        case 2:
            ireduce_dim<op, T, 2>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;

        case 3:
            ireduce_dim<op, T, 3>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;

        case 4:
            ireduce_dim<op, T, 4>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;
        }
    }
Example #19
0
    // Scans `in` along dimension `dim` and returns the result.
    //
    // The output has the same dimensions as the input and is created filled
    // with 0. The work is done by scan_dim<op, Ti, To, N>, with N chosen from
    // in.ndims(); for an ndims value outside 1..4 the zero-filled array is
    // returned unchanged.
    Array<To> scan(const Array<Ti>& in, const int dim)
    {
        dim4 dims = in.dims();

        Array<To> out = createValueArray<To>(dims, 0);

        const auto rank = in.ndims();

        if (rank == 1) {
            scan_dim<op, Ti, To, 1>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(), dim);
        } else if (rank == 2) {
            scan_dim<op, Ti, To, 2>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(), dim);
        } else if (rank == 3) {
            scan_dim<op, Ti, To, 3>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(), dim);
        } else if (rank == 4) {
            scan_dim<op, Ti, To, 4>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(), dim);
        }

        return out;
    }
Example #20
0
// Dot product of lhs and rhs through cuBLAS, returned as a one-element array.
//
// The length is taken from lhs.dims()[0]; each operand is stepped with its own
// dimension-0 stride. optLhs and optRhs are accepted but not used here.
Array<T> dot(const Array<T> &lhs, const Array<T> &rhs,
             af_mat_prop optLhs, af_mat_prop optRhs)
{
    // Filled in by the cuBLAS call below.
    T result;

    const int count = lhs.dims()[0];

    CUBLAS_CHECK(dot_func<T>()(getHandle(),
                               count,
                               lhs.get(), lhs.strides()[0],
                               rhs.get(), rhs.strides()[0],
                               &result));

    return createValueArray(af::dim4(1), result);
}
// Product of a sparse lhs with a dense rhs.
//
// optLhs selects plain, transposed or conjugate-transposed use of lhs; optRhs
// is accepted but not used here. The result is an M x N array initialised to
// zero, where M comes from lhs (row or column count depending on optLhs) and
// N is rhs.dims()[1]. A single-column rhs goes to the matrix-vector helpers
// (mv / mtv), anything else to the matrix-matrix helpers (mm / mtm).
Array<T> matmul(const common::SparseArray<T> lhs, const Array<T> rhs,
                af_mat_prop optLhs, af_mat_prop optRhs)
{
    lhs.eval();
    rhs.eval();

    // Same role as the transpose option of a GEMM call
    sparse_operation_t lOpts = toSparseTranspose(optLhs);

    int lRowDim = (lOpts == SPARSE_OPERATION_NON_TRANSPOSE) ? 0 : 1;

    static const int rColDim = 1;

    dim4 lDims = lhs.dims();
    dim4 rDims = rhs.dims();
    int M = lDims[lRowDim];
    int N = rDims[rColDim];

    Array<T> out = createValueArray<T>(af::dim4(M, N, 1, 1), scalar<T>(0));
    out.eval();

    // Column strides of the dense operands
    int ldb = rhs.strides()[1];
    int ldc = out.strides()[1];

    Array<T  > values = lhs.getValues();
    Array<int> rowIdx = lhs.getRowIdx();
    Array<int> colIdx = lhs.getColIdx();

    const bool rhsIsVector = (rDims[rColDim] == 1);

    if (lOpts == SPARSE_OPERATION_NON_TRANSPOSE) {
        if (rhsIsVector) {
            mv<T, false>(out, values, rowIdx, colIdx, rhs, M);
        } else {
            mm<T, false>(out, values, rowIdx, colIdx, rhs, M, N, ldb, ldc);
        }
    } else if (lOpts == SPARSE_OPERATION_TRANSPOSE) {
        if (rhsIsVector) {
            mtv<T, false>(out, values, rowIdx, colIdx, rhs, M);
        } else {
            mtm<T, false>(out, values, rowIdx, colIdx, rhs, M, N, ldb, ldc);
        }
    } else if (lOpts == SPARSE_OPERATION_CONJUGATE_TRANSPOSE) {
        if (rhsIsVector) {
            mtv<T, true>(out, values, rowIdx, colIdx, rhs, M);
        } else {
            mtm<T, true>(out, values, rowIdx, colIdx, rhs, M, N, ldb, ldc);
        }
    }

    return out;
}
Example #22
0
// Runs a complex-to-complex clFFT transform of `in`, in place.
//
// `rank` is the number of transformed dimensions; the product of the remaining
// dimensions is the batch count. The transform is forward when `direction` is
// true and backward otherwise. Input and output share the same buffer and the
// same strides.
void fft_inplace(Array<T> &in)
{
    verifySupported<rank>(in.dims());

    size_t lengths[4], strides[4];
    computeDims(lengths, in.dims());
    computeDims(strides, in.strides());

    clfftPlanHandle plan;

    // Every dimension beyond the transformed ones contributes to the batch.
    int batch = 1;
    for (int d = rank; d < 4; ++d) {
        batch *= lengths[d];
    }

    find_clfft_plan(plan,
                    CLFFT_COMPLEX_INTERLEAVED,
                    CLFFT_COMPLEX_INTERLEAVED,
                    (clfftDim)rank, lengths,
                    strides, strides[rank],
                    strides, strides[rank],
                    (clfftPrecision)Precision<T>::type,
                    batch);

    cl_mem buffer = (*in.get())();
    cl_command_queue queue = getQueue()();

    // The same buffer is passed as input and output.
    CLFFT_CHECK(clfftEnqueueTransform(plan,
                                      direction ? CLFFT_FORWARD : CLFFT_BACKWARD,
                                      1, &queue, 0, NULL, NULL,
                                      &buffer, &buffer, NULL));
}
Example #23
0
// Transposes every 2-D slice of `input` in place; when `conjugate` is true the
// result is the conjugate transpose.
//
// Only the strictly lower triangle is visited and each element is swapped with
// its mirror in the upper triangle. In the conjugating case the diagonal is
// conjugated as well: those elements stay where they are, but a conjugate
// transpose still has to conjugate them.
//
// NOTE(review): the index pattern assumes dims[0] == dims[1] -- confirm that
// callers only pass square slices.
void transpose_inplace(Array<T> input)
{
    const dim4 idims    = input.dims();
    const dim4 istrides = input.strides();

    T * in = input.get();

    for (dim_t l = 0; l < idims[3]; ++l) {
        for (dim_t k = 0; k < idims[2]; ++k) {
            // Outermost loops handle batch mode
            // if input has no data along the third dimension
            // this loop runs only once
            for (dim_t j = 0; j < idims[1]; ++j) {
                // Diagonal element: not swapped, but it must be conjugated.
                // The bounds test keeps the access valid for any shape.
                if (conjugate && j < idims[0]) {
                    const dim_t dIdx = getIdx(istrides,j,j,k,l);
                    in[dIdx] = getConjugate(in[dIdx]);
                }

                // Run only bottom triangle. std::swap swaps with upper triangle
                for (dim_t i = j + 1; i < idims[0]; ++i) {
                    // calculate array indices based on offsets and strides
                    // the helper getIdx takes care of indices
                    const dim_t iIdx  = getIdx(istrides,j,i,k,l);
                    const dim_t oIdx = getIdx(istrides,i,j,k,l);
                    if(conjugate) {
                        in[iIdx] = getConjugate(in[iIdx]);
                        in[oIdx] = getConjugate(in[oIdx]);
                        std::swap(in[iIdx], in[oIdx]);
                    }
                    else {
                        std::swap(in[iIdx], in[oIdx]);
                    }
                }
            }
        }
    }
}
Example #24
0
    // Creates a sub-array of `parent` described by the sequences in `index`.
    //
    // The parent is evaluated first. The resulting dimensions, strides and
    // offsets come from toDims / toStride / toOffset; the linear offset is the
    // parent's own offset plus the per-dimension offsets weighted by the
    // parent's strides.
    //
    // When `copy` is false the view is returned as-is. When it is true, the
    // view is replaced by copyArray(view) if its first stride is not 1 or any
    // higher stride is negative.
    Array<T> createSubArray(const Array<T>& parent,
                            const std::vector<af_seq> &index,
                            bool copy)
    {
        parent.eval();

        dim4 dataDims   = parent.getDataDims();
        dim4 parentDims = parent.dims();

        dim4 dims    = toDims  (index, parentDims);
        dim4 strides = toStride (index, dataDims);

        // Accumulate the linear offset of the view inside the parent's data
        dim4 seqOffsets    = toOffset(index, parentDims);
        dim4 parentStrides = parent.strides();
        dim_t linearOffset = parent.getOffset();
        for (int d = 0; d < 4; ++d) {
            linearOffset += seqOffsets[d] * parentStrides[d];
        }

        Array<T> out = Array<T>(parent, dims, linearOffset, strides);

        const bool irregular = strides[0] != 1 ||
                               strides[1] <  0 ||
                               strides[2] <  0 ||
                               strides[3] <  0;

        if (copy && irregular) {
            out = copyArray(out);
        }

        return out;
    }
Example #25
0
    // Reduces every element of `in` to a single value.
    //
    // Each element is first converted with Transform<Ti, To, op> and then folded
    // into the running result with Binary<To, op>, starting from reduce.init().
    // Elements are visited with dimension 0 innermost and dimension 3 outermost.
    To reduce_all(const Array<Ti> &in)
    {
        Transform<Ti, To, op> transform;
        Binary<To, op> reduce;

        To acc = reduce.init();

        af::dim4 dims = in.dims();
        af::dim4 strides = in.strides();
        const Ti *src = in.get();

        for (dim_t d3 = 0; d3 < dims[3]; ++d3) {
            for (dim_t d2 = 0; d2 < dims[2]; ++d2) {
                for (dim_t d1 = 0; d1 < dims[1]; ++d1) {
                    // Offset of the first element of this column
                    const dim_t base = d1 * strides[1] + d2 * strides[2] + d3 * strides[3];

                    for (dim_t d0 = 0; d0 < dims[0]; ++d0) {
                        To val = transform(src[d0 + base]);
                        acc = reduce(val, acc);
                    }
                }
            }
        }

        return acc;
    }
Example #26
0
// Copies `in` into `out_ptr` as interleaved pairs, zero-filling the rest.
//
// For every output position the first slot of the pair receives the input
// element converted to To (or 0 when the position lies outside the input's
// dims); the second slot is always set to 0.
//
// out_ptr : destination buffer.
// od      : output dims; od[0] / 2 pairs are written along dim 0.
// os      : output strides used for dims 1-3; along dim 0 the pair for
//           position d0 is written at offset 2 * d0.
// in      : source array; read at d0 + d1*strides[1] + d2*strides[2] +
//           d3*strides[3].
void padArray(To* out_ptr, const af::dim4& od, const af::dim4& os,
              Array<Ti> const& in)
{
    const af::dim4 inDims    = in.dims();
    const af::dim4 inStrides = in.strides();
    const Ti* src = in.get();

    // Loop bounds, narrowed to int exactly once.
    const int outPairs0 = static_cast<int>(od[0]) / 2;
    const int outLen1   = static_cast<int>(od[1]);
    const int outLen2   = static_cast<int>(od[2]);
    const int outLen3   = static_cast<int>(od[3]);

    const int inLen0 = static_cast<int>(inDims[0]);
    const int inLen1 = static_cast<int>(inDims[1]);
    const int inLen2 = static_cast<int>(inDims[2]);
    const int inLen3 = static_cast<int>(inDims[3]);

    for (int w = 0; w < outLen3; ++w) {
        const bool wInside = w < inLen3;

        for (int z = 0; z < outLen2; ++z) {
            const bool zInside = wInside && z < inLen2;

            for (int y = 0; y < outLen1; ++y) {
                const bool rowInside = zInside && y < inLen1;
                const dim_t outRow = w*os[3] + z*os[2] + y*os[1];

                for (int x = 0; x < outPairs0; ++x) {
                    To* cell = out_ptr + (outRow + x*2);

                    if (rowInside && x < inLen0) {
                        // Inside the input: copy the element into the first slot.
                        const dim_t srcIdx =
                            w*inStrides[3] + z*inStrides[2] + y*inStrides[1] + x;
                        cell[0] = static_cast<To>(src[srcIdx]);
                    }
                    else {
                        // Outside the input: pad with zero.
                        cell[0] = static_cast<To>(0);
                    }

                    // The second slot of every pair is zero.
                    cell[1] = static_cast<To>(0);
                }
            }
        }
    }
}
Example #27
0
// Constructs an Array that refers to the data of an existing array.
//
// parent  : array whose data handle (parent.getData()), data dims and device
//           id are reused for the new array.
// dims    : dimensions of the new array.
// offsets : per-dimension offsets; forwarded to ArrayInfo and also folded
//           into a single linear offset with calcOffset(parent.strides(),
//           offsets), which is added to the parent's own offset.
// strides : strides of the new array.
//
// The new array is flagged as ready and as not the owner, and its node is
// default-constructed.
// NOTE(review): data(parent.getData()) presumably shares the parent's buffer
// rather than copying it; confirm against the declared type of `data`.
Array<T>::Array(const Array<T>& parent, const dim4 &dims, const dim4 &offsets, const dim4 &strides) :
    ArrayInfo(parent.getDevId(), dims, offsets, strides, (af_dtype)dtype_traits<T>::af_type),
    data(parent.getData()), data_dims(parent.getDataDims()),
    node(), ready(true),
    offset(parent.getOffset() + calcOffset(parent.strides(), offsets)),
    owner(false)
{ }
Example #28
0
    // Reduces `in` along dimension `dim` and returns the result.
    //
    // The output has the same dims as the input except that dims[dim] is 1.
    // The work is delegated to one of four reduce_dim functors, chosen by
    // in.ndims() (entry 0 handles ndims() == 1, entry 3 handles ndims() == 4).
    //
    // in  : array to reduce.
    // dim : index of the dimension to collapse.
    Array<To> reduce(const Array<Ti> &in, const int dim)
    {
        // Output shape: input shape with the reduced dimension collapsed.
        dim4 outDims = in.dims();
        outDims[dim] = 1;
        Array<To> out = createEmptyArray<To>(outDims);

        // Function-local static table: initialised on first use, then reused.
        static reduce_dim_func<op, Ti, To> kernels[4] = {
            reduce_dim<op, Ti, To, 1>(),
            reduce_dim<op, Ti, To, 2>(),
            reduce_dim<op, Ti, To, 3>(),
            reduce_dim<op, Ti, To, 4>()
        };

        reduce_dim_func<op, Ti, To> &kernel = kernels[in.ndims() - 1];

        kernel(out.get(), out.strides(), out.dims(),
               in.get(), in.strides(), in.dims(), dim);

        return out;
    }
Example #29
0
    // Runs MinMaxOp<op, T> over every element of `in`.
    //
    // The operator is seeded with the element at offset 0 and index 0, then
    // fed each element together with its offset
    // i + j*strides[1] + k*strides[2] + l*strides[3] (dim 0 innermost).
    //
    // loc : receives the operator's m_idx after the scan.
    // in  : array to scan; its first element is read unconditionally.
    //
    // Returns the operator's m_val after the scan.
    T ireduce_all(unsigned *loc, const Array<T> &in)
    {
        const af::dim4 extent = in.dims();
        const af::dim4 step   = in.strides();
        const T *src = in.get();

        MinMaxOp<op, T> best(src[0], 0);

        for (dim_t w = 0; w < extent[3]; w++) {
            const dim_t base3 = w * step[3];

            for (dim_t z = 0; z < extent[2]; z++) {
                const dim_t base2 = base3 + z * step[2];

                for (dim_t y = 0; y < extent[1]; y++) {
                    const dim_t base1 = base2 + y * step[1];

                    for (dim_t x = 0; x < extent[0]; x++) {
                        dim_t at = base1 + x;
                        best(src[at], at);
                    }
                }
            }
        }

        *loc = best.m_idx;
        return best.m_val;
    }
Example #30
0
// Packs `in` into `out_ptr` as interleaved pairs, zero-filling the rest.
//
// With half = divup(in.dims()[0], 2), the pair written for position d0 holds
// the input element at d0 in its first slot and the element at d0 + half in
// its second slot. When the input's dim-0 length is odd, the last pair
// (d0 == half - 1) gets 0 in its second slot. Positions outside the packed
// input extents receive (0, 0).
//
// out_ptr : destination buffer.
// od      : output dims; od[0] / 2 pairs are written along dim 0.
// os      : output strides used for dims 1-3; along dim 0 the pair for
//           position d0 is written at offset 2 * d0.
// in      : source array; read at d0 + d1*strides[1] + d2*strides[2] +
//           d3*strides[3].
void packData(To* out_ptr, const af::dim4& od, const af::dim4& os,
              Array<Ti> const& in)
{
    const af::dim4 inDims    = in.dims();
    const af::dim4 inStrides = in.strides();
    const Ti* src = in.get();

    const int  halfLen0 = divup(inDims[0], 2);
    const bool oddLen0  = (inDims[0] % 2 == 1);

    // Loop bounds, narrowed to int exactly once.
    const int outPairs0 = static_cast<int>(od[0]) / 2;
    const int outLen1   = static_cast<int>(od[1]);
    const int outLen2   = static_cast<int>(od[2]);
    const int outLen3   = static_cast<int>(od[3]);

    const int inLen1 = static_cast<int>(inDims[1]);
    const int inLen2 = static_cast<int>(inDims[2]);
    const int inLen3 = static_cast<int>(inDims[3]);

    for (int w = 0; w < outLen3; ++w) {
        const bool wInside = w < inLen3;

        for (int z = 0; z < outLen2; ++z) {
            const bool zInside = wInside && z < inLen2;

            for (int y = 0; y < outLen1; ++y) {
                const bool rowInside = zInside && y < inLen1;
                const dim_t outRow = w*os[3] + z*os[2] + y*os[1];

                for (int x = 0; x < outPairs0; ++x) {
                    To* cell = out_ptr + (outRow + x*2);

                    if (rowInside && x < halfLen0) {
                        const dim_t srcIdx =
                            w*inStrides[3] + z*inStrides[2] + y*inStrides[1] + x;
                        cell[0] = static_cast<To>(src[srcIdx]);

                        // An odd-length input has no partner for its last
                        // first-half element; that slot is zeroed instead.
                        const bool unpaired = oddLen0 && x == halfLen0 - 1;
                        cell[1] = unpaired
                                      ? static_cast<To>(0)
                                      : static_cast<To>(src[srcIdx + halfLen0]);
                    }
                    else {
                        // Outside the packed input: pad with zeros.
                        cell[0] = static_cast<To>(0);
                        cell[1] = static_cast<To>(0);
                    }
                }
            }
        }
    }
}