Example #1
0
/**
 * Concatenate two arrays along dimension `dim`.
 *
 * Both inputs are evaluated first. The output is allocated with element type
 * Tx and filled by kernel::join, which is enqueued on the queue returned by
 * getQueue().
 *
 * @param dim    dimension (0-3) along which the arrays are joined
 * @param first  array placed first along `dim`
 * @param second array placed after `first` along `dim`
 * @return the joined array
 */
Array<Tx> join(const int dim, const Array<Tx> &first, const Array<Ty> &second)
{
    first.eval();
    second.eval();

    const af::dim4 fdims = first.dims();
    const af::dim4 sdims = second.dims();

    // Every extent is copied from `first`, except along the join dimension
    // where the extents of both inputs are added. The remaining dimensions
    // are expected to match already; that is not verified here.
    af::dim4 odims;
    for (int i = 0; i < 4; i++) {
        odims[i] = (i == dim) ? (fdims[i] + sdims[i]) : fdims[i];
    }

    Array<Tx> out = createEmptyArray<Tx>(odims);
    getQueue().enqueue(kernel::join<Tx, Ty>, out, dim, first, second);
    return out;
}
Example #2
0
    /**
     * Return a sorted copy of `in`, ordered along dimension `dim`.
     *
     * The kernels run on a copy created with copyArray(). For dim != 0 the
     * result is relabelled with permuted dimensions through setDataDims() and
     * then passed through reorder() before being returned.
     * NOTE(review): this presumably undoes a layout in which sortBatched
     * leaves the sorted dimension first -- confirm against the kernel.
     *
     * Any std::exception raised inside is re-reported through AF_ERROR with
     * AF_ERR_INTERNAL.
     *
     * @param in          array to sort
     * @param dim         dimension to sort along; 0-3 are dispatched, any
     *                    other value goes to AF_ERROR(AF_ERR_NOT_SUPPORTED)
     * @param isAscending sort direction flag forwarded to the kernels
     * @return the sorted array
     */
    Array<T> sort(const Array<T> &in, const unsigned dim, bool isAscending)
    {
        try {
            Array<T> out = copyArray<T>(in);
            switch (dim) {
                case 0:
                    kernel::sort0<T>(out, isAscending);
                    break;
                case 1:
                    kernel::sortBatched<T, 1>(out, isAscending);
                    break;
                case 2:
                    kernel::sortBatched<T, 2>(out, isAscending);
                    break;
                case 3:
                    kernel::sortBatched<T, 3>(out, isAscending);
                    break;
                default:
                    AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
            }

            // Sorting along the first dimension needs no relabelling.
            if (dim == 0) {
                return out;
            }

            // Snapshot of the dimensions before they are overwritten below.
            const af::dim4 sortedDims = out.dims();

            af::dim4 preorderDims = sortedDims;
            af::dim4 reorderDims(0, 1, 2, 3);
            reorderDims[dim] = 0;
            preorderDims[0]  = sortedDims[dim];
            for (int d = 1; d <= (int)dim; d++) {
                reorderDims[d - 1] = d;
                preorderDims[d]    = sortedDims[d - 1];
            }

            out.setDataDims(preorderDims);
            out = reorder<T>(out, reorderDims);
            return out;
        } catch (const std::exception &ex) {
            AF_ERROR(ex.what(), AF_ERR_INTERNAL);
        }
    }
Example #3
0
    /**
     * Apply an IIR filter with feed-forward coefficients `b` and feedback
     * coefficients `a` to the signal `x`.
     *
     * The feed-forward part is an expanded 1D convolution of x with b whose
     * first dimension is then reset to the length of x. When `a` has a single
     * coefficient along its first dimension that result is returned directly;
     * otherwise kernel::iir produces the output.
     *
     * cl::Error exceptions are translated with CL_TO_AF_ERROR.
     *
     * @param b feed-forward coefficients
     * @param a feedback coefficients
     * @param x input signal
     * @return the filtered signal, with the same dimensions as the truncated
     *         convolution result
     */
    Array<T> iir(const Array<T> &b, const Array<T> &a, const Array<T> &x)
    {
        try {
            // Pick the batching mode from the ranks of x and b.
            AF_BATCH_KIND type;
            if (x.ndims() == b.ndims()) {
                type = (x.ndims() == 1) ? AF_BATCH_NONE : AF_BATCH_SAME;
            } else if (x.ndims() < b.ndims()) {
                type = AF_BATCH_RHS;
            } else {
                type = AF_BATCH_LHS;
            }

            // Keep only the first x.dims()[0] elements of the convolution.
            Array<T> c = convolve<T, T, 1, true>(x, b, type);
            dim4 cdims = c.dims();
            cdims[0]   = x.dims()[0];
            c.resetDims(cdims);

            const int num_a = a.dims()[0];
            if (num_a == 1) {
                return c;
            }

            dim4 ydims = c.dims();
            Array<T> y = createEmptyArray<T>(ydims);

            // The boolean template argument is set when `a` has more than
            // one dimension.
            const bool multiDimA = (a.ndims() > 1);
            if (multiDimA) {
                kernel::iir<T, true>(y, c, a);
            } else {
                kernel::iir<T, false>(y, c, a);
            }

            return y;
        } catch (cl::Error &err) {
            CL_TO_AF_ERROR(err);
        }
    }
    /**
     * Interpolate `in` at the positions given by `pos0` and `pos1`.
     *
     * The output takes all of its extents from pos0 except the last two,
     * which come from `in`. The interpolation method selects the third
     * template argument of kernel::approx2 (1, 2 or 3).
     * NOTE(review): that argument looks like the interpolation order --
     * confirm against the kernel.
     *
     * @param in      array to interpolate
     * @param pos0    positions along the first dimension
     * @param pos1    positions along the second dimension
     * @param method  interpolation method
     * @param offGrid value forwarded to the kernel
     * @return the output array; if `method` matches none of the handled
     *         values no kernel is run and the array is returned as allocated
     */
    Array<Ty> approx2(const Array<Ty> &in, const Array<Tp> &pos0, const Array<Tp> &pos1,
                      const af_interp_type method, const float offGrid)
    {
        af::dim4 odims = pos0.dims();
        odims[2] = in.dims()[2];
        odims[3] = in.dims()[3];

        // Create output placeholder
        Array<Ty> out = createEmptyArray<Ty>(odims);

        // Map the method onto the kernel's third template argument;
        // 0 means "no kernel for this method".
        int order = 0;
        switch (method) {
        case AF_INTERP_NEAREST:
        case AF_INTERP_LOWER:
            order = 1;
            break;
        case AF_INTERP_LINEAR:
        case AF_INTERP_BILINEAR:
        case AF_INTERP_LINEAR_COSINE:
        case AF_INTERP_BILINEAR_COSINE:
            order = 2;
            break;
        case AF_INTERP_CUBIC:
        case AF_INTERP_BICUBIC:
        case AF_INTERP_CUBIC_SPLINE:
        case AF_INTERP_BICUBIC_SPLINE:
            order = 3;
            break;
        default:
            break;
        }

        if (order == 1) {
            kernel::approx2<Ty, Tp, 1> (out, in, pos0, pos1, offGrid, method);
        } else if (order == 2) {
            kernel::approx2<Ty, Tp, 2> (out, in, pos0, pos1, offGrid, method);
        } else if (order == 3) {
            kernel::approx2<Ty, Tp, 3> (out, in, pos0, pos1, offGrid, method);
        }

        return out;
    }
Example #5
0
    /**
     * Interpolate `in` at the positions given by `pos0` and `pos1`.
     *
     * The output has the dimensions of `in`, with the first two extents
     * replaced by those of pos0. Only AF_INTERP_NEAREST and AF_INTERP_LINEAR
     * run a computation; for any other method the output is returned as
     * allocated.
     *
     * @param in      array to interpolate
     * @param pos0    positions along the first dimension
     * @param pos1    positions along the second dimension
     * @param method  interpolation method
     * @param offGrid value forwarded to approx2_
     * @return pointer to the array obtained from createEmptyArray
     */
    Array<Ty> *approx2(const Array<Ty> &in, const Array<Tp> &pos0, const Array<Tp> &pos1,
                       const af_interp_type method, const float offGrid)
    {
        af::dim4 odims = in.dims();
        odims[0] = pos0.dims()[0];
        odims[1] = pos0.dims()[1];

        // Create output placeholder
        Array<Ty> *out = createEmptyArray<Ty>(odims);

        // The method has to be a template argument, so each supported value
        // gets its own call.
        if (method == AF_INTERP_NEAREST) {
            approx2_<Ty, Tp, AF_INTERP_NEAREST>(
                out->get(), out->dims(), out->elements(),
                in.get(), in.dims(), in.elements(),
                pos0.get(), pos0.dims(), pos1.get(), pos1.dims(),
                out->strides(), in.strides(), pos0.strides(), pos1.strides(),
                offGrid);
        } else if (method == AF_INTERP_LINEAR) {
            approx2_<Ty, Tp, AF_INTERP_LINEAR>(
                out->get(), out->dims(), out->elements(),
                in.get(), in.dims(), in.elements(),
                pos0.get(), pos0.dims(), pos1.get(), pos1.dims(),
                out->strides(), in.strides(), pos0.strides(), pos1.strides(),
                offGrid);
        }

        return out;
    }
Example #6
0
    /**
     * Interpolate `in` at the positions given by `pos0` and `pos1`.
     *
     * When Ty is double or cdouble and the active device reports no double
     * support, OPENCL_NOT_SUPPORTED() is invoked. The output takes its
     * extents from pos0, except the last two which come from `in`. Only
     * AF_INTERP_NEAREST and AF_INTERP_LINEAR launch a kernel; for any other
     * method the output is returned as allocated.
     *
     * @param in      array to interpolate
     * @param pos0    positions along the first dimension
     * @param pos1    positions along the second dimension
     * @param method  interpolation method
     * @param offGrid value forwarded to the kernel
     * @return the output array
     */
    Array<Ty> approx2(const Array<Ty> &in, const Array<Tp> &pos0, const Array<Tp> &pos1,
                      const af_interp_type method, const float offGrid)
    {
        const bool needsDouble = std::is_same<Ty, double>::value ||
                                 std::is_same<Ty, cdouble>::value;
        // The device is only queried when the element type requires doubles.
        if (needsDouble && !isDoubleSupported(getActiveDeviceId())) {
            OPENCL_NOT_SUPPORTED();
        }

        af::dim4 odims = pos0.dims();
        odims[2] = in.dims()[2];
        odims[3] = in.dims()[3];

        // Create output placeholder
        Array<Ty> out = createEmptyArray<Ty>(odims);

        if (method == AF_INTERP_NEAREST) {
            kernel::approx2<Ty, Tp, AF_INTERP_NEAREST>(out, in, pos0, pos1, offGrid);
        } else if (method == AF_INTERP_LINEAR) {
            kernel::approx2<Ty, Tp, AF_INTERP_LINEAR> (out, in, pos0, pos1, offGrid);
        }

        return out;
    }
/**
 * Separable 2D convolution: `signal` is convolved with `c_filter` through
 * kernel::convolve2<..., 0, ...> into a temporary, which is then convolved
 * with `r_filter` through kernel::convolve2<..., 1, ...>.
 *
 * When `expand` is set, the temporary grows by (column filter length - 1)
 * along dimension 0, and the output additionally grows by
 * (row filter length - 1) along dimension 1. Otherwise both have the
 * dimensions of the signal.
 *
 * @param signal   input signal
 * @param c_filter filter used in the first pass
 * @param r_filter filter used in the second pass
 * @return the convolved array
 */
Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<accT> const& r_filter)
{
    // Filter lengths are the total element counts of the filter arrays.
    const dim_t cfLen = c_filter.dims().elements();
    const dim_t rfLen = r_filter.dims().elements();

    dim4 tDims = signal.dims();
    dim4 oDims = signal.dims();

    if (expand) {
        const dim_t growCol = cfLen - 1;
        const dim_t growRow = rfLen - 1;
        tDims[0] += growCol;
        oDims[0] += growCol;
        oDims[1] += growRow;
    }

    Array<T> temp = createEmptyArray<T>(tDims);
    Array<T> out  = createEmptyArray<T>(oDims);

    kernel::convolve2<T, accT, 0, expand>(temp, signal, c_filter);
    kernel::convolve2<T, accT, 1, expand>(out, temp, r_filter);

    return out;
}
Example #8
0
/**
 * Solve a linear system through an LU factorisation.
 *
 * Copies of `a` and `b` are made; the copy of `a` is factorised with
 * magma_getrf_gpu and the copy of `b` is then passed to magma_getrs_gpu
 * together with the pivots, and returned.
 *
 * NOTE(review): the `info` status written by both MAGMA calls is never
 * inspected here.
 *
 * @param a coefficient matrix
 * @param b right-hand side(s)
 * @return the copy of `b` after magma_getrs_gpu has run on it
 */
Array<T> generalSolve(const Array<T> &a, const Array<T> &b)
{
    const dim4 aDims = a.dims();
    int M = aDims[0];
    int N = aDims[1];

    // One pivot entry per row/column of the smaller matrix extent.
    std::vector<int> ipiv(std::min(M, N));

    Array<T> A = copyArray<T>(a);
    Array<T> B = copyArray<T>(b);

    int info = 0;

    cl::Buffer *A_buf = A.get();
    magma_getrf_gpu<T>(M, N, (*A_buf)(), A.getOffset(), A.strides()[1],
                       ipiv.data(), getQueue()(), &info);

    cl::Buffer *B_buf = B.get();
    int K = B.dims()[1];
    magma_getrs_gpu<T>(MagmaNoTrans, M, K,
                       (*A_buf)(), A.getOffset(), A.strides()[1],
                       ipiv.data(),
                       (*B_buf)(), B.getOffset(), B.strides()[1],
                       getQueue()(), &info);

    return B;
}
Example #9
0
/**
 * Accumulate a histogram of `in` into `out`, one histogram per 2D slice.
 *
 * For every (dim 2, dim 3) slice, each element of the first two input
 * dimensions is mapped to a bin with (value - minval) / step, clamped to
 * [0, nbins - 1], and the matching counter in `out` is incremented. The
 * counters are only incremented, never reset, so `out` must already hold
 * the starting counts.
 *
 * Slice base pointers are derived from the strides of dimensions 2 and 3
 * of each array, so inputs whose fourth dimension is not packed directly
 * behind the third are addressed correctly.
 *
 * @param out    destination counters; its extents in dimensions 2 and 3
 *               drive the slice loops
 * @param in     values to bin
 * @param nbins  number of bins; nothing is done when it is 0
 * @param minval lower edge of the first bin
 * @param maxval upper edge of the last bin
 */
void histogram(Array<OutT> out, Array<InT> const in,
               unsigned const nbins, double const minval, double const maxval)
{
    // Without bins there is nothing to count into, and the clamp below
    // would otherwise yield index -1.
    if (nbins == 0) return;

    dim4 const outDims   = out.dims();
    float const step     = (maxval - minval)/(float)nbins;
    dim4 const inDims    = in.dims();
    dim4 const iStrides  = in.strides();
    dim4 const oStrides  = out.strides();
    dim_t const nElems   = inDims[0]*inDims[1];

    OutT *const outBase      = out.get();
    const InT *const inBase  = in.get();

    for(dim_t b3 = 0; b3 < outDims[3]; b3++) {
        for(dim_t b2 = 0; b2 < outDims[2]; b2++) {
            // Address each slice explicitly instead of bumping the pointer
            // by strides[2] only, which ignores strides[3].
            dim_t const inOff    = b3 * iStrides[3] + b2 * iStrides[2];
            dim_t const outOff   = b3 * oStrides[3] + b2 * oStrides[2];
            const InT *inData    = inBase  + inOff;
            OutT *outData        = outBase + outOff;

            for(dim_t i=0; i<nElems; i++) {
                // Kept in dim_t so large strides are not truncated.
                dim_t const idx = IsLinear ? i : ((i % inDims[0]) + (i / inDims[0])*iStrides[1]);
                int bin = (int)((inData[idx] - minval) / step);
                bin = std::max(bin, 0);
                bin = std::min(bin, (int)(nbins - 1));
                outData[bin]++;
            }
        }
    }
}
Example #10
0
/**
 * Solve a linear system from an existing LU factorisation.
 *
 * When OpenCLCPUOffload() is true the call is forwarded to cpu::solveLU.
 * Otherwise the pivots are copied to host memory, `b` is copied, and
 * magma_getrs_gpu is run on that copy using `A` and the pivots.
 *
 * NOTE(review): the `info` status written by magma_getrs_gpu is never
 * inspected, and `options` is only used on the CPU offload path.
 *
 * @param A       factorised matrix
 * @param pivot   pivot indices belonging to `A`
 * @param b       right-hand side(s)
 * @param options matrix properties, forwarded to cpu::solveLU
 * @return the copy of `b` after magma_getrs_gpu has run on it, or the
 *         result of cpu::solveLU
 */
Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
                 const Array<T> &b, const af_mat_prop options)
{
    if (OpenCLCPUOffload()) {
        return cpu::solveLU(A, pivot, b, options);
    }

    int N    = A.dims()[0];
    int NRHS = b.dims()[1];

    // MAGMA takes the pivots as a host array.
    std::vector<int> hostPivots(N);
    copyData(&hostPivots[0], pivot);

    Array< T > B = copyArray<T>(b);

    const cl::Buffer *A_buf = A.get();
    cl::Buffer *B_buf       = B.get();

    int info = 0;
    magma_getrs_gpu<T>(MagmaNoTrans, N, NRHS,
                       (*A_buf)(), A.getOffset(), A.strides()[1],
                       &hostPivots[0],
                       (*B_buf)(), B.getOffset(), B.strides()[1],
                       getQueue()(), &info);

    return B;
}
Example #11
0
/**
 * Element-wise selection: out = cond ? a : b.
 *
 * Each of `cond`, `a` and `b` is compared with `out` dimension by
 * dimension. Where an operand's extent equals the output's, it is indexed
 * normally; where it differs, its index along that dimension is pinned to
 * 0, so the operand is repeated along it.
 *
 * Offsets are kept in the type produced by the stride arithmetic instead of
 * `int`, so arrays with more elements than an int can index are not
 * mis-addressed.
 *
 * @param out  destination array; its dimensions drive the loops
 * @param cond condition values, non-zero selecting `a`
 * @param a    values used where the condition is true
 * @param b    values used where the condition is false
 */
void select(Array<T> out, const Array<char> cond, const Array<T> a, const Array<T> b)
{
    const af::dim4 adims = a.dims();
    const af::dim4 astrides = a.strides();
    const af::dim4 bdims = b.dims();
    const af::dim4 bstrides = b.strides();

    const af::dim4 cdims = cond.dims();
    const af::dim4 cstrides = cond.strides();

    const af::dim4 odims = out.dims();
    const af::dim4 ostrides = out.strides();

    // Per-dimension flags: does the operand span the full output extent?
    bool is_a_same[] = {adims[0] == odims[0], adims[1] == odims[1],
        adims[2] == odims[2], adims[3] == odims[3]};

    bool is_b_same[] = {bdims[0] == odims[0], bdims[1] == odims[1],
        bdims[2] == odims[2], bdims[3] == odims[3]};

    bool is_c_same[] = {cdims[0] == odims[0], cdims[1] == odims[1],
        cdims[2] == odims[2], cdims[3] == odims[3]};

    const T *aptr = a.get();
    const T *bptr = b.get();
    T *optr = out.get();
    const char *cptr = cond.get();

    for (int l = 0; l < odims[3]; l++) {

        // Multiplying by the flag zeroes the offset along a dimension the
        // operand does not span.
        const auto o_off3   = ostrides[3] * l;
        const auto a_off3   = astrides[3] * is_a_same[3] * l;
        const auto b_off3   = bstrides[3] * is_b_same[3] * l;
        const auto c_off3   = cstrides[3] * is_c_same[3] * l;

        for (int k = 0; k < odims[2]; k++) {

            const auto o_off2   = ostrides[2] * k + o_off3;
            const auto a_off2   = astrides[2] * is_a_same[2] * k + a_off3;
            const auto b_off2   = bstrides[2] * is_b_same[2] * k + b_off3;
            const auto c_off2   = cstrides[2] * is_c_same[2] * k + c_off3;

            for (int j = 0; j < odims[1]; j++) {

                const auto o_off1   = ostrides[1] * j + o_off2;
                const auto a_off1   = astrides[1] * is_a_same[1] * j + a_off2;
                const auto b_off1   = bstrides[1] * is_b_same[1] * j + b_off2;
                const auto c_off1   = cstrides[1] * is_c_same[1] * j + c_off2;

                for (int i = 0; i < odims[0]; i++) {

                    bool cval = is_c_same[0] ? cptr[c_off1 + i] : cptr[c_off1];
                    T    aval = is_a_same[0] ? aptr[a_off1 + i] : aptr[a_off1];
                    T    bval = is_b_same[0] ? bptr[b_off1 + i] : bptr[b_off1];
                    T    oval = cval ? aval : bval;
                    optr[o_off1 + i] = oval;
                }
            }
        }
    }
}
Example #12
0
/**
 * Brute-force nearest neighbour search.
 *
 * For every query sample, the distance to every training sample is
 * accumulated with dist_op over the feature dimension. The index and the
 * distance of the closest training sample are stored. On equal distances
 * the lowest training index wins, because only a strictly smaller distance
 * replaces the current best.
 *
 * Elements are addressed with the first extent of each array as the column
 * pitch; strides are not consulted.
 *
 * @param idx      receives the index of the best match per query
 * @param dist     receives the distance of the best match per query
 * @param query    query samples
 * @param train    training samples
 * @param dist_dim dimension along which a feature vector runs; samples run
 *                 along dimension 1 when this is 0, else along dimension 0
 * @param n_dist   number of matches requested; values above 1 invoke
 *                 CPU_NOT_SUPPORTED()
 */
void nearest_neighbour_(Array<uint>& idx, Array<To>& dist,
                        const Array<T>& query, const Array<T>& train,
                        const uint dist_dim, const uint n_dist)
{
    const uint sample_dim = (dist_dim == 0) ? 1 : 0;
    const dim4 qDims = query.dims();
    const dim4 tDims = train.dims();

    if (n_dist > 1) {
        CPU_NOT_SUPPORTED();
    }

    const unsigned distLength = qDims[dist_dim];
    const unsigned nQuery = qDims[sample_dim];
    const unsigned nTrain = tDims[sample_dim];

    const dim4 outDims(n_dist, nQuery);

    idx  = createEmptyArray<uint>(outDims);
    dist = createEmptyArray<To  >(outDims);

    const T* qPtr = query.get();
    const T* tPtr = train.get();
    uint* iPtr = idx.get();
    To* dPtr = dist.get();

    dist_op<T, To, dist_type> op;

    // True when samples run along dimension 0 and features along dimension 1.
    const bool samplesFirst = (sample_dim == 0);

    for (unsigned q = 0; q < nQuery; q++) {
        To best_dist = limit_max<To>();
        unsigned best_idx  = 0;

        for (unsigned t = 0; t < nTrain; t++) {
            To local_dist = 0;
            for (unsigned k = 0; k < distLength; k++) {
                const size_t qIdx = samplesFirst ? (k * qDims[0] + q)
                                                 : (q * qDims[0] + k);
                const size_t tIdx = samplesFirst ? (k * tDims[0] + t)
                                                 : (t * tDims[0] + k);

                local_dist += op(qPtr[qIdx], tPtr[tIdx]);
            }

            if (local_dist < best_dist) {
                best_dist = local_dist;
                best_idx  = t;
            }
        }

        iPtr[q] = best_idx;
        dPtr[q] = best_dist;
    }
}
Example #13
0
    /**
     * Build square matrices that carry the columns of `in` on diagonal `num`.
     *
     * Each column of `in` yields one size x size matrix, where size is the
     * first extent of `in` plus |num|. Entries with row == column - num
     * receive the input values and every other entry is set to zero.
     *
     * @param in  input whose columns supply the diagonal values
     * @param num diagonal index; positive values lie above the main diagonal
     * @return array of dimensions (size, size, in.dims()[1])
     */
    Array<T> diagCreate(const Array<T> &in, const int num)
    {
        int size = in.dims()[0] + std::abs(num);
        int batch = in.dims()[1];
        Array<T> out = createEmptyArray<T>(dim4(size, size, batch));

        const T *src = in.get();
        T *dst = out.get();

        for (int k = 0; k < batch; k++) {
            for (int col = 0; col < size; col++) {
                for (int row = 0; row < size; row++) {
                    T val = scalar<T>(0);
                    const bool onDiagonal = (row == col - num);
                    if (onDiagonal) {
                        // Above the main diagonal the row indexes the input,
                        // otherwise the column does.
                        if (num > 0) {
                            val = src[row];
                        } else {
                            val = src[col];
                        }
                    }
                    dst[row + col * out.strides()[1]] = val;
                }
            }
            // Step to the next output matrix and the next input column.
            dst += out.strides()[2];
            src += in.strides()[1];
        }

        return out;
    }
Example #14
0
/**
 * Standard deviation of `in` along dimension `dim`.
 *
 * The input is cast to outType, the mean along `dim` is tiled back to the
 * input extent and subtracted, the differences are squared and summed along
 * `dim`, the sums are divided by the extent of `dim`, and the square root
 * of that is returned.
 *
 * @param in  handle of the input array
 * @param dim dimension along which to reduce
 * @return handle of the resulting array
 */
static af_array stdev(const af_array& in, int dim)
{
    Array<inType> rawInput   = getArray<inType>(in);
    Array<outType> castInput = cast<outType>(rawInput);
    const dim4 inDims = castInput.dims();

    Array<outType> meanArr = mean<inType, outType>(rawInput, dim);

    // Repeat the mean along `dim` so it lines up with the input.
    dim4 tileDims(1);
    tileDims[dim] = inDims[dim];
    Array<outType> tiledMean = detail::tile<outType>(meanArr, tileDims);

    // Sum of squared deviations from the mean.
    Array<outType> centered = detail::arithOp<outType, af_sub_t>(castInput, tiledMean, tiledMean.dims());
    Array<outType> squared  = detail::arithOp<outType, af_mul_t>(centered, centered, centered.dims());
    Array<outType> sumSq    = reduce<af_add_t, outType, outType>(squared, dim);
    dim4 oDims = sumSq.dims();

    // Divide by the number of elements along `dim`, then take the root.
    Array<outType> countArr = createValueArray<outType>(oDims, scalar<outType>(inDims[dim]));
    Array<outType> variance = detail::arithOp<outType, af_div_t>(sumSq, countArr, sumSq.dims());
    Array<outType> result   = detail::unaryOp<outType, af_sqrt_t>(variance);

    return getHandle<outType>(result);
}
/**
 * N-dimensional convolution of `signal` with `filter`.
 *
 * Output dimensions:
 *  - expand set, kind NONE or RHS: signal + filter - 1 in all four dimensions
 *  - expand set, other kinds: signal + filter - 1 below baseDim, signal
 *    extent from baseDim upwards
 *  - expand not set: signal dimensions, with the extents from baseDim
 *    upwards taken from the filter when kind is AF_BATCH_RHS
 *
 * @param signal input signal
 * @param filter convolution filter
 * @param kind   batching mode
 * @return the array filled by kernel::convolve_nd
 */
Array<T> convolve(Array<T> const& signal, Array<accT> const& filter, AF_BATCH_KIND kind)
{
    const dim4 sDims    = signal.dims();
    const dim4 fDims    = filter.dims();

    dim4 oDims(1);
    if (!expand) {
        oDims = sDims;
        if (kind==AF_BATCH_RHS) {
            for (dim_t i=baseDim; i<4; ++i) {
                oDims[i] = fDims[i];
            }
        }
    } else {
        const bool growAll = (kind==AF_BATCH_NONE || kind==AF_BATCH_RHS);
        for (dim_t d=0; d<4; ++d) {
            // Only the convolved dimensions grow unless growAll is set.
            const bool grow = growAll || (d<baseDim);
            if (grow) {
                oDims[d] = sDims[d]+fDims[d]-1;
            } else {
                oDims[d] = sDims[d];
            }
        }
    }

    Array<T> out   = createEmptyArray<T>(oDims);

    kernel::convolve_nd<T, accT, baseDim, expand>(out, signal, filter, kind);

    return out;
}
Example #16
0
/**
 * 3D morphological dilation or erosion of `in` with the structuring element
 * `mask`, written to `out`.
 *
 * For every voxel the result starts at the voxel's own value and is then
 * combined (max when IsDilation, else min) with every neighbour that lies
 * inside the volume and whose mask entry is greater than zero. The window
 * is offset by half the mask extent in each dimension. The fourth dimension
 * is processed as a batch.
 *
 * @param out  destination array
 * @param in   input volume(s)
 * @param mask structuring element
 */
void morph3d(Array<T> out, Array<T> const in, Array<T> const mask)
{
    const af::dim4 dims     = in.dims();
    const af::dim4 window   = mask.dims();
    const dim_t R0      = window[0]/2;
    const dim_t R1      = window[1]/2;
    const dim_t R2      = window[2]/2;
    const af::dim4 istrides = in.strides();
    const af::dim4 fstrides = mask.strides();
    const dim_t bCount  = dims[3];
    const af::dim4 ostrides = out.strides();
    T* outData          = out.get();
    const T*   inData   = in.get();
    const T*   filter   = mask.get();

    for(dim_t batchId=0; batchId<bCount; ++batchId) {
        for(dim_t k=0; k<dims[2]; ++k) {
            for(dim_t j=0; j<dims[1]; ++j) {
                for(dim_t i=0; i<dims[0]; ++i) {
                    // Start from the centre voxel itself.
                    T filterResult = inData[ getIdx(istrides, i, j, k) ];

                    // wk, wj, wi step along the 3rd, 2nd and 1st dimensions
                    // of the filter window respectively.
                    for(dim_t wk=0; wk<window[2]; wk++) {
                        const dim_t offk = k+wk-R2;
                        for(dim_t wj=0; wj<window[1]; wj++) {
                            const dim_t offj = j+wj-R1;
                            for(dim_t wi=0; wi<window[0]; wi++) {
                                const dim_t offi = i+wi-R0;

                                T maskValue = filter[ getIdx(fstrides, wi, wj, wk) ];

                                // Skip entries the mask does not select.
                                if (!(maskValue > (T)0)) continue;

                                // Skip neighbours outside the volume.
                                if (!(offi>=0 && offj>=0 && offk>=0 &&
                                      offi<dims[0] && offj<dims[1] && offk<dims[2])) continue;

                                T inValue   = inData[ getIdx(istrides, offi, offj, offk) ];

                                if (IsDilation)
                                    filterResult = std::max(filterResult, inValue);
                                else
                                    filterResult = std::min(filterResult, inValue);
                            } // window 1st dimension loop ends here
                        } // window 2nd dimension loop ends here
                    } // window 3rd dimension loop ends here

                    outData[ getIdx(ostrides, i, j, k) ] = filterResult;
                } // 1st dimension loop ends here
            } // 2nd dimension loop ends here
        } // 3rd dimension loop ends here

        // Advance both pointers to the next batch entry.
        outData += ostrides[3];
        inData  += istrides[3];
    }
}
Example #17
0
/**
 * Run an in-place complex-to-complex FFT on `in` through clFFT.
 *
 * The transform covers the first `rank` dimensions and the remaining
 * dimensions are multiplied together into the batch count. The same buffer
 * is passed as both input and output of clfftEnqueueTransform, and the
 * direction is CLFFT_FORWARD when `direction` is true, CLFFT_BACKWARD
 * otherwise.
 *
 * @param in array transformed in place
 */
void fft_inplace(Array<T> &in)
{
    verifySupported<rank>(in.dims());

    size_t tdims[4], istrides[4];
    computeDims(tdims   , in.dims());
    computeDims(istrides, in.strides());

    // Everything beyond the transform rank is treated as batch.
    int batch = 1;
    for (int d = rank; d < 4; d++) {
        batch *= tdims[d];
    }

    // The same layout describes input and output, since the transform is
    // in place.
    clfftPlanHandle plan;
    find_clfft_plan(plan,
                    CLFFT_COMPLEX_INTERLEAVED,
                    CLFFT_COMPLEX_INTERLEAVED,
                    (clfftDim)rank, tdims,
                    istrides, istrides[rank],
                    istrides, istrides[rank],
                    (clfftPrecision)Precision<T>::type,
                    batch);

    cl_command_queue queue = getQueue()();
    cl_mem buffer = (*in.get())();

    CLFFT_CHECK(clfftEnqueueTransform(plan,
                                      direction ? CLFFT_FORWARD : CLFFT_BACKWARD,
                                      1, &queue, 0, NULL, NULL,
                                      &buffer, &buffer, NULL));
}
Example #18
0
    /**
     * Extract diagonal `num` from every matrix in `in`.
     *
     * The output has dimensions (size, 1, in.dims()[2], in.dims()[3]) with
     * size = max(rows, cols) - |num|. Entry i of the output is the input
     * element at (i, i + num) for num > 0, or (i - num, i) otherwise.
     * Positions that fall outside the input matrix produce zero.
     *
     * The bounds test is applied to the actual row and column being read.
     * Testing only `i` against both extents, without the diagonal offset,
     * lets a non-square input such as 5x2 with num = 1 read past the end of
     * the matrix.
     *
     * Input elements are addressed as column * strides[1] + row, i.e. a unit
     * stride along the first dimension is assumed.
     *
     * @param in  input matrices
     * @param num diagonal index; positive values lie above the main diagonal
     * @return array holding the extracted diagonals
     */
    Array<T> diagExtract(const Array<T> &in, const int num)
    {
        // Copied by value so nothing points into whatever dims() returns.
        const dim4 idims = in.dims();
        dim_t size = std::max(idims[0], idims[1]) - std::abs(num);
        Array<T> out = createEmptyArray<T>(dim4(size, 1, idims[2], idims[3]));

        const dim4 odims    = out.dims();
        const dim4 istrides = in.strides();
        const dim4 ostrides = out.strides();

        // Row and column of the first element on the requested diagonal.
        const dim_t row_off = (num < 0) ? -num : 0;
        const dim_t col_off = (num > 0) ?  num : 0;

        const T *ibase = in.get();
        T *obase = out.get();

        for (int l = 0; l < (int)odims[3]; l++) {

            for (int k = 0; k < (int)odims[2]; k++) {
                const T *iptr = ibase + l * istrides[3] + k * istrides[2];
                T *optr = obase + l * ostrides[3] + k * ostrides[2];

                for (int i = 0; i < (int)odims[0]; i++) {
                    const dim_t row = i + row_off;
                    const dim_t col = i + col_off;

                    T val = scalar<T>(0);
                    if (row < idims[0] && col < idims[1]) {
                        val = iptr[col * istrides[1] + row];
                    }
                    optr[i] = val;
                }
            }
        }

        return out;
    }
Example #19
0
/**
 * 2D morphological dilation or erosion of `in` with the structuring element
 * `mask`.
 *
 * For every pixel the result starts at the pixel's own value and is then
 * combined (max when isDilation, else min) with every neighbour that lies
 * inside the image and whose mask entry is greater than zero. The window is
 * offset by half the mask extent in each dimension. The third and fourth
 * dimensions are processed as batches.
 *
 * Slice base pointers are derived from the strides of dimensions 2 and 3,
 * so an input whose fourth dimension is not packed directly behind the
 * third is addressed correctly.
 *
 * @param in   input image(s)
 * @param mask structuring element
 * @return new array with the dimensions of `in`
 */
Array<T> morph(const Array<T> &in, const Array<T> &mask)
{
    const dim4 dims       = in.dims();
    const dim4 window     = mask.dims();
    const dim_t R0     = window[0]/2;
    const dim_t R1     = window[1]/2;
    const dim4 istrides   = in.strides();
    const dim4 fstrides   = mask.strides();

    Array<T> out         = createEmptyArray<T>(dims);
    const dim4 ostrides   = out.strides();

    T* const outBase      = out.get();
    const T* const inBase = in.get();
    const T*   filter     = mask.get();

    for(dim_t b3=0; b3<dims[3]; ++b3) {
        for(dim_t b2=0; b2<dims[2]; ++b2) {
            // Address the slice explicitly instead of bumping the pointers
            // by strides[2] only, which ignores strides[3].
            const dim_t inOff  = b3*istrides[3] + b2*istrides[2];
            const dim_t outOff = b3*ostrides[3] + b2*ostrides[2];
            const T* inData    = inBase  + inOff;
            T* outData         = outBase + outOff;

            for(dim_t j=0; j<dims[1]; ++j) {
                // j steps along 2nd dimension
                for(dim_t i=0; i<dims[0]; ++i) {
                    // i steps along 1st dimension
                    T filterResult = inData[ getIdx(istrides, i, j) ];

                    // wj,wi steps along 2nd & 1st dimensions of filter window respectively
                    for(dim_t wj=0; wj<window[1]; wj++) {
                        for(dim_t wi=0; wi<window[0]; wi++) {

                            dim_t offj = j+wj-R1;
                            dim_t offi = i+wi-R0;

                            T maskValue = filter[ getIdx(fstrides, wi, wj) ];

                            // Only neighbours selected by the mask and
                            // inside the image contribute.
                            if ((maskValue > (T)0) && offi>=0 && offj>=0 && offi<dims[0] && offj<dims[1]) {

                                T inValue   = inData[ getIdx(istrides, offi, offj) ];

                                if (isDilation)
                                    filterResult = std::max(filterResult, inValue);
                                else
                                    filterResult = std::min(filterResult, inValue);
                            }

                        } // window 1st dimension loop ends here
                    } // window 2nd dimension loop ends here

                    outData[ getIdx(ostrides, i, j) ] = filterResult;
                } //1st dimension loop ends here
            } // 2nd dimension loop ends here
        }
    }

    return out;
}
Example #20
0
/**
 * Sort key/value pairs along the first dimension, in place.
 *
 * The product of the key array's extents in dimensions 1-3 decides which
 * kernel runs: the batched kernel when that product exceeds 4, the
 * iterative kernel otherwise.
 *
 * @param okey        keys, sorted in place
 * @param oval        values, reordered together with the keys
 * @param isAscending sort direction flag forwarded to the kernels
 */
void sort0ByKey(Array<Tk> okey, Array<Tv> oval, bool isAscending)
{
    int higherDims =  okey.dims()[1] * okey.dims()[2] * okey.dims()[3];

    // TODO Make a better heuristic
    const bool useBatched = (higherDims > 4);
    if (!useBatched) {
        kernel::sort0ByKeyIterative<Tk, Tv>(okey, oval, isAscending);
        return;
    }
    kernel::sortByKeyBatched<Tk, Tv>(okey, oval, 0, isAscending);
}
Example #21
0
/**
 * Matrix product of `lhs` and `rhs` through clBLAS.
 *
 * The output has dimensions (M, N, 1, 1), where M and N are the row count
 * of op(lhs) and the column count of op(rhs). When op(rhs) has a single
 * column the product is computed with gemv, otherwise with gemm. A non-zero
 * clBLAS status is turned into a runtime_error carrying the status code.
 *
 * @param lhs    left operand
 * @param rhs    right operand
 * @param optLhs transpose option for `lhs`
 * @param optRhs transpose option for `rhs`
 * @return the product
 */
Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
                af_blas_transpose optLhs, af_blas_transpose optRhs)
{
    initBlas();
    clblasTranspose lOpts = toClblasTranspose(optLhs);
    clblasTranspose rOpts = toClblasTranspose(optRhs);

    // Which stored dimension acts as row/column once the transpose option
    // has been applied.
    const bool lhsAsIs = (lOpts == clblasNoTrans);
    const bool rhsAsIs = (rOpts == clblasNoTrans);
    int aRowDim = lhsAsIs ? 0 : 1;
    int aColDim = lhsAsIs ? 1 : 0;
    int bColDim = rhsAsIs ? 1 : 0;

    dim4 lDims = lhs.dims();
    dim4 rDims = rhs.dims();
    int M = lDims[aRowDim];
    int N = rDims[bColDim];
    int K = lDims[aColDim];

    //FIXME: Leaks on errors.
    Array<T> out = createEmptyArray<T>(af::dim4(M, N, 1, 1));
    auto alpha = scalar<T>(1);
    auto beta  = scalar<T>(0);

    dim4 lStrides = lhs.strides();
    dim4 rStrides = rhs.strides();
    clblasStatus err;
    cl::Event event;

    const bool rhsIsVector = (rDims[bColDim] == 1);
    if (!rhsIsVector) {
        gemm_func<T> gemm;
        err = gemm(
                clblasColumnMajor, lOpts, rOpts,
                M, N, K,
                alpha,
                (*lhs.get())(),    lhs.getOffset(),   lStrides[1],
                (*rhs.get())(),    rhs.getOffset(),   rStrides[1],
                beta,
                (*out.get())(),   out.getOffset(),  out.dims()[0],
                1, &getQueue()(), 0, nullptr, &event());
    } else {
        // gemv receives the stored extents of lhs directly.
        N = lDims[aColDim];
        gemv_func<T> gemv;
        err = gemv(
            clblasColumnMajor, lOpts,
            lDims[0], lDims[1],
            alpha,
            (*lhs.get())(),    lhs.getOffset(),   lStrides[1],
            (*rhs.get())(),    rhs.getOffset(),   rStrides[0],
            beta ,
            (*out.get())(),   out.getOffset(),             1,
            1, &getQueue()(), 0, nullptr, &event());
    }

    if (err) {
        throw runtime_error(std::string("CLBLAS error: ") + std::to_string(err));
    }

    return out;
}
Example #22
0
/**
 * Accumulate windows stored along dimension `d` of `in` back into `out`.
 *
 * Each index along dimension d of `in` is one wx-by-wy window. The window's
 * top-left corner in `out` is derived from that index, the strides (sx, sy)
 * and the padding (px, py). Window elements that land inside `out` are
 * added to the element already there; the rest are dropped. `out` is only
 * ever added to, so it must hold the starting values on entry.
 *
 * @param out destination array, accumulated into
 * @param in  array holding the windows
 * @param wx  window extent along the first dimension
 * @param wy  window extent along the second dimension
 * @param sx  window stride along the first dimension
 * @param sy  window stride along the second dimension
 * @param px  padding along the first dimension
 * @param py  padding along the second dimension
 */
void wrap_dim(Array<T> out, const Array<T> in, const dim_t wx, const dim_t wy,
              const dim_t sx, const dim_t sy, const dim_t px, const dim_t py)
{
    const T *inPtr = in.get();
    T *outPtr      = out.get();

    const af::dim4 idims    = in.dims();
    const af::dim4 odims    = out.dims();
    const af::dim4 istrides = in.strides();
    const af::dim4 ostrides = out.strides();

    // Number of window positions along the first dimension of `out`.
    const dim_t nx = (odims[0] + 2 * px - wx) / sx + 1;

    for(dim_t w = 0; w < idims[3]; w++) {
        for(dim_t z = 0; z < idims[2]; z++) {

            const dim_t cIn  = w * istrides[3] + z * istrides[2];
            const dim_t cOut = w * ostrides[3] + z * ostrides[2];
            const T* sliceIn = inPtr  + cIn;
            T* sliceOut      = outPtr + cOut;

            for(dim_t col = 0; col < idims[d]; col++) {
                // Start of this window's data in the input
                const T* windowData = sliceIn + col * istrides[d];

                // Top-left corner of the window in output coordinates,
                // padding removed
                const dim_t spx = (col % nx) * sx - px;
                const dim_t spy = (col / nx) * sy - py;

                // Short cut: the whole window is known to lie inside the output
                const bool cond = (spx >= 0 && spx + wx < odims[0] && spy >= 0 && spy + wy < odims[1]);

                for(dim_t y = 0; y < wy; y++) {
                    const dim_t ypad = spy + y;
                    for(dim_t x = 0; x < wx; x++) {
                        const dim_t xpad = spx + x;

                        dim_t iloc = (y * wx + x);
                        if (d == 0) iloc *= istrides[1];

                        const bool inside =
                            cond || (xpad >= 0 && xpad < odims[0] && ypad >= 0 && ypad < odims[1]);
                        if (!inside) continue;

                        const dim_t oloc = (ypad * ostrides[1] + xpad * ostrides[0]);
                        // FIXME: When using threads, atomize this
                        sliceOut[oloc] += windowData[iloc];
                    }
                }
            }
        }
    }
}
    /**
     * Build square matrices that carry the columns of `in` on diagonal `num`.
     *
     * The output has dimensions (size, size, batch), with size equal to the
     * first extent of `in` plus |num| and batch equal to its second extent.
     * It is filled by kernel::diagCreate.
     *
     * @param in  input whose columns supply the diagonal values
     * @param num diagonal index
     * @return the array filled by the kernel
     */
    Array<T> diagCreate(const Array<T> &in, const int num)
    {
        const int offset = std::abs(num);
        int size  = in.dims()[0] + offset;
        int batch = in.dims()[1];

        Array<T> out = createEmptyArray<T>(dim4(size, size, batch));
        kernel::diagCreate<T>(out, in, num);
        return out;
    }
Example #24
0
 /**
  * Copy the elements of `from` into the buffer `to`.
  *
  * When `from` reports isOwner(), its elements are copied with a single
  * memcpy. Otherwise stridedCopy writes them using strides computed from
  * the dimensions of `from` via calcStrides().
  *
  * @param to   destination buffer; must hold at least from.elements() items
  * @param from source array
  */
 void copyData(T *to, const Array<T> &from)
 {
     if (!from.isOwner()) {
         dim4 ostrides = calcStrides(from.dims());
         stridedCopy<T>(to, ostrides, from.get(), from.dims(), from.strides(), from.ndims() - 1);
         return;
     }

     // FIXME: Check for errors / exceptions
     memcpy(to, from.get(), from.elements()*sizeof(T));
 }
Example #25
0
 /**
  * Sort `in` and also report the permutation that was applied.
  *
  * Both outputs are replaced by fresh arrays with the dimensions of `in`
  * before the dimension is checked. Only dim == 0 is implemented; any other
  * value goes to AF_ERROR with AF_ERR_NOT_SUPPORTED.
  *
  * @param val receives the sorted values
  * @param idx receives the indices
  * @param in  array to sort
  * @param dim dimension to sort along
  */
 void sort_index(Array<T> &val, Array<uint> &idx, const Array<T> &in, const uint dim)
 {
     val = createEmptyArray<T>(in.dims());
     idx = createEmptyArray<uint>(in.dims());

     if (dim == 0) {
         sort0_index<T, isAscending>(val, idx, in);
     } else {
         AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
     }
 }
Example #26
0
/**
 * Scale every element of `in` by (1.0 - 1.0e-6f).
 *
 * The input is cast to float, multiplied element-wise by an array holding
 * that constant, and cast back to T.
 *
 * @param in input array
 * @return the scaled array, with element type T
 */
Array<T> normalizePerType(const Array<T>& in)
{
    Array<float> asFloat = cast<float, T>(in);

    // Constant factor, repeated over the whole input shape.
    Array<float> factor = createValueArray<float>(in.dims(), 1.0 - 1.0e-6f);

    Array<float> product = arithOp<float, af_mul_t>(asFloat, factor, in.dims());
    return cast<T, float>(product);
}
Example #27
0
    /**
     * Reduce `in` along dimension `dim` with operation `op`, writing the
     * reduced values to `out` and a location for each of them to `loc`.
     *
     * The number of dimensions of `in` selects the ireduce_dim
     * instantiation. Values of in.ndims() outside 1-4 match no case, and
     * then nothing is written. Both outputs are used as passed in; they are
     * not allocated or resized here.
     *
     * @param out receives the reduced values
     * @param loc receives the locations
     * @param in  input array
     * @param dim dimension to reduce along
     */
    void ireduce(Array<T> &out, Array<uint> &loc,
                 const Array<T> &in, const int dim)
    {
        // The rank must be a compile-time template argument, so each value
        // needs its own instantiation.
        switch (in.ndims()) {
        case 1:
            ireduce_dim<op, T, 1>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;

        case 2:
            ireduce_dim<op, T, 2>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;

        case 3:
            ireduce_dim<op, T, 3>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;

        case 4:
            ireduce_dim<op, T, 4>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;
        }
    }
Example #28
0
    /**
     * Scan `in` along dimension `dim` with operation `op`.
     *
     * The output has the dimensions of `in` and is created filled with 0.
     * The number of dimensions of `in` selects the scan_dim instantiation.
     * Values of in.ndims() outside 1-4 match no branch, and the zero-filled
     * array is returned unchanged.
     *
     * @param in  input array
     * @param dim dimension to scan along
     * @return the scanned array
     */
    Array<To> scan(const Array<Ti>& in, const int dim)
    {
        dim4 dims = in.dims();

        Array<To> out = createValueArray<To>(dims, 0);

        // The rank must be a compile-time template argument, so each value
        // needs its own instantiation.
        const auto nd = in.ndims();
        if (nd == 1) {
            scan_dim<op, Ti, To, 1>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(), dim);
        } else if (nd == 2) {
            scan_dim<op, Ti, To, 2>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(), dim);
        } else if (nd == 3) {
            scan_dim<op, Ti, To, 3>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(), dim);
        } else if (nd == 4) {
            scan_dim<op, Ti, To, 4>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(), dim);
        }

        return out;
    }
Example #29
0
/**
 * Standard deviation over all elements of `in`.
 *
 * The input is cast to outType, the overall mean is subtracted from every
 * element, the differences are squared and summed, the sum is divided by
 * the element count, and the square root of that is returned.
 *
 * @param in handle of the input array
 * @return the standard deviation as a scalar
 */
static outType stdev(const af_array& in)
{
    Array<inType> rawInput   = getArray<inType>(in);
    Array<outType> castInput = cast<outType>(rawInput);

    // The mean is repeated over the input shape so it can be subtracted
    // element-wise.
    Array<outType> meanCnst  = createValueArray<outType>(castInput.dims(),
                                                         mean<inType, outType>(rawInput));

    Array<outType> centered  = detail::arithOp<outType, af_sub_t>(castInput, meanCnst, castInput.dims());
    Array<outType> squared   = detail::arithOp<outType, af_mul_t>(centered, centered, centered.dims());

    outType result = division(reduce_all<af_add_t, outType, outType>(squared),
                              castInput.elements());

    return sqrt(result);
}
Example #30
0
 /**
  * Sort key/value pairs by key.
  *
  * Both outputs are replaced by fresh arrays with the dimensions of the
  * matching input before the dimension is checked. Only dim == 0 is
  * implemented; any other value goes to AF_ERROR with AF_ERR_NOT_SUPPORTED.
  *
  * @param okey receives the sorted keys
  * @param oval receives the values, reordered with the keys
  * @param ikey input keys
  * @param ival input values
  * @param dim  dimension to sort along
  */
 void sort_by_key(Array<Tk> &okey, Array<Tv> &oval,
            const Array<Tk> &ikey, const Array<Tv> &ival, const uint dim)
 {
     okey = createEmptyArray<Tk>(ikey.dims());
     oval = createEmptyArray<Tv>(ival.dims());

     if (dim == 0) {
         sort0_by_key<Tk, Tv, isAscending>(okey, oval, ikey, ival);
     } else {
         AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
     }
 }