Example #1
0
    /**
     * Builds an output array and fills it by running the approx2_ kernel that
     * matches the requested interpolation method.
     *
     * The first two output dimensions come from `pos0`; the remaining
     * dimensions are copied from `in`.
     *
     * @param in      source data
     * @param pos0    first position array (its dims[0..1] size the output)
     * @param pos1    second position array
     * @param method  AF_INTERP_NEAREST or AF_INTERP_LINEAR; for any other
     *                value no kernel runs and the output is returned unwritten
     * @param offGrid value forwarded unchanged to the kernel
     * @return pointer obtained from createEmptyArray (ownership is decided by
     *         the caller / createEmptyArray contract, not visible here)
     */
    Array<Ty> *approx2(const Array<Ty> &in, const Array<Tp> &pos0, const Array<Tp> &pos1,
                       const af_interp_type method, const float offGrid)
    {
        // Output shape: positions decide dims 0 and 1, input decides the rest.
        af::dim4 outDims = in.dims();
        const af::dim4 posDims = pos0.dims();
        outDims[0] = posDims[0];
        outDims[1] = posDims[1];

        Array<Ty> *result = createEmptyArray<Ty>(outDims);

        if (method == AF_INTERP_NEAREST) {
            approx2_<Ty, Tp, AF_INTERP_NEAREST>
                    (result->get(), result->dims(), result->elements(),
                     in.get(), in.dims(), in.elements(),
                     pos0.get(), pos0.dims(), pos1.get(), pos1.dims(),
                     result->strides(), in.strides(), pos0.strides(), pos1.strides(),
                     offGrid);
        } else if (method == AF_INTERP_LINEAR) {
            approx2_<Ty, Tp, AF_INTERP_LINEAR>
                    (result->get(), result->dims(), result->elements(),
                     in.get(), in.dims(), in.elements(),
                     pos0.get(), pos0.dims(), pos1.get(), pos1.dims(),
                     result->strides(), in.strides(), pos0.strides(), pos1.strides(),
                     offGrid);
        }
        // Any other method falls through with the placeholder untouched.

        return result;
    }
Example #2
0
/**
 * LU-factorises `in` in place on the host and returns the pivot vector.
 *
 * The matrix is copied into pinned host memory, factorised with getrf
 * (column-major), and written back into `in`.
 *
 * @param in             matrix to factorise; overwritten with the result
 * @param convert_pivot  when true the raw getrf pivots are passed through
 *                       convertPivot before being returned
 * @return pivot array: M entries when converted, min(M, N) raw getrf pivots
 *         otherwise
 */
Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)
{
    const dim4 iDims = in.dims();
    const int M = iDims[0];
    const int N = iDims[1];
    const int nPivots = min(M, N);

    int *pivotPtr  = pinnedAlloc<int>(nPivots);
    T   *inPtr     = pinnedAlloc<T>  (in.elements());
    copyData(inPtr, in);

    getrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N,
                    inPtr, in.strides()[1],
                    pivotPtr);

    // getrf only fills min(M, N) pivots, so that is all the buffer holds
    // unless convertPivot replaces it. Previously the result was always
    // built with M entries, reading past the buffer when M > N and no
    // conversion was requested.
    // NOTE(review): assumes convertPivot swaps *pivotPtr for a buffer of M
    // entries (it receives int** and M) - confirm against its definition.
    int pivotLen = nPivots;
    if(convert_pivot) {
        convertPivot(&pivotPtr, M, nPivots);
        pivotLen = M;
    }

    writeHostDataArray<T>(in, inPtr, in.elements() * sizeof(T));
    Array<int> pivot = createHostDataArray<int>(af::dim4(pivotLen), pivotPtr);

    // Evaluated before the pinned buffers are released below.
    pivot.eval();

    pinnedFree(inPtr);
    pinnedFree(pivotPtr);

    return pivot;
}
Example #3
0
/**
 * Separable 2D convolution of `signal` with a column filter followed by a
 * row filter, done as two kernel::convSep passes through a temporary array.
 *
 * Filters longer than kernel::MAX_SCONV_FILTER_LEN are rejected through
 * OPENCL_NOT_SUPPORTED. When `expand` is set the output grows by
 * (filter length - 1) along the filtered dimensions.
 *
 * @param signal    input data
 * @param c_filter  filter used in the first pass (template argument 0)
 * @param r_filter  filter used in the second pass (template argument 1)
 * @return the filtered array
 */
Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter,
                   Array<accT> const& r_filter) {
    const dim_t colLen = (dim_t)c_filter.elements();
    const dim_t rowLen = (dim_t)r_filter.elements();

    const bool colTooLong = colLen > kernel::MAX_SCONV_FILTER_LEN;
    const bool rowTooLong = rowLen > kernel::MAX_SCONV_FILTER_LEN;
    if (colTooLong || rowTooLong) {
        // TODO call upon fft
        char errMessage[256];
        snprintf(errMessage, sizeof(errMessage),
                 "\nOpenCL Separable convolution doesn't support %lld(coloumn) "
                 "%lld(row) filters\n",
                 colLen, rowLen);
        OPENCL_NOT_SUPPORTED(errMessage);
    }

    // Intermediate (after pass one) and final shapes both start as the
    // signal shape.
    dim4 tmpDims = signal.dims();
    dim4 outDims = tmpDims;

    if (expand) {
        const dim_t colGrow = colLen - 1;
        const dim_t rowGrow = rowLen - 1;
        tmpDims[0] += colGrow;
        outDims[0] += colGrow;
        outDims[1] += rowGrow;
    }

    Array<T> firstPass = createEmptyArray<T>(tmpDims);
    Array<T> result    = createEmptyArray<T>(outDims);

    kernel::convSep<T, accT, 0, expand>(firstPass, signal, c_filter);
    kernel::convSep<T, accT, 1, expand>(result, firstPass, r_filter);

    return result;
}
Example #4
0
/**
 * Copies `in_` into the region of `out` selected by the first `ndims`
 * entries of `index`.
 *
 * Two paths exist:
 *  - vector path: the selected region has extent 1 in every dimension but
 *    its last and the input is a vector or scalar; a scalar is tiled, a
 *    vector is reshaped to the region's dims;
 *  - general path: the region and the input must match in all four dims.
 *
 * Raises AF_ERR_SIZE through AF_ERROR on a size mismatch.
 */
static
void assign(Array<Tout> &out, const unsigned &ndims, const af_seq *index, const Array<Tin> &in_)
{
    dim4 const outDs = out.dims();
    dim4 const iDims = in_.dims();

    DIM_ASSERT(0, (outDs.ndims()>=iDims.ndims()));
    DIM_ASSERT(0, (outDs.ndims()>=(dim_t)ndims));

    out.eval();

    vector<af_seq> seqs(index, index+ndims);

    dim4 regionDims = toDims(seqs, outDs);

    // The region counts as a vector when all dims before its last are 1.
    bool vectorCase = true;
    const int lastDim = (int)regionDims.ndims() - 1;
    for (int d = 0; d < lastDim; d++) {
        if (regionDims[d] != 1) {
            vectorCase = false;
            break;
        }
    }

    const bool inputVecOrScalar = in_.isVector() || in_.isScalar();
    vectorCase = vectorCase && inputVecOrScalar;

    // Dimensions not covered by a sequence collapse to 1.
    for (dim_t d = ndims; d < (int)in_.ndims(); d++) {
        regionDims[d] = 1;
    }

    if (!vectorCase) {
        for (int d = 0; d < 4; d++) {
            if (regionDims[d] != iDims[d]) {
                AF_ERROR("Size mismatch between input and output", AF_ERR_SIZE);
            }
        }
        Array<Tout> target = createSubArray<Tout>(out, seqs, false);

        copyArray<Tin , Tout>(target, in_);
        return;
    }

    const bool inputIsSingle = in_.elements() == 1;
    if (regionDims.elements() != (dim_t)in_.elements() && !inputIsSingle) {
        AF_ERROR("Size mismatch between input and output", AF_ERR_SIZE);
    }

    // A single element is broadcast; an equally sized vector is reshaped.
    Array<Tin> shaped = inputIsSingle ? tile(in_, regionDims) : modDims(in_, regionDims);
    Array<Tout> target = createSubArray<Tout>(out, seqs, false);

    copyArray<Tin , Tout>(target, shaped);
}
Example #5
0
    void evalMultiple(std::vector<Array<T>*> arrays)
    {
        std::vector<Param<T> > outputs;
        std::vector<JIT::Node *> nodes;

        for (int i = 0; i < (int)arrays.size(); i++) {
            Array<T> *array = arrays[i];

            if (array->isReady()) {
                continue;
            }

            array->ready = true;
            array->setId(getActiveDeviceId());
            array->data = shared_ptr<T>(memAlloc<T>(array->elements()).release(), memFree<T>);

            outputs.push_back(*array);
            nodes.push_back(array->node.get());
        }

        evalNodes(outputs, nodes);

        for (int i = 0; i < (int)arrays.size(); i++) {
            Array<T> *array = arrays[i];

            if (array->isReady()) continue;
            // FIXME: Replace the current node in any JIT possible trees with the new BufferNode
            array->node = bufferNodePtr<T>();
        }
        return;
    }
Example #6
0
/**
 * Variance over all elements of `in`.
 *
 * Computes sum((x - mean)^2) and divides it by the element count
 * (`isbiased`) or by the element count minus one (otherwise).
 *
 * @param in        input array handle holding `inType` values
 * @param isbiased  selects the divisor as described above
 * @return the variance as `outType`
 */
static outType varAll(const af_array& in, const bool isbiased)
{
    typedef typename baseOutType<outType>::type weightType;

    Array<inType>  inArr = getArray<inType>(in);
    Array<outType> input = cast<outType>(inArr);

    // Constant array holding the mean, shaped like the input.
    Array<outType> meanCnst = createValueArray<outType>(
        input.dims(), mean<inType, weightType, outType>(inArr));

    Array<outType> centered = arithOp<outType, af_sub_t>(input, meanCnst, input.dims());
    Array<outType> squared  = arithOp<outType, af_mul_t>(centered, centered, centered.dims());

    outType variance = division(reduce_all<af_add_t, outType, outType>(squared),
        isbiased ? input.elements() : input.elements() - 1);

    return variance;
}
Example #7
0
/**
 * Sorts `val` along dimension 0, choosing between the batched sort and the
 * iterative kernel based on how many dim-0 columns there are.
 *
 * @param val          array sorted in place
 * @param isAscending  sort direction
 */
void sort0(Array<T>& val, bool isAscending) {
    // Number of independent dim-0 runs in the array.
    const int batchCount = val.elements() / val.dims()[0];

    // TODO Make a better heuristic
    const bool manyBatches = batchCount > 10;
    if (!manyBatches) {
        getQueue().enqueue(kernel::sort0Iterative<T>, val, isAscending);
        return;
    }
    sortBatched<T, 0>(val, isAscending);
}
Example #8
0
/**
 * SUSAN corner detection (OpenCL buffers).
 *
 * Computes a response buffer with kernel::susan for the given radius
 * (1..9; other radii launch no response kernel), runs kernel::nonMaximal
 * on it, and returns the found corners through the three output arrays.
 *
 * @param x_out, y_out, resp_out  receive corner coordinates and responses
 * @param in             input image
 * @param radius         selects the kernel::susan instantiation
 * @param diff_thr, geom_thr, edge  forwarded to the kernels
 * @param feature_ratio  scales in.elements() to get the corner capacity
 * @return number of corners written (capped at the capacity)
 */
unsigned susan(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out,
               const Array<T> &in,
               const unsigned radius, const float diff_thr, const float geom_thr,
               const float feature_ratio, const unsigned edge)
{
    const dim4 inDims = in.dims();

    // Capacity of the corner buffers.
    const unsigned corner_lim = in.elements() * feature_ratio;
    const size_t cornerBytes  = corner_lim * sizeof(float);

    cl::Buffer* xBuf    = bufferAlloc(cornerBytes);
    cl::Buffer* yBuf    = bufferAlloc(cornerBytes);
    cl::Buffer* respBuf = bufferAlloc(cornerBytes);

    cl::Buffer* response = bufferAlloc(in.elements()*sizeof(float));

    // The radius is a compile-time kernel parameter, hence the dispatch.
    switch(radius) {
    case 1:
        kernel::susan<T, 1>(response, in.get(), in.getOffset(), inDims[0], inDims[1], diff_thr, geom_thr, edge);
        break;
    case 2:
        kernel::susan<T, 2>(response, in.get(), in.getOffset(), inDims[0], inDims[1], diff_thr, geom_thr, edge);
        break;
    case 3:
        kernel::susan<T, 3>(response, in.get(), in.getOffset(), inDims[0], inDims[1], diff_thr, geom_thr, edge);
        break;
    case 4:
        kernel::susan<T, 4>(response, in.get(), in.getOffset(), inDims[0], inDims[1], diff_thr, geom_thr, edge);
        break;
    case 5:
        kernel::susan<T, 5>(response, in.get(), in.getOffset(), inDims[0], inDims[1], diff_thr, geom_thr, edge);
        break;
    case 6:
        kernel::susan<T, 6>(response, in.get(), in.getOffset(), inDims[0], inDims[1], diff_thr, geom_thr, edge);
        break;
    case 7:
        kernel::susan<T, 7>(response, in.get(), in.getOffset(), inDims[0], inDims[1], diff_thr, geom_thr, edge);
        break;
    case 8:
        kernel::susan<T, 8>(response, in.get(), in.getOffset(), inDims[0], inDims[1], diff_thr, geom_thr, edge);
        break;
    case 9:
        kernel::susan<T, 9>(response, in.get(), in.getOffset(), inDims[0], inDims[1], diff_thr, geom_thr, edge);
        break;
    }

    const unsigned found = kernel::nonMaximal<T>(xBuf, yBuf, respBuf,
                                                 inDims[0], inDims[1], response, edge, corner_lim);
    bufferFree(response);

    const unsigned corners_out = std::min(found, corner_lim);

    if (corners_out != 0) {
        // The corner buffers are handed to the output arrays, not freed here.
        x_out    = createDeviceDataArray<float>(dim4(corners_out), (void*)((*xBuf)()));
        y_out    = createDeviceDataArray<float>(dim4(corners_out), (void*)((*yBuf)()));
        resp_out = createDeviceDataArray<float>(dim4(corners_out), (void*)((*respBuf)()));
        return corners_out;
    }

    // Nothing found: release the scratch buffers and return empty arrays.
    bufferFree(xBuf);
    bufferFree(yBuf);
    bufferFree(respBuf);
    x_out    = createEmptyArray<float>(dim4());
    y_out    = createEmptyArray<float>(dim4());
    resp_out = createEmptyArray<float>(dim4());
    return 0;
}
Example #9
0
 /**
  * Copies the elements of `from` into the buffer `to`.
  *
  * An owning array is copied with one memcpy of elements()*sizeof(T)
  * bytes; any other array goes through stridedCopy using its dims and
  * strides.
  *
  * @param to    destination buffer (must hold from.elements() values)
  * @param from  source array
  */
 void copyData(T *to, const Array<T> &from)
 {
     if(!from.isOwner()) {
         stridedCopy<T>(to, from.get(), from.dims(), from.strides(), from.ndims() - 1);
         return;
     }

     // FIXME: Check for errors / exceptions
     const size_t byteCount = from.elements()*sizeof(T);
     memcpy(to, from.get(), byteCount);
 }
Example #10
0
/**
 * Writes `in` into the sub-region of `out` described by `seqs`.
 *
 * Does nothing for an empty input. When the region is vector-shaped and
 * the input is a vector or scalar, the input is tiled (scalar) or
 * reshaped (vector) to the region dims; otherwise region and input must
 * match in every dimension. A mismatch raises AF_ERR_SIZE via AF_ERROR.
 */
static void assign(Array<Tout>& out, const vector<af_seq> seqs,
                   const Array<Tin>& in) {
    size_t ndims      = seqs.size();
    const dim4& outDs = out.dims();
    const dim4& iDims = in.dims();

    if (iDims.elements() == 0) return;

    out.eval();

    dim4 regionDims = toDims(seqs, outDs);

    // Vector-shaped region: every dim before the last one has extent 1.
    bool vectorCase   = true;
    const int lastDim = (int)regionDims.ndims() - 1;
    for (int d = 0; d < lastDim; d++) {
        if (regionDims[d] != 1) {
            vectorCase = false;
            break;
        }
    }

    const bool inputVecOrScalar = in.isVector() || in.isScalar();
    vectorCase                  = vectorCase && inputVecOrScalar;

    // Dims beyond the supplied sequences are treated as singleton.
    for (dim_t d = ndims; d < (int)in.ndims(); d++) { regionDims[d] = 1; }

    if (!vectorCase) {
        for (int d = 0; d < AF_MAX_DIMS; d++) {
            if (regionDims[d] != iDims[d])
                AF_ERROR("Size mismatch between input and output", AF_ERR_SIZE);
        }
        Array<Tout> target = createSubArray<Tout>(out, seqs, false);

        copyArray<Tin, Tout>(target, in);
        return;
    }

    const bool inputIsSingle = in.elements() == 1;
    if (regionDims.elements() != (dim_t)in.elements() && !inputIsSingle) {
        AF_ERROR("Size mismatch between input and output", AF_ERR_SIZE);
    }

    // Broadcast a single value, or reshape an equally sized vector.
    Array<Tin> shaped =
        inputIsSingle ? tile(in, regionDims) : modDims(in, regionDims);
    auto target = createSubArray<Tout>(out, seqs, false);

    copyArray<Tin, Tout>(target, shaped);
}
Example #11
0
/**
 * Converts a dense matrix into a COO sparse array.
 *
 * The linear positions of the non-zero elements are obtained with where(),
 * split into row and column indices, and the matching values are gathered
 * from the flattened input.
 *
 * @param in  dense input (evaluated first)
 * @return sparse array with AF_STORAGE_COO storage and the dims of `in`
 */
SparseArray<T> sparseConvertDenseToCOO(const Array<T> &in)
{
    in.eval();

    Array<uint> nonZeroIdx_ = where<T>(in);
    Array<int> nonZeroIdx = cast<int, uint>(nonZeroIdx_);

    dim_t nNZ = nonZeroIdx.elements();

    // A linear index into the flattened matrix is row + col * nRows, so the
    // divisor must be the row count. The previous code divided by the
    // number of non-zeros, which gives wrong coordinates whenever that
    // count differs from the row count.
    const int nRows = static_cast<int>(in.dims()[0]);
    Array<int> constRows = createValueArray<int>(dim4(nNZ), nRows);
    constRows.eval();

    Array<int> rowIdx = arithOp<int, af_mod_t>(nonZeroIdx, constRows, nonZeroIdx.dims());
    Array<int> colIdx = arithOp<int, af_div_t>(nonZeroIdx, constRows, nonZeroIdx.dims());

    // Flatten a copy of the input and pick out the non-zero entries.
    Array<T> values = copyArray<T>(in);
    values.modDims(dim4(values.elements()));
    values = lookup<T, int>(values, nonZeroIdx, 0);

    return createArrayDataSparseArray<T>(in.dims(), values, rowIdx, colIdx, AF_STORAGE_COO);
}
Example #12
0
/**
 * SUSAN corner detection on the queue-based backend.
 *
 * Enqueues kernel::susan_responses followed by kernel::non_maximal, waits
 * for the queue, and then publishes the detected corners.
 *
 * @param x_out, y_out, resp_out  receive corner coordinates and responses
 * @param in             input image (evaluated first)
 * @param radius, diff_thr, geom_thr, edge  forwarded to the kernels
 * @param feature_ratio  scales in.elements() to get the corner capacity
 * @return number of corners returned (capped at the capacity)
 */
unsigned susan(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out,
               const Array<T> &in,
               const unsigned radius, const float diff_thr, const float geom_thr,
               const float feature_ratio, const unsigned edge)
{
    in.eval();

    const dim4 inDims         = in.dims();
    const unsigned corner_lim = in.elements() * feature_ratio;

    auto xCorners    = createEmptyArray<float>(dim4(corner_lim));
    auto yCorners    = createEmptyArray<float>(dim4(corner_lim));
    auto respCorners = createEmptyArray<float>(dim4(corner_lim));
    auto response    = createEmptyArray<T>(dim4(in.elements()));

    // Single shared counter written by the non-maximal kernel.
    auto foundCount = std::shared_ptr<unsigned>(memAlloc<unsigned>(1).release(), memFree<unsigned>);
    foundCount.get()[0] = 0;

    getQueue().enqueue(kernel::susan_responses<T>, response, in, inDims[0], inDims[1],
                       radius, diff_thr, geom_thr, edge);
    getQueue().enqueue(kernel::non_maximal<T>, xCorners, yCorners, respCorners, foundCount,
                       inDims[0], inDims[1], response, edge, corner_lim);
    // The counter is only read after the queue has drained.
    getQueue().sync();

    const unsigned corners_out = min((foundCount.get())[0], corner_lim);

    if (corners_out != 0) {
        x_out    = xCorners;
        y_out    = yCorners;
        resp_out = respCorners;
        // Shrink the views to the number of corners actually found.
        const dim4 trimmed(corners_out);
        x_out.resetDims(trimmed);
        y_out.resetDims(trimmed);
        resp_out.resetDims(trimmed);
        return corners_out;
    }

    x_out    = createEmptyArray<float>(dim4());
    y_out    = createEmptyArray<float>(dim4());
    resp_out = createEmptyArray<float>(dim4());
    return 0;
}
Example #13
0
/**
 * Fetches a Forge histogram sized for `in`, styles it, sets its axes and
 * copies the data into it.
 *
 * @param in      array of bin frequencies; its element count is the number
 *                of bins
 * @param minval  lower x-axis limit
 * @param maxval  upper x-axis limit
 * @return the histogram obtained from the ForgeManager
 */
fg::Histogram* setup_histogram(const af_array in, const double minval, const double maxval)
{
    Array<T> frequencies = getArray<T>(in);
    const dim_t binCount = frequencies.elements();

    // Largest bin frequency; used as the upper y-axis limit.
    const T peak = detail::reduce_all<af_max_t, T, T>(frequencies);

    /* retrieve Forge Histogram with binCount bins and the array's GL type */
    ForgeManager& manager = ForgeManager::getInstance();
    fg::Histogram* histogram = manager.getHistogram(binCount, getGLType<T>());

    /* set histogram bar colors to orange */
    histogram->setBarColor(0.929f, 0.486f, 0.2745f);

    /* x axis spans [minval, maxval]; y axis spans [0, peak frequency] */
    histogram->setAxesLimits(maxval, minval, static_cast<double>(peak), 0.0f);
    histogram->setAxesTitles("Bins", "Frequency");

    copy_histogram<T>(frequencies, histogram);

    return histogram;
}
Example #14
0
/**
 * Runs kernel::lookup on `input` with `indices` along dimension `dim`.
 *
 * The output has the input's shape except along `dim`, where its extent is
 * the number of indices. For `dim` outside 0..3 no kernel is launched and
 * the output (shaped like the input) is returned unwritten.
 *
 * @param input    source array
 * @param indices  index array
 * @param dim      dimension to index along
 * @return the result array
 */
Array<in_t> lookup(const Array<in_t> &input,
                   const Array<idx_t> &indices, const unsigned dim)
{
    const dim4 inDims = input.dims();

    // Start from the input shape and swap in the index count along `dim`.
    dim4 outDims = inDims;
    if (dim < 4) {
        outDims[dim] = indices.elements();
    }

    Array<in_t> result = createEmptyArray<in_t>(outDims);

    const dim_t rank = inDims.ndims();

    // `dim` is a compile-time parameter of the kernel.
    switch(dim) {
        case 0:
            kernel::lookup<in_t, idx_t, 0>(result, input, indices, rank);
            break;
        case 1:
            kernel::lookup<in_t, idx_t, 1>(result, input, indices, rank);
            break;
        case 2:
            kernel::lookup<in_t, idx_t, 2>(result, input, indices, rank);
            break;
        case 3:
            kernel::lookup<in_t, idx_t, 3>(result, input, indices, rank);
            break;
        default:
            break;
    }

    return result;
}
Example #15
0
/**
 * Correlation coefficient of two arrays, computed from running sums:
 *
 *   (n*Sxy - Sx*Sy) / (sqrt(n*Sxx - Sx^2) * sqrt(n*Syy - Sy^2))
 *
 * where n is the element count of X and S* are sums over all elements.
 *
 * @param X, Y  input handles holding `Ti` values; both are cast to `To`
 * @return the coefficient as `To`
 */
static To corrcoef(const af_array& X, const af_array& Y)
{
    Array<To> xs = cast<To>(getArray<Ti>(X));
    Array<To> ys = cast<To>(getArray<Ti>(Y));

    dim4 shape  = xs.dims();
    dim_t n     = xs.elements();

    // First-order sums.
    To xSum = detail::reduce_all<af_add_t, To, To>(xs);
    To ySum = detail::reduce_all<af_add_t, To, To>(ys);

    // Element-wise products feeding the second-order sums.
    Array<To> xx = detail::arithOp<To, af_mul_t>(xs, xs, shape);
    Array<To> yy = detail::arithOp<To, af_mul_t>(ys, ys, shape);
    Array<To> xy = detail::arithOp<To, af_mul_t>(xs, ys, shape);

    To xSqSum = detail::reduce_all<af_add_t, To, To>(xx);
    To ySqSum = detail::reduce_all<af_add_t, To, To>(yy);
    To xySum  = detail::reduce_all<af_add_t, To, To>(xy);

    return (n*xySum - xSum*ySum)
         / (sqrt(n*xSqSum-xSum*xSum)*sqrt(n*ySqSum-ySum*ySum));
}
Example #16
0
    /**
     * Returns the positions of all non-zero elements of `in`.
     *
     * Elements are visited with dimension 0 varying fastest and numbered
     * consecutively from 0; the number of each element that differs from
     * zero is stored in the result.
     *
     * @param in  array to scan (dimension 0 is read with unit stride)
     * @return 1D array with one entry per non-zero element
     */
    Array<uint> where(const Array<T> &in)
    {
        // Local copies: the old code kept raw pointers obtained from
        // in.dims().get() / in.strides().get(), which dangle if those
        // accessors return by value.
        const dim4 dims    = in.dims();
        const dim4 strides = in.strides();
        static const T zero = scalar<T>(0);

        const T *iptr = in.get();
        uint *out_vec  = memAlloc<uint>(in.elements());

        dim_t count = 0;  // non-zeros found so far
        dim_t idx = 0;    // running element number
        for (dim_t w = 0; w < dims[3]; w++) {
            // Offsets stay in dim_t; uint would truncate when strides are
            // large (for example a view into a big parent array).
            const dim_t offw = w * strides[3];

            for (dim_t z = 0; z < dims[2]; z++) {
                const dim_t offz = offw + z * strides[2];

                for (dim_t y = 0; y < dims[1]; y++) {
                    const dim_t offy = y * strides[1] + offz;

                    for (dim_t x = 0; x < dims[0]; x++) {

                        T val = iptr[offy + x];
                        if (val != zero) {
                            out_vec[count] = idx;
                            count++;
                        }
                        idx++;
                    }
                }
            }
        }

        Array<uint> out = createHostDataArray(dim4(count), out_vec);
        memFree<uint>(out_vec);
        return out;
    }
Example #17
0
/**
 * Histogram equalisation of `in` using the histogram `hist`.
 *
 * Builds the cumulative histogram, rescales it to
 * (cdf - min) * (grayLevels - 1) / (max - min), and uses the flattened
 * input values as indices into that table. The result is cast back to `T`
 * and given the input's dimensions.
 *
 * @param in    input image handle (values of type `T`)
 * @param hist  histogram handle (values of type `hType`)
 * @return handle of the equalised array
 */
static af_array hist_equal(const af_array& in, const af_array& hist)
{
    const Array<T> input = getArray<T>(in);

    // Flattened copy of the input, released again before returning.
    af_array vInput = 0;
    AF_CHECK(af_flat(&vInput, in));

    Array<float> histF = cast<float>(getArray<hType>(hist));

    dim4 histDims    = histF.dims();
    dim_t grayLevels = histF.elements();

    Array<float> cdf = scan<af_add_t, float, float>(histF, 0);

    const float cdfMin = reduce_all<af_min_t, float, float>(cdf);
    const float cdfMax = reduce_all<af_max_t, float, float>(cdf);
    const float scale  = (float)(grayLevels-1)/(cdfMax - cdfMin);

    // Constant arrays so the normalisation can use element-wise ops.
    Array<float> minConst   = createValueArray<float>(histDims, cdfMin);
    Array<float> scaleConst = createValueArray<float>(histDims, scale);

    // (cdf - min) * scale
    Array<float> shifted = arithOp<float, af_sub_t>(cdf, minConst, histDims);
    Array<float> normCdf = arithOp<float, af_mul_t>(shifted, scaleConst, histDims);

    // Each input value selects its entry of the normalised cdf.
    Array<float> mapped = lookup<float, T>(normCdf, getArray<T>(vInput), 0);

    Array<T> result = cast<T>(mapped);
    result.modDims(input.dims());

    AF_CHECK(af_release_array(vInput));

    return getHandle<T>(result);
}
Example #18
0
/**
 * Mean over all elements of `in`.
 *
 * Up to 4096 elements are read back to the host and folded with MeanOp
 * directly. Larger inputs are first reduced on the device by
 * mean_first_launcher into per-group partial means and counts, which are
 * then read back and combined on the host.
 *
 * @param in  device parameter (taken by value; its dims/strides may be
 *            rewritten locally when the data is contiguous)
 * @return the running mean held by the final MeanOp
 */
To mean_all(Param in)
{
    int total = in.info.dims[0] * in.info.dims[1] * in.info.dims[2] * in.info.dims[3];

    // FIXME: Use better heuristics to get to the optimum number
    if (total <= 4096) {
        // Small input: do everything on the host.
        // NOTE(review): reads `total` consecutive elements starting at the
        // offset; presumably the data is expected to be contiguous - confirm.
        vector<Ti> host(total);

        getQueue().enqueueReadBuffer(*in.data, CL_TRUE, sizeof(Ti) * in.info.offset,
                                     sizeof(Ti) * total, host.data());

        //TODO : MeanOp with (Tw)1
        Transform<Ti, To, af_add_t> toValue;
        Transform<uint, Tw, af_add_t> toWeight;
        MeanOp<To, Tw> acc(toValue(host[0]), toWeight(1));
        for (int i = 1; i < (int)total; i++) {
            acc(toValue(host[i]), toWeight(1));
        }

        return acc.runningMean;
    }

    // Contiguous data can be treated as one long vector.
    bool contiguous = (in.info.strides[0] == 1);
    for (int k = 1; k < 4; k++) {
        contiguous &= (in.info.strides[k] == (in.info.strides[k - 1] * in.info.dims[k - 1]));
    }

    if (contiguous) {
        in.info.dims[0] = total;
        for (int k = 1; k < 4; k++) {
            in.info.dims[k] = 1;
            in.info.strides[k] = total;
        }
    }

    // Launch geometry for the first reduction pass.
    uint threads_x = nextpow2(std::max(32u, (uint)in.info.dims[0]));
    threads_x = std::min(threads_x, THREADS_PER_GROUP);
    const uint threads_y = THREADS_PER_GROUP / threads_x;

    const uint groups_x = divup(in.info.dims[0], threads_x * REPEAT);
    const uint groups_y = divup(in.info.dims[1], threads_y);

    Array<To> partialMeans  = createEmptyArray<To>(groups_x);
    Array<Tw> partialCounts = createEmptyArray<Tw>(groups_x);
    Param iWt;

    mean_first_launcher<Ti, Tw, To>(partialMeans, partialCounts, in, iWt, threads_x, groups_x, groups_y);

    vector<To> hostMeans(partialMeans.elements());
    vector<Tw> hostCounts(partialMeans.elements());

    // Blocking reads (CL_TRUE) so the host vectors are valid below.
    getQueue().enqueueReadBuffer(*partialMeans.get(), CL_TRUE, 0, sizeof(To) * partialMeans.elements(), hostMeans.data());
    getQueue().enqueueReadBuffer(*partialCounts.get(),  CL_TRUE, 0, sizeof(Tw) * partialCounts.elements(), hostCounts.data());

    // Fold the per-group partial results into one mean.
    MeanOp<To, Tw> acc(hostMeans[0], hostCounts[0]);
    for (int i = 1; i < (int)hostMeans.size(); i++) {
        acc(hostMeans[i], hostCounts[i]);
    }

    return acc.runningMean;
}
/**
 * Slides the template `tImg` over every 2D slice of `sImg` and writes one
 * disparity value per search-image pixel.
 *
 * The metric is selected by `mType` (declared outside this block,
 * presumably a template parameter). SAD, ZSAD, LSAD, SSD, ZSSD and LSSD are
 * implemented; NCC, ZNCC and SHD are placeholders that leave the disparity
 * at 0. Window pixels that fall outside the search image read as 0.
 *
 * @param sImg  search image; dims 2 and 3 are treated as batches
 * @param tImg  template; only its first 2D slice is read
 * @return array with the dims of `sImg` holding the disparities
 */
Array<outType> match_template(const Array<inType> &sImg, const Array<inType> &tImg)
{
    const dim4 sDims = sImg.dims();
    const dim4 tDims = tImg.dims();
    const dim4 sStrides = sImg.strides();
    const dim4 tStrides = tImg.strides();

    const dim_t tDim0  = tDims[0];
    const dim_t tDim1  = tDims[1];
    const dim_t sDim0  = sDims[0];
    const dim_t sDim1  = sDims[1];

    Array<outType> out = createEmptyArray<outType>(sDims);
    const dim4 oStrides = out.strides();

    outType tImgMean = outType(0);
    dim_t winNumElements = tImg.elements();
    // Zero-mean and locally scaled metrics need the window/template means.
    bool needMean = mType==AF_ZSAD || mType==AF_LSAD ||
                    mType==AF_ZSSD || mType==AF_LSSD ||
                    mType==AF_ZNCC;
    const inType * tpl = tImg.get();

    if (needMean) {
        for(dim_t tj=0; tj<tDim1; tj++) {
            dim_t tjStride = tj*tStrides[1];

            for(dim_t ti=0; ti<tDim0; ti++) {
                tImgMean += (outType)tpl[tjStride+ti*tStrides[0]];
            }
        }
        tImgMean /= winNumElements;
    }

    outType * const dstBase      = out.get();
    const inType * const srcBase = sImg.get();

    for(dim_t b3=0; b3<sDims[3]; ++b3) {
    for(dim_t b2=0; b2<sDims[2]; ++b2) {

        // Slice pointers are derived from the batch indices. The previous
        // code advanced running pointers by stride[2] per slice and then
        // again by stride[3] per outer batch, which overshoots once
        // sDims[3] > 1.
        const inType * src = srcBase + b3*sStrides[3] + b2*sStrides[2];
        outType * dst      = dstBase + b3*oStrides[3] + b2*oStrides[2];

        // slide through image window after window
        for(dim_t sj=0; sj<sDim1; sj++) {

            dim_t ojStride = sj*oStrides[1];

            for(dim_t si=0; si<sDim0; si++) {
                outType disparity = outType(0);

                // mean for window
                // this variable will be used based on mType value
                outType wImgMean = outType(0);
                if (needMean) {
                    for(dim_t tj=0,j=sj; tj<tDim1; tj++, j++) {
                        dim_t jStride = j*sStrides[1];

                        for(dim_t ti=0, i=si; ti<tDim0; ti++, i++) {
                            // outside the image the window reads as zero
                            inType sVal = ((j<sDim1 && i<sDim0) ?
                                    src[jStride + i*sStrides[0]] : inType(0));
                            wImgMean += (outType)sVal;
                        }
                    }
                    wImgMean /= winNumElements;
                }

                // run the window match metric
                for(dim_t tj=0,j=sj; tj<tDim1; tj++, j++) {
                    dim_t jStride = j*sStrides[1];
                    dim_t tjStride = tj*tStrides[1];

                    for(dim_t ti=0, i=si; ti<tDim0; ti++, i++) {
                        inType sVal = ((j<sDim1 && i<sDim0) ?
                                            src[jStride + i*sStrides[0]] : inType(0));
                        inType tVal = tpl[tjStride+ti*tStrides[0]];
                        outType temp;
                        switch(mType) {
                            case AF_SAD:
                                disparity += fabs((outType)sVal-(outType)tVal);
                                break;
                            case AF_ZSAD:
                                disparity += fabs((outType)sVal - wImgMean -
                                                  (outType)tVal + tImgMean);
                                break;
                            case AF_LSAD:
                                disparity += fabs((outType)sVal-(wImgMean/tImgMean)*tVal);
                                break;
                            case AF_SSD:
                                disparity += ((outType)sVal-(outType)tVal)*((outType)sVal-(outType)tVal);
                                break;
                            case AF_ZSSD:
                                temp = ((outType)sVal - wImgMean - (outType)tVal + tImgMean);
                                disparity += temp*temp;
                                break;
                            case AF_LSSD:
                                temp = ((outType)sVal-(wImgMean/tImgMean)*tVal);
                                disparity += temp*temp;
                                break;
                            case AF_NCC:
                                //TODO: future implementation
                                break;
                            case AF_ZNCC:
                                //TODO: future implementation
                                break;
                            case AF_SHD:
                                //TODO: future implementation
                                break;
                        }
                    }
                }
                // output is just created, hence not doing the
                // extra multiplication for 0th dim stride
                dst[ojStride + si] = disparity;
            }
        }
    }
    }

    return out;
}
/**
 * Prepares a Forge surface plot for the given x / y / z data.
 *
 * Vector-shaped x input is expanded to a grid together with y (both are
 * tiled), all three arrays are flattened to rows and joined along
 * dimension 0, and the result is copied into a surface of the chart that
 * belongs to the requested grid cell. Unless the chart has a manual axes
 * override, its limits are grown to cover the data.
 *
 * @param window  window owning the chart
 * @param xVals, yVals, zVals  coordinate and height data
 * @param props   grid cell (row / col); negative values select cell (0, 0)
 * @return the chart the surface was attached to
 */
forge::Chart* setup_surface(const forge::Window* const window,
                            const af_array xVals, const af_array yVals, const af_array zVals,
                            const af_cell* const props)
{
    Array<T> xIn = getArray<T>(xVals);
    Array<T> yIn = getArray<T>(yVals);
    Array<T> zIn = getArray<T>(zVals);

    const ArrayInfo& Xinfo = getInfo(xVals);
    const ArrayInfo& Yinfo = getInfo(yVals);
    const ArrayInfo& Zinfo = getInfo(zVals);

    const af::dim4 xDims = Xinfo.dims();
    const af::dim4 yDims = Yinfo.dims();
    const af::dim4 zDims = Zinfo.dims();

    if(Xinfo.isVector()){
        // Make xIn a column vector, then repeat it along dimension 1
        xIn = modDims(xIn, xIn.elements());
        dim4 xRepeat(1, yDims[0], 1, 1);
        xIn = tile(xIn, xRepeat);

        // Make yIn a row vector, then repeat it along dimension 0
        yIn= modDims(yIn, af::dim4(1, yIn.elements()));
        dim4 yRepeat(xDims[0], 1, 1, 1);
        yIn = tile(yIn, yRepeat);
    }

    // Flatten all three inputs into row vectors of zIn's length
    dim4 flatRow = dim4(1, zIn.elements());
    xIn = modDims(xIn, flatRow);
    yIn = modDims(yIn, flatRow);
    zIn = modDims(zIn, flatRow);

    // Stack the rows along dimension 0, which avoids a reorder
    std::vector<Array<T> > rows{xIn, yIn, zIn};
    Array<T> points = join(0, rows);

    ForgeManager& fgMngr = ForgeManager::getInstance();

    // Use the chart of the requested grid cell, or cell (0, 0) by default
    const bool hasCell = props->col>-1 && props->row>-1;
    forge::Chart* chart = hasCell
        ? fgMngr.getChart(window, props->row, props->col, FG_CHART_3D)
        : fgMngr.getChart(window, 0, 0, FG_CHART_3D);

    forge::Surface* surface = fgMngr.getSurface(chart, zDims[0], zDims[1], getGLType<T>());

    surface->setColor(0.0, 1.0, 0.0, 1.0);

    // Only touch the axes when the user has not fixed them manually
    if(!fgMngr.getChartAxesOverride(chart)) {
        float cmin[3], cmax[3];
        T     dmin[3], dmax[3];
        chart->getAxesLimits(&cmin[0], &cmax[0], &cmin[1], &cmax[1], &cmin[2], &cmax[2]);
        dmin[0] = reduce_all<af_min_t, T, T>(xIn);
        dmax[0] = reduce_all<af_max_t, T, T>(xIn);
        dmin[1] = reduce_all<af_min_t, T, T>(yIn);
        dmax[1] = reduce_all<af_max_t, T, T>(yIn);
        dmin[2] = reduce_all<af_min_t, T, T>(zIn);
        dmax[2] = reduce_all<af_max_t, T, T>(zIn);

        // All-zero limits mean nothing was set before: take the data range
        // unconditionally. Otherwise only ever widen the existing range.
        bool noLimitsYet = true;
        for (int axis = 0; axis < 3; ++axis) {
            noLimitsYet = noLimitsYet && cmin[axis] == 0 && cmax[axis] == 0;
        }

        for (int axis = 0; axis < 3; ++axis) {
            if (noLimitsYet || cmin[axis] > dmin[axis])
                cmin[axis] = step_round(dmin[axis], false);
            if (noLimitsYet || cmax[axis] < dmax[axis])
                cmax[axis] = step_round(dmax[axis], true);
        }

        chart->setAxesLimits(cmin[0], cmax[0], cmin[1], cmax[1], cmin[2], cmax[2]);
    }

    copy_surface<T>(points, surface);

    return chart;
}
Example #21
0
/**
 * Sorts keys and values batch-wise along dimension `dim`.
 *
 * Every element gets a batch id (its position with the `dim` coordinate
 * removed). The (key, value, batch id) tuples are stable-sorted with
 * KIPCompareV in the requested direction and then stable-sorted again with
 * KIPCompareK, after which keys and values are written back in place.
 *
 * @param okey         keys, overwritten with the sorted keys
 * @param oval         values, reordered together with the keys
 * @param dim          dimension that is sorted
 * @param isAscending  direction of the first sort pass
 */
void sortByKeyBatched(Array<Tk> okey, Array<Tv> oval, const int dim, bool isAscending)
{
    af::dim4 inDims = okey.dims();

    af::dim4 tileDims(1);  // kept from the original; not read below
    af::dim4 seqDims = inDims;
    tileDims[dim] = inDims[dim];
    seqDims[dim] = 1;

    // Batch id per element: an iota over seqDims, repeated along `dim`.
    uint* batchId = memAlloc<uint>(inDims.elements());
    {
        // Strides of a densely packed array of shape inDims
        af::dim4 packed(1);
        for(int i = 1; i < 4; i++)
            packed[i] = packed[i-1] * inDims[i-1];

        for(dim_t w = 0; w < inDims[3]; w++) {
            const dim_t baseW = w * packed[3];
            uint idW = (w % seqDims[3]) * seqDims[0] * seqDims[1] * seqDims[2];
            for(dim_t z = 0; z < inDims[2]; z++) {
                const dim_t baseZ = baseW + z * packed[2];
                uint idZ = idW + (z % seqDims[2]) * seqDims[0] * seqDims[1];
                for(dim_t y = 0; y < inDims[1]; y++) {
                    const dim_t baseY = baseZ + y * packed[1];
                    uint idY = idZ + (y % seqDims[1]) * seqDims[0];
                    for(dim_t x = 0; x < inDims[0]; x++) {
                        batchId[baseY + x] = idY + (x % seqDims[0]);
                    }
                }
            }
        }
    }

    Tk *keyData = okey.get();
    Tv *valData = oval.get();

    typedef KeyIndexPair<Tk, Tv> CurrentTuple;
    const size_t count = okey.elements();
    const size_t bytes = okey.elements() * sizeof(CurrentTuple);
    CurrentTuple *tuples = (CurrentTuple *)memAlloc<char>(bytes);

    // Bundle key, value and batch id so they move together while sorting
    for(unsigned i = 0; i < count; i++) {
        tuples[i] = std::make_tuple(keyData[i], valData[i], batchId[i]);
    }

    memFree(batchId); // the ids now live inside the tuples

    CurrentTuple *first = tuples;
    CurrentTuple *last  = tuples + count;

    if(!isAscending) {
      std::stable_sort(first, last, KIPCompareV<Tk, Tv, false>());
    }
    else {
      std::stable_sort(first, last, KIPCompareV<Tk, Tv, true>());
    }

    // Second stable pass; keeps the order of the first pass among equals
    std::stable_sort(first, last, KIPCompareK<Tk, Tv, true>());

    for(unsigned i = 0; i < count; i++) {
        keyData[i] = std::get<0>(tuples[i]);
        valData[i] = std::get<1>(tuples[i]);
    }

    memFree((char *)tuples);
    return;
}
Example #22
0
/**
 * Harris corner detector.
 *
 * Pipeline: build a 1D window filter, compute first-order gradients of `in`,
 * derive the second-order products, smooth them with the window filter
 * (applied separably), compute a Harris response per pixel, run non-maximal
 * suppression, and finally hand the surviving corners to the output arrays.
 *
 * @param x_out        receives the first coordinate of each kept corner
 * @param y_out        receives the second coordinate of each kept corner
 * @param resp_out     receives the Harris response of each kept corner
 * @param in           input image
 * @param max_corners  if > 0, keep at most this many corners (the strongest
 *                     ones); if 0, keep every corner at or above
 *                     `min_response` (bounded by 20% of the input elements)
 * @param min_response response threshold, only used when `max_corners == 0`
 * @param sigma        Gaussian sigma for the window; values below 0.5 select
 *                     a uniform (box) window instead
 * @param filter_len   length of the 1D window filter
 * @param k_thr        Harris constant forwarded to the response computation
 * @return number of corners written to the outputs; when 0 is returned the
 *         output arrays are left unassigned.
 */
unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out,
                const Array<T> &in, const unsigned max_corners, const float min_response,
                const float sigma, const unsigned filter_len, const float k_thr)
{
    dim4 idims = in.dims();

    // Window filter
    convAccT* h_filter = memAlloc<convAccT>(filter_len);
    // Decide between rectangular or circular filter
    if (sigma < 0.5f) {
        // Uniform window: every tap weighs 1/filter_len. The coefficient is
        // computed in the filter's own element type (convAccT), not T.
        const convAccT box_weight = (convAccT)1.f / (filter_len);
        for (unsigned i = 0; i < filter_len; i++)
            h_filter[i] = box_weight;
    }
    else {
        gaussian1D<convAccT>(h_filter, (int)filter_len, sigma);
    }
    // NOTE(review): h_filter is never freed in this function; presumably
    // createDeviceDataArray takes ownership of the pointer -- confirm.
    Array<convAccT> filter = createDeviceDataArray<convAccT>(dim4(filter_len), (const void*)h_filter);

    // Pixels closer than this to the image edge are excluded downstream
    unsigned border_len = filter_len / 2 + 1;

    Array<T> ix = createEmptyArray<T>(idims);
    Array<T> iy = createEmptyArray<T>(idims);

    // Compute first order derivatives
    gradient<T>(iy, ix, in);

    Array<T> ixx = createEmptyArray<T>(idims);
    Array<T> ixy = createEmptyArray<T>(idims);
    Array<T> iyy = createEmptyArray<T>(idims);

    // Compute second-order derivatives
    second_order_deriv<T>(ixx.get(), ixy.get(), iyy.get(),
                          in.elements(), ix.get(), iy.get());

    // Convolve second-order derivatives with proper window filter
    ixx = convolve2<T, convAccT, false>(ixx, filter, filter);
    ixy = convolve2<T, convAccT, false>(ixy, filter, filter);
    iyy = convolve2<T, convAccT, false>(iyy, filter, filter);

    // Candidate buffers are sized for at most 20% of the input elements
    const unsigned corner_lim = in.elements() * 0.2f;

    float* x_corners = memAlloc<float>(corner_lim);
    float* y_corners = memAlloc<float>(corner_lim);
    float* resp_corners = memAlloc<float>(corner_lim);

    T* resp = memAlloc<T>(in.elements());

    // Calculate Harris responses for all pixels
    harris_responses<T>(resp,
                        idims[0], idims[1],
                        ixx.get(), ixy.get(), iyy.get(),
                        k_thr, border_len);

    // When a corner budget is given, no threshold is applied here and the
    // strongest responses are selected below. The threshold must stay a
    // float: storing it in an unsigned truncated e.g. 0.5 down to 0.
    const float min_r = (max_corners > 0) ? 0.f : min_response;
    unsigned corners_found = 0;

    // Performs non-maximal suppression
    non_maximal<T>(x_corners, y_corners, resp_corners, &corners_found,
                   idims[0], idims[1], resp, min_r, border_len, corner_lim);

    memFree(resp);

    const unsigned corners_out = (max_corners > 0) ?
                                 min(corners_found, max_corners) :
                                 min(corners_found, corner_lim);
    if (corners_out == 0) {
        // No output array adopts the candidate buffers on this path, so they
        // have to be released here; otherwise they leak.
        memFree(x_corners);
        memFree(y_corners);
        memFree(resp_corners);
        return 0;
    }

    if (max_corners > 0 && corners_found > corners_out) {
        // More corners than requested: sort by response and keep the best.
        // NOTE(review): resp_corners is not freed on this path; presumably
        // the array created from it owns the buffer -- confirm.
        Array<float> found_resp = createDeviceDataArray<float>(dim4(corners_found), (void*)resp_corners);
        Array<float> harris_sorted = createEmptyArray<float>(dim4(corners_found));
        Array<unsigned> harris_idx = createEmptyArray<unsigned>(dim4(corners_found));

        // Sort Harris responses
        sort_index<float, false>(harris_sorted, harris_idx, found_resp, 0);

        x_out = createEmptyArray<float>(dim4(corners_out));
        y_out = createEmptyArray<float>(dim4(corners_out));
        resp_out = createEmptyArray<float>(dim4(corners_out));

        // Keep only the corners with higher Harris responses
        keep_corners(x_out.get(), y_out.get(), resp_out.get(),
                     x_corners, y_corners, harris_sorted.get(), harris_idx.get(),
                     corners_out);

        memFree(x_corners);
        memFree(y_corners);
    }
    else if (max_corners == 0 && corners_found < corner_lim) {
        // Fewer corners than the buffers can hold: copy into right-sized
        // outputs and release the oversized candidate buffers.
        x_out = createEmptyArray<float>(dim4(corners_out));
        y_out = createEmptyArray<float>(dim4(corners_out));
        resp_out = createEmptyArray<float>(dim4(corners_out));

        memcpy(x_out.get(), x_corners, corners_out * sizeof(float));
        memcpy(y_out.get(), y_corners, corners_out * sizeof(float));
        memcpy(resp_out.get(), resp_corners, corners_out * sizeof(float));

        memFree(x_corners);
        memFree(y_corners);
        memFree(resp_corners);
    }
    else {
        // Wrap the candidate buffers directly instead of copying them
        x_out = createDeviceDataArray<float>(dim4(corners_out), (void*)x_corners);
        y_out = createDeviceDataArray<float>(dim4(corners_out), (void*)y_corners);
        resp_out = createDeviceDataArray<float>(dim4(corners_out), (void*)resp_corners);
    }

    return corners_out;
}
Example #23
0
/**
 * Weighted mean over every element of `in`, using `inWeight` as the
 * per-element weights.
 *
 * Inputs with more than 4096 elements are first reduced by
 * mean_first_launcher into one partial (mean, weight) pair per group along
 * the first dimension; those partials are read back and merged on the host.
 * Smaller inputs are read back whole and accumulated on the host.
 *
 * @param in        values (taken by value; its info may be rewritten locally)
 * @param inWeight  weights (taken by value; its info may be rewritten locally)
 * @return the running mean held by the MeanOp accumulator after all
 *         elements have been merged.
 */
T mean_all_weighted(Param in, Param inWeight)
{
    int in_elements = in.info.dims[0] * in.info.dims[1] * in.info.dims[2] * in.info.dims[3];

    // FIXME: Use better heuristics to get to the optimum number
    if (in_elements > 4096) {

        // A buffer is "linear" when stride[0] is 1 and every further stride
        // equals the previous stride times the previous dimension.
        // Each buffer must be checked against its OWN strides; the weight
        // flag was previously seeded from the value buffer's stride.
        bool in_is_linear = (in.info.strides[0] == 1);
        bool wt_is_linear = (inWeight.info.strides[0] == 1);
        for (int k = 1; k < 4; k++) {
            in_is_linear &= ( in.info.strides[k] == ( in.info.strides[k - 1] *  in.info.dims[k - 1]));
            wt_is_linear &= (inWeight.info.strides[k] == (inWeight.info.strides[k - 1] * inWeight.info.dims[k - 1]));
        }

        if (in_is_linear && wt_is_linear) {
            // Collapse both buffers to one long row. Only dims and strides
            // are rewritten so that each buffer keeps its own offset
            // (assigning in.info wholesale would also overwrite the weight
            // buffer's offset with the value buffer's).
            in.info.dims[0]       = in_elements;
            inWeight.info.dims[0] = in_elements;
            for (int k = 1; k < 4; k++) {
                in.info.dims[k]          = 1;
                in.info.strides[k]       = in_elements;
                inWeight.info.dims[k]    = 1;
                inWeight.info.strides[k] = in_elements;
            }
        }

        // Launch geometry: power-of-two threads along dim 0 (at least 32,
        // capped at THREADS_PER_GROUP), remaining threads go to dim 1.
        uint threads_x = nextpow2(std::max(32u, (uint)in.info.dims[0]));
        threads_x = std::min(threads_x, THREADS_PER_GROUP);
        uint threads_y = THREADS_PER_GROUP / threads_x;

        uint groups_x = divup(in.info.dims[0], threads_x * REPEAT);
        uint groups_y = divup(in.info.dims[1], threads_y);

        // One partial mean and one partial weight per group along dim 0
        Array<T> tmpOut = createEmptyArray<T>(groups_x);
        Array<Tw> tmpWeight = createEmptyArray<Tw>(groups_x);

        mean_first_launcher<T, Tw, T>(tmpOut, tmpWeight, in, inWeight, threads_x, groups_x, groups_y);

        vector<T> h_ptr(tmpOut.elements());
        vector<Tw> h_wptr(tmpWeight.elements());

        // Blocking reads (CL_TRUE): host vectors are valid once these return
        getQueue().enqueueReadBuffer(*tmpOut.get(), CL_TRUE, 0, sizeof(T) * tmpOut.elements(), h_ptr.data());
        getQueue().enqueueReadBuffer(*tmpWeight.get(),  CL_TRUE, 0, sizeof(Tw) * tmpWeight.elements(), h_wptr.data());

        // Merge the per-group partial results on the host
        MeanOp<T, Tw> Op(h_ptr[0], h_wptr[0]);
        for (int i = 1; i < (int)tmpOut.elements(); i++) {
            Op(h_ptr[i], h_wptr[i]);
        }

        return Op.runningMean;

    } else {

        vector<T> h_ptr(in_elements);
        vector<Tw> h_wptr(in_elements);

        // Read in_elements contiguous items starting at each buffer's offset.
        // NOTE(review): this path reads a contiguous range and indexes
        // element 0 unconditionally -- presumably callers guarantee linear,
        // non-empty inputs here; confirm.
        getQueue().enqueueReadBuffer(*in.data, CL_TRUE, sizeof(T) * in.info.offset,
                                     sizeof(T) * in_elements, h_ptr.data());
        getQueue().enqueueReadBuffer(*inWeight.data, CL_TRUE, sizeof(Tw) * inWeight.info.offset,
                                     sizeof(Tw) * in_elements, h_wptr.data());

        MeanOp<T, Tw> Op(h_ptr[0], h_wptr[0]);
        for (int i = 1; i < (int)in_elements; i++) {
            Op(h_ptr[i], h_wptr[i]);
        }

        return Op.runningMean;
    }
}
Example #24
0
/**
 * Standard deviation over all elements of the array behind `in`.
 *
 * The sum of squared deviations from the mean is divided by the element
 * count itself (not count - 1) before the square root is taken.
 */
static outType stdev(const af_array& in)
{
    // Typed array for the handle, and the same data converted to outType
    Array<inType>  source = getArray<inType>(in);
    Array<outType> values = cast<outType>(source);

    // Array of the same shape holding the mean in every position, so the
    // subtraction below is a plain element-wise operation
    Array<outType> meanFill =
        createValueArray<outType>(values.dims(), mean<inType, outType>(source));

    // (x - mean), then (x - mean)^2 via element-wise self-multiplication
    Array<outType> centered =
        detail::arithOp<outType, af_sub_t>(values, meanFill, values.dims());
    Array<outType> squared =
        detail::arithOp<outType, af_mul_t>(centered, centered, centered.dims());

    // Sum of squares divided by the number of elements
    outType variance =
        division(reduce_all<af_add_t, outType, outType>(squared), values.elements());

    return sqrt(variance);
}