// Template/example for a CPU-backend function: evaluate inputs, create the
// output, then enqueue the actual kernel onto the worker thread.
Array<T> exampleFunction(const Array<T> &a, const Array<T> &b, const af_someenum_t method) {
    // All input Arrays must call eval() in CPU backend implementations.
    // CPU functions are asynchronous launches, so any Arrays that are
    // views/JIT nodes need to be evaluated before being passed to
    // functions enqueued onto the queue.
    a.eval();
    b.eval();

    // NOTE: left default-constructed in this example. In most real
    // implementations this should be '= in.dims();', but it depends on the
    // algorithm being implemented.
    dim4 outputDims;

    // Use the create***Array<T> helper functions defined in Array.hpp to
    // create different kinds of Arrays (empty, value-filled, host-data, ...).
    Array<T> out = createEmptyArray<T>(outputDims);

    // Enqueue the function call on the worker thread.
    // The kernel body lives in src/backend/cpu/kernel/exampleFunction.hpp
    getQueue().enqueue(kernel::exampleFunction<T>, out, a, b, method);

    return out; // return the result
}
// 1D interpolation: dispatches to kernel::approx1 with the interpolation
// order (1 = nearest/lower, 2 = linear family, 3 = cubic family) baked in as
// a template parameter. Unknown methods are silently ignored.
void approx1(Array<Ty> &yo, const Array<Ty> &yi, const Array<Tp> &xo, const int xdim,
             const Tp &xi_beg, const Tp &xi_step, const af_interp_type method,
             const float offGrid) {
    // Inputs must be evaluated before being handed to the async queue.
    yi.eval();
    xo.eval();

    if (method == AF_INTERP_NEAREST || method == AF_INTERP_LOWER) {
        getQueue().enqueue(kernel::approx1<Ty, Tp, 1>, yo, yi, xo, xdim,
                           xi_beg, xi_step, offGrid, method);
    } else if (method == AF_INTERP_LINEAR || method == AF_INTERP_LINEAR_COSINE) {
        getQueue().enqueue(kernel::approx1<Ty, Tp, 2>, yo, yi, xo, xdim,
                           xi_beg, xi_step, offGrid, method);
    } else if (method == AF_INTERP_CUBIC || method == AF_INTERP_CUBIC_SPLINE) {
        getQueue().enqueue(kernel::approx1<Ty, Tp, 3>, yo, yi, xo, xdim,
                           xi_beg, xi_step, offGrid, method);
    }
    // Any other interpolation type falls through with no work enqueued,
    // matching the original switch's empty default case.
}
// Assign rhs into out at the positions described by the four indexers.
// Sequence indexers are collected into `seqs`; array indexers are cast to
// uint arrays and evaluated before the kernel is enqueued.
void assign(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs) {
    out.eval();
    rhs.eval();

    vector<bool> isSeq(4);
    vector<af_seq> seqs(4, af_span);
    // Record which indexers are sequences and capture their af_seq values.
    for (dim_t i = 0; i < 4; ++i) {
        isSeq[i] = idxrs[i].isSeq;
        if (isSeq[i]) { seqs[i] = idxrs[i].idx.seq; }
    }

    // For af_array indexers, cast to uint index arrays and evaluate them.
    vector< Array<uint> > idxArrs(4, createEmptyArray<uint>(dim4()));
    for (dim_t i = 0; i < 4; ++i) {
        if (isSeq[i]) continue;
        idxArrs[i] = castArray<uint>(idxrs[i].idx.arr);
        idxArrs[i].eval();
    }

    vector<CParam<uint>> idxParams(idxArrs.begin(), idxArrs.end());

    getQueue().enqueue(kernel::assign<T>, out, out.getDataDims(), rhs,
                       move(isSeq), move(seqs), move(idxParams));
}
// Concatenate `first` and `second` along `dim`.
// NOTE(review): all dimensions except the join dimension are assumed equal;
// output dims other than `dim` are taken from `first` — verified by callers.
Array<Tx> join(const int dim, const Array<Tx> &first, const Array<Ty> &second) {
    first.eval();
    second.eval();

    const af::dim4 fdims = first.dims();
    const af::dim4 sdims = second.dims();

    // Output shape: copy of first's dims, with the join dimension summed.
    af::dim4 odims = fdims;
    odims[dim] = fdims[dim] + sdims[dim];

    Array<Tx> out = createEmptyArray<Tx>(odims);
    getQueue().enqueue(kernel::join<Tx, Ty>, out, dim, first, second);
    return out;
}
// Brute-force nearest-neighbour matching between query and train features.
// Writes the best-match index per query into `idx` and its distance into
// `dist`. Only n_dist == 1 is supported on the CPU backend.
//
// FIX: the original called idx.eval()/dist.eval() on the out-parameters and
// then immediately overwrote both with freshly created arrays — those evals
// were dead work and have been removed.
void nearest_neighbour(Array<uint>& idx, Array<To>& dist, const Array<T>& query,
                       const Array<T>& train, const uint dist_dim, const uint n_dist,
                       const af_match_type dist_type) {
    if (n_dist > 1) {
        CPU_NOT_SUPPORTED();
    }

    // Inputs must be evaluated before being captured by the async queue.
    query.eval();
    train.eval();

    // dist_dim selects which axis holds the feature vector; the other axis
    // enumerates samples.
    uint sample_dim = (dist_dim == 0) ? 1 : 0;
    const dim4 qDims = query.dims();
    const dim4 outDims(n_dist, qDims[sample_dim]);

    idx  = createEmptyArray<uint>(outDims);
    dist = createEmptyArray<To >(outDims);

    switch (dist_type) {
        case AF_SAD:
            getQueue().enqueue(kernel::nearest_neighbour<T, To, AF_SAD>,
                               idx, dist, query, train, dist_dim, n_dist);
            break;
        case AF_SSD:
            getQueue().enqueue(kernel::nearest_neighbour<T, To, AF_SSD>,
                               idx, dist, query, train, dist_dim, n_dist);
            break;
        case AF_SHD:
            getQueue().enqueue(kernel::nearest_neighbour<T, To, AF_SHD>,
                               idx, dist, query, train, dist_dim, n_dist);
            break;
        default:
            AF_ERROR("Unsupported dist_type", AF_ERR_NOT_CONFIGURED);
    }
}
// Create an output of shape `dims` filled with `default_value`, then copy
// (and scale by `factor`) the elements of `in` into it element-wise.
Array<outType> padArray(Array<inType> const &in, dim4 const &dims,
                        outType default_value, double factor) {
    // Evaluate the input before enqueueing work that reads it.
    in.eval();

    Array<outType> padded = createValueArray<outType>(dims, default_value);
    padded.eval();

    getQueue().enqueue(kernel::copyElemwise<outType, inType>, padded, in,
                       outType(default_value), factor);
    return padded;
}
// Template matching: compute the match metric (MatchT) of tImg against every
// position in sImg. Output has the same shape as the search image.
Array<OutT> match_template(const Array<InT> &sImg, const Array<InT> &tImg) {
    sImg.eval();
    tImg.eval();

    Array<OutT> result = createEmptyArray<OutT>(sImg.dims());
    getQueue().enqueue(kernel::matchTemplate<OutT, InT, MatchT>, result, sImg, tImg);
    return result;
}
// Dense matrix multiply via CBLAS, enqueued asynchronously.
// Dispatches to gemv when the RHS is a single column, gemm otherwise.
Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs, af_mat_prop optLhs, af_mat_prop optRhs) {
    lhs.eval();
    rhs.eval();

    CBLAS_TRANSPOSE lOpts = toCblasTranspose(optLhs);
    CBLAS_TRANSPOSE rOpts = toCblasTranspose(optRhs);

    // Logical row/col dims of each operand, accounting for transposition.
    int aRowDim = (lOpts == CblasNoTrans) ? 0 : 1;
    int aColDim = (lOpts == CblasNoTrans) ? 1 : 0;
    int bColDim = (rOpts == CblasNoTrans) ? 1 : 0;

    dim4 lDims = lhs.dims();
    dim4 rDims = rhs.dims();
    int M = lDims[aRowDim];   // rows of op(lhs) == rows of result
    int N = rDims[bColDim];   // cols of op(rhs) == cols of result
    int K = lDims[aColDim];   // shared inner dimension

    using BT  = typename blas_base<T>::type;
    using CBT = const typename blas_base<T>::type;

    Array<T> out = createEmptyArray<T>(af::dim4(M, N, 1, 1));

    // Capture by value: the Arrays (refcounted) and the computed scalars must
    // outlive this scope, since the lambda runs later on the worker thread.
    auto func = [=] (Array<T> output, const Array<T> left, const Array<T> right) {
        auto alpha = getScale<T, 1>();
        auto beta  = getScale<T, 0>();
        dim4 lStrides = left.strides();
        dim4 rStrides = right.strides();

        if(rDims[bColDim] == 1) {
            // Matrix-vector fast path. Note gemv takes the *storage* dims of
            // lhs (lDims[0], lDims[1]) and applies lOpts itself.
            gemv_func<T>()(
                CblasColMajor, lOpts,
                lDims[0], lDims[1],
                alpha,
                reinterpret_cast<CBT*>(left.get()), lStrides[1],
                reinterpret_cast<CBT*>(right.get()), rStrides[0],
                beta,
                reinterpret_cast<BT*>(output.get()), 1);
        } else {
            // General matrix-matrix product; leading dims come from strides.
            gemm_func<T>()(
                CblasColMajor, lOpts, rOpts,
                M, N, K,
                alpha,
                reinterpret_cast<CBT*>(left.get()), lStrides[1],
                reinterpret_cast<CBT*>(right.get()), rStrides[1],
                beta,
                reinterpret_cast<BT*>(output.get()), output.dims()[0]);
        }
    };
    getQueue().enqueue(func, out, lhs, rhs);

    return out;
}
// Canny hysteresis step: promote weak edges that are connected to strong
// edges. Output starts zero-filled; the kernel marks the surviving edges.
Array<char> edgeTrackingByHysteresis(const Array<char>& strong, const Array<char>& weak) {
    strong.eval();
    weak.eval();

    Array<char> edges = createValueArray<char>(strong.dims(), 0);
    edges.eval();

    getQueue().enqueue(kernel::edgeTrackingHysteresis<char>, edges, strong, weak);
    return edges;
}
// Canny non-maximum suppression: thin the gradient magnitude image using the
// per-pixel gradient direction derived from gx/gy.
Array<float> nonMaximumSuppression(const Array<float>& mag, const Array<float>& gx,
                                   const Array<float>& gy) {
    mag.eval();
    gx.eval();
    gy.eval();

    Array<float> suppressed = createValueArray<float>(mag.dims(), 0);
    suppressed.eval();

    getQueue().enqueue(kernel::nonMaxSuppression<float>, suppressed, mag, gx, gy);
    return suppressed;
}
// Sparse(CSR) x dense matrix multiply.
// FIX: lhs and rhs were taken by value, copying the (refcounted) array
// handles and metadata on every call; they are read-only here, so pass by
// const reference. Call sites are unaffected.
//
// NOTE(review): optRhs is accepted but never used — only the lhs transpose
// option is honoured; confirm callers never pass a transposed rhs.
Array<T> matmul(const common::SparseArray<T> &lhs, const Array<T> &rhs,
                af_mat_prop optLhs, af_mat_prop optRhs) {
    lhs.eval();
    rhs.eval();

    // Similar operations to GEMM
    sparse_operation_t lOpts = toSparseTranspose(optLhs);

    int lRowDim = (lOpts == SPARSE_OPERATION_NON_TRANSPOSE) ? 0 : 1;
    static const int rColDim = 1;

    dim4 lDims = lhs.dims();
    dim4 rDims = rhs.dims();
    int M = lDims[lRowDim];
    int N = rDims[rColDim];

    // Output must be zero-initialized: the mv/mm kernels accumulate into it.
    Array<T> out = createValueArray<T>(af::dim4(M, N, 1, 1), scalar<T>(0));
    out.eval();

    int ldb = rhs.strides()[1];
    int ldc = out.strides()[1];

    Array<T  > values = lhs.getValues();
    Array<int> rowIdx = lhs.getRowIdx();
    Array<int> colIdx = lhs.getColIdx();

    if(rDims[rColDim] == 1) {
        // Matrix-vector paths; the <.., conjugate> flag handles the
        // conjugate-transpose case.
        if (lOpts == SPARSE_OPERATION_NON_TRANSPOSE) {
            mv<T, false>(out, values, rowIdx, colIdx, rhs, M);
        } else if (lOpts == SPARSE_OPERATION_TRANSPOSE) {
            mtv<T, false>(out, values, rowIdx, colIdx, rhs, M);
        } else if (lOpts == SPARSE_OPERATION_CONJUGATE_TRANSPOSE) {
            mtv<T, true>(out, values, rowIdx, colIdx, rhs, M);
        }
    } else {
        // Matrix-matrix paths.
        if (lOpts == SPARSE_OPERATION_NON_TRANSPOSE) {
            mm<T, false>(out, values, rowIdx, colIdx, rhs, M, N, ldb, ldc);
        } else if (lOpts == SPARSE_OPERATION_TRANSPOSE) {
            mtm<T, false>(out, values, rowIdx, colIdx, rhs, M, N, ldb, ldc);
        } else if (lOpts == SPARSE_OPERATION_CONJUGATE_TRANSPOSE) {
            mtm<T, true>(out, values, rowIdx, colIdx, rhs, M, N, ldb, ldc);
        }
    }

    return out;
}
// Convert a dense array to sparse storage. Only CSR is implemented on the
// CPU backend (stype is a compile-time template parameter).
//
// FIX: the original performed the full non-zero count, allocation, and
// conversion enqueue, and only THEN checked stype — erroring out after the
// work was already queued. The unsupported-storage check now fails fast
// before any conversion work is done.
SparseArray<T> sparseConvertDenseToStorage(const Array<T> &in_) {
    if (stype != AF_STORAGE_CSR)
        AF_ERROR("CPU Backend only supports Dense to CSR or COO", AF_ERR_NOT_SUPPORTED);

    in_.eval();

    // Count non-zero elements to size the sparse array exactly.
    uint nNZ = reduce_all<af_notzero_t, T, uint>(in_);
    SparseArray<T> sparse_ = createEmptySparseArray<T>(in_.dims(), nNZ, AF_STORAGE_CSR);
    sparse_.eval();

    // Capture by value so the arrays stay alive until the worker runs this.
    auto func = [=] (SparseArray<T> sparse, const Array<T> in) {
        Array<T  > values = sparse.getValues();
        Array<int> rowIdx = sparse.getRowIdx();
        Array<int> colIdx = sparse.getColIdx();
        kernel::dense_csr<T>()(values, rowIdx, colIdx, in);
    };
    getQueue().enqueue(func, sparse_, in_);

    return sparse_;
}
// In-place batched sort of `val` along the template dimension `dim`.
// Implemented by pairing every element with a linear key (iota) along `dim`,
// sorting by value, then sorting back by key so each batch stays in place.
void sortBatched(Array<T>& val, bool isAscending) {
    af::dim4 inDims = val.dims();

    // Build the key array: seqDims collapses the sort dimension, tileDims
    // replicates along it — iota then numbers elements within each slice.
    af::dim4 tileDims(1);
    af::dim4 seqDims = inDims;
    tileDims[dim] = inDims[dim];
    seqDims[dim] = 1;
    Array<uint> key = iota<uint>(seqDims, tileDims);

    Array<uint> resKey = createEmptyArray<uint>(dim4());
    Array<T> resVal = createEmptyArray<T>(dim4());

    // Flatten both arrays to 1D (dim_t -> dim4 makes (N,1,1,1)) so the
    // whole batch can be sorted with a single dim-0 sort_by_key.
    val.setDataDims(inDims.elements());
    key.setDataDims(inDims.elements());

    sort_by_key<T, uint>(resVal, resKey, val, key, 0, isAscending);

    // Needs to be ascending (true) in order to maintain the indices properly
    sort_by_key<uint, T>(key, val, resKey, resVal, 0, true);
    val.eval();

    // Restore the original shape. This is correct only for dim0.
    val.setDataDims(inDims);
}
// Out-of-place triangular extraction: allocates the output and delegates to
// the in-place triangle<T, is_upper, is_unit_diag> overload.
Array<T> triangle(const Array<T> &in) {
    in.eval();
    Array<T> result = createEmptyArray<T>(in.dims());
    triangle<T, is_upper, is_unit_diag>(result, in);
    return result;
}
// Create a view into `parent` described by the af_seq index vector.
// Returns a zero-copy view when possible; copies when `copy` is requested
// and the resulting strides cannot be represented as a simple view.
Array<T> createSubArray(const Array<T>& parent, const std::vector<af_seq> &index, bool copy) {
    parent.eval();

    dim4 dDims = parent.getDataDims();  // underlying buffer dims
    dim4 pDims = parent.dims();         // logical (view) dims

    // Output dims come from the logical shape; offsets and strides are
    // computed against the underlying data dims.
    dim4 dims   = toDims  (index, pDims);
    dim4 offset = toOffset(index, dDims);
    dim4 stride = toStride (index, dDims);

    Array<T> out = Array<T>(parent, dims, offset, stride);

    if (!copy) return out;

    // Materialize the view when it is non-contiguous in dim0 or any stride
    // is negative — such views can't be used directly downstream.
    if (stride[0] != 1 ||
        stride[1] <  0 ||
        stride[2] <  0 ||
        stride[3] <  0) {
        out = copyArray(out);
    }

    return out;
}
// In-place LU factorization of `in` via LAPACK getrf.
// Returns the pivot vector; when convert_pivot is set the LAPACK ipiv
// sequence is converted to a permutation (see convertPivot).
Array<int> lu_inplace(Array<T> &in, const bool convert_pivot) {
    dim4 iDims = in.dims();
    int M = iDims[0];
    int N = iDims[1];

    // Work in pinned host buffers; copyData gathers `in` (possibly a
    // strided view) into the packed inPtr buffer.
    int *pivotPtr = pinnedAlloc<int>(min(M, N));
    T   *inPtr    = pinnedAlloc<T>  (in.elements());
    copyData(inPtr, in);

    getrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N,
                    inPtr, in.strides()[1],
                    pivotPtr);

    // NOTE: convertPivot takes &pivotPtr and may replace the buffer; the
    // pinnedFree below intentionally frees whatever pointer it leaves.
    if(convert_pivot) convertPivot(&pivotPtr, M, min(M, N));

    // Write the factorized data back into `in`.
    writeHostDataArray<T>(in, inPtr, in.elements() * sizeof(T));

    Array<int> pivot = createHostDataArray<int>(af::dim4(M), pivotPtr);
    // Force the host->Array copy to complete before freeing the pinned buffers.
    pivot.eval();

    pinnedFree(inPtr);
    pinnedFree(pivotPtr);

    return pivot;
}
// Sort `in` along `dim`. dim0 sorts directly; other dims sort via
// sortBatched (which flattens the data) and then restore the layout by
// adjusting the data dims and reordering.
Array<T> sort(const Array<T>& in, const unsigned dim, bool isAscending) {
    in.eval();

    Array<T> out = copyArray<T>(in);
    switch (dim) {
        case 0: sort0<T>(out, isAscending);           break;
        case 1: sortBatched<T, 1>(out, isAscending);  break;
        case 2: sortBatched<T, 2>(out, isAscending);  break;
        case 3: sortBatched<T, 3>(out, isAscending);  break;
        default: AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
    }

    if (dim != 0) {
        // sortBatched left the sorted dimension in position 0 of the data
        // layout; build the dim permutation that moves it back to `dim` and
        // reorder.
        af::dim4 preorderDims = out.dims();
        af::dim4 reorderDims(0, 1, 2, 3);
        reorderDims[dim] = 0;
        preorderDims[0] = out.dims()[dim];
        for (int i = 1; i <= (int)dim; i++) {
            reorderDims[i - 1] = i;
            preorderDims[i] = out.dims()[i - 1];
        }

        out.setDataDims(preorderDims);
        out = reorder<T>(out, reorderDims);
    }
    return out;
}
// Create a view into `parent` described by the af_seq index vector.
// This variant folds the per-dimension offsets (plus the parent's own
// offset) into a single scalar element offset.
Array<T> createSubArray(const Array<T>& parent, const std::vector<af_seq> &index, bool copy) {
    parent.eval();

    dim4 dDims = parent.getDataDims();  // underlying buffer dims
    dim4 pDims = parent.dims();         // logical (view) dims

    dim4 dims    = toDims  (index, pDims);
    dim4 strides = toStride (index, dDims);

    // Find total offsets after indexing: per-dim offsets are relative to the
    // parent's logical dims, then scaled by the parent's strides and added
    // to the parent's own base offset.
    dim4 offsets = toOffset(index, pDims);
    dim4 parent_strides = parent.strides();
    dim_t offset = parent.getOffset();
    for (int i = 0; i < 4; i++) offset += offsets[i] * parent_strides[i];

    Array<T> out = Array<T>(parent, dims, offset, strides);

    if (!copy) return out;

    // Materialize the view when it is non-contiguous in dim0 or any stride
    // is negative.
    if (strides[0] != 1 ||
        strides[1] <  0 ||
        strides[2] <  0 ||
        strides[3] <  0) {
        out = copyArray(out);
    }

    return out;
}
// Inclusive/exclusive scan (op is a template parameter) of `in` along `dim`.
// The kernel is instantiated per effective rank of the input.
Array<To> scan(const Array<Ti>& in, const int dim) {
    in.eval();
    Array<To> out = createEmptyArray<To>(in.dims());

    switch (in.ndims()) {
        case 1: {
            kernel::scan_dim<op, Ti, To, 1> func;
            getQueue().enqueue(func, out, 0, in, 0, dim);
            break;
        }
        case 2: {
            kernel::scan_dim<op, Ti, To, 2> func;
            getQueue().enqueue(func, out, 0, in, 0, dim);
            break;
        }
        case 3: {
            kernel::scan_dim<op, Ti, To, 3> func;
            getQueue().enqueue(func, out, 0, in, 0, dim);
            break;
        }
        case 4: {
            kernel::scan_dim<op, Ti, To, 4> func;
            getQueue().enqueue(func, out, 0, in, 0, dim);
            break;
        }
    }

    return out;
}
// Out-of-place LU: factorize `in` and split the packed result into separate
// lower and upper triangular arrays, returning the pivot permutation.
void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in) {
    dim4 iDims = in.dims();
    int M = iDims[0];
    int N = iDims[1];

    Array<T> in_copy = copyArray<T>(in);

    //////////////////////////////////////////
    // LU inplace
    int *pivotPtr = pinnedAlloc<int>(min(M, N));
    // copyData gathers `in` (possibly strided) into the packed pinned
    // buffer; in_copy's packed strides/element count describe that layout.
    T   *inPtr    = pinnedAlloc<T>  (in_copy.elements());
    copyData(inPtr, in);

    getrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N,
                    inPtr, in_copy.strides()[1],
                    pivotPtr);

    // NOTE: convertPivot may replace pivotPtr with a new buffer; the later
    // pinnedFree frees whichever pointer it leaves behind.
    convertPivot(&pivotPtr, M, min(M, N));

    pivot = createHostDataArray<int>(af::dim4(M), pivotPtr);

    //////////////////////////////////////////
    // SPLIT into lower and upper
    dim4 ldims(M, min(M, N));
    dim4 udims(min(M, N), N);

    T *lowerPtr = pinnedAlloc<T>(ldims.elements());
    T *upperPtr = pinnedAlloc<T>(udims.elements());

    // Column-major strides for the lower/upper host buffers.
    dim4 lst(1, ldims[0], ldims[0] * ldims[1], ldims[0] * ldims[1] * ldims[2]);
    dim4 ust(1, udims[0], udims[0] * udims[1], udims[0] * udims[1] * udims[2]);

    lu_split<T>(lowerPtr, upperPtr, inPtr, ldims, udims, iDims, lst, ust, in_copy.strides());

    lower = createHostDataArray<T>(ldims, lowerPtr);
    upper = createHostDataArray<T>(udims, upperPtr);

    // Force the host->Array copies to finish before freeing pinned buffers.
    lower.eval();
    upper.eval();

    pinnedFree(lowerPtr);
    pinnedFree(upperPtr);
    pinnedFree(pivotPtr);
    pinnedFree(inPtr);
}
// Overwrite the contents of `arr` with `bytes` bytes of host data.
void writeHostDataArray(Array<T> &arr, const T *const data, const size_t bytes) {
    // A non-owning array aliases someone else's buffer; detach it first so
    // the write doesn't clobber shared data.
    if (!arr.isOwner()) arr = copyArray<T>(arr);

    arr.eval();

    // Drain the worker queue so no pending kernel is still using this
    // buffer while we write into it.
    getQueue().sync();

    memcpy(arr.get(), data, bytes);
}
// Dot product with optional conjugation of either operand.
// The kernel::dot template flags select conjugation behaviour; for the
// single-conjugate cases the operands are ordered so the conjugated one is
// always first — note the NONE/CONJ branch swaps (rhs, lhs).
Array<T> dot(const Array<T> &lhs, const Array<T> &rhs, af_mat_prop optLhs, af_mat_prop optRhs) {
    lhs.eval();
    rhs.eval();

    Array<T> out = createEmptyArray<T>(af::dim4(1));

    if(optLhs == AF_MAT_CONJ && optRhs == AF_MAT_CONJ) {
        // conj(a).conj(b) — handled by the second template flag.
        getQueue().enqueue(kernel::dot<T, false, true>, out, lhs, rhs, optLhs, optRhs);
    } else if (optLhs == AF_MAT_CONJ && optRhs == AF_MAT_NONE) {
        getQueue().enqueue(kernel::dot<T, true, false>,out, lhs, rhs, optLhs, optRhs);
    } else if (optLhs == AF_MAT_NONE && optRhs == AF_MAT_CONJ) {
        // Same kernel as above with operands swapped: conj(b).a == conj(b).a
        getQueue().enqueue(kernel::dot<T, true, false>,out, rhs, lhs, optRhs, optLhs);
    } else {
        // Plain dot product, no conjugation.
        getQueue().enqueue(kernel::dot<T, false, false>,out, lhs, rhs, optLhs, optRhs);
    }
    return out;
}
// Complex-to-real inverse FFT of the given rank; odims is the real-domain
// output shape (it cannot be derived unambiguously from the complex input).
Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims) {
    in.eval();
    Array<Tr> result = createEmptyArray<Tr>(odims);
    getQueue().enqueue(kernel::fft_c2r<Tr, Tc, rank>, result, in, odims);
    return result;
}
// RGB -> HSV colorspace conversion; output matches the input shape.
Array<T> rgb2hsv(const Array<T>& in) {
    in.eval();
    Array<T> hsv = createEmptyArray<T>(in.dims());
    getQueue().enqueue(kernel::rgb2hsv<T>, hsv, in);
    return hsv;
}
// 2D median filter with a w_len x w_wid window; border handling is selected
// by the `pad` template parameter.
Array<T> medfilt(const Array<T> &in, dim_t w_len, dim_t w_wid) {
    in.eval();
    Array<T> filtered = createEmptyArray<T>(in.dims());
    getQueue().enqueue(kernel::medfilt<T, pad>, filtered, in, w_len, w_wid);
    return filtered;
}
// Upload vector-field points and directions into the Forge VBOs.
void copy_vector_field(const Array<T> &points, const Array<T> &directions,
                       forge::VectorField* vector_field) {
    points.eval();
    directions.eval();

    // The worker queue may still be producing these arrays; wait for it so
    // the glBufferSubData reads finished data.
    getQueue().sync();

    CheckGL("Before CopyArrayToVBO");

    // Copy points into the vertices buffer.
    glBindBuffer(GL_ARRAY_BUFFER, vector_field->vertices());
    glBufferSubData(GL_ARRAY_BUFFER, 0, vector_field->verticesSize(), points.get());
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    // Copy directions into the directions buffer.
    glBindBuffer(GL_ARRAY_BUFFER, vector_field->directions());
    glBufferSubData(GL_ARRAY_BUFFER, 0, vector_field->directionsSize(), directions.get());
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    CheckGL("In CopyArrayToVBO");
}
// Solve A * X = b using a precomputed LU factorization (A, pivot) via
// LAPACK getrs. Returns X; `b` is copied so the caller's array is untouched.
Array<T> solveLU(const Array<T> &A, const Array<int> &pivot, const Array<T> &b,
                 const af_mat_prop options) {
    A.eval();
    pivot.eval();
    b.eval();

    int N    = A.dims()[0];
    int NRHS = b.dims()[1];

    // getrs solves in place, so work on a copy of b.
    Array< T > B = copyArray<T>(b);

    // Capture by value: the refcounted Arrays must stay alive until the
    // worker thread runs this lambda. B is returned immediately; readers
    // synchronize through the queue before accessing its data.
    auto func = [=] (Array<T> A, Array<T> B, Array<int> pivot, int N, int NRHS) {
        getrs_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
                        N, NRHS,
                        A.get(), A.strides()[1],
                        pivot.get(),
                        B.get(), B.strides()[1]);
    };
    getQueue().enqueue(func, A, B, pivot, N, NRHS);

    return B;
}
// Inverse of unwrap: scatter-add the (wx x wy) patches of `in` back into an
// (ox x oy) output with stride (sx, sy) and padding (px, py). Overlapping
// patches accumulate, so the output starts zero-filled.
Array<T> wrap(const Array<T> &in,
              const dim_t ox, const dim_t oy,
              const dim_t wx, const dim_t wy,
              const dim_t sx, const dim_t sy,
              const dim_t px, const dim_t py,
              const bool is_column) {
    const af::dim4 idims = in.dims();

    Array<T> out = createValueArray<T>(af::dim4(ox, oy, idims[2], idims[3]), scalar<T>(0));
    out.eval();
    in.eval();

    // The patch-major dimension is a template parameter of the kernel.
    if (is_column) {
        getQueue().enqueue(kernel::wrap_dim<T, 1>, out, in, wx, wy, sx, sy, px, py);
    } else {
        getQueue().enqueue(kernel::wrap_dim<T, 0>, out, in, wx, wy, sx, sy, px, py);
    }

    return out;
}
// Out-of-place sort along `dim`; the sort direction is the template
// parameter isAscending. Only dim 0 is supported in this overload.
Array<T> sort(const Array<T> &in, const unsigned dim) {
    in.eval();
    Array<T> out = copyArray<T>(in);

    if (dim == 0) {
        getQueue().enqueue(kernel::sort0<T, isAscending>, out);
    } else {
        AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
    }

    return out;
}
// 2D interpolation: sample zi at the positions given by xo (along xdim) and
// yo (along ydim). Dispatches to kernel::approx2 with the interpolation
// order baked in as a template parameter.
//
// FIX: the output extent along ydim was taken from xo.dims()[ydim]; it must
// come from the y-position array, yo.dims()[ydim]. (The two coincide when
// xo and yo have identical shapes, which masked the bug.)
Array<Ty> approx2(const Array<Ty> &zi,
                  const Array<Tp> &xo, const int xdim, const Tp &xi_beg, const Tp &xi_step,
                  const Array<Tp> &yo, const int ydim, const Tp &yi_beg, const Tp &yi_step,
                  const af_interp_type method, const float offGrid) {
    // Inputs must be evaluated before being handed to the async queue.
    zi.eval();
    xo.eval();
    yo.eval();

    // Output keeps zi's shape except along the two interpolated dimensions,
    // whose extents come from the corresponding position arrays.
    dim4 odims = zi.dims();
    odims[xdim] = xo.dims()[xdim];
    odims[ydim] = yo.dims()[ydim];

    Array<Ty> zo = createEmptyArray<Ty>(odims);

    switch (method) {
        case AF_INTERP_NEAREST:
        case AF_INTERP_LOWER:
            getQueue().enqueue(kernel::approx2<Ty, Tp, 1>, zo, zi,
                               xo, xdim, xi_beg, xi_step,
                               yo, ydim, yi_beg, yi_step,
                               offGrid, method);
            break;
        case AF_INTERP_LINEAR:
        case AF_INTERP_BILINEAR:
        case AF_INTERP_LINEAR_COSINE:
        case AF_INTERP_BILINEAR_COSINE:
            getQueue().enqueue(kernel::approx2<Ty, Tp, 2>, zo, zi,
                               xo, xdim, xi_beg, xi_step,
                               yo, ydim, yi_beg, yi_step,
                               offGrid, method);
            break;
        case AF_INTERP_CUBIC:
        case AF_INTERP_BICUBIC:
        case AF_INTERP_CUBIC_SPLINE:
        case AF_INTERP_BICUBIC_SPLINE:
            getQueue().enqueue(kernel::approx2<Ty, Tp, 3>, zo, zi,
                               xo, xdim, xi_beg, xi_step,
                               yo, ydim, yi_beg, yi_step,
                               offGrid, method);
            break;
        default: break;  // unknown method: zo is returned uninitialized
    }
    return zo;
}