// Creates the array obtained by joining `first` and `second` along dimension
// `dim`. The output takes its extents from `first`, except along `dim`, where
// the extents of both inputs are added together.
// NOTE(review): all non-join dimensions are expected to match between the two
// inputs, but nothing in this function verifies that.
Array<Tx> join(const int dim, const Array<Tx> &first, const Array<Ty> &second)
{
    first.eval();
    second.eval();

    const af::dim4 lhsDims = first.dims();
    const af::dim4 rhsDims = second.dims();

    // Compute output dims
    af::dim4 odims;
    for (int axis = 0; axis < 4; ++axis) {
        const bool isJoinAxis = (axis == dim);
        odims[axis] = isJoinAxis ? (lhsDims[axis] + rhsDims[axis]) : lhsDims[axis];
    }

    Array<Tx> out = createEmptyArray<Tx>(odims);

    // Hand the join kernel and its arguments to the queue.
    getQueue().enqueue(kernel::join<Tx, Ty>, out, dim, first, second);

    return out;
}
// Returns a sorted copy of `in` along dimension `dim` (0..3); `isAscending`
// is forwarded to the sort kernels. Any std::exception raised inside is
// re-reported through AF_ERROR with AF_ERR_INTERNAL.
Array<T> sort(const Array<T> &in, const unsigned dim, bool isAscending)
{
    try {
        Array<T> out = copyArray<T>(in);

        switch (dim) {
            case 0:
                kernel::sort0<T>(out, isAscending);
                break;
            case 1:
                kernel::sortBatched<T, 1>(out, isAscending);
                break;
            case 2:
                kernel::sortBatched<T, 2>(out, isAscending);
                break;
            case 3:
                kernel::sortBatched<T, 3>(out, isAscending);
                break;
            default:
                AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
        }

        // Sorting along the first dimension needs no post-processing.
        if (dim == 0) {
            return out;
        }

        // For dim > 0 the data dims are relabelled so that the sorted
        // dimension comes first and the dimensions before it shift up by one;
        // the array is then reordered with the matching permutation.
        const af::dim4 sortedDims = out.dims();
        af::dim4 dataDims         = sortedDims;
        af::dim4 axisOrder(0, 1, 2, 3);

        axisOrder[dim] = 0;
        dataDims[0]    = sortedDims[dim];
        for (unsigned d = 0; d < dim; ++d) {
            axisOrder[d]    = d + 1;
            dataDims[d + 1] = sortedDims[d];
        }

        out.setDataDims(dataDims);
        out = reorder<T>(out, axisOrder);
        return out;
    } catch (std::exception &ex) {
        AF_ERROR(ex.what(), AF_ERR_INTERNAL);
    }
}
// Filters `x` using the coefficient arrays `b` and `a`.
// `x` is first convolved with `b`. If `a` has a single element along its
// first dimension that result is returned as is; otherwise kernel::iir
// computes the output from the convolution result and `a`.
// cl::Error exceptions are translated with CL_TO_AF_ERROR.
Array<T> iir(const Array<T> &b, const Array<T> &a, const Array<T> &x)
{
    try {
        // Choose the batch mode from the ranks of the signal and of b.
        AF_BATCH_KIND type;
        if (x.ndims() != b.ndims()) {
            type = (x.ndims() < b.ndims()) ? AF_BATCH_RHS : AF_BATCH_LHS;
        } else if (x.ndims() == 1) {
            type = AF_BATCH_NONE;
        } else {
            type = AF_BATCH_SAME;
        }

        Array<T> c = convolve<T, T, 1, true>(x, b, type);

        // Keep only the first x.dims()[0] elements along dimension 0.
        dim4 trimmedDims = c.dims();
        trimmedDims[0]   = x.dims()[0];
        c.resetDims(trimmedDims);

        const int num_a = a.dims()[0];
        if (num_a == 1) {
            return c;
        }

        dim4 ydims = c.dims();
        Array<T> y = createEmptyArray<T>(ydims);

        // The boolean template argument tells the kernel whether `a` has
        // more than one dimension.
        const bool multiDimA = (a.ndims() > 1);
        if (multiDimA) {
            kernel::iir<T, true>(y, c, a);
        } else {
            kernel::iir<T, false>(y, c, a);
        }
        return y;
    } catch (cl::Error &err) {
        CL_TO_AF_ERROR(err);
    }
}
// Interpolates `in` at the positions given by `pos0` and `pos1`.
// The output has the extents of `pos0` in dimensions 0 and 1 and the extents
// of `in` in dimensions 2 and 3. The interpolation method selects the third
// template argument of kernel::approx2 (1, 2 or 3 - presumably the
// interpolation order). For any other method no kernel is run and the
// freshly created output is returned untouched.
Array<Ty> approx2(const Array<Ty> &in, const Array<Tp> &pos0, const Array<Tp> &pos1,
                  const af_interp_type method, const float offGrid)
{
    const af::dim4 idims = in.dims();
    af::dim4 odims       = pos0.dims();
    odims[2]             = idims[2];
    odims[3]             = idims[3];

    // Create output placeholder
    Array<Ty> out = createEmptyArray<Ty>(odims);

    const bool useOrder1 = (method == AF_INTERP_NEAREST ||
                            method == AF_INTERP_LOWER);
    const bool useOrder2 = (method == AF_INTERP_LINEAR ||
                            method == AF_INTERP_BILINEAR ||
                            method == AF_INTERP_LINEAR_COSINE ||
                            method == AF_INTERP_BILINEAR_COSINE);
    const bool useOrder3 = (method == AF_INTERP_CUBIC ||
                            method == AF_INTERP_BICUBIC ||
                            method == AF_INTERP_CUBIC_SPLINE ||
                            method == AF_INTERP_BICUBIC_SPLINE);

    if (useOrder1) {
        kernel::approx2<Ty, Tp, 1>(out, in, pos0, pos1, offGrid, method);
    } else if (useOrder2) {
        kernel::approx2<Ty, Tp, 2>(out, in, pos0, pos1, offGrid, method);
    } else if (useOrder3) {
        kernel::approx2<Ty, Tp, 3>(out, in, pos0, pos1, offGrid, method);
    }

    return out;
}
// Interpolates `in` at the positions given by `pos0` and `pos1` and returns
// a pointer to the newly created output array.
// The output has the extents of `in`, with dimensions 0 and 1 replaced by
// those of `pos0`. Only AF_INTERP_NEAREST and AF_INTERP_LINEAR are handled;
// for any other method the output is returned without being filled.
Array<Ty> *approx2(const Array<Ty> &in, const Array<Tp> &pos0, const Array<Tp> &pos1,
                   const af_interp_type method, const float offGrid)
{
    const af::dim4 gridDims = pos0.dims();
    af::dim4 odims          = in.dims();
    odims[0]                = gridDims[0];
    odims[1]                = gridDims[1];

    // Create output placeholder
    Array<Ty> *out = createEmptyArray<Ty>(odims);

    if (method == AF_INTERP_NEAREST) {
        approx2_<Ty, Tp, AF_INTERP_NEAREST>(
            out->get(), out->dims(), out->elements(),
            in.get(), in.dims(), in.elements(),
            pos0.get(), pos0.dims(),
            pos1.get(), pos1.dims(),
            out->strides(), in.strides(), pos0.strides(), pos1.strides(),
            offGrid);
    } else if (method == AF_INTERP_LINEAR) {
        approx2_<Ty, Tp, AF_INTERP_LINEAR>(
            out->get(), out->dims(), out->elements(),
            in.get(), in.dims(), in.elements(),
            pos0.get(), pos0.dims(),
            pos1.get(), pos1.dims(),
            out->strides(), in.strides(), pos0.strides(), pos1.strides(),
            offGrid);
    }

    return out;
}
// Interpolates `in` at the positions given by `pos0` and `pos1`.
// Invokes OPENCL_NOT_SUPPORTED() when Ty is double or cdouble and the active
// device reports no double support. The output has the extents of `pos0` in
// dimensions 0 and 1 and of `in` in dimensions 2 and 3. Only
// AF_INTERP_NEAREST and AF_INTERP_LINEAR launch a kernel; other methods
// return the output unfilled.
Array<Ty> approx2(const Array<Ty> &in, const Array<Tp> &pos0, const Array<Tp> &pos1,
                  const af_interp_type method, const float offGrid)
{
    const bool needsDouble = std::is_same<Ty, double>::value ||
                             std::is_same<Ty, cdouble>::value;
    if (needsDouble && !isDoubleSupported(getActiveDeviceId())) {
        OPENCL_NOT_SUPPORTED();
    }

    const af::dim4 idims = in.dims();
    af::dim4 odims       = pos0.dims();
    odims[2]             = idims[2];
    odims[3]             = idims[3];

    // Create output placeholder
    Array<Ty> out = createEmptyArray<Ty>(odims);

    if (method == AF_INTERP_NEAREST) {
        kernel::approx2<Ty, Tp, AF_INTERP_NEAREST>(out, in, pos0, pos1, offGrid);
    } else if (method == AF_INTERP_LINEAR) {
        kernel::approx2<Ty, Tp, AF_INTERP_LINEAR>(out, in, pos0, pos1, offGrid);
    }

    return out;
}
// Convolves `signal` with two filters in two passes: `c_filter` along
// dimension 0 into a temporary, then `r_filter` along dimension 1 into the
// result. When `expand` is set, the temporary grows by (column filter
// length - 1) along dimension 0 and the output additionally grows by (row
// filter length - 1) along dimension 1; otherwise both keep the signal's
// dimensions.
Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<accT> const& r_filter)
{
    const dim4 colDims = c_filter.dims();
    const dim4 rowDims = r_filter.dims();
    const dim_t colLen = colDims.elements();
    const dim_t rowLen = rowDims.elements();

    const dim4 sDims = signal.dims();
    dim4 tDims       = sDims;   // intermediate (after first pass)
    dim4 oDims       = sDims;   // final output

    if (expand) {
        const dim_t grow0 = colLen - 1;
        const dim_t grow1 = rowLen - 1;
        tDims[0] += grow0;
        oDims[0] += grow0;
        oDims[1] += grow1;
    }

    Array<T> temp = createEmptyArray<T>(tDims);
    Array<T> out  = createEmptyArray<T>(oDims);

    kernel::convolve2<T, accT, 0, expand>(temp, signal, c_filter);
    kernel::convolve2<T, accT, 1, expand>(out, temp, r_filter);

    return out;
}
// Solves a linear system for the coefficient matrix `a` and right-hand
// side(s) `b`, returning the solution as a new array.
// Both inputs are copied; the copy of `a` is processed in place by
// magma_getrf_gpu and the copy of `b` is overwritten by magma_getrs_gpu
// (presumably LAPACK getrf/getrs semantics - LU factorization followed by a
// solve).
// NOTE(review): the `info` status written by the MAGMA calls is not inspected.
Array<T> generalSolve(const Array<T> &a, const Array<T> &b)
{
    const dim4 aDims = a.dims();
    const int M      = aDims[0];
    const int N      = aDims[1];
    const int MN     = std::min(M, N);

    // Working copies: the MAGMA routines overwrite their arguments.
    Array<T> A = copyArray<T>(a);
    Array<T> B = copyArray<T>(b);

    std::vector<int> ipiv(MN);
    int info = 0;

    cl::Buffer *A_buf = A.get();
    magma_getrf_gpu<T>(M, N,
                       (*A_buf)(), A.getOffset(), A.strides()[1],
                       &ipiv[0], getQueue()(), &info);

    cl::Buffer *B_buf = B.get();
    const int K       = B.dims()[1];
    magma_getrs_gpu<T>(MagmaNoTrans, M, K,
                       (*A_buf)(), A.getOffset(), A.strides()[1],
                       &ipiv[0],
                       (*B_buf)(), B.getOffset(), B.strides()[1],
                       getQueue()(), &info);

    return B;
}
// Accumulates histogram counts of `in` into `out`.
// Values are mapped to one of `nbins` equally wide bins spanning
// [minval, maxval]; bin indices below 0 or above nbins-1 are clamped to the
// first / last bin. One histogram is accumulated per (dim 2, dim 3) slice,
// each covering inDims[0]*inDims[1] input elements.
// `out` is only incremented, never cleared here.
// NOTE(review): the data pointers advance by the dim-2 stride only, so the
// dim-3 batches are assumed to follow each other contiguously - confirm.
void histogram(Array<OutT> out, Array<InT> const in, unsigned const nbins, double const minval, double const maxval)
{
    dim4 const inDims   = in.dims();
    dim4 const outDims  = out.dims();
    dim4 const iStrides = in.strides();
    dim4 const oStrides = out.strides();

    float const step    = (maxval - minval)/(float)nbins;
    int const lastBin   = (int)(nbins - 1);
    dim_t const nElems  = inDims[0]*inDims[1];

    OutT *outData     = out.get();
    const InT *inData = in.get();

    for (dim_t b3 = 0; b3 < outDims[3]; b3++) {
        for (dim_t b2 = 0; b2 < outDims[2]; b2++) {
            for (dim_t i = 0; i < nElems; i++) {
                // Linear inputs are indexed directly; otherwise the flat
                // index is split into (row, column) and the column stride
                // is applied.
                int idx;
                if (IsLinear) {
                    idx = i;
                } else {
                    idx = (i % inDims[0]) + (i / inDims[0])*iStrides[1];
                }

                int bin = (int)((inData[idx] - minval) / step);
                bin     = std::min(std::max(bin, 0), lastBin);
                outData[bin]++;
            }
            inData  += iStrides[2];
            outData += oStrides[2];
        }
    }
}
// Solves a linear system using the already factorized matrix `A` and its
// pivot array, for the right-hand side(s) `b`.
// When OpenCLCPUOffload() reports true the work is delegated to
// cpu::solveLU. Otherwise the pivots are copied to host memory, `b` is
// copied, and magma_getrs_gpu overwrites that copy, which is returned.
// NOTE(review): `options` is only used on the offload path, and the `info`
// status from MAGMA is not inspected.
Array<T> solveLU(const Array<T> &A, const Array<int> &pivot, const Array<T> &b, const af_mat_prop options)
{
    if (OpenCLCPUOffload()) {
        return cpu::solveLU(A, pivot, b, options);
    }

    const int N    = A.dims()[0];
    const int NRHS = b.dims()[1];

    // Host-side copy of the pivot indices.
    std::vector<int> ipiv(N);
    copyData(&ipiv[0], pivot);

    // The solve overwrites its right-hand side, so work on a copy.
    Array<T> B = copyArray<T>(b);

    const cl::Buffer *A_buf = A.get();
    cl::Buffer *B_buf       = B.get();

    int info = 0;
    magma_getrs_gpu<T>(MagmaNoTrans, N, NRHS,
                       (*A_buf)(), A.getOffset(), A.strides()[1],
                       &ipiv[0],
                       (*B_buf)(), B.getOffset(), B.strides()[1],
                       getQueue()(), &info);

    return B;
}
// Element-wise selection: out = cond ? a : b.
// `a`, `b` and `cond` may each differ from `out` in any dimension; along a
// dimension where an operand's extent differs from the output's, that
// operand is not advanced (its index along that dimension stays at 0).
// Dimension 0 of every array is addressed with a unit step.
void select(Array<T> out, const Array<char> cond, const Array<T> a, const Array<T> b)
{
    const af::dim4 adims    = a.dims();
    const af::dim4 astrides = a.strides();
    const af::dim4 bdims    = b.dims();
    const af::dim4 bstrides = b.strides();
    const af::dim4 cdims    = cond.dims();
    const af::dim4 cstrides = cond.strides();
    const af::dim4 odims    = out.dims();
    const af::dim4 ostrides = out.strides();

    // Effective step of each operand along dims 1..3: its stride where the
    // extent matches the output, 0 otherwise. Index 0 is unused.
    dim_t aStep[4] = {0, 0, 0, 0};
    dim_t bStep[4] = {0, 0, 0, 0};
    dim_t cStep[4] = {0, 0, 0, 0};
    for (int d = 1; d < 4; d++) {
        aStep[d] = (adims[d] == odims[d]) ? astrides[d] : 0;
        bStep[d] = (bdims[d] == odims[d]) ? bstrides[d] : 0;
        cStep[d] = (cdims[d] == odims[d]) ? cstrides[d] : 0;
    }

    // Along dimension 0 an operand either walks element by element or stays put.
    const int aInner = (adims[0] == odims[0]) ? 1 : 0;
    const int bInner = (bdims[0] == odims[0]) ? 1 : 0;
    const int cInner = (cdims[0] == odims[0]) ? 1 : 0;

    const T *aptr    = a.get();
    const T *bptr    = b.get();
    T *optr          = out.get();
    const char *cptr = cond.get();

    for (int l = 0; l < odims[3]; l++) {
        const int o_off3 = ostrides[3] * l;
        const int a_off3 = aStep[3] * l;
        const int b_off3 = bStep[3] * l;
        const int c_off3 = cStep[3] * l;

        for (int k = 0; k < odims[2]; k++) {
            const int o_off2 = ostrides[2] * k + o_off3;
            const int a_off2 = aStep[2] * k + a_off3;
            const int b_off2 = bStep[2] * k + b_off3;
            const int c_off2 = cStep[2] * k + c_off3;

            for (int j = 0; j < odims[1]; j++) {
                const int o_off1 = ostrides[1] * j + o_off2;
                const int a_off1 = aStep[1] * j + a_off2;
                const int b_off1 = bStep[1] * j + b_off2;
                const int c_off1 = cStep[1] * j + c_off2;

                for (int i = 0; i < odims[0]; i++) {
                    const bool pick = cptr[c_off1 + cInner * i];
                    const T fromA   = aptr[a_off1 + aInner * i];
                    const T fromB   = bptr[b_off1 + bInner * i];
                    optr[o_off1 + i] = pick ? fromA : fromB;
                }
            }
        }
    }
}
void nearest_neighbour_(Array<uint>& idx, Array<To>& dist, const Array<T>& query, const Array<T>& train, const uint dist_dim, const uint n_dist) { uint sample_dim = (dist_dim == 0) ? 1 : 0; const dim4 qDims = query.dims(); const dim4 tDims = train.dims(); if (n_dist > 1) { CPU_NOT_SUPPORTED(); } const unsigned distLength = qDims[dist_dim]; const unsigned nQuery = qDims[sample_dim]; const unsigned nTrain = tDims[sample_dim]; const dim4 outDims(n_dist, nQuery); idx = createEmptyArray<uint>(outDims); dist = createEmptyArray<To >(outDims); const T* qPtr = query.get(); const T* tPtr = train.get(); uint* iPtr = idx.get(); To* dPtr = dist.get(); dist_op<T, To, dist_type> op; for (unsigned i = 0; i < nQuery; i++) { To best_dist = limit_max<To>(); unsigned best_idx = 0; for (unsigned j = 0; j < nTrain; j++) { To local_dist = 0; for (unsigned k = 0; k < distLength; k++) { size_t qIdx, tIdx; if (sample_dim == 0) { qIdx = k * qDims[0] + i; tIdx = k * tDims[0] + j; } else { qIdx = i * qDims[0] + k; tIdx = j * tDims[0] + k; } local_dist += op(qPtr[qIdx], tPtr[tIdx]); } if (local_dist < best_dist) { best_dist = local_dist; best_idx = j; } } size_t oIdx; oIdx = i; iPtr[oIdx] = best_idx; dPtr[oIdx] = best_dist; } }
// Builds, for every column of `in`, a square matrix of side
// in.dims()[0] + |num| that is zero everywhere except on the diagonal
// selected by `num` (elements with row == column - num), which receives the
// column's values. The matrices are stacked along dimension 2 of the result.
Array<T> diagCreate(const Array<T> &in, const int num)
{
    const int size  = in.dims()[0] + std::abs(num);
    const int batch = in.dims()[1];

    Array<T> out = createEmptyArray<T>(dim4(size, size, batch));

    const T *iptr = in.get();
    T *optr       = out.get();

    for (int k = 0; k < batch; k++) {
        for (int col = 0; col < size; col++) {
            for (int row = 0; row < size; row++) {
                const bool onDiagonal = (row == col - num);

                T val = scalar<T>(0);
                if (onDiagonal) {
                    // The position along the diagonal is the smaller of the
                    // two indices: the row above the main diagonal, the
                    // column otherwise.
                    val = (num > 0) ? iptr[row] : iptr[col];
                }
                optr[row + col * out.strides()[1]] = val;
            }
        }
        // Advance to the next output matrix / input column.
        optr += out.strides()[2];
        iptr += in.strides()[1];
    }

    return out;
}
// Standard deviation of `in` along dimension `dim`, returned as a new handle.
// Computed as sqrt(sum((x - mean)^2) / N), where N is the extent of the
// input along `dim` (i.e. the sum of squares is divided by N, not N-1).
static af_array stdev(const af_array& in, int dim)
{
    Array<inType> _in    = getArray<inType>(in);
    Array<outType> input = cast<outType>(_in);
    const dim4 iDims     = input.dims();

    // Mean along `dim`, tiled back up along `dim` so it can be subtracted
    // from the input element-wise.
    Array<outType> meanArr = mean<inType, outType>(_in, dim);
    dim4 tileDims(1);
    tileDims[dim] = iDims[dim];
    Array<outType> tiledMean = detail::tile<outType>(meanArr, tileDims);

    // Squared deviations from the mean, summed along `dim`.
    Array<outType> deviation =
        detail::arithOp<outType, af_sub_t>(input, tiledMean, tiledMean.dims());
    Array<outType> deviationSq =
        detail::arithOp<outType, af_mul_t>(deviation, deviation, deviation.dims());
    Array<outType> sumSq = reduce<af_add_t, outType, outType>(deviationSq, dim);

    // Divide by the number of elements along `dim`, then take the root.
    const dim4 oDims = sumSq.dims();
    Array<outType> count =
        createValueArray<outType>(oDims, scalar<outType>(iDims[dim]));
    Array<outType> variance =
        detail::arithOp<outType, af_div_t>(sumSq, count, sumSq.dims());
    Array<outType> result = detail::unaryOp<outType, af_sqrt_t>(variance);

    return getHandle<outType>(result);
}
// N-dimensional convolution of `signal` with `filter`; `kind` describes how
// the two operands are batched.
// Output dimensions:
//  - expand: signal + filter - 1 along every dimension for AF_BATCH_NONE and
//    AF_BATCH_RHS; for the other kinds only along dimensions below baseDim,
//    with the remaining dimensions taken from the signal.
//  - no expand: the signal's dimensions, except that for AF_BATCH_RHS the
//    dimensions from baseDim upwards come from the filter.
Array<T> convolve(Array<T> const& signal, Array<accT> const& filter, AF_BATCH_KIND kind)
{
    const dim4 sDims = signal.dims();
    const dim4 fDims = filter.dims();

    dim4 oDims(1);
    if (!expand) {
        oDims = sDims;
        if (kind==AF_BATCH_RHS) {
            for (dim_t d = baseDim; d < 4; ++d) {
                oDims[d] = fDims[d];
            }
        }
    } else {
        const bool growEveryDim = (kind==AF_BATCH_NONE || kind==AF_BATCH_RHS);
        for (dim_t d = 0; d < 4; ++d) {
            if (growEveryDim || d<baseDim) {
                oDims[d] = sDims[d]+fDims[d]-1;
            } else {
                oDims[d] = sDims[d];
            }
        }
    }

    Array<T> out = createEmptyArray<T>(oDims);
    kernel::convolve_nd<T, accT, baseDim, expand>(out, signal, filter, kind);

    return out;
}
void morph3d(Array<T> out, Array<T> const in, Array<T> const mask) { const af::dim4 dims = in.dims(); const af::dim4 window = mask.dims(); const dim_t R0 = window[0]/2; const dim_t R1 = window[1]/2; const dim_t R2 = window[2]/2; const af::dim4 istrides = in.strides(); const af::dim4 fstrides = mask.strides(); const dim_t bCount = dims[3]; const af::dim4 ostrides = out.strides(); T* outData = out.get(); const T* inData = in.get(); const T* filter = mask.get(); for(dim_t batchId=0; batchId<bCount; ++batchId) { // either channels or batch is handled by outer most loop for(dim_t k=0; k<dims[2]; ++k) { // k steps along 3rd dimension for(dim_t j=0; j<dims[1]; ++j) { // j steps along 2nd dimension for(dim_t i=0; i<dims[0]; ++i) { // i steps along 1st dimension T filterResult = inData[ getIdx(istrides, i, j, k) ]; // wk, wj,wi steps along 2nd & 1st dimensions of filter window respectively for(dim_t wk=0; wk<window[2]; wk++) { for(dim_t wj=0; wj<window[1]; wj++) { for(dim_t wi=0; wi<window[0]; wi++) { dim_t offk = k+wk-R2; dim_t offj = j+wj-R1; dim_t offi = i+wi-R0; T maskValue = filter[ getIdx(fstrides, wi, wj, wk) ]; if ((maskValue > (T)0) && offi>=0 && offj>=0 && offk>=0 && offi<dims[0] && offj<dims[1] && offk<dims[2]) { T inValue = inData[ getIdx(istrides, offi, offj, offk) ]; if (IsDilation) filterResult = std::max(filterResult, inValue); else filterResult = std::min(filterResult, inValue); } } // window 1st dimension loop ends here } // window 1st dimension loop ends here }// filter window loop ends here outData[ getIdx(ostrides, i, j, k) ] = filterResult; } //1st dimension loop ends here } // 2nd dimension loop ends here } // 3rd dimension loop ends here // next iteration will be next batch if any outData += ostrides[3]; inData += istrides[3]; } }
// Runs a complex-to-complex clFFT transform of `rank` dimensions on `in`,
// using the same buffer as input and output. Dimensions from `rank` upwards
// are folded into the batch count. `direction` selects CLFFT_FORWARD (true)
// or CLFFT_BACKWARD (false).
void fft_inplace(Array<T> &in)
{
    verifySupported<rank>(in.dims());

    size_t tdims[4];
    size_t istrides[4];
    computeDims(tdims   , in.dims());
    computeDims(istrides, in.strides());

    // Every dimension beyond the transform rank contributes to the batch.
    int batch = 1;
    for (int d = rank; d < 4; d++) {
        batch *= tdims[d];
    }

    // Input and output share layout (strides and batch distance) because
    // the transform is done in place.
    clfftPlanHandle plan;
    find_clfft_plan(plan,
                    CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED,
                    (clfftDim)rank, tdims,
                    istrides, istrides[rank],
                    istrides, istrides[rank],
                    (clfftPrecision)Precision<T>::type,
                    batch);

    cl_mem imem            = (*in.get())();
    cl_command_queue queue = getQueue()();

    CLFFT_CHECK(clfftEnqueueTransform(plan,
                                      direction ? CLFFT_FORWARD : CLFFT_BACKWARD,
                                      1, &queue, 0, NULL, NULL,
                                      &imem, &imem, NULL));
}
// Extracts the diagonal selected by `num` from every 2D slice of `in`.
//
// num > 0 selects a diagonal above the main one (starting at column `num`),
// num < 0 one below it (starting at row `-num`), num == 0 the main diagonal.
// The result has dims (max(rows, cols) - |num|, 1, in.dims()[2],
// in.dims()[3]); positions that fall outside the matrix are set to zero.
//
// The bounds test uses the real row and column of each diagonal element
// (i.e. it takes `num` into account). Testing only the position along the
// diagonal lets non-square inputs index past the end of a column or of the
// whole buffer.
Array<T> diagExtract(const Array<T> &in, const int num)
{
    // Work on copies of the dims/strides rather than on pointers into
    // whatever in.dims() returns.
    const dim4 idims    = in.dims();
    const dim4 istrides = in.strides();

    const dim_t size = std::max(idims[0], idims[1]) - std::abs(num);
    Array<T> out = createEmptyArray<T>(dim4(size, 1, idims[2], idims[3]));

    const dim4 odims    = out.dims();
    const dim4 ostrides = out.strides();

    // Row / column of the first element on the requested diagonal.
    const dim_t rowStart = (num > 0) ? 0 : -num;
    const dim_t colStart = (num > 0) ? num : 0;

    for (int l = 0; l < (int)odims[3]; l++) {
        for (int k = 0; k < (int)odims[2]; k++) {
            const T *iptr = in.get() + l * istrides[3] + k * istrides[2];
            T *optr       = out.get() + l * ostrides[3] + k * ostrides[2];

            for (int i = 0; i < (int)odims[0]; i++) {
                const dim_t row = rowStart + i;
                const dim_t col = colStart + i;

                T val = scalar<T>(0);
                if (row < idims[0] && col < idims[1]) {
                    // Dimension 0 is addressed with a unit step.
                    val = iptr[col * istrides[1] + row];
                }
                optr[i] = val;
            }
        }
    }

    return out;
}
// 2D morphological filter of `in` with structuring element `mask`.
// For every pixel the result starts from the pixel's own value and is
// combined (max when isDilation, min otherwise) with each in-bounds
// neighbour whose mask entry is greater than zero. The window is centred
// using half the mask extent in each dimension. Dimensions 2 and 3 are
// iterated as batches.
// NOTE(review): the data pointers advance by the dim-2 stride only, so the
// dim-3 batches are assumed to follow each other contiguously - confirm.
Array<T> morph(const Array<T> &in, const Array<T> &mask)
{
    const dim4 dims     = in.dims();
    const dim4 window   = mask.dims();
    const dim4 istrides = in.strides();
    const dim4 fstrides = mask.strides();

    // Half-widths of the window along dims 0 and 1.
    const dim_t R0 = window[0]/2;
    const dim_t R1 = window[1]/2;

    Array<T> out        = createEmptyArray<T>(dims);
    const dim4 ostrides = out.strides();

    T* outData      = out.get();
    const T* inData = in.get();
    const T* filter = mask.get();

    for (dim_t b3 = 0; b3 < dims[3]; ++b3) {
        for (dim_t b2 = 0; b2 < dims[2]; ++b2) {
            for (dim_t j = 0; j < dims[1]; ++j) {
                for (dim_t i = 0; i < dims[0]; ++i) {
                    T filterResult = inData[ getIdx(istrides, i, j) ];

                    // wj, wi step along the 2nd and 1st dimensions of the
                    // filter window respectively.
                    for (dim_t wj = 0; wj < window[1]; wj++) {
                        for (dim_t wi = 0; wi < window[0]; wi++) {
                            const dim_t offj = j+wj-R1;
                            const dim_t offi = i+wi-R0;

                            const T maskValue = filter[ getIdx(fstrides, wi, wj) ];

                            const bool active   = (maskValue > (T)0);
                            const bool inBounds = offi>=0 && offj>=0 &&
                                                  offi<dims[0] && offj<dims[1];
                            if (!(active && inBounds)) {
                                continue;
                            }

                            const T inValue = inData[ getIdx(istrides, offi, offj) ];
                            if (isDilation) {
                                filterResult = std::max(filterResult, inValue);
                            } else {
                                filterResult = std::min(filterResult, inValue);
                            }
                        } // window 1st dimension
                    } // window 2nd dimension

                    outData[ getIdx(ostrides, i, j) ] = filterResult;
                } // 1st dimension
            } // 2nd dimension

            // Move on to the next batch, if any.
            outData += ostrides[2];
            inData  += istrides[2];
        }
    }

    return out;
}
// Sorts key/value pairs along dimension 0, choosing between two kernels
// based on the product of the key's dimensions 1..3: more than 4 uses the
// batched kernel, otherwise the iterative one.
void sort0ByKey(Array<Tk> okey, Array<Tv> oval, bool isAscending)
{
    const int higherDims = okey.dims()[1] * okey.dims()[2] * okey.dims()[3];

    // TODO Make a better heuristic
    const bool useBatched = (higherDims > 4);

    if (!useBatched) {
        kernel::sort0ByKeyIterative<Tk, Tv>(okey, oval, isAscending);
        return;
    }
    kernel::sortByKeyBatched<Tk, Tv>(okey, oval, 0, isAscending);
}
// Multiplies `lhs` by `rhs` through clBLAS, honouring the transpose options
// of each operand, and returns the M x N product.
// When the selected column dimension of `rhs` has extent 1 the product is
// computed with gemv, otherwise with gemm. A non-zero clBLAS status is
// reported by throwing runtime_error.
Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs, af_blas_transpose optLhs, af_blas_transpose optRhs)
{
    initBlas();

    clblasTranspose lOpts = toClblasTranspose(optLhs);
    clblasTranspose rOpts = toClblasTranspose(optRhs);

    // Which dimension of each operand plays the role of rows / columns
    // depends on whether it is used transposed.
    const bool lhsAsIs = (lOpts == clblasNoTrans);
    const bool rhsAsIs = (rOpts == clblasNoTrans);
    const int aRowDim  = lhsAsIs ? 0 : 1;
    const int aColDim  = lhsAsIs ? 1 : 0;
    const int bColDim  = rhsAsIs ? 1 : 0;

    dim4 lDims = lhs.dims();
    dim4 rDims = rhs.dims();

    const int M = lDims[aRowDim];
    const int N = rDims[bColDim];
    const int K = lDims[aColDim];

    //FIXME: Leaks on errors.
    Array<T> out = createEmptyArray<T>(af::dim4(M, N, 1, 1));

    auto alpha = scalar<T>(1);
    auto beta  = scalar<T>(0);

    dim4 lStrides = lhs.strides();
    dim4 rStrides = rhs.strides();

    clblasStatus err;
    cl::Event event;

    const bool rhsIsVector = (rDims[bColDim] == 1);
    if (rhsIsVector) {
        gemv_func<T> gemv;
        err = gemv(clblasColumnMajor, lOpts,
                   lDims[0], lDims[1],
                   alpha,
                   (*lhs.get())(), lhs.getOffset(), lStrides[1],
                   (*rhs.get())(), rhs.getOffset(), rStrides[0],
                   beta,
                   (*out.get())(), out.getOffset(), 1,
                   1, &getQueue()(), 0, nullptr, &event());
    } else {
        gemm_func<T> gemm;
        err = gemm(clblasColumnMajor, lOpts, rOpts,
                   M, N, K,
                   alpha,
                   (*lhs.get())(), lhs.getOffset(), lStrides[1],
                   (*rhs.get())(), rhs.getOffset(), rStrides[1],
                   beta,
                   (*out.get())(), out.getOffset(), out.dims()[0],
                   1, &getQueue()(), 0, nullptr, &event());
    }

    if (err) {
        throw runtime_error(std::string("CLBLAS error: ") + std::to_string(err));
    }

    return out;
}
// Accumulates windows stored in `in` back into the image `out`.
// Each index along dimension `d` of `in` identifies one wx-by-wy window; its
// position in the output follows from the strides (sx, sy) and padding
// (px, py). Window elements that land inside the output are added to the
// existing output values, so `out` must hold the desired starting values on
// entry. Dimensions 2 and 3 are processed slice by slice.
void wrap_dim(Array<T> out, const Array<T> in, const dim_t wx, const dim_t wy, const dim_t sx, const dim_t sy, const dim_t px, const dim_t py)
{
    const T *inPtr = in.get();
    T *outPtr      = out.get();

    const af::dim4 idims    = in.dims();
    const af::dim4 odims    = out.dims();
    const af::dim4 istrides = in.strides();
    const af::dim4 ostrides = out.strides();

    // Number of window positions along the first output dimension.
    const dim_t nx = (odims[0] + 2 * px - wx) / sx + 1;

    // Step between consecutive elements of one window inside `in`:
    // the dim-1 stride when windows are indexed along dim 0, otherwise 1.
    const dim_t elemStep = (d == 0) ? istrides[1] : 1;

    for (dim_t w = 0; w < idims[3]; w++) {
        for (dim_t z = 0; z < idims[2]; z++) {
            const T *sliceIn = inPtr  + (w * istrides[3] + z * istrides[2]);
            T *optr          = outPtr + (w * ostrides[3] + z * ostrides[2]);

            for (dim_t col = 0; col < idims[d]; col++) {
                // Start of this window's data in the input.
                const T *iptr = sliceIn + col * istrides[d];

                // Top-left corner of the window in output coordinates,
                // with the padding removed.
                const dim_t spx = (col % nx) * sx - px;
                const dim_t spy = (col / nx) * sy - py;

                // Short cut condition ensuring all values within input dimensions
                const bool cond = (spx >= 0 && spx + wx < odims[0] &&
                                   spy >= 0 && spy + wy < odims[1]);

                for (dim_t y = 0; y < wy; y++) {
                    for (dim_t x = 0; x < wx; x++) {
                        const dim_t xpad = spx + x;
                        const dim_t ypad = spy + y;

                        const bool inside =
                            cond || (xpad >= 0 && xpad < odims[0] &&
                                     ypad >= 0 && ypad < odims[1]);
                        if (!inside) {
                            continue;
                        }

                        const dim_t iloc = (y * wx + x) * elemStep;
                        const dim_t oloc = (ypad * ostrides[1] + xpad * ostrides[0]);

                        // FIXME: When using threads, atomize this
                        optr[oloc] += iptr[iloc];
                    }
                }
            }
        }
    }
}
// Allocates a (size x size x batch) array, where size = in.dims()[0] + |num|
// and batch = in.dims()[1], and lets kernel::diagCreate fill it from `in`
// for the diagonal index `num`.
Array<T> diagCreate(const Array<T> &in, const int num)
{
    const int batch = in.dims()[1];
    const int size  = in.dims()[0] + std::abs(num);

    const dim4 outDims(size, size, batch);
    Array<T> out = createEmptyArray<T>(outDims);

    kernel::diagCreate<T>(out, in, num);

    return out;
}
// Copies every element of `from` into the buffer `to`, which must have room
// for from.elements() values of T.
// Arrays that own their data are copied with one memcpy (presumably such
// arrays are stored contiguously - confirm); others are copied element by
// element with stridedCopy into the layout given by calcStrides(from.dims()).
void copyData(T *to, const Array<T> &from)
{
    if (!from.isOwner()) {
        dim4 ostrides = calcStrides(from.dims());
        stridedCopy<T>(to, ostrides,
                       from.get(), from.dims(), from.strides(),
                       from.ndims() - 1);
        return;
    }

    // FIXME: Check for errors / exceptions
    memcpy(to, from.get(), from.elements()*sizeof(T));
}
// Sorts `in` along dimension `dim`, producing the sorted values in `val` and
// an index array in `idx`. Both outputs are re-created with the input's
// dimensions. Only dim == 0 is implemented; other values raise
// AF_ERR_NOT_SUPPORTED through AF_ERROR.
void sort_index(Array<T> &val, Array<uint> &idx, const Array<T> &in, const uint dim)
{
    val = createEmptyArray<T>(in.dims());
    idx = createEmptyArray<uint>(in.dims());

    if (dim == 0) {
        sort0_index<T, isAscending>(val, idx, in);
    } else {
        AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
    }
}
// Scales every element of `in` by a factor just below one
// (1.0 - 1.0e-6f). The multiplication is carried out in float and the
// result is cast back to T.
Array<T> normalizePerType(const Array<T>& in)
{
    Array<float> asFloat = cast<float, T>(in);
    Array<float> factor  = createValueArray<float>(in.dims(), 1.0 - 1.0e-6f);
    Array<float> product = arithOp<float, af_mul_t>(asFloat, factor, in.dims());

    return cast<T, float>(product);
}
// Runs the indexed reduction `op` over `in` along dimension `dim`.
//
// The work is done by ireduce_dim<op, T, N>, where N is the number of
// dimensions of `in` (1..4). It receives the buffers, strides and dims of
// `out` and `in`, plus the buffer of `loc`.
//
// `out` and `loc` are not allocated or resized here; the caller must pass
// arrays that are already suitably sized. For an `in.ndims()` outside 1..4
// nothing is done.
void ireduce(Array<T> &out, Array<uint> &loc, const Array<T> &in, const int dim)
{
    switch (in.ndims()) {
        case 1:
            ireduce_dim<op, T, 1>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;
        case 2:
            ireduce_dim<op, T, 2>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;
        case 3:
            ireduce_dim<op, T, 3>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;
        case 4:
            ireduce_dim<op, T, 4>()(out.get(), out.strides(), out.dims(),
                                    loc.get(),
                                    in.get(), in.strides(), in.dims(), dim);
            break;
    }
}
// Runs the scan `op` over `in` along dimension `dim` and returns the result
// as a new array with the input's dimensions. The output is created filled
// with 0 before scan_dim<op, Ti, To, N> is run, where N is the number of
// dimensions of `in` (1..4). For any other rank the zero-filled array is
// returned unchanged.
Array<To> scan(const Array<Ti>& in, const int dim)
{
    const dim4 inDims = in.dims();
    Array<To> out     = createValueArray<To>(inDims, 0);

    switch (in.ndims()) {
        case 1:
            scan_dim<op, Ti, To, 1>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(),
                                      dim);
            break;
        case 2:
            scan_dim<op, Ti, To, 2>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(),
                                      dim);
            break;
        case 3:
            scan_dim<op, Ti, To, 3>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(),
                                      dim);
            break;
        case 4:
            scan_dim<op, Ti, To, 4>()(out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(),
                                      dim);
            break;
    }

    return out;
}
// Standard deviation over all elements of `in`.
// Computed as sqrt(sum((x - mean)^2) / N) with N = number of elements
// (i.e. the sum of squares is divided by N, not N-1).
static outType stdev(const af_array& in)
{
    Array<inType> _in    = getArray<inType>(in);
    Array<outType> input = cast<outType>(_in);

    // Array of the input's shape holding the overall mean in every element.
    Array<outType> meanCnst =
        createValueArray<outType>(input.dims(), mean<inType, outType>(_in));

    // Squared deviations from the mean.
    Array<outType> deviation =
        detail::arithOp<outType, af_sub_t>(input, meanCnst, input.dims());
    Array<outType> deviationSq =
        detail::arithOp<outType, af_mul_t>(deviation, deviation, deviation.dims());

    // Mean of the squared deviations, then its square root.
    outType variance =
        division(reduce_all<af_add_t, outType, outType>(deviationSq), input.elements());

    return sqrt(variance);
}
// Sorts the key/value pairs (`ikey`, `ival`) along dimension `dim`, writing
// the results to `okey` and `oval`. Both outputs are re-created with the
// dimensions of their respective inputs. Only dim == 0 is implemented;
// other values raise AF_ERR_NOT_SUPPORTED through AF_ERROR.
void sort_by_key(Array<Tk> &okey, Array<Tv> &oval, const Array<Tk> &ikey, const Array<Tv> &ival, const uint dim)
{
    okey = createEmptyArray<Tk>(ikey.dims());
    oval = createEmptyArray<Tv>(ival.dims());

    if (dim == 0) {
        sort0_by_key<Tk, Tv, isAscending>(okey, oval, ikey, ival);
    } else {
        AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
    }
}