// Builds an Array describing the part of `parent` selected by `index`.
//
// parent : array to index into; eval() is called on it first.
// index  : sequences handed to toDims/toOffset/toStride to derive the
//          result's dimensions, offsets and strides.
// copy   : when false the Array constructed from (parent, dims, offset,
//          stride) is returned untouched. When true, that Array is replaced
//          by copyArray() of itself if its stride along dimension 0 is not 1
//          or any of its higher-dimension strides is negative.
//
// NOTE(review): the 4-argument Array constructor presumably yields a view
// sharing parent's storage -- confirm against the Array class.
Array<T> createSubArray(const Array<T>& parent, const std::vector<af_seq> &index, bool copy)
{
    parent.eval();

    // Offsets and strides are derived from the data dimensions, whereas the
    // resulting dimensions are derived from the parent's own dimensions.
    dim4 dataDims   = parent.getDataDims();
    dim4 parentDims = parent.dims();

    dim4 subDims   = toDims  (index, parentDims);
    dim4 subOffset = toOffset(index, dataDims);
    dim4 subStride = toStride(index, dataDims);

    Array<T> result = Array<T>(parent, subDims, subOffset, subStride);

    if (copy) {
        const bool unitFirstStride = (subStride[0] == 1);
        const bool forwardStrides  = (subStride[1] >= 0) &&
                                     (subStride[2] >= 0) &&
                                     (subStride[3] >= 0);

        // Only materialise a copy when the strides fail the checks above.
        if (!(unitFirstStride && forwardStrides)) {
            result = copyArray(result);
        }
    }

    return result;
}
// Builds an Array describing the part of `parent` selected by `index`.
//
// parent : array to index into; eval() is called on it first.
// index  : sequences handed to toDims/toOffset/toStride.
// copy   : when false the freshly constructed Array is returned as is. When
//          true, it is replaced by copyArray() of itself if its stride along
//          dimension 0 is not 1 or any higher-dimension stride is negative.
//
// If parent's strides differ from calcStrides(parent.getDataDims()), parent
// is first duplicated with copyArray() and the function recurses on that
// duplicate.
Array<T> createSubArray(const Array<T> &parent, const vector<af_seq> &index, bool copy)
{
    parent.eval();

    dim4 dataDims      = parent.getDataDims();
    dim4 packedStrides = calcStrides(dataDims);
    dim4 parentStrides = parent.strides();

    // Parent strides do not match the ones computed from its data
    // dimensions: restart from a copy of the parent.
    if (packedStrides != parentStrides) {
        const Array<T> parentCopy = copyArray(parent);
        return createSubArray(parentCopy, index, copy);
    }

    dim4 parentDims = parent.dims();
    dim4 subDims    = toDims(index, parentDims);
    dim4 subStrides = toStride(index, dataDims);

    // Fold the per-dimension offsets into one element offset, starting
    // from the offset the parent already carries.
    dim4  dimOffsets  = toOffset(index, parentDims);
    dim_t totalOffset = parent.getOffset();
    for (int d = 0; d < 4; d++) {
        totalOffset += dimOffsets[d] * parentStrides[d];
    }

    Array<T> result = Array<T>(parent, subDims, totalOffset, subStrides);

    if (copy) {
        const bool unitFirstStride = (subStrides[0] == 1);
        const bool forwardStrides  = (subStrides[1] >= 0) &&
                                     (subStrides[2] >= 0) &&
                                     (subStrides[3] >= 0);

        // Only materialise a copy when the strides fail the checks above.
        if (!(unitFirstStride && forwardStrides)) {
            result = copyArray(result);
        }
    }

    return result;
}
void assign(Param<T> out, af::dim4 dDims, CParam<T> rhs, std::vector<bool> const isSeq, std::vector<af_seq> const seqs, std::vector< CParam<uint> > idxArrs) { af::dim4 pDims = out.dims(); // retrieve dimensions & strides for array to which rhs is being copied to af::dim4 dst_offsets = toOffset(seqs, dDims); af::dim4 dst_strides = toStride(seqs, dDims); // retrieve rhs array dimenesions & strides af::dim4 src_dims = rhs.dims(); af::dim4 src_strides = rhs.strides(); // declare pointers to af_array index data uint const * const ptr0 = idxArrs[0].get(); uint const * const ptr1 = idxArrs[1].get(); uint const * const ptr2 = idxArrs[2].get(); uint const * const ptr3 = idxArrs[3].get(); const T * src= rhs.get(); T * dst = out.get(); for(dim_t l=0; l<src_dims[3]; ++l) { dim_t src_loff = l*src_strides[3]; dim_t dst_lIdx = trimIndex(isSeq[3] ? l+dst_offsets[3] : ptr3[l], pDims[3]); dim_t dst_loff = dst_lIdx * dst_strides[3]; for(dim_t k=0; k<src_dims[2]; ++k) { dim_t src_koff = k*src_strides[2]; dim_t dst_kIdx = trimIndex(isSeq[2] ? k+dst_offsets[2] : ptr2[k], pDims[2]); dim_t dst_koff = dst_kIdx * dst_strides[2]; for(dim_t j=0; j<src_dims[1]; ++j) { dim_t src_joff = j*src_strides[1]; dim_t dst_jIdx = trimIndex(isSeq[1] ? j+dst_offsets[1] : ptr1[j], pDims[1]); dim_t dst_joff = dst_jIdx * dst_strides[1]; for(dim_t i=0; i<src_dims[0]; ++i) { dim_t src_ioff = i*src_strides[0]; dim_t src_idx = src_ioff + src_joff + src_koff + src_loff; dim_t dst_iIdx = trimIndex(isSeq[0] ? i+dst_offsets[0] : ptr0[i], pDims[0]); dim_t dst_ioff = dst_iIdx * dst_strides[0]; dim_t dst_idx = dst_ioff + dst_joff + dst_koff + dst_loff; dst[dst_idx] = src[src_idx]; } } } } }
// Gathers elements of `in` selected by four per-dimension indexers and
// returns them in a newly created array.
//
// in    : array to read from.
// idxrs : four indexers; idxrs[d].isSeq selects between idxrs[d].idx.seq
//         (a sequence) and idxrs[d].idx.arr (an index array, converted with
//         castArray<uint>).
//
// Returns an array whose extent along a sequence dimension comes from
// toDims(), and along an array-indexed dimension equals the number of
// elements of that index array. If the resulting array has zero elements,
// the kernel is not launched.
//
// Every placeholder buffer obtained from bufferAlloc() is released before
// returning, including on the zero-element path (which previously returned
// early and skipped the release).
Array<T> index(const Array<T>& in, const af_index_t idxrs[])
{
    kernel::IndexKernelParam_t p;
    std::vector<af_seq> seqs(4, af_span);

    // Create the seq vector used to retrieve output
    // dimensions, offsets & strides; non-sequence dimensions keep af_span.
    for (dim_t x = 0; x < 4; ++x) {
        if (idxrs[x].isSeq) {
            seqs[x] = idxrs[x].idx.seq;
        }
    }

    // Retrieve dimensions, strides and offsets.
    dim4 iDims  = in.dims();
    dim4 dDims  = in.getDataDims();
    dim4 oDims  = toDims  (seqs, iDims);
    dim4 iOffs  = toOffset(seqs, dDims);
    dim4 iStrds = toStride(seqs, dDims);

    for (dim_t i = 0; i < 4; ++i) {
        p.isSeq[i] = idxrs[i].isSeq;
        p.offs[i]  = iOffs[i];
        p.strds[i] = iStrds[i];
    }

    Buffer* bPtrs[4];

    std::vector< Array<uint> > idxArrs(4, createEmptyArray<uint>(dim4()));
    // Look through the indexers to read the af_array indices.
    for (dim_t x = 0; x < 4; ++x) {
        if (!p.isSeq[x]) {
            // Set index pointers where applicable.
            idxArrs[x] = castArray<uint>(idxrs[x].idx.arr);
            bPtrs[x]   = idxArrs[x].get();
            // The output extent along this dimension is the index count.
            oDims[x]   = idxArrs[x].elements();
        } else {
            // Allocate a placeholder buffer (sizeof(uint) bytes) to avoid
            // OpenCL from failing; it is owned by this function and
            // released below.
            bPtrs[x] = bufferAlloc(sizeof(uint));
        }
    }

    Array<T> out = createEmptyArray<T>(oDims);

    // Nothing to gather for an empty result; skip the kernel but still fall
    // through to the cleanup below.
    if (oDims.elements() != 0) {
        kernel::index<T>(out, in, p, bPtrs);
    }

    // Single cleanup point: release only the placeholder buffers allocated
    // above (array-indexed dimensions point at buffers owned by idxArrs).
    for (dim_t x = 0; x < 4; ++x) {
        if (p.isSeq[x]) bufferFree(bPtrs[x]);
    }

    return out;
}
void index(Array<T> out, Array<T> const in, std::vector<bool> const isSeq, std::vector<af_seq> const seqs, std::vector< Array<uint> > const idxArrs) { const af::dim4 iDims = in.dims(); const af::dim4 dDims = in.getDataDims(); const af::dim4 iOffs = toOffset(seqs, dDims); const af::dim4 iStrds = toStride(seqs, dDims); const af::dim4 oDims = out.dims(); const af::dim4 oStrides = out.strides(); const T *src = in.get(); T *dst = out.get(); const uint* ptr0 = idxArrs[0].get(); const uint* ptr1 = idxArrs[1].get(); const uint* ptr2 = idxArrs[2].get(); const uint* ptr3 = idxArrs[3].get(); for (dim_t l=0; l<oDims[3]; ++l) { dim_t lOff = l*oStrides[3]; dim_t inIdx3 = trimIndex(isSeq[3] ? l+iOffs[3] : ptr3[l], iDims[3]); dim_t inOff3 = inIdx3*iStrds[3]; for (dim_t k=0; k<oDims[2]; ++k) { dim_t kOff = k*oStrides[2]; dim_t inIdx2 = trimIndex(isSeq[2] ? k+iOffs[2] : ptr2[k], iDims[2]); dim_t inOff2 = inIdx2*iStrds[2]; for (dim_t j=0; j<oDims[1]; ++j) { dim_t jOff = j*oStrides[1]; dim_t inIdx1 = trimIndex(isSeq[1] ? j+iOffs[1] : ptr1[j], iDims[1]); dim_t inOff1 = inIdx1*iStrds[1]; for (dim_t i=0; i<oDims[0]; ++i) { dim_t iOff = i*oStrides[0]; dim_t inIdx0 = trimIndex(isSeq[0] ? i+iOffs[0] : ptr0[i], iDims[0]); dim_t inOff0 = inIdx0*iStrds[0]; dst[lOff+kOff+jOff+iOff] = src[inOff3+inOff2+inOff1+inOff0]; } } } } }