示例#1
0
__global__
void AssignKernel(Param<T> out, CParam<T> in, const AssignKernelParam_t p,
                 const int nBBS0, const int nBBS1)
{
    // retrieve index pointers
    // these can be 0 where af_array index is not used
    const uint* ptr0 = p.ptr[0];
    const uint* ptr1 = p.ptr[1];
    const uint* ptr2 = p.ptr[2];
    const uint* ptr3 = p.ptr[3];
    // retrive booleans that tell us which index to use
    const bool s0 = p.isSeq[0];
    const bool s1 = p.isSeq[1];
    const bool s2 = p.isSeq[2];
    const bool s3 = p.isSeq[3];

    const int gz = blockIdx.x/nBBS0;
    const int gw = blockIdx.y/nBBS1;
    const int gx = blockDim.x * (blockIdx.x - gz*nBBS0) + threadIdx.x;
    const int gy = blockDim.y * (blockIdx.y - gw*nBBS1) + threadIdx.y;

    if (gx<in.dims[0] && gy<in.dims[1] && gz<in.dims[2] && gw<in.dims[3]) {
        // calculate pointer offsets for input
        int i = p.strds[0] * trimIndex(s0 ? gx+p.offs[0] : ptr0[gx], out.dims[0]);
        int j = p.strds[1] * trimIndex(s1 ? gy+p.offs[1] : ptr1[gy], out.dims[1]);
        int k = p.strds[2] * trimIndex(s2 ? gz+p.offs[2] : ptr2[gz], out.dims[2]);
        int l = p.strds[3] * trimIndex(s3 ? gw+p.offs[3] : ptr3[gw], out.dims[3]);
        // offset input and output pointers
        const T *src = (const T*)in.ptr + (gx*in.strides[0]+gy*in.strides[1]+ gz*in.strides[2]+gw*in.strides[3]);
        T *dst = (T*)out.ptr +(i+j+k+l);
        // set the output
        dst[0] = src[0];
    }
}
示例#2
0
__global__
void lookupND(Param<in_t> out, CParam<in_t> in, CParam<idx_t> indices,
                    int nBBS0, int nBBS1)
{
    int lx = threadIdx.x;
    int ly = threadIdx.y;

    int gz = blockIdx.x/nBBS0;
    int gw = blockIdx.y/nBBS1;

    int gx = blockDim.x * (blockIdx.x - gz*nBBS0) + lx;
    int gy = blockDim.y * (blockIdx.y - gw*nBBS1) + ly;

    const idx_t *idxPtr = (const idx_t*)indices.ptr;

    int i = in.strides[0]*(dim==0 ? trimIndex((int)idxPtr[gx], in.dims[0]): gx);
    int j = in.strides[1]*(dim==1 ? trimIndex((int)idxPtr[gy], in.dims[1]): gy);
    int k = in.strides[2]*(dim==2 ? trimIndex((int)idxPtr[gz], in.dims[2]): gz);
    int l = in.strides[3]*(dim==3 ? trimIndex((int)idxPtr[gw], in.dims[3]): gw);

    const in_t *inPtr = (const in_t*)in.ptr + (i+j+k+l);
    in_t *outPtr = (in_t*)out.ptr +(gx*out.strides[0]+gy*out.strides[1]+
                                    gz*out.strides[2]+gw*out.strides[3]);

    if (gx<out.dims[0] && gy<out.dims[1] && gz<out.dims[2] && gw<out.dims[3]) {
        outPtr[0] = inPtr[0];
    }
}
示例#3
0
void assign(Param<T> out, af::dim4 dDims,
            CParam<T> rhs, std::vector<bool> const isSeq,
            std::vector<af_seq> const seqs, std::vector< CParam<uint> > idxArrs)
{
    af::dim4 pDims = out.dims();
    // retrieve dimensions & strides for array to which rhs is being copied to
    af::dim4 dst_offsets = toOffset(seqs, dDims);
    af::dim4 dst_strides = toStride(seqs, dDims);
    // retrieve rhs array dimenesions & strides
    af::dim4 src_dims    = rhs.dims();
    af::dim4 src_strides = rhs.strides();
    // declare pointers to af_array index data
    uint const * const ptr0 = idxArrs[0].get();
    uint const * const ptr1 = idxArrs[1].get();
    uint const * const ptr2 = idxArrs[2].get();
    uint const * const ptr3 = idxArrs[3].get();

    const T * src= rhs.get();
    T * dst      = out.get();

    for(dim_t l=0; l<src_dims[3]; ++l) {

        dim_t src_loff = l*src_strides[3];

        dim_t dst_lIdx = trimIndex(isSeq[3] ? l+dst_offsets[3] : ptr3[l], pDims[3]);
        dim_t dst_loff = dst_lIdx * dst_strides[3];

        for(dim_t k=0; k<src_dims[2]; ++k) {

            dim_t src_koff = k*src_strides[2];

            dim_t dst_kIdx = trimIndex(isSeq[2] ? k+dst_offsets[2] : ptr2[k], pDims[2]);
            dim_t dst_koff = dst_kIdx * dst_strides[2];

            for(dim_t j=0; j<src_dims[1]; ++j) {

                dim_t src_joff = j*src_strides[1];

                dim_t dst_jIdx = trimIndex(isSeq[1] ? j+dst_offsets[1] : ptr1[j], pDims[1]);
                dim_t dst_joff = dst_jIdx * dst_strides[1];

                for(dim_t i=0; i<src_dims[0]; ++i) {

                    dim_t src_ioff = i*src_strides[0];
                    dim_t src_idx  = src_ioff + src_joff + src_koff + src_loff;

                    dim_t dst_iIdx = trimIndex(isSeq[0] ? i+dst_offsets[0] : ptr0[i], pDims[0]);
                    dim_t dst_ioff = dst_iIdx * dst_strides[0];
                    dim_t dst_idx  = dst_ioff + dst_joff + dst_koff + dst_loff;

                    dst[dst_idx] = src[src_idx];
                }
            }
        }
    }
}
示例#4
0
void index(Array<T> out, Array<T> const in,
           std::vector<bool> const isSeq, std::vector<af_seq> const seqs,
           std::vector< Array<uint> > const idxArrs)
{
    const af::dim4 iDims    = in.dims();
    const af::dim4 dDims    = in.getDataDims();
    const af::dim4 iOffs    = toOffset(seqs, dDims);
    const af::dim4 iStrds   = toStride(seqs, dDims);
    const af::dim4 oDims    = out.dims();
    const af::dim4 oStrides = out.strides();
    const T *src        = in.get();
    T *dst        = out.get();
    const uint* ptr0    = idxArrs[0].get();
    const uint* ptr1    = idxArrs[1].get();
    const uint* ptr2    = idxArrs[2].get();
    const uint* ptr3    = idxArrs[3].get();

    for (dim_t l=0; l<oDims[3]; ++l) {

        dim_t lOff   = l*oStrides[3];
        dim_t inIdx3 = trimIndex(isSeq[3] ? l+iOffs[3] : ptr3[l], iDims[3]);
        dim_t inOff3 = inIdx3*iStrds[3];

        for (dim_t k=0; k<oDims[2]; ++k) {

            dim_t kOff   = k*oStrides[2];
            dim_t inIdx2 = trimIndex(isSeq[2] ? k+iOffs[2] : ptr2[k], iDims[2]);
            dim_t inOff2 = inIdx2*iStrds[2];

            for (dim_t j=0; j<oDims[1]; ++j) {

                dim_t jOff   = j*oStrides[1];
                dim_t inIdx1 = trimIndex(isSeq[1] ? j+iOffs[1] : ptr1[j], iDims[1]);
                dim_t inOff1 = inIdx1*iStrds[1];

                for (dim_t i=0; i<oDims[0]; ++i) {

                    dim_t iOff   = i*oStrides[0];
                    dim_t inIdx0 = trimIndex(isSeq[0] ? i+iOffs[0] : ptr0[i], iDims[0]);
                    dim_t inOff0 = inIdx0*iStrds[0];

                    dst[lOff+kOff+jOff+iOff] = src[inOff3+inOff2+inOff1+inOff0];
                }
            }
        }
    }
}
示例#5
0
Array<in_t>* arrayIndex(const Array<in_t> &input, const Array<idx_t> &indices, const unsigned dim)
{
    const dim4 iDims = input.dims();
    const dim4 iStrides = input.strides();

    const in_t *inPtr = input.get();
    const idx_t *idxPtr = indices.get();

    dim4 oDims(1);
    for (dim_type d=0; d<4; ++d)
        oDims[d] = (d==int(dim) ? indices.elements() : iDims[d]);

    Array<in_t>* out = createEmptyArray<in_t>(oDims);

    dim4 oStrides = out->strides();

    in_t *outPtr = out->get();

    for (dim_type l=0; l<oDims[3]; ++l) {

        dim_type iLOff = iStrides[3]*(dim==3 ? trimIndex((dim_type)idxPtr[l], iDims[3]): l);
        dim_type oLOff = l*oStrides[3];

        for (dim_type k=0; k<oDims[2]; ++k) {

            dim_type iKOff = iStrides[2]*(dim==2 ? trimIndex((dim_type)idxPtr[k], iDims[2]): k);
            dim_type oKOff = k*oStrides[2];

            for (dim_type j=0; j<oDims[1]; ++j) {

                dim_type iJOff = iStrides[1]*(dim==1 ? trimIndex((dim_type)idxPtr[j], iDims[1]): j);
                dim_type oJOff = j*oStrides[1];

                for (dim_type i=0; i<oDims[0]; ++i) {

                    dim_type iIOff = iStrides[0]*(dim==0 ? trimIndex((dim_type)idxPtr[i], iDims[0]): i);
                    dim_type oIOff = i*oStrides[0];

                    outPtr[oLOff+oKOff+oJOff+oIOff] = inPtr[iLOff+iKOff+iJOff+iIOff];
                }
            }
        }
    }

    return out;
}
示例#6
0
void lookup(Param<InT> out, CParam<InT> input, CParam<IndexT> indices,
            unsigned const dim) {
    const af::dim4 iDims    = input.dims();
    const af::dim4 oDims    = out.dims();
    const af::dim4 iStrides = input.strides();
    const af::dim4 oStrides = out.strides();
    const InT *inPtr        = input.get();
    const IndexT *idxPtr    = indices.get();

    InT *outPtr = out.get();

    for (dim_t l = 0; l < oDims[3]; ++l) {
        dim_t iLOff = iStrides[3] *
                      (dim == 3 ? trimIndex((dim_t)idxPtr[l], iDims[3]) : l);
        dim_t oLOff = l * oStrides[3];

        for (dim_t k = 0; k < oDims[2]; ++k) {
            dim_t iKOff =
                iStrides[2] *
                (dim == 2 ? trimIndex((dim_t)idxPtr[k], iDims[2]) : k);
            dim_t oKOff = k * oStrides[2];

            for (dim_t j = 0; j < oDims[1]; ++j) {
                dim_t iJOff =
                    iStrides[1] *
                    (dim == 1 ? trimIndex((dim_t)idxPtr[j], iDims[1]) : j);
                dim_t oJOff = j * oStrides[1];

                for (dim_t i = 0; i < oDims[0]; ++i) {
                    dim_t iIOff =
                        iStrides[0] *
                        (dim == 0 ? trimIndex((dim_t)idxPtr[i], iDims[0]) : i);
                    dim_t oIOff = i * oStrides[0];

                    outPtr[oLOff + oKOff + oJOff + oIOff] =
                        inPtr[iLOff + iKOff + iJOff + iIOff];
                }
            }
        }
    }
}
示例#7
0
__global__
void lookup1D(Param<in_t> out, CParam<in_t> in, CParam<idx_t> indices, int vDim)
{
    int idx = threadIdx.x + blockIdx.x * THREADS * THRD_LOAD;

    const in_t* inPtr   = (const in_t*)in.ptr;
    const idx_t* idxPtr = (const idx_t*)indices.ptr;

    in_t* outPtr  = (in_t*)out.ptr;

    int en = min(out.dims[vDim], idx + THRD_LOAD * THREADS);

    for (int oIdx = idx; oIdx < en; oIdx += THREADS) {
        int iIdx = trimIndex(idxPtr[oIdx], in.dims[vDim]);
        outPtr[oIdx] = inPtr[iIdx];
    }
}