예제 #1
0
__device__
typename thrust::detail::enable_if<
bound <= groupsize * grainsize
>::type
scatter_if(bulk::bounded<
           bound,
           bulk::concurrent_group<bulk::agent<grainsize>,groupsize>
           > &g,
           RandomAccessIterator1 first,
           RandomAccessIterator1 last,
           RandomAccessIterator2 map,
           RandomAccessIterator3 stencil,
           RandomAccessIterator4 result)
{
    typedef typename bulk::bounded<
    bound,
    bulk::concurrent_group<bulk::agent<grainsize>,groupsize>
    >::size_type size_type;

    typedef typename bulk::concurrent_group<bulk::agent<grainsize>,groupsize>::agent_type agent_type;

    size_type n = last - first;

    size_type tid = g.this_exec.index();

    // avoid branches when possible
    if(n == bound)
    {
        for(size_type i = 0; i < g.this_exec.grainsize(); ++i)
        {
            size_type idx = g.size() * i + tid;

            if(stencil[idx])
            {
                result[map[idx]] = first[idx];
            } // end if
        } // end for
    } // end if
    else if(n < bound)
    {
        for(size_type i = 0; i < g.this_exec.grainsize(); ++i)
        {
            size_type idx = g.size() * i + tid;

            if(idx < (last - first) && stencil[idx])
            {
                result[map[idx]] = first[idx];
            } // end if
        } // end for
    } // end if

    g.wait();
} // end scatter_if()
__device__
typename thrust::detail::enable_if<
(bound <= groupsize * grainsize),
RandomAccessIterator2
>::type
copy_n(bulk::bounded<
       bound,
       concurrent_group<
       agent<grainsize>,
       groupsize
       >
       > &g,
       RandomAccessIterator1 first,
       Size n,
       RandomAccessIterator2 result)
{
    typedef bounded<
    bound,
    concurrent_group<
    agent<grainsize>,
    groupsize
    >
    > group_type;

    typedef typename group_type::size_type size_type;

    size_type tid = g.this_exec.index();

    typedef typename thrust::iterator_value<RandomAccessIterator1>::type value_type;

    // XXX make this an uninitialized array
    value_type stage[grainsize];

    // avoid conditional accesses when possible
    if(groupsize * grainsize <= n)
    {
        for(size_type i = 0; i < grainsize; ++i)
        {
            size_type src_idx = g.size() * i + tid;
            stage[i] = first[src_idx];
        } // end for i

        for(size_type i = 0; i < grainsize; ++i)
        {
            size_type dst_idx = g.size() * i + tid;
            result[dst_idx] = stage[i];
        } // end for i
    } // end if
    else
    {
        for(size_type i = 0; i < grainsize; ++i)
        {
            size_type src_idx = g.size() * i + tid;
            if(src_idx < n)
            {
                stage[i] = first[src_idx];
            } // end if
        } // end for

        for(size_type i = 0; i < grainsize; ++i)
        {
            size_type dst_idx = g.size() * i + tid;
            if(dst_idx < n)
            {
                result[dst_idx] = stage[i];
            } // end if
        } // end for
    } // end else

    g.wait();

    return result + thrust::min<Size>(g.size() * grainsize, n);
} // end copy_n()