// Conditionally scatters [first, last) through `map` into `result`, cooperatively
// across the agents of the bounded concurrent group `g`.
//
// For every index idx in [0, last - first) for which stencil[idx] converts to true,
// performs result[map[idx]] = first[idx]. The agent with index `tid` visits the
// indices tid, tid + g.size(), tid + 2 * g.size(), ... for g.this_exec.grainsize()
// iterations.
//
// Parameters:
//   g       - the bounded group executing the call; its agents share the work.
//   first   - beginning of the input range.
//   last    - end of the input range.
//   map     - for each input position, the destination offset into `result`.
//   stencil - for each input position, whether that element is scattered.
//   result  - beginning of the output sequence.
//
// If (last - first) exceeds `bound`, nothing is scattered. g.wait() is called
// unconditionally before returning (presumably a group-wide barrier -- confirm
// against the group type's definition).
//
// NOTE: the template parameters (bound, groupsize, grainsize and the iterator
// types) are declared in a template header that is not part of this block.
__device__
typename thrust::detail::enable_if<
  bound <= groupsize * grainsize
>::type
scatter_if(bulk::bounded<
             bound,
             bulk::concurrent_group<bulk::agent<grainsize>,groupsize>
           > &g,
           RandomAccessIterator1 first,
           RandomAccessIterator1 last,
           RandomAccessIterator2 map,
           RandomAccessIterator3 stencil,
           RandomAccessIterator4 result)
{
  typedef typename bulk::bounded<
    bound,
    bulk::concurrent_group<bulk::agent<grainsize>,groupsize>
  >::size_type size_type;

  const size_type n   = last - first;
  const size_type tid = g.this_exec.index();

  // avoid branches when possible
  //
  // The unguarded loop touches every index of the group's tile, so it is only
  // safe when the input fills that tile exactly. `n == bound` alone is not
  // enough: the enable_if above also admits bound < groupsize * grainsize, in
  // which case the trailing indices would lie beyond `last`.
  if(bound == groupsize * grainsize && n == bound)
  {
    for(size_type i = 0; i < g.this_exec.grainsize(); ++i)
    {
      size_type idx = g.size() * i + tid;

      if(stencil[idx])
      {
        result[map[idx]] = first[idx];
      } // end if
    } // end for
  } // end if
  else if(n <= bound)
  {
    for(size_type i = 0; i < g.this_exec.grainsize(); ++i)
    {
      size_type idx = g.size() * i + tid;

      // the bounds check must come first so that stencil is never read out of range
      if(idx < n && stencil[idx])
      {
        result[map[idx]] = first[idx];
      } // end if
    } // end for
  } // end else if

  g.wait();
} // end scatter_if()
// Cooperatively copies the first n elements of `first` to `result` using the
// agents of the bounded concurrent group `g`.
//
// Each agent owns up to `grainsize` elements located at
// g.this_exec.index() + k * g.size() for k in [0, grainsize). An agent first
// loads all of its elements into a local staging array and only then stores
// them, so within an agent every read precedes every write.
//
// Parameters:
//   g      - the bounded group executing the call.
//   first  - beginning of the input sequence.
//   n      - number of elements requested.
//   result - beginning of the output sequence.
//
// Returns result advanced by min(g.size() * grainsize, n).
//
// g.wait() is called before returning (presumably a group-wide barrier --
// confirm against the group type's definition).
//
// NOTE: the template parameters (bound, groupsize, grainsize, Size and the
// iterator types) are declared in a template header that is not part of this block.
__device__
typename thrust::detail::enable_if<
  (bound <= groupsize * grainsize),
  RandomAccessIterator2
>::type
copy_n(bulk::bounded<
         bound,
         concurrent_group<
           agent<grainsize>,
           groupsize
         >
       > &g,
       RandomAccessIterator1 first,
       Size n,
       RandomAccessIterator2 result)
{
  typedef bounded<
    bound,
    concurrent_group<
      agent<grainsize>,
      groupsize
    >
  > group_type;

  typedef typename group_type::size_type                                size_type;
  typedef typename thrust::iterator_value<RandomAccessIterator1>::type  value_type;

  size_type rank = g.this_exec.index();

  // TODO: this staging buffer should be an uninitialized array
  value_type stage[grainsize];

  if(!(groupsize * grainsize <= n))
  {
    // partial tile: every access has to be checked against n

    // gather phase
    for(size_type slot = 0; slot < grainsize; ++slot)
    {
      size_type from = g.size() * slot + rank;

      if(from < n)
      {
        stage[slot] = first[from];
      } // end if
    } // end for slot

    // store phase; only slots filled during the gather phase are read back
    for(size_type slot = 0; slot < grainsize; ++slot)
    {
      size_type to = g.size() * slot + rank;

      if(to < n)
      {
        result[to] = stage[slot];
      } // end if
    } // end for slot
  } // end if
  else
  {
    // full tile: no per-element bounds checks are needed

    // gather phase
    for(size_type slot = 0; slot < grainsize; ++slot)
    {
      size_type from = g.size() * slot + rank;
      stage[slot] = first[from];
    } // end for slot

    // store phase
    for(size_type slot = 0; slot < grainsize; ++slot)
    {
      size_type to = g.size() * slot + rank;
      result[to] = stage[slot];
    } // end for slot
  } // end else

  g.wait();

  return result + thrust::min<Size>(g.size() * grainsize, n);
} // end copy_n()