// Cooperatively combines the first n elements of [first, first + n) with
// binary_op, overwriting the range with partial results along the way, and
// returns binary_op(init, <combined value>). Returns init when n == 0.
//
// g         - the group whose agents execute this call; g.wait() is used as the
//             synchronization point between rounds (presumably a group-wide
//             barrier, so every agent of g must make this call -- TODO confirm).
// first     - random-access range that is both input and scratch storage; its
//             contents are clobbered.
// n         - number of elements to combine.
//             NOTE(review): only agents with index < n own a slot, so the result
//             covers all n elements only when n <= g.size() -- confirm callers.
// init      - value combined on the left of the final result.
// binary_op - combining function; always invoked as binary_op(earlier, later).
__device__ T destructive_accumulate_n(ConcurrentGroup &g, RandomAccessIterator first, Size n, T init, BinaryFunction binary_op)
{
  typedef typename ConcurrentGroup::size_type size_type;

  size_type tid = g.this_exec.index();

  // Agents without an element of their own start from init; their x is never
  // written back because every store below is guarded by tid < n.
  T x = init;
  if(tid < n)
  {
    x = first[tid];
  }

  g.wait();

  // Doubling-offset rounds: each agent folds in the partial result stored
  // `offset` slots to its left, so slot tid ends up holding the combination
  // of slots [0, tid].
  for(size_type offset = 1; offset < g.size(); offset += offset)
  {
    if(tid >= offset && tid - offset < n)
    {
      x = binary_op(first[tid - offset], x);
    }

    // All reads of this round must finish before any slot is overwritten.
    g.wait();

    if(tid < n)
    {
      first[tid] = x;
    }

    // Make this round's partial results visible before the next round reads them.
    g.wait();
  }

  // Guard the empty range: first[n - 1] would be read out of bounds when
  // n == 0, and there is nothing to combine with init in that case.
  T result = init;
  if(n > 0)
  {
    result = binary_op(init, first[n - 1]);
  }

  // Do not return until every agent has read the final slot, so the caller
  // may immediately reuse the storage behind `first`.
  g.wait();

  return result;
} // end destructive_accumulate_n()
// Cooperatively copies n elements from `first` to `result`.
//
// Each agent of g starts at its own index within the group and advances by
// g.size() per step, so consecutive agents touch consecutive elements.
// The call ends with g.wait(), after which the function returns the
// iterator one past the last element written (result + n).
__forceinline__ __device__ RandomAccessIterator2 simple_copy_n(ConcurrentGroup &g, RandomAccessIterator1 first, Size n, RandomAccessIterator2 result)
{
  Size idx = g.this_exec.index();

  while(idx < n)
  {
    result[idx] = first[idx];

    // step to this agent's next element
    idx += g.size();
  }

  // wait for the other agents of g before handing back the end iterator
  g.wait();

  return result + n;
} // end simple_copy_n()
// Cooperatively reduces the n elements of [first, first + n) with binary_op,
// using the range itself as scratch space (its contents are overwritten),
// and returns binary_op(init, <reduced value>), or init when n is not
// greater than zero.
//
// Each round folds the upper part of the live range onto the lower part:
// slot i is combined with slot (live - i - 1). Operands are therefore not
// combined in their original left-to-right order.
// NOTE(review): this pairing looks like it requires binary_op to be
// commutative as well as associative -- confirm against callers.
__device__ T destructive_reduce_n(ConcurrentGroup &g, RandomAccessIterator first, Size n, T init, BinaryFunction binary_op)
{
  typedef int size_type;

  size_type lane = g.this_exec.index();

  // number of slots that still hold live partial results
  Size live = n;

  while(live > 1)
  {
    Size fold_count = live >> 1;

    if(lane < fold_count)
    {
      // mirror slot index is always >= fold_count, so no agent reads a slot
      // that another agent writes during this round
      T lhs = first[lane];

      first[lane] = binary_op(lhs, first[live - lane - 1]);
    }

    // finish the round before the live range shrinks
    g.wait();

    // for odd counts the middle slot is carried over untouched
    live -= fold_count;
  }

  g.wait();

  // fold the surviving partial result (if any) into init
  T result = init;
  if(n > 0)
  {
    result = binary_op(result, first[0]);
  }

  // every agent reads first[0] before anyone is allowed to leave
  g.wait();

  return result;
} // end destructive_reduce_n()