示例#1
0
// Cooperatively folds the first n elements of `first` with `binary_op` and
// returns binary_op(init, total). Every thread of the group must call this
// function together, because it contains group-wide g.wait() calls
// (presumably a barrier across the group -- TODO confirm against ConcurrentGroup).
//
// The input is overwritten: each pass stores the running partial results back
// into first[0..n), so on exit first[i] holds the fold of first[0..i].
//
// Parameters:
//   g         - the cooperating group; supplies this thread's index, the group
//               size and the barrier.
//   first     - random-access range of at least n elements, read and written.
//   n         - number of elements to accumulate. NOTE(review): each thread
//               handles a single element, so this looks like it requires
//               n <= g.size() -- confirm with callers.
//   init      - initial value combined on the left of the total.
//   binary_op - combining function; always invoked as
//               binary_op(earlier_element, later_element).
//
// Returns init when n is zero (no element of `first` is read in that case).
// The template parameter list is declared outside this excerpt.
__device__ T destructive_accumulate_n(ConcurrentGroup &g, RandomAccessIterator first, Size n, T init, BinaryFunction binary_op)
{
  typedef typename ConcurrentGroup::size_type size_type;

  size_type tid = g.this_exec.index();

  // threads past the end of the input carry init; they never write back
  T x = init;
  if(tid < n)
  {
    x = first[tid];
  }

  g.wait();

  // the look-back distance doubles every pass: 1, 2, 4, ...
  for(size_type offset = 1; offset < g.size(); offset += offset)
  {
    if(tid >= offset && tid - offset < n)
    {
      x = binary_op(first[tid - offset], x);
    }

    // every read of this pass must finish before any element is overwritten
    g.wait();

    if(tid < n)
    {
      first[tid] = x;
    }

    // publish this pass's partial results before the next pass reads them
    g.wait();
  }

  // an empty range has no first[n - 1]; the result is simply init
  T result = init;
  if(n > 0)
  {
    result = binary_op(init, first[n - 1]);
  }

  // hold every thread here until all of them have read first[n - 1], so the
  // caller may safely reuse the buffer as soon as this function returns
  g.wait();

  return result;
}
// Copies first[0..n) into result[0..n) using all threads of the group.
// Each thread starts at its own index within the group and advances by the
// group size, so the elements are split across the threads. The call to
// g.wait() after the loop is made by every thread (presumably a group-wide
// barrier -- TODO confirm), so all threads must call this function together.
// Returns result + n, i.e. the end of the destination range.
// The template parameter list is declared outside this excerpt.
__forceinline__ __device__
RandomAccessIterator2 simple_copy_n(ConcurrentGroup &g, RandomAccessIterator1 first, Size n, RandomAccessIterator2 result)
{
    // position of the element this thread copies next
    Size pos = g.this_exec.index();

    while(pos < n)
    {
        result[pos] = first[pos];

        // hop over the elements handled by the other threads
        pos += g.size();
    }

    // do not return until every thread has finished its share of the copy
    g.wait();

    return result + n;
} // end simple_copy_n()
示例#3
0
文件: reduce.hpp 项目: sebas095/CUDA
// Cooperatively reduces the first n elements of `first` with `binary_op` and
// returns binary_op(init, total), or init when n is not positive.
//
// The input is overwritten: every pass folds the upper part of the live range
// onto its lower part, pairing element i with its mirror element
// (live - i - 1), until a single value remains in first[0].
// NOTE(review): because elements are paired with their mirror rather than
// their neighbour, binary_op presumably has to be commutative as well as
// associative -- confirm with callers.
// NOTE(review): each thread folds at most one pair per pass, so this looks
// like it needs at least n / 2 threads in the group -- confirm.
//
// All threads of the group must call this function together because it
// contains g.wait() calls (presumably group-wide barriers -- TODO confirm).
// The template parameter list is declared outside this excerpt.
__device__ T destructive_reduce_n(ConcurrentGroup &g, RandomAccessIterator first, Size n, T init, BinaryFunction binary_op)
{
  // index of the calling thread within the group
  const int lane = g.this_exec.index();

  // `live` is the number of elements still taking part; each pass removes
  // floor(live / 2) of them
  for(Size live = n; live > 1; live -= (live >> 1))
  {
    const Size pairs = live >> 1;

    if(lane < pairs)
    {
      // fold the mirror element from the upper part into this slot
      T lower = first[lane];

      first[lane] = binary_op(lower, first[live - lane - 1]);
    }

    // finish this pass everywhere before the live range shrinks
    g.wait();
  }

  g.wait();

  // only touch first[0] when the range is non-empty
  T total = init;
  if(n > 0)
  {
    total = binary_op(total, first[0]);
  }

  // keep first[0] intact until every thread has read it
  g.wait();

  return total;
} // end destructive_reduce_n()