Beispiel #1
0
__device__
RandomAccessIterator3 merge(bulk::concurrent_group<bulk::agent<grainsize>,groupsize> &exec,
                            RandomAccessIterator1 first1, RandomAccessIterator1 last1,
                            RandomAccessIterator2 first2, RandomAccessIterator2 last2,
                            RandomAccessIterator3 result,
                            Compare comp)
{
  // Merges [first1, last1) and [first2, last2) into the range beginning at
  // result, ordering elements with comp. The work is done in pieces of at most
  // `tile` output elements; each piece is handed to bounded_merge_with_buffer
  // together with a temporary buffer obtained from bulk::malloc.
  //
  // Returns whatever the last bounded_merge_with_buffer call returned
  // (presumably the end of the written output range -- confirm against
  // that helper).
  //
  // NOTE(review): presumably both inputs must already be sorted with respect
  // to comp, and every agent of the group must call this together -- confirm.
  typedef bulk::concurrent_group<bulk::agent<grainsize>,groupsize> group_type;
  typedef typename group_type::size_type                               size_type;
  typedef typename thrust::iterator_value<RandomAccessIterator3>::type value_type;

  // scratch storage sized for exec.size() * exec.grainsize() values
  value_type *staging = reinterpret_cast<value_type*>(
    bulk::malloc(exec, exec.size() * exec.grainsize() * sizeof(value_type)));

  // upper bound on the number of elements produced by one bounded merge
  // NOTE(review): this uses exec.this_exec.grainsize() while the allocation
  // above uses exec.grainsize(); presumably they agree -- confirm.
  const size_type tile = exec.size() * exec.this_exec.grainsize();

  const size_type total1 = last1 - first1;
  const size_type total2 = last2 - first2;

  if(total1 + total2 > tile)
  {
    // the output spans several tiles: peel off one tile per iteration
    while((first1 < last1) || (first2 < last2))
    {
      size_type left1 = last1 - first1;
      size_type left2 = last2 - first2;

      // length of this iteration's output: a full tile, or whatever remains
      size_type left = left1 + left2;
      size_type diag = (left < tile) ? left : tile;

      // number of elements this tile consumes from the first input;
      // the rest of the tile comes from the second input
      size_type from1 = bulk::merge_path(first1, left1, first2, left2, diag, comp);
      size_type from2 = diag - from1;

      result = detail::merge_detail::bounded_merge_with_buffer(exec,
                                                               first1, first1 + from1,
                                                               first2, first2 + from2,
                                                               staging,
                                                               result,
                                                               comp);

      first1 += from1;
      first2 += from2;
    } // end while
  } // end if
  else
  {
    // everything fits in one tile: skip the partition search and the loop
    result = detail::merge_detail::bounded_merge_with_buffer(exec, first1, last1, first2, last2, staging, result, comp);
  } // end else

  bulk::free(exec, staging);

  return result;
} // end merge()
Beispiel #2
0
__device__
void scatter_if(bulk::concurrent_group<bulk::agent<grainsize>,groupsize> &g,
                RandomAccessIterator1 first,
                RandomAccessIterator1 last,
                RandomAccessIterator2 map,
                RandomAccessIterator3 stencil,
                RandomAccessIterator4 result)
{
    // For each position p in [0, last - first) with a true stencil[p], copies
    // first[p] to result[map[p]]. The input is processed in tiles of
    // g.size() * grainsize positions; within a tile the calling agent visits
    // positions lane, lane + g.size(), lane + 2 * g.size(), ...
    //
    // NOTE(review): g.this_exec.index() is presumably this agent's rank within
    // the group, and g.wait() presumably a group-wide barrier -- confirm.
    typedef bulk::concurrent_group<bulk::agent<grainsize>,groupsize> group_type;
    typedef typename group_type::size_type                           size_type;

    const size_type stride = g.size();
    const size_type tile   = stride * grainsize;
    const size_type count  = last - first;
    const size_type lane   = g.this_exec.index();

    if(tile == count)
    {
        // exactly one full tile: no bounds checks and no outer loop needed
        size_type pos = lane;
        for(size_type step = 0; step < grainsize; ++step, pos += stride)
        {
            if(stencil[pos])
            {
                result[map[pos]] = first[pos];
            } // end if
        } // end for
    } // end if
    else if(count < tile)
    {
        // a single partial tile: every access must be bounds-checked
        size_type pos = lane;
        for(size_type step = 0; step < grainsize; ++step, pos += stride)
        {
            if(pos < (last - first) && stencil[pos])
            {
                result[map[pos]] = first[pos];
            } // end if
        } // end for
    } // end else if
    else
    {
        // several tiles: advance all three input iterators one tile at a time
        while(first < last)
        {
            if((last - first) >= tile)
            {
                // a whole tile remains, so the bounds check can be skipped
                size_type pos = lane;
                for(size_type step = 0; step < grainsize; ++step, pos += stride)
                {
                    if(stencil[pos])
                    {
                        result[map[pos]] = first[pos];
                    } // end if
                } // end for
            } // end if
            else
            {
                // trailing partial tile
                size_type pos = lane;
                for(size_type step = 0; step < grainsize; ++step, pos += stride)
                {
                    if(pos < (last - first) && stencil[pos])
                    {
                        result[map[pos]] = first[pos];
                    } // end if
                } // end for
            } // end else

            first   += tile;
            map     += tile;
            stencil += tile;
        } // end while
    } // end else

    g.wait();
} // end scatter_if