// Sorts a vex::vector in place.
//
// Every non-empty partition is sorted on its own command queue with
// boost::compute::sort. When the vector spans more than one queue, the
// individually sorted partitions are merged on the CPU via detail::merge
// (ordered by vex::less<T>) and the merged host data is copied back into x.
void sort(vex::vector<T> &x) {
    auto queue = x.queue_list();

    // Per-partition device sort.
    for (unsigned d = 0; d < queue.size(); ++d) {
        if (!x.part_size(d)) continue;

        boost::compute::command_queue q( queue[d]() );
        boost::compute::buffer buf( x(d).raw() );

        auto first = boost::compute::make_buffer_iterator<T>(buf, 0);
        auto last  = boost::compute::make_buffer_iterator<T>(buf, x.part_size(d));

        boost::compute::sort(first, last, q);
    }

    // A single queue holds the whole vector, so nothing is left to merge.
    if (queue.size() <= 1) return;

    // If there are multiple queues, merge the results on the CPU.
    auto key_vectors  = boost::fusion::vector_tie(x);
    auto host_vectors = detail::merge(key_vectors, vex::less<T>());

    // Pair each merged host vector with its device vector and apply
    // detail::do_copy to every pair.
    boost::fusion::for_each(
            detail::make_zip_view(host_vectors, key_vectors),
            detail::do_copy()
            );
}
// Exclusive scan (prefix sum) of src into dst.
//
// Each non-empty partition is scanned independently on its own command queue
// with boost::compute::detail::scan (exclusive mode). When the vector spans
// several queues, every partition after the first is then shifted by the
// total of all preceding source elements.
//
// src and dst may alias: the source values needed for the cross-partition
// fix-up are captured before any partition is scanned.
void exclusive_scan(const vex::vector<T> &src, vex::vector<T> &dst) {
    auto queue = src.queue_list();

    std::vector<T> tail;
    /* If there is more than one partition, we need to take a copy of the last
     * element in each partition (except the last) as otherwise information
     * about it is lost.
     *
     * This must be captured here rather than later, in case the input and
     * output alias.
     */
    if (queue.size() > 1) {
        tail.resize(queue.size() - 1);
        for (unsigned d = 0; d < tail.size(); ++d) {
            if (src.part_size(d))
                tail[d] = src[src.part_start(d + 1) - 1];
        }
    }

    // Scan partitions separately.
    for (unsigned d = 0; d < queue.size(); ++d) {
        if (src.part_size(d)) {
            boost::compute::command_queue q( queue[d]() );

            boost::compute::buffer sbuf( src(d).raw() );
            boost::compute::buffer dbuf( dst(d).raw() );

            boost::compute::detail::scan(
                    boost::compute::make_buffer_iterator<T>(sbuf, 0),
                    boost::compute::make_buffer_iterator<T>(sbuf, src.part_size(d)),
                    boost::compute::make_buffer_iterator<T>(dbuf, 0),
                    true, q
                    );
        }
    }

    // If there are more than one partition,
    // update all of them except for the first.
    if (queue.size() > 1) {
        // Total of all source elements in partitions 0..d.
        T sum{};

        for (unsigned d = 0; d < tail.size(); ++d) {
            if (src.part_size(d)) {
                // The last scanned element of partition d already includes the
                // offset applied to that partition in the previous iteration,
                // so adding the saved source tail yields the full running
                // total. Accumulating onto the previous `sum` as well would
                // count the earlier partitions twice.
                sum  = tail[d];
                sum += dst[src.part_start(d + 1) - 1];
            }

            // Guard on the partition being updated: an empty partition d must
            // not prevent partition d + 1 from receiving the running total,
            // and an empty partition d + 1 has nothing to update.
            if (src.part_size(d + 1)) {
                // Wrap partition into vector for ease of use:
                vex::vector<T> part(queue[d + 1], dst(d + 1));
                part += sum;
            }
        }
    }
}
// Sorts a vex::vector in place.
//
// Every non-empty partition is first sorted on its own command queue with
// boost::compute::sort. If the vector spans more than one queue, the sorted
// partitions are copied to the host, combined with a multiway merge, and the
// merged sequence is copied back into x.
void sort(vex::vector<T> &x) {
    auto queue = x.queue_list();

    // Per-partition device sort.
    for (unsigned d = 0; d < queue.size(); ++d) {
        if (!x.part_size(d)) continue;

        boost::compute::command_queue q( queue[d]() );
        boost::compute::buffer buf( x(d).raw() );

        auto first = boost::compute::make_buffer_iterator<T>(buf, 0);
        auto last  = boost::compute::make_buffer_iterator<T>(buf, x.part_size(d));

        boost::compute::sort(first, last, q);
    }

    // With a single queue the vector is already fully sorted.
    if (queue.size() <= 1) return;

    // Get sorted partitions to host side and do multiway merge sort.
    std::vector<T> src(x.size()), dst(x.size());
    vex::copy(x, src);

    typedef typename std::vector<T>::const_iterator host_iterator;

    // head[d] .. stop[d] is the not-yet-consumed part of partition d.
    std::vector<host_iterator> head(queue.size());
    std::vector<host_iterator> stop(queue.size());

    for (unsigned d = 0; d < queue.size(); ++d) {
        head[d] = src.begin() + x.part_start(d);
        stop[d] = src.begin() + x.part_start(d + 1);
    }

    for (auto out = dst.begin(); out != dst.end(); ++out) {
        // Pick the partition whose current head is smallest; on ties the
        // partition with the lowest index wins (strict comparison).
        int best = -1;
        for (unsigned d = 0; d < queue.size(); ++d) {
            const bool exhausted = (head[d] == stop[d]);
            if (!exhausted && (best < 0 || *head[d] < *head[best]))
                best = d;
        }

        *out = *head[best];
        ++head[best];
    }

    vex::copy(dst, x);
}
// Inclusive scan (prefix sum) of src into dst.
//
// Each non-empty partition is scanned independently on its own command queue
// with boost::compute::detail::scan (inclusive mode). When the vector spans
// several queues, every partition after the first is then shifted by the
// accumulated totals of the partitions before it.
void inclusive_scan(const vex::vector<T> &src, vex::vector<T> &dst) {
    auto queue = src.queue_list();

    // Scan partitions separately.
    for (unsigned d = 0; d < queue.size(); ++d) {
        if (!src.part_size(d)) continue;

        boost::compute::command_queue q( queue[d]() );

        boost::compute::buffer sbuf( src(d)() );
        boost::compute::buffer dbuf( dst(d)() );

        boost::compute::detail::scan(
                boost::compute::make_buffer_iterator<T>(sbuf, 0),
                boost::compute::make_buffer_iterator<T>(sbuf, src.part_size(d)),
                boost::compute::make_buffer_iterator<T>(dbuf, 0),
                false, q
                );
    }

    // A single partition needs no cross-partition correction.
    if (queue.size() <= 1) return;

    // offset[d] starts as the last scanned value of partition d, i.e. that
    // partition's own total (a default-constructed T for empty partitions).
    std::vector<T> offset(queue.size() - 1, T());
    for (unsigned d = 0; d < offset.size(); ++d) {
        if (src.part_size(d))
            offset[d] = dst[src.part_start(d + 1) - 1];
    }

    // Turn per-partition totals into running totals.
    std::partial_sum(offset.begin(), offset.end(), offset.begin());

    // Update all partitions except for the first.
    for (unsigned d = 1; d < queue.size(); ++d) {
        if (!src.part_size(d)) continue;

        // Wrap partition into vector for ease of use:
        vex::vector<T> part(queue[d], dst(d));
        part += offset[d - 1];
    }
}
// Returns the reduction of the element-wise product x * y, computed with a
// vex::SUM_Kahan reductor built on x's queue list.
static V get(const vex::vector<V> &x, const vex::vector<V> &y)
{
    vex::Reductor<V, vex::SUM_Kahan> kahan_sum( x.queue_list() );

    return kahan_sum(x * y);
}
// Resizes x1 to match x2: same queue list and same number of elements.
static void resize( vex::vector< T > &x1 , const vex::vector< T > &x2 )
{
    const auto n = x2.size();

    x1.resize( x2.queue_list() , n );
}
// Inner product of two vex::vectors.
//
// The element-wise product v1 * v2 is reduced with a vex::SUM reductor built
// on v1's queue list. The result type is the type of T1() * T2().
decltype(T1() * T2()) inner_prod(const vex::vector<T1> &v1, const vex::vector<T2> &v2)
{
    typedef decltype(T1() * T2()) value_type;

    vex::Reductor<value_type, vex::SUM> reduce_sum(v1.queue_list());

    return reduce_sum(v1 * v2);
}
// Applies the reductor obtained from detail::vexcl_reductor<T> (built on x's
// queue list) to fabs(x) and returns the result.
// NOTE(review): the original local was named `max`, so this is presumably a
// maximum-of-absolute-values norm; confirm against detail::vexcl_reductor.
T operator()( const vex::vector<T> &x ) const
{
    auto reduce = detail::vexcl_reductor<T>(x.queue_list());

    return reduce( fabs(x) );
}
// Returns the vex::MAX reduction of fabs(x), i.e. the largest absolute value
// among the elements of x. The reductor is obtained from vex::get_reductor
// for x's queue list.
T operator()( const vex::vector<T> &x ) const
{
    const auto &reduce_max = vex::get_reductor<T, vex::MAX>(x.queue_list());

    return reduce_max( fabs(x) );
}