Exemple #1
0
// Sorts `x` in place.
//
// Each non-empty partition of `x` is sorted on its own command queue with
// boost::compute::sort. When more than one queue is involved, the sorted
// partitions are then combined through detail::merge and the result is
// copied back into `x`.
void sort(vex::vector<T> &x) {
    namespace bc = boost::compute;

    auto queues = x.queue_list();

    // Phase 1: sort every partition independently.
    for(unsigned d = 0; d < queues.size(); ++d) {
        const auto count = x.part_size(d);
        if (!count) continue; // nothing stored in this partition

        bc::command_queue q( queues[d]() );
        bc::buffer buf( x(d).raw() );

        bc::sort(
                bc::make_buffer_iterator<T>(buf, 0),
                bc::make_buffer_iterator<T>(buf, count),
                q
                );
    }

    // With at most one queue there is nothing left to merge.
    if (queues.size() <= 1) return;

    // Phase 2: merge the per-partition results (on the CPU) and write the
    // merged sequence back over the original vector.
    namespace fusion = boost::fusion;

    auto keys   = fusion::vector_tie(x);
    auto merged = detail::merge(keys, vex::less<T>());
    fusion::for_each( detail::make_zip_view(merged, keys), detail::do_copy() );
}
Exemple #2
0
        // Forwards the first partition of `x` and `y` to mul(), passing
        // `alpha` and `append` through unchanged. Both vectors are required
        // (via precondition) to consist of exactly one partition.
        void apply(const vex::vector<val_t> &x, vex::vector<val_t> &y,
                 val_t alpha = 1, bool append = false) const
        {
            const bool single_partition =
                (x.nparts() == 1) && (y.nparts() == 1);

            precondition(single_partition, "Incompatible vectors");

            mul(x(0), y(0), alpha, append);
        }
Exemple #3
0
// Exclusive prefix sum of `src` written to `dst` (the two may alias).
//
// Every non-empty partition is scanned on its own command queue with
// boost::compute::detail::scan. For multi-partition vectors each partition
// after the first is then shifted by the total of all elements that precede
// it.
//
// `src` and `dst` are indexed with the partition layout of `src`.
// NOTE(review): this assumes both vectors share the same partitioning --
// confirm with callers.
void exclusive_scan(const vex::vector<T> &src, vex::vector<T> &dst) {
    auto queue = src.queue_list();

    std::vector<T> tail;
    /* If there is more than one partition, we need to take a copy of the last
     * element in each partition (except the last) as otherwise information
     * about it is lost: an exclusive scan does not include the last input
     * element in any output element.
     *
     * This must be captured here rather than later, in case the input and
     * output alias.
     */
    if (queue.size() > 1) {
        tail.resize(queue.size() - 1);
        for (unsigned d = 0; d < tail.size(); ++d) {
            if (src.part_size(d))
                tail[d] = src[src.part_start(d + 1) - 1];
        }
    }

    // Scan partitions separately.
    for(unsigned d = 0; d < queue.size(); ++d) {
        if (src.part_size(d)) {
            boost::compute::command_queue q( queue[d]() );

            boost::compute::buffer sbuf( src(d).raw() );
            boost::compute::buffer dbuf( dst(d).raw() );

            boost::compute::detail::scan(
                    boost::compute::make_buffer_iterator<T>(sbuf, 0),
                    boost::compute::make_buffer_iterator<T>(sbuf, src.part_size(d)),
                    boost::compute::make_buffer_iterator<T>(dbuf, 0),
                    true, q
                    );
        }
    }

    // If there is more than one partition,
    // update all of them except for the first.
    if (queue.size() > 1) {
        // Total of all source elements in partitions 0..d.
        T sum{};

        for(unsigned d = 0; d < tail.size(); ++d) {
            if (src.part_size(d)) {
                // By the time partition d is visited, it has already been
                // shifted by the total of the partitions before it (or is the
                // first non-empty partition, which needs no shift). Its last
                // output element therefore holds the global sum of everything
                // before the last source element of this partition; adding
                // that saved source element gives the total through
                // partition d. The value must replace `sum` rather than be
                // added to it, otherwise the preceding total is counted twice.
                T last = dst[src.part_start(d + 1) - 1];
                sum  = tail[d];
                sum += last;
            }

            // Shift the next partition even when partition d itself is
            // empty, so that the offset is carried across empty partitions.
            if (src.part_size(d + 1)) {
                // Wrap partition into vector for ease of use:
                vex::vector<T> part(queue[d + 1], dst(d + 1));
                part += sum;
            }
        }
    }
}
Exemple #4
0
// Sorts `x` in place.
//
// Each non-empty partition is first sorted on its own command queue with
// boost::compute::sort. If the vector spans more than one queue, the data is
// copied to the host, the sorted partitions are combined by a multiway merge,
// and the merged sequence is copied back into `x`.
void sort(vex::vector<T> &x) {
    namespace bc = boost::compute;
    typedef typename std::vector<T>::const_iterator host_iterator;

    auto queues = x.queue_list();
    const auto nparts = queues.size();

    // Phase 1: sort every partition independently.
    for(unsigned d = 0; d < nparts; ++d) {
        const auto count = x.part_size(d);
        if (!count) continue; // nothing stored in this partition

        bc::command_queue q( queues[d]() );
        bc::buffer buf( x(d).raw() );

        bc::sort(
                bc::make_buffer_iterator<T>(buf, 0),
                bc::make_buffer_iterator<T>(buf, count),
                q
                );
    }

    // With at most one queue there is nothing left to merge.
    if (nparts <= 1) return;

    // Phase 2: get sorted partitions to the host side and merge them.
    std::vector<T> src(x.size()), dst(x.size());
    vex::copy(x, src);

    // head[d] is the next unconsumed element of partition d, stop[d] its end.
    std::vector<host_iterator> head(nparts), stop(nparts);
    for(unsigned d = 0; d < nparts; ++d) {
        head[d] = src.begin() + x.part_start(d);
        stop[d] = src.begin() + x.part_start(d + 1);
    }

    for(auto out = dst.begin(); out != dst.end(); ++out) {
        // `nparts` serves as the "no candidate yet" marker.
        auto best = nparts;

        // Pick the smallest head element; the strict comparison keeps the
        // lowest-numbered partition on ties.
        for(unsigned d = 0; d < nparts; ++d) {
            if (head[d] != stop[d] && (best == nparts || *head[d] < *head[best]))
                best = d;
        }

        *out = *head[best]++;
    }

    vex::copy(dst, x);
}
Exemple #5
0
// Inclusive prefix sum of `src` written to `dst`.
//
// Every non-empty partition is scanned on its own command queue with
// boost::compute::detail::scan. For multi-partition vectors, each partition
// after the first is then shifted by the accumulated last elements of the
// partitions before it.
void inclusive_scan(const vex::vector<T> &src, vex::vector<T> &dst) {
    namespace bc = boost::compute;

    auto queues = src.queue_list();
    const auto nparts = queues.size();

    // Phase 1: scan partitions separately.
    for(unsigned d = 0; d < nparts; ++d) {
        const auto count = src.part_size(d);
        if (!count) continue; // nothing stored in this partition

        bc::command_queue q( queues[d]() );

        bc::buffer in ( src(d)() );
        bc::buffer out( dst(d)() );

        // NOTE(review): the boolean presumably selects exclusive (true) vs
        // inclusive (false) scanning -- confirm against Boost.Compute.
        bc::detail::scan(
                bc::make_buffer_iterator<T>(in, 0),
                bc::make_buffer_iterator<T>(in, count),
                bc::make_buffer_iterator<T>(out, 0),
                false, q
                );
    }

    // A single partition needs no cross-partition correction.
    if (nparts <= 1) return;

    // Phase 2: collect the last scanned element of every partition but the
    // final one (empty partitions contribute T()). All of them are read
    // before any partition is modified.
    std::vector<T> carry(nparts - 1, T());
    for(unsigned d = 0; d < carry.size(); ++d) {
        if (src.part_size(d))
            carry[d] = dst[src.part_start(d + 1) - 1];
    }

    // carry[d] becomes the sum of the collected values of partitions 0..d.
    std::partial_sum(carry.begin(), carry.end(), carry.begin());

    // Phase 3: shift every non-empty partition after the first.
    for(unsigned d = 1; d < nparts; ++d) {
        if (!src.part_size(d)) continue;

        // Wrap partition into vector for ease of use:
        vex::vector<T> part(queues[d], dst(d));
        part += carry[d - 1];
    }
}
Exemple #6
0
 // Reduces the element-wise product x * y with a vex::SUM_Kahan reductor
 // built on the queues of `x`, and returns the result.
 static V get(const vex::vector<V> &x, const vex::vector<V> &y)
 {
     typedef vex::Reductor<V, vex::SUM_Kahan> kahan_reductor;

     kahan_reductor reduce( x.queue_list() );
     return reduce(x * y);
 }
 // Returns true when both vectors report the same size().
 static bool same_size( const vex::vector< T > &x1 , const vex::vector< T > &x2 )
 {
     const auto lhs = x1.size();
     const auto rhs = x2.size();
     return lhs == rhs;
 }
 // Resizes `x1` using the queue list and the size of `x2`.
 static void resize( vex::vector< T > &x1 , const vex::vector< T > &x2 )
 {
     const auto &queues = x2.queue_list();
     x1.resize( queues , x2.size() );
 }
Exemple #9
0
 // Reduces the element-wise product v1 * v2 with a vex::SUM reductor built on
 // the queues of `v1`. The result type is that of multiplying a T1 by a T2.
 decltype(T1() * T2())
 inner_prod(const vex::vector<T1> &v1, const vex::vector<T2> &v2) {
     typedef decltype(T1() * T2()) product_type;

     vex::Reductor<product_type, vex::SUM> reduce(v1.queue_list());
     return reduce(v1 * v2);
 }
    // Applies the reductor obtained from detail::vexcl_reductor<T> (built on
    // the queues of `x`) to fabs(x) and returns the result.
    // NOTE(review): presumably a maximum reduction, i.e. an infinity norm --
    // confirm against detail::vexcl_reductor.
    T operator()( const vex::vector<T> &x ) const {
        auto reductor = detail::vexcl_reductor<T>(x.queue_list());

        return reductor( fabs(x) );
    }
    // Returns the vex::MAX reduction of fabs(x), using the reductor that
    // vex::get_reductor provides for the queues of `x`.
    T operator()( const vex::vector<T> &x ) const {
        return vex::get_reductor<T, vex::MAX>(x.queue_list())( fabs(x) );
    }