예제 #1
0
            // Splits `count` elements starting at `first` into chunks, hands
            // the chunks to the policy's executor so that `f1` is applied to
            // each of them, waits for all resulting futures and returns the
            // iterator `count` positions past `first`.
            //
            // policy      execution policy; supplies the executor
            // first       start of the range to process
            // count       number of elements to process
            // f1          callable bound through invoke_fused to every
            //             (iterator, size) tuple of the computed shape
            // chunk_size  chunk size hint forwarded to
            //             get_bulk_iteration_shape
            //
            // Exceptions thrown while scheduling, and the completed futures,
            // are handed to detail::handle_local_exceptions<ExPolicy>, which
            // decides how they are reported.
            static FwdIter call(ExPolicy policy, FwdIter first,
                std::size_t count, F1 && f1, std::size_t chunk_size)
            {
                typedef typename ExPolicy::executor_type executor_type;
                typedef typename hpx::parallel::executor_traits<executor_type>
                    executor_traits;
                typedef typename hpx::util::tuple<FwdIter, std::size_t> tuple;

                // The end iterator is computed up front: `first` and `count`
                // are passed to get_bulk_iteration_shape below, and the
                // overload visible in this file takes both by non-const
                // reference and modifies them.
                FwdIter last = first;
                std::advance(last, count);

                std::vector<hpx::future<Result> > inititems, workitems;
                std::list<boost::exception_ptr> errors;
                std::vector<tuple> shape;

                try {
                    // estimates a chunk size based on number of cores used
                    shape = get_bulk_iteration_shape(policy, inititems, f1,
                        first, count, chunk_size);

                    // No reserve() on workitems here: the vector is replaced
                    // wholesale by the assignment below, so any storage
                    // reserved beforehand would be allocated only to be
                    // released again.
                    using hpx::util::bind;
                    using hpx::util::functional::invoke_fused;
                    using hpx::util::placeholders::_1;
                    workitems = executor_traits::async_execute(
                        policy.executor(),
                        bind(invoke_fused(), std::forward<F1>(f1), _1),
                        shape);
                }
                catch (...) {
                    // record the scheduling failure; futures created so far
                    // are still waited for below
                    detail::handle_local_exceptions<ExPolicy>::call(
                        boost::current_exception(), errors);
                }

                // wait for all tasks to finish
                hpx::wait_all(inititems);
                hpx::wait_all(workitems);

                // handle exceptions
                detail::handle_local_exceptions<ExPolicy>::call(
                    inititems, errors);
                detail::handle_local_exceptions<ExPolicy>::call(
                    workitems, errors);

                return last;
            }
예제 #2
0
파일: rotate.hpp 프로젝트: Bcorde5/hpx
        // Rotates [first, last) so that `new_first` becomes the first element,
        // using three reversals: the two sub-ranges [first, new_first) and
        // [new_first, last) are reversed first, and once both have finished
        // the whole range [first, last) is reversed.
        //
        // Returns a future holding `first` advanced by
        // distance(new_first, last).
        hpx::future<FwdIter>
        rotate_helper(ExPolicy policy, FwdIter first, FwdIter new_first,
            FwdIter last)
        {
            // tag passed as second argument to every reverse invocation
            // (presumably selects the non-sequential code path -- confirm
            // against detail::reverse)
            using non_seq = boost::mpl::false_;

            // task policy built from the chunk size of the incoming policy
            parallel_task_execution_policy task_policy =
                par_task(policy.get_chunk_size());

            detail::reverse reverser;
            return lcos::local::dataflow(
                hpx::util::unwrapped([=]() mutable -> hpx::future<FwdIter>
                {
                    // third reversal, started only after dataflow has seen
                    // both partial reversals complete
                    hpx::future<void> whole_range_done =
                        reverser.call(task_policy, non_seq(), first, last);

                    // the captured copy of `first` is moved to the position
                    // that is handed back to the caller
                    auto const shift = std::distance(new_first, last);
                    std::advance(first, shift);

                    return whole_range_done.then(
                        [first] (hpx::future<void> &&) -> FwdIter
                        {
                            return first;
                        });
                }),
                reverser.call(task_policy, non_seq(), first, new_first),
                reverser.call(task_policy, non_seq(), new_first, last));
        }
예제 #3
0
        // requires traits::is_future<Future>
    // Computes the list of (start iterator, element count) tuples into which
    // the `count` elements beginning at `first` are split.
    //
    // policy      execution policy; supplies executor and executor parameters
    // workitems   receives whatever add_ready_future() appends while the
    //             chunk-size measurement consumes a prefix of the range
    // f1          callable forwarded to add_ready_future() for that prefix
    // first       in/out: advanced past every element that was consumed by
    //             the measurement or placed into the returned shape
    // count       in/out: decremented accordingly; 0 once the loops finish
    // chunk_size  requested chunk size; 0 means "let the parameters decide"
    //
    // Returns the shape; it is empty when there is nothing left to partition.
    std::vector<hpx::util::tuple<FwdIter, std::size_t> >
    get_bulk_iteration_shape(
        ExPolicy policy, std::vector<Future>& workitems, F1 && f1,
        FwdIter& first, std::size_t& count, std::size_t chunk_size)
    {
        typedef typename ExPolicy::executor_parameters_type parameters_type;
        typedef executor_parameter_traits<parameters_type> traits;
        typedef hpx::util::tuple<FwdIter, std::size_t> tuple_type;

        typedef typename ExPolicy::executor_type executor_type;
        std::size_t const cores = executor_traits<executor_type>::
            processing_units_count(policy.executor(), policy.parameters());

        bool variable_chunk_sizes = traits::variable_chunk_size(
            policy.parameters(), policy.executor());

        std::vector<tuple_type> shape;

        // A fixed chunk size is used when the parameters do not ask for
        // variable sizes, or when the caller requested an explicit size.
        if (!variable_chunk_sizes || chunk_size != 0)
        {
            if (chunk_size == 0)
            {
                // Handed to get_chunk_size(); when invoked it takes 1% of the
                // remaining elements out of the range (passing them to
                // add_ready_future) and reports how many it consumed.
                auto test_function =
                    [&]() -> std::size_t
                    {
                        std::size_t test_chunk_size = count / 100;
                        if (test_chunk_size == 0)
                            return 0;

                        add_ready_future(workitems, f1, first, test_chunk_size);

                        std::advance(first, test_chunk_size);
                        count -= test_chunk_size;

                        return test_chunk_size;
                    };

                chunk_size = traits::get_chunk_size(policy.parameters(),
                    policy.executor(), test_function, count);
            }

            // fall back to an even split across the available cores
            if (chunk_size == 0)
                chunk_size = (count + cores - 1) / cores;

            // For an empty range the even split above is still 0; clamp it so
            // that the division below is well defined. The loop does not run
            // in that case, so the returned shape stays empty.
            if (chunk_size == 0)
                chunk_size = 1;

            shape.reserve(count / chunk_size + 1);
            while (count != 0)
            {
                // the last chunk may be shorter than chunk_size
                std::size_t chunk = (std::min)(chunk_size, count);

                shape.push_back(hpx::util::make_tuple(first, chunk));
                count -= chunk;
                std::advance(first, chunk);
            }
        }
        else
        {
            // variable chunk sizes: ask the parameters anew for every chunk,
            // based on the number of elements still left
            while (count != 0)
            {
                chunk_size = traits::get_chunk_size(
                    policy.parameters(), policy.executor(),
                    [](){ return 0; }, count);

                // count is non-zero here, so the even split is at least 1 as
                // long as the executor reports at least one core
                if (chunk_size == 0)
                    chunk_size = (count + cores - 1) / cores;

                std::size_t chunk = (std::min)(chunk_size, count);

                shape.push_back(hpx::util::make_tuple(first, chunk));
                count -= chunk;
                std::advance(first, chunk);
            }
        }

        return shape;
    }
예제 #4
0
    // Generic driver for the parallel set algorithms operating on two ranges
    // [first1, last1) and [first2, last2) (both are indexed and searched with
    // std::lower_bound using `f`, i.e. they are expected to be ordered by
    // `f`).
    //
    // policy    execution policy; supplies the executor and its parameters
    // dest      output iterator receiving the final result
    // f         comparison used both for element tests and for lower_bound
    // combiner  maps (len1, len2) to the size of the intermediate buffer and
    //           (start1, start2) to a chunk's offset inside that buffer
    // setop     performs the actual set operation on one pair of sub-ranges
    //           and returns the end of what it wrote
    //
    // The first range is cut into one partition per processing unit; every
    // partition writes into its own slice of an intermediate buffer, and a
    // second step copies the slices to `dest` back to back.
    typename util::detail::algorithm_result<ExPolicy, OutIter>::type
    set_operation(ExPolicy policy,
        RanIter1 first1, RanIter1 last1, RanIter2 first2, RanIter2 last2,
        OutIter dest, F && f, Combiner && combiner, SetOp && setop)
    {
        typedef typename std::iterator_traits<RanIter1>::difference_type
            difference_type1;
        typedef typename std::iterator_traits<RanIter2>::difference_type
            difference_type2;

        // allocate intermediate buffers
        difference_type1 len1 = std::distance(first1, last1);
        difference_type2 len2 = std::distance(first2, last2);

        typedef typename set_operations_buffer<OutIter>::type buffer_type;
        boost::shared_array<buffer_type> buffer(
            new buffer_type[combiner(len1, len2)]);

        typedef typename ExPolicy::executor_type executor_type;
        std::size_t cores = executor_information_traits<executor_type>::
            processing_units_count(policy.executor(), policy.parameters());

        // number of elements of sequence 1 per partition (rounded up)
        std::size_t step = (len1 + cores - 1) / cores;
        boost::shared_array<set_chunk_data> chunks(new set_chunk_data[cores]);

        // fill the buffer piecewise
        return parallel::util::partitioner<ExPolicy, OutIter, void>::call(
            policy, chunks.get(), cores,
            // first step, is applied to all partitions
            [=](set_chunk_data* curr_chunk, std::size_t part_size)
            {
                HPX_ASSERT(part_size == 1);

                // find start in sequence 1
                std::size_t start1 = (curr_chunk - chunks.get()) * step;

                // Because step is rounded up, trailing partitions can start
                // at or beyond the end of sequence 1. They have nothing to
                // process (an earlier partition already ends at len1 and
                // covers the tail of sequence 2), and touching
                // first1[start1] would read out of bounds. As with the early
                // return below, the chunk record is left untouched.
                // NOTE(review): this relies on set_chunk_data being
                // zero-initialised by its constructor -- confirm.
                if (start1 != 0 && start1 >= std::size_t(len1))
                    return;

                std::size_t end1 = (std::min)(start1 + step, std::size_t(len1));

                bool first_partition = (start1 == 0);
                bool last_partition = (end1 == std::size_t(len1));

                // all but the last chunk require special handling
                if (!last_partition)
                {
                    // this chunk will be handled by the next one if all
                    // elements of this partition are equal
                    // (the shortcut is only taken when end1 + 1 is a valid
                    // index; otherwise the general path below is used, which
                    // never reads past first1[end1])
                    if (end1 + 1 < std::size_t(len1) &&
                        !f(first1[start1], first1[end1 + 1]))
                        return;

                    // move backwards to find earliest element which is equal to
                    // the last element of the current chunk
                    while (end1 != 0 && !f(first1[end1 - 1], first1[end1]))
                        --end1;
                }

                // move backwards to find earliest element which is equal to
                // the first element of the current chunk
                while (start1 != 0 && !f(first1[start1 - 1], first1[start1]))
                    --start1;

                // find start and end in sequence 2
                std::size_t start2 = 0;
                if (!first_partition)
                {
                    start2 =
                        std::lower_bound(
                            first2, first2 + len2, first1[start1], f
                        ) - first2;
                }

                std::size_t end2 = len2;
                if (!last_partition)
                {
                    end2 =
                        std::lower_bound(
                            first2 + start2, first2 + len2, first1[end1], f
                        ) - first2;
                }

                // perform requested set-operation into the proper place of the
                // intermediate buffer
                curr_chunk->start = combiner(start1, start2);
                auto buffer_dest = buffer.get() + curr_chunk->start;
                curr_chunk->len =
                    setop(first1 + start1, first1 + end1,
                          first2 + start2, first2 + end2, buffer_dest, f
                    ) - buffer_dest;
            },
            // second step, is executed after all partitions are done running
            [buffer, chunks, cores, dest](std::vector<future<void> >&&) -> OutIter
            {
                // accumulate real length
                // (start_index of a chunk is the sum of the lengths of all
                // chunks preceding it)
                set_chunk_data* chunk = chunks.get();
                chunk->start_index = 0;
                for (size_t i = 1; i != cores; ++i)
                {
                    set_chunk_data* curr_chunk = chunk++;
                    chunk->start_index =
                        curr_chunk->start_index + curr_chunk->len;
                }

                // finally, copy data to destination
                parallel::util::foreach_partitioner<
                        hpx::parallel::parallel_execution_policy
                    >::call(par, chunks.get(), cores,
                        [buffer, dest](
                            set_chunk_data* chunk, std::size_t, std::size_t)
                        {
                            std::copy(buffer.get() + chunk->start,
                                buffer.get() + chunk->start + chunk->len,
                                dest + chunk->start_index);
                        },
                        [](set_chunk_data* last) -> set_chunk_data*
                        {
                            return last;
                        });

                return dest;
            });
    }