void sort(Master&                   master,               //!< master object
              const Assigner&           assigner,             //!< assigner object
              std::vector<T> Block::*   values,               //!< pointer to the block member holding all values to sort
              std::vector<T> Block::*   samples,              //!< (output) pointer to the block member receiving the block boundaries
              size_t                    num_samples,          //!< desired number of samples
              const Cmp&                cmp,                  //!< comparison function
              int                       k   = 2,              //!< k-ary reduction will be used
              bool                      samples_only = false) //!< false: values are exchanged via all_to_all; true: samples are gathered, but values are not exchanged
    {
        // Two phases: (1) a swap-reduce that all-gathers the samples,
        // (2) optionally an all_to_all that exchanges the values themselves.
        // The master's immediate flag is switched off for the duration of the
        // call and put back to its previous value at the end.
        const bool was_immediate = master.immediate();
        master.set_immediate(false);

        // NB: sample_sort is a local object, but the functors handed out by
        //     sample() and exchange() are copied and kept inside reduce, so
        //     they do not depend on this object staying alive afterwards.
        detail::SampleSort<Block,T,Cmp> sample_sort(values, samples, cmp, num_samples);

        // Phase 1: 1-dimensional decomposition over [0, nblocks) with one
        // piece per block, reduced with k-ary swap partners.
        RegularDecomposer<DiscreteBounds> block_decomposer(1, interval(0,assigner.nblocks()), assigner.nblocks());
        RegularSwapPartners   swap_partners(block_decomposer, k);
        reduce(master, assigner, swap_partners, sample_sort.sample(), detail::SkipIntermediate(swap_partners.rounds()));

        // Phase 2: skipped entirely when the caller asked for samples only.
        if (!samples_only)
        {
            all_to_all(master, assigner, sample_sort.exchange(), k);
        }

        // Put the immediate flag back the way the caller had it.
        master.set_immediate(was_immediate);
    }