static void gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out, int root) { std::vector<int> counts(comm.size()); Collectives<int,void*>::gather(comm, (int) in.size(), counts, root); std::vector<int> offsets(comm.size(), 0); for (unsigned i = 1; i < offsets.size(); ++i) offsets[i] = offsets[i-1] + counts[i-1]; std::vector<T> buffer(offsets.back() + counts.back()); MPI_Gatherv(Datatype::address(const_cast<T&>(in[0])), in.size(), Datatype::datatype(), Datatype::address(buffer[0]), &counts[0], &offsets[0], Datatype::datatype(), root, comm); out.resize(comm.size()); size_t cur = 0; for (unsigned i = 0; i < (unsigned)comm.size(); ++i) { out[i].reserve(counts[i]); for (unsigned j = 0; j < (unsigned)counts[i]; ++j) out[i].push_back(buffer[cur++]); } }
// Prepares the scatter / block-scatter index arrays for the operand selected
// by `Mat` (on first use only) and then forwards to matrify_and_run.
//
// The arrays live in one allocation of 2*m + 2*n stride_type entries laid out
// as [rscat: m][cscat: n][rbs: m][cbs: n].
void operator()(const communicator& comm, const config& cfg,
                T alpha, MatrixA& A, MatrixB& B, T beta, MatrixC& C)
{
    using namespace matrix_constants;

    // Extents of whichever operand Mat designates.
    len_type m;
    len_type n;
    if (Mat == MAT_A)
    {
        m = A.length(0);
        n = A.length(1);
    }
    else if (Mat == MAT_B)
    {
        m = B.length(0);
        n = B.length(1);
    }
    else
    {
        m = C.length(0);
        n = C.length(1);
    }

    if (!rscat)
    {
        // Only the master allocates; every caller then takes part in the
        // broadcast of the base pointer.
        if (comm.master())
        {
            scat_buffer = Pool.allocate<stride_type>(2*m + 2*n);
            rscat = scat_buffer.get<stride_type>();
        }

        comm.broadcast(rscat);

        // Carve the remaining three arrays out of the same buffer.
        cscat = rscat + m;
        rbs = cscat + n;
        cbs = rbs + m;
    }

    matrify_and_run<Mat>(*this, comm, cfg, alpha, A, B, beta, C);
}
void broadcast_test(const communicator& comm, const T& bc_value, const char* kind, int root = -1) { if (root == -1) { for (root = 0; root < comm.size(); ++root) broadcast_test(comm, bc_value, kind, root); } else { using boost::mpi::broadcast; T value; if (comm.rank() == root) { value = bc_value; std::cout << "Broadcasting " << kind << " from root " << root << "..."; std::cout.flush(); } broadcast(comm, value, root); BOOST_CHECK(value == bc_value); if (comm.rank() == root && value == bc_value) std::cout << "OK." << std::endl; } (comm.barrier)(); }
void gather_test(const communicator& comm, Generator generator, const char* kind, int root = -1) { typedef typename Generator::result_type value_type; value_type value = generator(comm.rank()); if (root == -1) { for (root = 0; root < comm.size(); ++root) gather_test(comm, generator, kind, root); } else { using boost::mpi::gather; std::vector<value_type> values; if (comm.rank() == root) { std::cout << "Gathering " << kind << " from root " << root << "..." << std::endl; } gather(comm, value, values, root); if (comm.rank() == root) { std::vector<value_type> expected_values; for (int p = 0; p < comm.size(); ++p) expected_values.push_back(generator(p)); BOOST_CHECK(values == expected_values); } else { BOOST_CHECK(values.empty()); } } (comm.barrier)(); }
void block_scatter(const communicator& comm, tensor_matrix<T>& A, stride_type* rscat, len_type MB, stride_type* rbs, stride_type* cscat, len_type NB, stride_type* cbs) { len_type m = A.length(0); len_type n = A.length(1); len_type first, last; std::tie(first, last, std::ignore) = comm.distribute_over_threads(m, MB); A.length(0, last-first); A.shift(0, first); A.fill_block_scatter(0, rscat+first, MB, rbs+first/MB); A.shift(0, -first); A.length(0, m); std::tie(first, last, std::ignore) = comm.distribute_over_threads(n, NB); A.length(1, last-first); A.shift(1, first); A.fill_block_scatter(1, cscat+first, NB, cbs+first/NB); A.shift(1, -first); A.length(1, n); comm.barrier(); }
void scatter_test(const communicator& comm, Generator generator, const char* kind, int root = -1) { typedef typename Generator::result_type value_type; if (root == -1) { for (root = 0; root < comm.size(); ++root) scatter_test(comm, generator, kind, root); } else { using boost::mpi::scatter; value_type value; if (comm.rank() == root) { std::vector<value_type> values; for (int p = 0; p < comm.size(); ++p) values.push_back(generator(p)); if (comm.rank() == root) { std::cout << "Scattering " << kind << " from root " << root << "..."; std::cout.flush(); } scatter(comm, values, value, root); } else { scatter(comm, value, root); } BOOST_CHECK(value == generator(comm.rank())); } (comm.barrier)(); }
//--------- static void broadcast(communicator c, A &a, int root) { check_is_contiguous(a); auto sh = a.shape(); MPI_Bcast(&sh[0], sh.size(), mpi_datatype<typename decltype(sh)::value_type>::invoke(), root, c.get()); if (c.rank() != root) a.resize(sh); MPI_Bcast(a.data_start(), a.domain().number_of_elements(), D(), root, c.get()); }
std::vector<T> mpi_reduce(std::vector<T> const &a, communicator c, int root, bool all, MPI_Op op, std::true_type) { std::vector<T> b(a.size()); if (!all) MPI_Reduce((void *)a.data(), b.data(), a.size(), mpi_datatype<T>(), op, root, c.get()); else MPI_Allreduce((void *)a.data(), b.data(), a.size(), mpi_datatype<T>(), op, c.get()); return b; }
// Reduces a single basic value across the communicator.
//
//   a     local contribution
//   c     communicator to reduce over
//   root  destination rank (used by MPI_Reduce only)
//   all   true: MPI_Allreduce; false: MPI_Reduce towards `root`
//   op    MPI reduction operation
//
// Returns the receive buffer of the chosen MPI call. MPI_Reduce only fills
// the receive buffer on the root, so the result is value-initialised to make
// the value returned on the other ranks well defined (T{}) instead of
// indeterminate.
template <typename T>
REQUIRES_IS_BASIC(T, T)
mpi_reduce(T a, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) {
  T b{};
  auto d = datatype<T>();
  if (!all)
    MPI_Reduce(&a, &b, 1, d, op, root, c.get());
  else
    MPI_Allreduce(&a, &b, 1, d, op, c.get());
  return b;
}
void gather(const communicator& comm, const T* in_values, int n, std::vector<T>& out_values, int root) { if (comm.rank() == root) { out_values.resize(comm.size() * n); ::boost::mpi::gather(comm, in_values, n, &out_values[0], root); } else ::boost::mpi::gather(comm, in_values, n, root); }
void gather_impl(const communicator& comm, const T* in_values, int n, T* out_values, int root, mpl::false_) { int tag = environment::collectives_tag(); int size = comm.size(); for (int src = 0; src < size; ++src) { if (src == root) std::copy(in_values, in_values + n, out_values + n * src); else comm.recv(src, tag, out_values + n * src, n); } }
// Python-facing gather: on the root, returns a tuple holding the gathered
// value of every rank in rank order; on every other rank, contributes `value`
// and returns a default-constructed object.
object gather(const communicator& comm, object value, int root)
{
  if (comm.rank() != root) {
    boost::mpi::gather(comm, value, root);
    return object();
  }

  std::vector<object> gathered;
  boost::mpi::gather(comm, value, gathered, root);

  // Repack as a Python list first, then freeze it into a tuple.
  boost::python::list items;
  for (int rank = 0; rank < comm.size(); ++rank)
    items.append(gathered[rank]);
  return boost::python::tuple(items);
}
// Python-facing scatter: the root pulls comm.size() items from the iterable
// `values` and scatters them; every rank returns the item it received.
object scatter(const communicator& comm, object values, int root)
{
  object result;

  if (comm.rank() != root) {
    boost::mpi::scatter(comm, result, root);
    return result;
  }

  // Drain exactly one item per rank from the Python iterator.
  std::vector<object> values_vec(comm.size());
  object iterator = object(handle<>(PyObject_GetIter(values.ptr())));
  for (std::vector<object>::iterator slot = values_vec.begin();
       slot != values_vec.end(); ++slot)
    *slot = object(handle<>(PyIter_Next(iterator.ptr())));

  boost::mpi::scatter(comm, values_vec, result, root);
  return result;
}
// Non-root side of the gather for the mpl::false_ overload: sends this rank's
// `n` values to `root` using the collectives tag.
void gather_impl(const communicator& comm, const T* in_values, int n, int root, mpl::false_)
{
  comm.send(root, environment::collectives_tag(), in_values, n);
}
// Determines which rank holds an item by reducing with UniqueReduce.
//
//   here        whether the calling rank holds the item
//   controller  rank that receives the reduction result
//   world       communicator to reduce over
//
// On the controller, returns the reduction result and throws DuplicateError
// if that result equals UniqueReduce::Duplicate. Every other rank returns
// None.
int espresso::esutil::Collectives::locateItem(bool here, int controller, communicator world) {
  // Each rank contributes its own rank if it holds the item, NotHere otherwise.
  int node = UniqueReduce::NotHere;
  if (here) node = world.rank();

  if (world.rank() == controller) {
    int owner;
    reduce(world, node, owner, UniqueReduce(), controller);
    if (owner == UniqueReduce::Duplicate) {
      throw DuplicateError();
    }
    return owner;
  }

  reduce(world, node, UniqueReduce(), controller);
  return None;
}
// Python-facing all_to_all: pulls comm.size() items from the iterable
// `in_values`, exchanges them with boost::mpi::all_to_all and returns the
// received items as a tuple.
object all_to_all(const communicator& comm, object in_values)
{
  // Build the outgoing values, one per rank, from the Python iterator.
  std::vector<object> in_values_vec(comm.size());
  object iterator = object(handle<>(PyObject_GetIter(in_values.ptr())));
  for (std::vector<object>::iterator slot = in_values_vec.begin();
       slot != in_values_vec.end(); ++slot)
    *slot = object(handle<>(PyIter_Next(iterator.ptr())));

  std::vector<object> out_values_vec(comm.size());
  boost::mpi::all_to_all(comm, in_values_vec, out_values_vec);

  // Repack the received values as a tuple.
  boost::python::list received;
  for (int rank = 0; rank < comm.size(); ++rank)
    received.append(out_values_vec[rank]);
  return boost::python::tuple(received);
}
void scatterv_impl(const communicator& comm, T* out_values, int n, int root, mpl::false_ isnt_mpi_type) { assert(root != comm.rank()); scatterv_impl(comm, (T const*)0, out_values, n, (int const*)0, (int const*)0, root, isnt_mpi_type); }
void all_gather(const communicator& comm, const T& in_val, std::vector<T>& out_vals) { out_vals.resize(comm.size()); MPI_Allgather((void*)&in_val, 1, detail::mpi_type<T>(), &out_vals.front(), 1, detail::mpi_type<T>(), comm); // throw std::logic_error(std::string("all_gather() is not implemented, called for type T=") // +typeid(T).name()); }
// Python-facing iprobe: returns the status wrapped in an object when
// comm.iprobe yields one, and a default-constructed object otherwise.
object communicator_iprobe(const communicator& comm, int source, int tag)
{
  pdalboost::optional<status> probed = comm.iprobe(source, tag);
  if (!probed)
    return object();
  return object(*probed);
}
// Python-facing irecv: starts a receive into a freshly allocated object and
// returns a request that also holds a shared pointer to that object
// (m_internal_value), so the request and the receive target share ownership.
request_with_value communicator_irecv(const communicator& comm, int source, int tag)
{
  pdalboost::shared_ptr<object> storage(new object());

  request_with_value pending(comm.irecv(source, tag, *storage));
  pending.m_internal_value = storage;

  return pending;
}
void ring_array_test(const communicator& comm, const T* pass_values, int n, const char* kind, int root = 0) { T* transferred_values = new T[n]; int rank = comm.rank(); int size = comm.size(); if (rank == root) { std::cout << "Passing " << kind << " array around a ring from root " << root << "..."; comm.send((rank + 1) % size, 0, pass_values, n); comm.recv((rank + size - 1) % size, 0, transferred_values, n); bool okay = std::equal(pass_values, pass_values + n, transferred_values); BOOST_CHECK(okay); if (okay) std::cout << " OK." << std::endl; } else { status stat = comm.probe(boost::mpi::any_source, 0); boost::optional<int> num_values = stat.template count<T>(); if (boost::mpi::is_mpi_datatype<T>()) BOOST_CHECK(num_values && *num_values == n); else BOOST_CHECK(!num_values || *num_values == n); comm.recv(stat.source(), 0, transferred_values, n); BOOST_CHECK(std::equal(pass_values, pass_values + n, transferred_values)); comm.send((rank + 1) % size, 0, transferred_values, n); } (comm.barrier)(); delete [] transferred_values; }
// Scatters variable-sized chunks described by `sizes`: the calling rank
// expects sizes[comm.rank()] values in out_values. No displacement array is
// supplied (a null pointer is forwarded).
void scatterv(const communicator& comm, const T* in_values, const std::vector<int>& sizes, T* out_values, int root)
{
  using detail::c_data;

  const int local_count = sizes[comm.rank()];
  detail::scatterv_impl(comm, in_values, out_values, local_count,
                        c_data(sizes), static_cast<int const*>(0),
                        root, is_mpi_datatype<T>());
}
// Gathers a single value from every rank into out_values on the root.
// Dispatches to the root or non-root overload of detail::gather_impl, tagged
// by is_mpi_datatype<T>.
void gather(const communicator& comm, const T& in_value, T* out_values, int root)
{
  if (comm.rank() != root) {
    // Contributing side: out_values is not used.
    detail::gather_impl(comm, &in_value, 1, root, is_mpi_datatype<T>());
    return;
  }

  detail::gather_impl(comm, &in_value, 1, out_values, root, is_mpi_datatype<T>());
}
void all_reduce_array_test(const communicator& comm, Generator generator, const char* type_kind, Op op, const char* op_kind, typename Generator::result_type init, bool in_place) { typedef typename Generator::result_type value_type; value_type value = generator(comm.rank()); std::vector<value_type> send(10, value); using boost::mpi::all_reduce; using boost::mpi::inplace; if (comm.rank() == 0) { char const* place = in_place ? "in place" : "out of place"; std::cout << "Reducing (" << place << ") array to " << op_kind << " of " << type_kind << "..."; std::cout.flush(); } std::vector<value_type> result; if (in_place) { all_reduce(comm, inplace(&(send[0])), send.size(), op); result.swap(send); } else { std::vector<value_type> recv(10, value_type()); all_reduce(comm, &(send[0]), send.size(), &(recv[0]), op); result.swap(recv); } // Compute expected result std::vector<value_type> generated_values; for (int p = 0; p < comm.size(); ++p) generated_values.push_back(generator(p)); value_type expected_result = std::accumulate(generated_values.begin(), generated_values.end(), init, op); bool got_expected_result = (std::equal_range(result.begin(), result.end(), expected_result) == std::make_pair(result.begin(), result.end())); BOOST_CHECK(got_expected_result); if (got_expected_result && comm.rank() == 0) std::cout << "OK." << std::endl; (comm.barrier)(); }
void all_gather_impl(const communicator& comm, const T* in_values, int n, T* out_values, int const* sizes, int const* skips, mpl::false_) { int nproc = comm.size(); // first, gather all size, these size can be different for // each process packed_oarchive oa(comm); for (int i = 0; i < n; ++i) { oa << in_values[i]; } std::vector<int> oasizes(nproc); int oasize = oa.size(); BOOST_MPI_CHECK_RESULT(MPI_Allgather, (&oasize, 1, MPI_INTEGER, c_data(oasizes), 1, MPI_INTEGER, MPI_Comm(comm))); // Gather the archives, which can be of different sizes, so // we need to use allgatherv. // Every thing is contiguous, so the offsets can be // deduced from the collected sizes. std::vector<int> offsets(nproc); sizes2offsets(oasizes, offsets); packed_iarchive::buffer_type recv_buffer(std::accumulate(oasizes.begin(), oasizes.end(), 0)); BOOST_MPI_CHECK_RESULT(MPI_Allgatherv, (const_cast<void*>(oa.address()), int(oa.size()), MPI_BYTE, c_data(recv_buffer), c_data(oasizes), c_data(offsets), MPI_BYTE, MPI_Comm(comm))); for (int src = 0; src < nproc; ++src) { int nb = sizes ? sizes[src] : n; int skip = skips ? skips[src] : 0; std::advance(out_values, skip); if (src == comm.rank()) { // this is our local data for (int i = 0; i < nb; ++i) { *out_values++ = *in_values++; } } else { packed_iarchive ia(comm, recv_buffer, boost::archive::no_header, offsets[src]); for (int i = 0; i < nb; ++i) { ia >> *out_values++; } } } }
// Recursive scan over the rank range [lower, upper) for values exchanged as
// packed archives.
//
// The range is split at its midpoint. Each half is scanned recursively; the
// last rank of the lower half then sends its partial results to every rank of
// the upper half, which combine them as op(left_value, own_value).
//
//   comm        communicator to scan over
//   in_values   this rank's n input values
//   n           number of values
//   out_values  receives this rank's n scan results
//   op          binary operation, applied as op(left, right)
//   lower       first rank of the range being scanned
//   upper       one past the last rank of the range being scanned
void upper_lower_scan(const communicator& comm, const T* in_values, int n, T* out_values, Op& op, int lower, int upper)
{
  int tag = environment::collectives_tag();
  int rank = comm.rank();

  if (lower + 1 == upper) {
    // Single-rank range: the result is just the input.
    std::copy(in_values, in_values + n, out_values);
  } else {
    int middle = (lower + upper) / 2;

    if (rank < middle) {
      // Lower half
      upper_lower_scan(comm, in_values, n, out_values, op, lower, middle);

      // If we're the last process in the lower half, send our values
      // to everyone in the upper half.
      if (rank == middle - 1) {
        packed_oarchive oa(comm);
        for (int i = 0; i < n; ++i)
          oa << out_values[i];

        for (int p = middle; p < upper; ++p)
          comm.send(p, tag, oa);
      }
    } else {
      // Upper half
      upper_lower_scan(comm, in_values, n, out_values, op, middle, upper);

      // Receive value from the last process in the lower half.
      packed_iarchive ia(comm);
      comm.recv(middle - 1, tag, ia);

      // Combine value that came from the left with our value
      T left_value;
      for (int i = 0; i < n; ++i)
        {
          ia >> left_value;
          out_values[i] = op(left_value, out_values[i]);
        }
    }
  }
}
// Python-facing all_gather: gathers `value` from every rank with
// boost::mpi::all_gather and returns the results as a tuple in rank order.
object all_gather(const communicator& comm, object value)
{
  std::vector<object> gathered;
  boost::mpi::all_gather(comm, value, gathered);

  // Repack as a Python list first, then freeze it into a tuple.
  boost::python::list items;
  for (int rank = 0; rank < comm.size(); ++rank)
    items.append(gathered[rank]);
  return boost::python::tuple(items);
}
// Python-facing reduce: combines `value` from every rank with `op`. The root
// returns the reduced value; every other rank returns a default-constructed
// object.
object reduce(const communicator& comm, object value, object op, int root)
{
  if (comm.rank() != root) {
    boost::mpi::reduce(comm, value, op, root);
    return object();
  }

  object out_value;
  boost::mpi::reduce(comm, value, out_value, op, root);
  return out_value;
}
// scatterv for the mpl::false_ overload: values travel as packed archives.
//
// On the root, the send buffer and the per-rank archive sizes are prepared by
// fill_scatter_sendbuf; every rank then calls dispatch_scatter_sendbuf to
// obtain its out_size values in out_values.
//
//   comm        communicator to scatter over
//   in_values   values to scatter (read on the root only)
//   out_values  destination for this rank's values
//   out_size    number of values this rank expects; on the root it is
//               asserted to equal sizes[comm.rank()]
//   sizes       per-rank value counts (read on the root only)
//   displs      optional per-rank displacements (read on the root only)
//   root        rank that owns the input
void scatterv_impl(const communicator& comm, const T* in_values, T* out_values, int out_size, int const* sizes, int const* displs, int root, mpl::false_)
{
  packed_oarchive::buffer_type sendbuf;
  bool is_root = comm.rank() == root;
  int nproc = comm.size();
  // Stays empty on non-root ranks.
  std::vector<int> archsizes;
  if (is_root) {
    assert(out_size == sizes[comm.rank()]);
    archsizes.resize(nproc);
    std::vector<int> skipped;
    if (displs) {
      // Convert the displacements with offsets2skipped and make displs
      // point at the converted array.
      skipped.resize(nproc);
      offsets2skipped(sizes, displs, c_data(skipped), nproc);
      displs = c_data(skipped);
    }
    // NOTE(review): the converted `displs` is not used after this point; a
    // null pointer is passed in the slot after `sizes` instead. Confirm
    // whether ignoring the displacements here is intended.
    fill_scatter_sendbuf(comm, in_values, sizes, (int const*)0, sendbuf, archsizes);
  }
  dispatch_scatter_sendbuf(comm, sendbuf, archsizes, (T const*)0, out_values, out_size, root);
}
static void broadcast(const communicator& comm, std::vector<T>& x, int root) { size_t sz = x.size(); Collectives<size_t, void*>::broadcast(comm, sz, root); if (comm.rank() != root) x.resize(sz); MPI_Bcast(Datatype::address(x[0]), x.size(), Datatype::datatype(), root, comm); }