template<typename T>
void
all_gather_impl(const communicator& comm, const T* in_values, int n,
                T* out_values, int const* sizes, int const* skips, mpl::false_)
{
  int nproc = comm.size();
  // First, gather all the archive sizes; they can differ from one
  // process to another.
  packed_oarchive oa(comm);
  for (int i = 0; i < n; ++i) {
    oa << in_values[i];
  }
  std::vector<int> oasizes(nproc);
  int oasize = oa.size();
  BOOST_MPI_CHECK_RESULT(MPI_Allgather,
                         (&oasize, 1, MPI_INT,
                          c_data(oasizes), 1, MPI_INT,
                          MPI_Comm(comm)));
  // Gather the archives, which can be of different sizes, so
  // we need to use allgatherv.
  // Everything is contiguous, so the offsets can be
  // deduced from the collected sizes.
  std::vector<int> offsets(nproc);
  sizes2offsets(oasizes, offsets);
  packed_iarchive::buffer_type recv_buffer(std::accumulate(oasizes.begin(), oasizes.end(), 0));
  BOOST_MPI_CHECK_RESULT(MPI_Allgatherv,
                         (const_cast<void*>(oa.address()), int(oa.size()), MPI_BYTE,
                          c_data(recv_buffer), c_data(oasizes), c_data(offsets), MPI_BYTE,
                          MPI_Comm(comm)));
  for (int src = 0; src < nproc; ++src) {
    int nb   = sizes ? sizes[src] : n;
    int skip = skips ? skips[src] : 0;
    std::advance(out_values, skip);
    if (src == comm.rank()) { // this is our local data
      for (int i = 0; i < nb; ++i) {
        *out_values++ = *in_values++;
      }
    } else {
      packed_iarchive ia(comm, recv_buffer, boost::archive::no_header, offsets[src]);
      for (int i = 0; i < nb; ++i) {
        ia >> *out_values++;
      }
    }
  }
}
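// --- Usage sketch (illustrative, not part of the original source) ---
// The mpl::false_ overload above is the serialized-type path; user code
// reaches it through the public all_gather(). The helper name
// example_all_gather is hypothetical; requires <boost/mpi/collectives.hpp>,
// <string>, <vector>.
void example_all_gather(const boost::mpi::communicator& comm)
{
  std::string mine = "rank " + std::to_string(comm.rank());
  std::vector<std::string> everything;
  boost::mpi::all_gather(comm, mine, everything);
  // everything[p] now holds the string sent by process p, on every process.
}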
template<typename T, typename Op>
void
upper_lower_scan(const communicator& comm, const T* in_values, int n,
                 T* out_values, Op& op, int lower, int upper)
{
  int tag = environment::collectives_tag();
  int rank = comm.rank();

  if (lower + 1 == upper) {
    std::copy(in_values, in_values + n, out_values);
  } else {
    int middle = (lower + upper) / 2;

    if (rank < middle) {
      // Lower half
      upper_lower_scan(comm, in_values, n, out_values, op, lower, middle);

      // If we're the last process in the lower half, send our values
      // to everyone in the upper half.
      if (rank == middle - 1) {
        packed_oarchive oa(comm);
        for (int i = 0; i < n; ++i)
          oa << out_values[i];

        for (int p = middle; p < upper; ++p)
          comm.send(p, tag, oa);
      }
    } else {
      // Upper half
      upper_lower_scan(comm, in_values, n, out_values, op, middle, upper);

      // Receive the value from the last process in the lower half.
      packed_iarchive ia(comm);
      comm.recv(middle - 1, tag, ia);

      // Combine the value that came from the left with our value.
      T left_value;
      for (int i = 0; i < n; ++i) {
        ia >> left_value;
        out_values[i] = op(left_value, out_values[i]);
      }
    }
  }
}
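// --- Usage sketch (illustrative, not part of the original source) ---
// The recursion above is driven over the full rank range [0, comm.size()),
// which is how Boost.MPI's scan() uses it for serialized types. T and Op
// are chosen here purely for illustration; example_scan is a hypothetical
// name and requires <functional>.
void example_scan(const boost::mpi::communicator& comm)
{
  int in[2]  = { comm.rank(), comm.rank() };
  int out[2] = { 0, 0 };
  std::plus<int> op;
  upper_lower_scan(comm, in, 2, out, op, 0, comm.size());
  // out[i] is now the inclusive prefix sum of in[i] over ranks 0..comm.rank().
}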
template<typename T, typename Op>
void
tree_reduce_impl(const communicator& comm, const T* in_values, int n,
                 T* out_values, Op op, int root,
                 mpl::true_ /*is_commutative*/)
{
  std::copy(in_values, in_values + n, out_values);

  int size = comm.size();
  int rank = comm.rank();

  // The computation tree we will use.
  detail::computation_tree tree(rank, size, root);

  int tag = environment::collectives_tag();

  MPI_Status status;
  int children = 0;
  for (int child = tree.child_begin();
       children < tree.branching_factor() && child != root;
       ++children, child = (child + 1) % size) {
    // Receive archive
    packed_iarchive ia(comm);
    detail::packed_archive_recv(comm, child, tag, ia, status);

    T incoming;
    for (int i = 0; i < n; ++i) {
      ia >> incoming;
      out_values[i] = op(out_values[i], incoming);
    }
  }

  // For non-roots, send the result to the parent.
  if (tree.parent() != rank) {
    packed_oarchive oa(comm);
    for (int i = 0; i < n; ++i)
      oa << out_values[i];
    detail::packed_archive_send(comm, tree.parent(), tag, oa);
  }
}
void fireParameterChanged(const ParameterList& pl)
{
  double delta  = getParameterValue("delta");
  double tau    = getParameterValue("tau");
  double lambda = getParameterValue("lambda");
  //double sigma = getParameterValue("sigma");

  model_pointer->model->set_model_parameter("delta", delta);
  model_pointer->model->set_model_parameter("tau", tau);
  model_pointer->model->set_model_parameter("lambda", lambda);
  //model_pointer->model->set_model_parameter("Delta_bar", sigma*1e6);
  //model_pointer->model->set_model_parameter("Lambda_bar", sigma*1e6);
  //model_pointer->calculate_EGb();

  double y = -(model_pointer->calculate_pun());
  model_pointer->gather_counts();
  if (world.rank() == 0) {
    cout << endl
         << "delta=" << delta << "\t tau=" << tau << "\t lambda=" << lambda
         << "\t ll=" << -y << endl;
    model_pointer->print_branch_counts();
  }
  fval_ = y;
}
template <typename T>
std::vector<T> mpi_gather(std::vector<T> const &a, communicator c, int root, bool all, std::true_type) {
  long size = mpi_reduce(a.size(), c, root, all);
  std::vector<T> b((all || (c.rank() == root) ? size : 0));

  auto recvcounts = std::vector<int>(c.size());
  auto displs     = std::vector<int>(c.size() + 1, 0);
  int sendcount   = a.size();
  auto mpi_ty     = mpi::mpi_datatype<int>();
  if (!all)
    MPI_Gather(&sendcount, 1, mpi_ty, &recvcounts[0], 1, mpi_ty, root, c.get());
  else
    MPI_Allgather(&sendcount, 1, mpi_ty, &recvcounts[0], 1, mpi_ty, c.get());

  for (int r = 0; r < c.size(); ++r) displs[r + 1] = recvcounts[r] + displs[r];

  if (!all)
    MPI_Gatherv((void *)a.data(), sendcount, mpi_datatype<T>(), (void *)b.data(), &recvcounts[0], &displs[0],
                mpi_datatype<T>(), root, c.get());
  else
    MPI_Allgatherv((void *)a.data(), sendcount, mpi_datatype<T>(), (void *)b.data(), &recvcounts[0], &displs[0],
                   mpi_datatype<T>(), c.get());

  return b;
}
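// --- Usage sketch (illustrative, not part of the original source) ---
// Gathers each rank's local vector onto root 0 by calling the MPI-datatype
// (std::true_type) overload above directly. Assumes MPI is already
// initialized and that `communicator` wraps MPI_COMM_WORLD; example_gather
// is a hypothetical name.
void example_gather(communicator c) {
  std::vector<int> local(3, c.rank()); // three copies of this rank's id
  std::vector<int> gathered = mpi_gather(local, c, /*root=*/0, /*all=*/false, std::true_type{});
  // On root 0, `gathered` holds rank 0's values, then rank 1's, and so on;
  // on other ranks it stays empty.
}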
template<typename T>
void
ring_test(const communicator& comm, const T& pass_value, const char* kind,
          int root = 0)
{
  T transferred_value;

  int rank = comm.rank();
  int size = comm.size();

  if (rank == root) {
    std::cout << "Passing " << kind << " around a ring from root " << root
              << "...";
    comm.send((rank + 1) % size, 0, pass_value);
    comm.recv((rank + size - 1) % size, 0, transferred_value);
    BOOST_CHECK(transferred_value == pass_value);
    if (transferred_value == pass_value)
      std::cout << " OK." << std::endl;
  } else {
    comm.recv((rank + size - 1) % size, 0, transferred_value);
    BOOST_CHECK(transferred_value == pass_value);
    comm.send((rank + 1) % size, 0, transferred_value);
  }

  (comm.barrier)();
}
void
test_skeleton_and_content(const communicator& comm, int root = 0)
{
  using boost::mpi::content;
  using boost::mpi::get_content;
  using boost::make_counting_iterator;
  using boost::mpi::broadcast;

  typedef std::list<int>::iterator iterator;

  int list_size = comm.size() + 7;
  if (comm.rank() == root) {
    // Fill in the seed data
    std::list<int> original_list;
    for (int i = 0; i < list_size; ++i)
      original_list.push_back(i);

    // Build up the skeleton
    packed_skeleton_oarchive oa(comm);
    oa << original_list;

    // Broadcast the skeleton
    std::cout << "Broadcasting integer list skeleton from root " << root
              << "...";
    broadcast(comm, oa, root);
    std::cout << "OK." << std::endl;

    // Broadcast the content
    std::cout << "Broadcasting integer list content from root " << root
              << "...";
    {
      content c = get_content(original_list);
      broadcast(comm, c, root);
    }
    std::cout << "OK." << std::endl;

    // Reverse the list, broadcast the content again
    std::reverse(original_list.begin(), original_list.end());
    std::cout << "Broadcasting reversed integer list content from root "
              << root << "...";
    {
      content c = get_content(original_list);
      broadcast(comm, c, root);
    }
    std::cout << "OK." << std::endl;
  } else {
    // Allocate some useless data, to try to get the addresses of the
    // list<int>'s used later to be different across processes.
    std::list<int> junk_list(comm.rank() * 3 + 1, 17);

    // Receive the skeleton
    packed_skeleton_iarchive ia(comm);
    broadcast(comm, ia, root);

    // Build up a list to match the skeleton, and make sure it has the
    // right structure (we have no idea what the data will be).
    std::list<int> transferred_list;
    ia >> transferred_list;
    BOOST_CHECK((int)transferred_list.size() == list_size);

    // Receive the content and check it
    broadcast(comm, get_content(transferred_list), root);
    BOOST_CHECK(std::equal(make_counting_iterator(0),
                           make_counting_iterator(list_size),
                           transferred_list.begin()));

    // Receive the reversed content and check it
    broadcast(comm, get_content(transferred_list), root);
    BOOST_CHECK(std::equal(make_counting_iterator(0),
                           make_counting_iterator(list_size),
                           transferred_list.rbegin()));
  }

  (comm.barrier)();
}
template<typename T>
void
nonblocking_test(const communicator& comm, const T* values, int num_values,
                 const char* kind, method_kind method = mk_all)
{
  using boost::mpi::wait_any;
  using boost::mpi::test_any;
  using boost::mpi::wait_all;
  using boost::mpi::test_all;
  using boost::mpi::wait_some;
  using boost::mpi::test_some;

  if (method == mk_all || method == mk_all_except_test_all) {
    nonblocking_test(comm, values, num_values, kind, mk_wait_any);
    nonblocking_test(comm, values, num_values, kind, mk_test_any);
    nonblocking_test(comm, values, num_values, kind, mk_wait_all);
    nonblocking_test(comm, values, num_values, kind, mk_wait_all_keep);
    if (method == mk_all) {
      nonblocking_test(comm, values, num_values, kind, mk_test_all);
      nonblocking_test(comm, values, num_values, kind, mk_test_all_keep);
    }
    nonblocking_test(comm, values, num_values, kind, mk_wait_some);
    nonblocking_test(comm, values, num_values, kind, mk_wait_some_keep);
    nonblocking_test(comm, values, num_values, kind, mk_test_some);
    nonblocking_test(comm, values, num_values, kind, mk_test_some_keep);
  } else {
    if (comm.rank() == 0) {
      std::cout << "Testing " << method_kind_names[method]
                << " with " << kind << "...";
      std::cout.flush();
    }

    typedef std::pair<status, std::vector<request>::iterator>
      status_iterator_pair;

    T incoming_value;
    std::vector<T> incoming_values(num_values);

    std::vector<request> reqs;
    // Send/receive the first value
    reqs.push_back(comm.isend((comm.rank() + 1) % comm.size(), 0, values[0]));
    reqs.push_back(comm.irecv((comm.rank() + comm.size() - 1) % comm.size(),
                              0, incoming_value));

    if (method != mk_wait_any && method != mk_test_any) {
#ifndef LAM_MPI
      // We've run into problems here (with 0-length messages) with
      // LAM/MPI on Mac OS X and x86-64 Linux. Will investigate
      // further at a later time, but the problem only seems to occur
      // when using shared memory, not TCP.
      // Send/receive an empty message
      reqs.push_back(comm.isend((comm.rank() + 1) % comm.size(), 1));
      reqs.push_back(comm.irecv((comm.rank() + comm.size() - 1) % comm.size(),
                                1));
#endif

      // Send/receive an array
      reqs.push_back(comm.isend((comm.rank() + 1) % comm.size(), 2, values,
                                num_values));
      reqs.push_back(comm.irecv((comm.rank() + comm.size() - 1) % comm.size(),
                                2, &incoming_values.front(), num_values));
    }

    switch (method) {
    case mk_wait_any:
      if (wait_any(reqs.begin(), reqs.end()).second == reqs.begin())
        reqs[1].wait();
      else
        reqs[0].wait();
      break;

    case mk_test_any:
      {
        boost::optional<status_iterator_pair> result;
        do {
          result = test_any(reqs.begin(), reqs.end());
        } while (!result);
        if (result->second == reqs.begin())
          reqs[1].wait();
        else
          reqs[0].wait();
        break;
      }

    case mk_wait_all:
      wait_all(reqs.begin(), reqs.end());
      break;

    case mk_wait_all_keep:
      {
        std::vector<status> stats;
        wait_all(reqs.begin(), reqs.end(), std::back_inserter(stats));
      }
      break;

    case mk_test_all:
      while (!test_all(reqs.begin(), reqs.end())) { /* Busy wait */ }
      break;

    case mk_test_all_keep:
      {
        std::vector<status> stats;
        while (!test_all(reqs.begin(), reqs.end(), std::back_inserter(stats)))
          /* Busy wait */ ;
      }
      break;

    case mk_wait_some:
      {
        std::vector<request>::iterator pos = reqs.end();
        do {
          pos = wait_some(reqs.begin(), pos);
        } while (pos != reqs.begin());
      }
      break;

    case mk_wait_some_keep:
      {
        std::vector<status> stats;
        std::vector<request>::iterator pos = reqs.end();
        do {
          pos = wait_some(reqs.begin(), pos, std::back_inserter(stats)).second;
        } while (pos != reqs.begin());
      }
      break;

    case mk_test_some:
      {
        std::vector<request>::iterator pos = reqs.end();
        do {
          pos = test_some(reqs.begin(), pos);
        } while (pos != reqs.begin());
      }
      break;

    case mk_test_some_keep:
      {
        std::vector<status> stats;
        std::vector<request>::iterator pos = reqs.end();
        do {
          pos = test_some(reqs.begin(), pos, std::back_inserter(stats)).second;
        } while (pos != reqs.begin());
      }
      break;

    default:
      BOOST_CHECK(false);
    }

    if (comm.rank() == 0) {
      bool okay = true;

      if (!(incoming_value == values[0]))
        okay = false;

      if (method != mk_wait_any && method != mk_test_any
          && !std::equal(incoming_values.begin(), incoming_values.end(),
                         values))
        okay = false;

      if (okay)
        std::cout << "OK." << std::endl;
      else
        std::cerr << "ERROR!" << std::endl;
    }

    BOOST_CHECK(incoming_value == values[0]);

    if (method != mk_wait_any && method != mk_test_any)
      BOOST_CHECK(std::equal(incoming_values.begin(), incoming_values.end(),
                             values));
  }
}
template<typename T>
void
scatterv(const communicator& comm, T* out_values, int out_size, int root)
{
  BOOST_ASSERT(comm.rank() != root);
  detail::scatterv_impl(comm, out_values, out_size, root, is_mpi_datatype<T>());
}
/**
 * Function to chunk a range, distributing it uniformly over all MPI ranks.
 *
 * @tparam T The type of the range
 *
 * @param range The range to chunk
 * @param comm The MPI communicator
 */
template <typename T>
auto chunk(T &&range, communicator comm = {}) {
  auto total_size           = std::distance(std::cbegin(range), std::cend(range));
  auto [start_idx, end_idx] = itertools::chunk_range(0, total_size, comm.size(), comm.rank());
  return itertools::slice(std::forward<T>(range), start_idx, end_idx);
}
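// --- Usage sketch (illustrative, not part of the original source) ---
// Splits a range of work items evenly over the communicator; each rank only
// iterates over its own slice. Assumes itertools::range is available
// alongside chunk(); example_chunk is a hypothetical name.
void example_chunk(communicator comm) {
  double partial = 0.0;
  for (auto i : chunk(itertools::range(0, 1000), comm))
    partial += 0.5 * i; // each rank handles roughly 1000 / comm.size() items
  // `partial` can then be combined across ranks with a reduction.
}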
template<typename T>
void
gather(const communicator& comm, const T& in_value, int root)
{
  BOOST_ASSERT(comm.rank() != root);
  detail::gather_impl(comm, &in_value, 1, root, is_mpi_datatype<T>());
}
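// --- Usage sketch (illustrative, not part of the original source) ---
// The overload above is the non-root side of gather(): only the root passes
// an output container. A typical matched pair of calls, with the root
// collecting one int per rank; example_gather_one is a hypothetical name.
void example_gather_one(const boost::mpi::communicator& comm)
{
  int my_value = 10 * comm.rank();
  if (comm.rank() == 0) {
    std::vector<int> all_values;
    boost::mpi::gather(comm, my_value, all_values, 0); // root-side overload
  } else {
    boost::mpi::gather(comm, my_value, 0);             // overload shown above
  }
}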
template <typename T>
void mpi_reduce_in_place(std::vector<T> &a, communicator c, int root, bool all, MPI_Op op, std::true_type) {
  if (!all)
    MPI_Reduce((c.rank() == root ? MPI_IN_PLACE : a.data()), a.data(), a.size(), mpi_datatype<T>(), op, root, c.get());
  else
    MPI_Allreduce(MPI_IN_PLACE, a.data(), a.size(), mpi_datatype<T>(), op, c.get());
}
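// --- Usage sketch (illustrative, not part of the original source) ---
// Element-wise sum of a vector across all ranks, overwriting the local copy
// on every rank (all == true). Calls the MPI-datatype overload above
// directly; example_allreduce is a hypothetical name.
void example_allreduce(communicator c) {
  std::vector<double> acc = {1.0 * c.rank(), 2.0 * c.rank()};
  mpi_reduce_in_place(acc, c, /*root=*/0, /*all=*/true, MPI_SUM, std::true_type{});
  // `acc` now holds the element-wise sum over all ranks on every rank.
}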
template <typename T>
void mpi_broadcast(std::vector<T> &v, communicator c, int root, std::false_type) {
  size_t s = v.size();
  mpi_broadcast(s, c, root);
  if (c.rank() != root) v.resize(s);
  for (auto &x : v) mpi_broadcast(x, c, root);
}
template <typename T>
void mpi_broadcast(std::vector<T> &a, communicator c, int root, std::true_type) {
  size_t s = a.size();
  mpi_broadcast(s, c, root);
  if (c.rank() != root) a.resize(s);
  MPI_Bcast(a.data(), a.size(), mpi_datatype<T>(), root, c.get());
}
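// --- Usage sketch (illustrative, not part of the original source) ---
// Broadcasts a vector from root 0: the size goes out first, non-root ranks
// resize, then the data follows in a single MPI_Bcast (the std::true_type
// overload above). example_bcast is a hypothetical name.
void example_bcast(communicator c) {
  std::vector<int> v;
  if (c.rank() == 0) v = {1, 2, 3, 4};
  mpi_broadcast(v, c, /*root=*/0, std::true_type{});
  // v == {1, 2, 3, 4} on every rank afterwards.
}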
void
test_skeleton_and_content(const communicator& comm, int root,
                          bool manual_broadcast)
{
  using boost::mpi::skeleton;
  using boost::mpi::content;
  using boost::mpi::get_content;
  using boost::make_counting_iterator;
  using boost::mpi::broadcast;

  int list_size = comm.size() + 7;
  if (comm.rank() == root) {
    // Fill in the seed data
    std::list<int> original_list;
    for (int i = 0; i < list_size; ++i)
      original_list.push_back(i);

    std::cout << "Broadcasting integer list skeleton from root " << root
              << "...";
    if (manual_broadcast) {
      // Broadcast the skeleton (manually)
      for (int p = 0; p < comm.size(); ++p)
        if (p != root) comm.send(p, 0, skeleton(original_list));
    } else {
      broadcast(comm, skeleton(original_list), root);
    }
    std::cout << "OK." << std::endl;

    // Broadcast the content (manually)
    std::cout << "Broadcasting integer list content from root " << root
              << "...";
    {
      content c = get_content(original_list);
      for (int p = 0; p < comm.size(); ++p)
        if (p != root) comm.send(p, 1, c);
    }
    std::cout << "OK." << std::endl;

    // Reverse the list, broadcast the content again
    std::reverse(original_list.begin(), original_list.end());
    std::cout << "Broadcasting reversed integer list content from root "
              << root << "...";
    {
      content c = get_content(original_list);
      for (int p = 0; p < comm.size(); ++p)
        if (p != root) comm.send(p, 2, c);
    }
    std::cout << "OK." << std::endl;
  } else {
    // Allocate some useless data, to try to get the addresses of the
    // list<int>'s used later to be different across processes.
    std::list<int> junk_list(comm.rank() * 3 + 1, 17);

    // Receive the skeleton to build up the transferred list
    std::list<int> transferred_list;
    if (manual_broadcast) {
      comm.recv(root, 0, skeleton(transferred_list));
    } else {
      broadcast(comm, skeleton(transferred_list), root);
    }
    BOOST_CHECK((int)transferred_list.size() == list_size);

    // Receive the content and check it
    comm.recv(root, 1, get_content(transferred_list));
    BOOST_CHECK(std::equal(make_counting_iterator(0),
                           make_counting_iterator(list_size),
                           transferred_list.begin()));

    // Receive the reversed content and check it
    comm.recv(root, 2, get_content(transferred_list));
    BOOST_CHECK(std::equal(make_counting_iterator(0),
                           make_counting_iterator(list_size),
                           transferred_list.rbegin()));
  }

  (comm.barrier)();
}
//---------
static void reduce_in_place(communicator c, A &a, int root) {
  check_is_contiguous(a);
  // assume arrays have the same size on all nodes...
  MPI_Reduce((c.rank() == root ? MPI_IN_PLACE : a.data_start()), a.data_start(),
             a.domain().number_of_elements(), D(), MPI_SUM, root, c.get());
}
template<typename T>
void
all_to_all_impl(const communicator& comm, const T* in_values, int n,
                T* out_values, mpl::false_)
{
  int size = comm.size();
  int rank = comm.rank();

  // The amount of data to be sent to each process
  std::vector<int> send_sizes(size);

  // The displacements for each outgoing value.
  std::vector<int> send_disps(size);

  // The buffer that will store all of the outgoing values
  std::vector<char, allocator<char> > outgoing;

  // Pack the buffer with all of the outgoing values.
  for (int dest = 0; dest < size; ++dest) {
    // Keep track of the displacements
    send_disps[dest] = outgoing.size();

    // Our own value will never be transmitted, so don't pack it.
    if (dest != rank) {
      packed_oarchive oa(comm, outgoing);
      for (int i = 0; i < n; ++i)
        oa << in_values[dest * n + i];
    }

    // Keep track of the sizes
    send_sizes[dest] = outgoing.size() - send_disps[dest];
  }

  // Determine how much data each process will receive.
  std::vector<int> recv_sizes(size);
  all_to_all(comm, send_sizes, recv_sizes);

  // Prepare a buffer to receive the incoming data.
  std::vector<int> recv_disps(size);
  int sum = 0;
  for (int src = 0; src < size; ++src) {
    recv_disps[src] = sum;
    sum += recv_sizes[src];
  }
  std::vector<char, allocator<char> > incoming(sum > 0 ? sum : 1);

  // Make sure we don't try to reference an empty vector
  if (outgoing.empty())
    outgoing.push_back(0);

  // Transmit the actual data
  BOOST_MPI_CHECK_RESULT(MPI_Alltoallv,
                         (&outgoing[0], &send_sizes[0], &send_disps[0],
                          MPI_PACKED,
                          &incoming[0], &recv_sizes[0], &recv_disps[0],
                          MPI_PACKED,
                          comm));

  // Deserialize data from the archive
  for (int src = 0; src < size; ++src) {
    if (src == rank)
      std::copy(in_values + src * n, in_values + (src + 1) * n,
                out_values + src * n);
    else {
      packed_iarchive ia(comm, incoming, boost::archive::no_header,
                         recv_disps[src]);
      for (int i = 0; i < n; ++i)
        ia >> out_values[src * n + i];
    }
  }
}