/**
 * Reduce a vector whose elements are selected by the `std::false_type` tag
 * (i.e. the overload used when the element type is not handled as a single
 * contiguous MPI buffer).
 *
 * Each element of `a` is reduced individually through the five-argument
 * `mpi_reduce(element, c, root, all, op)` and the results are collected, in
 * order, into the returned vector.
 *
 * NOTE(review): one reduction is issued per element, so every participating
 * rank presumably has to pass a vector of the same length — confirm with callers.
 *
 * @param a    Local vector to reduce.
 * @param c    Communicator forwarded to the per-element reduction.
 * @param root Root rank forwarded to the per-element reduction.
 * @param all  Forwarded unchanged to the per-element reduction.
 * @param op   MPI reduction operation forwarded to the per-element reduction.
 * @return     Vector holding one reduced value per input element.
 */
std::vector<Regular<T>> mpi_reduce(std::vector<T> const &a, communicator c, int root, bool all, MPI_Op op, std::false_type) {
  std::vector<Regular<T>> lhs;
  // Reserve with the container's own size type: the previous `int` copy of
  // a.size() could truncate for very large vectors.
  lhs.reserve(a.size());
  for (auto const &element : a) lhs.push_back(mpi_reduce(element, c, root, all, op));
  return lhs;
}
/**
 * Two-stage reduction of `n` elements read from `src`.
 *
 * First calls `reduce(func, src, n)` to obtain a partial result, then hands
 * that partial result to `mpi_reduce(func, &out, &in, 1)` as a single-element
 * buffer and returns whatever `mpi_reduce` wrote into the output slot.
 *
 * NOTE(review): whether every rank or only one rank receives a meaningful
 * value depends on `mpi_reduce`, which is not visible here — confirm.
 *
 * @param func Reduction functor passed to both stages.
 * @param src  Source of the elements to reduce.
 * @param n    Number of elements passed to the first stage.
 * @return     The value stored by `mpi_reduce` in the output slot.
 */
typename traits::GetValueType<Src>::ValueType operator()(Functor func, Src src, uint32_t n)
{
    typedef typename traits::GetValueType<Src>::ValueType Value;

    // Stage 1: reduce the elements of this source.
    Value partial = reduce(func, src, n);

    // Stage 2: combine the partial result through mpi_reduce (one element).
    Value combined;
    mpi_reduce(func, &combined, &partial, 1);

    return combined;
}
/*
 * MPI_Reduce implemented on top of a local socket connection.
 *
 * Non-root ranks format a text header and hand it, together with their send
 * buffer, to mpi2bsp(). The root rank copies its own contribution into
 * recvbuf (unless MPI_IN_PLACE is used), sends a header over a pooled
 * connection to 127.0.0.1:LOCAL_PORT, then reads MPI_SIZE - 1 payloads of
 * `size` bytes each and folds every payload into recvbuf with mpi_reduce().
 *
 * Parameters follow the standard MPI_Reduce signature. Note that the
 * "count=" / "rcount=" header fields carry the payload size in BYTES
 * (count * mpi_sizeof(type)), not the element count.
 *
 * NOTE(review): `comm` and `type` are printed with %d, which assumes
 * MPI_Comm and MPI_Datatype are int-compatible in this implementation.
 *
 * Returns 0 on success. Every failure path terminates the process with
 * exit(1): library not initialised, sendbuf aliasing recvbuf on the root,
 * connection failure, allocation failure, or a failed/closed recv().
 */
int MPI_Reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype type, MPI_Op op, int root, MPI_Comm comm) {
    if (!INITIALIZED) {
        exit(1);
    }

    char input[128];
    char output[8];
    int size = count * mpi_sizeof(type);

    if (MPI_RANK != root) {
        /* snprintf bounds the write to the header buffer. */
        snprintf(input, sizeof(input),
                 "MPI_Reduce\ncount=%d\nsource=%d\ndest=%d\ncomm=%d\ntype=%d\n\n",
                 size, root, root, comm, type);
        mpi2bsp(input, sendbuf, size, output, sizeof(output), FALSE);
    } else {
        if (sendbuf == recvbuf) {
            /* Aliased buffers are rejected; MPI_IN_PLACE must be used instead. */
            exit(1);
        } else if (sendbuf != MPI_IN_PLACE) {
            /* Seed the accumulator with the root's own contribution. */
            memcpy(recvbuf, sendbuf, size);
        }

        snprintf(input, sizeof(input),
                 "MPI_Reduce\nrcount=%d\nsource=%d\ncomm=%d\ntype=%d\n\n",
                 size, root, comm, type);

        int sockfd = get_connection(POOL, "127.0.0.1", LOCAL_PORT);
        if (sockfd < 0) {
            printf("Failed to open a socket to the parent process\n");
            exit(1);
        }
        send(sockfd, input, strlen(input), 0);

        /* Byte-typed scratch buffer: pointer arithmetic on void* is not
         * standard C/C++. malloc(0) may legitimately return NULL, so only a
         * NULL result for a non-empty payload is treated as a failure. */
        char *temp = (char *)malloc(size);
        if (temp == NULL && size > 0) {
            printf("Failed to allocate the receive buffer\n");
            exit(1);
        }

        int i;
        for (i = 0; i < MPI_SIZE - 1; i++) {
            /* recv() may return fewer bytes than requested; loop until one
             * full payload has arrived. */
            int bytes_read = 0;
            while (bytes_read < size) {
                int ret = recv(sockfd, temp + bytes_read, size - bytes_read, 0);
                if (ret <= 0) {
                    exit(1);
                } else {
                    bytes_read += ret;
                }
            }
            /* Fold the received contribution into the accumulator. */
            mpi_reduce(recvbuf, temp, count, type, op);
        }

        release_connection(POOL, "127.0.0.1", LOCAL_PORT, sockfd);
        free(temp);
    }
    return 0;
}
// Exercises the MPI helpers (reduce, broadcast, all_reduce, scatter, gather)
// on a three-mesh Green's function, and broadcast on block Green's functions.
TEST(Gfs, MPI_multivar) {

  mpi::communicator world;

  int n_fermi = 2, n_bose = 10;
  double beta = 10;

  // Placeholders; only the last three are used below.
  clef::placeholder<0> k_;
  clef::placeholder<1> q_;
  clef::placeholder<2> r_;
  clef::placeholder<3> om_;
  clef::placeholder<4> nu_;
  clef::placeholder<5> nup_;

  // Every rank builds the same function on a (Boson, Fermion, Fermion) mesh.
  auto g_local = gf3_s{{{beta, Boson, n_bose}, {beta, Fermion, n_fermi}, {beta, Fermion, n_fermi}}};
  g_local(om_, nu_, nup_) << nu_ + 10 * nup_ + 100 * om_;

  // Reduce: rank 0 is expected to hold the local data times the number of ranks.
  auto g_sum = g_local;
  g_sum      = mpi_reduce(g_local, world);
  if (world.rank() == 0) EXPECT_ARRAY_NEAR(g_sum.data(), g_local.data() * world.size());

  // Broadcast: afterwards rank 1 is expected to hold the same reduced data.
  mpi_broadcast(g_sum, world);
  if (world.rank() == 1) EXPECT_ARRAY_NEAR(g_sum.data(), g_local.data() * world.size());

  // All-reduce: the expectation is checked on every rank.
  gf3_s g_all = mpi_all_reduce(g_local, world);
  EXPECT_ARRAY_NEAR(g_all.data(), g_local.data() * world.size());

  // Scatter, rescale by (1 + rank), then gather.
  gf3_s g_part = mpi_scatter(g_local);
  g_sum(om_, nu_, nup_) << g_sum(om_, nu_, nup_) * (1 + world.rank());
  g_part = mpi_gather(g_sum);
  // Test the result ?  (the gathered data is currently not checked)

  // Broadcast of block Green's functions: only checks that the calls run.
  auto bgf_multi  = make_block_gf<cartesian_product<imfreq, imfreq, imfreq>, scalar_valued>({g_local});
  auto g_bose     = gf<imfreq, scalar_valued>{{beta, Boson, n_bose}};
  auto bgf_single = make_block_gf<imfreq, scalar_valued>({g_bose});

  mpi_broadcast(bgf_multi, world);
  mpi_broadcast(bgf_single, world);
}
template <typename T> std::vector<T> mpi_gather(std::vector<T> const &a, communicator c, int root, bool all, std::true_type) { long size = mpi_reduce(a.size(), c, root, all); std::vector<T> b((all || (c.rank() == root) ? size : 0)); auto recvcounts = std::vector<int>(c.size()); auto displs = std::vector<int>(c.size() + 1, 0); int sendcount = a.size(); auto mpi_ty = mpi::mpi_datatype<int>(); if (!all) MPI_Gather(&sendcount, 1, mpi_ty, &recvcounts[0], 1, mpi_ty, root, c.get()); else MPI_Allgather(&sendcount, 1, mpi_ty, &recvcounts[0], 1, mpi_ty, c.get()); for (int r = 0; r < c.size(); ++r) displs[r + 1] = recvcounts[r] + displs[r]; if (!all) MPI_Gatherv((void *)a.data(), sendcount, mpi_datatype<T>(), (void *)b.data(), &recvcounts[0], &displs[0], mpi_datatype<T>(), root, c.get()); else MPI_Allgatherv((void *)a.data(), sendcount, mpi_datatype<T>(), (void *)b.data(), &recvcounts[0], &displs[0], mpi_datatype<T>(), c.get()); return b; }
/**
 * Accumulate particular solutions on all ranks and return their normalized sum.
 *
 * Solutions are distributed round-robin over the ranks of `comm` (global index
 * `rank + i*size`). After each batch the global minimum `D_min` of the
 * objective function is computed, and solutions with `D/D_min` below
 * `params.adjust_l_good_d` / `params.adjust_l_verygood_d` are counted as good /
 * very good. When `params.adjust_l` is set, the target number of solutions is
 * increased by `params.adjust_l_range.first` per pass until the ratio of very
 * good to good solutions reaches `params.adjust_l_ratio` or the upper bound
 * `params.adjust_l_range.second` would be exceeded.
 *
 * @param kern          Kernel passed to the objective function.
 * @param rhs_          Right-hand side passed to the objective function.
 * @param error_bars_   Error bars passed to the objective function.
 * @param norm          Norm passed to the solution worker.
 * @param hist          Output histogram of objective function values;
 *                      overwritten only if `params.make_histograms` is set.
 * @param stop_callback Callback passed to the solution worker.
 * @param F             Integer parameter passed to the solution worker.
 * @return Sum of all good solutions divided by the global number of good
 *         solutions, reduced over `comm`.
 */
template<typename KernelType> configuration som_core::accumulate(KernelType const& kern,
 typename KernelType::result_type rhs_,
 typename KernelType::result_type error_bars_,
 double norm,
 histogram & hist,
 std::function<bool()> const& stop_callback,
 int F) {

 if(params.verbosity >= 1) std::cout << "Accumulating particular solutions ..." << std::endl;

 objective_function<KernelType> of(kern, rhs_, error_bars_);
 solution_worker<KernelType> worker(of,norm,ci,params,stop_callback,F);
 auto & rng = worker.get_rng();

 // Pairs (configuration, objective function)
 std::vector<std::pair<configuration,double>> solutions;

 int n_sol_max = 0;                                   // Number of solutions to be accumulated
 int n_sol, i = 0;                                    // Global and rank-local indices of solution
 // Number of good and very good solutions. Initialized because the loop below
 // can `break` before these are ever assigned, and they are read afterwards.
 int n_good_solutions = 0, n_verygood_solutions = 0;
 double objf_min = HUGE_VAL;                          // Minimal value of D

 do {
  if(params.adjust_l) {
   n_sol_max += params.adjust_l_range.first;
   if(n_sol_max > params.adjust_l_range.second) {
    if(params.verbosity >= 1)
     warning("Upper bound of adjust_l_range has been reached");
    break;
   }
  } else
   n_sol_max = params.l;

  solutions.reserve(n_sol_max);

  if(params.verbosity >= 1)
   std::cout << "Increasing the total number of solutions to be accumulated to "
             << n_sol_max << std::endl;

  // `i` is not reset between passes: each pass continues where the previous
  // one stopped and only computes the newly requested solutions.
  for(; (n_sol = comm.rank() + i*comm.size()) < n_sol_max; ++i) {
   if(params.verbosity >= 2) {
    std::cout << "[Node " << comm.rank() << "] Accumulation of particular solution "
              << n_sol << std::endl;
   }

   solutions.emplace_back(worker(1 + rng(params.max_rects)), 0);

   double D = worker.get_objf_value();
   solutions.back().second = D;
   objf_min = std::min(objf_min, D);

   if(params.verbosity >= 2) {
    std::cout << "[Node " << comm.rank() << "] Solution " << n_sol
              << ": D = " << D << std::endl;
   }
  }
  comm.barrier();

  // Global minimum of D_min
  objf_min = mpi_all_reduce(objf_min, comm, 0, MPI_MIN);

  // Recalculate numbers of good and very good solutions
  n_good_solutions = n_verygood_solutions = 0;
  for(auto const& s : solutions) {
   if(s.second/objf_min <= params.adjust_l_good_d) ++n_good_solutions;
   if(s.second/objf_min <= params.adjust_l_verygood_d) ++n_verygood_solutions;
  }
  // Reduce over the same communicator that was used for the minimum above,
  // not over the default-constructed one.
  n_good_solutions = mpi_all_reduce(n_good_solutions, comm);
  n_verygood_solutions = mpi_all_reduce(n_verygood_solutions, comm);

  if(params.verbosity >= 1) {
   std::cout << "D_min = " << objf_min << std::endl;
   std::cout << "Number of good solutions (D/D_min <= "
             << params.adjust_l_good_d << ") = " << n_good_solutions << std::endl;
   std::cout << "Number of very good solutions (D/D_min <= "
             << params.adjust_l_verygood_d << ") = " << n_verygood_solutions << std::endl;
  }

 } while(params.adjust_l &&
         double(n_verygood_solutions) / double(n_good_solutions) < params.adjust_l_ratio);

 comm.barrier();

 if(params.verbosity >= 1) {
  std::cout << "Accumulation complete." << std::endl;
  std::cout << "Summing up good solutions ..." << std::endl;
 }

 if(params.make_histograms)
  hist = histogram(objf_min, objf_min * params.hist_max, params.hist_n_bins);

 configuration sol_sum(ci);

 // Rank-local stage of summation
 for(auto const& s : solutions) {
  if(params.make_histograms) hist << s.second;
  // Pick only good solutions
  if(s.second/objf_min <= params.adjust_l_good_d)
   sol_sum += s.first;
 }
 // Normalize by the global number of good solutions. Skipped when that number
 // is zero (nothing was added to the sum), which avoids scaling by 1/0.
 if(n_good_solutions > 0) sol_sum *= 1.0/double(n_good_solutions);

 // Sum over all processes
 sol_sum = mpi_reduce(sol_sum, comm, 0, true);

 if(params.make_histograms) hist = mpi_reduce(hist, comm, 0, true);

 if(params.verbosity >= 1) std::cout << "Done" << std::endl;

 return sol_sum;
}
/// Convenience wrapper that forwards to `mpi_reduce`.
///
/// `x` is perfectly forwarded; the remaining arguments are passed through
/// unchanged. The return type is `decltype(auto)`, so the value category of
/// whatever `mpi_reduce` returns is preserved.
///
/// @param x    Object to reduce.
/// @param c    Communicator (defaults to a default-constructed communicator).
/// @param root Root rank (defaults to 0).
/// @param all  Forwarded to `mpi_reduce` (defaults to false); presumably selects
///             an all-reduce — confirm against `mpi_reduce`.
/// @param op   MPI reduction operation (defaults to MPI_SUM).
/// @return     The result of `mpi_reduce(std::forward<T>(x), c, root, all, op)`.
[[gnu::always_inline]] inline decltype(auto) reduce(T &&x, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { return mpi_reduce(std::forward<T>(x), c, root, all, op); }
/// Reduce a `std::vector<T>`.
///
/// Entry point that dispatches on the element type: all arguments are passed
/// on, together with a default-constructed `is_basic<T>` tag, to a six-argument
/// `mpi_reduce` overload selected by that tag.
///
/// @param a    Local vector to reduce.
/// @param c    Communicator (defaults to a default-constructed communicator).
/// @param root Root rank (defaults to 0).
/// @param all  Forwarded to the selected overload (defaults to false).
/// @param op   MPI reduction operation (defaults to MPI_SUM).
/// @return     The vector returned by the selected overload.
std::vector<Regular<T>> mpi_reduce(std::vector<T> const &a, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { return mpi_reduce(a, c, root, all, op, is_basic<T>{}); }