/// Classify the calling process by its position in the communicator's rank order.
///
/// Returns DOUBLE_BDY when the process is both rank 0 and the last rank (i.e. the
/// communicator has a single process), LOWER_BDY for rank 0, UPPER_BDY for the
/// last rank, and NO_BDY for every other rank.
BoundaryLocation determineBoundary(mpi::communicator& world)
{
    const int myRank = world.rank();
    const int lastRank = world.size() - 1;
    const bool isFirst = (myRank == 0);
    const bool isLast = (myRank == lastRank);

    if (isFirst && isLast) {
        return DOUBLE_BDY;
    }
    if (isFirst) {
        return LOWER_BDY;
    }
    if (isLast) {
        return UPPER_BDY;
    }
    return NO_BDY;
}
/**
 * Matrix-vector product, split by row blocks across the ranks of `world`.
 *
 * Each rank computes the rows that get_block_rows_mpi assigns to it, then every
 * owning rank broadcasts its slice of `result`, so all ranks end up with the
 * full result vector.
 */
void product_mpi (mpi::communicator world,
                  real2D* matrix, /* to multiply by */
                  real1D* vector, /* to be multiplied */
                  real1D* result, /* result of multiply */
                  int nr,         /* row size */
                  int nc)         /* column size */
{
  int first_row, end_row; /* half-open row range [first_row, end_row) */

  // local share of the product
  const bool has_rows = get_block_rows_mpi (world, 0, nr, &first_row, &end_row);
  if (has_rows) {
    for (int row = first_row; row < end_row; row++) {
      // seed with column 0, then accumulate the remaining columns
      result[row] = matrix[row][0] * vector[0];
      int col = 1;
      while (col < nc) {
        result[row] += matrix[row][col] * vector[col];
        col++;
      }
    }
  }

  // every rank that owns rows publishes its slice of the result
  const int rank_count = world.size ();
  for (int owner = 0; owner < rank_count; owner++) {
    if (!get_block_rows_mpi (world, 0, nr, &first_row, &end_row, owner)) {
      continue;
    }
    broadcast (world, &result[first_row], end_row - first_row, owner);
  }
}
/// compute the array domain of the target array domain_type domain() const { auto dims = ref.shape(); long slow_size = first_dim(ref); if (std::is_same<Tag, mpi::tag::scatter>::value) { mpi::mpi_broadcast(slow_size, c, root); dims[0] = mpi::slice_length(slow_size - 1, c.size(), c.rank()); } if (std::is_same<Tag, mpi::tag::gather>::value) { if (!all) { dims[0] = mpi::mpi_reduce(slow_size, c, root); // valid only on root if (c.rank() != root) dims[0] = 1; // valid only on root } else dims[0] = mpi::mpi_all_reduce(slow_size, c, root); // in this case, it is valid on all nodes } // mpi::tag::reduce :do nothing return domain_type{dims}; }
/**
 * Split a graph given as a vertex list plus an edge list into one contiguous
 * vertex range per rank and scatter the pieces from rank 0.
 *
 * The vertices are divided as evenly as possible (the first
 * `vertices.size() % comm.size()` ranks receive one extra vertex). The edge
 * range of a rank ends at `vertices[first vertex of the next rank]`, or at
 * `edges.size()` for the rank that owns the last vertex.
 *
 * Each rank receives a 4-entry description
 * {total vertex count, first vertex, vertex count, edge count},
 * then its slice of `vertices` and `edges`, and finally calls prepare().
 */
ParallelBFS::ParallelBFS(const mpi::communicator &comm, const NodeList &vertices, const NodeList &edges)
    : comm(comm)
{
    const int rank_count = comm.size();
    const size_t slots = (size_t)rank_count;

    NodeId base_share = (NodeId)vertices.size() / rank_count;
    NodeId extra_vertices = (NodeId)vertices.size() % rank_count;

    NodeList part_vertices(slots);
    NodeList first_vertices(slots);
    NodeList part_edges(slots);
    NodeList first_edges(slots);
    NodeList all_description((size_t)(rank_count * 4));

    NodeId vertex_cursor = 0;
    NodeId edge_cursor = 0;
    for (int i = 0; i < rank_count; ++i) {
        const int base = i * 4;

        // the first `extra_vertices` ranks take one vertex more than the rest
        NodeId share = base_share + (i < extra_vertices);

        // end of this rank's edge range: start of the next rank's first vertex,
        // or the end of the edge list when this rank owns the last vertex
        NodeId edge_end = (vertex_cursor + share == vertices.size())
                              ? (NodeId)edges.size()
                              : vertices[vertex_cursor + share];

        first_vertices[i] = vertex_cursor;
        part_vertices[i] = share;
        part_edges[i] = edge_end - edge_cursor;
        first_edges[i] = edge_cursor;

        all_description[base] = (NodeId)vertices.size();
        all_description[base + 1] = first_vertices[i];
        all_description[base + 2] = part_vertices[i];
        all_description[base + 3] = part_edges[i];

        edge_cursor = edge_end;
        vertex_cursor += share;
    }

    // hand every rank its own 4-entry description
    NodeList description(4);
    mpi::scatter(comm, all_description.data(), description.data(), 4, 0);
    this->vertex_total_count = description[0];
    this->first_vertex = description[1];

    // distribute the vertex and edge slices themselves
    this->vertices.resize((size_t)description[2]);
    mpi::scatterv(comm, vertices, part_vertices, first_vertices, this->vertices, 0);
    this->edges.resize((size_t)description[3]);
    mpi::scatterv(comm, edges, part_edges, first_edges, this->edges, 0);

    prepare();
}
/**
 * Create tracklet entries for the objects newly detected at frame `tt`.
 *
 * The number of new objects is the row count of detected_rects(0); object `oo`
 * is stored at index `oi.curr_num_obj + oo` of `oi.trlet_list`, and
 * `oi.curr_num_obj` is increased by that count at the end.
 *
 * For each new object:
 *  1. The tracklet's start/end frame are set to `tt`, its state to 1, and its
 *     per-camera trajectory, 3-D trajectory, histogram and score containers are
 *     allocated (zero-filled where scalar_matrix is used).
 *  2. Per camera: row `tt` of the trajectory is set to the detection rectangle,
 *     part rectangles are computed (compute_part_rects), a pmodel is derived
 *     (rects_to_pmodel_geom) and stored in oi.pmodel_list(cam, nn), and the
 *     histograms are collected (collect_hist).
 *  3. Per camera: rank 0 enumerates candidate rectangles
 *     (enumerate_rects_inpoly) and broadcasts them. Every rank scores the slice
 *     [rank*N/size, (rank+1)*N/size) of the N candidates with
 *     get_cand_hist_score; the partial results are gathered on rank 0,
 *     reassembled in rank order and broadcast back.
 *  4. Per camera: rank 0 picks the best-scoring candidate, records its feature
 *     scores and score for frame `tt`, and fills cand_array(cam); these values
 *     are then broadcast from rank 0.
 *  5. Rank 0 combines the per-camera candidate arrays into a ground score map
 *     and takes its peak; best_y and best_x are broadcast and written to
 *     trj_3d. best_s is not broadcast and is only read inside rank-0 branches.
 *  6. Per camera: rank 0 maps the ground position into the image
 *     (apply_homography), builds a rectangle from it, and broadcasts it as
 *     row `tt` of the trajectory.
 *
 * The function calls mpi::broadcast and mpi::gather on `world`, so presumably
 * every rank of `world` has to call it with matching arguments -- TODO confirm
 * against the caller.
 *
 * NOTE(review): the parameters `ds` and `grd` are not read (a local named `ds`
 * shadows the parameter; `grd` appears only in a commented-out call), and the
 * locals `nobj`, `num_scales` and `hh` are computed but never used.
 */
void initialize_new_objects(mpi::communicator& world, parameter_t const& P, directory_structure_t const& ds, geometric_info_t const& gi, object_info_t& oi, vector<std::vector<std::string> > const &seq, int tt, vector<CImg<unsigned char> > const& images, vector<matrix<float> > const& grd, vector<matrix<float> >& detected_rects) { int Ncam = seq.size(); vector<object_trj_t> & trlet_list=oi.trlet_list; int nobj = trlet_list.size(); int num_new_obj = detected_rects(0).size1(); int T = seq[0].size(); int np = oi.model.size(); int num_scales = P.scales.size(); //std::cout<<"detected_rects="<<detected_rects<<std::endl; for(int oo=0; oo<num_new_obj; ++oo) { int nn = oi.curr_num_obj + oo; trlet_list(nn).startt = tt; trlet_list(nn).endt = tt; trlet_list(nn).state = 1; trlet_list(nn).trj = vector<matrix<float> >(Ncam); for(int cam=0; cam<Ncam; ++cam) { trlet_list(nn).trj(cam) = scalar_matrix<float>(T, 4, 0); } trlet_list(nn).trj_3d = scalar_matrix<float>(T, 2, 0); trlet_list(nn).hist_p = vector<matrix<float> >(Ncam); trlet_list(nn).hist_q = vector<matrix<float> >(Ncam); trlet_list(nn).fscores = vector<matrix<float> >(Ncam); trlet_list(nn).scores = scalar_matrix<float>(Ncam, T, 0); vector<candidate_array<Float> > cand_array(Ncam); for(int cam=0; cam<Ncam; ++cam) { trlet_list(nn).fscores(cam) = scalar_matrix<float>(np*2, T, 0); float w = detected_rects(cam)(oo, 2)-detected_rects(cam)(oo, 0); float h = detected_rects(cam)(oo, 3)-detected_rects(cam)(oo, 1); row(trlet_list(nn).trj(cam), tt) = row(detected_rects(cam), oo); matrix<float> rects; compute_part_rects(detected_rects(cam)(oo, 0), detected_rects(cam)(oo, 1), w, h, oi.model, rects); pmodel_t pmodel; vector<float> br(row(detected_rects(cam), oo)); rects_to_pmodel_geom(br, gi.horiz_mean, pmodel); oi.pmodel_list(cam, nn) = pmodel; //collect_sift(grd(cam), ); matrix<float> hist_p, hist_q; collect_hist(images(cam), rects, hist_p, hist_q); trlet_list(nn).hist_p(cam) = hist_p; trlet_list(nn).hist_q(cam) = hist_q; matrix<Float> 
cand_rects; vector<Float> cand_scale; matrix<int> cand_ijs; if(0==world.rank()) { std::vector<float> sxr, syr; for(float v=-P.xrange/2; v<=P.xrange/2; v+=P.xstep) { sxr.push_back(v); } for(float v=-P.yrange/2; v<=P.yrange/2; v+=P.ystep) { syr.push_back(v); } vector<float> xr(sxr.size()), yr(syr.size()); std::copy(sxr.begin(), sxr.end(), xr.begin()); std::copy(syr.begin(), syr.end(), yr.begin()); float feetx = (trlet_list(nn).trj(cam)(tt, 0) +trlet_list(nn).trj(cam)(tt, 2))/2; float feety = trlet_list(nn).trj(cam)(tt, 3); enumerate_rects_inpoly(images(cam), oi.pmodel_list(cam, nn), feetx, feety, xr, yr, P.scales, gi.horiz_mean, gi.horiz_sig, gi.polys_im(tt, cam), cand_rects, cand_scale, cand_ijs, cand_array(cam)); } mpi::broadcast(world, cand_rects, 0); real_timer_t timer; vector<Float> cand_hist_score(cand_rects.size1()); matrix<Float> hist_fscores; range rrank(world.rank()*cand_rects.size1()/world.size(), (world.rank()+1)*cand_rects.size1()/world.size()); matrix<Float> cand_rects_rank(project(cand_rects, rrank, range(0, 4))); vector<Float> cand_hist_score_rank; matrix<Float> hist_fscores_rank; get_cand_hist_score(images(cam), oi.model, P.logp1, P.logp2, trlet_list(nn).hist_p(cam), trlet_list(nn).hist_q(cam), cand_rects_rank, cand_hist_score_rank, hist_fscores_rank); if(world.rank()==0) { std::vector<vector<Float> > v1; std::vector<matrix<Float> > v2; mpi::gather(world, cand_hist_score_rank, v1, 0); mpi::gather(world, hist_fscores_rank, v2, 0); hist_fscores = matrix<Float>(cand_rects.size1(), hist_fscores_rank.size2()); for(int r=0; r<world.size(); ++r) { int start = r*cand_rects.size1()/world.size(); for(int vv=0; vv<v1[r].size(); ++vv) { cand_hist_score(start+vv) = v1[r](vv); } for(int vv=0; vv<v2[r].size1(); ++vv) { row(hist_fscores, start+vv) = row(v2[r], vv); } } } else { mpi::gather(world, cand_hist_score_rank, 0); mpi::gather(world, hist_fscores_rank, 0); } mpi::broadcast(world, cand_hist_score, 0); mpi::broadcast(world, hist_fscores, 0); vector<Float> 
cand_score=cand_hist_score; if(0==world.rank()) std::cout<<"\t\t"<<cand_rects.size1()<<" rects, \tget_cand_hist_score time:" <<timer.elapsed()/1000.0f<<"s."<<std::endl; if(0==world.rank()) { int idx_max = std::max_element(cand_score.begin(), cand_score.end()) - cand_score.begin(); column(trlet_list(nn).fscores(cam), tt) = row(hist_fscores, idx_max); trlet_list(nn).scores(cam, tt) = cand_score(idx_max); cand_array(cam).fill_score(cand_score, cand_ijs); } mpi::broadcast(world, cand_array(cam), 0); mpi::broadcast(world, trlet_list(nn).scores(cam, tt), 0); vector<Float> fscore_col; if(0==world.rank()) { fscore_col = column(trlet_list(nn).fscores(cam), tt); } mpi::broadcast(world, fscore_col, 0); if(0!=world.rank()) { column(trlet_list(nn).fscores(cam), tt) = fscore_col; } }//end for cam int best_y, best_x, best_s; if(0==world.rank()) { ground_scoremap_t<Float> grd_scoremap; combine_ground_score(tt, cand_array, grd_scoremap, gi); grd_scoremap.peak(best_y, best_x, best_s); } mpi::broadcast(world, best_y, 0); mpi::broadcast(world, best_x, 0); trlet_list(nn).trj_3d(tt, 0) = best_x; trlet_list(nn).trj_3d(tt, 1) = best_y; for(int cam=0; cam<Ncam; ++cam) { vector<Float> trj_row(4); if(0==world.rank()) { vector<double> bx(1), by(1), ix, iy; bx <<= best_x; by <<= best_y; apply_homography(gi.grd2img(tt, cam), bx, by, ix, iy); float hpre = oi.pmodel_list(cam, nn).hpre; float cur_fy = iy(0); float cur_fx = ix(0); float cur_hy = gi.horiz_mean+hpre*(cur_fy-gi.horiz_mean); float ds = P.scales(best_s)*(cur_fy-cur_hy)/oi.pmodel_list(cam, nn).bh; float ww = ds*oi.pmodel_list(cam, nn).bw; float hh = cur_fy - cur_hy; trj_row <<= (cur_fx-ww/2), cur_hy, (cur_fx+ww/2), cur_fy; } mpi::broadcast(world, trj_row, 0); row(trlet_list(nn).trj(cam), tt) = trj_row; }//endfor cam }//endfor oo oi.curr_num_obj += num_new_obj; }
/** * find prime numbers in a range between [2,limit] * * http://en.wikipedia.org/wiki/Sieve_of_Atkin * * @param limit upper limit * @param is_prime return a array[limit+1] with a representation of number (if is_prime[n] == true then n is prime, false otherwise) */ void find_prime_numbers(mpi::communicator world, int limit, ms_vector *primes) { int sqrt_limit = ceil(sqrt(limit)); vector<bool> is_prime(limit + 1, false); vector<vector<bool> > matrix_is_prime(world.size()); is_prime[2] = true; is_prime[3] = true; int size = world.size(); // if the number of process > sqrt_limit if (size > sqrt_limit) // simulate to have sqrt_limit processes size = sqrt_limit; // compute how many numbers scan for each process int howmuch = sqrt_limit / size; // compute where the process start to look int start = 1 + (howmuch * world.rank()); // compute where the process stop to look int stop = howmuch * (world.rank() + 1); // if stop is out of limit, set stop as limit if (stop > limit) stop = limit; // execute algorithm for (int x = start; x <= stop; x++) { # pragma omp parallel for default(none) shared(sqrt_limit, limit, is_prime, x) for (int y = 1; y <= sqrt_limit; y++) { int n = 4 * x * x + y * y; if (n <= limit && ((n % 12) == 1 || (n % 12) == 5)){ # pragma omp critical { is_prime[n] = !is_prime[n]; } } n = 3 * x * x + y * y; if (n <= limit && (n % 12) == 7){ # pragma omp critical { is_prime[n] = !is_prime[n]; } } n = 3 * x * x - y * y; if (x > y && n <= limit && (n % 12) == 11){ # pragma omp critical { is_prime[n] = !is_prime[n]; } } } } // gather: receive all generated matrix mpi::gather(world, is_prime, matrix_is_prime, 0); // rott process finalize the algorithm if (world.rank() == 0) { // take the last update for (unsigned int i = 1; i < matrix_is_prime.size(); i++) { # pragma omp parallel for default(none) shared(matrix_is_prime, limit, i) for (int j = 1; j <= limit; j++) { if (matrix_is_prime[i - 1][j]) { # pragma omp critical { matrix_is_prime[i][j] = 
!matrix_is_prime[i][j]; } } } } // remove the others no prime numbers int index = matrix_is_prime.size() - 1; # pragma omp parallel for default(none) shared(sqrt_limit, matrix_is_prime, limit, index) for (int n = 5; n <= sqrt_limit; n++) { if (matrix_is_prime[index][n]) { int k = n * n; for (int i = k; i <= limit; i += k) { # pragma omp critical { matrix_is_prime[index][i] = false; } } } } // put number 2 and 3 if (!matrix_is_prime[matrix_is_prime.size() - 1][2]) { primes->push_back(2); primes->push_back(3); } // convert the structure in a array with inside only the prime numbers is_prime2primes(matrix_is_prime[matrix_is_prime.size() - 1], limit, primes); } }
/**
 * Master-side driver of a Monte Carlo run.
 *
 * The master hands out bins to workers on request, simulates bins itself while
 * work remains, and finally waits until all bins are completed and every
 * process has been told that no work is left. The bin means of q^2 are then
 * gathered on rank 0 and averaged.
 *
 * @param par     simulation parameters (init, drysweeps, bins, binwidth are read here)
 * @param mpicomm communicator shared with the workers; asserted to have rank 0 here
 * @return result with `success == true` and `B` set on completion; `success`
 *         stays false if preparing the replica or reserving bin storage fails
 *
 * NOTE(review): q4 is sampled per step but no q4 bin mean is ever stored or
 * gathered, so the final `q4` is 0 and `B` is computed from that. Completing it
 * needs a matching gather on the worker side, which is not visible from here.
 */
sim_results simrun_master( const sim_parameters& par, const mpi::communicator& mpicomm )
{
  // ----- PREPARE SIMULATION -----

  // assume something went wrong until we are sure it didn't
  sim_results res;
  res.success = false;

  // ----- RUN SIMULATION -----

  Replica* rep = new Replica(par);
  if ( rep->prepare( par.init ) == false ) {
    delete rep;
    return res;
  }

  unsigned int finished_workers = 0;
  unsigned int scheduled_bins = 0;
  unsigned int completed_bins = 0;
  unsigned int enqueued_bins = par.bins;

  // define procedure to query the slaves for new work requests
  function<void()> mpiquery_work_requests( [&]() {
    while ( boost::optional<mpi::status> status
            = mpicomm.iprobe( mpi::any_source, MSGTAG_S_M_REQUEST_BINS ) ) {
      // receive the request and hand out new bins to the source
      mpicomm.recv( status->source(), MSGTAG_S_M_REQUEST_BINS );
      if ( enqueued_bins > 0 ) {
        mpicomm.send( status->source(), MSGTAG_M_S_DISPATCHED_BINS, 1 );
        scheduled_bins += 1;
        enqueued_bins -= 1;
      } else {
        // nothing left: tell the worker so and count it as finished
        mpicomm.send( status->source(), MSGTAG_M_S_DISPATCHED_BINS, 0 );
        ++finished_workers;
      }
    }
  } );

  // define procedure to query the slaves for finished work
  // (uses the same named tag as the drain loop at the end of this function
  // instead of a bare literal)
  function<void()> mpiquery_finished_work( [&]() {
    while ( boost::optional<mpi::status> status
            = mpicomm.iprobe( mpi::any_source, MSGTAG_S_M_FINISHED_BINS ) ) {
      mpicomm.recv( status->source(), MSGTAG_S_M_FINISHED_BINS );
      --scheduled_bins;
      ++completed_bins;
    }
  } );

  cout << ":: Performing Monte Carlo cycle" << endl;
  cout << endl;
  cout << " Progress:" << endl;

  // perform dry runs to reach thermal equilibrium
  for ( unsigned int mcs = 0; mcs < par.drysweeps; mcs++ ) {
    // take care of the slaves
    mpiquery_finished_work();
    mpiquery_work_requests();
    rep->mcs();
  }

  std::vector<double> q2_binmeans;
  std::vector<double> q4_binmeans; // NOTE(review): never filled, see header

  // the master works on bins itself as long as any are left in the queue
  while ( enqueued_bins > 0 ) {

    cout << '\r' << " Bin " << completed_bins << "/" << par.bins;
    cout.flush();

    --enqueued_bins;
    ++scheduled_bins;

    // initialize binning array
    vector<double> q2_currentbin;
    vector<double> q4_currentbin;
    try {
      // try to allocate enough memory ...
      q2_currentbin.reserve( par.binwidth );
      q4_currentbin.reserve( par.binwidth );
    } catch ( const bad_alloc& ) {
      delete rep;
      return res;
    }

    for ( unsigned int mcs = 0; mcs < par.binwidth; ++mcs ) {
      // take care of the slaves
      mpiquery_finished_work();
      mpiquery_work_requests();

      // perform a Monte Carlo step
      rep->mcs();

      // measure observables
      double q2 = 0, q4 = 0;
      double thissample_q = rep->Q();
      // remember the sample's properties to calculate their mean value
      q2 = thissample_q * thissample_q;
      q4 = thissample_q * thissample_q * thissample_q * thissample_q;
      q2_currentbin.push_back(q2);
      q4_currentbin.push_back(q4);
    }

    q2_binmeans.push_back(
      accumulate( q2_currentbin.begin(), q2_currentbin.end(), 0.0 ) /
      static_cast<double>( q2_currentbin.size() )
    );
    q2_currentbin.clear();

    --scheduled_bins;
    ++completed_bins;
  }
  // the master itself has no more work to do
  ++finished_workers;

  // drain: wait for outstanding bins and answer late work requests
  while ( completed_bins != par.bins ||
          static_cast<int>( finished_workers ) < mpicomm.size() ) {
    if ( boost::optional<mpi::status> status
         = mpicomm.iprobe( mpi::any_source, MSGTAG_S_M_FINISHED_BINS ) ) {
      mpicomm.recv( status->source(), MSGTAG_S_M_FINISHED_BINS );
      --scheduled_bins;
      ++completed_bins;

      cout << "\n";
      cout << '\r' << " Bin " << completed_bins << "/" << par.bins;
      cout.flush();
    }

    if ( boost::optional<mpi::status> status
         = mpicomm.iprobe( mpi::any_source, MSGTAG_S_M_REQUEST_BINS ) ) {
      // receive the request for more work
      mpicomm.recv( status->source(), MSGTAG_S_M_REQUEST_BINS );
      // tell him there is no more work
      mpicomm.send( status->source(), MSGTAG_M_S_DISPATCHED_BINS, 0 );
      ++finished_workers;
    }
  }
  assert( enqueued_bins == 0 );
  assert( scheduled_bins == 0 );

  cout << '\r' << " Bin " << completed_bins << "/" << par.bins << endl;
  cout.flush();

  // all measurements done ... let's tidy things up
  delete rep;

  assert( mpicomm.rank() == 0 );

  // collect the q^2 bin means of every process and concatenate them
  vector< vector<double> > q2_binmeans_collector;
  mpi::gather( mpicomm, q2_binmeans, q2_binmeans_collector, 0 );

  vector<double> q2_binmeans_all;
  for ( auto it = q2_binmeans_collector.begin();
        it != q2_binmeans_collector.end();
        ++it ) {
    q2_binmeans_all.insert( q2_binmeans_all.end(), it->begin(), it->end() );
  }

  double q2 = 0, q4 = 0;
  q2 = static_cast<double>(
         accumulate( q2_binmeans_all.begin(), q2_binmeans_all.end(), 0.0 )
       ) / static_cast<double>( q2_binmeans_all.size() );

  double B = 0;
  B = (3 - q4 / (q2 * q2)) / 2;

  res.B = B;
  res.success = true;
  return res;
}
void outer_mpi(mpi::communicator world, pt1D* ptVec, /* vector of points */ real2D* matrix, /* matrix to fill */ real1D* realVec, /* vector to fill */ int n /* size */ ){ int lo, hi; /* work controls */ int r, c; /* loop indices */ real d; /* distance */ real d_max_local = -1.0; // maximum distance real d_max; // maximum distance bool work; /* do useful work? */ int i, j; /* all elements except matrix diagonal */ work = get_block_rows_mpi (world, 0, n, &lo, &hi); if (work) { for (r = lo; r < hi; r++) { realVec[r] = ptMag(&(ptVec[r])); for (c = 0; c < r; c++) { d = ptDist (&(ptVec[r]), &(ptVec[c])); if (d > d_max_local) { d_max_local = d; } // fill columns 0 to r only matrix[r][c] = d; } } } // reduce to maximum d's all_reduce (world, d_max_local, d_max, mpi::maximum<real>()); /* matrix diagonal */ d = d_max * n; if (work) { for (r = lo; r < hi; r++) { matrix[r][r] = d; } } // broadcast matrix, realVec for (i = 0; i < world.size (); i++) { if (get_block_rows_mpi (world, 0, n, &lo, &hi, i)) { broadcast (world, &realVec[lo], hi - lo, i); // broadcast row by row since n may be smaller than MAXEXT for (j = lo; j < hi; j++) { broadcast (world, matrix[j], n, i); } } } // fill in the rest to make symmetric matrix for (r = 0; r < n; r++) { for (c = 0; c < r; c++) { matrix[c][r] = matrix[r][c]; } } /* return */ }
/// Scatter a mesh over the communicator c friend gf_mesh mpi_scatter(gf_mesh m, mpi::communicator c, int root) { auto m2 = gf_mesh{m.domain(), m.size(), m.positive_only()}; std::tie(m2._first_index_window, m2._last_index_window) = mpi::slice_range(m2._first_index, m2._last_index, c.size(), c.rank()); return m2; }
void mandel_mpi (mpi::communicator world, int2D* matrix, /* to fill */ int nr, /* row size */ int nc, /* column size */ real base_x, /* lower left corner */ real base_y, /* lower left corner */ real ext_x, /* extent */ real ext_y) /* extent */ { int r, c; /* row and column indices */ real dx, dy; /* per-step deltas */ #if GRAPHICS int gfxCount = 0; /* number of times graphics called */ #endif int row_count = 0; int i; mpi::status status; int source; const int WORK_REQUEST_TAG = 0; const int WORK_RESPONSE_TAG = 1; const int NO_MORE_WORK = -1; int processed_rows = 0; dx = ext_x / (nr - 1); dy = ext_y / (nc - 1); if (world.size () > 1) { if (world.rank () == 0) { // control process // send out work while (row_count < nr) { status = world.recv (mpi::any_source, WORK_REQUEST_TAG); source = status.source (); // send next row world.isend (source, WORK_RESPONSE_TAG, row_count); row_count++; } // send out no more work for (i = 1; i < world.size (); i++) { status = world.recv (mpi::any_source, WORK_REQUEST_TAG); source = status.source (); world.isend (source, WORK_RESPONSE_TAG, NO_MORE_WORK); } // receive results for (r = 0; r < nr; r++) { world.recv (mpi::any_source, r + 1, matrix[r], nc); } } else { // work process while (true) { // request next row world.send (0, WORK_REQUEST_TAG); world.recv (0, WORK_RESPONSE_TAG, r); if (r != NO_MORE_WORK) { for (c = 0; c < nc; c++) { matrix[r][c] = mandel_calc_mpi (base_x + (r * dx), base_y + (c * dy)); } processed_rows++; // send results world.isend (0, r + 1, matrix[r], nc); } else { break; } } #if defined(TEST_OUTPUT) || defined(TEST_TIME) printf ("processed rows: %d\n", processed_rows); #endif } // broadcast matrix for (r = 0; r < nr; r++) { broadcast (world, matrix[r], nc, 0); } } else { for (r = 0; r < nr; r++) { for (c = 0; c < nc; c++) { matrix[r][c] = mandel_calc_mpi (base_x + (r * dx), base_y + (c * dy)); } } } #if GRAPHICS gfx_mandel(gfxCount++, matrix, nr, nc); #endif /* return */ }