void Simulation3DInitializer::setOffsets(const mpi::communicator & xLine,
                                         const mpi::communicator & yLine,
                                         const mpi::communicator & zLine) {
  x_offset = (dx*blockSize*xLine.rank());
  y_offset = (dy*blockSize*yLine.rank());
  z_offset = (dz*blockSize*zLine.rank());
}
BoundaryLocation determineBoundary(mpi::communicator& world) {
  // With a single process, this rank is both the first and the last one.
  if (world.rank() == 0 && world.rank() == world.size() - 1) return DOUBLE_BDY;
  else if (world.rank() == 0) return LOWER_BDY;
  else if (world.rank() == world.size() - 1) return UPPER_BDY;
  return NO_BDY;
}
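// A minimal, MPI-free sketch of the same boundary classification, useful for
// unit-testing the logic in isolation. The helper name classifyRank is ours,
// for illustration only; it reuses the BoundaryLocation enum from above.
inline BoundaryLocation classifyRank(int rank, int size) {
  if (rank == 0 && rank == size - 1) return DOUBLE_BDY; // single-process line
  if (rank == 0) return LOWER_BDY;
  if (rank == size - 1) return UPPER_BDY;
  return NO_BDY;
}
// e.g. classifyRank(0, 1) == DOUBLE_BDY, classifyRank(3, 4) == UPPER_BDY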
template <typename A>
REQUIRES_IS_ARRAY mpi_broadcast(A &a, mpi::communicator c = {}, int root = 0) {
  if (!has_contiguous_data(a)) TRIQS_RUNTIME_ERROR << "Non contiguous view in mpi_broadcast";
  auto sh = a.shape();
  MPI_Bcast(&sh[0], sh.size(), mpi::mpi_datatype<typename decltype(sh)::value_type>(), root, c.get());
  if (c.rank() != root) resize_or_check_if_view(a, sh);
  MPI_Bcast(a.data_start(), a.domain().number_of_elements(), mpi::mpi_datatype<typename A::value_type>(), root, c.get());
}
/**
 * Heuristic strategy 2:
 * Randomly select the first 4 numbers, then derive the remaining
 * cells from that selection.
 * There are 8 equations and 9 variables, but only 5 variables need
 * to be chosen; the others can be calculated from those.
 * @strategy
 */
void test_heuristic_strategy_2(mpi::communicator world, int limit) {
  // prime numbers data structure
  ms_vector primes;
  // vector collecting all generated matrices
  vector<ms_matrix> list;
  // my rank
  int rank = world.rank();
  if (rank == 0) {
    cout << "Test the heuristic strategy 2...\n";
  }
  // generate prime numbers
  find_prime_numbers(world, limit, &primes);
  // send the prime numbers to all processes
  mpi::broadcast(world, primes, 0);
  int length = 3;
  ms_matrix matrix(length, ms_vector(length));
  fill_in_heuristic_mode_2(&primes, &matrix, rank);
  // receive all generated matrices
  mpi::gather(world, matrix, list, 0);
  if (rank == 0) {
    // print all generated matrices
    cout << "Print all generated matrices:\n";
    print_list_matrix(list);
  }
}
/// compute the array domain of the target array
domain_type domain() const {
  auto dims = ref.shape();
  long slow_size = first_dim(ref);
  if (std::is_same<Tag, mpi::tag::scatter>::value) {
    mpi::mpi_broadcast(slow_size, c, root);
    dims[0] = mpi::slice_length(slow_size - 1, c.size(), c.rank());
  }
  if (std::is_same<Tag, mpi::tag::gather>::value) {
    if (!all) {
      dims[0] = mpi::mpi_reduce(slow_size, c, root); // valid only on root
      if (c.rank() != root) dims[0] = 1;             // valid only on root
    } else
      dims[0] = mpi::mpi_all_reduce(slow_size, c, root); // in this case, it is valid on all nodes
  }
  // mpi::tag::reduce : do nothing
  return domain_type{dims};
}
MPISlave::MPISlave(const mpi::communicator& comm, const Experiment& exp)
  : m_comm(comm)
  , m_exp(exp)
  , m_resonanceField(m_exp)
{
  if (comm.rank() == MASTER_RANK) {
    cerr << "MPISlave created in master rank!" << endl;
    comm.abort(1);
    return;
  }
}
std::string runPartialBatch(mpi::communicator world, boost::shared_ptr< MatcherInterface > &matcher,
    ReadSet &_contigs, std::string _contigFile,
    ReadSet &changedContigs, ReadSet &finalContigs,
    int batchIdx, int maxContigsPerBatch,
    SequenceLengthType minKmerSize, double minimumCoverage,
    SequenceLengthType maxKmerSize, SequenceLengthType maxExtend, SequenceLengthType kmerStep) {
  LOG_DEBUG(1, "Starting runPartialBatch(" << batchIdx << " of " << _contigs.getSize() << "): " << MemoryUtils::getMemoryUsage());
  ReadSet contigs; // new global contigs file, a subset of the original
  std::string extendLog;
  for (int i = batchIdx; i < (int) _contigs.getSize() && i < batchIdx + maxContigsPerBatch; i++)
    contigs.append(_contigs.getRead(i));
  setGlobalReadSetConstants(world, contigs);
  if (contigs.getGlobalSize() == 0)
    return extendLog;
  std::string contigFile = DistributedOfstreamMap::writeGlobalReadSet(world, contigs,
      UniqueName::generateUniqueGlobalName(".tmp-batch" + UniqueName::getOurUniqueHandle() + "-", batchIdx),
      ".fasta", FormatOutput::Fasta());
  MatcherInterface::MatchReadResults contigReadSet = matcher->match(contigs, contigFile);
  assert(contigs.getSize() == contigReadSet.size());
  LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, " batch " << contigs.getSize() << ". Matches made");
  int numThreads = omp_get_max_threads();
  std::string extendLogs[numThreads];
  if (!Cap3Options::getOptions().getCap3Path().empty()) {
    Cap3 cap3Instances[numThreads];
    #pragma omp parallel for
    for (int i = 0; i < numThreads; i++) {
      extendLogs[i] = cap3Instances[i].extendContigs(contigs, contigReadSet, changedContigs, finalContigs, minimumCoverage, i, numThreads);
    }
  } else if (!NewblerOptions::getOptions().getNewblerPath().empty()) {
    Newbler newblerInstances[numThreads];
    #pragma omp parallel for
    for (int i = 0; i < numThreads; i++) {
      extendLogs[i] = newblerInstances[i].extendContigs(contigs, contigReadSet, changedContigs, finalContigs, minimumCoverage, i, numThreads);
    }
  } else {
    extendLog = extendContigsWithContigExtender(contigs, contigReadSet,
        changedContigs, finalContigs,
        minKmerSize, minimumCoverage, maxKmerSize, maxExtend, kmerStep);
  }
  for (int i = 0; i < numThreads; i++)
    extendLog += extendLogs[i];
  unlink(contigFile.c_str());
  return extendLog;
}
void initialize_new_objects(mpi::communicator& world,
                            parameter_t const& P,
                            directory_structure_t const& ds,
                            geometric_info_t const& gi,
                            object_info_t& oi,
                            vector<std::vector<std::string> > const& seq, int tt,
                            vector<CImg<unsigned char> > const& images,
                            vector<matrix<float> > const& grd,
                            vector<matrix<float> >& detected_rects)
{
  int Ncam = seq.size();
  vector<object_trj_t>& trlet_list = oi.trlet_list;
  int nobj = trlet_list.size();
  int num_new_obj = detected_rects(0).size1();
  int T = seq[0].size();
  int np = oi.model.size();
  int num_scales = P.scales.size();
  //std::cout<<"detected_rects="<<detected_rects<<std::endl;

  for(int oo=0; oo<num_new_obj; ++oo)
  {
    int nn = oi.curr_num_obj + oo;
    trlet_list(nn).startt = tt;
    trlet_list(nn).endt = tt;
    trlet_list(nn).state = 1;
    trlet_list(nn).trj = vector<matrix<float> >(Ncam);
    for(int cam=0; cam<Ncam; ++cam)
    {
      trlet_list(nn).trj(cam) = scalar_matrix<float>(T, 4, 0);
    }
    trlet_list(nn).trj_3d = scalar_matrix<float>(T, 2, 0);
    trlet_list(nn).hist_p = vector<matrix<float> >(Ncam);
    trlet_list(nn).hist_q = vector<matrix<float> >(Ncam);
    trlet_list(nn).fscores = vector<matrix<float> >(Ncam);
    trlet_list(nn).scores = scalar_matrix<float>(Ncam, T, 0);

    vector<candidate_array<Float> > cand_array(Ncam);
    for(int cam=0; cam<Ncam; ++cam)
    {
      trlet_list(nn).fscores(cam) = scalar_matrix<float>(np*2, T, 0);
      float w = detected_rects(cam)(oo, 2) - detected_rects(cam)(oo, 0);
      float h = detected_rects(cam)(oo, 3) - detected_rects(cam)(oo, 1);
      row(trlet_list(nn).trj(cam), tt) = row(detected_rects(cam), oo);

      matrix<float> rects;
      compute_part_rects(detected_rects(cam)(oo, 0), detected_rects(cam)(oo, 1), w, h, oi.model, rects);
      pmodel_t pmodel;
      vector<float> br(row(detected_rects(cam), oo));
      rects_to_pmodel_geom(br, gi.horiz_mean, pmodel);
      oi.pmodel_list(cam, nn) = pmodel;
      //collect_sift(grd(cam), );

      matrix<float> hist_p, hist_q;
      collect_hist(images(cam), rects, hist_p, hist_q);
      trlet_list(nn).hist_p(cam) = hist_p;
      trlet_list(nn).hist_q(cam) = hist_q;

      matrix<Float> cand_rects;
      vector<Float> cand_scale;
      matrix<int> cand_ijs;
      if(0==world.rank())
      {
        std::vector<float> sxr, syr;
        for(float v=-P.xrange/2; v<=P.xrange/2; v+=P.xstep)
        {
          sxr.push_back(v);
        }
        for(float v=-P.yrange/2; v<=P.yrange/2; v+=P.ystep)
        {
          syr.push_back(v);
        }
        vector<float> xr(sxr.size()), yr(syr.size());
        std::copy(sxr.begin(), sxr.end(), xr.begin());
        std::copy(syr.begin(), syr.end(), yr.begin());
        float feetx = (trlet_list(nn).trj(cam)(tt, 0) + trlet_list(nn).trj(cam)(tt, 2))/2;
        float feety = trlet_list(nn).trj(cam)(tt, 3);
        enumerate_rects_inpoly(images(cam), oi.pmodel_list(cam, nn),
                               feetx, feety, xr, yr, P.scales,
                               gi.horiz_mean, gi.horiz_sig, gi.polys_im(tt, cam),
                               cand_rects, cand_scale, cand_ijs, cand_array(cam));
      }
      mpi::broadcast(world, cand_rects, 0);

      real_timer_t timer;
      vector<Float> cand_hist_score(cand_rects.size1());
      matrix<Float> hist_fscores;

      range rrank(world.rank()*cand_rects.size1()/world.size(),
                  (world.rank()+1)*cand_rects.size1()/world.size());
      matrix<Float> cand_rects_rank(project(cand_rects, rrank, range(0, 4)));
      vector<Float> cand_hist_score_rank;
      matrix<Float> hist_fscores_rank;
      get_cand_hist_score(images(cam), oi.model, P.logp1, P.logp2,
                          trlet_list(nn).hist_p(cam), trlet_list(nn).hist_q(cam),
                          cand_rects_rank,
                          cand_hist_score_rank, hist_fscores_rank);
      if(world.rank()==0)
      {
        std::vector<vector<Float> > v1;
        std::vector<matrix<Float> > v2;
        mpi::gather(world, cand_hist_score_rank, v1, 0);
        mpi::gather(world, hist_fscores_rank, v2, 0);
        hist_fscores = matrix<Float>(cand_rects.size1(), hist_fscores_rank.size2());
        for(int r=0; r<world.size(); ++r)
        {
          int start = r*cand_rects.size1()/world.size();
          for(int vv=0; vv<v1[r].size(); ++vv)
          {
            cand_hist_score(start+vv) = v1[r](vv);
          }
          for(int vv=0; vv<v2[r].size1(); ++vv)
          {
            row(hist_fscores, start+vv) = row(v2[r], vv);
          }
        }
      }
      else
      {
        mpi::gather(world, cand_hist_score_rank, 0);
        mpi::gather(world, hist_fscores_rank, 0);
      }
      mpi::broadcast(world, cand_hist_score, 0);
      mpi::broadcast(world, hist_fscores, 0);

      vector<Float> cand_score = cand_hist_score;
      if(0==world.rank())
        std::cout<<"\t\t"<<cand_rects.size1()<<" rects, \tget_cand_hist_score time:"
                 <<timer.elapsed()/1000.0f<<"s."<<std::endl;
      if(0==world.rank())
      {
        int idx_max = std::max_element(cand_score.begin(), cand_score.end()) - cand_score.begin();
        column(trlet_list(nn).fscores(cam), tt) = row(hist_fscores, idx_max);
        trlet_list(nn).scores(cam, tt) = cand_score(idx_max);
        cand_array(cam).fill_score(cand_score, cand_ijs);
      }
      mpi::broadcast(world, cand_array(cam), 0);
      mpi::broadcast(world, trlet_list(nn).scores(cam, tt), 0);
      vector<Float> fscore_col;
      if(0==world.rank())
      {
        fscore_col = column(trlet_list(nn).fscores(cam), tt);
      }
      mpi::broadcast(world, fscore_col, 0);
      if(0!=world.rank())
      {
        column(trlet_list(nn).fscores(cam), tt) = fscore_col;
      }
    } //end for cam

    int best_y, best_x, best_s;
    if(0==world.rank())
    {
      ground_scoremap_t<Float> grd_scoremap;
      combine_ground_score(tt, cand_array, grd_scoremap, gi);
      grd_scoremap.peak(best_y, best_x, best_s);
    }
    mpi::broadcast(world, best_y, 0);
    mpi::broadcast(world, best_x, 0);

    trlet_list(nn).trj_3d(tt, 0) = best_x;
    trlet_list(nn).trj_3d(tt, 1) = best_y;

    for(int cam=0; cam<Ncam; ++cam)
    {
      vector<Float> trj_row(4);
      if(0==world.rank())
      {
        vector<double> bx(1), by(1), ix, iy;
        bx <<= best_x;
        by <<= best_y;
        apply_homography(gi.grd2img(tt, cam), bx, by, ix, iy);
        float hpre = oi.pmodel_list(cam, nn).hpre;
        float cur_fy = iy(0);
        float cur_fx = ix(0);
        float cur_hy = gi.horiz_mean + hpre*(cur_fy - gi.horiz_mean);
        float ds = P.scales(best_s)*(cur_fy - cur_hy)/oi.pmodel_list(cam, nn).bh;
        float ww = ds*oi.pmodel_list(cam, nn).bw;
        float hh = cur_fy - cur_hy;
        trj_row <<= (cur_fx - ww/2), cur_hy, (cur_fx + ww/2), cur_fy;
      }
      mpi::broadcast(world, trj_row, 0);
      row(trlet_list(nn).trj(cam), tt) = trj_row;
    } //end for cam
  } //end for oo

  oi.curr_num_obj += num_new_obj;
}
/**
 * Find the prime numbers in the range [2, limit] using a
 * distributed Sieve of Atkin.
 *
 * http://en.wikipedia.org/wiki/Sieve_of_Atkin
 *
 * @param limit  upper limit of the range
 * @param primes output vector receiving the primes found
 *               (internally a sieve is_prime[limit+1] is used, where
 *               is_prime[n] == true means n is prime)
 */
void find_prime_numbers(mpi::communicator world, int limit, ms_vector *primes) {
  int sqrt_limit = ceil(sqrt(limit));
  vector<bool> is_prime(limit + 1, false);
  vector<vector<bool> > matrix_is_prime(world.size());
  is_prime[2] = true;
  is_prime[3] = true;
  int size = world.size();
  // if the number of processes > sqrt_limit
  if (size > sqrt_limit)
    // pretend there are only sqrt_limit processes
    size = sqrt_limit;
  // compute how many numbers each process scans
  int howmuch = sqrt_limit / size;
  // compute where this process starts looking
  int start = 1 + (howmuch * world.rank());
  // compute where this process stops looking
  int stop = howmuch * (world.rank() + 1);
  // if stop is beyond the limit, clamp it to the limit
  if (stop > limit)
    stop = limit;
  // execute the algorithm
  for (int x = start; x <= stop; x++) {
    #pragma omp parallel for default(none) shared(sqrt_limit, limit, is_prime, x)
    for (int y = 1; y <= sqrt_limit; y++) {
      int n = 4 * x * x + y * y;
      if (n <= limit && ((n % 12) == 1 || (n % 12) == 5)) {
        #pragma omp critical
        {
          is_prime[n] = !is_prime[n];
        }
      }
      n = 3 * x * x + y * y;
      if (n <= limit && (n % 12) == 7) {
        #pragma omp critical
        {
          is_prime[n] = !is_prime[n];
        }
      }
      n = 3 * x * x - y * y;
      if (x > y && n <= limit && (n % 12) == 11) {
        #pragma omp critical
        {
          is_prime[n] = !is_prime[n];
        }
      }
    }
  }
  // gather: receive all partial sieves
  mpi::gather(world, is_prime, matrix_is_prime, 0);
  // the root process finalizes the algorithm
  if (world.rank() == 0) {
    // fold in the updates of every process
    for (unsigned int i = 1; i < matrix_is_prime.size(); i++) {
      #pragma omp parallel for default(none) shared(matrix_is_prime, limit, i)
      for (int j = 1; j <= limit; j++) {
        if (matrix_is_prime[i - 1][j]) {
          #pragma omp critical
          {
            matrix_is_prime[i][j] = !matrix_is_prime[i][j];
          }
        }
      }
    }
    // remove the remaining non-primes (multiples of squares)
    int index = matrix_is_prime.size() - 1;
    #pragma omp parallel for default(none) shared(sqrt_limit, matrix_is_prime, limit, index)
    for (int n = 5; n <= sqrt_limit; n++) {
      if (matrix_is_prime[index][n]) {
        int k = n * n;
        for (int i = k; i <= limit; i += k) {
          #pragma omp critical
          {
            matrix_is_prime[index][i] = false;
          }
        }
      }
    }
    // add the numbers 2 and 3
    if (!matrix_is_prime[matrix_is_prime.size() - 1][2]) {
      primes->push_back(2);
      primes->push_back(3);
    }
    // convert the sieve into an array containing only the prime numbers
    is_prime2primes(matrix_is_prime[matrix_is_prime.size() - 1], limit, primes);
  }
}
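// For reference, a minimal single-process Sieve of Atkin (no MPI/OpenMP),
// useful to cross-check the distributed version above. The function name
// atkin_reference is ours, for illustration; the candidate toggles and the
// square-multiple elimination follow the same rules as the code above.
#include <cmath>
#include <vector>

std::vector<int> atkin_reference(int limit) {
  std::vector<bool> sieve(limit + 1, false);
  int sqrt_limit = (int)std::ceil(std::sqrt((double)limit));
  for (int x = 1; x <= sqrt_limit; x++) {
    for (int y = 1; y <= sqrt_limit; y++) {
      int n = 4 * x * x + y * y;
      if (n <= limit && (n % 12 == 1 || n % 12 == 5)) sieve[n] = !sieve[n];
      n = 3 * x * x + y * y;
      if (n <= limit && n % 12 == 7) sieve[n] = !sieve[n];
      n = 3 * x * x - y * y;
      if (x > y && n <= limit && n % 12 == 11) sieve[n] = !sieve[n];
    }
  }
  // eliminate multiples of squares of the remaining candidates
  for (int n = 5; n <= sqrt_limit; n++)
    if (sieve[n])
      for (int i = n * n; i <= limit; i += n * n) sieve[i] = false;
  std::vector<int> primes;
  if (limit >= 2) primes.push_back(2);
  if (limit >= 3) primes.push_back(3);
  for (int n = 5; n <= limit; n++)
    if (sieve[n]) primes.push_back(n);
  return primes;
}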
sim_results simrun_master( const sim_parameters& par, const mpi::communicator& mpicomm )
{
  // ----- PREPARE SIMULATION -----
  // assume something went wrong until we are sure it didn't
  sim_results res;
  res.success = false;

  // ----- RUN SIMULATION -----
  Replica* rep = new Replica(par);
  if ( rep->prepare( par.init ) == false ) {
    delete rep;
    return res;
  }

  unsigned int finished_workers = 0;
  unsigned int scheduled_bins = 0;
  unsigned int completed_bins = 0;
  unsigned int enqueued_bins = par.bins;

  // define procedure to query the slaves for new work requests
  function<void()> mpiquery_work_requests( [&]() {
    while ( boost::optional<mpi::status> status =
                mpicomm.iprobe( mpi::any_source, MSGTAG_S_M_REQUEST_BINS ) ) {
      // receive the request and hand out new bins to the source
      mpicomm.recv( status->source(), MSGTAG_S_M_REQUEST_BINS );
      if ( enqueued_bins > 0 ) {
        mpicomm.send( status->source(), MSGTAG_M_S_DISPATCHED_BINS, 1 );
        scheduled_bins += 1;
        enqueued_bins -= 1;
      } else {
        mpicomm.send( status->source(), MSGTAG_M_S_DISPATCHED_BINS, 0 );
        ++finished_workers;
      }
    }
  } );

  // define procedure to query the slaves for finished work
  function<void()> mpiquery_finished_work( [&]() {
    while ( boost::optional<mpi::status> status = mpicomm.iprobe( mpi::any_source, 2 ) ) {
      mpicomm.recv( status->source(), 2 );
      --scheduled_bins;
      ++completed_bins;
    }
  } );

  cout << ":: Performing Monte Carlo cycle" << endl;
  cout << endl;
  cout << " Progress:" << endl;

  // perform dry runs to reach thermal equilibrium
  for ( unsigned int mcs = 0; mcs < par.drysweeps; mcs++ ) {
    // take care of the slaves
    mpiquery_finished_work();
    mpiquery_work_requests();
    rep->mcs();
  }

  unsigned int completed_bins_master = 0;
  std::vector<double> q2_binmeans;
  std::vector<double> q4_binmeans;

  while ( enqueued_bins > 0 ) {
    cout << '\r' << " Bin " << completed_bins << "/" << par.bins;
    cout.flush();
    --enqueued_bins;
    ++scheduled_bins;

    // initialize binning arrays
    vector<double> q2_currentbin;
    vector<double> q4_currentbin;
    try {
      // try to allocate enough memory ...
      q2_currentbin.reserve( par.binwidth );
      q4_currentbin.reserve( par.binwidth );
    } catch ( bad_alloc ) {
      delete rep;
      return res;
    }

    for ( unsigned int mcs = 0; mcs < par.binwidth; ++mcs ) {
      // take care of the slaves
      mpiquery_finished_work();
      mpiquery_work_requests();
      // perform a Monte Carlo step
      rep->mcs();
      // measure observables
      double q2 = 0, q4 = 0;
      double thissample_q = rep->Q();
      // remember the sample's properties to calculate their mean value
      q2 = thissample_q * thissample_q;
      q4 = thissample_q * thissample_q * thissample_q * thissample_q;
      q2_currentbin.push_back(q2);
      q4_currentbin.push_back(q4);
    }

    q2_binmeans.push_back(
        accumulate( q2_currentbin.begin(), q2_currentbin.end(), 0.0 ) /
        static_cast<double>( q2_currentbin.size() ) );
    q2_currentbin.clear();

    --scheduled_bins;
    ++completed_bins_master;
    ++completed_bins;
  }
  ++finished_workers;

  while ( completed_bins != par.bins ||
          static_cast<int>( finished_workers ) < mpicomm.size() ) {
    if ( boost::optional<mpi::status> status =
             mpicomm.iprobe( mpi::any_source, MSGTAG_S_M_FINISHED_BINS ) ) {
      mpicomm.recv( status->source(), MSGTAG_S_M_FINISHED_BINS );
      --scheduled_bins;
      ++completed_bins;
      cout << "\n";
      cout << '\r' << " Bin " << completed_bins << "/" << par.bins;
      cout.flush();
    }
    if ( boost::optional<mpi::status> status =
             mpicomm.iprobe( mpi::any_source, MSGTAG_S_M_REQUEST_BINS ) ) {
      // receive the request for more work
      mpicomm.recv( status->source(), MSGTAG_S_M_REQUEST_BINS );
      // tell it there is no more work
      mpicomm.send( status->source(), MSGTAG_M_S_DISPATCHED_BINS, 0 );
      ++finished_workers;
    }
  }

  assert( enqueued_bins == 0 );
  assert( scheduled_bins == 0 );
  cout << '\r' << " Bin " << completed_bins << "/" << par.bins << endl;
  cout.flush();

  // all measurements done ... let's tidy things up
  delete rep;

  assert( mpicomm.rank() == 0 );
  vector< vector<double> > q2_binmeans_collector;
  mpi::gather( mpicomm, q2_binmeans, q2_binmeans_collector, 0 );

  vector<double> q2_binmeans_all;
  for ( auto it = q2_binmeans_collector.begin(); it != q2_binmeans_collector.end(); ++it ) {
    q2_binmeans_all.insert( q2_binmeans_all.end(), it->begin(), it->end() );
  }

  double q2 = 0, q4 = 0;
  q2 = static_cast<double>( accumulate( q2_binmeans_all.begin(), q2_binmeans_all.end(), 0.0 ) ) /
       static_cast<double>( q2_binmeans_all.size() );

  double B = 0;
  B = (3 - q4 / (q2 * q2)) / 2;
  res.B = B;
  res.success = true;
  return res;
}
void simrun_slave( const sim_parameters& par, const mpi::communicator& mpicomm )
{
  Replica* rep = new Replica(par);
  if ( rep->prepare( par.init ) == false ) {
    delete rep;
    return; // cannot continue without a prepared replica
  }

  // perform dry runs to reach thermal equilibrium
  rep->mcstep_dry( par.drysweeps );

  unsigned int completed_bins_thisslave = 0;
  bool master_out_of_work = false;
  unsigned int scheduled_bins_thisslave;
  mpicomm.send( 0, MSGTAG_S_M_REQUEST_BINS );
  mpicomm.recv( 0, MSGTAG_M_S_DISPATCHED_BINS, scheduled_bins_thisslave );
  master_out_of_work = ( scheduled_bins_thisslave == 0 );

  std::vector<double> q2_binmeans;
  std::vector<double> q4_binmeans;

  while ( scheduled_bins_thisslave > 0 ) {
    unsigned int new_scheduled_bins_thisslave;
    mpi::request master_answer;
    if ( !master_out_of_work ) {
      // ask the master for more work
      mpicomm.send( 0, MSGTAG_S_M_REQUEST_BINS );
      master_answer = mpicomm.irecv( 0, MSGTAG_M_S_DISPATCHED_BINS, new_scheduled_bins_thisslave );
    }

    // initialize binning arrays
    vector<double> q2_currentbin;
    vector<double> q4_currentbin;
    try {
      // try to allocate enough memory ...
      q2_currentbin.reserve( par.binwidth );
      q4_currentbin.reserve( par.binwidth );
    } catch ( bad_alloc ) {
      delete rep;
      return; // give up on allocation failure
    }

    for ( unsigned int mcs = 0; mcs < par.binwidth; ++mcs ) {
      // perform a Monte Carlo step
      rep->mcs();
      // measure observables
      double q2 = 0, q4 = 0;
      double thissample_q = rep->Q();
      // remember the sample's properties to calculate their mean value
      q2 = thissample_q * thissample_q;
      q4 = thissample_q * thissample_q * thissample_q * thissample_q;
      q2_currentbin.push_back(q2);
      q4_currentbin.push_back(q4);
    }

    q2_binmeans.push_back(
        accumulate( q2_currentbin.begin(), q2_currentbin.end(), 0.0 ) /
        static_cast<double>( q2_currentbin.size() ) );
    q2_currentbin.clear();

    // report completion of the work
    mpicomm.send( 0, 2 );
    ++completed_bins_thisslave;
    --scheduled_bins_thisslave;

    if ( !master_out_of_work ) {
      // wait for the answer from the master concerning the next bin
      master_answer.wait();
      if ( new_scheduled_bins_thisslave == 1 ) {
        ++scheduled_bins_thisslave;
      } else {
        master_out_of_work = true;
      }
    }
  }

  assert( mpicomm.rank() != 0 );
  mpi::gather( mpicomm, q2_binmeans, 0 );
  return;
}
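// A stripped-down, self-contained sketch of the request/dispatch handshake
// used by simrun_master/simrun_slave above. The tag values TAG_REQUEST and
// TAG_DISPATCH are hypothetical placeholders (the real project defines its
// MSGTAG_* constants elsewhere); the point is only the message pattern: a
// worker asks for work, the master answers 1 (one more bin) or 0 (no more).
#include <boost/mpi.hpp>
namespace bmpi = boost::mpi;

enum { TAG_REQUEST = 1, TAG_DISPATCH = 3 };

int main(int argc, char* argv[]) {
  bmpi::environment env(argc, argv);
  bmpi::communicator world;
  unsigned int bins_left = 4; // only meaningful on rank 0
  if (world.rank() == 0) {
    int active = world.size() - 1;
    while (active > 0) {
      bmpi::status s = world.recv(bmpi::any_source, TAG_REQUEST);
      unsigned int grant = (bins_left > 0) ? 1u : 0u;
      if (grant) --bins_left; else --active;
      world.send(s.source(), TAG_DISPATCH, grant);
    }
  } else {
    unsigned int granted = 1;
    while (granted) {
      world.send(0, TAG_REQUEST);
      world.recv(0, TAG_DISPATCH, granted);
      // ... a real worker would simulate one bin here when granted == 1 ...
    }
  }
  return 0;
}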
Simulation3D::Simulation3D(double L_x, double L_y, double L_z, double T,
                           unsigned int n_cells, unsigned int n_steps,
                           unsigned int procs_x, unsigned int procs_y, unsigned int procs_z,
                           unsigned int block_size, std::string& dump_dir,
                           Simulation3DInitializer* init, mpi::communicator & world) :
  world(world),
  xLine(world.split(world.rank() / procs_x)),
  yLine(world.split(world.rank() % procs_x + (world.rank() / (procs_x*procs_y)) * procs_x)),
  zLine(world.split(world.rank() % (procs_x*procs_y))),
  nSteps(n_steps), currentStep(0),
  dx(L_x/n_cells), dy(L_y/n_cells), dz(L_z/n_cells), dt(T/n_steps),
  blockSize(block_size),
  preFactorX(LIGHTSPEED*dt/(2*dx)),
  preFactorY(LIGHTSPEED*dt/(2*dy)),
  preFactorZ(LIGHTSPEED*dt/(2*dz)),
  E(new double[3*blockSize*blockSize*blockSize]),
  B(new double[3*blockSize*blockSize*blockSize]),
  tmp_field(new double[3*blockSize*blockSize*blockSize]),
  rhsx(new double[blockSize*blockSize*blockSize]),
  rhsy(new double[blockSize*blockSize*blockSize]),
  rhsz(new double[blockSize*blockSize*blockSize]),
  rhs_ptrs_x(new double*[blockSize*blockSize]),
  rhs_ptrs_y(new double*[blockSize*blockSize]),
  rhs_ptrs_z(new double*[blockSize*blockSize]),
  dumpDir(dump_dir)
{
  procsX = xLine.size();
  procsY = yLine.size();
  procsZ = zLine.size();

  VacuumMatrixInitializer mat_init_x = VacuumMatrixInitializer(dx, dt, blockSize, determineBoundary(xLine));
  VacuumMatrixInitializer mat_init_y = VacuumMatrixInitializer(dy, dt, blockSize, determineBoundary(yLine));
  VacuumMatrixInitializer mat_init_z = VacuumMatrixInitializer(dz, dt, blockSize, determineBoundary(zLine));
  VacuumCouplingInitializer coupling_init_x = VacuumCouplingInitializer(&mat_init_x, blockSize, xLine);
  VacuumCouplingInitializer coupling_init_y = VacuumCouplingInitializer(&mat_init_y, blockSize, yLine);
  VacuumCouplingInitializer coupling_init_z = VacuumCouplingInitializer(&mat_init_z, blockSize, zLine);

  std::vector<AbstractMatrixInitializer*> mat_inits_x(blockSize*blockSize, &mat_init_x);
  std::vector<AbstractMatrixInitializer*> mat_inits_y(blockSize*blockSize, &mat_init_y);
  std::vector<AbstractMatrixInitializer*> mat_inits_z(blockSize*blockSize, &mat_init_z);
  std::vector<AbstractCouplingInitializer*> coupling_inits_x(blockSize*blockSize, &coupling_init_x);
  std::vector<AbstractCouplingInitializer*> coupling_inits_y(blockSize*blockSize, &coupling_init_y);
  std::vector<AbstractCouplingInitializer*> coupling_inits_z(blockSize*blockSize, &coupling_init_z);

  guardB = allocateGuardStorage();
  guardE = allocateGuardStorage();

  init->setOffsets(xLine, yLine, zLine);
  initFields(init);

  xUpdateRHSs = init->initCollection(mat_inits_x, coupling_inits_x, blockSize, xLine);
  yUpdateRHSs = init->initCollection(mat_inits_y, coupling_inits_y, blockSize, yLine);
  zUpdateRHSs = init->initCollection(mat_inits_z, coupling_inits_z, blockSize, zLine);

  guardSendbuf = new double[3*blockSize*blockSize];
}
/// Scatter a mesh over the communicator c
friend gf_mesh mpi_scatter(gf_mesh m, mpi::communicator c, int root) {
  auto m2 = gf_mesh{m.domain(), m.size(), m.positive_only()};
  std::tie(m2._first_index_window, m2._last_index_window) =
      mpi::slice_range(m2._first_index, m2._last_index, c.size(), c.rank());
  return m2;
}
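// For illustration only: the kind of block partition that mpi::slice_range /
// mpi::slice_length compute. Given an inclusive index range [first, last] and
// n ranks, each rank gets a contiguous chunk, with the first (total % n) ranks
// receiving one extra element. The helper block_slice below is a hypothetical
// stand-in, not the library implementation.
#include <algorithm>
#include <utility>

std::pair<long, long> block_slice(long first, long last, int n, int r) {
  long total = last - first + 1;   // number of mesh points to distribute
  long base = total / n;           // minimum chunk size per rank
  long rest = total % n;           // ranks 0..rest-1 get one extra point
  long begin = first + r * base + std::min<long>(r, rest);
  long end = begin + base + (r < rest ? 1 : 0) - 1;
  return std::make_pair(begin, end);
}
// e.g. block_slice(0, 9, 3, 0) == {0, 3}, block_slice(0, 9, 3, 2) == {7, 9}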
void mandel_mpi(mpi::communicator world,
                int2D* matrix,     /* to fill */
                int nr,            /* row size */
                int nc,            /* column size */
                real base_x,       /* lower left corner */
                real base_y,       /* lower left corner */
                real ext_x,        /* extent */
                real ext_y)        /* extent */
{
  int r, c;                        /* row and column indices */
  real dx, dy;                     /* per-step deltas */
#if GRAPHICS
  int gfxCount = 0;                /* number of times graphics called */
#endif
  int row_count = 0;
  int i;
  mpi::status status;
  int source;
  const int WORK_REQUEST_TAG = 0;
  const int WORK_RESPONSE_TAG = 1;
  const int NO_MORE_WORK = -1;
  int processed_rows = 0;

  dx = ext_x / (nr - 1);
  dy = ext_y / (nc - 1);

  if (world.size() > 1) {
    if (world.rank() == 0) {
      // control process
      // send out work
      while (row_count < nr) {
        status = world.recv(mpi::any_source, WORK_REQUEST_TAG);
        source = status.source();
        // send next row
        world.isend(source, WORK_RESPONSE_TAG, row_count);
        row_count++;
      }
      // send out no more work
      for (i = 1; i < world.size(); i++) {
        status = world.recv(mpi::any_source, WORK_REQUEST_TAG);
        source = status.source();
        world.isend(source, WORK_RESPONSE_TAG, NO_MORE_WORK);
      }
      // receive results
      for (r = 0; r < nr; r++) {
        world.recv(mpi::any_source, r + 1, matrix[r], nc);
      }
    } else {
      // work process
      while (true) {
        // request next row
        world.send(0, WORK_REQUEST_TAG);
        world.recv(0, WORK_RESPONSE_TAG, r);
        if (r != NO_MORE_WORK) {
          for (c = 0; c < nc; c++) {
            matrix[r][c] = mandel_calc_mpi(base_x + (r * dx), base_y + (c * dy));
          }
          processed_rows++;
          // send results
          world.isend(0, r + 1, matrix[r], nc);
        } else {
          break;
        }
      }
#if defined(TEST_OUTPUT) || defined(TEST_TIME)
      printf("processed rows: %d\n", processed_rows);
#endif
    }
    // broadcast matrix
    for (r = 0; r < nr; r++) {
      broadcast(world, matrix[r], nc, 0);
    }
  } else {
    for (r = 0; r < nr; r++) {
      for (c = 0; c < nc; c++) {
        matrix[r][c] = mandel_calc_mpi(base_x + (r * dx), base_y + (c * dy));
      }
    }
  }
#if GRAPHICS
  gfx_mandel(gfxCount++, matrix, nr, nc);
#endif
  /* return */
}
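// The worker loop above assumes mandel_calc_mpi(x, y) returns the escape-time
// count for the point (x, y). A plausible sketch of such a function follows;
// the name mandel_calc_sketch and the iteration cap MANDEL_MAX_ITER are ours,
// chosen for illustration (the real project may use different constants or types).
static const int MANDEL_MAX_ITER = 150;

int mandel_calc_sketch(double x0, double y0) {
  double zx = 0.0, zy = 0.0;
  int iter = 0;
  // iterate z <- z^2 + c until |z| > 2 or the iteration cap is reached
  while (zx * zx + zy * zy <= 4.0 && iter < MANDEL_MAX_ITER) {
    double tmp = zx * zx - zy * zy + x0;
    zy = 2.0 * zx * zy + y0;
    zx = tmp;
    ++iter;
  }
  return iter;
}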