Пример #1
0
/// Set the x/y/z offsets of this process.
///
/// Each offset is the product of the per-axis step (dx, dy, dz), the block
/// size and the rank of this process inside the matching line communicator.
void Simulation3DInitializer::setOffsets(const mpi::communicator & xLine,
					  const mpi::communicator & yLine,
					  const mpi::communicator & zLine) {
  const int rankAlongX = xLine.rank();
  const int rankAlongY = yLine.rank();
  const int rankAlongZ = zLine.rank();

  x_offset = dx * blockSize * rankAlongX;
  y_offset = dy * blockSize * rankAlongY;
  z_offset = dz * blockSize * rankAlongZ;
}
Пример #2
0
/// Build a slave bound to the given communicator and experiment.
///
/// A slave must never live on MASTER_RANK: if it is constructed there, an
/// error is printed and the communicator is aborted with code 1.
MPISlave::MPISlave(const mpi::communicator& comm, const Experiment& exp)
: m_comm(comm)
, m_exp(exp)
, m_resonanceField(m_exp)
{
  // Regular case: we are on a non-master rank, nothing else to set up.
  if (comm.rank() != MASTER_RANK) {
    return;
  }

  cerr << "MPISlave created in master rank!" << endl;
  comm.abort(1);
}
Пример #3
0
 /// Broadcast the array `a` from rank `root` to every rank of `c`.
 ///
 /// The shape is broadcast first so that receivers can resize `a` (or, for a
 /// view, check that it already has that shape); the element data follows in a
 /// second broadcast. Only contiguous data is supported.
 template <typename A> REQUIRES_IS_ARRAY mpi_broadcast(A &a, mpi::communicator c = {}, int root = 0) {
  if (!has_contiguous_data(a)) { TRIQS_RUNTIME_ERROR << "Non contiguous view in mpi_broadcast"; }

  // Phase 1: make the root's shape known everywhere.
  auto extents = a.shape();
  using extent_t = typename decltype(extents)::value_type;
  MPI_Bcast(&extents[0], extents.size(), mpi::mpi_datatype<extent_t>(), root, c.get());

  const bool is_receiver = (c.rank() != root);
  if (is_receiver) { resize_or_check_if_view(a, extents); }

  // Phase 2: ship the elements themselves.
  using element_t = typename A::value_type;
  MPI_Bcast(a.data_start(), a.domain().number_of_elements(), mpi::mpi_datatype<element_t>(), root, c.get());
 }
/**
 * Heuristic strategy 2:
 * the first 4 numbers are selected randomly and the remaining cells are
 * then looked up as a consequence of that selection.
 * There are 8 equations and 9 variables, but only 5 variables are needed;
 * the others can be calculated from those.
 *
 * Every rank builds one 3x3 matrix from the shared list of primes; the
 * matrices are gathered on rank 0, which prints them.
 *
 * @param world communicator the test runs on
 * @param limit upper limit handed to find_prime_numbers
 */
void test_heuristic_strategy_2(mpi::communicator world, int limit) {
	int rank = world.rank();
	const bool is_root = (rank == 0);

	if (is_root)
		cout << "Test the heuristic strategy 2...\n";

	// build the prime list, then make sure every rank holds it
	ms_vector primes;
	find_prime_numbers(world, limit, &primes);
	mpi::broadcast(world, primes, 0);

	// each rank fills its own square matrix
	int length = 3;
	ms_matrix matrix(length, ms_vector(length));
	fill_in_heuristic_mode_2(&primes, &matrix, rank);

	// collect one matrix per rank on the root
	vector<ms_matrix> list;
	mpi::gather(world, matrix, list, 0);

	if (is_root) {
		cout << "Print all generated matrix:\n";
		print_list_matrix(list);
	}
}
Пример #5
0
/**
 * Matrix-vector product distributed by blocks of rows.
 *
 * Each process computes the rows that get_block_rows_mpi assigns to it and
 * then every process in turn broadcasts its slice of `result`, so that all
 * processes end up with the complete result vector.
 */
void product_mpi (mpi::communicator world,
                  real2D* matrix,           /* to multiply by */
                  real1D* vector,          /* to be multiplied */
                  real1D* result,          /* result of multiply */
                  int   nr,                /* row size */
                  int		nc)                /* column size */
{
  int first, last;   /* half-open row range [first, last) */

  // compute the rows owned by this process
  if (get_block_rows_mpi (world, 0, nr, &first, &last)) {
    for (int row = first; row < last; ++row) {
      result[row] = matrix[row][0] * vector[0];
      int col = 1;
      while (col < nc) {
        result[row] += matrix[row][col] * vector[col];
        ++col;
      }
    }
  }

  // share every owner's slice with all other processes
  for (int owner = 0; owner < world.size (); ++owner) {
    if (!get_block_rows_mpi (world, 0, nr, &first, &last, owner)) {
      continue;
    }
    broadcast (world, &result[first], last - first, owner);
  }

}
Пример #6
0
  /// Compute the domain of the array produced by the MPI operation.
  ///
  /// Starts from the shape of `ref` and adjusts only the first dimension:
  /// scatter -> length of this rank's slice; gather -> sum over ranks
  /// (meaningful on root only unless `all` is set); reduce -> unchanged.
  domain_type domain() const {
   const bool is_scatter = std::is_same<Tag, mpi::tag::scatter>::value;
   const bool is_gather = std::is_same<Tag, mpi::tag::gather>::value;

   auto dims = ref.shape();
   long slow_size = first_dim(ref);

   if (is_scatter) {
    // every rank needs the root's first dimension to work out its own slice
    mpi::mpi_broadcast(slow_size, c, root);
    dims[0] = mpi::slice_length(slow_size - 1, c.size(), c.rank());
   }

   if (is_gather) {
    if (all) {
     // the total is available on every node
     dims[0] = mpi::mpi_all_reduce(slow_size, c, root);
    } else {
     // the total is only valid on root; other ranks get a placeholder of 1
     dims[0] = mpi::mpi_reduce(slow_size, c, root);
     if (c.rank() != root) dims[0] = 1;
    }
   }
   // mpi::tag::reduce: nothing to adjust

   return domain_type{dims};
  }
Пример #7
0
/// Classify this process by its position in the communicator:
/// first rank -> LOWER_BDY, last rank -> UPPER_BDY, both at once (a
/// single-process communicator) -> DOUBLE_BDY, anything else -> NO_BDY.
BoundaryLocation determineBoundary(mpi::communicator& world) {
  const bool atLowerEnd = (world.rank() == 0);
  const bool atUpperEnd = (world.rank() == world.size() - 1);

  if (atLowerEnd) {
    return atUpperEnd ? DOUBLE_BDY : LOWER_BDY;
  }
  return atUpperEnd ? UPPER_BDY : NO_BDY;
}
/**
 * Process one batch of contigs: copy up to maxContigsPerBatch reads starting
 * at batchIdx out of _contigs, write them to a temporary global FASTA file,
 * match reads against them and extend them with Cap3, Newbler or the
 * built-in contig extender (first one that is configured, in that order).
 *
 * @return the concatenated extension log; empty when the batch is globally
 *         empty.
 */
std::string runPartialBatch(mpi::communicator world, boost::shared_ptr< MatcherInterface > &matcher, ReadSet &_contigs, std::string _contigFile, ReadSet & changedContigs,
		ReadSet & finalContigs, int batchIdx, int maxContigsPerBatch, SequenceLengthType minKmerSize,
		double minimumCoverage, SequenceLengthType maxKmerSize,
		SequenceLengthType maxExtend, SequenceLengthType kmerStep) {

	LOG_DEBUG(1, "Starting runPartialBatch(" << batchIdx << " of " << _contigs.getSize() << "): " << MemoryUtils::getMemoryUsage());

	// the batch is a subset of the original global contig set
	ReadSet contigs;
	for(int idx = batchIdx; idx < (int) _contigs.getSize() && idx < batchIdx + maxContigsPerBatch; ++idx) {
		contigs.append(_contigs.getRead(idx));
	}

	std::string extendLog;
	setGlobalReadSetConstants(world, contigs);
	if (contigs.getGlobalSize() == 0) {
		return extendLog;
	}

	// temporary file holding this batch; removed again before returning
	std::string contigFile = DistributedOfstreamMap::writeGlobalReadSet(world, contigs, UniqueName::generateUniqueGlobalName(".tmp-batch" + UniqueName::getOurUniqueHandle() + "-", batchIdx), ".fasta", FormatOutput::Fasta());

	MatcherInterface::MatchReadResults contigReadSet = matcher->match(contigs, contigFile);
	assert(contigs.getSize() == contigReadSet.size());

	LOG_VERBOSE_OPTIONAL(1, world.rank() == 0, " batch " << contigs.getSize() << ". Matches made");

	// one log slot per OpenMP thread; slots stay empty for the built-in extender
	int numThreads = omp_get_max_threads();
	std::string extendLogs[numThreads];

	if (!Cap3Options::getOptions().getCap3Path().empty()) {
		Cap3 cap3Instances[numThreads];
		#pragma omp parallel for
		for(int t = 0; t < numThreads; t++) {
			extendLogs[t] = cap3Instances[t].extendContigs(contigs, contigReadSet, changedContigs, finalContigs, minimumCoverage, t, numThreads);
		}
	} else if (!NewblerOptions::getOptions().getNewblerPath().empty()) {
		Newbler newblerInstances[numThreads];
		#pragma omp parallel for
		for(int t = 0; t < numThreads; t++) {
			extendLogs[t] = newblerInstances[t].extendContigs(contigs, contigReadSet, changedContigs, finalContigs, minimumCoverage, t, numThreads);
		}
	} else {
		extendLog = extendContigsWithContigExtender(contigs, contigReadSet,
				changedContigs, finalContigs,
				minKmerSize, minimumCoverage, maxKmerSize, maxExtend, kmerStep);
	}

	// append the per-thread logs in thread order
	for(int t = 0; t < numThreads; t++) {
		extendLog += extendLogs[t];
	}

	unlink(contigFile.c_str());

	return extendLog;
}
Пример #9
0
/// Distribute a graph over the ranks of `comm`.
///
/// The vertex list is cut into comm.size() consecutive parts whose sizes
/// differ by at most one; for every part the matching range of `edges` is
/// derived from the values stored in `vertices`. Each rank receives a
/// 4-entry description (total vertex count, first vertex, vertex count, edge
/// count) via scatter and then its own vertices and edges via scatterv from
/// rank 0. Finally prepare() is called.
ParallelBFS::ParallelBFS(const mpi::communicator &comm,
                         const NodeList &vertices,
                         const NodeList &edges) :
    comm(comm) {
  const NodeId base_share = (NodeId)vertices.size() / comm.size();
  const NodeId extra = (NodeId)vertices.size() % comm.size();

  NodeList vertex_counts((size_t)comm.size());
  NodeList vertex_starts((size_t)comm.size());
  NodeList edge_counts((size_t)comm.size());
  NodeList edge_starts((size_t)comm.size());
  NodeList packed((size_t)(comm.size() * 4));

  NodeId next_vertex = 0;
  NodeId next_edge = 0;
  for (int p = 0; p < comm.size(); ++p) {
    // the first `extra` ranks take one additional vertex
    NodeId share = base_share + (p < extra);
    // the edges of a part end where the following part's first vertex points,
    // or at the end of the edge list for the final part
    NodeId edge_end = next_vertex + share == vertices.size() ?
                      (NodeId)edges.size() :
                      vertices[next_vertex + share];

    vertex_starts[p] = next_vertex;
    vertex_counts[p] = share;
    edge_counts[p] = edge_end - next_edge;
    edge_starts[p] = next_edge;

    const int slot = p * 4;
    packed[slot] = (NodeId)vertices.size();
    packed[slot + 1] = vertex_starts[p];
    packed[slot + 2] = vertex_counts[p];
    packed[slot + 3] = edge_counts[p];

    next_edge = edge_end;
    next_vertex += share;
  }

  NodeList mine(4);
  mpi::scatter(comm, packed.data(), mine.data(), 4, 0);
  this->vertex_total_count = mine[0];
  this->first_vertex = mine[1];

  this->vertices.resize((size_t)mine[2]);
  mpi::scatterv(comm, vertices, vertex_counts, vertex_starts,
                this->vertices, 0);

  this->edges.resize((size_t)mine[3]);
  mpi::scatterv(comm, edges, edge_counts, edge_starts,
                this->edges, 0);

  prepare();
}
Пример #10
0
/// Execute the rules of the building floor by floor.
///
/// Rules are handed out round-robin over world ranks 1 .. size-1 using the
/// running counter currentRank; the `slave` communicator synchronises after
/// every floor. When all floors are done, "<rank>;done" is sent to world
/// rank 0 with tag 0.
void Manager::execute(mpi::communicator slave, std::string masterComputer, std::string cheminPere) {
    typedef std::vector<Rule *> Floor;
    typedef std::vector<Floor> Building;

    mpi::communicator world;

    for (Building::iterator floorIt = building.begin(); floorIt != building.end(); ++floorIt) {
        for (Floor::iterator ruleIt = floorIt->begin(); ruleIt != floorIt->end(); ++ruleIt) {
            // only the rank whose turn it is runs this rule
            const bool ourTurn = ((currentRank % (world.size()-1))+1 == world.rank());
            if (ourTurn) {
                std::cout << printCurrentThread() << "executing " << (*ruleIt)->get_name() << std::endl;
                (*ruleIt)->execute(dictionary, masterComputer, cheminPere);
                std::cout << printCurrentThread() << "finished " << (*ruleIt)->get_name() << std::endl;
            }
            currentRank++;
        }
        // a floor must be complete everywhere before the next one starts
        slave.barrier();
    }

    // send a message to say that we are finished
    std::stringstream messageSend;
    messageSend << world.rank() << ";done";
    world.send(0, 0, messageSend.str());

}
/**
 * Find the prime numbers in the range [2, limit] with a Sieve of Atkin whose
 * candidate-flipping phase is split over the ranks of an MPI communicator.
 *
 * http://en.wikipedia.org/wiki/Sieve_of_Atkin
 *
 * Every rank flips flags in its own private flag vector for its share of the
 * x values. The vectors are gathered on rank 0, which folds them together,
 * clears the multiples of squares and converts the flags into a list.
 *
 * @param world  communicator whose ranks share the work
 * @param limit  upper limit of the search range
 * @param primes output list; it is only appended to on rank 0, other ranks
 *               leave it untouched
 *
 * NOTE(review): is_prime holds limit + 1 flags and indices 2 and 3 are
 * written unconditionally, so this appears to require limit >= 3 - confirm.
 */
void find_prime_numbers(mpi::communicator world, int limit, ms_vector *primes) {
	int sqrt_limit = ceil(sqrt(limit));

	// private flag vector of this rank; index n stands for the number n
	vector<bool> is_prime(limit + 1, false);
	// receive buffer for the gather: one flag vector per rank (filled on rank 0)
	vector<vector<bool> > matrix_is_prime(world.size());

	is_prime[2] = true;
	is_prime[3] = true;

	int size = world.size();
	// if the number of processes > sqrt_limit
	if (size > sqrt_limit)
		// pretend to have only sqrt_limit processes
		size = sqrt_limit;
	// compute how many x values each process scans
	// NOTE(review): x values above howmuch * size (the remainder of this
	// division) are assigned to no rank - confirm no candidate is lost.
	int howmuch = sqrt_limit / size;
	// first x value handled by this process
	int start = 1 + (howmuch * world.rank());
	// last x value handled by this process
	int stop = howmuch * (world.rank() + 1);
	// if stop is out of limit, set stop as limit
	if (stop > limit)
		stop = limit;

	// execute algorithm: flip the flag of every n produced by one of the
	// three quadratic forms with the matching remainder modulo 12
	for (int x = start; x <= stop; x++) {
#		pragma omp parallel for default(none) shared(sqrt_limit, limit, is_prime, x)
		for (int y = 1; y <= sqrt_limit; y++) {
			int n = 4 * x * x + y * y;

			if (n <= limit && ((n % 12) == 1 || (n % 12) == 5)){
				// the flag vector is shared between threads, so flips are serialized
#				pragma omp critical
				{
					is_prime[n] = !is_prime[n];
				}
			}

			n = 3 * x * x + y * y;

			if (n <= limit && (n % 12) == 7){
#				pragma omp critical
				{
					is_prime[n] = !is_prime[n];
				}
			}

			n = 3 * x * x - y * y;

			if (x > y && n <= limit && (n % 12) == 11){
#				pragma omp critical
				{
					is_prime[n] = !is_prime[n];
				}
			}
		}
	}

	// gather: rank 0 receives the flag vector of every rank
	mpi::gather(world, is_prime, matrix_is_prime, 0);

	// root process finalizes the algorithm
	if (world.rank() == 0) {

		// fold the vectors together: a set flag in row i - 1 flips the same
		// flag in row i, so the last row ends up with the combined flips
		for (unsigned int i = 1; i < matrix_is_prime.size(); i++) {
#			pragma omp parallel for default(none) shared(matrix_is_prime, limit, i)
			for (int j = 1; j <= limit; j++) {
				if (matrix_is_prime[i - 1][j]) {
#					pragma omp critical
					{
						matrix_is_prime[i][j] = !matrix_is_prime[i][j];
					}
				}
			}
		}

		// remove the remaining non-primes: clear every multiple of n * n for
		// each n that is still flagged
		int index = matrix_is_prime.size() - 1;
#		pragma omp parallel for default(none) shared(sqrt_limit, matrix_is_prime, limit, index)
		for (int n = 5; n <= sqrt_limit; n++) {
			if (matrix_is_prime[index][n]) {
				int k = n * n;
				for (int i = k; i <= limit; i += k) {
#					pragma omp critical
					{
						matrix_is_prime[index][i] = false;
					}
				}
			}
		}

		// The seed flags for 2 and 3 are set on every rank and go through the
		// same fold as all other flags, so they can end up cleared; in that
		// case add 2 and 3 by hand.
		// NOTE(review): presumably is_prime2primes emits them itself when the
		// flag is still set - confirm.
		if (!matrix_is_prime[matrix_is_prime.size() - 1][2]) {
			primes->push_back(2);
			primes->push_back(3);
		}

		// convert the flags into an array that holds only the prime numbers
		is_prime2primes(matrix_is_prime[matrix_is_prime.size() - 1], limit,
				primes);
	}
}
Пример #12
0
 /// Scatter a mesh over the communicator c.
 ///
 /// Returns a copy of the mesh (same domain, size and positive_only flag)
 /// whose index window is this rank's slice of the full index range.
 /// `root` is not used by this implementation.
 friend gf_mesh mpi_scatter(gf_mesh m, mpi::communicator c, int root) {
  gf_mesh local = gf_mesh{m.domain(), m.size(), m.positive_only()};

  const auto n_ranks = c.size();
  const auto my_rank = c.rank();
  std::tie(local._first_index_window, local._last_index_window) =
      mpi::slice_range(local._first_index, local._last_index, n_ranks, my_rank);

  return local;
 }
Пример #13
0
/**
 * Worker-side Monte Carlo loop.
 *
 * Creates a Replica, equilibrates it with dry sweeps and then processes bins
 * handed out by rank 0: while one bin is being computed, the request for the
 * next one is already in flight. For each bin the mean of Q^2 over
 * par.binwidth Monte Carlo steps is recorded; after the last bin the means
 * are gathered on rank 0.
 *
 * @param par      simulation parameters (init, drysweeps and binwidth are read)
 * @param mpicomm  communicator shared with rank 0; must not be called on rank 0
 *
 * NOTE(review): on the two failure paths below this function returns without
 * informing rank 0 and without taking part in the final gather - confirm how
 * the master is expected to notice a failed worker.
 * NOTE(review): Q^4 samples are collected but never averaged or sent.
 */
void simrun_slave( const sim_parameters& par,const mpi::communicator& mpicomm)
{
    Replica* rep = new Replica(par);

    if ( rep->prepare( par.init ) == false ) {
        // The replica is unusable: release it and stop, instead of falling
        // through and dereferencing the freed pointer.
        delete rep;
        return;
    }

    // perform dry runs to reach thermal equilibrium
    rep->mcstep_dry( par.drysweeps );

    unsigned int completed_bins_thisslave = 0;
    bool master_out_of_work = false;
    unsigned int scheduled_bins_thisslave;
    mpicomm.send( 0, MSGTAG_S_M_REQUEST_BINS );
    mpicomm.recv( 0, MSGTAG_M_S_DISPATCHED_BINS, scheduled_bins_thisslave );
    master_out_of_work = ( scheduled_bins_thisslave == 0 );

    std::vector<double> q2_binmeans;
    std::vector<double> q4_binmeans;

    while ( scheduled_bins_thisslave > 0 ) {

        // initialize binning array
        // Done before the next request is posted, so that the failure path
        // never leaves a pending receive that targets a dead stack variable.
        vector<double> q2_currentbin;
        vector<double> q4_currentbin;
        try {
            // try to allocate enough memory ...
            q2_currentbin.reserve( par.binwidth );
            q4_currentbin.reserve( par.binwidth );
        } catch ( const bad_alloc& ) {
            // cannot continue without the buffers; do not touch rep afterwards
            delete rep;
            return;
        }

        unsigned int new_scheduled_bins_thisslave = 0;
        mpi::request master_answer;

        if ( !master_out_of_work ) {
            // ask the master for more work
            mpicomm.send( 0, MSGTAG_S_M_REQUEST_BINS );
            master_answer = mpicomm.irecv(
                        0, MSGTAG_M_S_DISPATCHED_BINS,
                        new_scheduled_bins_thisslave
                        );
        }

        for (unsigned int mcs = 0;mcs < par.binwidth;++mcs ) {
            // perform a Monte Carlo step
            rep->mcs();

            // measure observables
            double q2 = 0, q4 = 0;
            double thissample_q = rep->Q();
            // remember the sample's properties to calculate their mean value
            q2 	= thissample_q * thissample_q;
            q4 	= thissample_q * thissample_q * thissample_q * thissample_q;
            q2_currentbin.push_back(q2);
            q4_currentbin.push_back(q4);
        }


        q2_binmeans.push_back(
                    accumulate( q2_currentbin.begin(), q2_currentbin.end(), 0.0 ) /
                    static_cast<double>( q2_currentbin.size() )
                    );
        q2_currentbin.clear();

        // report completion of the work
        // NOTE(review): literal tag 2 - presumably MSGTAG_S_M_FINISHED_BINS; confirm.
        mpicomm.send( 0, 2 );
        ++completed_bins_thisslave;
        --scheduled_bins_thisslave;

        if ( !master_out_of_work ) {
            // wait for answer from master concerning the next bin
            master_answer.wait();
            if ( new_scheduled_bins_thisslave == 1 ) {
                ++scheduled_bins_thisslave;
            } else {
                master_out_of_work = true;
            }
        }
    }

    // all measurements done: the replica is no longer needed
    delete rep;

    assert( mpicomm.rank() != 0 );
    mpi::gather( mpicomm, q2_binmeans, 0 );
    return;
}
Пример #14
0
/**
 * Fill a symmetric distance matrix and a magnitude vector for a set of
 * points, with the rows distributed over the processes of `world`.
 *
 * Off-diagonal entries hold the distance between two points, each diagonal
 * entry holds n times the largest distance found by any process, and
 * realVec[r] holds the magnitude of point r. On return every process has
 * the complete matrix and vector.
 */
void
outer_mpi(mpi::communicator world,
  pt1D*		ptVec,			/* vector of points */
  real2D*	matrix,			/* matrix to fill */
  real1D*	realVec,		/* vector to fill */
  int		n			/* size */
){
  int first, last;          /* half-open row range [first, last) */
  real local_max = -1.0;    /* largest distance seen by this process */
  real global_max;          /* largest distance seen by any process */

  /* all elements except matrix diagonal */
  const bool has_rows = get_block_rows_mpi (world, 0, n, &first, &last);
  if (has_rows) {
    for (int row = first; row < last; row++) {
      realVec[row] = ptMag(&(ptVec[row]));
      // fill columns 0 to row only
      for (int col = 0; col < row; col++) {
        const real dist = ptDist (&(ptVec[row]), &(ptVec[col]));
        if (dist > local_max) {
          local_max = dist;
        }
        matrix[row][col] = dist;
      }
    }
  }

  // reduce to the overall maximum distance
  all_reduce (world, local_max, global_max, mpi::maximum<real>());

  /* matrix diagonal */
  const real diagonal = global_max * n;
  if (has_rows) {
    for (int row = first; row < last; row++) {
      matrix[row][row] = diagonal;
    }
  }

  // every owner broadcasts its part of realVec and its matrix rows
  for (int owner = 0; owner < world.size (); owner++) {
    if (!get_block_rows_mpi (world, 0, n, &first, &last, owner)) {
      continue;
    }
    broadcast (world, &realVec[first], last - first, owner);
    // broadcast row by row since n may be smaller than MAXEXT
    for (int row = first; row < last; row++) {
      broadcast (world, matrix[row], n, owner);
    }
  }

  // mirror the lower triangle to make the matrix symmetric
  for (int row = 0; row < n; row++) {
    for (int col = 0; col < row; col++) {
      matrix[col][row] = matrix[row][col];
    }
  }

  /* return */
}
Пример #15
0
/// Set up a 3D simulation on a grid of processes.
///
/// Splits `world` into three line communicators (xLine, yLine, zLine),
/// derives the steps dx, dy, dz and dt from the box lengths, the total time
/// and the cell/step counts, allocates the field and right-hand-side buffers
/// for one block, and builds the per-direction update collections through
/// `init`.
///
/// @param L_x, L_y, L_z  box lengths; divided by n_cells to obtain dx, dy, dz
/// @param T              total time; divided by n_steps to obtain dt
/// @param n_cells        number of cells used for all three step sizes
/// @param n_steps        number of time steps
/// @param procs_x, procs_y, procs_z  process counts used to compute the split
///                       colors (procs_z itself is not read in this constructor)
/// @param block_size     edge length of the local block
/// @param dump_dir       stored in dumpDir
/// @param init           initializer used for offsets, fields and collections
/// @param world          communicator that is split into the line communicators
///
/// NOTE(review): several initialisers read other members (dx, dt, blockSize),
/// which is only valid if those members are declared earlier in the class -
/// confirm against the class definition.
/// NOTE(review): the buffers below are allocated with new[]; where they are
/// released is not visible here.
Simulation3D::Simulation3D(double L_x, double L_y, double L_z,
			   double T,
			   unsigned int n_cells, unsigned int n_steps,
			   unsigned int procs_x, unsigned int procs_y, unsigned int procs_z,
			   unsigned int block_size,
			   std::string& dump_dir,
			   Simulation3DInitializer* init, mpi::communicator & world) :
  world(world),
  // processes with the same color end up in the same line communicator
  xLine(world.split(world.rank() / procs_x)),
  yLine(world.split(world.rank() % procs_x + (world.rank() / (procs_x*procs_y)) * procs_x)),
  zLine(world.split(world.rank() % (procs_x*procs_y))),
  nSteps(n_steps),
  currentStep(0),
  dx(L_x/n_cells),
  dy(L_y/n_cells),
  dz(L_z/n_cells),
  dt(T/n_steps),
  blockSize(block_size),
  preFactorX(LIGHTSPEED*dt/(2*dx)),
  preFactorY(LIGHTSPEED*dt/(2*dy)),
  preFactorZ(LIGHTSPEED*dt/(2*dz)),
  // three values per cell of the blockSize^3 block
  E(new double[3*blockSize*blockSize*blockSize]),
  B(new double[3*blockSize*blockSize*blockSize]),
  tmp_field(new double[3*blockSize*blockSize*blockSize]),
  // one value per cell of the block
  rhsx(new double[blockSize*blockSize*blockSize]),
  rhsy(new double[blockSize*blockSize*blockSize]),
  rhsz(new double[blockSize*blockSize*blockSize]),
  // one pointer per entry of a blockSize x blockSize face
  rhs_ptrs_x(new double*[blockSize*blockSize]),
  rhs_ptrs_y(new double*[blockSize*blockSize]),
  rhs_ptrs_z(new double*[blockSize*blockSize]),
  dumpDir(dump_dir)
{
  // the actual process counts are taken from the line communicators
  procsX = xLine.size();
  procsY = yLine.size();
  procsZ = zLine.size();

  // one matrix/coupling initializer per direction, parameterised by whether
  // this process sits at a boundary of its line
  VacuumMatrixInitializer mat_init_x = VacuumMatrixInitializer(dx, dt, blockSize, determineBoundary(xLine));
  VacuumMatrixInitializer mat_init_y = VacuumMatrixInitializer(dy, dt, blockSize, determineBoundary(yLine));
  VacuumMatrixInitializer mat_init_z = VacuumMatrixInitializer(dz, dt, blockSize, determineBoundary(zLine));
  VacuumCouplingInitializer coupling_init_x = VacuumCouplingInitializer(& mat_init_x, blockSize, xLine);
  VacuumCouplingInitializer coupling_init_y = VacuumCouplingInitializer(& mat_init_y, blockSize, yLine);
  VacuumCouplingInitializer coupling_init_z = VacuumCouplingInitializer(& mat_init_z, blockSize, zLine);

  // blockSize^2 entries per direction, all pointing at the same initializer
  // NOTE(review): these point at locals of this constructor; presumably
  // initCollection does not keep them beyond the call - confirm.
  std::vector<AbstractMatrixInitializer*> mat_inits_x(blockSize*blockSize, & mat_init_x);
  std::vector<AbstractMatrixInitializer*> mat_inits_y(blockSize*blockSize, & mat_init_y);
  std::vector<AbstractMatrixInitializer*> mat_inits_z(blockSize*blockSize, & mat_init_z);
  std::vector<AbstractCouplingInitializer*> coupling_inits_x(blockSize*blockSize, & coupling_init_x);
  std::vector<AbstractCouplingInitializer*> coupling_inits_y(blockSize*blockSize, & coupling_init_y);
  std::vector<AbstractCouplingInitializer*> coupling_inits_z(blockSize*blockSize, & coupling_init_z);

  guardB = allocateGuardStorage();
  guardE = allocateGuardStorage();

  // the offsets must be set before the fields are initialised from `init`
  init->setOffsets(xLine, yLine, zLine);
  initFields(init);

  xUpdateRHSs = init->initCollection(mat_inits_x, coupling_inits_x, blockSize, xLine);
  yUpdateRHSs = init->initCollection(mat_inits_y, coupling_inits_y, blockSize, yLine);
  zUpdateRHSs = init->initCollection(mat_inits_z, coupling_inits_z, blockSize, zLine);

  // send buffer: three values per entry of a blockSize x blockSize face
  guardSendbuf = new double[3*blockSize*blockSize];
}
Пример #16
0
/// Create and initialise the tracklets of the objects newly detected at
/// frame `tt`.
///
/// The number of new objects is the row count of detected_rects(0). For each
/// of them the tracklet at index oi.curr_num_obj + oo is filled:
///  - per camera: the detected rectangle is stored in the trajectory, part
///    rectangles and the two histograms are computed, rank 0 enumerates
///    candidate rectangles, every rank scores its share of the candidates,
///    the scores are gathered on rank 0 and broadcast back, and the
///    best-scoring candidate provides the feature scores for frame `tt`;
///  - rank 0 combines the per-camera candidates into a ground score map and
///    takes its peak, which becomes the ground position trj_3d(tt, .);
///  - per camera: rank 0 maps the ground position back into the image and
///    the resulting rectangle is broadcast and stored in the trajectory.
/// Finally oi.curr_num_obj is increased by the number of new objects.
///
/// All ranks of `world` must call this function together, because it
/// contains broadcasts and gathers.
///
/// NOTE(review): trlet_list(nn) and oi.pmodel_list(cam, nn) are written
/// without any resize in this function - presumably the caller has already
/// made room; confirm.
/// NOTE(review): `grd`, `nobj`, `num_scales` and `hh` are not used here.
void initialize_new_objects(mpi::communicator& world,
			    parameter_t const& P,  directory_structure_t const& ds,
			    geometric_info_t const& gi, object_info_t& oi,
			    vector<std::vector<std::string> > const &seq, int tt,
			    vector<CImg<unsigned char> > const& images,
			    vector<matrix<float> > const& grd,
			    vector<matrix<float> >& detected_rects)
{
    int Ncam = seq.size();
    vector<object_trj_t> & trlet_list=oi.trlet_list;
    int nobj = trlet_list.size();
    int num_new_obj = detected_rects(0).size1();
    int T = seq[0].size();
    int np = oi.model.size();

    int num_scales = P.scales.size();

    //std::cout<<"detected_rects="<<detected_rects<<std::endl;

    for(int oo=0; oo<num_new_obj; ++oo)
    {
	// index of the new tracklet
	int nn = oi.curr_num_obj + oo;

	// the tracklet starts and (so far) ends at the current frame
	trlet_list(nn).startt = tt;
	trlet_list(nn).endt = tt;
	trlet_list(nn).state = 1;
	// zero-filled per-camera trajectory: one row of 4 values per frame
	trlet_list(nn).trj = vector<matrix<float> >(Ncam);
	for(int cam=0; cam<Ncam; ++cam)
	{
	    trlet_list(nn).trj(cam) = scalar_matrix<float>(T, 4, 0);
	}
	// zero-filled ground trajectory: one row of 2 values per frame
	trlet_list(nn).trj_3d = scalar_matrix<float>(T, 2, 0);

	trlet_list(nn).hist_p = vector<matrix<float> >(Ncam);
	trlet_list(nn).hist_q = vector<matrix<float> >(Ncam);

	trlet_list(nn).fscores = vector<matrix<float> >(Ncam);
	trlet_list(nn).scores = scalar_matrix<float>(Ncam, T, 0);

	vector<candidate_array<Float> > cand_array(Ncam);
	for(int cam=0; cam<Ncam; ++cam)
	{

	    trlet_list(nn).fscores(cam) = scalar_matrix<float>(np*2, T, 0);

	    // width and height of the detection (columns 2-0 and 3-1)
	    float w = detected_rects(cam)(oo, 2)-detected_rects(cam)(oo, 0);
	    float h = detected_rects(cam)(oo, 3)-detected_rects(cam)(oo, 1);
	    row(trlet_list(nn).trj(cam), tt) = row(detected_rects(cam), oo);

	    matrix<float> rects;
	    compute_part_rects(detected_rects(cam)(oo, 0), detected_rects(cam)(oo, 1),
			  w, h, oi.model, rects);

	    pmodel_t pmodel;

	    vector<float> br(row(detected_rects(cam), oo));
	    rects_to_pmodel_geom(br, gi.horiz_mean, pmodel);
	    oi.pmodel_list(cam, nn) = pmodel;

	    //collect_sift(grd(cam), );
	    matrix<float> hist_p, hist_q;
	    collect_hist(images(cam), rects, hist_p, hist_q);
	    trlet_list(nn).hist_p(cam) = hist_p;
	    trlet_list(nn).hist_q(cam) = hist_q;

	    matrix<Float> cand_rects;
	    vector<Float> cand_scale;
	    matrix<int> cand_ijs;

	    // only rank 0 enumerates the candidates; cand_scale, cand_ijs and
	    // cand_array(cam) are filled on rank 0 only at this point
	    if(0==world.rank())
	    {

		// offsets -range/2 .. +range/2 in steps of xstep / ystep
		std::vector<float> sxr, syr;
		for(float v=-P.xrange/2; v<=P.xrange/2; v+=P.xstep)
		{
		    sxr.push_back(v);
		}
		for(float v=-P.yrange/2; v<=P.yrange/2; v+=P.ystep)
		{
		    syr.push_back(v);
		}
		vector<float> xr(sxr.size()), yr(syr.size());
		std::copy(sxr.begin(), sxr.end(), xr.begin());
		std::copy(syr.begin(), syr.end(), yr.begin());

		// mean of columns 0 and 2, and column 3, of the current rectangle
		float feetx = (trlet_list(nn).trj(cam)(tt, 0)
			       +trlet_list(nn).trj(cam)(tt, 2))/2;
		float feety = trlet_list(nn).trj(cam)(tt, 3);


		enumerate_rects_inpoly(images(cam), oi.pmodel_list(cam, nn),
				       feetx, feety,
				       xr, yr, P.scales, gi.horiz_mean, gi.horiz_sig,
				       gi.polys_im(tt, cam),
				       cand_rects, cand_scale,
				       cand_ijs, cand_array(cam));

	    }

	    mpi::broadcast(world, cand_rects, 0);

	    real_timer_t timer;
	    vector<Float> cand_hist_score(cand_rects.size1());
	    matrix<Float> hist_fscores;

	    // rows of cand_rects scored by this rank
	    range rrank(world.rank()*cand_rects.size1()/world.size(), 
			(world.rank()+1)*cand_rects.size1()/world.size());
	    matrix<Float> cand_rects_rank(project(cand_rects, rrank, range(0, 4)));
	    vector<Float> cand_hist_score_rank;
	    matrix<Float> hist_fscores_rank;
	    get_cand_hist_score(images(cam), oi.model, P.logp1, P.logp2,
				trlet_list(nn).hist_p(cam),
				trlet_list(nn).hist_q(cam),
				cand_rects_rank,
				cand_hist_score_rank, hist_fscores_rank);
	    if(world.rank()==0)
	    {
		// root: collect the partial results and stitch them together at
		// the same row offsets that were used to cut the slices
		std::vector<vector<Float> > v1;
		std::vector<matrix<Float> > v2;
		mpi::gather(world, cand_hist_score_rank, v1, 0);
		mpi::gather(world, hist_fscores_rank, v2, 0);
		hist_fscores = matrix<Float>(cand_rects.size1(),
					     hist_fscores_rank.size2());
		for(int r=0; r<world.size(); ++r)
		{
		    int start = r*cand_rects.size1()/world.size();
		    for(int vv=0; vv<v1[r].size(); ++vv)
		    {
			cand_hist_score(start+vv) = v1[r](vv);
		    }
		    for(int vv=0; vv<v2[r].size1(); ++vv)
		    {
			row(hist_fscores, start+vv) = row(v2[r], vv);
		    }
		}
	    }
	    else
	    {
		// non-root: contribute the local slice
		mpi::gather(world, cand_hist_score_rank, 0);
		mpi::gather(world, hist_fscores_rank, 0);
	    }

	    // make the assembled results available on every rank
	    mpi::broadcast(world, cand_hist_score, 0);
	    mpi::broadcast(world, hist_fscores, 0);


	    vector<Float> cand_score=cand_hist_score;
	    if(0==world.rank())
	    std::cout<<"\t\t"<<cand_rects.size1()<<" rects, \tget_cand_hist_score time:"
		     <<timer.elapsed()/1000.0f<<"s."<<std::endl;

	    if(0==world.rank())
	    {
		// index of the best-scoring candidate
		int idx_max = std::max_element(cand_score.begin(), cand_score.end())
		    - cand_score.begin();

		column(trlet_list(nn).fscores(cam), tt) = row(hist_fscores, idx_max);
		trlet_list(nn).scores(cam, tt) = cand_score(idx_max);
		cand_array(cam).fill_score(cand_score, cand_ijs);
	    }
	    mpi::broadcast(world, cand_array(cam), 0);
	    mpi::broadcast(world, trlet_list(nn).scores(cam, tt), 0);
	    // the fscores column is sent through a temporary vector and written
	    // back into the matrix on the receiving ranks
	    vector<Float> fscore_col;
	    if(0==world.rank())
	    {
		fscore_col = column(trlet_list(nn).fscores(cam), tt);
	    }
	    mpi::broadcast(world, fscore_col, 0);
	    if(0!=world.rank())
	    {
		column(trlet_list(nn).fscores(cam), tt) = fscore_col;
	    }


	}//end for cam

	// best_s is only set and only read on rank 0; best_y and best_x are
	// broadcast to all ranks
	int best_y, best_x, best_s;
	if(0==world.rank())
	{
	    ground_scoremap_t<Float> grd_scoremap;
	    combine_ground_score(tt, cand_array, grd_scoremap, gi);
	    grd_scoremap.peak(best_y, best_x, best_s);	
	}
	mpi::broadcast(world, best_y, 0);
	mpi::broadcast(world, best_x, 0);

	trlet_list(nn).trj_3d(tt, 0) = best_x;
	trlet_list(nn).trj_3d(tt, 1) = best_y;

	for(int cam=0; cam<Ncam; ++cam)
	{
	    vector<Float> trj_row(4);

	    if(0==world.rank())
	    {
		// map the ground position into this camera's image and build the
		// rectangle (left, top, right, bottom) from the part model
		vector<double> bx(1), by(1), ix, iy;
		bx <<= best_x; by <<= best_y;
		apply_homography(gi.grd2img(tt, cam), bx, by, ix, iy);
		float hpre = oi.pmodel_list(cam, nn).hpre;
		float cur_fy = iy(0);
		float cur_fx = ix(0);
		float cur_hy = gi.horiz_mean+hpre*(cur_fy-gi.horiz_mean);
		float ds = P.scales(best_s)*(cur_fy-cur_hy)/oi.pmodel_list(cam, nn).bh;
		float ww = ds*oi.pmodel_list(cam, nn).bw;
		float hh = cur_fy - cur_hy;

		trj_row <<= (cur_fx-ww/2), cur_hy, (cur_fx+ww/2), cur_fy;
	    }
	    mpi::broadcast(world, trj_row, 0);
	    row(trlet_list(nn).trj(cam), tt) = trj_row;


	}//endfor cam

    }//endfor oo

    oi.curr_num_obj += num_new_obj;
}
Пример #17
0
/**
 * Master-side Monte Carlo loop (expected to run on rank 0).
 *
 * The master hands out bins to the other ranks on request, computes bins
 * itself while any are left, waits until all par.bins bins are completed and
 * every rank has been told that no work remains, gathers the per-bin means
 * of Q^2 from all ranks and derives the result from them.
 *
 * @param par      simulation parameters (init, drysweeps, bins and binwidth
 *                 are read)
 * @param mpicomm  communicator shared with the workers
 * @return results with success == true and B set on the normal path;
 *         success == false if preparing the replica or reserving the bin
 *         buffers fails
 *
 * NOTE(review): q4 is set to 0 and never assigned afterwards, so B is always
 * computed with q4 == 0; q4_binmeans and q4_currentbin are never evaluated -
 * confirm whether the Q^4 average is still to be implemented.
 * NOTE(review): the two early returns leave without any communication with
 * the workers - confirm how they are expected to terminate in that case.
 */
sim_results simrun_master( const sim_parameters& par,const mpi::communicator& mpicomm)
{
    // ----- PREPARE SIMULATION -----
    // assume something went wrong until we are sure it didn't
    sim_results res;
    res.success = false;
    // ----- RUN SIMULATION -----
    Replica* rep = new Replica(par);

    if ( rep->prepare( par.init ) == false ) {
        delete rep;
        return res;
    }

    // bookkeeping: every bin is enqueued, scheduled or completed
    unsigned int finished_workers = 0;
    unsigned int scheduled_bins = 0;
    unsigned int completed_bins = 0;
    unsigned int enqueued_bins  = par.bins;

    // define procedure to query the slaves for new work requests
    function<void()> mpiquery_work_requests( [&]() {
        while ( boost::optional<mpi::status> status
                = mpicomm.iprobe( mpi::any_source, MSGTAG_S_M_REQUEST_BINS ) ) {
            // receive the request and hand out new bins to the source
            mpicomm.recv( status->source(), MSGTAG_S_M_REQUEST_BINS );
            if ( enqueued_bins > 0 ) {
                mpicomm.send( status->source(), MSGTAG_M_S_DISPATCHED_BINS, 1 );
                scheduled_bins += 1;
                enqueued_bins  -= 1;
            } else {
                // answering with 0 tells the worker that no work is left
                mpicomm.send( status->source(), MSGTAG_M_S_DISPATCHED_BINS, 0 );
                ++finished_workers;
            }
        }
    } );

    // define procedure to query the slaves for finished work
    // NOTE(review): literal tag 2 - presumably MSGTAG_S_M_FINISHED_BINS, which
    // is used for the same purpose further down; confirm.
    function<void()> mpiquery_finished_work( [&]() {
        while ( boost::optional<mpi::status> status
                = mpicomm.iprobe( mpi::any_source, 2 ) ) {
            mpicomm.recv( status->source(), 2 );
            --scheduled_bins;
            ++completed_bins;
        }
    } );

    cout << ":: Performing Monte Carlo cycle" << endl;
    cout << endl;
    cout << "   Progress:" << endl;

    // perform dry runs to reach thermal equilibrium
    for(unsigned int mcs = 0; mcs < par.drysweeps; mcs++) {
        // take care of the slaves
        mpiquery_finished_work();
        mpiquery_work_requests();
        rep->mcs();
    }

    unsigned int completed_bins_master = 0;

    std::vector<double> q2_binmeans;
    std::vector<double> q4_binmeans;

    // the master computes bins itself for as long as any are left
    while ( enqueued_bins > 0 ) {
        cout << '\r' << "     Bin "
             << completed_bins << "/" << par.bins;

        cout.flush();

        // take one bin for the master
        --enqueued_bins;
        ++scheduled_bins;
        // initialize binning array
        vector<double> q2_currentbin;
        vector<double> q4_currentbin;
        try {
            // try to allocate enough memory ...
            q2_currentbin.reserve( par.binwidth );
            q4_currentbin.reserve( par.binwidth );
        } catch ( bad_alloc ) {
            delete rep;
            return res;
        }
        for (unsigned int mcs = 0;mcs < par.binwidth;++mcs ) {
            // take care of the slaves
            mpiquery_finished_work();
            mpiquery_work_requests();

            // perform a Monte Carlo step
            rep->mcs();

            // measure observables
            double q2 = 0, q4 = 0;
            double thissample_q = rep->Q();
            // remember the sample's properties to calculate their mean value
            q2 	= thissample_q * thissample_q;
            q4 	= thissample_q * thissample_q * thissample_q * thissample_q;
            q2_currentbin.push_back(q2);
            q4_currentbin.push_back(q4);
        }


        // mean of Q^2 over this bin
        q2_binmeans.push_back(
                    accumulate( q2_currentbin.begin(), q2_currentbin.end(), 0.0 ) /
                    static_cast<double>( q2_currentbin.size() )
                    );
        q2_currentbin.clear();

        --scheduled_bins;
        ++completed_bins_master;
        ++completed_bins;
    }
    // the master itself counts as one finished worker
    ++finished_workers;

    // poll until every bin is completed and every rank has been told that
    // there is no more work
    while ( completed_bins != par.bins ||
            static_cast<int>( finished_workers ) < mpicomm.size() ) {
        if ( boost::optional<mpi::status> status
             = mpicomm.iprobe( mpi::any_source, MSGTAG_S_M_FINISHED_BINS ) ) {
            mpicomm.recv( status->source(), MSGTAG_S_M_FINISHED_BINS );
            --scheduled_bins;
            ++completed_bins;

            cout << "\n";
            cout << '\r' << "     Bin " << completed_bins << "/" << par.bins;
            cout.flush();
        }

        if ( boost::optional<mpi::status> status
             = mpicomm.iprobe( mpi::any_source, MSGTAG_S_M_REQUEST_BINS ) ) {
            // receive the request for more work
            mpicomm.recv( status->source(), MSGTAG_S_M_REQUEST_BINS );
            // tell him there is no more work
            mpicomm.send( status->source(), MSGTAG_M_S_DISPATCHED_BINS, 0 );
            ++finished_workers;
        }
    }

    assert( enqueued_bins == 0 );
    assert( scheduled_bins == 0 );

    cout << '\r' << "     Bin " << completed_bins << "/" << par.bins << endl;
    cout.flush();

    // all measurements done ... let's tidy things up
    delete rep;

    // collect the per-bin means of every rank (one vector per rank)
    assert( mpicomm.rank() == 0 );
    vector< vector<double> > q2_binmeans_collector;
    mpi::gather( mpicomm, q2_binmeans, q2_binmeans_collector, 0 );

    // flatten them into a single list
    vector<double> q2_binmeans_all;
    for ( auto it = q2_binmeans_collector.begin();
          it != q2_binmeans_collector.end();
          ++it ) {
        q2_binmeans_all.insert( q2_binmeans_all.end(), it->begin(), it->end() );
    }

    // overall mean of Q^2 over all bins
    double q2 = 0, q4 = 0;
    q2 = static_cast<double>(
                accumulate( q2_binmeans_all.begin(), q2_binmeans_all.end(), 0.0 )
                ) / static_cast<double>( q2_binmeans_all.size() );

    // q4 is still 0 at this point (see the note in the function header)
    double B = 0;
    B = (3 - q4 / (q2 * q2)) / 2;
    res.B = B;
    res.success = true;
    return res;
}
Пример #18
0
/**
 * Fill `matrix` with Mandelbrot values, distributing the rows dynamically.
 *
 * With more than one process, rank 0 acts as a controller: it answers every
 * work request with the index of the next row, or with NO_MORE_WORK once all
 * rows are handed out, then receives the computed rows and finally
 * broadcasts the whole matrix. All other ranks request rows, compute them
 * and send them back using tag (row index + 1). With a single process the
 * matrix is computed directly.
 *
 * The controller answers requests with blocking sends: the value being sent
 * is a loop variable that is modified right afterwards, which is not allowed
 * while a non-blocking send is still in flight. A blocking send cannot
 * deadlock here, because the requesting worker is already waiting in recv
 * for the answer.
 *
 * NOTE(review): the workers still return rows with isend and drop the
 * request object; the row buffers stay alive, but completion is never
 * awaited before the final broadcast writes into them - confirm.
 * NOTE(review): dx and dy divide by (nr - 1) and (nc - 1), so nr == 1 or
 * nc == 1 divides by zero.
 */
void mandel_mpi (mpi::communicator world,
                 int2D*		matrix,			/* to fill */
                 int		nr,			/* row size */
                 int		nc,			/* column size */
                 real		base_x,			/* lower left corner */
                 real		base_y,			/* lower left corner */
                 real		ext_x,			/* extent */
                 real		ext_y)			/* extent */
{
  int		r, c;			/* row and column indices */
  real		dx, dy;			/* per-step deltas */
#if GRAPHICS
  int		gfxCount = 0;		/* number of times graphics called */
#endif

  int row_count = 0;
  int i;
  mpi::status status;
  int source;
  const int WORK_REQUEST_TAG = 0;
  const int WORK_RESPONSE_TAG = 1;
  const int NO_MORE_WORK = -1;
  int processed_rows = 0;

  dx = ext_x / (nr - 1);
  dy = ext_y / (nc - 1);

  if (world.size () > 1) {
    if (world.rank () == 0) {
      // control process

      // send out work
      while (row_count < nr) {
        status = world.recv (mpi::any_source, WORK_REQUEST_TAG);
        source = status.source ();
        // send next row; blocking, because row_count changes right after
        world.send (source, WORK_RESPONSE_TAG, row_count);
        row_count++;
      }
      // send out no more work: one answer per worker
      for (i = 1; i < world.size (); i++) {
        status = world.recv (mpi::any_source, WORK_REQUEST_TAG);
        source = status.source ();
        world.send (source, WORK_RESPONSE_TAG, NO_MORE_WORK);
      }
      // receive results; the tag identifies the row, whoever computed it
      for (r = 0; r < nr; r++) {
        world.recv (mpi::any_source, r + 1, matrix[r], nc);
      }
    }
    else {
      // work process
      while (true) {
        // request next row
        world.send (0, WORK_REQUEST_TAG);
        world.recv (0, WORK_RESPONSE_TAG, r);
        if (r != NO_MORE_WORK) {
          for (c = 0; c < nc; c++) {
            matrix[r][c] = mandel_calc_mpi (base_x + (r * dx), base_y + (c * dy));
          }
          processed_rows++;
          // send results
          world.isend (0, r + 1, matrix[r], nc);
        }
        else {
          break;
        }
      }
#if defined(TEST_OUTPUT) || defined(TEST_TIME)
      printf ("processed rows: %d\n", processed_rows);
#endif
    }
    // broadcast matrix
    for (r = 0; r < nr; r++) {
      broadcast (world, matrix[r], nc, 0);
    }
  }
  else {
    for (r = 0; r < nr; r++) {
      for (c = 0; c < nc; c++) {
        matrix[r][c] = mandel_calc_mpi (base_x + (r * dx), base_y + (c * dy));
      }
    }
  }

#if GRAPHICS
  gfx_mandel(gfxCount++, matrix, nr, nc);
#endif

  /* return */
}