Ejemplo n.º 1
0
/** Set up the replica trajectory that this rank will process (parallel version).
  * Each rank only sets up the file at index ensComm.Rank() in replica_filenames_;
  * all other slots of IOarray_ are filled with null entries.
  * \param argIn    Arguments; passed to the format-specific read-argument
  *                 processing and to the frame counter.
  * \param counter  Frame counter; set up from the lowest frame count found
  *                 across all ranks and the remaining arguments.
  * \param cInfo    Output: coordinate info of this rank's replica, with the
  *                 ensemble size set to the number of entries in IOarray_.
  * \param trajParm Topology used to set up the replica for reading.
  * \param ensComm  Communicator across ensemble members; its rank selects the
  *                 replica file and its size determines the size of IOarray_.
  * \param trajComm Communicator for the trajectory; only its master takes part
  *                 in the coordinate-info comparison across the ensemble.
  * \return 0 on success, 1 on error.
  */
int TrajIOarray::SetupIOarray(ArgList& argIn, TrajFrameCounter& counter,
                              CoordinateInfo& cInfo, Topology* trajParm,
                              Parallel::Comm const& ensComm, Parallel::Comm const& trajComm)
{
  // Sanity check
  if (!IOarray_.empty()) {
    mprinterr("Internal Error: SetupIOarray() has been called twice.\n");
    return 1;
  }
  // SANITY CHECK: IOarray_ receives exactly one entry per ensemble rank, so the
  // ensemble size must equal the number of replica file names. This is checked
  // BEFORE replica_filenames_ is indexed by rank so that a too-large ensemble
  // communicator cannot cause an out-of-bounds read.
  // NOTE(review): assumes replica_filenames_ has the same size on every rank,
  // so all ranks take this return together - confirm against caller.
  if ((int)replica_filenames_.size() != ensComm.Size()) {
    mprinterr("Error: Not all replica files were set up.\n");
    return 1;
  }
  // Detect format
  FileName const& repFname = replica_filenames_[ensComm.Rank()];
  TrajectoryFile::TrajFormatType repformat = TrajectoryFile::UNKNOWN_TRAJ;
  TrajectoryIO* replica0 = TrajectoryFile::DetectFormat( repFname, repformat );
  if ( replica0 == 0 ) {
    mprinterr("Error: Could not set up replica file %s\n", repFname.full());
    return 1;
  }
  mprintf("\tReading '%s' as %s\n", repFname.full(), TrajectoryFile::FormatString(repformat));
  replica0->SetDebug( debug_ );
  // Construct the IOarray_ with blanks for all except this rank.
  for (int member = 0; member != ensComm.Size(); member++)
    if (member == ensComm.Rank())
      IOarray_.push_back( replica0 );
    else
      IOarray_.push_back( 0 );
  // Process format-specific read args.
  replica0->processReadArgs( argIn );
  // Set up replica for reading and get # frames
  int nframes = replica0->setupTrajin( repFname, trajParm );
  if (nframes == TrajectoryIO::TRAJIN_ERR) {
    mprinterr("Error: Could not set up %s for reading.\n", repFname.full());
    return 1;
  }
  // Set coordinate info
  cInfo = replica0->CoordInfo();
  int totalFrames = nframes;
  if (cInfo.ReplicaDimensions().Ndims() > 0) { // TODO put in common routine
    mprintf("\tReplica dimensions:\n");
    for (int rd = 0; rd < cInfo.ReplicaDimensions().Ndims(); rd++)
      mprintf("\t\t%i: %s\n", rd+1, cInfo.ReplicaDimensions().Description(rd));
  }
  // Check # frames in all files, use lowest. After the reduction totalFrames
  // is used below as the minimum frame count over all ranks.
  // NOTE(review): this is a collective call; a rank that returned early above
  // will not take part in it - confirm callers handle per-rank setup failures.
  Parallel::World().AllReduce( &totalFrames, &nframes, 1, MPI_INT, MPI_MIN );
  if (totalFrames != nframes) {
    rprintf("Warning: Replica '%s' frames (%i) is > # frames in shortest replica.\n",
            repFname.full(), nframes);
    mprintf("Warning: Setting total # of frames to read from replica ensemble to %i\n",
            totalFrames);
  }
  if (trajComm.Master()) {
    static const int iSize = 6;
    static const char* iTitle[iSize] = {"box", "velocity", "temperature", "time", "force",
                                        "replica dimensions"};
    // Check coordinate info of all files               0    1    2     3     4      5
    std::vector<int> Info( iSize * ensComm.Size() ); // box, vel, temp, time, force, nRepDims
    int rank_info[iSize];
    rank_info[0] = (int)cInfo.TrajBox().Type();
    rank_info[1] = (int)cInfo.HasVel();
    rank_info[2] = (int)cInfo.HasTemp();
    rank_info[3] = (int)cInfo.HasTime();
    rank_info[4] = (int)cInfo.HasForce();
    rank_info[5] = cInfo.ReplicaDimensions().Ndims();
    // Gather the iSize values of every ensemble member into Info.
    ensComm.AllGather( rank_info, iSize, MPI_INT, &Info[0] );
    // TODO Should mismatches be errors instead?
    // For each field (midx), compare the first iSize-sized record in Info
    // against the same field in every subsequent record; ridx/iSize is the
    // index of the record being compared.
    for (int midx = 0; midx != iSize; midx++) {
      for (int ridx = midx + iSize; ridx < (int)Info.size(); ridx += iSize) {
        if (Info[midx] != Info[ridx]) {
          rprintf("Warning: Replica %i %s info does not match first replica.\n",
                  ridx/iSize, iTitle[midx]);
        }
      }
    }
  }
  // TODO: Put code below into a common routine with serial version
  // Check how many frames will actually be read
  if (counter.CheckFrameArgs( totalFrames, argIn )) return 1;
  // Update ensemble size. IOarray_ holds one entry per ensemble rank, which
  // was verified above to equal the number of replica file names.
  cInfo.SetEnsembleSize( (int)IOarray_.size() );
  if (debug_ > 0)
    cInfo.PrintCoordInfo( repFname.full(), trajParm->c_str() );

  return 0;
}
Ejemplo n.º 2
0
/** Synchronize timeseries data from child ranks to master.
  * Only sets for which NeedsSync() is true are processed. The number of such
  * sets must be identical on every rank of commIn.
  * \param commIn Communicator over which data is synchronized. If it has
  *               fewer than 2 ranks nothing is done.
  * \return 0 on success (a failed Sync() of an individual set only produces
  *         a warning), 1 if the number of sets to sync differs between ranks.
  */
int DataSetList::SynchronizeData(Parallel::Comm const& commIn) {
  if (commIn.Size() < 2) return 0;
  // Ensure that the number of sets that require sync is same on each rank.
  // FIXME: Make sure this allgather does not end up taking too much time.
  //        Should it be debug only?
  std::vector<int> size_on_rank;
  size_on_rank.reserve( DataList_.size() );
  DataListType SetsToSync;
  for (DataListType::iterator ds = DataList_.begin(); ds != DataList_.end(); ++ds)
    if ( (*ds)->NeedsSync() ) {
      SetsToSync.push_back( *ds );
      size_on_rank.push_back( (*ds)->Size() );
    }
// DEBUG
  //for (int rank = 0; rank != commIn.Size(); rank++) {
  //  if (rank == commIn.Rank())
  //    for (DataListType::const_iterator ds = SetsToSync.begin(); ds != SetsToSync.end(); ++ds)
  //      rprintf("SET '%s'\n", (*ds)->legend());
  //  commIn.Barrier();
  //}
// DEBUG END
  std::vector<int> n_on_rank( commIn.Size(), 0 );
  int nSets = (int)SetsToSync.size();
  commIn.AllGather( &nSets, 1, MPI_INT, &n_on_rank[0] );
  // Every rank holds the same gathered counts, so every rank reaches the same
  // decision in the checks below.
  for (int rank = 1; rank < commIn.Size(); rank++)
    if (n_on_rank[rank] != n_on_rank[0]) {
      mprinterr("Internal Error: Number of sets to sync on rank %i (%i) != number on master %i\n",
                rank, n_on_rank[rank], n_on_rank[0]);
      return 1;
    }
  // Nothing to sync. Counts were just verified equal on all ranks, so all
  // ranks return here together. This also avoids taking the address of
  // element 0 of the (empty) size vectors below.
  if (nSets == 0) return 0;
  // Send all data set sizes to master.
  // On master, all_rank_sizes is read below as nSets consecutive sizes per
  // rank, i.e. the size of set 'i' on rank 'r' is at index r*nSets + i.
  std::vector<int> all_rank_sizes;
  if (commIn.Master()) {
    all_rank_sizes.resize( nSets * commIn.Size() );
    commIn.GatherMaster( &size_on_rank[0], nSets, MPI_INT, &all_rank_sizes[0] );
  } else {
    commIn.GatherMaster( &size_on_rank[0], nSets, MPI_INT, 0 );
  }
  size_on_rank.clear();
  // Call Sync only for sets that need it.
  // rank_frames and total are only filled in on master; on other ranks they
  // are passed to Sync() as zeros.
  std::vector<int> rank_frames( commIn.Size() );
  int total = 0; //TODO size_t?
  int idx0 = 0;
  for (DataListType::iterator ds = SetsToSync.begin(); ds != SetsToSync.end(); ++ds, ++idx0) {
    if (commIn.Master()) {
      // Collect the size of this set on each rank and the overall total.
      total = all_rank_sizes[idx0];
      rank_frames[0] = all_rank_sizes[idx0];
      int idx1 = idx0 + nSets;
      for (int rank = 1; rank < commIn.Size(); rank++, idx1 += nSets) {
        total += all_rank_sizes[idx1];
        rank_frames[rank] = all_rank_sizes[idx1];
      }
      //mprintf("DEBUG: Syncing '%s' (size=%zu, total=%i)\n", (*ds)->Meta().PrintName().c_str(),
      //        (*ds)->Size(), total);
    }
    // A failed sync is reported but does not abort syncing of remaining sets.
    if ( (*ds)->Sync(total, rank_frames, commIn) ) {
      rprintf( "Warning: Could not sync dataset '%s'\n",(*ds)->legend());
      //return;
    }
    (*ds)->SetNeedsSync( false );
  }
  return 0;
}