int DataSet_GridDbl::Sync(size_t total, std::vector<int> const& rank_frames, Parallel::Comm const& commIn) { if (commIn.Master()) { std::vector<double> buf( grid_.size() ); commIn.Reduce( &(buf[0]), &(grid_[0]), grid_.size(), MPI_FLOAT, MPI_SUM ); std::copy( buf.begin(), buf.end(), grid_.begin() ); } else commIn.Reduce( 0, &(grid_[0]), grid_.size(), MPI_FLOAT, MPI_SUM ); return 0; }
/** Broadcast NetCDF dimension/variable IDs and sizes from the master rank
  * so all ranks share identical NetCDF bookkeeping after master-only setup.
  */
void NetcdfFile::Sync(Parallel::Comm const& commIn) {
  // Table of every member that must stay in sync, in wire order. Stating
  // the order once avoids the master/non-master index lists drifting apart.
  int* members[23] = {
    &ncframe_,         &TempVID_,         &coordVID_,
    &velocityVID_,     &frcVID_,          &cellAngleVID_,
    &cellLengthVID_,   &timeVID_,         &remd_dimension_,
    &indicesVID_,      &ncdebug_,         &ensembleDID_,
    &frameDID_,        &atomDID_,         &ncatom_,
    &ncatom3_,         &spatialDID_,      &labelDID_,
    &cell_spatialDID_, &cell_angularDID_, &spatialVID_,
    &cell_spatialVID_, &cell_angularVID_
  };
  int nc_vars[23];
  // Master packs current values into the broadcast buffer.
  if (commIn.Master())
    for (int idx = 0; idx != 23; idx++) nc_vars[idx] = *(members[idx]);
  commIn.MasterBcast( nc_vars, 23, MPI_INT );
  // Non-master ranks unpack the received values into their members.
  if (!commIn.Master())
    for (int idx = 0; idx != 23; idx++) *(members[idx]) = nc_vars[idx];
}
/** Open an existing NetCDF trajectory for writing in parallel via pnetcdf,
  * then switch to independent data mode so each rank can write its own
  * frames without collective synchronization.
  * \param commIn Communicator whose MPI communicator is used for the open.
  * \return 0 on success (or if the file is already open), 1 on error.
  */
int Traj_AmberNetcdf::parallelOpenTrajout(Parallel::Comm const& commIn) {
  if (Ncid() != -1) return 0; // Already open.
  int err = ncmpi_open(commIn.MPIcomm(), filename_.full(), NC_WRITE,
                       MPI_INFO_NULL, &ncid_);
  if (checkPNCerr(err)) {
    mprinterr("Error: Opening NetCDF file '%s' for writing in parallel.\n",
              filename_.full());
    return 1;
  }
  // Independent data mode
  err = ncmpi_begin_indep_data( ncid_ );
  // FIXED: this error code was previously assigned and silently discarded;
  // a failure here would leave the file in collective mode and break writes.
  if (checkPNCerr(err)) {
    mprinterr("Error: Could not enter independent data mode for '%s'.\n",
              filename_.full());
    return 1;
  }
  return 0;
}
/** Each rank searches for replica based on lowest replica number. */
int TrajIOarray::SearchForReplicas(FileName const& fname, Parallel::Comm const& ensComm,
                                   Parallel::Comm const& trajComm)
{
  RepName repName(fname, debug_);
  if (repName.Error()) return 1;
  // TODO check for lower replica number?
  // This rank is responsible for the replica matching its ensemble rank.
  FileName myReplica = repName.RepFilename( ensComm.Rank() );
  // Only traj comm masters actually check for files.
  if (trajComm.Master() && !File::Exists( myReplica )) {
    File::ErrorMsg( myReplica.full() );
    rprinterr("Error: File '%s' not accessible.\n", myReplica.full());
    return 1;
  }
  // At this point each rank has found its replica. Populate filename array.
  for (int member = 0; member < ensComm.Size(); ++member)
    replica_filenames_.push_back( repName.RepFilename( member ) );
  return 0;
}
/** Each rank checks that specified file is present. */
int TrajIOarray::AddReplicasFromArgs(FileName const& name0,
                                     std::string const& commaNames,
                                     Parallel::Comm const& ensComm,
                                     Parallel::Comm const& trajComm)
{
  // First set up filename array on all ranks.
  if (name0.empty()) return 1;
  replica_filenames_.push_back( name0 );
  // Remaining names are given as a comma-separated list.
  ArgList remdtraj_list( commaNames, "," );
  for (ArgList::const_iterator it = remdtraj_list.begin();
                               it != remdtraj_list.end(); ++it)
    replica_filenames_.push_back( FileName( *it ) );
  // Number of replica files must match the ensemble size.
  if (ensComm.Size() != (int)replica_filenames_.size()) return 1;
  // Only traj comm master checks file
  if (trajComm.Master()) {
    FileName const& myName = replica_filenames_[ ensComm.Rank() ];
    if (!File::Exists( myName )) {
      File::ErrorMsg( myName.full() );
      rprinterr("Error: File '%s' not accessible.\n", myName.full());
      return 1;
    }
  }
  return 0;
}
/** First master performs all necessary setup, then sends info to all children. */ int Traj_AmberNetcdf::parallelSetupTrajout(FileName const& fname, Topology* trajParm, CoordinateInfo const& cInfoIn, int NframesToWrite, bool append, Parallel::Comm const& commIn) { int err = 0; if (commIn.Master()) { err = setupTrajout(fname, trajParm, cInfoIn, NframesToWrite, append); // NOTE: setupTrajout leaves file open. Should this change? NC_close(); } commIn.MasterBcast(&err, 1, MPI_INT); if (err != 0) return 1; // Synchronize netcdf info on non-master threads. Sync(commIn); if (!commIn.Master()) { // Non masters need filename and allocate Coord filename_ = fname; if (Coord_ != 0) delete[] Coord_; Coord_ = new float[ Ncatom3() ]; } return 0; }
// DataSet_double::Sync() int DataSet_double::Sync(size_t total, std::vector<int> const& rank_frames, Parallel::Comm const& commIn) { if (commIn.Size()==1) return 0; if (commIn.Master()) { // Resize for total number of frames. Data_.resize( total ); double* endptr = &(Data_[0]) + rank_frames[0]; // Receive data from each rank for (int rank = 1; rank < commIn.Size(); rank++) { commIn.SendMaster( endptr, rank_frames[rank], rank, MPI_DOUBLE ); endptr += rank_frames[rank]; } } else // Send data to master //TODO adjust for repeated additions? commIn.SendMaster( &(Data_[0]), Data_.size(), commIn.Rank(), MPI_DOUBLE ); return 0; }
/** Each rank only sets up file that it will process. */
int TrajIOarray::SetupIOarray(ArgList& argIn, TrajFrameCounter& counter,
                              CoordinateInfo& cInfo, Topology* trajParm,
                              Parallel::Comm const& ensComm, Parallel::Comm const& trajComm)
{
  // Sanity check
  if (!IOarray_.empty()) {
    mprinterr("Internal Error: SetupIOarray() has been called twice.\n");
    return 1;
  }
  // Detect format of the replica file assigned to this ensemble rank.
  FileName const& repFname = replica_filenames_[ensComm.Rank()];
  TrajectoryFile::TrajFormatType repformat = TrajectoryFile::UNKNOWN_TRAJ;
  TrajectoryIO* replica0 = TrajectoryFile::DetectFormat( repFname, repformat );
  if ( replica0 == 0 ) {
    mprinterr("Error: Could not set up replica file %s\n", repFname.full());
    return 1;
  }
  mprintf("\tReading '%s' as %s\n", repFname.full(), TrajectoryFile::FormatString(repformat));
  replica0->SetDebug( debug_ );
  // Construct the IOarray_ with blanks for all except this rank.
  // IOarray_ length always equals ensemble size; only this rank's slot is live.
  for (int member = 0; member != ensComm.Size(); member++)
    if (member == ensComm.Rank())
      IOarray_.push_back( replica0 );
    else
      IOarray_.push_back( 0 );
  // Process format-specific read args.
  replica0->processReadArgs( argIn );
  // Set up replica for reading and get # frames
  int nframes = replica0->setupTrajin( repFname, trajParm );
  if (nframes == TrajectoryIO::TRAJIN_ERR) {
    mprinterr("Error: Could not set up %s for reading.\n", repFname.full());
    return 1;
  }
  // Set coordinate info
  cInfo = replica0->CoordInfo();
  int totalFrames = nframes;
  if (cInfo.ReplicaDimensions().Ndims() > 0) { // TODO put in common routine
    mprintf("\tReplica dimensions:\n");
    for (int rd = 0; rd < cInfo.ReplicaDimensions().Ndims(); rd++)
      mprintf("\t\t%i: %s\n", rd+1, cInfo.ReplicaDimensions().Description(rd));
  }
  // Check # frames in all files, use lowest.
  // NOTE(review): totalFrames receives the MPI_MIN reduction of nframes over
  // all world ranks, i.e. after this call it holds the shortest replica length.
  Parallel::World().AllReduce( &totalFrames, &nframes, 1, MPI_INT, MPI_MIN );
  if (totalFrames != nframes) {
    rprintf("Warning: Replica '%s' frames (%i) is > # frames in shortest replica.\n",
            repFname.full(), nframes);
    mprintf("Warning: Setting total # of frames to read from replica ensemble to %i\n",
            totalFrames);
  }
  if (trajComm.Master()) {
    static const int iSize = 6;
    static const char* iTitle[iSize] = {"box", "velocity", "temperature", "time",
                                        "force", "replica dimensions"};
    // Check coordinate info of all files. Layout: iSize entries per ensemble
    // member; fields are 0=box, 1=vel, 2=temp, 3=time, 4=force, 5=nRepDims.
    std::vector<int> Info( iSize * ensComm.Size() );
    int rank_info[iSize];
    rank_info[0] = (int)cInfo.TrajBox().Type();
    rank_info[1] = (int)cInfo.HasVel();
    rank_info[2] = (int)cInfo.HasTemp();
    rank_info[3] = (int)cInfo.HasTime();
    rank_info[4] = (int)cInfo.HasForce();
    rank_info[5] = cInfo.ReplicaDimensions().Ndims();
    // NOTE(review): this AllGather on ensComm runs only on trajComm masters;
    // assumes ensComm membership consists of exactly those ranks — confirm.
    ensComm.AllGather( rank_info, iSize, MPI_INT, &Info[0] );
    // TODO Should mismatches be errors instead?
    // Compare each field (midx) of every non-first replica against replica 0.
    for (int midx = 0; midx != iSize; midx++) {
      for (int ridx = midx + iSize; ridx < (int)Info.size(); ridx += iSize) {
        if (Info[midx] != Info[ridx]) {
          rprintf("Warning: Replica %i %s info does not match first replica.\n",
                  ridx/iSize, iTitle[midx]);
        }
      }
    }
  }
  // TODO: Put code below into a common routine with serial version
  // Check how many frames will actually be read
  if (counter.CheckFrameArgs( totalFrames, argIn )) return 1;
  // SANITY CHECK
  if (IOarray_.size() != replica_filenames_.size()) {
    mprinterr("Error: Not all replica files were set up.\n");
    return 1;
  }
  // Update ensemble size
  cInfo.SetEnsembleSize( (int)IOarray_.size() );
  if (debug_ > 0)
    cInfo.PrintCoordInfo( repFname.full(), trajParm->c_str() );
  return 0;
}
/** First master performs all necessary setup, then sends info to all children. */ int Traj_GmxTrX::parallelSetupTrajout(FileName const& fname, Topology* trajParm, CoordinateInfo const& cInfoIn, int NframesToWrite, bool append, Parallel::Comm const& commIn) { int err = 0; // In parallel MUST know # of frames to write in order to correctly set size if (NframesToWrite < 1) { mprinterr("Error: # frames to write must be known for TRR output in parallel.\n"); err = 1; } else if (commIn.Master()) { err = setupTrajout(fname, trajParm, cInfoIn, NframesToWrite, append); // Determine header size, (18 * 4) + titleSize TODO put in setupTrajout? headerBytes_ = (18 * 4) + Title().size(); // Determine frame size frameSize_ = headerBytes_ + box_size_ + x_size_ + v_size_; // NOTE: setupTrajout leaves file open. Should this change? file_.CloseFile(); } commIn.MasterBcast(&err, 1, MPI_INT); if (err != 0) return 1; // Synchronize info on non-master threads. SyncTrajIO( commIn ); commIn.MasterBcast( &ir_size_, 1, MPI_INT ); commIn.MasterBcast( &e_size_, 1, MPI_INT ); commIn.MasterBcast( &box_size_, 1, MPI_INT ); commIn.MasterBcast( &vir_size_, 1, MPI_INT ); commIn.MasterBcast( &pres_size_, 1, MPI_INT ); commIn.MasterBcast( &top_size_, 1, MPI_INT ); commIn.MasterBcast( &sym_size_, 1, MPI_INT ); commIn.MasterBcast( &x_size_, 1, MPI_INT ); commIn.MasterBcast( &v_size_, 1, MPI_INT ); commIn.MasterBcast( &f_size_, 1, MPI_INT ); commIn.MasterBcast( &natoms_, 1, MPI_INT ); commIn.MasterBcast( &natom3_, 1, MPI_INT ); commIn.MasterBcast( &step_, 1, MPI_INT ); commIn.MasterBcast( &nre_, 1, MPI_INT ); commIn.MasterBcast( &precision_, 1, MPI_INT ); commIn.MasterBcast( &dt_, 1, MPI_FLOAT ); commIn.MasterBcast( &lambda_, 1, MPI_FLOAT ); // NOTE: cast these to unsigned long long to avoid ambiguity since MPI doesnt have size_t unsigned long long buf[2]; if (commIn.Master()) { buf[0] = (unsigned long long)frameSize_; buf[1] = (unsigned long long)headerBytes_; commIn.MasterBcast( buf, 2, 
MPI_UNSIGNED_LONG_LONG ); } else { commIn.MasterBcast( buf, 2, MPI_UNSIGNED_LONG_LONG ); frameSize_ = (size_t)buf[0]; headerBytes_ = (size_t)buf[1]; AllocateCoords(); // Should already be done on master } if (append) file_.SetupWrite( fname, debug_ ); else file_.SetupAppend( fname, debug_ ); if (debug_ > 0) rprintf("Gromacs TRR: parallel headerSize= %zu frameSize= %zu\n", headerBytes_, frameSize_); return 0; }
/** Synchronize timeseries data from child ranks to master. */
int DataSetList::SynchronizeData(Parallel::Comm const& commIn) {
  // Nothing to synchronize in serial / single-rank runs.
  if (commIn.Size() < 2) return 0;
  // Ensure that the number of sets that require sync is same on each rank.
  // FIXME: Make sure this allgather does not end up taking too much time.
  // Should it be debug only?
  std::vector<int> size_on_rank;
  size_on_rank.reserve( DataList_.size() );
  DataListType SetsToSync;
  // Collect sets flagged as needing sync along with their local sizes.
  for (DataListType::iterator ds = DataList_.begin(); ds != DataList_.end(); ++ds)
    if ( (*ds)->NeedsSync() ) {
      SetsToSync.push_back( *ds );
      size_on_rank.push_back( (*ds)->Size() );
    }
  // DEBUG
  //for (int rank = 0; rank != commIn.Size(); rank++) {
  //  if (rank == commIn.Rank())
  //    for (DataListType::const_iterator ds = SetsToSync.begin(); ds != SetsToSync.end(); ++ds)
  //      rprintf("SET '%s'\n", (*ds)->legend());
  //  commIn.Barrier();
  //}
  // DEBUG END
  // Gather set counts from all ranks; Sync calls below are collective, so
  // every rank must agree on the number of sets or ranks would deadlock.
  std::vector<int> n_on_rank( commIn.Size(), 0 );
  int nSets = (int)SetsToSync.size();
  commIn.AllGather( &nSets, 1, MPI_INT, &n_on_rank[0] );
  for (int rank = 1; rank < commIn.Size(); rank++)
    if (n_on_rank[rank] != n_on_rank[0]) {
      mprinterr("Internal Error: Number of sets to sync on rank %i (%i) != number on master %i\n",
                rank, n_on_rank[rank], n_on_rank[0]);
      return 1;
    }
  // Send all data set sizes to master.
  // Layout of all_rank_sizes is rank-major with nSets entries per rank:
  // size of set s on rank r is all_rank_sizes[r*nSets + s].
  std::vector<int> all_rank_sizes;
  if (commIn.Master()) {
    all_rank_sizes.resize( nSets * commIn.Size() );
    commIn.GatherMaster( &size_on_rank[0], nSets, MPI_INT, &all_rank_sizes[0] );
  } else {
    commIn.GatherMaster( &size_on_rank[0], nSets, MPI_INT, 0 );
  }
  size_on_rank.clear();
  // Call Sync only for sets that need it.
  std::vector<int> rank_frames( commIn.Size() );
  int total = 0; //TODO size_t?
  int idx0 = 0; // Index of current set within this rank's stride of all_rank_sizes.
  for (DataListType::iterator ds = SetsToSync.begin(); ds != SetsToSync.end(); ++ds, ++idx0) {
    if (commIn.Master()) {
      // Compute the grand total and the per-rank frame counts for this set
      // by striding through all_rank_sizes (idx1 jumps nSets per rank).
      total = all_rank_sizes[idx0];
      rank_frames[0] = all_rank_sizes[idx0];
      int idx1 = idx0 + nSets;
      for (int rank = 1; rank < commIn.Size(); rank++, idx1 += nSets) {
        total += all_rank_sizes[idx1];
        rank_frames[rank] = all_rank_sizes[idx1];
      }
      //mprintf("DEBUG: Syncing '%s' (size=%zu, total=%i)\n", (*ds)->Meta().PrintName().c_str(),
      //        (*ds)->Size(), total);
    }
    // NOTE: on non-master ranks total/rank_frames are not meaningful; each
    // set's Sync implementation only uses them on the master.
    if ( (*ds)->Sync(total, rank_frames, commIn) ) {
      rprintf( "Warning: Could not sync dataset '%s'\n",(*ds)->legend());
      //return;
    }
    (*ds)->SetNeedsSync( false );
  }
  return 0;
}
/** First master performs all necessary setup, then sends info to all children. */
int Traj_AmberCoord::parallelSetupTrajout(FileName const& fname, Topology* trajParm,
                                          CoordinateInfo const& cInfoIn, int NframesToWrite,
                                          bool append, Parallel::Comm const& commIn)
{
  int err = 0;
  // In parallel MUST know # of frames to write in order to correctly set size
  if (NframesToWrite < 1) {
    mprinterr("Error: # frames to write must be known for Amber Coords output in parallel.\n");
    err = 1;
  } else if (commIn.Master()) {
    // NOTE: This writes the title.
    err = setupTrajout(fname, trajParm, cInfoIn, NframesToWrite, append);
    // NOTE: setupTrajout leaves file open. Should this change?
    file_.CloseFile();
  }
  // Every rank must learn whether master setup succeeded.
  commIn.MasterBcast(&err, 1, MPI_INT);
  if (err != 0) return 1;
  // Synchronize info on non-master threads.
  SyncTrajIO( commIn );
  // TODO For simplicity convert everything to double. Is this just lazy?
  // Pack/unpack order — indices MUST match between the two branches:
  //   0 natom3, 1 headerSize, 2 tStart, 3 tEnd, 4 numBoxCoords,
  //   5-7 box angles, 8 writeType, 9 highPrecision.
  double tmpArray[10];
  if (commIn.Master()) {
    tmpArray[0] = (double)natom3_;
    tmpArray[1] = (double)headerSize_;
    tmpArray[2] = (double)tStart_;
    tmpArray[3] = (double)tEnd_;
    tmpArray[4] = (double)numBoxCoords_;
    tmpArray[5] = boxAngle_[0];
    tmpArray[6] = boxAngle_[1];
    tmpArray[7] = boxAngle_[2];
    tmpArray[8] = (double)writeType_;
    tmpArray[9] = (double)highPrecision_;
    commIn.MasterBcast(tmpArray, 10, MPI_DOUBLE);
  } else {
    commIn.MasterBcast(tmpArray, 10, MPI_DOUBLE);
    natom3_        = (int)tmpArray[0];
    headerSize_    = (size_t)tmpArray[1];
    tStart_        = (size_t)tmpArray[2];
    tEnd_          = (size_t)tmpArray[3];
    numBoxCoords_  = (int)tmpArray[4];
    boxAngle_[0]   = tmpArray[5];
    boxAngle_[1]   = tmpArray[6];
    boxAngle_[2]   = tmpArray[7];
    writeType_     = (WriteType)tmpArray[8];
    highPrecision_ = (bool)tmpArray[9];
    // Non-master ranks set up their own file handle for the shared output.
    if (append)
      file_.SetupAppend( fname, debug_ );
    else
      file_.SetupWrite( fname, debug_ );
    if (highPrecision_) outfmt_ = "%8.6f";
  }
  // For parallel output we will need to seek. Set up the buffer again with correct offsets.
  // Figure out the size of the written title.
  unsigned int titleSize = (unsigned int)Title().size() + 1; // +1 for newline
  // Cap at 81 bytes — presumably an 80-char title line plus newline; confirm
  // against the serial title-writing code.
  titleSize = std::min(81U, titleSize);
  file_.SetupFrameBuffer( natom3_, 8, 10, headerSize_, titleSize );
  file_.ResizeBuffer( numBoxCoords_ );
  if (debug_>0)
    rprintf("'%s'(Parallel): Each frame has %lu bytes.\n",
            file_.Filename().base(), file_.FrameSize());
  // TODO set file size
  return 0;
}