/// Broadcasts an InterpolationObject from rank 0 to all other ranks. /// /// It is commonly the case that the data needed to create the /// interpolation table is available on only one task (for example, only /// one task has read the data from a file). Broadcasting the table /// eliminates the need to put broadcast code in multiple table readers. /// /// \see eamBcastPotential void bcastInterpolationObject(InterpolationObject** table) { struct { int n; real_t x0, invDx; } buf; if (getMyRank() == 0) { buf.n = (*table)->n; buf.x0 = (*table)->x0; buf.invDx = (*table)->invDx; } bcastParallel(&buf, sizeof(buf), 0); if (getMyRank() != 0) { assert(*table == NULL); *table = comdMalloc(sizeof(InterpolationObject)); (*table)->n = buf.n; (*table)->x0 = buf.x0; (*table)->invDx = buf.invDx; (*table)->values = comdMalloc(sizeof(real_t) * (buf.n+3) ); (*table)->values++; } int valuesSize = sizeof(real_t) * ((*table)->n+3); bcastParallel((*table)->values-1, valuesSize, 0); }
/// Allocate and initialize the EAM potential data structure. /// /// \param [in] dir The directory in which potential table files are found. /// \param [in] file The name of the potential table file. /// \param [in] type The file format of the potential file (setfl or funcfl). BasePotential* initEamPot(const char* dir, const char* file, const char* type) { EamPotential* pot = comdMalloc(sizeof(EamPotential)); assert(pot); pot->force = eamForce; pot->print = eamPrint; pot->destroy = eamDestroy; pot->phi = NULL; pot->rho = NULL; pot->f = NULL; // Initialization of the next three items requires information about // the parallel decomposition and link cells that isn't available // with the potential is initialized. Hence, we defer their // initialization until the first time we call the force routine. pot->dfEmbed = NULL; pot->rhobar = NULL; pot->forceExchange = NULL; if (getMyRank() == 0) { if (strcmp(type, "setfl" ) == 0) eamReadSetfl(pot, dir, file); else if (strcmp(type,"funcfl") == 0) eamReadFuncfl(pot, dir, file); else typeNotSupported("initEamPot", type); } eamBcastPotential(pot); return (BasePotential*) pot; }
/// Broadcasts an EamPotential from rank 0 to all other ranks. /// If the table coefficients are read from a file only rank 0 does the /// read. Hence we need to broadcast the potential to all other ranks. void eamBcastPotential(EamPotential* pot) { assert(pot); struct { real_t cutoff, mass, lat; char latticeType[8]; char name[3]; int atomicNo; } buf; if (getMyRank() == 0) { buf.cutoff = pot->cutoff; buf.mass = pot->mass; buf.lat = pot->lat; buf.atomicNo = pot->atomicNo; strcpy(buf.latticeType, pot->latticeType); strcpy(buf.name, pot->name); } bcastParallel(&buf, sizeof(buf), 0); pot->cutoff = buf.cutoff; pot->mass = buf.mass; pot->lat = buf.lat; pot->atomicNo = buf.atomicNo; strcpy(pot->latticeType, buf.latticeType); strcpy(pot->name, buf.name); bcastInterpolationObject(&pot->phi); bcastInterpolationObject(&pot->rho); bcastInterpolationObject(&pot->f); }
/// \param [in] xproc x-size of domain decomposition grid. /// \param [in] yproc y-size of domain decomposition grid. /// \param [in] zproc z-size of domain decomposition grid. /// \param [in] globalExtent Size of the simulation domain (in Angstroms). Domain* initDecomposition(int xproc, int yproc, int zproc, real3 globalExtent) { assert( xproc * yproc * zproc == getNRanks()); Domain* dd = comdMalloc(sizeof(Domain)); dd->procGrid[0] = xproc; dd->procGrid[1] = yproc; dd->procGrid[2] = zproc; // calculate grid coordinates i,j,k for this processor int myRank = getMyRank(); dd->procCoord[0] = myRank % dd->procGrid[0]; myRank /= dd->procGrid[0]; dd->procCoord[1] = myRank % dd->procGrid[1]; dd->procCoord[2] = myRank / dd->procGrid[1]; // initialialize global bounds for (int i = 0; i < 3; i++) { dd->globalMin[i] = 0; dd->globalMax[i] = globalExtent[i]; dd->globalExtent[i] = dd->globalMax[i] - dd->globalMin[i]; } // initialize local bounds on this processor for (int i = 0; i < 3; i++) { dd->localExtent[i] = dd->globalExtent[i] / dd->procGrid[i]; dd->localMin[i] = dd->globalMin[i] + dd->procCoord[i] * dd->localExtent[i]; dd->localMax[i] = dd->globalMin[i] + (dd->procCoord[i]+1) * dd->localExtent[i]; } return dd; }
/// Read command line arguments and store any file names that were given.
///
/// Argument "a" sets inputFile, "e" sets groupsOut, "f" sets deletedOut,
/// and "g" sets maximalOut.  An absent argument (getArgument returns an
/// empty string) leaves the corresponding member unchanged.
void Filtro2::GetParameter ()
{
   string parameter;

   parameter = getArgument ( "a" );
   if ( parameter.length () != 0 )
   {
      // NOTE(review): removed an unused rank buffer that was filled here
      // with sprintf("%d", getMyRank()); it was never read and its 5-byte
      // size could overflow for ranks with 5 or more digits.
      inputFile = parameter;
   }

   parameter = getArgument ( "e" );
   if ( parameter.length () != 0 )
   {
      groupsOut = parameter;
   }

   parameter = getArgument ( "f" );
   if ( parameter.length () != 0 )
   {
      deletedOut = parameter;
   }

   parameter = getArgument ( "g" );
   if ( parameter.length () != 0 )
   {
      maximalOut = parameter;
   }
}
/// Print a performance report to screenOut.
///
/// \details
/// The report contains two blocks. The upper block is performance
/// information for the printRank. The lower block is statistical
/// information over all ranks.
///
/// \param [in] nGlobalAtoms Total number of atoms across all ranks.
/// \param [in] printRate    Timesteps per timing sample; used to
///                          normalize the update rates.
void printPerformanceResults(int nGlobalAtoms, int printRate)
{
   // Collect timer statistics overall and across ranks
   timerStats();
   if (!printRank())
      return;

   // only print timers with non-zero values.
   double tick = getTick();
   // NOTE(review): loopTime is referenced only by the commented-out
   // tables below; it is kept so they can be restored unchanged.
   double loopTime = perfTimer[loopTimer].total*tick;

   fprintf(screenOut, "\n\nTimings for Rank %d\n", getMyRank());
   fprintf(screenOut, " Timer # Calls Avg/Call (s) Total (s) %% Loop\n");
   fprintf(screenOut, "___________________________________________________________________\n");
   // NOTE(review): the per-rank timer table is commented out; only the
   // header above is printed.
   /*
   for (int ii=0; ii<numberOfTimers; ++ii)
   {
      double totalTime = perfTimer[ii].total*tick;
      if (perfTimer[ii].count > 0)
         fprintf(screenOut, "%-16s%12"PRIu64" %8.4f %8.4f %8.2f\n",
                 timerName[ii], perfTimer[ii].count,
                 totalTime/(double)perfTimer[ii].count,
                 totalTime, totalTime/loopTime*100.0);
   }
   */

   fprintf(screenOut, "\nTiming Statistics Across %d Ranks:\n", getNRanks());
   fprintf(screenOut, " Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s)\n");
   fprintf(screenOut, "_____________________________________________________________________________\n");
   // NOTE(review): the cross-rank statistics table is also commented out.
   /*
   for (int ii = 0; ii < numberOfTimers; ++ii)
   {
      if (perfTimer[ii].count > 0)
         fprintf(screenOut, "%-16s%6d:%10.4f %6d:%10.4f %10.4f %10.4f\n",
                 timerName[ii],
                 perfTimer[ii].minRank, perfTimer[ii].minValue*tick,
                 perfTimer[ii].maxRank, perfTimer[ii].maxValue*tick,
                 perfTimer[ii].average*tick, perfTimer[ii].stdev*tick);
   }
   */

   // Global update rates, converted to microseconds per atom update.
   // NOTE(review): this assumes perfTimer[timestepTimer].average covers
   // count*printRate atom-update sweeps -- TODO confirm against the
   // timer bookkeeping.
   double atomsPerTask = nGlobalAtoms/(real_t)getNRanks();
   perfGlobal.atomRate = perfTimer[timestepTimer].average * tick * 1e6 / (atomsPerTask * perfTimer[timestepTimer].count * printRate);
   perfGlobal.atomAllRate = perfTimer[timestepTimer].average * tick * 1e6 / (nGlobalAtoms * perfTimer[timestepTimer].count * printRate);
   perfGlobal.atomsPerUSec = 1.0 / perfGlobal.atomAllRate;

   // NOTE(review): the three rate lines themselves are commented out, so
   // each section prints only its separator rules.
   fprintf(screenOut, "\n---------------------------------------------------\n");
   // fprintf(screenOut, " Average atom update rate: %6.2f us/atom/task\n", perfGlobal.atomRate);
   fprintf(screenOut, "---------------------------------------------------\n\n");
   fprintf(screenOut, "\n---------------------------------------------------\n");
   // fprintf(screenOut, " Average all atom update rate: %6.2f us/atom\n", perfGlobal.atomAllRate);
   fprintf(screenOut, "---------------------------------------------------\n\n");
   fprintf(screenOut, "\n---------------------------------------------------\n");
   // fprintf(screenOut, " Average atom rate: %6.2f atoms/us\n", perfGlobal.atomsPerUSec);
   fprintf(screenOut, "---------------------------------------------------\n\n");
}
void printPerformanceResultsYaml(FILE* file) { if (! printRank()) return; double tick = getTick(); double loopTime = perfTimer[loopTimer].total*tick; fprintf(file,"\nPerformance Results:\n"); fprintf(file, " TotalRanks: %d\n", getNRanks()); fprintf(file, " ReportingTimeUnits: seconds\n"); fprintf(file, "Performance Results For Rank %d:\n", getMyRank()); for (int ii = 0; ii < numberOfTimers; ii++) { if (perfTimer[ii].count > 0) { double totalTime = perfTimer[ii].total*tick; fprintf(file, " Timer: %s\n", timerName[ii]); fprintf(file, " CallCount: %"PRIu64"\n", perfTimer[ii].count); fprintf(file, " AvgPerCall: %8.4f\n", totalTime/(double)perfTimer[ii].count); fprintf(file, " Total: %8.4f\n", totalTime); fprintf(file, " PercentLoop: %8.2f\n", totalTime/loopTime*100); } } fprintf(file, "Performance Results Across Ranks:\n"); for (int ii = 0; ii < numberOfTimers; ii++) { if (perfTimer[ii].count > 0) { fprintf(file, " Timer: %s\n", timerName[ii]); fprintf(file, " MinRank: %d\n", perfTimer[ii].minRank); fprintf(file, " MinTime: %8.4f\n", perfTimer[ii].minValue*tick); fprintf(file, " MaxRank: %d\n", perfTimer[ii].maxRank); fprintf(file, " MaxTime: %8.4f\n", perfTimer[ii].maxValue*tick); fprintf(file, " AvgTime: %8.4f\n", perfTimer[ii].average*tick); fprintf(file, " StdevTime: %8.4f\n", perfTimer[ii].stdev*tick); } } fprintf(file,"Performance Global Update Rates:\n"); fprintf(file, " AtomUpdateRate:\n"); fprintf(file, " AverageRate: %6.2f\n", perfGlobal.atomRate); fprintf(file, " Units: us/atom/task\n"); fprintf(file, " AllAtomUpdateRate:\n"); fprintf(file, " AverageRate: %6.2f\n", perfGlobal.atomAllRate); fprintf(file, " Units: us/atom\n"); fprintf(file, " AtomRate:\n"); fprintf(file, " AverageRate: %6.2f\n", perfGlobal.atomsPerUSec); fprintf(file, " Units: atoms/us\n"); fprintf(file, "\n"); }
/// Print every deleted group to stdout, prefixed with this task's rank.
/// Each inner set is printed as a space-separated list on its own line.
///
/// \param [in] deleted The groups (sets of ints) to print.
///
/// NOTE(review): removed a `count` accumulator that was incremented per
/// element but never read (dead code).  The parameter is still taken by
/// value, which copies the whole container -- changing it to a const
/// reference needs a matching update in the class declaration; TODO.
void Filtro2::printDeleted(set<set<int > > deleted)
{
   cout << "GetMyRank: " << getMyRank();
   cout << " Deleted: " << endl;
   for (set<set<int > >::iterator it1 = deleted.begin(); it1 != deleted.end(); it1++)
   {
      for (set<int>::iterator it2 = it1->begin(); it2 != it1->end(); it2++)
      {
         cout << (*it2) << " ";
      }
      cout << " " << endl;
   }
}
/// Collect timer statistics across ranks. void timerStats(void) { double sendBuf[numberOfTimers], recvBuf[numberOfTimers]; // Determine average of each timer across ranks for (int ii = 0; ii < numberOfTimers; ii++) sendBuf[ii] = (double)perfTimer[ii].total; addDoubleParallel(sendBuf, recvBuf, numberOfTimers); for (int ii = 0; ii < numberOfTimers; ii++) perfTimer[ii].average = recvBuf[ii] / (double)getNRanks(); // Determine min and max across ranks and which rank RankReduceData reduceSendBuf[numberOfTimers], reduceRecvBuf[numberOfTimers]; for (int ii = 0; ii < numberOfTimers; ii++) { reduceSendBuf[ii].val = (double)perfTimer[ii].total; reduceSendBuf[ii].rank = getMyRank(); } minRankDoubleParallel(reduceSendBuf, reduceRecvBuf, numberOfTimers); for (int ii = 0; ii < numberOfTimers; ii++) { perfTimer[ii].minValue = reduceRecvBuf[ii].val; perfTimer[ii].minRank = reduceRecvBuf[ii].rank; } maxRankDoubleParallel(reduceSendBuf, reduceRecvBuf, numberOfTimers); for (int ii = 0; ii < numberOfTimers; ii++) { perfTimer[ii].maxValue = reduceRecvBuf[ii].val; perfTimer[ii].maxRank = reduceRecvBuf[ii].rank; } // Determine standard deviation for (int ii = 0; ii < numberOfTimers; ii++) { double temp = (double)perfTimer[ii].total - perfTimer[ii].average; sendBuf[ii] = temp * temp; } addDoubleParallel(sendBuf, recvBuf, numberOfTimers); for (int ii = 0; ii < numberOfTimers; ii++) { perfTimer[ii].stdev = sqrt(recvBuf[ii] / (double) getNRanks()); } }
int main( int argc, char** argv ) { bool localRank = false; bool myRank = false; bool totalRank = false; po::options_description desc( "Allowed options" ); desc.add_options( ) ( "help,h", "produce help message" ) ( "mpi_host_rank", po::value<bool > ( &localRank )->zero_tokens( ), "get local mpi rank" ) ( "mpi_rank", po::value<bool > ( &myRank )->zero_tokens( ), "get mpi rank" ) ( "mpi_size", po::value<bool > ( &totalRank )->zero_tokens( ), "get count of mpi ranks" ); // parse command line options and config file and store values in vm po::variables_map vm; po::store( boost::program_options::parse_command_line( argc, argv, desc ), vm ); po::notify( vm ); // print help message and quit simulation if ( vm.count( "help" ) ) { std::cerr << desc << "\n"; return false; } MPI_CHECK( MPI_Init( &argc, &argv ) ); if ( localRank ) std::cout << "mpi_host_rank: " << getHostRank( ) << std::endl; if ( myRank ) std::cout << "mpi_rank: " << getMyRank( ) << std::endl; if ( totalRank ) std::cout << "mpi_size: " << getTotalRanks( ) << std::endl; MPI_CHECK( MPI_Finalize( ) ); return 0; }