/// \details /// The report contains two blocks. The upper block is performance /// information for the printRank. The lower block is statistical /// information over all ranks. void printPerformanceResults(int nGlobalAtoms, int printRate) { // Collect timer statistics overall and across ranks timerStats(); if (!printRank()) return; // only print timers with non-zero values. double tick = getTick(); double loopTime = perfTimer[loopTimer].total*tick; fprintf(screenOut, "\n\nTimings for Rank %d\n", getMyRank()); fprintf(screenOut, " Timer # Calls Avg/Call (s) Total (s) %% Loop\n"); fprintf(screenOut, "___________________________________________________________________\n"); /* for (int ii=0; ii<numberOfTimers; ++ii) { double totalTime = perfTimer[ii].total*tick; if (perfTimer[ii].count > 0) fprintf(screenOut, "%-16s%12"PRIu64" %8.4f %8.4f %8.2f\n", timerName[ii], perfTimer[ii].count, totalTime/(double)perfTimer[ii].count, totalTime, totalTime/loopTime*100.0); }*/ fprintf(screenOut, "\nTiming Statistics Across %d Ranks:\n", getNRanks()); fprintf(screenOut, " Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s)\n"); fprintf(screenOut, "_____________________________________________________________________________\n"); /* for (int ii = 0; ii < numberOfTimers; ++ii) { if (perfTimer[ii].count > 0) fprintf(screenOut, "%-16s%6d:%10.4f %6d:%10.4f %10.4f %10.4f\n", timerName[ii], perfTimer[ii].minRank, perfTimer[ii].minValue*tick, perfTimer[ii].maxRank, perfTimer[ii].maxValue*tick, perfTimer[ii].average*tick, perfTimer[ii].stdev*tick); }*/ double atomsPerTask = nGlobalAtoms/(real_t)getNRanks(); perfGlobal.atomRate = perfTimer[timestepTimer].average * tick * 1e6 / (atomsPerTask * perfTimer[timestepTimer].count * printRate); perfGlobal.atomAllRate = perfTimer[timestepTimer].average * tick * 1e6 / (nGlobalAtoms * perfTimer[timestepTimer].count * printRate); perfGlobal.atomsPerUSec = 1.0 / perfGlobal.atomAllRate; fprintf(screenOut, "\n---------------------------------------------------\n"); // fprintf(screenOut, " Average atom update rate: %6.2f us/atom/task\n", perfGlobal.atomRate); fprintf(screenOut, "---------------------------------------------------\n\n"); fprintf(screenOut, "\n---------------------------------------------------\n"); // fprintf(screenOut, " Average all atom update rate: %6.2f us/atom\n", perfGlobal.atomAllRate); fprintf(screenOut, "---------------------------------------------------\n\n"); fprintf(screenOut, "\n---------------------------------------------------\n"); // fprintf(screenOut, " Average atom rate: %6.2f atoms/us\n", perfGlobal.atomsPerUSec); fprintf(screenOut, "---------------------------------------------------\n\n"); }
/// \param [in] xproc x-size of domain decomposition grid. /// \param [in] yproc y-size of domain decomposition grid. /// \param [in] zproc z-size of domain decomposition grid. /// \param [in] globalExtent Size of the simulation domain (in Angstroms). Domain* initDecomposition(int xproc, int yproc, int zproc, real3 globalExtent) { assert( xproc * yproc * zproc == getNRanks()); Domain* dd = comdMalloc(sizeof(Domain)); dd->procGrid[0] = xproc; dd->procGrid[1] = yproc; dd->procGrid[2] = zproc; // calculate grid coordinates i,j,k for this processor int myRank = getMyRank(); dd->procCoord[0] = myRank % dd->procGrid[0]; myRank /= dd->procGrid[0]; dd->procCoord[1] = myRank % dd->procGrid[1]; dd->procCoord[2] = myRank / dd->procGrid[1]; // initialialize global bounds for (int i = 0; i < 3; i++) { dd->globalMin[i] = 0; dd->globalMax[i] = globalExtent[i]; dd->globalExtent[i] = dd->globalMax[i] - dd->globalMin[i]; } // initialize local bounds on this processor for (int i = 0; i < 3; i++) { dd->localExtent[i] = dd->globalExtent[i] / dd->procGrid[i]; dd->localMin[i] = dd->globalMin[i] + dd->procCoord[i] * dd->localExtent[i]; dd->localMax[i] = dd->globalMin[i] + (dd->procCoord[i]+1) * dd->localExtent[i]; } return dd; }
/// Collect timer statistics across ranks. void timerStats(void) { double sendBuf[numberOfTimers], recvBuf[numberOfTimers]; // Determine average of each timer across ranks for (int ii = 0; ii < numberOfTimers; ii++) sendBuf[ii] = (double)perfTimer[ii].total; addDoubleParallel(sendBuf, recvBuf, numberOfTimers); for (int ii = 0; ii < numberOfTimers; ii++) perfTimer[ii].average = recvBuf[ii] / (double)getNRanks(); // Determine min and max across ranks and which rank RankReduceData reduceSendBuf[numberOfTimers], reduceRecvBuf[numberOfTimers]; for (int ii = 0; ii < numberOfTimers; ii++) { reduceSendBuf[ii].val = (double)perfTimer[ii].total; reduceSendBuf[ii].rank = getMyRank(); } minRankDoubleParallel(reduceSendBuf, reduceRecvBuf, numberOfTimers); for (int ii = 0; ii < numberOfTimers; ii++) { perfTimer[ii].minValue = reduceRecvBuf[ii].val; perfTimer[ii].minRank = reduceRecvBuf[ii].rank; } maxRankDoubleParallel(reduceSendBuf, reduceRecvBuf, numberOfTimers); for (int ii = 0; ii < numberOfTimers; ii++) { perfTimer[ii].maxValue = reduceRecvBuf[ii].val; perfTimer[ii].maxRank = reduceRecvBuf[ii].rank; } // Determine standard deviation for (int ii = 0; ii < numberOfTimers; ii++) { double temp = (double)perfTimer[ii].total - perfTimer[ii].average; sendBuf[ii] = temp * temp; } addDoubleParallel(sendBuf, recvBuf, numberOfTimers); for (int ii = 0; ii < numberOfTimers; ii++) { perfTimer[ii].stdev = sqrt(recvBuf[ii] / (double) getNRanks()); } }
void printPerformanceResultsYaml(FILE* file) { if (! printRank()) return; double tick = getTick(); double loopTime = perfTimer[loopTimer].total*tick; fprintf(file,"\nPerformance Results:\n"); fprintf(file, " TotalRanks: %d\n", getNRanks()); fprintf(file, " ReportingTimeUnits: seconds\n"); fprintf(file, "Performance Results For Rank %d:\n", getMyRank()); for (int ii = 0; ii < numberOfTimers; ii++) { if (perfTimer[ii].count > 0) { double totalTime = perfTimer[ii].total*tick; fprintf(file, " Timer: %s\n", timerName[ii]); fprintf(file, " CallCount: %"PRIu64"\n", perfTimer[ii].count); fprintf(file, " AvgPerCall: %8.4f\n", totalTime/(double)perfTimer[ii].count); fprintf(file, " Total: %8.4f\n", totalTime); fprintf(file, " PercentLoop: %8.2f\n", totalTime/loopTime*100); } } fprintf(file, "Performance Results Across Ranks:\n"); for (int ii = 0; ii < numberOfTimers; ii++) { if (perfTimer[ii].count > 0) { fprintf(file, " Timer: %s\n", timerName[ii]); fprintf(file, " MinRank: %d\n", perfTimer[ii].minRank); fprintf(file, " MinTime: %8.4f\n", perfTimer[ii].minValue*tick); fprintf(file, " MaxRank: %d\n", perfTimer[ii].maxRank); fprintf(file, " MaxTime: %8.4f\n", perfTimer[ii].maxValue*tick); fprintf(file, " AvgTime: %8.4f\n", perfTimer[ii].average*tick); fprintf(file, " StdevTime: %8.4f\n", perfTimer[ii].stdev*tick); } } fprintf(file,"Performance Global Update Rates:\n"); fprintf(file, " AtomUpdateRate:\n"); fprintf(file, " AverageRate: %6.2f\n", perfGlobal.atomRate); fprintf(file, " Units: us/atom/task\n"); fprintf(file, " AllAtomUpdateRate:\n"); fprintf(file, " AverageRate: %6.2f\n", perfGlobal.atomAllRate); fprintf(file, " Units: us/atom\n"); fprintf(file, " AtomRate:\n"); fprintf(file, " AverageRate: %6.2f\n", perfGlobal.atomsPerUSec); fprintf(file, " Units: atoms/us\n"); fprintf(file, "\n"); }
/// Check that the user input meets certain criteria. void sanityChecks(Command cmd, double cutoff, double latticeConst, char latticeType[8]) { int failCode = 0; // Check that domain grid matches number of ranks. (fail code 1) int nProcs = cmd.xproc * cmd.yproc * cmd.zproc; if (nProcs != getNRanks()) { failCode |= 1; if (printRank() ) fprintf(screenOut, "\nNumber of MPI ranks must match xproc * yproc * zproc\n"); } // Check whether simuation is too small (fail code 2) double minx = 2*cutoff*cmd.xproc; double miny = 2*cutoff*cmd.yproc; double minz = 2*cutoff*cmd.zproc; double sizex = cmd.nx*latticeConst; double sizey = cmd.ny*latticeConst; double sizez = cmd.nz*latticeConst; if ( sizex < minx || sizey < miny || sizez < minz) { failCode |= 2; if (printRank()) fprintf(screenOut,"\nSimulation too small.\n" " Increase the number of unit cells to make the simulation\n" " at least (%3.2f, %3.2f. %3.2f) Ansgstroms in size\n", minx, miny, minz); } // Check for supported lattice structure (fail code 4) if (strcasecmp(latticeType, "FCC") != 0) { failCode |= 4; if ( printRank() ) fprintf(screenOut, "\nOnly FCC Lattice type supported, not %s. Fatal Error.\n", latticeType); } int checkCode = failCode; bcastParallel(&checkCode, sizeof(int), 0); // This assertion can only fail if different tasks failed different // sanity checks. That should not be possible. assert(checkCode == failCode); if (failCode != 0) exit(failCode); }