示例#1
0
/// Gather timer statistics, compute the global atom update rates, and
/// write the skeleton of the performance report to screenOut.
///
/// \details
/// timerStats() is called before the printRank() test, so it runs on
/// every rank that enters this function.  Ranks for which printRank()
/// is false return immediately afterwards and neither print anything
/// nor update perfGlobal.
///
/// The report is laid out as two blocks.  The upper block is headed for
/// the printRank; the lower block is headed for statistics over all
/// ranks.  The per-timer rows of both blocks and the three update rate
/// lines are currently not emitted: only the headings and separator
/// rules are written.  The rates themselves are still computed and
/// stored in perfGlobal (atomRate, atomAllRate, atomsPerUSec).
///
/// \param [in] nGlobalAtoms Atom count used to normalize the rates.
/// \param [in] printRate Factor multiplied with the timestep timer call
/// count when normalizing the rates.
void printPerformanceResults(int nGlobalAtoms, int printRate)
{
   // Collect timer statistics overall and across ranks
   timerStats();

   if (!printRank())
      return;

   // Conversion factor applied to raw timer values.
   double tick = getTick();

   // Heading of the per-rank block (no timer rows are printed).
   fprintf(screenOut, "\n\nTimings for Rank %d\n", getMyRank());
   fprintf(screenOut, "        Timer        # Calls    Avg/Call (s)   Total (s)    %% Loop\n");
   fprintf(screenOut, "___________________________________________________________________\n");

   // Heading of the across-ranks block (no timer rows are printed).
   fprintf(screenOut, "\nTiming Statistics Across %d Ranks:\n", getNRanks());
   fprintf(screenOut, "        Timer        Rank: Min(s)       Rank: Max(s)      Avg(s)    Stdev(s)\n");
   fprintf(screenOut, "_____________________________________________________________________________\n");

   // Update rates derived from the rank-averaged timestep timer.  The
   // factor 1e6 matches the "us" (microsecond) units of the rates.
   double atomsPerTask = nGlobalAtoms/(real_t)getNRanks();
   perfGlobal.atomRate = perfTimer[timestepTimer].average * tick * 1e6 /
      (atomsPerTask * perfTimer[timestepTimer].count * printRate);
   perfGlobal.atomAllRate = perfTimer[timestepTimer].average * tick * 1e6 /
      (nGlobalAtoms * perfTimer[timestepTimer].count * printRate);
   perfGlobal.atomsPerUSec = 1.0 / perfGlobal.atomAllRate;

   // Three empty separator boxes, one per rate (atomRate, atomAllRate,
   // atomsPerUSec).  The rate values are not printed.
   for (int box = 0; box < 3; ++box)
   {
      fprintf(screenOut, "\n---------------------------------------------------\n");
      fprintf(screenOut, "---------------------------------------------------\n\n");
   }
}
示例#2
0
/// Build the domain decomposition descriptor for the calling rank.
///
/// \details
/// The rank reported by getMyRank() is converted to grid coordinates
/// with x varying fastest, then y, then z.  The global domain runs from
/// 0 to globalExtent along each axis and is divided evenly among the
/// processor grid along that axis; the local bounds are those of the
/// sub-domain at this rank's grid coordinates.
///
/// \param [in] xproc x-size of domain decomposition grid.
/// \param [in] yproc y-size of domain decomposition grid.
/// \param [in] zproc z-size of domain decomposition grid.
/// \param [in] globalExtent Size of the simulation domain (in Angstroms).
/// \return Pointer to a Domain obtained from comdMalloc().
Domain* initDecomposition(int xproc, int yproc, int zproc, real3 globalExtent)
{
   // The grid must account for every rank exactly once.
   assert( xproc * yproc * zproc == getNRanks());

   Domain* dd = comdMalloc(sizeof(Domain));
   dd->procGrid[0] = xproc;
   dd->procGrid[1] = yproc;
   dd->procGrid[2] = zproc;

   // Decompose the linear rank into (i,j,k) grid coordinates.
   const int rank = getMyRank();
   const int rankYZ = rank / dd->procGrid[0];
   dd->procCoord[0] = rank % dd->procGrid[0];
   dd->procCoord[1] = rankYZ % dd->procGrid[1];
   dd->procCoord[2] = rankYZ / dd->procGrid[1];

   for (int axis = 0; axis < 3; ++axis)
   {
      // global bounds along this axis
      dd->globalMin[axis] = 0;
      dd->globalMax[axis] = globalExtent[axis];
      dd->globalExtent[axis] = dd->globalMax[axis] - dd->globalMin[axis];

      // local bounds of this processor's slab along this axis
      dd->localExtent[axis] = dd->globalExtent[axis] / dd->procGrid[axis];
      dd->localMin[axis] = dd->globalMin[axis] +  dd->procCoord[axis]    * dd->localExtent[axis];
      dd->localMax[axis] = dd->globalMin[axis] + (dd->procCoord[axis]+1) * dd->localExtent[axis];
   }

   return dd;
}
/// Collect timer statistics across ranks.
///
/// \details
/// For each of the numberOfTimers entries of perfTimer, this rank's
/// total is combined with the other ranks' totals to fill in:
///   - average: reduced total divided by the number of ranks,
///   - minValue/minRank and maxValue/maxRank: extreme totals and the
///     ranks reported for them,
///   - stdev: square root of the reduced squared deviation from the
///     average, divided by the number of ranks.
///
/// The parallel helpers are invoked in a fixed order (add, min, max,
/// add).
void timerStats(void)
{
   double localVals[numberOfTimers], reducedVals[numberOfTimers];

   // Determine average of each timer across ranks
   for (int t = 0; t < numberOfTimers; ++t)
      localVals[t] = (double)perfTimer[t].total;
   addDoubleParallel(localVals, reducedVals, numberOfTimers);

   const double nRanks = (double)getNRanks();
   for (int t = 0; t < numberOfTimers; ++t)
      perfTimer[t].average = reducedVals[t] / nRanks;

   // Determine min and max across ranks and which rank
   RankReduceData localPairs[numberOfTimers], reducedPairs[numberOfTimers];
   const int myRank = getMyRank();
   for (int t = 0; t < numberOfTimers; ++t)
   {
      localPairs[t].val = (double)perfTimer[t].total;
      localPairs[t].rank = myRank;
   }

   minRankDoubleParallel(localPairs, reducedPairs, numberOfTimers);
   for (int t = 0; t < numberOfTimers; ++t)
   {
      perfTimer[t].minValue = reducedPairs[t].val;
      perfTimer[t].minRank = reducedPairs[t].rank;
   }

   maxRankDoubleParallel(localPairs, reducedPairs, numberOfTimers);
   for (int t = 0; t < numberOfTimers; ++t)
   {
      perfTimer[t].maxValue = reducedPairs[t].val;
      perfTimer[t].maxRank = reducedPairs[t].rank;
   }

   // Determine standard deviation: reduce the squared deviation of this
   // rank's total from the average, then normalize by the rank count.
   for (int t = 0; t < numberOfTimers; ++t)
   {
      const double deviation = (double)perfTimer[t].total - perfTimer[t].average;
      localVals[t] = deviation * deviation;
   }
   addDoubleParallel(localVals, reducedVals, numberOfTimers);
   for (int t = 0; t < numberOfTimers; ++t)
      perfTimer[t].stdev = sqrt(reducedVals[t] / nRanks);
}
示例#4
0
/// Write the performance report to \p file in a YAML-like layout.
///
/// \details
/// Nothing is written unless printRank() is true.  The output has three
/// sections: per-timer results for this rank, per-timer statistics
/// across ranks, and the global update rates read from perfGlobal.
/// Timers whose call count is zero are skipped in both timer sections.
/// The statistics fields of perfTimer and the rates in perfGlobal are
/// only read here, not computed.
///
/// \param [in] file Stream that receives the report.
void printPerformanceResultsYaml(FILE* file)
{
   if (! printRank())
      return;

   const double tick = getTick();
   const double loopTime = perfTimer[loopTimer].total*tick;

   fputs("\nPerformance Results:\n", file);
   fprintf(file, "  TotalRanks: %d\n", getNRanks());
   fputs("  ReportingTimeUnits: seconds\n", file);

   // Section 1: timers as measured on this rank.
   fprintf(file, "Performance Results For Rank %d:\n", getMyRank());
   for (int idx = 0; idx < numberOfTimers; ++idx)
   {
      if (perfTimer[idx].count == 0)
         continue;

      const double totalTime = perfTimer[idx].total*tick;
      fprintf(file, "  Timer: %s\n", timerName[idx]);
      fprintf(file, "    CallCount:  %" PRIu64 "\n", perfTimer[idx].count);
      fprintf(file, "    AvgPerCall: %8.4f\n", totalTime/(double)perfTimer[idx].count);
      fprintf(file, "    Total:      %8.4f\n", totalTime);
      fprintf(file, "    PercentLoop: %8.2f\n", totalTime/loopTime*100);
   }

   // Section 2: statistics of each timer over all ranks.
   fputs("Performance Results Across Ranks:\n", file);
   for (int idx = 0; idx < numberOfTimers; ++idx)
   {
      if (perfTimer[idx].count == 0)
         continue;

      fprintf(file, "  Timer: %s\n", timerName[idx]);
      fprintf(file, "    MinRank: %d\n", perfTimer[idx].minRank);
      fprintf(file, "    MinTime: %8.4f\n", perfTimer[idx].minValue*tick);
      fprintf(file, "    MaxRank: %d\n", perfTimer[idx].maxRank);
      fprintf(file, "    MaxTime: %8.4f\n", perfTimer[idx].maxValue*tick);
      fprintf(file, "    AvgTime: %8.4f\n", perfTimer[idx].average*tick);
      fprintf(file, "    StdevTime: %8.4f\n", perfTimer[idx].stdev*tick);
   }

   // Section 3: global update rates.
   fputs("Performance Global Update Rates:\n", file);
   fputs("  AtomUpdateRate:\n", file);
   fprintf(file, "    AverageRate: %6.2f\n", perfGlobal.atomRate);
   fputs("    Units: us/atom/task\n", file);
   fputs("  AllAtomUpdateRate:\n", file);
   fprintf(file, "    AverageRate: %6.2f\n", perfGlobal.atomAllRate);
   fputs("    Units: us/atom\n", file);
   fputs("  AtomRate:\n", file);
   fprintf(file, "    AverageRate: %6.2f\n", perfGlobal.atomsPerUSec);
   fputs("    Units: atoms/us\n", file);

   fputs("\n", file);
}
示例#5
0
/// Check that the user input meets certain criteria.
///
/// \details
/// Three independent checks are made, each contributing one bit to the
/// fail code:
///   - 1: xproc * yproc * zproc differs from getNRanks(),
///   - 2: the simulation is smaller than 2*cutoff per processor along
///        some axis,
///   - 4: the lattice type is not "FCC" (compared case-insensitively).
///
/// A message for each failed check is written to screenOut when
/// printRank() is true.  If any check failed, the process terminates via
/// exit() with the combined fail code as its status; otherwise the
/// function returns normally.
///
/// \param [in] cmd Command line parameters (xproc/yproc/zproc and
/// nx/ny/nz are read).
/// \param [in] cutoff Cutoff distance used for the minimum size check.
/// \param [in] latticeConst Lattice constant; multiplied by nx/ny/nz to
/// obtain the simulation size.
/// \param [in] latticeType Name of the lattice structure.
void sanityChecks(Command cmd, double cutoff, double latticeConst, char latticeType[8])
{
   int failCode = 0;

   // Check that domain grid matches number of ranks. (fail code 1)
   int nProcs = cmd.xproc * cmd.yproc * cmd.zproc;
   if (nProcs != getNRanks())
   {
      failCode |= 1;
      if (printRank() )
         fprintf(screenOut,
                 "\nNumber of MPI ranks must match xproc * yproc * zproc\n");
   }

   // Check whether simulation is too small (fail code 2)
   double minx = 2*cutoff*cmd.xproc;
   double miny = 2*cutoff*cmd.yproc;
   double minz = 2*cutoff*cmd.zproc;
   double sizex = cmd.nx*latticeConst;
   double sizey = cmd.ny*latticeConst;
   double sizez = cmd.nz*latticeConst;

   if ( sizex < minx || sizey < miny || sizez < minz)
   {
      failCode |= 2;
      if (printRank())
         fprintf(screenOut,"\nSimulation too small.\n"
                 "  Increase the number of unit cells to make the simulation\n"
                 "  at least (%3.2f, %3.2f, %3.2f) Angstroms in size\n",
                 minx, miny, minz);
   }

   // Check for supported lattice structure (fail code 4)
   if (strcasecmp(latticeType, "FCC") != 0)
   {
      failCode |= 4;
      if ( printRank() )
         fprintf(screenOut,
                 "\nOnly FCC Lattice type supported, not %s. Fatal Error.\n",
                 latticeType);
   }

   // Compare this task's fail code with the value broadcast from root 0.
   int checkCode = failCode;
   bcastParallel(&checkCode, sizeof(int), 0);
   // This assertion can only fail if different tasks failed different
   // sanity checks.  That should not be possible.
   assert(checkCode == failCode);

   if (failCode != 0)
      exit(failCode);
}