Example #1
0
// Write the statistics report.
//
// Emits `heading`, then (when enabled) per-thread timers/counters and per-thread
// events, and finally the timers and counters aggregated over every entry in
// __kmp_stats_list.
//
// Output goes to outputFileName (opened in append mode) when it is set and can
// be opened, otherwise to stderr. Events go to eventsFileName; if that file
// cannot be opened a warning is printed to stderr and event output is skipped.
//
// heading: text printed as the first line of the report.
void kmp_stats_output_module::outputStats(const char* heading) 
{
    // Stop all the explicit timers for all threads.
    // Do this before declaring the local statistics: they are objects that have
    // to be constructed, and that takes time.
    windupExplicitTimers();

    statistic allStats[TIMER_LAST];
    statistic allCounters[COUNTER_LAST];

    FILE * statsOut = outputFileName ? fopen (outputFileName, "a+") : stderr;
    // Fall back to stderr if the requested statistics file can't be opened.
    if (!statsOut)
        statsOut = stderr;

    // Stays NULL unless event printing is enabled *and* the file was opened;
    // every later use is guarded so a failed fopen can't reach printEvents/fclose.
    FILE * eventsOut = NULL;
    if (eventPrintingEnabled()) {
        eventsOut = fopen(eventsFileName, "w+");
        if (!eventsOut)
            fprintf(stderr, "Warning: cannot open events file '%s'; events will not be written\n", eventsFileName);
    }

    fprintf(statsOut, "%s\n",heading);
    // Accumulate across threads.
    kmp_stats_list::iterator it;
    for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
        int t = (*it)->getGtid();
        // Output per thread stats if requested.
        if (perThreadPrintingEnabled()) {
            fprintf (statsOut, "Thread %d\n", t);
            printStats(statsOut, (*it)->getTimers(), true);
            printCounters(statsOut, (*it)->getCounters());
            fprintf(statsOut,"\n");
        }
        // Output per thread events if requested (and the events file is usable).
        if (eventsOut) {
            kmp_stats_event_vector events = (*it)->getEventVector();
            printEvents(eventsOut, &events, t);
        }

        for (int s = 0; s<TIMER_LAST; s++) {
            // See if we should ignore this timer when aggregating
            if ((timeStat::masterOnly(timer_e(s)) && (t != 0)) || // Timer is only valid on the master and this thread is a worker
                (timeStat::workerOnly(timer_e(s)) && (t == 0)) || // Timer is only valid on a worker and this thread is the master
                timeStat::synthesized(timer_e(s))                 // It's a synthesized stat, so there's no raw data for it.
               )            
            {
                continue;
            }

            statistic * threadStat = (*it)->getTimer(timer_e(s));
            allStats[s] += *threadStat;
        }

        // Special handling for synthesized statistics.
        // These just have to be coded specially here for now. 
        // At present we only have a few: 
        // The total parallel work done in each thread.
        // The variance here makes it easy to see load imbalance over the whole program (though, of course,
        // it's possible to have a code with awful load balance in every parallel region but perfect load
        // balance over the whole program.)
        // The time spent in barriers in each thread.
        allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal());

        // Time in explicit barriers.
        allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal());

        // Counters: each thread contributes one sample, except that master-only
        // counters are taken from thread 0 alone.
        for (int c = 0; c<COUNTER_LAST; c++) {
            if (counter::masterOnly(counter_e(c)) && t != 0)
                continue;
            allCounters[c].addSample ((*it)->getCounter(counter_e(c))->getValue());
        }
    }

    if (eventPrintingEnabled()) {
        printPloticusFile();
        // Only close what was actually opened; fclose(NULL) is undefined.
        if (eventsOut)
            fclose(eventsOut);
    }

    fprintf (statsOut, "Aggregate for all threads\n");
    printStats (statsOut, &allStats[0], true);
    fprintf (statsOut, "\n");
    printStats (statsOut, &allCounters[0], false);

    // stderr is not ours to close.
    if (statsOut != stderr)
        fclose(statsOut);

}
Example #2
0
// Write the statistics report.
//
// Emits the header info and `heading`, then (when enabled) per-thread
// timers/counters and per-thread events, and finally the timers and counters
// aggregated over every entry in __kmp_stats_list.
//
// Output goes to outputFileName (opened in append mode) when it is non-empty
// and can be opened, otherwise to stderr. Events go to eventsFileName; if that
// file cannot be opened a warning is printed to stderr and event output is
// skipped.
//
// heading: text printed after the header info, before the per-thread data.
void kmp_stats_output_module::outputStats(const char *heading) {
  // Stop all the explicit timers in all threads
  // Do this before declaring the local statistics because they have
  // constructors so will take time to create.
  windupExplicitTimers();

  statistic allStats[TIMER_LAST];
  statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of
                                       normal timer stats */
  statistic allCounters[COUNTER_LAST];

  FILE *statsOut =
      !outputFileName.empty() ? fopen(outputFileName.c_str(), "a+") : stderr;
  // Fall back to stderr if the requested statistics file can't be opened.
  if (!statsOut)
    statsOut = stderr;

  // Stays NULL unless event printing is enabled *and* the file was opened;
  // every later use is guarded so a failed fopen can't reach
  // printEvents/fclose.
  FILE *eventsOut = NULL;
  if (eventPrintingEnabled()) {
    eventsOut = fopen(eventsFileName, "w+");
    if (!eventsOut)
      fprintf(stderr,
              "Warning: cannot open events file '%s'; events will not be "
              "written\n",
              eventsFileName);
  }

  printHeaderInfo(statsOut);
  fprintf(statsOut, "%s\n", heading);
  // Accumulate across threads.
  kmp_stats_list::iterator it;
  for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
    int t = (*it)->getGtid();
    // Output per thread stats if requested.
    if (printPerThreadFlag) {
      fprintf(statsOut, "Thread %d\n", t);
      printTimerStats(statsOut, (*it)->getTimers(), 0);
      printCounters(statsOut, (*it)->getCounters());
      fprintf(statsOut, "\n");
    }
    // Output per thread events if requested (and the events file is usable).
    if (eventsOut) {
      kmp_stats_event_vector events = (*it)->getEventVector();
      printEvents(eventsOut, &events, t);
    }

    // Accumulate timers.
    for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
      // See if we should ignore this timer when aggregating
      if ((timeStat::masterOnly(s) && (t != 0)) || // Timer only valid on master
          // and this thread is worker
          (timeStat::workerOnly(s) && (t == 0)) // Timer only valid on worker
          // and this thread is the master
          ) {
        continue;
      }

      statistic *threadStat = (*it)->getTimer(s);
      allStats[s] += *threadStat;

      // Add Total stats for timers that are valid in more than one thread
      if (!timeStat::noTotal(s))
        totalStats[s].addSample(threadStat->getTotal());
    }

    // Accumulate counters: each thread contributes one sample, except that
    // master-only counters are taken from thread 0 alone.
    for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) {
      if (counter::masterOnly(c) && t != 0)
        continue;
      allCounters[c].addSample((*it)->getCounter(c)->getValue());
    }
  }

  if (eventPrintingEnabled()) {
    printPloticusFile();
    // Only close what was actually opened; fclose(NULL) is undefined.
    if (eventsOut)
      fclose(eventsOut);
  }

  fprintf(statsOut, "Aggregate for all threads\n");
  printTimerStats(statsOut, &allStats[0], &totalStats[0]);
  fprintf(statsOut, "\n");
  printCounterStats(statsOut, &allCounters[0]);

  // stderr is not ours to close.
  if (statsOut != stderr)
    fclose(statsOut);
}