void kmp_stats_output_module::printTimerStats(FILE *statsOut, statistic const *theStats, statistic const *totalStats) { fprintf(statsOut, "Timer, SampleCount, Min, " "Mean, Max, Total, SD\n"); for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) { statistic const *stat = &theStats[s]; char tag = timeStat::noUnits(s) ? ' ' : 'T'; fprintf(statsOut, "%-28s, %s\n", timeStat::name(s), stat->format(tag, true).c_str()); } // Also print the Total_ versions of times. for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) { char tag = timeStat::noUnits(s) ? ' ' : 'T'; if (totalStats && !timeStat::noTotal(s)) fprintf(statsOut, "Total_%-22s, %s\n", timeStat::name(s), totalStats[s].format(tag, true).c_str()); } }
void kmp_stats_output_module::printStats(FILE *statsOut, statistic const * theStats, bool areTimers) { if (areTimers) { // Check if we have useful timers, since we don't print zero value timers we need to avoid // printing a header and then no data. bool haveTimers = false; for (int s = 0; s<TIMER_LAST; s++) { if (theStats[s].getCount() != 0) { haveTimers = true; break; } } if (!haveTimers) return; } // Print const char * title = areTimers ? "Timer, SampleCount," : "Counter, ThreadCount,"; fprintf (statsOut, "%s Min, Mean, Max, Total, SD\n", title); if (areTimers) { for (int s = 0; s<TIMER_LAST; s++) { statistic const * stat = &theStats[s]; if (stat->getCount() != 0) { char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T'; fprintf (statsOut, "%-25s, %s\n", timeStat::name(timer_e(s)), stat->format(tag, true).c_str()); } } } else { // Counters for (int s = 0; s<COUNTER_LAST; s++) { statistic const * stat = &theStats[s]; fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(s)), stat->format(' ', true).c_str()); } } }
void kmp_stats_output_module::outputStats(const char* heading) { statistic allStats[TIMER_LAST]; statistic allCounters[COUNTER_LAST]; // stop all the explicit timers for all threads windupExplicitTimers(); FILE * eventsOut; FILE * statsOut = outputFileName ? fopen (outputFileName, "a+") : stderr; if (eventPrintingEnabled()) { eventsOut = fopen(eventsFileName, "w+"); } if (!statsOut) statsOut = stderr; fprintf(statsOut, "%s\n",heading); // Accumulate across threads. kmp_stats_list::iterator it; for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { int t = (*it)->getGtid(); // Output per thread stats if requested. if (perThreadPrintingEnabled()) { fprintf (statsOut, "Thread %d\n", t); printStats(statsOut, (*it)->getTimers(), true); printCounters(statsOut, (*it)->getCounters()); fprintf(statsOut,"\n"); } // Output per thread events if requested. if (eventPrintingEnabled()) { kmp_stats_event_vector events = (*it)->getEventVector(); printEvents(eventsOut, &events, t); } for (int s = 0; s<TIMER_LAST; s++) { // See if we should ignore this timer when aggregating if ((timeStat::masterOnly(timer_e(s)) && (t != 0)) || // Timer is only valid on the master and this thread is a worker (timeStat::workerOnly(timer_e(s)) && (t == 0)) || // Timer is only valid on a worker and this thread is the master timeStat::synthesized(timer_e(s)) // It's a synthesized stat, so there's no raw data for it. ) { continue; } statistic * threadStat = (*it)->getTimer(timer_e(s)); allStats[s] += *threadStat; } // Special handling for synthesized statistics. // These just have to be coded specially here for now. // At present we only have a few: // The total parallel work done in each thread. // The variance here makes it easy to see load imbalance over the whole program (though, of course, // it's possible to have a code with awful load balance in every parallel region but perfect load // balance oever the whole program.) // The time spent in barriers in each thread. allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal()); // Time in explicit barriers. allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal()); for (int c = 0; c<COUNTER_LAST; c++) { if (counter::masterOnly(counter_e(c)) && t != 0) continue; allCounters[c].addSample ((*it)->getCounter(counter_e(c))->getValue()); } } if (eventPrintingEnabled()) { printPloticusFile(); fclose(eventsOut); } fprintf (statsOut, "Aggregate for all threads\n"); printStats (statsOut, &allStats[0], true); fprintf (statsOut, "\n"); printStats (statsOut, &allCounters[0], false); if (statsOut != stderr) fclose(statsOut); }
void kmp_stats_output_module::outputStats(const char *heading) { // Stop all the explicit timers in all threads // Do this before declaring the local statistics because thay have // constructors so will take time to create. windupExplicitTimers(); statistic allStats[TIMER_LAST]; statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of normal timer stats */ statistic allCounters[COUNTER_LAST]; FILE *statsOut = !outputFileName.empty() ? fopen(outputFileName.c_str(), "a+") : stderr; if (!statsOut) statsOut = stderr; FILE *eventsOut; if (eventPrintingEnabled()) { eventsOut = fopen(eventsFileName, "w+"); } printHeaderInfo(statsOut); fprintf(statsOut, "%s\n", heading); // Accumulate across threads. kmp_stats_list::iterator it; for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { int t = (*it)->getGtid(); // Output per thread stats if requested. if (printPerThreadFlag) { fprintf(statsOut, "Thread %d\n", t); printTimerStats(statsOut, (*it)->getTimers(), 0); printCounters(statsOut, (*it)->getCounters()); fprintf(statsOut, "\n"); } // Output per thread events if requested. if (eventPrintingEnabled()) { kmp_stats_event_vector events = (*it)->getEventVector(); printEvents(eventsOut, &events, t); } // Accumulate timers. for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) { // See if we should ignore this timer when aggregating if ((timeStat::masterOnly(s) && (t != 0)) || // Timer only valid on master // and this thread is worker (timeStat::workerOnly(s) && (t == 0)) // Timer only valid on worker // and this thread is the master ) { continue; } statistic *threadStat = (*it)->getTimer(s); allStats[s] += *threadStat; // Add Total stats for timers that are valid in more than one thread if (!timeStat::noTotal(s)) totalStats[s].addSample(threadStat->getTotal()); } // Accumulate counters. for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) { if (counter::masterOnly(c) && t != 0) continue; allCounters[c].addSample((*it)->getCounter(c)->getValue()); } } if (eventPrintingEnabled()) { printPloticusFile(); fclose(eventsOut); } fprintf(statsOut, "Aggregate for all threads\n"); printTimerStats(statsOut, &allStats[0], &totalStats[0]); fprintf(statsOut, "\n"); printCounterStats(statsOut, &allCounters[0]); if (statsOut != stderr) fclose(statsOut); }