void perf_stat_dump(
	FILE *yaml,
	const stress_t stressors[],
	const proc_info_t procs[STRESS_MAX],
	const int32_t max_procs,
	const double duration)
{
	int32_t i;
	bool no_perf_stats = true;

	setlocale(LC_ALL, "");

	pr_yaml(yaml, "perfstats:\n");

	for (i = 0; i < STRESS_MAX; i++) {
		int p;
		uint64_t counter_totals[STRESS_PERF_MAX];
		uint64_t total_cpu_cycles = 0;
		uint64_t total_cache_refs = 0;
		uint64_t total_branches = 0;
		int ids[STRESS_PERF_MAX];
		bool got_data = false;
		char *munged;

		memset(counter_totals, 0, sizeof(counter_totals));

		/* Sum totals across all instances of the stressor */
		for (p = 0; p < STRESS_PERF_MAX; p++) {
			int32_t j, n = (i * max_procs);
			stress_perf_t *sp = &shared->stats[n].sp;

			if (!perf_stat_succeeded(sp))
				continue;

			ids[p] = ~0;
			for (j = 0; j < procs[i].started_procs; j++, n++) {
				uint64_t counter;

				/* Fetch this instance's stats block */
				sp = &shared->stats[n].sp;
				if (perf_get_counter_by_index(sp, p,
				    &counter, &ids[p]) < 0)
					break;
				if (counter == STRESS_PERF_INVALID) {
					counter_totals[p] = STRESS_PERF_INVALID;
					break;
				}
				counter_totals[p] += counter;
				got_data |= (counter > 0);
			}
			if (ids[p] == STRESS_PERF_HW_CPU_CYCLES)
				total_cpu_cycles = counter_totals[p];
			if (ids[p] == STRESS_PERF_HW_CACHE_REFERENCES)
				total_cache_refs = counter_totals[p];
			if (ids[p] == STRESS_PERF_HW_BRANCH_INSTRUCTIONS)
				total_branches = counter_totals[p];
		}
		if (!got_data)
			continue;

		munged = munge_underscore((char *)stressors[i].name);
		pr_inf(stdout, "%s:\n", munged);
		pr_yaml(yaml, "    - stressor: %s\n", munged);
		pr_yaml(yaml, "      duration: %f\n", duration);

		for (p = 0; p < STRESS_PERF_MAX; p++) {
			const char *l = perf_get_label_by_index(p);
			uint64_t ct = counter_totals[p];

			if (l && (ct != STRESS_PERF_INVALID)) {
				char extra[32];
				char yaml_label[128];
				*extra = '\0';

				no_perf_stats = false;

				if ((ids[p] == STRESS_PERF_HW_INSTRUCTIONS) &&
				    (total_cpu_cycles > 0))
					snprintf(extra, sizeof(extra),
						" (%.3f instr. per cycle)",
						(double)ct / (double)total_cpu_cycles);
				if ((ids[p] == STRESS_PERF_HW_CACHE_MISSES) &&
				    (total_cache_refs > 0))
					snprintf(extra, sizeof(extra),
						" (%5.2f%%)",
						100.0 * (double)ct / (double)total_cache_refs);
				if ((ids[p] == STRESS_PERF_HW_BRANCH_MISSES) &&
				    (total_branches > 0))
					snprintf(extra, sizeof(extra),
						" (%5.2f%%)",
						100.0 * (double)ct / (double)total_branches);

				pr_inf(stdout, "%'26" PRIu64 " %-23s %s%s\n",
					ct, l, perf_stat_scale(ct, duration), extra);

				perf_yaml_label(yaml_label, l, sizeof(yaml_label));
				pr_yaml(yaml, "      %s_total: %" PRIu64 "\n",
					yaml_label, ct);
				pr_yaml(yaml, "      %s_per_second: %f\n",
					yaml_label, (double)ct / duration);
			}
		}
		pr_yaml(yaml, "\n");
	}
	if (no_perf_stats)
		pr_inf(stdout, "perf counters are not available "
			"on this device\n");
}
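/*
 * perf_stat_scale() and perf_yaml_label() are called above but not shown
 * in this listing.  The following is a minimal, self-contained sketch of
 * plausible implementations -- an illustrative assumption, not the actual
 * stress-ng helpers: perf_stat_scale() formats a raw counter as a
 * human-readable rate over the run duration, and perf_yaml_label()
 * rewrites a counter label into a YAML-safe mapping key.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdint.h>

static const char *perf_stat_scale(const uint64_t counter, const double duration)
{
	static char buffer[40];
	const double rate = (duration > 0.0) ? (double)counter / duration : 0.0;

	/* Pick a suffix so the rate stays in a readable range */
	if (rate >= 1e9)
		(void)snprintf(buffer, sizeof(buffer), "%9.3f B/sec", rate / 1e9);
	else if (rate >= 1e6)
		(void)snprintf(buffer, sizeof(buffer), "%9.3f M/sec", rate / 1e6);
	else if (rate >= 1e3)
		(void)snprintf(buffer, sizeof(buffer), "%9.3f K/sec", rate / 1e3);
	else
		(void)snprintf(buffer, sizeof(buffer), "%9.3f /sec", rate);
	return buffer;
}

static void perf_yaml_label(char *dst, const char *label, const size_t len)
{
	size_t i;

	if (!len)
		return;
	/* Map anything non-alphanumeric to '_' so the label can be used
	   as a YAML key, e.g. "CPU Cycles" becomes "cpu_cycles" */
	for (i = 0; label[i] && (i < len - 1); i++)
		dst[i] = isalnum((unsigned char)label[i]) ?
			(char)tolower((unsigned char)label[i]) : '_';
	dst[i] = '\0';
}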
void perf_stat_dump(FILE *yaml, proc_info_t *procs_head, const double duration)
{
	bool no_perf_stats = true;
	proc_info_t *pi;

#if defined(HAVE_LOCALE_H)
	(void)setlocale(LC_ALL, "");
#endif

	pr_yaml(yaml, "perfstats:\n");

	for (pi = procs_head; pi; pi = pi->next) {
		int p;
		uint64_t counter_totals[STRESS_PERF_MAX];
		uint64_t total_cpu_cycles = 0;
		uint64_t total_cache_refs = 0;
		uint64_t total_branches = 0;
		bool got_data = false;
		char *munged;

		(void)memset(counter_totals, 0, sizeof(counter_totals));

		/* Sum totals across all instances of the stressor */
		for (p = 0; p < STRESS_PERF_MAX && perf_info[p].label; p++) {
			int32_t j;

			if (!perf_stat_succeeded(&pi->stats[0]->sp))
				continue;

			for (j = 0; j < pi->started_procs; j++) {
				/* Sum each instance's counter into the total */
				const uint64_t counter =
					pi->stats[j]->sp.perf_stat[p].counter;

				if (counter == STRESS_PERF_INVALID) {
					counter_totals[p] = STRESS_PERF_INVALID;
					break;
				}
				counter_totals[p] += counter;
				got_data |= (counter > 0);
			}
			if (perf_info[p].type == PERF_TYPE_HARDWARE) {
				const unsigned long config = perf_info[p].config;

				if (config == PERF_COUNT_HW_CPU_CYCLES)
					total_cpu_cycles = counter_totals[p];
				else if (config == PERF_COUNT_HW_CACHE_REFERENCES)
					total_cache_refs = counter_totals[p];
				else if (config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
					total_branches = counter_totals[p];
			}
		}
		if (!got_data)
			continue;

		munged = stress_munge_underscore(pi->stressor->name);
		pr_inf("%s:\n", munged);
		pr_yaml(yaml, "    - stressor: %s\n", munged);
		pr_yaml(yaml, "      duration: %f\n", duration);

		for (p = 0; p < STRESS_PERF_MAX && perf_info[p].label; p++) {
			const char *l = perf_info[p].label;
			uint64_t ct = counter_totals[p];

			if (l && (ct != STRESS_PERF_INVALID)) {
				char extra[32];
				char yaml_label[128];
				*extra = '\0';

				no_perf_stats = false;

				if (perf_info[p].type == PERF_TYPE_HARDWARE) {
					const unsigned long config = perf_info[p].config;

					if ((config == PERF_COUNT_HW_INSTRUCTIONS) &&
					    (total_cpu_cycles > 0))
						(void)snprintf(extra, sizeof(extra),
							" (%.3f instr. per cycle)",
							(double)ct / (double)total_cpu_cycles);
					else if ((config == PERF_COUNT_HW_CACHE_MISSES) &&
						 (total_cache_refs > 0))
						(void)snprintf(extra, sizeof(extra),
							" (%5.2f%%)",
							100.0 * (double)ct / (double)total_cache_refs);
					else if ((config == PERF_COUNT_HW_BRANCH_MISSES) &&
						 (total_branches > 0))
						(void)snprintf(extra, sizeof(extra),
							" (%5.2f%%)",
							100.0 * (double)ct / (double)total_branches);
				}
				pr_inf("%'26" PRIu64 " %-24s %s%s\n",
					ct, l, perf_stat_scale(ct, duration), extra);

				perf_yaml_label(yaml_label, l, sizeof(yaml_label));
				pr_yaml(yaml, "      %s_total: %" PRIu64 "\n",
					yaml_label, ct);
				pr_yaml(yaml, "      %s_per_second: %f\n",
					yaml_label, (double)ct / duration);
			}
		}
		pr_yaml(yaml, "\n");
	}
	if (no_perf_stats) {
		if (geteuid() != 0) {
			char buffer[64];
			ssize_t ret;
			bool paranoid = false;
			int level = 0;
			static const char *path = "/proc/sys/kernel/perf_event_paranoid";

			ret = system_read(path, buffer, sizeof(buffer) - 1);
			if (ret > 0) {
				if (sscanf(buffer, "%5d", &level) == 1)
					paranoid = true;
			}
			if (paranoid && (level > 1)) {
				pr_inf("Cannot read perf counters, "
					"do not have CAP_SYS_ADMIN capability "
					"or %s is set too high (%d)\n",
					path, level);
			}
		} else {
			pr_inf("perf counters are not available "
				"on this device\n");
		}
	}
}
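/*
 * The fallback diagnostic above keys off /proc/sys/kernel/perf_event_paranoid.
 * The standalone sketch below performs the same probe with plain stdio
 * instead of the stress-ng system_read() helper (assumed here to read a
 * small text file into a buffer).  Per perf_event_open(2): 2 allows only
 * user-space measurements (the default since Linux 4.6), 1 allows both
 * kernel and user measurements, 0 additionally allows CPU-specific data,
 * and -1 lifts all restrictions -- hence the level > 1 check above.
 */
#include <stdio.h>

int main(void)
{
	int level;
	FILE *fp = fopen("/proc/sys/kernel/perf_event_paranoid", "r");

	if (!fp) {
		perror("fopen");
		return 1;
	}
	if (fscanf(fp, "%d", &level) == 1)
		printf("perf_event_paranoid = %d%s\n", level,
			(level > 1) ? " (unprivileged perf counters restricted)" : "");
	(void)fclose(fp);
	return 0;
}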