예제 #1
0
void perf_stat_dump(
	FILE *yaml,
	const stress_t stressors[],
	const proc_info_t procs[STRESS_MAX],
	const int32_t max_procs,
	const double duration)
{
	int32_t i;
	bool no_perf_stats = true;

	setlocale(LC_ALL, "");

	pr_yaml(yaml, "perfstats:\n");

	for (i = 0; i < STRESS_MAX; i++) {
		int p;
		uint64_t counter_totals[STRESS_PERF_MAX];
		uint64_t total_cpu_cycles = 0;
		uint64_t total_cache_refs = 0;
		uint64_t total_branches = 0;
		int ids[STRESS_PERF_MAX];
		bool got_data = false;
		char *munged;

		memset(counter_totals, 0, sizeof(counter_totals));

		/* Sum totals across all instances of the stressor */
		for (p = 0; p < STRESS_PERF_MAX; p++) {
			int32_t j, n = (i * max_procs);
			stress_perf_t *sp = &shared->stats[n].sp;

			if (!perf_stat_succeeded(sp))
				continue;

			ids[p] = ~0;
			for (j = 0; j < procs[i].started_procs; j++, n++) {
				uint64_t counter;

				if (perf_get_counter_by_index(sp, p,
				    &counter, &ids[p]) < 0)
					break;
				if (counter == STRESS_PERF_INVALID) {
					counter_totals[p] = STRESS_PERF_INVALID;
					break;
				}
				counter_totals[p] += counter;
				got_data |= (counter > 0);
			}
			if (ids[p] == STRESS_PERF_HW_CPU_CYCLES)
				total_cpu_cycles = counter_totals[p];
			if (ids[p] == STRESS_PERF_HW_CACHE_REFERENCES)
				total_cache_refs = counter_totals[p];
			if (ids[p] == STRESS_PERF_HW_BRANCH_INSTRUCTIONS)
				total_branches = counter_totals[p];
		}

		if (!got_data)
			continue;

		munged = munge_underscore((char *)stressors[i].name);
		pr_inf(stdout, "%s:\n", munged);
		pr_yaml(yaml, "    - stressor: %s\n", munged);
		pr_yaml(yaml, "      duration: %f\n", duration);

		for (p = 0; p < STRESS_PERF_MAX; p++) {
			const char *l = perf_get_label_by_index(p);
			uint64_t ct = counter_totals[p];

			if (l && (ct != STRESS_PERF_INVALID)) {
				char extra[32];
				char yaml_label[128];
				*extra = '\0';

				no_perf_stats = false;

				if ((ids[p] == STRESS_PERF_HW_INSTRUCTIONS) &&
				    (total_cpu_cycles > 0))
					snprintf(extra, sizeof(extra),
						" (%.3f instr. per cycle)",
						(double)ct / (double)total_cpu_cycles);
				if ((ids[p] == STRESS_PERF_HW_CACHE_MISSES) &&
				     (total_cache_refs > 0))
					snprintf(extra, sizeof(extra),
						" (%5.2f%%)",
						100.0 * (double)ct / (double)total_cache_refs);
				if ((ids[p] == STRESS_PERF_HW_BRANCH_MISSES) &&
				    (total_branches > 0))
					snprintf(extra, sizeof(extra),
						" (%5.2f%%)",
						100.0 * (double)ct / (double)total_branches);

				pr_inf(stdout, "%'26" PRIu64 " %-23s %s%s\n",
					ct, l, perf_stat_scale(ct, duration),
					extra);

				perf_yaml_label(yaml_label, l, sizeof(yaml_label));
				pr_yaml(yaml, "      %s_total: %" PRIu64
					"\n", yaml_label, ct);
				pr_yaml(yaml, "      %s_per_second: %f\n",
					yaml_label, (double)ct / duration);
			}
		}
		pr_yaml(yaml, "\n");
	}
	if (no_perf_stats)
		pr_inf(stdout, "perf counters are not available "
			"on this device\n");
}
예제 #2
0
void perf_stat_dump(FILE *yaml, proc_info_t *procs_head, const double duration)
{
	bool no_perf_stats = true;
	proc_info_t *pi;

#if defined(HAVE_LOCALE_H)
	(void)setlocale(LC_ALL, "");
#endif

	pr_yaml(yaml, "perfstats:\n");

	for (pi = procs_head; pi; pi = pi->next) {
		int p;
		uint64_t counter_totals[STRESS_PERF_MAX];
		uint64_t total_cpu_cycles = 0;
		uint64_t total_cache_refs = 0;
		uint64_t total_branches = 0;
		bool got_data = false;
		char *munged;

		(void)memset(counter_totals, 0, sizeof(counter_totals));

		/* Sum totals across all instances of the stressor */
		for (p = 0; p < STRESS_PERF_MAX && perf_info[p].label; p++) {
			int32_t j;
			stress_perf_t *sp = &pi->stats[0]->sp;

			if (!perf_stat_succeeded(sp))
				continue;

			for (j = 0; j < pi->started_procs; j++) {
				const uint64_t counter = sp->perf_stat[p].counter;

				if (counter == STRESS_PERF_INVALID) {
					counter_totals[p] = STRESS_PERF_INVALID;
					break;
				}
				counter_totals[p] += counter;
				got_data |= (counter > 0);
			}
			if (perf_info[p].type == PERF_TYPE_HARDWARE) {
				unsigned long config = perf_info[p].config;

				if (config == PERF_COUNT_HW_CPU_CYCLES)
					total_cpu_cycles = counter_totals[p];
				else if (config == PERF_COUNT_HW_CACHE_REFERENCES)
					total_cache_refs = counter_totals[p];
				else if (config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
					total_branches = counter_totals[p];
			}
		}

		if (!got_data)
			continue;

		munged = stress_munge_underscore(pi->stressor->name);
		pr_inf("%s:\n", munged);
		pr_yaml(yaml, "    - stressor: %s\n", munged);
		pr_yaml(yaml, "      duration: %f\n", duration);

		for (p = 0; p < STRESS_PERF_MAX && perf_info[p].label; p++) {
			const char *l = perf_info[p].label;
			uint64_t ct = counter_totals[p];

			if (l && (ct != STRESS_PERF_INVALID)) {
				char extra[32];
				char yaml_label[128];
				*extra = '\0';

				no_perf_stats = false;

				if (perf_info[p].type == PERF_TYPE_HARDWARE) {
					unsigned long config = perf_info[p].config;
					if ((config == PERF_COUNT_HW_INSTRUCTIONS) &&
					    (total_cpu_cycles > 0))
						(void)snprintf(extra, sizeof(extra),
							" (%.3f instr. per cycle)",
							(double)ct / (double)total_cpu_cycles);
					else if ((config == PERF_COUNT_HW_CACHE_MISSES) &&
					     (total_cache_refs > 0))
						(void)snprintf(extra, sizeof(extra),
							" (%5.2f%%)",
							100.0 * (double)ct / (double)total_cache_refs);
					else if ((config == PERF_COUNT_HW_BRANCH_MISSES) &&
					    (total_branches > 0))
						(void)snprintf(extra, sizeof(extra),
							" (%5.2f%%)",
							100.0 * (double)ct / (double)total_branches);
				}

				pr_inf("%'26" PRIu64 " %-24s %s%s\n",
					ct, l, perf_stat_scale(ct, duration),
					extra);

				perf_yaml_label(yaml_label, l, sizeof(yaml_label));
				pr_yaml(yaml, "      %s_total: %" PRIu64
					"\n", yaml_label, ct);
				pr_yaml(yaml, "      %s_per_second: %f\n",
					yaml_label, (double)ct / duration);
			}
		}
		pr_yaml(yaml, "\n");
	}
	if (no_perf_stats) {
		if (geteuid() != 0) {
			char buffer[64];
			int ret;
			bool paranoid = false;
			int level = 0;
			static char *path = "/proc/sys/kernel/perf_event_paranoid";

			ret = system_read(path, buffer, sizeof(buffer) - 1);
			if (ret > 0) {
				if (sscanf(buffer, "%5d", &level) == 1)
					paranoid = true;
			}
			if (paranoid & (level > 1)) {
				pr_inf("Cannot read perf counters, "
					"do not have CAP_SYS_ADMIN capability "
					"or %s is set too high (%d)\n",
					path, level);
			}
		} else {
			pr_inf("perf counters are not available "
				"on this device\n");
		}
	}
}