コード例 #1
0
ファイル: output.cpp プロジェクト: 01org/hpc-speedometer
/*
 * Build one metrics sample from the per-cpu counters in ctrs and pass it
 * to sample().
 *
 * ctrs     - per-cpu counter array, indexed by cpu number. It is read
 *            through a volatile pointer because another thread updates it.
 * duration - length of the sampling interval; stored in the sample and
 *            compared against the clocks1 delta to decide whether a
 *            hardware thread was active.
 *            NOTE(review): units must match the clocks1 counter -- confirm
 *            against the caller.
 *
 * Side effects: updates lastctr[], sevents[], prevnreads and prevnwrites.
 */
void
printcounters(struct counter *ctrs, uint64_t duration)
{
    struct metrics s = {0};

    s.timestamp = _rdtsc();
    s.duration = duration;
    // We skip the last core
    // (the loop visits cpus 1 .. gbl.ncpus-4; presumably cpu 0 and the
    // final three cpus make up that core -- TODO confirm).
    // corethreads counts the active threads seen so far on the current core.
    int corethreads =0;
    for (int cpu = 1; cpu < gbl.ncpus-3; ++cpu)
    {
        // Per-event change since the previous call, for this cpu only.
        double delta[NEVENTS];
        // volatile because another thread is changing it.
        volatile struct counter *p = &ctrs[cpu];

        for (int i = 0; i < NEVENTS; ++i)
        {
            // The union lets the 64 bytes fetched by the vector load be
            // read back as eight 64-bit integers.
            union {
                __m512d c;
                uint64_t values[8];
            } t;
            // Snapshot the counter with a single 512-bit load.
            // NOTE(review): this is an aligned load, so counts[i] is
            // assumed to be 64-byte aligned -- confirm in struct counter.
            t.c = _mm512_load_pd((void *)&p->counts[i][0]);
            // Delta against the snapshot kept from the previous call.
            delta[i] = perf_scale_delta(t.values, lastctr[cpu].counts[i]);
            // Keep this snapshot as the baseline for the next call.
            _mm512_storenrngo_pd((void *)&lastctr[cpu].counts[i][0], t.c);
            // Never let a negative delta reach the totals.
            if (delta[i] < 0)
                delta[i] = 0;
            sevents[i] += delta[i];
        }

        // A thread counts as active when its clocks1 delta is more than
        // half of the interval.
        if (2*delta[clocks1] > duration)
        {
            s.nthreads += 1;
            corethreads += 1;
        }

        if ((cpu % 4) == 0) // Last thread on this core
        {
            // The core is counted once if any of its threads was active.
            if (corethreads)
                s.ncores += 1;
            corethreads = 0;
        }

        s.vpu_ea += delta[vpu_ea];
        s.instrs += delta[instrs];
        s.vinstrs += delta[vpu_ie];
    }
    // Sum the read (index 0) and write (index 1) counters over both
    // channels (j) of every gbox (i).
    uint64_t nreads = 0, nwrites = 0;
    for (int i = 0; i < NGBOXES; ++i)
        for (int j = 0; j < 2; ++j)
        {
            nreads += pmu_rdctr(i, j, 0);
            nwrites += pmu_rdctr(i, j, 1);
        }
    // Convert the change in access counts to bytes.
    // NOTE(review): 64 is presumably the bytes moved per counted access
    // (one cache line) -- confirm.
    s.rbytes = (nreads - prevnreads) * 64;
    s.wbytes = (nwrites - prevnwrites)* 64;
    prevnreads = nreads;
    prevnwrites = nwrites;

    sample(&s);
}
コード例 #2
0
ファイル: output.cpp プロジェクト: 01org/hpc-speedometer
/*
 * Build one metrics sample from the per-cpu counters in ctrs and pass it
 * to sample().
 *
 * ctrs     - per-cpu counter array, indexed by cpu number. It is read
 *            through a volatile pointer because another thread updates it.
 * duration - length of the sampling interval; stored in the sample and
 *            compared against the clocks delta to decide whether a
 *            hardware thread was active.
 *            NOTE(review): units must match the clocks counter -- confirm
 *            against the caller.
 *
 * Side effects: updates lastctr[], sevents[], threadspercore[],
 * lastBytesRead and lastBytesWritten.
 */
void
printcounters(struct counter *ctrs, uint64_t duration)
{
    struct metrics s = {0};

    // Take the memory-traffic totals before walking the cpus.
    uint64_t thisBytesWritten = pcm->bytesWritten();
    uint64_t thisBytesRead = pcm->bytesRead();
    // Restart the per-core tally of active threads for this sample.
    memset(threadspercore, 0, gbl.ncores * sizeof(int));
    s.timestamp = _rdtsc();
    s.duration = duration;
    for (int cpu = 0; cpu < gbl.ncpus; ++cpu)
    {
        // Per-event change since the previous call, for this cpu only.
        double delta[NEVENTS];
        // volatile because another thread is changing it.
        volatile struct counter *p = &ctrs[cpu];

        for (int i = 0; i < NEVENTS; ++i)
        {
            // The union lets the 32 bytes fetched by the vector load be
            // read back as four 64-bit integers.
            union {
                __m256d c;
                uint64_t values[4];
            } t;
            // Snapshot the counter with a single 256-bit load.
            // NOTE(review): this is an aligned load, so counts[i] is
            // assumed to be 32-byte aligned -- confirm in struct counter.
            t.c = _mm256_load_pd((const double *)&p->counts[i][0]);
            // Delta against the snapshot kept from the previous call.
            delta[i] = perf_scale_delta(t.values, lastctr[cpu].counts[i]);
            // Keep this snapshot as the baseline for the next call.
            _mm256_store_pd((double *)&lastctr[cpu].counts[i][0], t.c);
            // Never let a negative delta reach the totals.
            if (delta[i] < 0)
                delta[i] = 0;
            sevents[i] += delta[i];
        }

        //printf("clocks %g duration %lu\n", delta[clocks], duration);
        // A thread counts as active when its clocks delta is more than
        // half of the interval.
        if (2*delta[clocks] > duration)
        {
            // Flatten (socket, core) into a single core index.
            // NOTE(review): assumes the result is below gbl.ncores, the
            // number of threadspercore entries cleared above -- confirm.
            int thiscore = pcm->getSocketId(cpu)  * gbl.corespersocket +
                pcm->getCoreId(cpu);
            ++s.nthreads;
            ++threadspercore[thiscore];
        }
        // Add this cpu's floating-point and instruction deltas to the
        // sample totals.
        s.dsimd += delta[simd_dp];
        s.dsse += delta[sse_dp];
        s.dscalar += delta[scalar_dp];
        s.ssimd += delta[simd_sp];
        s.ssse += delta[sse_sp];
        s.sscalar += delta[scalar_sp];
        s.instrs += delta[instrs];
    }
    // Memory traffic for the interval is the change in the running totals.
    s.rbytes = thisBytesRead - lastBytesRead;
    s.wbytes = thisBytesWritten - lastBytesWritten;
    lastBytesRead = thisBytesRead;
    lastBytesWritten = thisBytesWritten;
    // A core is counted as busy if at least one of its threads was active.
    for (int i = 0; i < gbl.ncores; ++i)
        if (threadspercore[i])
            ++s.ncores;

    sample(&s);
}
コード例 #3
0
ファイル: syst_count.c プロジェクト: FMCalisto/SMP
/*
 * Read and print every event counter opened on CPU c.
 *
 * The descriptors for the CPU come from all_fds[c]; they are walked group
 * by group (options.num_groups groups, options.nevents[g] events each) in
 * one flat array. For each event the raw, scaled and delta counts are
 * printed and the current values are saved as the baseline for the next
 * call.
 *
 * A failed read() terminates the program; a short read is reported and
 * the event is skipped for this round.
 */
void read_cpu(int c)
{
	perf_event_desc_t *fds = all_fds[c];
	int grp, ev, k, ret;
	int idx = 0;	/* flat position in fds[], runs across all groups */

	if (fds[0].fd == -1) {
		printf("CPU%d not monitored\n", c);
		return;
	}

	for (grp = 0; grp < options.num_groups; grp++) {
		for (ev = 0; ev < options.nevents[grp]; ev++, idx++) {
			perf_event_desc_t *d = &fds[idx];
			uint64_t val, delta;
			double ratio;

			ret = read(d->fd, d->values, sizeof(d->values));
			if (ret == -1)
				err(1, "cannot read event %s : %d", d->name, ret);
			if (ret != sizeof(d->values)) {
				warnx("CPU%d G%-2d could not read event %s, read=%d", c, grp, d->name, ret);
				continue;
			}

			/*
			 * The PMU may be shared and therefore multiplexed,
			 * so the raw counts have to be scaled.
			 */
			delta = perf_scale_delta(d->values, d->prev_values);
			val = perf_scale(d->values);
			ratio = perf_scale_ratio(d->values);

			printf("CPU%-3d G%-2d %'20"PRIu64" %'20"PRIu64" %s (scaling %.2f%%, ena=%'"PRIu64", run=%'"PRIu64") %s\n",
				c,
				grp,
				val,
				delta,
				d->name,
				(1.0-ratio)*100,
				d->values[1],
				d->values[2],
				options.cgroup_name ? options.cgroup_name : "");

			/* remember count, time_enabled and time_running */
			for (k = 0; k < 3; k++)
				d->prev_values[k] = d->values[k];

			/* time_running must never exceed time_enabled */
			if (d->values[2] > d->values[1])
				errx(1, "WARNING: time_running > time_enabled %"PRIu64"\n", d->values[2] - d->values[1]);
		}
	}
}
コード例 #4
0
ファイル: task.c プロジェクト: DanieleDeSensi/mammut
/*
 * Refresh the counts for fds[0..num-1] via read_groups() and print one
 * line per event.
 *
 * Each line shows the scaled count, the event name, the percentage of
 * scaling applied and the enabled/running times; when options.print is
 * set the delta since the previous call is printed as well. A blank line
 * precedes every group leader. The current values are then saved as the
 * baseline for the next call.
 */
static void
print_counts(perf_event_desc_t *fds, int num)
{
	int idx, k;

	read_groups(fds, num);

	for (idx = 0; idx < num; idx++) {
		perf_event_desc_t *ev = &fds[idx];
		uint64_t val = perf_scale(ev->values);
		uint64_t delta = perf_scale_delta(ev->values, ev->prev_values);
		double ratio = perf_scale_ratio(ev->values);

		/* blank line between groups */
		if (perf_is_group_leader(fds, idx))
			putchar('\n');

		if (!options.print) {
			printf("%'20"PRIu64" %s (%.2f%% scaling, ena=%'"PRIu64", run=%'"PRIu64")\n",
				val,
				ev->name,
				(1.0-ratio)*100.0,
				ev->values[1],
				ev->values[2]);
		} else {
			printf("%'20"PRIu64" %'20"PRIu64" %s (%.2f%% scaling, ena=%'"PRIu64", run=%'"PRIu64")\n",
				val,
				delta,
				ev->name,
				(1.0-ratio)*100.0,
				ev->values[1],
				ev->values[2]);
		}

		/* remember count, time_enabled and time_running */
		for (k = 0; k < 3; k++)
			ev->prev_values[k] = ev->values[k];
	}
}
コード例 #5
0
/*
 * Read every event in fds[0..num-1] and print one line per event.
 *
 * fds      - event descriptors; each entry's values[] is refilled from its
 *            file descriptor and prev_values[] is updated afterwards.
 * num      - number of entries in fds.
 * do_delta - when non-zero, print the scaled change since the previous
 *            call instead of the scaled total.
 *
 * A failed read() terminates the program. A short read is reported and
 * the event is skipped: nothing is printed for it and its saved baseline
 * is left untouched.
 */
static void
print_counts(perf_event_desc_t *fds, int num, int do_delta)
{
	ssize_t ret;
	int i;

	/*
	 * now simply read the results.
	 */
	for(i=0; i < num; i++) {
		uint64_t val;
		double ratio;

		ret = read(fds[i].fd, fds[i].values, sizeof(fds[i].values));
		if (ret < (ssize_t)sizeof(fds[i].values)) {
			if (ret == -1)
				err(1, "cannot read values event %s", fds[i].name);
			/*
			 * Short read: values[] was only partly refreshed, so
			 * scaling it, printing it or saving it as the next
			 * baseline would report a bogus count. Skip the event
			 * for this round.
			 */
			warnx("could not read event%d", i);
			continue;
		}

		val = perf_scale(fds[i].values);
		ratio = perf_scale_ratio(fds[i].values);

		/* the delta is only computed when the caller asked for it */
		val = do_delta ? perf_scale_delta(fds[i].values, fds[i].prev_values) : val;

		/* remember count, time_enabled and time_running */
		fds[i].prev_values[0] = fds[i].values[0];
		fds[i].prev_values[1] = fds[i].values[1];
		fds[i].prev_values[2] = fds[i].values[2];

		/*
		 * ratio is printed as the share of time the event ran: 1.0
		 * gets the plain line, 0.0 the "did not run" line, anything
		 * else the "scaled from" line.
		 */
		if (ratio == 1.0)
			printf("%20"PRIu64" %s\n", val, fds[i].name);
		else if (ratio == 0.0)
			printf("%20"PRIu64" %s (did not run: incompatible events, too many events in a group, competing session)\n", val, fds[i].name);
		else
			printf("%20"PRIu64" %s (scaled from %.2f%% of time)\n", val, fds[i].name, ratio*100.0);
	}
}