void printcounters(struct counter *ctrs, uint64_t duration) { struct metrics s = {0}; s.timestamp = _rdtsc(); s.duration = duration; // We skip the last core int corethreads =0; for (int cpu = 1; cpu < gbl.ncpus-3; ++cpu) { double delta[NEVENTS]; // volatile because another thread is changing it. volatile struct counter *p = &ctrs[cpu]; for (int i = 0; i < NEVENTS; ++i) { union { __m512d c; uint64_t values[8]; } t; t.c = _mm512_load_pd((void *)&p->counts[i][0]); delta[i] = perf_scale_delta(t.values, lastctr[cpu].counts[i]); _mm512_storenrngo_pd((void *)&lastctr[cpu].counts[i][0], t.c); if (delta[i] < 0) delta[i] = 0; sevents[i] += delta[i]; } if (2*delta[clocks1] > duration) { s.nthreads += 1; corethreads += 1; } if ((cpu % 4) == 0) // Last thread on this core { if (corethreads) s.ncores += 1; corethreads = 0; } s.vpu_ea += delta[vpu_ea]; s.instrs += delta[instrs]; s.vinstrs += delta[vpu_ie]; } uint64_t nreads = 0, nwrites = 0; for (int i = 0; i < NGBOXES; ++i) for (int j = 0; j < 2; ++j) { nreads += pmu_rdctr(i, j, 0); nwrites += pmu_rdctr(i, j, 1); } s.rbytes = (nreads - prevnreads) * 64; s.wbytes = (nwrites - prevnwrites)* 64; prevnreads = nreads; prevnwrites = nwrites; sample(&s); }
void printcounters(struct counter *ctrs, uint64_t duration) { struct metrics s = {0}; uint64_t thisBytesWritten = pcm->bytesWritten(); uint64_t thisBytesRead = pcm->bytesRead(); memset(threadspercore, 0, gbl.ncores * sizeof(int)); s.timestamp = _rdtsc(); s.duration = duration; for (int cpu = 0; cpu < gbl.ncpus; ++cpu) { double delta[NEVENTS]; // volatile because another thread is changing it. volatile struct counter *p = &ctrs[cpu]; for (int i = 0; i < NEVENTS; ++i) { union { __m256d c; uint64_t values[4]; } t; t.c = _mm256_load_pd((const double *)&p->counts[i][0]); delta[i] = perf_scale_delta(t.values, lastctr[cpu].counts[i]); _mm256_store_pd((double *)&lastctr[cpu].counts[i][0], t.c); if (delta[i] < 0) delta[i] = 0; sevents[i] += delta[i]; } //printf("clocks %g duration %lu\n", delta[clocks], duration); if (2*delta[clocks] > duration) { int thiscore = pcm->getSocketId(cpu) * gbl.corespersocket + pcm->getCoreId(cpu); ++s.nthreads; ++threadspercore[thiscore]; } s.dsimd += delta[simd_dp]; s.dsse += delta[sse_dp]; s.dscalar += delta[scalar_dp]; s.ssimd += delta[simd_sp]; s.ssse += delta[sse_sp]; s.sscalar += delta[scalar_sp]; s.instrs += delta[instrs]; } s.rbytes = thisBytesRead - lastBytesRead; s.wbytes = thisBytesWritten - lastBytesWritten; lastBytesRead = thisBytesRead; lastBytesWritten = thisBytesWritten; for (int i = 0; i < gbl.ncores; ++i) if (threadspercore[i]) ++s.ncores; sample(&s); }
void read_cpu(int c) { perf_event_desc_t *fds; uint64_t val, delta; double ratio; int i, j, n, ret; fds = all_fds[c]; if (fds[0].fd == -1) { printf("CPU%d not monitored\n", c); return; } for(i=0, j = 0; i < options.num_groups; i++) { for(n = 0; n < options.nevents[i]; n++, j++) { ret = read(fds[j].fd, fds[j].values, sizeof(fds[j].values)); if (ret != sizeof(fds[j].values)) { if (ret == -1) err(1, "cannot read event %s : %d", fds[j].name, ret); else { warnx("CPU%d G%-2d could not read event %s, read=%d", c, i, fds[j].name, ret); continue; } } /* * scaling because we may be sharing the PMU and * thus may be multiplexed */ delta = perf_scale_delta(fds[j].values, fds[j].prev_values); val = perf_scale(fds[j].values); ratio = perf_scale_ratio(fds[j].values); printf("CPU%-3d G%-2d %'20"PRIu64" %'20"PRIu64" %s (scaling %.2f%%, ena=%'"PRIu64", run=%'"PRIu64") %s\n", c, i, val, delta, fds[j].name, (1.0-ratio)*100, fds[j].values[1], fds[j].values[2], options.cgroup_name ? options.cgroup_name : ""); fds[j].prev_values[0] = fds[j].values[0]; fds[j].prev_values[1] = fds[j].values[1]; fds[j].prev_values[2] = fds[j].values[2]; if (fds[j].values[2] > fds[j].values[1]) errx(1, "WARNING: time_running > time_enabled %"PRIu64"\n", fds[j].values[2] - fds[j].values[1]); } } }
/*
 * Refresh the counts of `num` events through read_groups() and print one
 * line per event.
 *
 * fds - array of event descriptors; values[] is filled by read_groups() and
 *       prev_values[] is updated here once the event has been printed.
 * num - number of entries in fds.
 *
 * A blank line is emitted before each group leader to separate groups.  When
 * options.print is set the scaled delta since the previous call is printed
 * next to the scaled total; otherwise only the total is shown.
 */
static void print_counts(perf_event_desc_t *fds, int num)
{
	int idx;

	read_groups(fds, num);

	for (idx = 0; idx < num; idx++) {
		perf_event_desc_t *e = &fds[idx];
		uint64_t total = perf_scale(e->values);
		uint64_t delta = perf_scale_delta(e->values, e->prev_values);
		double ratio = perf_scale_ratio(e->values);
		int k;

		/* separate groups */
		if (perf_is_group_leader(fds, idx))
			putchar('\n');

		if (options.print) {
			printf("%'20" PRIu64 " %'20" PRIu64 " %s (%.2f%% scaling, ena=%'" PRIu64 ", run=%'" PRIu64 ")\n",
			       total,
			       delta,
			       e->name,
			       (1.0 - ratio) * 100.0,
			       e->values[1],
			       e->values[2]);
		} else {
			printf("%'20" PRIu64 " %s (%.2f%% scaling, ena=%'" PRIu64 ", run=%'" PRIu64 ")\n",
			       total,
			       e->name,
			       (1.0 - ratio) * 100.0,
			       e->values[1],
			       e->values[2]);
		}

		/* remember the three raw values as the baseline for the next delta */
		for (k = 0; k < 3; k++)
			e->prev_values[k] = e->values[k];
	}
}
/*
 * Read the current counts of `num` events and print one line per event.
 *
 * fds      - array of event descriptors; each entry's fd is read into its
 *            values[] buffer and prev_values[] is updated after printing.
 * num      - number of entries in fds.
 * do_delta - non-zero: print the scaled delta since the previous call;
 *            zero: print the scaled total.
 *
 * A read() returning -1 terminates the program through err().  Any other
 * short read is reported with warnx() and the event is skipped: its values[]
 * buffer was not completely filled, so scaling and printing it would report
 * bogus numbers, and copying it into prev_values[] would corrupt the baseline
 * used for the next delta.
 */
static void print_counts(perf_event_desc_t *fds, int num, int do_delta)
{
	ssize_t ret;
	int i;

	/*
	 * now simply read the results.
	 */
	for (i = 0; i < num; i++) {
		uint64_t val;
		double ratio;

		ret = read(fds[i].fd, fds[i].values, sizeof(fds[i].values));
		if (ret < (ssize_t)sizeof(fds[i].values)) {
			if (ret == -1)
				err(1, "cannot read values event %s", fds[i].name);
			warnx("could not read event%d", i);
			/* incomplete sample: leave prev_values untouched and move on */
			continue;
		}

		val = perf_scale(fds[i].values);
		ratio = perf_scale_ratio(fds[i].values);
		if (do_delta)
			val = perf_scale_delta(fds[i].values, fds[i].prev_values);

		/* the raw values become the baseline for the next delta */
		fds[i].prev_values[0] = fds[i].values[0];
		fds[i].prev_values[1] = fds[i].values[1];
		fds[i].prev_values[2] = fds[i].values[2];

		if (ratio == 1.0)
			printf("%20" PRIu64 " %s\n", val, fds[i].name);
		else if (ratio == 0.0)
			printf("%20" PRIu64 " %s (did not run: incompatible events, too many events in a group, competing session)\n",
			       val, fds[i].name);
		else
			printf("%20" PRIu64 " %s (scaled from %.2f%% of time)\n",
			       val, fds[i].name, ratio * 100.0);
	}
}