int perf_event_open_pfm(const char *str, int group_fd) { struct perf_event_attr attr; pfm_perf_encode_arg_t arg; // Clear memset(&attr, 0, sizeof(attr)); attr.size = sizeof(attr); memset(&arg, 0, sizeof(arg)); arg.size = sizeof(arg); arg.attr = &attr; // Use pfm to populate "attr" if (!pfm_initialized) { if (pfm_initialize() != PFM_SUCCESS) return -2; pfm_initialized = 1; } if (pfm_get_os_event_encoding(str, PFM_PLM0 | PFM_PLM3, PFM_OS_PERF_EVENT, &arg) != PFM_SUCCESS) return -2; #ifdef LOG_PFM_DECODE printf("PFM decode %s = %lx %lx %lx %lx\n", str, attr.type, attr.config, attr.config1, attr.config2); #endif // Generate event return perf_event_open_gen(&attr, group_fd); }
/*
 * Append one event descriptor to the *fds array for every string in the
 * NULL-terminated argv list, growing the array as needed.
 *
 * On the first call *fds must be NULL and *num_fds must be 0. On later
 * calls they must hold the values produced by a previous successful call
 * (the array capacity is kept in (*fds)[0].max_fds).
 *
 * All events added by one call share the same group_leader index: the
 * index of the first descriptor added by that call.
 *
 * Returns 0 on success, non-zero on failure. When the failure happens
 * after argument validation, the whole array is released with
 * perf_free_fds() and *fds / *num_fds are reset to NULL / 0 so the caller
 * is not left holding a pointer to freed memory.
 */
int perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int *num_fds)
{
	perf_event_desc_t *fd, *grown;
	pfm_perf_encode_arg_t arg;
	int new_max, ret, num, max_fds;
	int group_leader;

	if (!(argv && fds && num_fds))
		return -1;

	fd = *fds;
	if (fd) {
		max_fds = fd[0].max_fds;
		if (max_fds < 2)
			return -1;
		num = *num_fds;
	} else {
		max_fds = num = 0; /* bootstrap */
	}
	group_leader = num;

	while (*argv) {
		if (num == max_fds) {
			/* array is full: start at 2 entries, then double */
			if (max_fds == 0)
				new_max = 2;
			else
				new_max = max_fds << 1;

			if (new_max < max_fds) {
				warn("too many entries");
				goto error;
			}

			/*
			 * Grow through a temporary so that a failed realloc()
			 * does not overwrite (and leak) the existing array,
			 * which the error path still has to release.
			 */
			grown = realloc(fd, new_max * sizeof(*fd));
			if (!grown) {
				warn("cannot allocate memory");
				goto error;
			}
			fd = grown;

			/* reset newly allocated chunk */
			memset(fd + max_fds, 0, (new_max - max_fds) * sizeof(*fd));
			max_fds = new_max;

			/* update max size */
			fd[0].max_fds = max_fds;
		}

		/* ABI compatibility, set before calling libpfm */
		fd[num].hw.size = sizeof(fd[num].hw);

		memset(&arg, 0, sizeof(arg));
		arg.attr = &fd[num].hw;
		arg.fstr = &fd[num].fstr; /* fd[].fstr is NULL */

		ret = pfm_get_os_event_encoding(*argv, PFM_PLM0|PFM_PLM3,
						PFM_OS_PERF_EVENT_EXT, &arg);
		if (ret != PFM_SUCCESS) {
			warnx("event %s: %s", *argv, pfm_strerror(ret));
			goto error;
		}

		/* NOTE(review): strdup() failure is not detected here; name may be NULL */
		fd[num].name = strdup(*argv);
		fd[num].group_leader = group_leader;
		fd[num].idx = arg.idx;
		fd[num].cpu = arg.cpu;

		num++;
		argv++;
	}

	*num_fds = num;
	*fds = fd;
	return 0;

error:
	perf_free_fds(fd, num);
	/* the array is gone (or was moved by realloc): drop the stale references */
	*fds = NULL;
	*num_fds = 0;
	return -1;
}
/*
 * Open a file descriptor for perf events with `event_name', mmap it, and set
 * things up so that the calling thread receives SIGIO signals from it.
 *
 * `event_name' is encoded into a perf_event_attr by libpfm4 (user-level
 * only, PFM_PLM3); `sample_freq' is used as the sampling frequency.
 *
 * Returns the perf_event_handle on success, else folly::none.  On every
 * failure path after the descriptor is opened, it is closed again (directly,
 * or via close_event() once the ring buffer has been mapped).
 */
folly::Optional<perf_event_handle> enable_event(const char* event_name,
                                                uint64_t sample_freq) {
  struct perf_event_attr attr = {};
  pfm_perf_encode_arg_t arg = {};
  arg.attr = &attr;
  arg.size = sizeof(arg);

  // Populate the `type', `config', and `exclude_*' members on `attr'.
  auto const pfmr = pfm_get_os_event_encoding(event_name, PFM_PLM3,
                                              PFM_OS_PERF_EVENT, &arg);
  if (pfmr != PFM_SUCCESS) {
    Logger::Warning("perf_event: failed to get encoding for %s: %s",
                    event_name, pfm_strerror(pfmr));
    return folly::none;
  }

  // Finish setting up `attr' and open the event.
  attr.size = sizeof(attr);
  attr.disabled = 1;
  attr.sample_freq = sample_freq;
  attr.freq = 1;
  attr.watermark = 0;
  attr.wakeup_events = 1;
  attr.precise_ip = 2;  // request zero skid

  attr.sample_type = PERF_SAMPLE_IP
                   | PERF_SAMPLE_TID
                   | PERF_SAMPLE_ADDR
                   | PERF_SAMPLE_CALLCHAIN
                   ;

  auto const ret = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
  if (ret < 0) {
    // Some machines might not have PEBS support (needed for precise_ip > 0),
    // but then PERF_SAMPLE_ADDR will always return zeros instead of the target
    // memory address.  Just fail silently in this case.
    Logger::Verbose("perf_event: perf_event_open failed with: %s",
                    folly::errnoStr(errno).c_str());
    return folly::none;
  }
  auto const fd = safe_cast<int>(ret);

  // Recent versions of Linux have a CLOEXEC flag for perf_event_open(), but
  // use fcntl() for portability.  Note that since we do this after we open the
  // event, this could in theory race with an exec() from another thread---but
  // that shouldn't be happening anyway.
  //
  // F_SETFD takes file descriptor flags, so the flag to pass is FD_CLOEXEC;
  // O_CLOEXEC is an open(2) flag with a different value and would leave the
  // descriptor inheritable across exec().
  fcntl(fd, F_SETFD, FD_CLOEXEC);

  // Make sure that any SIGIO sent from `fd' is handled by the calling thread.
  f_owner_ex owner;
  owner.type = F_OWNER_TID;
  owner.pid = syscall(__NR_gettid);

  // Set up `fd' to send SIGIO with sigaction info.
  if (fcntl(fd, F_SETFL, O_ASYNC) < 0 ||
      fcntl(fd, F_SETSIG, SIGIO) < 0 ||
      fcntl(fd, F_SETOWN_EX, &owner) < 0) {
    Logger::Warning("perf_event: failed to set up asynchronous I/O: %s",
                    folly::errnoStr(errno).c_str());
    close(fd);
    return folly::none;
  }

  // Map the ring buffer for our samples.
  auto const base = mmap(nullptr, mmap_sz(), PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);
  if (base == MAP_FAILED) {
    Logger::Warning("perf_event: failed to mmap perf_event: %s",
                    folly::errnoStr(errno).c_str());
    close(fd);
    return folly::none;
  }
  auto const meta = reinterpret_cast<struct perf_event_mmap_page*>(base);

  auto const pe = perf_event_handle { fd, meta };

  // Reset the event.  This seems to be present in most examples, but it's
  // unclear if it's necessary or just good hygiene.  (It's possible that it's
  // necessary on successive opens.)
  if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) < 0) {
    Logger::Warning("perf_event: failed to reset perf_event: %s",
                    folly::errnoStr(errno).c_str());
    close_event(pe);
    return folly::none;
  }

  // Enable the event.  The man page and other examples of usage all suggest
  // that the right thing to do is to start with the event disabled and then
  // enable it manually afterwards, so we do the same here even though it seems
  // strange and circuitous.
  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    Logger::Warning("perf_event: failed to enable perf_event: %s",
                    folly::errnoStr(errno).c_str());
    close_event(pe);
    return folly::none;
  }

  return pe;
}
// Setup the counters and populate the counters struct with their data void pc_init(counters_t *counters, int pid) { #ifndef __arm__ return; #else int ret; ret = pfm_initialize(); if (ret != PFM_SUCCESS) { errx(1, "cannot initialize library: %s", pfm_strerror(ret)); } // Set values for getting cycle count memset(&counters->cycles.attr, 0, sizeof(counters->cycles.attr)); memset(&counters->l1_misses.attr, 0, sizeof(counters->l1_misses.attr)); memset(&counters->ic.attr, 0, sizeof(counters->ic.attr)); memset(&counters->cycles.arg, 0, sizeof(counters->cycles.arg)); memset(&counters->l1_misses.arg, 0, sizeof(counters->l1_misses.arg)); memset(&counters->ic.arg, 0, sizeof(counters->ic.arg)); counters->cycles.count = 0; counters->l1_misses.count = 0; counters->ic.count = 0; counters->cycles.arg.size = sizeof(counters->cycles.arg); counters->l1_misses.arg.size = sizeof(counters->l1_misses.arg); counters->ic.arg.size = sizeof(counters->ic.arg); counters->cycles.arg.attr = &counters->cycles.attr; counters->l1_misses.arg.attr = &counters->l1_misses.attr; counters->ic.arg.attr = &counters->ic.attr; // Get the encoding for the events // cycles ret = pfm_get_os_event_encoding("cycles", PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT, &counters->cycles.arg); if (ret != PFM_SUCCESS) { err(1,"Cycles: cannot get encoding %s", pfm_strerror(ret)); } // l1 cache misses ret = pfm_get_os_event_encoding("l1-dcache-load-misses", PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT, &counters->l1_misses.arg); if (ret != PFM_SUCCESS) { err(1,"L1 Cache Misses:cannot get encoding %s", pfm_strerror(ret)); } // instruction count misses ret = pfm_get_os_event_encoding("instructions", PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT, &counters->ic.arg); if (ret != PFM_SUCCESS) { err(1,"Instruction Count:cannot get encoding %s", pfm_strerror(ret)); } // Set more options counters->cycles.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; counters->l1_misses.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED 
| PERF_FORMAT_TOTAL_TIME_RUNNING; counters->ic.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; // do not start immediately after perf_event_open() counters->cycles.attr.disabled = 1; counters->l1_misses.attr.disabled = 1; counters->ic.attr.disabled = 1; // Open the counters counters->cycles.fd = perf_event_open(&counters->cycles.attr, pid, -1, -1, 0); if (counters->cycles.fd < 0) { err(1, "Cycle: cannot create event"); } counters->l1_misses.fd = perf_event_open(&counters->l1_misses.attr, pid, -1, -1, 0); if (counters->l1_misses.fd < 0) { err(1, "L1 miss: cannot create event"); } counters->ic.fd = perf_event_open(&counters->ic.attr, pid, -1, -1, 0); if (counters->ic.fd < 0) { err(1, "Instruction count: cannot create event"); } return; #endif }