Ejemplo n.º 1
0
/*
 * Encode a libpfm4 event string into a perf_event_attr and open the event.
 *
 * str:      event description handed to pfm_get_os_event_encoding()
 * group_fd: forwarded unchanged to perf_event_open_gen()
 *
 * Returns -2 if libpfm4 cannot be initialized or cannot encode "str";
 * otherwise returns the result of perf_event_open_gen().
 */
int perf_event_open_pfm(const char *str,
                        int group_fd)
{
    struct perf_event_attr attr;
    pfm_perf_encode_arg_t arg;

    // Clear both structures and record their sizes before libpfm fills them
    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    memset(&arg, 0, sizeof(arg));
    arg.size = sizeof(arg);
    arg.attr = &attr;

    // Use pfm to populate "attr"; the library is initialized on first use,
    // tracked by the pfm_initialized flag declared outside this function
    if (!pfm_initialized) {
        if (pfm_initialize() != PFM_SUCCESS)
            return -2;
        pfm_initialized = 1;
    }
    if (pfm_get_os_event_encoding(str, PFM_PLM0 | PFM_PLM3, PFM_OS_PERF_EVENT, &arg)
          != PFM_SUCCESS)
        return -2;

#ifdef LOG_PFM_DECODE
    // Cast every field so the variadic arguments match the %llx conversion
    // regardless of the underlying integer width of each attr member
    printf("PFM decode %s = %llx %llx %llx %llx\n", str,
           (unsigned long long)attr.type, (unsigned long long)attr.config,
           (unsigned long long)attr.config1, (unsigned long long)attr.config2);
#endif

    // Generate event
    return perf_event_open_gen(&attr, group_fd);
}
/*
 * Append one event descriptor to *fds for every string in the
 * NULL-terminated argv array.
 *
 * argv:    NULL-terminated list of event strings to encode with libpfm
 * fds:     in/out pointer to the descriptor array. *fds must be NULL on the
 *          first call; the array is then allocated here. On later calls the
 *          capacity is read from (*fds)[0].max_fds, which must be at least 2.
 * num_fds: in/out number of descriptors in use. It is only read when *fds is
 *          non-NULL; with a NULL *fds counting starts at zero.
 *
 * Every descriptor added by one call gets group_leader set to the index of
 * the first descriptor added by that call.
 *
 * Returns 0 on success and -1 on failure. When a failure happens after the
 * arguments have been validated, the array is handed to perf_free_fds() and
 * *fds / *num_fds are reset to NULL / 0 so the caller is not left holding a
 * stale pointer.
 */
int
perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int *num_fds)
{
	perf_event_desc_t *fd, *grown;
	pfm_perf_encode_arg_t arg;
	int new_max, ret, num, max_fds;
	int group_leader;

	if (!(argv && fds && num_fds))
		return -1;

	fd = *fds;
	if (fd) {
		max_fds = fd[0].max_fds;
		if (max_fds < 2)
			return -1;
		num = *num_fds;
	} else {
		max_fds = num = 0; /* bootstrap */
	}
	group_leader = num;

	while(*argv) {
		if (num == max_fds) {
			/* array is full: start at 2 entries, then double */
			if (max_fds == 0)
				new_max = 2;
			else
				new_max = max_fds << 1;

			if (new_max < max_fds) {
				warn("too many entries");
				goto error;
			}
			/*
			 * Keep the old pointer until realloc succeeds; on failure
			 * the original block is still valid and must be released
			 * by the error path rather than leaked.
			 */
			grown = realloc(fd, new_max * sizeof(*fd));
			if (!grown) {
				warn("cannot allocate memory");
				goto error;
			}
			fd = grown;

			/* reset newly allocated chunk */
			memset(fd + max_fds, 0, (new_max - max_fds) * sizeof(*fd));
			max_fds = new_max;

			/* update max size */
			fd[0].max_fds = max_fds;
		}
		/* ABI compatibility, set before calling libpfm */
		fd[num].hw.size = sizeof(fd[num].hw);

		memset(&arg, 0, sizeof(arg));
		arg.attr = &fd[num].hw;
		arg.fstr = &fd[num].fstr; /* fd[].fstr is NULL */

		ret = pfm_get_os_event_encoding(*argv, PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT_EXT, &arg);
		if (ret != PFM_SUCCESS) {
			warnx("event %s: %s", *argv, pfm_strerror(ret));
			goto error;
		}

		fd[num].name = strdup(*argv);
		if (!fd[num].name) {
			warn("cannot allocate memory");
			/*
			 * This entry is not counted in num yet, so the cleanup
			 * below is not told about it; release the string libpfm
			 * returned through arg.fstr here.
			 */
			free(fd[num].fstr);
			fd[num].fstr = NULL;
			goto error;
		}
		fd[num].group_leader = group_leader;
		fd[num].idx = arg.idx;
		fd[num].cpu = arg.cpu;

		num++;
		argv++;
	}
	*num_fds = num;
	*fds = fd;
	return 0;
error:
	perf_free_fds(fd, num);
	/* the array is gone (or may have moved): do not leave the caller a stale pointer */
	*fds = NULL;
	*num_fds = 0;
	return -1;
}
Ejemplo n.º 3
0
/*
 * Open a file descriptor for perf events with `event_name', mmap it, and set
 * things up so that the calling thread receives SIGIO signals from it.
 *
 * `event_name' is encoded via libpfm for user-level (PFM_PLM3) monitoring, and
 * `sample_freq' is installed as the sampling frequency (attr.freq is set).
 *
 * Returns the perf_event_handle on success, else folly::none.  Every failure
 * after the descriptor has been opened closes it before returning.
 */
folly::Optional<perf_event_handle> enable_event(const char* event_name,
                                                uint64_t sample_freq) {
  struct perf_event_attr attr = {};
  pfm_perf_encode_arg_t arg = {};
  arg.attr = &attr;
  arg.size = sizeof(arg);

  // Populate the `type', `config', and `exclude_*' members on `attr'.
  auto const pfmr = pfm_get_os_event_encoding(event_name, PFM_PLM3,
                                              PFM_OS_PERF_EVENT, &arg);
  if (pfmr != PFM_SUCCESS) {
    Logger::Warning("perf_event: failed to get encoding for %s: %s",
                    event_name, pfm_strerror(pfmr));
    return folly::none;
  }

  // Finish setting up `attr' and open the event.
  attr.size = sizeof(attr);
  attr.disabled = 1;
  attr.sample_freq = sample_freq;
  attr.freq = 1;
  attr.watermark = 0;
  attr.wakeup_events = 1;
  attr.precise_ip = 2;  // request zero skid

  attr.sample_type = PERF_SAMPLE_IP
                   | PERF_SAMPLE_TID
                   | PERF_SAMPLE_ADDR
                   | PERF_SAMPLE_CALLCHAIN
                   ;

  auto const ret = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
  if (ret < 0) {
    // Some machines might not have PEBS support (needed for precise_ip > 0),
    // but then PERF_SAMPLE_ADDR will always return zeros instead of the target
    // memory address.  Just fail silently in this case.
    Logger::Verbose("perf_event: perf_event_open failed with: %s",
                    folly::errnoStr(errno).c_str());
    return folly::none;
  }
  auto const fd = safe_cast<int>(ret);

  // Recent versions of Linux have a CLOEXEC flag for perf_event_open(), but
  // use fcntl() for portability.  Note that since we do this after we open the
  // event, this could in theory race with an exec() from another thread---but
  // that shouldn't be happening anyway.
  //
  // F_SETFD takes file *descriptor* flags, so the flag to pass is FD_CLOEXEC;
  // O_CLOEXEC is an open(2) flag with a different value and would leave
  // close-on-exec unset.
  fcntl(fd, F_SETFD, FD_CLOEXEC);

  // Make sure that any SIGIO sent from `fd' is handled by the calling thread.
  f_owner_ex owner;
  owner.type = F_OWNER_TID;
  owner.pid = syscall(__NR_gettid);

  // Set up `fd' to send SIGIO with sigaction info.
  if (fcntl(fd, F_SETFL, O_ASYNC) < 0 ||
      fcntl(fd, F_SETSIG, SIGIO) < 0 ||
      fcntl(fd, F_SETOWN_EX, &owner) < 0) {
    Logger::Warning("perf_event: failed to set up asynchronous I/O: %s",
                    folly::errnoStr(errno).c_str());
    close(fd);
    return folly::none;
  }

  // Map the ring buffer for our samples.
  auto const base = mmap(nullptr, mmap_sz(), PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);
  if (base == MAP_FAILED) {
    Logger::Warning("perf_event: failed to mmap perf_event: %s",
                    folly::errnoStr(errno).c_str());
    close(fd);
    return folly::none;
  }
  auto const meta = reinterpret_cast<struct perf_event_mmap_page*>(base);

  // From here on, cleanup goes through close_event() on the handle rather
  // than a bare close(), since the mapping now exists as well.
  auto const pe = perf_event_handle { fd, meta };

  // Reset the event.  This seems to be present in most examples, but it's
  // unclear if it's necessary or just good hygiene.  (It's possible that it's
  // necessary on successive opens.)
  if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) < 0) {
    Logger::Warning("perf_event: failed to reset perf_event: %s",
                    folly::errnoStr(errno).c_str());
    close_event(pe);
    return folly::none;
  }

  // Enable the event.  The man page and other examples of usage all suggest
  // that the right thing to do is to start with the event disabled and then
  // enable it manually afterwards, so we do the same here even though it seems
  // strange and circuitous.
  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    Logger::Warning("perf_event: failed to enable perf_event: %s",
                    folly::errnoStr(errno).c_str());
    close_event(pe);
    return folly::none;
  }

  return pe;
}
Ejemplo n.º 4
0
// Set up the cycle, L1 data-cache load-miss and instruction counters and
// populate the counters struct with their attributes and file descriptors.
//
// counters: struct whose cycles, l1_misses and ic members are initialized
// pid:      passed as the pid argument of each perf_event_open() call
//
// When __arm__ is not defined this function does nothing. On ARM builds any
// failure terminates the process with exit status 1 via err()/errx(). The
// counters are opened disabled; the caller has to enable them.
void pc_init(counters_t *counters, int pid)
{

#ifndef __arm__
  return;

#else
  int ret;
  ret = pfm_initialize();

  if (ret != PFM_SUCCESS) {
    errx(1, "cannot initialize library: %s", pfm_strerror(ret));
  }

  // Start every counter from zeroed attr/arg structures
  memset(&counters->cycles.attr, 0, sizeof(counters->cycles.attr));
  memset(&counters->l1_misses.attr, 0, sizeof(counters->l1_misses.attr));
  memset(&counters->ic.attr, 0, sizeof(counters->ic.attr));

  memset(&counters->cycles.arg, 0, sizeof(counters->cycles.arg));
  memset(&counters->l1_misses.arg, 0, sizeof(counters->l1_misses.arg));
  memset(&counters->ic.arg, 0, sizeof(counters->ic.arg));

  counters->cycles.count = 0;
  counters->l1_misses.count = 0;
  counters->ic.count = 0;

  // Tell libpfm the size of each encode argument and where to write the attr
  counters->cycles.arg.size = sizeof(counters->cycles.arg);
  counters->l1_misses.arg.size = sizeof(counters->l1_misses.arg);
  counters->ic.arg.size = sizeof(counters->ic.arg);

  counters->cycles.arg.attr = &counters->cycles.attr;
  counters->l1_misses.arg.attr = &counters->l1_misses.attr;
  counters->ic.arg.attr = &counters->ic.attr;

  // Get the encoding for the events.
  // These failures are libpfm status codes, not errno values, so report them
  // with errx(): err() would append an unrelated strerror(errno) message.
  // cycles
  ret = pfm_get_os_event_encoding("cycles", PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT, &counters->cycles.arg);
  if (ret != PFM_SUCCESS) {
    errx(1,"Cycles: cannot get encoding %s", pfm_strerror(ret));
  }
  // l1 cache misses
  ret = pfm_get_os_event_encoding("l1-dcache-load-misses", PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT, &counters->l1_misses.arg);
  if (ret != PFM_SUCCESS) {
    errx(1,"L1 Cache Misses:cannot get encoding %s", pfm_strerror(ret));
  }

  // instruction count
  ret = pfm_get_os_event_encoding("instructions", PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT, &counters->ic.arg);
  if (ret != PFM_SUCCESS) {
    errx(1,"Instruction Count:cannot get encoding %s", pfm_strerror(ret));
  }

  // Also report time enabled / time running alongside each counter value
  counters->cycles.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
  counters->l1_misses.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
  counters->ic.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;

  // do not start immediately after perf_event_open()
  counters->cycles.attr.disabled = 1;
  counters->l1_misses.attr.disabled = 1;
  counters->ic.attr.disabled = 1;

  // Open the counters; here errno is meaningful, so err() is appropriate
  counters->cycles.fd = perf_event_open(&counters->cycles.attr, pid, -1, -1, 0);
  if (counters->cycles.fd < 0) {
    err(1, "Cycle: cannot create event");
  }

  counters->l1_misses.fd = perf_event_open(&counters->l1_misses.attr, pid, -1, -1, 0);
  if (counters->l1_misses.fd < 0) {
    err(1, "L1 miss: cannot create event");
  }

  counters->ic.fd = perf_event_open(&counters->ic.attr, pid, -1, -1, 0);
  if (counters->ic.fd < 0) {
    err(1, "Instruction count: cannot create event");
  }
  return;
#endif
}