Example #1
static void
show_event_info(char *name, unsigned int idx)
{
	pfmlib_regmask_t cnt, impl_cnt;
	char *desc;
	unsigned int n1, n2, i, c;
	int code, prev_code = 0, first = 1;
	int ret;

	pfm_get_event_counters(idx, &cnt);
	pfm_get_num_counters(&n2);
	pfm_get_impl_counters(&impl_cnt);

	n1 = n2;
	printf("#-----------------------------\n"
	       "Name     : %s\n",
	       name);

	pfm_get_event_description(idx, &desc);
 	printf("Desc     : %s\n", desc);
	free(desc);

	printf("Code     :");
	for (i=0; n1; i++) {
		if (pfm_regmask_isset(&impl_cnt, i))
			n1--;
		if (pfm_regmask_isset(&cnt, i)) {
		    pfm_get_event_code_counter(idx,i,&code);
		    if (first == 1 || code != prev_code) {
			    printf(" 0x%x", code);
		    	    first = 0;
		    }
		    prev_code = code;
		}
	}
	putchar('\n');

	n1 = n2;
	printf("Counters : [ ");
	for (i=0; n1; i++) {
		if (pfm_regmask_isset(&impl_cnt, i))
			n1--;
		if (pfm_regmask_isset(&cnt, i))
			printf("%d ", i);
	}
	puts("]");
	pfm_get_num_event_masks(idx, &n1);
	for (i = 0; i < n1; i++) {
		ret = pfm_get_event_mask_name(idx, i, name, max_len+1);
		if (ret != PFMLIB_SUCCESS)
			continue;
		pfm_get_event_mask_description(idx, i, &desc);
		pfm_get_event_mask_code(idx, i, &c);
		printf("Umask-%02u : 0x%02x : [%s] : %s\n", i, c, name, desc);
		free(desc);
	}
}
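/*
 * A possible caller for show_event_info() (not part of the original listing):
 * it assumes the libpfm-3.x prototypes pfm_get_num_events(),
 * pfm_get_max_event_name_len() and pfm_get_event_name(), plus the global
 * max_len already used above for the umask names.
 */
static void
show_all_events(void)
{
	unsigned int i, count = 0;
	char *name;

	pfm_get_num_events(&count);
	pfm_get_max_event_name_len(&max_len);

	name = malloc(max_len + 1);
	if (name == NULL)
		return;

	for (i = 0; i < count; i++) {
		if (pfm_get_event_name(i, name, max_len + 1) == PFMLIB_SUCCESS)
			show_event_info(name, i);
	}
	free(name);
}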
static int setup_preset_term(int *native, pfmlib_event_t *event)
{
    /* It seems this could be greatly simplified. If impl_cnt is non-zero,
	the event lives on a counter. Therefore the entire routine could be:
	if (impl_cnt != 0) encode_native_event.
	Am I wrong?
    */
  pfmlib_regmask_t impl_cnt, evnt_cnt;
  unsigned int n;
  int j, ret;

  /* find out which counters it lives on */
  if ((ret = pfm_get_event_counters(event->event,&evnt_cnt)) != PFMLIB_SUCCESS)
    {
      PAPIERROR("pfm_get_event_counters(%d,%p): %s",event->event,&evnt_cnt,pfm_strerror(ret));
      return(PAPI_EBUG);
    }
  if ((ret = pfm_get_impl_counters(&impl_cnt)) != PFMLIB_SUCCESS)
    {
      PAPIERROR("pfm_get_impl_counters(%p): %s", &impl_cnt, pfm_strerror(ret));
      return(PAPI_EBUG);
    }

  /* Make sure this event lives on some counter; if so, put it in the description. If not, it's a bug. */
  if ((ret = pfm_get_num_counters(&n)) != PFMLIB_SUCCESS)
    {
      PAPIERROR("pfm_get_num_counters(%d): %s", n, pfm_strerror(ret));
      return(PAPI_EBUG);
    }

  for (j=0;n;j++)
    {
      if (pfm_regmask_isset(&impl_cnt, j))
	{
	  n--;
	  if (pfm_regmask_isset(&evnt_cnt,j))
	    {
	      *native = encode_native_event(event->event,event->num_masks,event->unit_masks);
	      return(PAPI_OK);
	    }
	}
    }

  PAPIERROR("PAPI preset 0x%08x PFM event %d did not have any available counters", event->event, j);
  return(PAPI_ENOEVNT);
}
Example #3
int
mainloop(char **arg)
{
	ctx_arg_t ctx;
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_reg_t pc[NUM_PMCS];
	pfarg_load_t load_args;
	pfm_msg_t msg;
	unsigned long ovfl_count = 0UL;
	unsigned long sample_period;
	unsigned long smpl_pmd_mask = 0UL;
	pid_t pid;
	int status, ret, fd;
	unsigned int i, num_counters;

	/*
	 * initialize all locals
	 */
	memset(&ctx, 0, sizeof(ctx));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));
	memset(pd, 0, sizeof(pd));
	memset(pc, 0, sizeof(pc));

	/*
	 * locate events
	 */
	pfm_get_num_counters(&num_counters);

	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");

	i = 2;

	if (i > num_counters) {
		i = num_counters;
		printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i);
	}
	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level
	 * 	PFM_PLM0 : kernel level
	 */
	inp.pfp_dfl_plm   = PFM_PLM3;
	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	
	/*
	 * Now prepare the argument to initialize the PMDs and PMCs.
	 * We must use pfp_pmc_count to determine the number of PMCs to initialize.
	 * We must use pfp_event_count to determine the number of PMDs to initialize.
	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
	 *
	 * This step is new compared to libpfm-2.x. It is necessary because the library no
	 * longer knows about the kernel data structures.
	 */

	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	/*
	 * the PMCs controlling the events ALWAYS come first, that's why this loop
	 * is safe even when extra PMCs are needed to support a particular event.
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pd[i].reg_num   = pc[i].reg_num;
		/* build sampling mask */
		smpl_pmd_mask  |= 1UL << pc[i].reg_num;
	}

	printf("smpl_pmd_mask=0x%lx\n", smpl_pmd_mask);

	/*
	 * now we indicate what to record when each counter overflows.
	 * In our case, we only have one sampling period and it is set for the
	 * first event. Here we indicate that when the sampling period expires
	 * then we want to record the value of all the other counters.
	 *
	 * We exclude the first counter in this case.
	 */
	smpl_pmd_mask  &= ~(1UL << pc[0].reg_num);

	pc[0].reg_smpl_pmds[0] = smpl_pmd_mask;

	/*
	 * when our sampling counter overflows, we want to be notified.
	 * The notification will come ONLY when the sampling buffer
	 * becomes full.
	 *
	 * We also activate randomization of the sampling period.
	 */
	pc[0].reg_flags	|= PFM_REGFL_OVFL_NOTIFY | PFM_REGFL_RANDOM;

	/*
	 * we also want to reset the other PMDs on
	 * every overflow. If we do not set
	 * this, the non-overflowed counters
	 * will be untouched.
	 */
	pc[0].reg_reset_pmds[0] |= smpl_pmd_mask;

	sample_period = 1000000UL;
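	/*
	 * counters count upward and the kernel notifies on overflow, so starting
	 * the (64-bit virtualized) counter at 2^64 - sample_period makes it
	 * overflow after exactly sample_period events; that is what
	 * (~0) - sample_period + 1 computes below.
	 */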

	pd[0].reg_value       = (~0) - sample_period + 1;
	pd[0].reg_short_reset = (~0) - sample_period + 1;
	pd[0].reg_long_reset  = (~0) - sample_period + 1;
	/*
	 * set up randomization parameters; the 0xff mask allows a random increment of up to +255.
	 */
	pd[0].reg_random_seed = 5;
	pd[0].reg_random_mask = 0xff;


	printf("programming %u PMCS and %u PMDS\n", outp.pfp_pmc_count, inp.pfp_event_count);

	/*
	 * prepare context structure.
	 *
	 * format specific parameters MUST be concatenated to the regular
	 * pfarg_context_t structure. For convenience, the default sampling
	 * format provides a data structure that already combines the pfarg_context_t
	 * with what is needed for this format.
	 */

	 /*
	  * We initialize the format specific information.
	  * The format is identified by its UUID which must be copied
	  * into the ctx_buf_fmt_id field.
	  */
	memcpy(ctx.ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t));

	/*
	 * the size of the buffer is indicated in bytes (not entries).
	 *
	 * The kernel will record into the buffer up to a certain point.
	 * No partial samples are ever recorded.
	 */
	ctx.buf_arg.buf_size = 8192;

	/*
	 * now create our perfmon context.
	 */
	if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1 ) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}

	/*
	 * extract the file descriptor we will use to
	 * identify this newly created context
	 */
	fd = ctx.ctx_arg.ctx_fd;

	/*
	 * retrieve the virtual address at which the sampling
	 * buffer has been mapped
	 */
	buf_addr = ctx.ctx_arg.ctx_smpl_vaddr;

	printf("context [%d] buffer mapped @%p\n", fd, buf_addr);

	/*
	 * Now program the registers
	 */
	if (perfmonctl(fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}
	/*
	 * initialize the PMDs
	 */
	if (perfmonctl(fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * Create the child task
	 */
	if ((pid=fork()) == -1) fatal_error("Cannot fork process\n");

	/*
	 * In order to get the PFM_END_MSG message, it is important
	 * to ensure that the child task does not inherit the file
	 * descriptor of the context. By default, file descriptors
	 * are inherited across exec(). We explicitly close it
	 * here. We could have set it up through fcntl(FD_CLOEXEC)
	 * to achieve the same thing.
	 */
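	/*
	 * sketch of the fcntl() alternative mentioned above: mark the descriptor
	 * close-on-exec once, right after PFM_CREATE_CONTEXT, instead of closing
	 * it in the child:
	 *
	 *	fcntl(fd, F_SETFD, FD_CLOEXEC);
	 */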
	if (pid == 0) {
		close(fd);
		child(arg);
	}

	/*
	 * wait for the child to exec
	 */
	waitpid(pid, &status, WUNTRACED);

	/*
	 * process is stopped at this point
	 */
	if (WIFEXITED(status)) {
		warning("task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status));
		goto terminate_session;
	}

	/*
	 * attach context to stopped task
	 */
	load_args.load_pid = pid;
	if (perfmonctl(fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
	}
	/*
	 * activate monitoring for the stopped task
	 * (nothing will be measured at this point)
	 */
	if (perfmonctl(fd, PFM_START, NULL, 0) == -1) {
		fatal_error(" perfmonctl error PFM_START errno %d\n",errno);
	}
	/*
	 * detach the child. Side effects include
	 * the activation of monitoring.
	 */
	ptrace(PTRACE_DETACH, pid, NULL, 0);

	/*
	 * core loop
	 */
	for(;;) {
		/*
		 * wait for overflow/end notification messages
		 */
		ret = read(fd, &msg, sizeof(msg));
		if (ret == -1) {
			fatal_error("cannot read perfmon msg: %s\n", strerror(errno));
		}
		switch(msg.type) {
			case PFM_MSG_OVFL: /* the sampling buffer is full */
				process_smpl_buf(fd, smpl_pmd_mask, 1);
				ovfl_count++;
				break;
			case PFM_MSG_END: /* monitored task terminated */
				printf("task terminated\n");
				goto terminate_session;
			default: fatal_error("unknown message type %d\n", msg.type);
		}
	}
terminate_session:
	/*
	 * cleanup child
	 */
	waitpid(pid, &status, 0);

	/*
	 * check for any leftover samples
	 */
	process_smpl_buf(fd, smpl_pmd_mask, 0);

	/*
	 * destroy perfmon context
	 */
	close(fd);

	printf("%lu samples collected in %lu buffer overflows\n", collect_samples, ovfl_count);

	return 0;
}
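The child() helper used above is not shown in this listing. A minimal sketch, assuming the usual ptrace handshake (PTRACE_TRACEME before exec so the task stops until the parent has programmed and attached the context):

static void
child(char **arg)
{
	/* stop ourselves on the upcoming exec so the parent can set up monitoring first */
	ptrace(PTRACE_TRACEME, 0, NULL, NULL);

	execvp(arg[0], arg);

	/* only reached if the exec failed */
	fatal_error("cannot exec %s: %s\n", arg[0], strerror(errno));
}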
Example #4
int
_papi_libpfm_init(papi_vector_t *my_vector, int cidx) {

   int retval;
   unsigned int ncnt;
   unsigned int version;
   char pmu_name[PAPI_MIN_STR_LEN];


   /* The following checks the version of the installed PFM library
      against the version PAPI was compiled against... */
   SUBDBG( "pfm_initialize()\n" );
   if ( ( retval = pfm_initialize(  ) ) != PFMLIB_SUCCESS ) {
      PAPIERROR( "pfm_initialize(): %s", pfm_strerror( retval ) );
      return PAPI_ESYS;
   }

   /* Get the libpfm3 version */
   SUBDBG( "pfm_get_version(%p)\n", &version );
   if ( pfm_get_version( &version ) != PFMLIB_SUCCESS ) {
      PAPIERROR( "pfm_get_version(%p): %s", version, pfm_strerror( retval ) );
      return PAPI_ESYS;
   }

   /* Set the version */
   sprintf( my_vector->cmp_info.support_version, "%d.%d",
	    PFM_VERSION_MAJOR( version ), PFM_VERSION_MINOR( version ) );

   /* Complain if the compiled-against version doesn't match current version */
   if ( PFM_VERSION_MAJOR( version ) != PFM_VERSION_MAJOR( PFMLIB_VERSION ) ) {
      PAPIERROR( "Version mismatch of libpfm: compiled %#x vs. installed %#x\n",
				   PFM_VERSION_MAJOR( PFMLIB_VERSION ),
				   PFM_VERSION_MAJOR( version ) );
      return PAPI_ESYS;
   }

   /* Always initialize globals dynamically to handle forks properly. */

   _perfmon2_pfm_pmu_type = -1;

   /* Opened once for all threads. */
   SUBDBG( "pfm_get_pmu_type(%p)\n", &_perfmon2_pfm_pmu_type );
   if ( pfm_get_pmu_type( &_perfmon2_pfm_pmu_type ) != PFMLIB_SUCCESS ) {
      PAPIERROR( "pfm_get_pmu_type(%p): %s", _perfmon2_pfm_pmu_type,
				   pfm_strerror( retval ) );
      return PAPI_ESYS;
   }

   pmu_name[0] = '\0';
   if ( pfm_get_pmu_name( pmu_name, PAPI_MIN_STR_LEN ) != PFMLIB_SUCCESS ) {
      PAPIERROR( "pfm_get_pmu_name(%p,%d): %s", pmu_name, PAPI_MIN_STR_LEN,
				   pfm_strerror( retval ) );
      return PAPI_ESYS;
   }
   SUBDBG( "PMU is a %s, type %d\n", pmu_name, _perfmon2_pfm_pmu_type );

   /* Setup presets */
   retval = _papi_load_preset_table( pmu_name, _perfmon2_pfm_pmu_type, cidx );
   if ( retval )
      return retval;

   /* Fill in cmp_info */

   SUBDBG( "pfm_get_num_events(%p)\n", &ncnt );
   if ( ( retval = pfm_get_num_events( &ncnt ) ) != PFMLIB_SUCCESS ) {
      PAPIERROR( "pfm_get_num_events(%p): %s\n", &ncnt,
				   pfm_strerror( retval ) );
      return PAPI_ESYS;
   }
   SUBDBG( "pfm_get_num_events: %d\n", ncnt );
   my_vector->cmp_info.num_native_events = ncnt;
   num_native_events = ncnt;

   pfm_get_num_counters( ( unsigned int * ) &my_vector->cmp_info.num_cntrs );
   SUBDBG( "pfm_get_num_counters: %d\n", my_vector->cmp_info.num_cntrs );


   if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) {
     /* Pentium4 */
     if ( _papi_hwi_system_info.hw_info.cpuid_family == 15 ) {
       PAPI_NATIVE_EVENT_AND_MASK = 0x000000ff;
       PAPI_NATIVE_UMASK_AND_MASK = 0x0fffff00;
       PAPI_NATIVE_UMASK_SHIFT = 8;
       /* Itanium2 */
     } else if ( _papi_hwi_system_info.hw_info.cpuid_family == 31 ||
		 _papi_hwi_system_info.hw_info.cpuid_family == 32 ) {
       PAPI_NATIVE_EVENT_AND_MASK = 0x00000fff;
       PAPI_NATIVE_UMASK_AND_MASK = 0x0ffff000;
       PAPI_NATIVE_UMASK_SHIFT = 12;
     }
   }


   return PAPI_OK;
}
Example #5
int
parent(char **arg)
{
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfarg_context_t ctx[1];
	pfarg_reg_t pc[NUM_PMCS];
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_load_t load_args;
	unsigned int i, num_counters;
	int status, ret;
	int ctx_fd;
	pid_t pid;
	char name[MAX_EVT_NAME_LEN];

	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));
	memset(ctx, 0, sizeof(ctx));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));
	memset(&load_args,0, sizeof(load_args));

	pfm_get_num_counters(&num_counters);

	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");
	i = 2;

	if (num_counters < i) {
		i = num_counters;
		printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i);
	}

	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level
	 * 	PFM_PLM0 : kernel level
	 */
	inp.pfp_dfl_plm   = PFM_PLM3;

	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * now create a context. we will later attach it to the task we are creating.
	 */
	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}
	/*
	 * extract the identifier for our context
	 */
	ctx_fd = ctx[0].ctx_fd;

	/*
	 * Now prepare the argument to initialize the PMDs and PMCs.
	 * We must use pfp_pmc_count to determine the number of PMCs to initialize.
	 * We must use pfp_event_count to determine the number of PMDs to initialize.
	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
	 *
	 * This step is new compared to libpfm-2.x. It is necessary because the library no
	 * longer knows about the kernel data structures.
	 */
	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	/*
	 * the PMCs controlling the events ALWAYS come first, that's why this loop
	 * is safe even when extra PMCs are needed to support a particular event.
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pd[i].reg_num   = pc[i].reg_num;
	}

	/*
	 * Now program the registers
	 *
	 * We don't use the same variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., it contains more than counting monitors.
	 */

	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}

	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * Create the child task
	 */
	if ((pid=fork()) == -1) fatal_error("Cannot fork process\n");

	/*
	 * and launch the child code
	 */
	if (pid == 0) exit(child(arg));

	/*
	 * wait for the child to exec
	 */
	waitpid(pid, &status, WUNTRACED);

	/*
	 * check if process exited early
	 */
	if (WIFEXITED(status)) {
		fatal_error("command %s exited too early with status %d\n", arg[0], WEXITSTATUS(status));
	}
	/*
	 * the task is stopped at this point
	 */
	
	
	/*
	 * now we load (i.e., attach) the context onto the child task
	 */
	load_args.load_pid = pid;

	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
	}

	/*
	 * activate monitoring. The task is still STOPPED at this point. Monitoring
	 * will not take effect until the execution of the task is resumed.
	 */
	if (perfmonctl(ctx_fd, PFM_START, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_START errno %d\n",errno);
	}

	/*
	 * now resume execution of the task, effectively activating
	 * monitoring.
	 */
	ptrace(PTRACE_DETACH, pid, NULL, 0);

	/*
	 * now the task is running
	 */

	/*
	 * simply wait for completion
	 */
	waitpid(pid, &status, 0);

	/*
	 * the task has disappeared at this point but our context is still
	 * present and contains all the latest counts.
	 */

	/*
	 * now simply read the results.
	 */
	if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error READ_PMDS errno %d\n",errno);
		return -1;
	}
	/*
	 * print the results
	 *
	 * It is important to realize that the first event we specified may not
	 * be in PMD4. Not all events can be measured by any monitor. That's why
	 * we need to use the pc[] array to figure out where event i was allocated.
	 *
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
		printf("PMD%u %20"PRIu64" %s\n",
			pd[i].reg_num,
			pd[i].reg_value,
			name);
	}
	/*
	 * free the context
	 */
	close(ctx_fd);

	return 0;
}
int
main(int argc, char **argv)
{
	char **p;
	int i, ret;
	pid_t pid = getpid();
	pfmlib_param_t evt;
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_context_t ctx[1];
	pfmlib_options_t pfmlib_options;

	/*
	 * Initialize pfm library (required before we can use it)
	 */
	if (pfm_initialize() != PFMLIB_SUCCESS) {
		printf("Can't initialize library\n");
		exit(1);
	}
	
	/* 
	 * check that the user did not specify too many events
	 */
	if (argc-1 > pfm_get_num_counters()) {
		printf("Too many events specified\n");
		exit(1);
	}

	/*
	 * pass options to library (optional)
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
	pfm_set_options(&pfmlib_options);

	memset(pd, 0, sizeof(pd));
	memset(ctx, 0, sizeof(ctx));

	/*
	 * prepare parameters to the library. We don't use any Itanium-
	 * specific features here, so pfp_model is NULL.
	 */
	memset(&evt,0, sizeof(evt));

	/*
	 * be nice to user!
	 */
	p = argc > 1 ? argv+1 : event_list;
	for (i=0; *p ; i++, p++) {
		if (pfm_find_event(*p, &evt.pfp_events[i].event) != PFMLIB_SUCCESS) {
			fatal_error("Cannot find %s event\n", *p);
		}
	}

	/*
	 * set the default privilege mode for all counters:
	 * 	PFM_PLM3 : user level only
	 */
	evt.pfp_dfl_plm   = PFM_PLM3; 

	/*
	 * how many counters we use
	 */
	evt.pfp_event_count = i;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&evt)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * for this example, we have decided not to get notified
	 * on counter overflows and the monitoring is not to be inherited
	 * in derived tasks.
	 */
	ctx[0].ctx_flags = PFM_FL_INHERIT_NONE;

	/*
	 * now create the context for self monitoring/per-task
	 */
	if (perfmonctl(pid, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}
	/* 
	 * Must be done before any PMC/PMD calls (unfreeze PMU). Initialize
	 * PMC/PMD to safe values. psr.up is cleared.
	 */
	if (perfmonctl(pid, PFM_ENABLE, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_ENABLE errno %d\n",errno);
	}

	/*
	 * Now prepare the argument to initialize the PMDs.
	 * The memset(pd) above initialized the entire array to zero already, so
	 * we just have to fill in the register numbers from the pc[] array.
	 */
	for (i=0; i < evt.pfp_event_count; i++) {
		pd[i].reg_num = evt.pfp_pc[i].reg_num;
	}
	/*
	 * Now program the registers
	 *
	 * We don't use the same variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., it contains more than counting monitors.
	 */
	if (perfmonctl(pid, PFM_WRITE_PMCS, evt.pfp_pc, evt.pfp_pc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}
	if (perfmonctl(pid, PFM_WRITE_PMDS, pd, evt.pfp_event_count) == -1) {
		{int i; for(i=0; i < evt.pfp_event_count; i++) printf("pmd%d: 0x%x\n", i, pd[i].reg_flags);}
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * Let's roll now
	 */
	pfm_start();

	noploop(10000000);

	pfm_stop();

	/* 
	 * now read the results
	 */
	if (perfmonctl(pid, PFM_READ_PMDS, pd, evt.pfp_event_count) == -1) {
		fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno);
		return -1;
	}
	/* 
	 * print the results
	 *
	 * It is important to realize that the first event we specified may not
	 * be in PMD4. Not all events can be measured by any monitor. That's why
	 * we need to use the pc[] array to figure out where event i was allocated.
	 *
	 */
	for (i=0; i < evt.pfp_event_count; i++) {
		char *name;
		pfm_get_event_name(evt.pfp_events[i].event, &name);
		printf("PMD%u %20lu %s\n", 
			pd[i].reg_num, 
			pd[i].reg_value, 
			name);
	}
	/* 
	 * let's stop this now
	 */
	if (perfmonctl(pid, PFM_DESTROY_CONTEXT, NULL, 0) == -1) {
		fatal_error( "child: perfmonctl error PFM_DESTROY errno %d\n",errno);
	}

	return 0;
}
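/*
 * The noploop() helper used above is not part of this listing. A minimal
 * sketch; the volatile counter keeps the compiler from optimizing the busy
 * loop away so the counters have something to measure:
 */
static void
noploop(unsigned long loop)
{
	volatile unsigned long n = loop;

	while (n--);
}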
int
parent(pid_t pid, unsigned long delay)
{
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfarg_pmr_t pc[NUM_PMCS];
	pfarg_pmr_t pd[NUM_PMDS];
	pfarg_sinfo_t sif;
	struct pollfd pollfd;
	pfarg_msg_t msg;
	unsigned int i, num_counters;
	int status, ret;
	int ctx_fd;
	char name[MAX_EVT_NAME_LEN];

	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));
	memset(&sif,0, sizeof(sif));

	pfm_get_num_counters(&num_counters);

	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");
	i = 2;

	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level
	 * 	PFM_PLM0 : kernel level
	 */
	inp.pfp_dfl_plm   = PFM_PLM3;

	if (i > num_counters) {
		i = num_counters;
		printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i);
	}
	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * now create a session; we will later attach it to the task we want to monitor.
	 */
	ctx_fd = pfm_create(0, &sif);
	if (ctx_fd == -1) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("cannot create session %s\n", strerror(errno));
	}
	/*
	 * build the pfp_unavail_pmcs bitmask by looking
	 * at what perfmon has available. It is not always
	 * the case that all PMU registers are actually available
	 * to applications. For instance, on IA-32 platforms, some
	 * registers may be reserved for the NMI watchdog timer.
	 *
	 * With this bitmap, the library knows which registers NOT to
	 * use. Of course, it is possible that no valid assignment
	 * exists if certain PMU registers are not available.
	 */
	detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL);

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * use our file descriptor for the poll.
	 * we are interested in read events only.
	 */
	pollfd.fd     = ctx_fd;
	pollfd.events = POLLIN;

	/*
	 * Now prepare the argument to initialize the PMDs and PMCs.
	 * We must use pfp_pmc_count to determine the number of PMCs to initialize.
	 * We must use pfp_event_count to determine the number of PMDs to initialize.
	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
	 *
	 * This step is new compared to libpfm-2.x. It is necessary because the library no
	 * longer knows about the kernel data structures.
	 */
	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}
	for (i=0; i < outp.pfp_pmd_count; i++)
		pd[i].reg_num = outp.pfp_pmds[i].reg_num;
	/*
	 * Now program the registers
	 *
	 * We don't use the same variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., it contains more than counting monitors.
	 */

	if (pfm_write(ctx_fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1)
		fatal_error("pfm_write error errno %d\n",errno);

	/*
	 * To be read, each PMD must be either written or declared
	 * as being part of a sample (reg_smpl_pmds)
	 */
	if (pfm_write(ctx_fd, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pd)) == -1)
		fatal_error("pfm_write(PMD) error errno %d\n",errno);

	ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
	if (ret == -1)
		fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));

	/*
	 * wait for the child to be actually stopped
	 */
	waitpid(pid, &status, WUNTRACED);

	/*
	 * check if process exited early
	 */
	if (WIFEXITED(status))
		fatal_error("command process %d exited too early with status %d\n", pid, WEXITSTATUS(status));

	/*
	 * the task is stopped at this point
	 */
	
	/*
	 * now we attach the session to the target task
	 */
	if (pfm_attach(ctx_fd, 0, pid) == -1)
		fatal_error("pfm_attach error errno %d\n",errno);

	/*
	 * activate monitoring. The task is still STOPPED at this point. Monitoring
	 * will not take effect until the execution of the task is resumed.
	 */
	if (pfm_set_state(ctx_fd, 0, PFM_ST_START) == -1)
		fatal_error("pfm_set_state(start) error errno %d\n",errno);

	/*
	 * now resume execution of the task, effectively activating
	 * monitoring.
	 */
	ptrace(PTRACE_DETACH, pid, NULL, 0);
	printf("attached to [%d], timeout set to %lu seconds\n", pid, delay);

	/*
	 * now the task is running
	 */

	/*
	 * We cannot simply do a waitpid() because we may be attaching to a process
	 * totally unrelated to our program. Instead we use a perfmon facility that
	 * notifies us when the monitoring task is exiting.
	 *
	 * When a task with a monitoring session attached to it exits, a PFM_MSG_END
	 * is generated. It can be retrieved with a simple read() on the session's descriptor.
	 *
	 * Another reason why you might return from the read is if there was a counter
	 * overflow, unlikely in this example.
	 *
	 * To measure only for a short period of time, use select or poll with a timeout,
	 * see task_attach_timeout.c
	 *
	 */
	ret = poll(&pollfd, 1, delay*1000);
	switch( ret ) {
		case -1:
			fatal_error("cannot read from descriptor: %s\n", strerror(errno));
			/* no return */
		case  1:
			/*
	 		 * there is a message, i.e., the program exited before our timeout
	 		 */
			if (ret == 1) {
				/*
		 		* extract message
		 		*/
				ret = read(ctx_fd, &msg, sizeof(msg));

				if (msg.type != PFM_MSG_END)
					fatal_error("unexpected msg type : %d\n", msg.type);
			}
			break;
		case   0:
			/*
			 * we timed out, we need to stop the task to unload
			 */
			ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
			if (ret == -1)
				fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));
			/*
			 * wait for task to be actually stopped
			 */
			waitpid(pid, &status, WUNTRACED);

			/*
	 		 * check if process exited, then no need to unload
	 		 */
			if (WIFEXITED(status)) goto read_results;

			if (pfm_attach(ctx_fd, 0, PFM_NO_TARGET) == -1)
				fatal_error("pfm_detach error errno %d\n",errno);

			/*
			 * let it run free again
			 */
			ptrace(PTRACE_DETACH, pid, NULL, 0);
			break;
		default:
			fatal_error("unexpected return from poll: %d\n", ret);
	}

read_results:
	/*
	 * now simply read the results.
	 */
	if (pfm_read(ctx_fd, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) {
		fatal_error("pfm_read(PMD) error errno %d\n",errno);
		return -1;
	}

	/*
	 * print the results
	 *
	 * It is important to realize that the first event we specified may not
	 * be in PMD4. Not all events can be measured by any monitor. That's why
	 * we need to use the pc[] array to figure out where event i was allocated.
	 *
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
		printf("PMD%-3u %20"PRIu64" %s\n",
			pd[i].reg_num,
			pd[i].reg_value,
			name);
	}
	/*
	 * free the session 
	 */
	close(ctx_fd);

	return 0;
}
Example #8
int
main(int argc, char **argv)
{
	pfarg_ctx_t ctx;
	pfarg_pmc_t pc[NUM_PMCS];
	pfarg_pmd_t *pd;
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	uint64_t cpu_list;
	void *desc;
	unsigned int num_counters;
	uint32_t i, j, k, l, ncpus, npmds;
	size_t len;
	int ret;
	char *name;

	if (pfm_initialize() != PFMLIB_SUCCESS)
		fatal_error("cannot initialize libpfm\n");

	if (pfms_initialize())
		fatal_error("cannot initialize libpfms\n");

	pfm_get_num_counters(&num_counters);
	pfm_get_max_event_name_len(&len);

	name = malloc(len+1);
	if (name == NULL)
		fatal_error("cannot allocate memory for event name\n");

	memset(&ctx, 0, sizeof(ctx));
	memset(pc, 0, sizeof(pc));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));

	cpu_list = argc > 1 ? strtoul(argv[1], NULL, 0) : 0x3;

	ncpus = popcount(cpu_list);

	if (pfm_get_cycle_event(&inp.pfp_events[0].event) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1].event) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");

	i = 2;

	inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0;

	if (i > num_counters) {
		i = num_counters;
		printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i);
	}
	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * indicate we are using the monitors for a system-wide session.
	 * This may impact the way the library sets up the PMC values.
	 */
	inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS)
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));

	npmds = ncpus * inp.pfp_event_count;
	dprint("ncpus=%u npmds=%u\n", ncpus, npmds);

	pd = calloc(npmds, sizeof(pfarg_pmd_t));
	if (pd == NULL)
		fatal_error("cannot allocate pd array\n");

	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	for(l=0, k = 0; l < ncpus; l++) {
		for (i=0, j=0; i < inp.pfp_event_count; i++, k++) {
			pd[k].reg_num   = outp.pfp_pmcs[j].reg_pmd_num;
			for(; j < outp.pfp_pmc_count; j++)  if (outp.pfp_pmcs[j].reg_evt_idx != i) break;
		}
	}

	/*
	 * create a context on all CPUs we asked for
	 *
	 * libpfms only works for system-wide, so we set the flag in
	 * the master context. The context argument is not modified by
	 * the call.
	 *
	 * desc is an opaque descriptor used to identify the session.
	 */
	ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;

	ret = pfms_create(&cpu_list, 1, &ctx, NULL, &desc);
	if (ret == -1)
		fatal_error("create error %d\n", ret);

	/*
	 * program the PMC registers on all CPUs of interest
	 */
	ret = pfms_write_pmcs(desc, pc, outp.pfp_pmc_count);
	if (ret == -1)
		fatal_error("write_pmcs error %d\n", ret);

	/*
	 * program the PMD registers on all CPUs of interest
	 */
	ret = pfms_write_pmds(desc, pd, inp.pfp_event_count);
	if (ret == -1)
		fatal_error("write_pmds error %d\n", ret);

	/*
	 * load context on all CPUs of interest
	 */
	ret = pfms_load(desc);
	if (ret == -1)
		fatal_error("load error %d\n", ret);

	/*
	 * start monitoring on all CPUs of interest
	 */
	ret = pfms_start(desc);
	if (ret == -1)
		fatal_error("start error %d\n", ret);

	/*
	 * simulate some work
	 */
	sleep(10);

	/*
	 * stop monitoring on all CPUs of interest
	 */
	ret = pfms_stop(desc);
	if (ret == -1)
		fatal_error("stop error %d\n", ret);
	
	/*
	 * read the PMD registers on all CPUs of interest.
	 * The pd[] array must be organized such that to
	 * read 2 PMDs on each CPU you need:
	 * 	- 2 * number of CPUs of interest
	 * 	- the first 2 elements of pd[] read on 1st CPU
	 * 	- the next  2 elements of pd[] read on the 2nd CPU
	 * 	- and so on
	 */
	ret = pfms_read_pmds(desc, pd, npmds);
	if (ret == -1)
		fatal_error("read_pmds error %d\n", ret);

	/*
	 * print per-CPU results
	 */
	for(j=0, k= 0; j < ncpus; j++) {
		for (i=0; i < inp.pfp_event_count; i++, k++) {
			pfm_get_full_event_name(&inp.pfp_events[i], name, len);
			printf("CPU%-3d PMD%u %20"PRIu64" %s\n",
			j,
			pd[k].reg_num,
			pd[k].reg_value,
			name);
		}
	}

	/*
	 * destroy context  on all CPUs of interest.
	 * After this call desc is invalid
	 */
	ret = pfms_close(desc);
	if (ret == -1)
		fatal_error("close error %d\n", ret);

	free(name);

	return 0;
}
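/*
 * The popcount() helper used above to size the per-CPU pd[] array is not
 * shown; a minimal portable sketch that counts the bits set in the CPU mask:
 */
static uint32_t
popcount(uint64_t mask)
{
	uint32_t count = 0;

	for (; mask; mask >>= 1)
		count += (uint32_t)(mask & 1);

	return count;
}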
int
parent(pid_t pid, unsigned long delay)
{
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfarg_context_t ctx[1];
	pfarg_reg_t pc[NUM_PMCS];
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_load_t load_args;
	struct pollfd pollfd;
	pfm_msg_t msg;
	unsigned int i, num_counters;
	int status, ret;
	int ctx_fd;
	char name[MAX_EVT_NAME_LEN];


	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));
	memset(ctx, 0, sizeof(ctx));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));
	memset(&load_args,0, sizeof(load_args));

	pfm_get_num_counters(&num_counters);

	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");
	i = 2;

	if (num_counters < i) {
		i = num_counters;
		printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i);
	}

	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level
	 * 	PFM_PLM0 : kernel level
	 */
	inp.pfp_dfl_plm   = PFM_PLM3;

	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * now create a context. we will later attach it to the task we are creating.
	 */
	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}
	/*
	 * extract the identifier for our context
	 */
	ctx_fd = ctx[0].ctx_fd;

	/*
	 * use our file descriptor for the poll.
	 * we are interested in read events only.
	 */
	pollfd.fd     = ctx_fd;
	pollfd.events = POLLIN;


	/*
	 * Now prepare the argument to initialize the PMDs and PMCs.
	 * We must use pfp_pmc_count to determine the number of PMCs to initialize.
	 * We must use pfp_event_count to determine the number of PMDs to initialize.
	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
	 *
	 * This step is new compared to libpfm-2.x. It is necessary because the library no
	 * longer knows about the kernel data structures.
	 */
	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	/*
	 * the PMCs controlling the events ALWAYS come first, that's why this loop
	 * is safe even when extra PMCs are needed to support a particular event.
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pd[i].reg_num   = pc[i].reg_num;
	}

	/*
	 * Now program the registers
	 *
	 * We don't use the same variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., it contains more than counting monitors.
	 */

	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}

	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
	if (ret == -1) {
		fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));
	}

	/*
	 * wait for the child to be actually stopped
	 */
	waitpid(pid, &status, WUNTRACED);

	/*
	 * check if process exited early
	 */
	if (WIFEXITED(status)) {
		fatal_error("command process %d exited too early with status %d\n", pid, WEXITSTATUS(status));
	}

	/*
	 * the task is stopped at this point
	 */
	
	/*
	 * now we load (i.e., attach) the context onto the target task
	 */
	load_args.load_pid = pid;

	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
	}

	/*
	 * activate monitoring. The task is still STOPPED at this point. Monitoring
	 * will not take effect until the execution of the task is resumed.
	 */
	if (perfmonctl(ctx_fd, PFM_START, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_START errno %d\n",errno);
	}

	/*
	 * now resume execution of the task, effectively activating
	 * monitoring.
	 */
	ptrace(PTRACE_DETACH, pid, NULL, 0);
	printf("attached to [%d], timeout set to %lu seconds\n", pid, delay);

	/*
	 * now the task is running
	 */

	/*
	 * We cannot simply do a waitpid() because we may be attaching to a process
	 * totally unrelated to our program. Instead we use a perfmon facility that
	 * notifies us when the monitoring task is exiting.
	 *
	 * When a task with a monitoring context attached to it exits, a PFM_MSG_END
	 * is generated. It can be retrieved with a simple read() on the context's descriptor.
	 *
	 * Another reason why you might return from the read is if there was a counter
	 * overflow, unlikely in this example.
	 *
	 * To measure only for a short period of time, use select or poll with a timeout,
	 * see task_attach_timeout.c
	 *
	 */
	ret = poll(&pollfd, 1, delay*1000);
	switch( ret ) {
		case -1:
			fatal_error("cannot read from descriptor: %s\n", strerror(errno));
			/* no return */
		case  1:
			/*
	 		 * there is a message, i.e., the program exited before our timeout
	 		 */
			if (ret == 1) {
				/*
		 		* extract message
		 		*/
				ret = read(ctx_fd, &msg, sizeof(msg));

				if (msg.type != PFM_MSG_END) {
					fatal_error("unexpected msg type : %d\n", msg.type);
				}
			}
			break;
		case   0:
			/*
			 * we timed out, we need to stop the task to unload
			 */
			ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
			if (ret == -1) {
				fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));
			}
			/*
			 * wait for task to be actually stopped
			 */
			waitpid(pid, &status, WUNTRACED);

			/*
	 		 * check if process exited, then no need to unload
	 		 */
			if (WIFEXITED(status)) goto read_results;

			if (perfmonctl(ctx_fd, PFM_UNLOAD_CONTEXT, NULL, 0) == -1) {
				fatal_error("perfmonctl error PFM_UNLOAD_CONTEXT errno %d\n",errno);
			}

			/*
			 * let it run free again
			 */
			ptrace(PTRACE_DETACH, pid, NULL, 0);
			break;
		default:
			fatal_error("unexpected return from poll: %d\n", ret);
	}

read_results:
	/*
	 * now simply read the results.
	 */
	if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error READ_PMDS errno %d\n",errno);
		return -1;
	}

	/*
	 * print the results
	 *
	 * It is important to realize that the first event we specified may not
	 * be in PMD4. Not all events can be measured by any monitor. That's why
	 * we need to use the pc[] array to figure out where event i was allocated.
	 *
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
		printf("PMD%u %20"PRIu64" %s\n",
			pd[i].reg_num,
			pd[i].reg_value,
			name);
	}
	/*
	 * free the context
	 */
	close(ctx_fd);

	return 0;
}
Example #10
int
main(int argc, char **argv)
{
	pfarg_context_t ctx[1];
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfarg_reg_t pc[NUM_PMCS];
	pfarg_load_t load_args;
	pfmlib_options_t pfmlib_options;
	struct sigaction act;
	size_t len;
	unsigned int i, num_counters;
	int ret;

	/*
	 * Initialize pfm library (required before we can use it)
	 */
	if (pfm_initialize() != PFMLIB_SUCCESS) {
		printf("Can't initialize library\n");
		exit(1);
	}

	/*
	 * Install the signal handler (SIGIO)
	 */
	memset(&act, 0, sizeof(act));
	act.sa_handler = (sig_t)sigio_handler;
	sigaction (SIGIO, &act, 0);

	/*
	 * pass options to library (optional)
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
	pfm_set_options(&pfmlib_options);

	memset(pc, 0, sizeof(pc));
	memset(ctx, 0, sizeof(ctx));
	memset(&load_args, 0, sizeof(load_args));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));

	pfm_get_num_counters(&num_counters);

	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");

	i = 2;

	if (i > num_counters) {
		i = num_counters;
		printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i);
	}

	/*
	 * set the default privilege mode for all counters:
	 * 	PFM_PLM3 : user level only
	 */
	inp.pfp_dfl_plm = PFM_PLM3;

	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * if we have a second event, remember its name for later reporting
	 */
	if (i > 1) {
		inp.pfp_event_count = i;

		pfm_get_max_event_name_len(&len);

		event1_name = malloc(len+1);
		if (event1_name == NULL)
			fatal_error("cannot allocate event name\n");

		pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1);
	}

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
		fatal_error("Cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * when we know we are self-monitoring and we have only one context, then
	 * when we get an overflow we know where it is coming from. Therefore we can
	 * save the call to the kernel to extract the notification message. By default,
	 * a message is generated. The queue of messages has a limited size, therefore
	 * it is important to clear the queue by reading the message on overflow. Failure
	 * to do so may result in a full queue and lost notification messages.
	 *
	 * With PFM_FL_OVFL_NO_MSG, no message will be queued, but you will still get
	 * the signal. Similarly, the PFM_MSG_END message will still be generated.
	 */
	ctx[0].ctx_flags = PFM_FL_OVFL_NO_MSG;

	/*
	 * now create the context for self monitoring/per-task
	 */
	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}
	ctx_fd = ctx->ctx_fd;

	/*
	 * Now prepare the argument to initialize the PMDs and PMCS.
	 * We use pfp_pmc_count to determine the number of registers to
	 * setup. Note that this field can be >= pfp_event_count.
	 */

	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	for (i=0; i < inp.pfp_event_count; i++) {
		pd[i].reg_num   = pc[i].reg_num;
	}
	/*
	 * We want to get notified when the counter used for our first
	 * event overflows
	 */
	pc[0].reg_flags 	|= PFM_REGFL_OVFL_NOTIFY;
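	/*
	 * also reset the PMD used by the second event on each overflow of the
	 * first counter (reg_reset_pmds is a bitmask of PMDs to reset; with the
	 * pairing above, pd[1].reg_num == outp.pfp_pmcs[1].reg_num)
	 */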
	pc[0].reg_reset_pmds[0] |= 1UL << outp.pfp_pmcs[1].reg_num;

	/*
	 * we arm the first counter, such that it will overflow
	 * after SMPL_PERIOD events have been observed
	 */
	pd[0].reg_value       = (~0UL) - SMPL_PERIOD + 1;
	pd[0].reg_long_reset  = (~0UL) - SMPL_PERIOD + 1;
	pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD + 1;

	/*
	 * Now program the registers
	 *
	 * We don't use the same variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., contains more than counting monitors.
	 */
	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}

	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * we want to monitor ourself
	 */
	load_args.load_pid = getpid();

	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * setup asynchronous notification on the file descriptor
	 */
	ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC);
	if (ret == -1) {
		fatal_error("cannot set ASYNC: %s\n", strerror(errno));
	}

	/*
	 * get ownership of the descriptor
	 */
	ret = fcntl(ctx_fd, F_SETOWN, getpid());
	if (ret == -1) {
		fatal_error("cannot setown: %s\n", strerror(errno));
	}

	/*
	 * Let's roll now
	 */
	pfm_self_start(ctx_fd);

	busyloop();

	pfm_self_stop(ctx_fd);

	/*
	 * free our context
	 */
	close(ctx_fd);

	return 0;
}
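The sigio_handler() and busyloop() helpers used above are not shown in this listing. A minimal sketch, assuming ctx_fd is the global used above, that a global counter tracks notifications, and that PFM_RESTART is the perfmon command used to rearm the overflowed counter and resume monitoring:

static volatile unsigned long notification_received;

static void
sigio_handler(int sig)
{
	/* one more overflow notification delivered via SIGIO */
	notification_received++;

	/* rearm the overflowed PMD and resume monitoring */
	if (perfmonctl(ctx_fd, PFM_RESTART, NULL, 0) == -1)
		fatal_error("perfmonctl error PFM_RESTART errno %d\n", errno);
}

static void
busyloop(void)
{
	/* burn cycles until a few overflow notifications have been received */
	while (notification_received < 4);
}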