Example #1
0
// Constructor: initializes libpfm. If pfm_initialize() does not report
// PFM_SUCCESS, the library's error text is printed through errx() with
// exit status 1.
pfm_instance::pfm_instance()
{
	const int status = pfm_initialize();

	if (status == PFM_SUCCESS)
		return;

	errx(1, "cannot initialize library: %s", pfm_strerror(status));
}
Example #2
0
/*
 * perfevent_initialize - set up the perf_event counters described by `events`.
 *
 * events: event list string. It is handed to perf_setup_list_events() and a
 *         private copy is stored in perfevent_events.
 *
 * On return perfevent_fds / perfevent_num_fds describe the opened events.
 * Every failure is fatal: the process exits with status 1 via err()/errx().
 */
void perfevent_initialize(char*events) {
  int i, ret;

  if (!events)
    errx(1, "no events specified");

  /* keep a private copy of the event string */
  perfevent_events = strdup(events);
  if (!perfevent_events)
    err(1, "cannot duplicate event string");

  /*
   * Initialize pfm library (required before we can use it)
   */
  ret = pfm_initialize();
  if (ret != PFM_SUCCESS)
    errx(1, "Cannot initialize library: %s", pfm_strerror(ret));

  ret = perf_setup_list_events(events, &perfevent_fds, &perfevent_num_fds);
  if (ret || !perfevent_num_fds)
    errx(1, "cannot setup events");

  /*
   * Event 0 is opened with group_fd == -1 (the value stored just below);
   * every later event passes event 0's descriptor as group_fd, so all events
   * end up in one group led by event 0.
   */
  perfevent_fds[0].fd = -1;
  for(i=0; i < perfevent_num_fds; i++) {
    /* request timing information necessary for scaling */
    perfevent_fds[i].hw.read_format = PERF_FORMAT_SCALE;

    /*
     * Only event 0 is created disabled; the other events are created with
     * disabled == 0.
     * NOTE(review): presumably they stay idle until the leader is enabled,
     * per perf_event_open(2) group semantics -- confirm.
     */
    perfevent_fds[i].hw.disabled = (i == 0);
    perfevent_fds[i].hw.inherit = 1; /* pass on to child threads */

    perfevent_fds[i].fd = perf_event_open(&perfevent_fds[i].hw, 0, -1, perfevent_fds[0].fd, 0);
    if (perfevent_fds[i].fd == -1)
      err(1, "cannot open event %d", i);
  }
}
/*
 * Entry point: monitor an already running process.
 *
 * usage: <prog> pid [timeout]
 *   pid      process id handed to parent(); must be a positive decimal number
 *   timeout  delay handed to parent(); decimal number, defaults to 10
 *
 * Returns the value returned by parent(). Usage and initialization errors are
 * reported through fatal_error() (presumably noreturn -- TODO confirm).
 */
int
main(int argc, char **argv)
{
	pfmlib_options_t pfmlib_options;
	unsigned long delay = 10;
	char *end;
	long val;
	pid_t pid;
	int ret;

	if (argc < 2)
		fatal_error("usage: %s pid [timeout]\n", argv[0]);

	/*
	 * atoi() cannot report malformed input, so parse with strtol() and
	 * reject empty strings, trailing junk and non-positive values.
	 */
	val = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || val <= 0)
		fatal_error("invalid pid: %s\n", argv[1]);
	pid = (pid_t)val;

	if (argc > 2) {
		delay = strtoul(argv[2], &end, 10);
		if (end == argv[2] || *end != '\0')
			fatal_error("invalid timeout: %s\n", argv[2]);
	}

	/*
	 * pass options to library (optional)
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
	pfm_set_options(&pfmlib_options);

	/*
	 * Initialize pfm library (required before we can use it)
	 */
	ret = pfm_initialize();
	if (ret != PFMLIB_SUCCESS)
		fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret));

	return parent(pid, delay);
}
/*
 * setup_preset_term - encode a PFM event as a native event, provided the
 * event can live on at least one implemented counter.
 *
 * native: out parameter; receives the result of encode_native_event() on
 *         success and is left untouched otherwise.
 * event:  PFM event (event index plus unit masks) to check and encode.
 *
 * Returns PAPI_OK on success, PAPI_EBUG when a libpfm query fails, and
 * PAPI_ENOEVNT when no implemented counter can host the event.
 */
static int setup_preset_term(int *native, pfmlib_event_t *event)
{
    /* It seems this could be greatly simplified. If impl_cnt is non-zero,
	the event lives on a counter. Therefore the entire routine could be:
	if (impl_cnt!= 0) encode_native_event.
	Am I wrong?
    */
  pfmlib_regmask_t impl_cnt, evnt_cnt;
  unsigned int n = 0;
  int j, ret;

  /* find out which counters it lives on */
  if ((ret = pfm_get_event_counters(event->event,&evnt_cnt)) != PFMLIB_SUCCESS)
    {
      PAPIERROR("pfm_get_event_counters(%d,%p): %s",event->event,&evnt_cnt,pfm_strerror(ret));
      return(PAPI_EBUG);
    }
  if ((ret = pfm_get_impl_counters(&impl_cnt)) != PFMLIB_SUCCESS)
    {
      PAPIERROR("pfm_get_impl_counters(%p): %s", &impl_cnt, pfm_strerror(ret));
      return(PAPI_EBUG);
    }

  /* Make sure this event lives on some counter, if so, put in the description. If not, BUG */
  if ((ret = pfm_get_num_counters(&n)) != PFMLIB_SUCCESS)
    {
      /* n holds nothing meaningful when the call fails: log the argument
	 (its address), as the sibling messages do, not its value */
      PAPIERROR("pfm_get_num_counters(%p): %s", &n, pfm_strerror(ret));
      return(PAPI_EBUG);
    }

  /* Walk register indices until all n implemented counters have been seen;
     n is decremented once per implemented counter found. */
  for (j=0;n;j++)
    {
      if (pfm_regmask_isset(&impl_cnt, j))
	{
	  n--;
	  if (pfm_regmask_isset(&evnt_cnt,j))
	    {
	      *native = encode_native_event(event->event,event->num_masks,event->unit_masks);
	      return(PAPI_OK);
	    }
	}
    }

  /* NOTE(review): the arguments below are the PFM event index and the last
     register index examined, which does not obviously match the wording of
     the message -- confirm the intended values before changing it. */
  PAPIERROR("PAPI preset 0x%08x PFM event %d did not have any available counters", event->event, j);
  return(PAPI_ENOEVNT);
}
/*
 * fd_group_init - open one perf event per entry of `events`, all in one group.
 *
 * num_events: number of entries in `events`.
 * events:     event strings passed to pfm_get_perf_event_encoding().
 *
 * The first event is opened with group_fd == -1 and becomes the group
 * leader; every later event is opened with the leader's descriptor as
 * group_fd. All events are created disabled, with inherit == 0, for pid 0 /
 * cpu -1, and request TOTAL_TIME_ENABLED | TOTAL_TIME_RUNNING | GROUP reads.
 *
 * Returns a malloc()ed array of num_events descriptors owned by the caller,
 * or NULL when an event cannot be opened (descriptors opened so far are
 * closed first). Allocation and encoding failures terminate the process.
 */
int *fd_group_init(int num_events, char **events) {
  struct perf_event_attr *attr;
  int leader_fd = -1; /* -1 asks perf_event_open() for a new group */
  int fd, ret, i;
  int *fds = malloc(num_events * sizeof(*fds));
  if (!fds) err(1, "could not allocate memory");
  attr = calloc(num_events, sizeof(*attr));
  if (!attr) err(1, "could not allocate memory");

  for (i = 0; i < num_events; i++) {
    /*
     * 1st argument: event string
     * 2nd argument: default privilege level (used if not specified in the event string)
     * 3rd argument: the perf_event_attr to initialize
     */
    ret = pfm_get_perf_event_encoding(events[i], PFM_PLM3, &attr[i], NULL, NULL);
    if (ret != PFM_SUCCESS) errx(1, "evt %d: cannot find encoding: %s", i, pfm_strerror(ret));
    /* config is a 64-bit field; widen explicitly so the format also matches
       on targets where long is 32 bits */
    printf("Using encoding %llx for event %s\n", (unsigned long long)attr[i].config, events[i]);

    attr[i].inherit = 0; // inheritance currently doesn't work with FORMAT_GROUP

    /*
     * request timing information because event may be multiplexed
     * and thus it may not count all the time. The scaling information
     * will be used to scale the raw count as if the event had run all
     * along
     */
    attr[i].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING
                          | PERF_FORMAT_GROUP;

    /* do not start immediately after perf_event_open() */
    attr[i].disabled = 1;

    fd = perf_event_open(&attr[i], 0, -1, leader_fd, 0);
    fds[i] = fd;
    if (fd < 0) {
      /* report first: close() below may overwrite errno */
      warn("warning: evt %d: cannot create event", i);
      /* the array is about to be freed, so release the descriptors it
         holds or they would be leaked */
      while (--i >= 0)
        close(fds[i]);
      free(attr);
      free(fds);
      return NULL;
    }
    if (i == 0)
      leader_fd = fd;
  }
  free(attr);
  return fds;
}
Example #6
0
/*
 * Run pfm_get_event_encoding() on every entry of x86_test_events and compare
 * the return code, the number of codes, the code values and the returned
 * event string against the expectations stored in the table entry.
 *
 * fp: stream that receives one line per mismatch.
 *
 * Returns the number of mismatches. A summary line is printed on stdout.
 */
static int check_test_events(FILE *fp)
{
	const test_event_t *ev = x86_test_events;
	int idx, nerr = 0;

	for (idx = 0; idx < NUM_TEST_EVENTS; idx++, ev++) {
		uint64_t *enc = NULL;
		char *qual = NULL;
		int ncodes = 0;
		int k, rc;

		rc = pfm_get_event_encoding(ev->name, PFM_PLM0 | PFM_PLM3, &qual, NULL, &enc, &ncodes);

		if (rc != ev->ret) {
			/* wrong return code: nothing else is compared */
			fprintf(fp,"Event%d %s, ret=%s(%d) expected %s(%d)\n", idx, ev->name, pfm_strerror(rc), rc, pfm_strerror(ev->ret), ev->ret);
			nerr++;
		} else if (rc != PFM_SUCCESS) {
			/* expected failure: none of the outputs may have been set */
			if (qual) {
				fprintf(fp,"Event%d %s, expected fstr NULL but it is not\n", idx, ev->name);
				nerr++;
			}
			if (ncodes != 0) {
				fprintf(fp,"Event%d %s, expected count=0 instead of %d\n", idx, ev->name, ncodes);
				nerr++;
			}
			if (enc) {
				fprintf(fp,"Event%d %s, expected codes[] NULL but it is not\n", idx, ev->name);
				nerr++;
			}
		} else {
			/* expected success: compare every output with the table */
			if (ncodes != ev->count) {
				fprintf(fp,"Event%d %s, count=%d expected %d\n", idx, ev->name, ncodes, ev->count);
				nerr++;
			}
			for (k = 0; k < ncodes; k++) {
				if (enc[k] == ev->codes[k])
					continue;
				fprintf(fp,"Event%d %s, codes[%d]=%#"PRIx64" expected %#"PRIx64"\n", idx, ev->name, k, enc[k], ev->codes[k]);
				nerr++;
			}
			if (ev->fstr && strcmp(qual, ev->fstr)) {
				fprintf(fp,"Event%d %s, fstr=%s expected %s\n", idx, ev->name, qual, ev->fstr);
				nerr++;
			}
		}

		/* free(NULL) is a no-op, so no guards are needed */
		free(enc);
		free(qual);
	}
	printf("\t %d x86 events: %d errors\n", idx, nerr);
	return nerr;
}
Example #7
0
/*
 * Advance *EventCode to the next native event code, as selected by modifier:
 *
 *   PAPI_ENUM_FIRST             store PAPI_NATIVE_MASK (the first event)
 *   PAPI_ENUM_EVENTS            next event index, unit mask 0
 *   PAPI_NTV_ENUM_UMASK_COMBOS  same event, unit-mask value incremented by 1
 *   PAPI_NTV_ENUM_UMASKS        same event, single mask bit one position
 *                               above the lowest bit set in the current mask
 *
 * Returns PAPI_OK when *EventCode was updated, PAPI_ENOEVNT when the code
 * cannot be decoded, libpfm cannot report the mask count, or the enumeration
 * is exhausted, and PAPI_EINVAL for any other modifier.
 */
int
_papi_libpfm_ntv_enum_events( unsigned int *EventCode, int modifier )
{
	unsigned int ev_idx, cur_mask, mask_cnt;
	int rc;

	if ( modifier == PAPI_ENUM_FIRST ) {
		/* assumes first native event is always 0x4000000 */
		*EventCode = PAPI_NATIVE_MASK;
		return ( PAPI_OK );
	}

	if ( _pfm_decode_native_event( *EventCode, &ev_idx, &cur_mask ) != PAPI_OK )
		return ( PAPI_ENOEVNT );

	rc = pfm_get_num_event_masks( ev_idx, &mask_cnt );
	if ( rc != PFMLIB_SUCCESS ) {
		PAPIERROR( "pfm_get_num_event_masks(%d,%p): %s", ev_idx, &mask_cnt,
				   pfm_strerror( rc ) );
		return ( PAPI_ENOEVNT );
	}
	/* never consider more masks than a PAPI code can carry */
	if ( mask_cnt > PAPI_NATIVE_UMASK_MAX )
		mask_cnt = PAPI_NATIVE_UMASK_MAX;
	SUBDBG( "This is umask %d of %d\n", cur_mask, mask_cnt );

	if ( modifier == PAPI_ENUM_EVENTS ) {
		if ( ev_idx >= ( unsigned int ) num_native_events - 1 )
			return ( PAPI_ENOEVNT );
		*EventCode =
			( unsigned int ) encode_native_event_raw( ev_idx + 1, 0 );
		return ( PAPI_OK );
	}

	if ( modifier == PAPI_NTV_ENUM_UMASK_COMBOS ) {
		if ( cur_mask + 1 >= ( unsigned int ) ( 1 << mask_cnt ) )
			return ( PAPI_ENOEVNT );
		*EventCode =
			( unsigned int ) encode_native_event_raw( ev_idx, cur_mask + 1 );
		return ( PAPI_OK );
	}

	if ( modifier == PAPI_NTV_ENUM_UMASKS ) {
		/* ffs() is 1-based, so 1 << ffs(x) is the bit just above the
		 * lowest set bit (and bit 0 when no bit is set) */
		int next_bit = ffs( ( int ) cur_mask );

		SUBDBG( "First bit is %d in %08x\b\n", next_bit - 1, cur_mask );
		next_bit = 1 << next_bit;

		if ( !( next_bit & ( ( 1 << mask_cnt ) - 1 ) ) )
			return ( PAPI_ENOEVNT );
		*EventCode =
			( unsigned int ) encode_native_event_raw( ev_idx,
													  ( unsigned int )
													  next_bit );
		return ( PAPI_OK );
	}

	return ( PAPI_EINVAL );
}
Example #8
0
 EXTERNAL void sysPerfEventCreate(int id, const char *eventName)
 {
   TRACE_PRINTF("%s: sysPerfEventCreate\n", Me);
   struct perf_event_attr *pe = (perf_event_attrs + id);
   int ret = pfm_get_perf_event_encoding(eventName, PFM_PLM3, pe, NULL, NULL);
   if (ret != PFM_SUCCESS) {
     errx(1, "error creating event %d '%s': %s\n", id, eventName, pfm_strerror(ret));
   }
   pe->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
   pe->disabled = 1;
   pe->inherit = 1;
   perf_event_fds[id] = perf_event_open(pe, 0, -1, -1, 0);
   if (perf_event_fds[id] == -1) {
     err(1, "error in perf_event_open for event %d '%s'", id, eventName);
   }
 }
Example #9
0
/* OR together the mask codes of every unit mask listed in a pfm event
 * structure and return the result as a PAPI unit mask. A mask whose code
 * cannot be looked up is reported through PAPIERROR and left out. */
static inline unsigned int
convert_pfm_masks( pfmlib_event_t * gete )
{
	unsigned int idx, mask_code, papi_mask = 0;

	for ( idx = 0; idx < gete->num_masks; idx++ ) {
		int rc = pfm_get_event_mask_code( gete->event,
										  gete->unit_masks[idx],
										  &mask_code );

		if ( rc != PFMLIB_SUCCESS ) {
			PAPIERROR( "pfm_get_event_mask_code(%#x,%d,%p): %s", gete->event,
					   idx, &mask_code, pfm_strerror( rc ) );
			continue;
		}
		SUBDBG( "Mask value is 0x%08x\n", mask_code );
		papi_mask |= mask_code;
	}
	return papi_mask;
}
/*
 * One-time setup of the performance-counter state. Once `initialized` has
 * been set, further calls return without doing anything.
 *
 * Prints a stack and a heap address, calls get_events(), initializes libpfm
 * (exiting with status 1 on failure), zeroes the g_num_events entries of
 * g_event_counts and initializes count_lock.
 */
void perfctr_init() {
  char *probe;
  int status, idx;

  if (initialized)
    return;

  /* scratch allocation, used only for the address printout below */
  probe = malloc(64);
  printf("Initializing performance counters\n");
  printf("Stack address %p, heap %p\n", &probe, probe);
  free(probe);

  get_events();

  /* libpfm must be initialized before it can be used */
  status = pfm_initialize();
  if (status != PFM_SUCCESS)
    errx(1, "cannot initialize library: %s", pfm_strerror(status));

  idx = 0;
  while (idx < g_num_events) {
    g_event_counts[idx] = 0;
    idx++;
  }

  pthread_mutex_init(&count_lock, NULL);
  initialized = 1;
}
Example #11
0
  EXTERNAL void sysPerfEventInit(int numEvents)
  {
    int i;
    TRACE_PRINTF("%s: sysPerfEventInit\n", Me);
    int ret = pfm_initialize();
    if (ret != PFM_SUCCESS) {
      errx(1, "error in pfm_initialize: %s", pfm_strerror(ret));
    }

    perf_event_fds = (int*)checkCalloc(numEvents, sizeof(int));
    if (!perf_event_fds) {
      errx(1, "error allocating perf_event_fds");
    }
    perf_event_attrs = (struct perf_event_attr *)checkCalloc(numEvents, sizeof(struct perf_event_attr));
    if (!perf_event_attrs) {
      errx(1, "error allocating perf_event_attrs");
    }
    for(i = 0; i < numEvents; i++) {
      perf_event_attrs[i].size = sizeof(struct perf_event_attr);
    }
    enabled = 1;
  }
Example #12
0
/*
 * Translate a PAPI native event code into its libpfm event name.
 *
 * EventCode: native event code, decoded into an event index and a unit mask.
 * ntv_name:  destination buffer for the name.
 * len:       size of ntv_name in bytes.
 *
 * With no unit mask the plain event name is requested, otherwise the full
 * name including masks.
 *
 * Returns PAPI_OK on success, PAPI_ENOEVNT when the code cannot be decoded,
 * PAPI_EBUF when libpfm reports PFMLIB_ERR_FULL, and PAPI_EMISC for any
 * other libpfm error.
 */
int
_papi_libpfm_ntv_code_to_name( unsigned int EventCode, char *ntv_name, int len )
{
	int ret;
	unsigned int event, umask;
	pfmlib_event_t gete;

	memset( &gete, 0, sizeof ( gete ) );

	if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
		return ( PAPI_ENOEVNT );

	gete.event = event;
	gete.num_masks = ( unsigned int ) prepare_umask( umask, gete.unit_masks );
	if ( gete.num_masks == 0 )
		ret = pfm_get_event_name( gete.event, ntv_name, ( size_t ) len );
	else
		ret = pfm_get_full_event_name( &gete, ntv_name, ( size_t ) len );
	if ( ret == PFMLIB_SUCCESS )
		return PAPI_OK;

	/* Skip error message if event is not supported by host cpu;
	 * we don't need to give this info away for papi_native_avail util */
	if ( ret != PFMLIB_ERR_BADHOST ) {
		/* start from an empty string: the lookup below may fail as well,
		 * and tmp is printed with %s either way */
		char tmp[PAPI_2MAX_STR_LEN] = "";

		if ( pfm_get_event_name( gete.event, tmp, sizeof ( tmp ) ) !=
			 PFMLIB_SUCCESS )
			tmp[0] = '\0';
		PAPIERROR
			( "pfm_get_full_event_name(%p(event %d,%s,%d masks),%p,%d): %d -- %s",
			  &gete, gete.event, tmp, gete.num_masks, ntv_name, len, ret,
			  pfm_strerror( ret ) );
	}

	if ( ret == PFMLIB_ERR_FULL )
		return PAPI_EBUF;

	return PAPI_EMISC;
}
Example #13
0
File: lib.c Project: s-kanev/misc
/*
 * pfm_init_counters - initialize libpfm and open one paused counter per event.
 *
 * counters: event names handed to perf_setup_argv_events().
 *
 * Fills the file-level fds / num_fds with the opened events (each created
 * disabled, with PERF_FORMAT_SCALE reads) and allocates counter_values with
 * one uint64_t slot per event.
 *
 * Returns 0 on success and -1 on any failure, after printing a message on
 * stderr. Descriptors opened before a failure remain recorded in fds.
 */
int pfm_init_counters(const char** counters) {
#ifdef VERBOSE
    fprintf(stderr, "Initing counters\n");
#endif
    /* Initialize pfm library */
    int i,ret;

    ret = pfm_initialize();

    if (ret != PFM_SUCCESS) {
        fprintf(stderr, "Cannot initialize libpfm: %s\n", pfm_strerror(ret));
        return -1;
    }

    ret = perf_setup_argv_events(counters, &fds, &num_fds);
    if (ret || !num_fds) {
        fprintf(stderr, "Cannot setup events\n");
        return -1;
    }

    for (i=0; i < num_fds; i++) {
        /* request timing information necessary for scaling */
        fds[i].hw.read_format = PERF_FORMAT_SCALE;
        fds[i].hw.disabled = 1; /* start paused */

        /* every event is opened on its own (group_fd == -1) */
        fds[i].fd = perf_event_open(&fds[i].hw, 0, -1, -1, 0);
        if (fds[i].fd == -1) {
            fprintf(stderr, "Cannot open event %d\n", i);
            return -1;
        }
    }

    /* without this check a failed allocation would be reported as success
       and leave counter_values NULL */
    counter_values = malloc(num_fds * sizeof(*counter_values));
    if (counter_values == NULL) {
        fprintf(stderr, "Cannot allocate counter values\n");
        return -1;
    }
    return 0;
}
Example #14
0
/*
 * Build a textual description for a PAPI native event code.
 *
 * EventCode: native event code, decoded into an event index and a unit mask.
 * ntv_descr: destination buffer.
 * len:       size of ntv_descr in bytes.
 *
 * The text is the event description; when unit masks are present it is
 * followed by ", masks:" and the comma-separated mask descriptions. At most
 * len bytes are written and, for len > 0, the result is NUL-terminated.
 *
 * Returns PAPI_OK on success, PAPI_EBUF when the text did not fit,
 * PAPI_ENOEVNT when the code cannot be decoded or has no description,
 * PAPI_EINVAL when a mask description cannot be fetched, and PAPI_ENOMEM on
 * allocation failure.
 */
int
_papi_libpfm_ntv_code_to_descr( unsigned int EventCode, char *ntv_descr, int len )
{
	unsigned int event, umask;
	char *eventd, **maskd, *tmp;
	int i, ret;
	pfmlib_event_t gete;
	size_t total_len = 0;

	memset( &gete, 0, sizeof ( gete ) );

	if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
		return ( PAPI_ENOEVNT );

	ret = pfm_get_event_description( event, &eventd );
	if ( ret != PFMLIB_SUCCESS ) {
		PAPIERROR( "pfm_get_event_description(%d,%p): %s",
				   event, &eventd, pfm_strerror( ret ) );
		return ( PAPI_ENOEVNT );
	}

	if ( ( gete.num_masks =
		   ( unsigned int ) prepare_umask( umask, gete.unit_masks ) ) ) {
		/* calloc so that entries not yet filled in are NULL and can be
		 * passed to free() safely on the error path */
		maskd = calloc( gete.num_masks, sizeof ( *maskd ) );
		if ( maskd == NULL ) {
			free( eventd );
			return ( PAPI_ENOMEM );
		}
		for ( i = 0; i < ( int ) gete.num_masks; i++ ) {
			ret =
				pfm_get_event_mask_description( event, gete.unit_masks[i],
												&maskd[i] );
			if ( ret != PFMLIB_SUCCESS ) {
				PAPIERROR( "pfm_get_event_mask_description(%d,%d,%p): %s",
						   event, gete.unit_masks[i], &maskd[i],
						   pfm_strerror( ret ) );
				free( eventd );
				for ( ; i >= 0; i-- )
					free( maskd[i] );
				free( maskd );
				return ( PAPI_EINVAL );
			}
			total_len += strlen( maskd[i] );
		}
		/* event text + ", masks:" + mask texts + one separator per mask
		 * (covers the commas and the terminating NUL) + 1 */
		tmp = malloc( strlen( eventd ) + strlen( ", masks:" ) +
					  total_len + gete.num_masks + 1 );
		if ( tmp == NULL ) {
			for ( i = ( int ) gete.num_masks - 1; i >= 0; i-- )
				free( maskd[i] );
			free( maskd );
			free( eventd );
			/* must not fall through: tmp is NULL and everything else
			 * has just been released */
			return ( PAPI_ENOMEM );
		}
		tmp[0] = '\0';
		strcat( tmp, eventd );
		strcat( tmp, ", masks:" );
		for ( i = 0; i < ( int ) gete.num_masks; i++ ) {
			if ( i != 0 )
				strcat( tmp, "," );
			strcat( tmp, maskd[i] );
			free( maskd[i] );
		}
		free( maskd );
		/* the event description has been copied into tmp */
		free( eventd );
	} else {
		tmp = malloc( strlen( eventd ) + 1 );
		if ( tmp == NULL ) {
			free( eventd );
			return ( PAPI_ENOMEM );
		}
		tmp[0] = '\0';
		strcat( tmp, eventd );
		free( eventd );
	}

	if ( len > 0 ) {
		strncpy( ntv_descr, tmp, ( size_t ) len );
		/* strncpy() leaves the buffer unterminated when tmp does not fit */
		ntv_descr[len - 1] = '\0';
	}
	if ( ( int ) strlen( tmp ) > len - 1 )
		ret = PAPI_EBUF;
	else
		ret = PAPI_OK;
	free( tmp );
	return ( ret );
}
Example #15
0
// Set up the cycle, L1 data-cache load-miss and instruction counters and
// record their perf_event state in *counters.
//
// counters: for each of cycles / l1_misses / ic this function zeroes attr and
//           arg, resets count to 0, links arg.attr to attr, and stores the
//           opened descriptor in fd.
// pid:      process id passed to perf_event_open() for all three events.
//
// When built without __arm__ defined the function returns immediately and
// touches nothing. Any failure terminates the process with exit status 1.
void pc_init(counters_t *counters, int pid)
{

#ifndef __arm__
  return;

#else
  int ret;
  ret = pfm_initialize();

  if (ret != PFM_SUCCESS) {
    errx(1, "cannot initialize library: %s", pfm_strerror(ret));
  }

  // Start from zeroed attribute and argument structures
  memset(&counters->cycles.attr, 0, sizeof(counters->cycles.attr));
  memset(&counters->l1_misses.attr, 0, sizeof(counters->l1_misses.attr));
  memset(&counters->ic.attr, 0, sizeof(counters->ic.attr));

  memset(&counters->cycles.arg, 0, sizeof(counters->cycles.arg));
  memset(&counters->l1_misses.arg, 0, sizeof(counters->l1_misses.arg));
  memset(&counters->ic.arg, 0, sizeof(counters->ic.arg));

  counters->cycles.count = 0;
  counters->l1_misses.count = 0;
  counters->ic.count = 0;

  counters->cycles.arg.size = sizeof(counters->cycles.arg);
  counters->l1_misses.arg.size = sizeof(counters->l1_misses.arg);
  counters->ic.arg.size = sizeof(counters->ic.arg);

  // The encoding call fills in the attr each arg points at
  counters->cycles.arg.attr = &counters->cycles.attr;
  counters->l1_misses.arg.attr = &counters->l1_misses.attr;
  counters->ic.arg.attr = &counters->ic.attr;

  // Get the encoding for the events.
  // libpfm reports failures through its return value, not errno, so errx()
  // is used here: err() would append an unrelated strerror(errno) text.
  // cycles
  ret = pfm_get_os_event_encoding("cycles", PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT, &counters->cycles.arg);
  if (ret != PFM_SUCCESS) {
    errx(1,"Cycles: cannot get encoding %s", pfm_strerror(ret));
  }
  // l1 cache misses
  ret = pfm_get_os_event_encoding("l1-dcache-load-misses", PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT, &counters->l1_misses.arg);
  if (ret != PFM_SUCCESS) {
    errx(1,"L1 Cache Misses:cannot get encoding %s", pfm_strerror(ret));
  }

  // instruction count
  ret = pfm_get_os_event_encoding("instructions", PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT, &counters->ic.arg);
  if (ret != PFM_SUCCESS) {
    errx(1,"Instruction Count:cannot get encoding %s", pfm_strerror(ret));
  }

  // Request enabled/running times along with each count
  counters->cycles.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
  counters->l1_misses.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
  counters->ic.attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;

  // do not start immediately after perf_event_open()
  counters->cycles.attr.disabled = 1;
  counters->l1_misses.attr.disabled = 1;
  counters->ic.attr.disabled = 1;

  // Open the counters; perf_event_open() sets errno, so err() is right here
  counters->cycles.fd = perf_event_open(&counters->cycles.attr, pid, -1, -1, 0);
  if (counters->cycles.fd < 0) {
    err(1, "Cycle: cannot create event");
  }

  counters->l1_misses.fd = perf_event_open(&counters->l1_misses.attr, pid, -1, -1, 0);
  if (counters->l1_misses.fd < 0) {
    err(1, "L1 miss: cannot create event");
  }

  counters->ic.fd = perf_event_open(&counters->ic.attr, pid, -1, -1, 0);
  if (counters->ic.fd < 0) {
    err(1, "Instruction count: cannot create event");
  }
  return;
#endif
}
Example #16
0
/*
 * parent - count cycles and retired instructions for a child command.
 *
 * arg: argument vector handed to child(); arg[0] is used in diagnostics.
 *
 * Sequence: configure the two events through libpfm, create a perfmon
 * context, program the PMCs/PMDs, fork the child, load the context onto the
 * child once it has stopped, start monitoring, detach so the child runs,
 * wait for it to finish, then read and print the counters.
 *
 * Returns 0 on success. Errors are reported through fatal_error(), which is
 * presumably noreturn (TODO confirm); -1 is returned after a failed
 * PFM_READ_PMDS should fatal_error() ever return.
 */
int
parent(char **arg)
{
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfarg_context_t ctx[1];
	pfarg_reg_t pc[NUM_PMCS];
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_load_t load_args;
	unsigned int i, num_counters;
	int status, ret;
	int ctx_fd;
	pid_t pid;
	char name[MAX_EVT_NAME_LEN];

	/* each array is cleared with its own size (not sizeof(ctx)) */
	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));
	memset(ctx, 0, sizeof(ctx));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));
	memset(&load_args,0, sizeof(load_args));

	/* num_counters is compared below, so it must be valid */
	if ((ret=pfm_get_num_counters(&num_counters)) != PFMLIB_SUCCESS)
		fatal_error("cannot get number of counters: %s\n", pfm_strerror(ret));

	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");
	i = 2;

	if (num_counters < i) {
		i = num_counters;
		printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i);
	}

	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level
	 * 	PFM_PLM0 : kernel level
	 */
	inp.pfp_dfl_plm   = PFM_PLM3;

	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * now create a context. we will later attach it to the task we are creating.
	 */
	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}
	/*
	 * extract the identifier for our context
	 */
	ctx_fd = ctx[0].ctx_fd;

	/*
	 * Now prepare the argument to initialize the PMDs and PMCS.
	 * We must use pfp_pmc_count to determine the number of PMCs to initialize.
	 * We must use pfp_event_count to determine the number of PMDs to initialize.
	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
	 *
	 * This step is new compared to libpfm-2.x. It is necessary because the library no
	 * longer knows about the kernel data structures.
	 */
	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	/*
	 * the PMC controlling the event ALWAYS comes first, that's why this loop
	 * is safe even when extra PMCs are needed to support a particular event.
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pd[i].reg_num   = pc[i].reg_num;
	}

	/*
	 * Now program the registers
	 *
	 * We don't use the same variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., contains more than counting monitors.
	 */
	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}

	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * Create the child task
	 */
	if ((pid=fork()) == -1) fatal_error("Cannot fork process\n");

	/*
	 * and launch the child code
	 */
	if (pid == 0) exit(child(arg));

	/*
	 * wait for the child to exec; status is only meaningful when
	 * waitpid() succeeds
	 */
	if (waitpid(pid, &status, WUNTRACED) == -1) {
		fatal_error("waitpid failed: %s\n", strerror(errno));
	}

	/*
	 * check if process exited early
	 */
	if (WIFEXITED(status)) {
		fatal_error("command %s exited too early with status %d\n", arg[0], WEXITSTATUS(status));
	}

	/*
	 * the task is stopped at this point:
	 * load (i.e., attach) the context onto the child
	 */
	load_args.load_pid = pid;

	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
	}

	/*
	 * activate monitoring. The task is still STOPPED at this point. Monitoring
	 * will not take effect until the execution of the task is resumed.
	 */
	if (perfmonctl(ctx_fd, PFM_START, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_START errno %d\n",errno);
	}

	/*
	 * now resume execution of the task, effectively activating
	 * monitoring.
	 */
	ptrace(PTRACE_DETACH, pid, NULL, 0);

	/*
	 * now the task is running: simply wait for completion
	 */
	waitpid(pid, &status, 0);

	/*
	 * the task has disappeared at this point but our context is still
	 * present and contains all the latest counts.
	 */

	/*
	 * now simply read the results.
	 */
	if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error READ_PMDS errno %d\n",errno);
		return -1;
	}
	/*
	 * print the results
	 *
	 * It is important to realize, that the first event we specified may not
	 * be in PMD4. Not all events can be measured by any monitor. That's why
	 * we need to use the pc[] array to figure out where event i was allocated.
	 *
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
		printf("PMD%u %20"PRIu64" %s\n",
			pd[i].reg_num,
			pd[i].reg_value,
			name);
	}
	/*
	 * free the context
	 */
	close(ctx_fd);

	return 0;
}
/*
 * parent - count cycles and retired instructions for an existing process.
 *
 * pid:   process to monitor; it is ptrace-attached so that the context can
 *        be loaded while it is stopped, then detached again.
 * delay: measurement time in seconds; passed to poll() as delay*1000 ms.
 *        NOTE(review): very large values overflow the int timeout of poll().
 *
 * Sequence: configure the two events through libpfm, create a perfmon
 * context, program the PMCs/PMDs, attach to the process, load and start the
 * context, detach, then poll the context descriptor. If a message arrives
 * before the timeout it must be PFM_MSG_END; on timeout the process is
 * re-attached, the context unloaded, and the process detached again.
 * Finally the counters are read and printed.
 *
 * Returns 0 on success. Errors are reported through fatal_error(), which is
 * presumably noreturn (TODO confirm); -1 is returned on the paths that would
 * otherwise continue with invalid data should fatal_error() ever return.
 */
int
parent(pid_t pid, unsigned long delay)
{
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfarg_context_t ctx[1];
	pfarg_reg_t pc[NUM_PMCS];
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_load_t load_args;
	struct pollfd pollfd;
	pfm_msg_t msg;
	unsigned int i, num_counters;
	int status, ret;
	int ctx_fd;
	char name[MAX_EVT_NAME_LEN];

	/* each object is cleared with its own size (not sizeof(ctx)) */
	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));
	memset(ctx, 0, sizeof(ctx));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));
	memset(&load_args,0, sizeof(load_args));
	memset(&msg, 0, sizeof(msg));

	/* num_counters is compared below, so it must be valid */
	if ((ret=pfm_get_num_counters(&num_counters)) != PFMLIB_SUCCESS)
		fatal_error("cannot get number of counters: %s\n", pfm_strerror(ret));

	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");
	i = 2;

	if (num_counters < i) {
		i = num_counters;
		printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i);
	}

	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level
	 * 	PFM_PLM0 : kernel level
	 */
	inp.pfp_dfl_plm   = PFM_PLM3;

	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * now create a context. we will later attach it to the monitored task.
	 */
	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}
	/*
	 * extract the identifier for our context
	 */
	ctx_fd = ctx[0].ctx_fd;

	/*
	 * use our file descriptor for the poll.
	 * we are interested in read events only.
	 */
	pollfd.fd     = ctx_fd;
	pollfd.events = POLLIN;

	/*
	 * Now prepare the argument to initialize the PMDs and PMCS.
	 * We must use pfp_pmc_count to determine the number of PMCs to initialize.
	 * We must use pfp_event_count to determine the number of PMDs to initialize.
	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
	 *
	 * This step is new compared to libpfm-2.x. It is necessary because the library no
	 * longer knows about the kernel data structures.
	 */
	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	/*
	 * the PMC controlling the event ALWAYS comes first, that's why this loop
	 * is safe even when extra PMCs are needed to support a particular event.
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pd[i].reg_num   = pc[i].reg_num;
	}

	/*
	 * Now program the registers
	 *
	 * We don't use the same variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., contains more than counting monitors.
	 */
	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}

	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
	if (ret == -1) {
		fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));
	}

	/*
	 * wait for the task to be actually stopped; status is only meaningful
	 * when waitpid() succeeds
	 */
	if (waitpid(pid, &status, WUNTRACED) == -1) {
		fatal_error("waitpid failed: %s\n", strerror(errno));
	}

	/*
	 * check if process exited early
	 */
	if (WIFEXITED(status)) {
		fatal_error("command process %d exited too early with status %d\n", pid, WEXITSTATUS(status));
	}

	/*
	 * the task is stopped at this point:
	 * load (i.e., attach) the context onto it
	 */
	load_args.load_pid = pid;

	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
	}

	/*
	 * activate monitoring. The task is still STOPPED at this point. Monitoring
	 * will not take effect until the execution of the task is resumed.
	 */
	if (perfmonctl(ctx_fd, PFM_START, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_START errno %d\n",errno);
	}

	/*
	 * now resume execution of the task, effectively activating
	 * monitoring.
	 */
	ptrace(PTRACE_DETACH, pid, NULL, 0);
	printf("attached to [%d], timeout set to %lu seconds\n", pid, delay);

	/*
	 * now the task is running
	 */

	/*
	 * We cannot simply do a waitpid() because we may be attaching to a process
	 * totally unrelated to our program. Instead we use a perfmon facility that
	 * notifies us when the monitored task is exiting.
	 *
	 * When a task with a monitoring context attached to it exits, a PFM_MSG_END
	 * is generated. It can be retrieved with a simple read() on the context's descriptor.
	 *
	 * Another reason why you might return from the read is if there was a counter
	 * overflow, unlikely in this example.
	 *
	 * The timeout bounds the measurement period.
	 */
	ret = poll(&pollfd, 1, delay*1000);
	switch( ret ) {
		case -1:
			fatal_error("cannot read from descriptor: %s\n", strerror(errno));
			return -1;
		case  1:
			/*
			 * there is a message, i.e., the program exited before our timeout.
			 * Extract it, and make sure something was actually read before
			 * looking at msg.type.
			 */
			ret = read(ctx_fd, &msg, sizeof(msg));
			if (ret <= 0) {
				fatal_error("cannot read perfmon message: %s\n",
					    ret == -1 ? strerror(errno) : "end of file");
				return -1;
			}

			if (msg.type != PFM_MSG_END) {
				fatal_error("unexpected msg type : %d\n", msg.type);
			}
			break;
		case   0:
			/*
			 * we timed out, we need to stop the task to unload
			 */
			ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
			if (ret == -1) {
				fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));
			}
			/*
			 * wait for task to be actually stopped
			 */
			if (waitpid(pid, &status, WUNTRACED) == -1) {
				fatal_error("waitpid failed: %s\n", strerror(errno));
			}

			/*
			 * check if process exited, then no need to unload
			 */
			if (WIFEXITED(status)) goto read_results;

			if (perfmonctl(ctx_fd, PFM_UNLOAD_CONTEXT, NULL, 0) == -1) {
				fatal_error("perfmonctl error PFM_UNLOAD_CONTEXT errno %d\n",errno);
			}

			/*
			 * let it run free again
			 */
			ptrace(PTRACE_DETACH, pid, NULL, 0);
			break;
		default:
			fatal_error("unexpected return from poll: %d\n", ret);
	}

read_results:
	/*
	 * now simply read the results.
	 */
	if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error READ_PMDS errno %d\n",errno);
		return -1;
	}

	/*
	 * print the results
	 *
	 * It is important to realize, that the first event we specified may not
	 * be in PMD4. Not all events can be measured by any monitor. That's why
	 * we need to use the pc[] array to figure out where event i was allocated.
	 *
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
		printf("PMD%u %20"PRIu64" %s\n",
			pd[i].reg_num,
			pd[i].reg_value,
			name);
	}
	/*
	 * free the context
	 */
	close(ctx_fd);

	return 0;
}
Example #18
0
int
main(int argc, char **argv)
{
	char **p;
	int i, ret;
	pid_t pid = getpid();
	pfmlib_param_t evt;
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_context_t ctx[1];
	pfmlib_options_t pfmlib_options;

	/*
	 * Initialize pfm library (required before we can use it)
	 */
	if (pfm_initialize() != PFMLIB_SUCCESS) {
		printf("Can't initialize library\n");
		exit(1);
	}
	
	/* 
	 * check that the user did not specify too many events
	 */
	if (argc-1 > pfm_get_num_counters()) {
		printf("Too many events specified\n");
		exit(1);
	}

	/*
	 * pass options to library (optional)
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
	pfm_set_options(&pfmlib_options);

	memset(pd, 0, sizeof(pd));
	memset(ctx, 0, sizeof(ctx));

	/*
	 * prepare parameters to library. we don't use any Itanium
	 * specific features here. so the pfp_model is NULL.
	 */
	memset(&evt,0, sizeof(evt));

	/*
	 * be nice to user!
	 */
	p = argc > 1 ? argv+1 : event_list;
	for (i=0; *p ; i++, p++) {
		if (pfm_find_event(*p, &evt.pfp_events[i].event) != PFMLIB_SUCCESS) {
			fatal_error("Cannot find %s event\n", *p);
		}
	}

	/*
	 * set the default privilege mode for all counters:
	 * 	PFM_PLM3 : user level only
	 */
	evt.pfp_dfl_plm   = PFM_PLM3; 

	/*
	 * how many counters we use
	 */
	evt.pfp_event_count = i;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&evt)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * for this example, we have decided not to get notified
	 * on counter overflows and the monitoring is not to be inherited
	 * in derived tasks.
	 */
	ctx[0].ctx_flags = PFM_FL_INHERIT_NONE;

	/*
	 * now create the context for self monitoring/per-task
	 */
	if (perfmonctl(pid, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}
	/* 
	 * Must be done before any PMD/PMD calls (unfreeze PMU). Initialize
	 * PMC/PMD to safe values. psr.up is cleared.
	 */
	if (perfmonctl(pid, PFM_ENABLE, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_ENABLE errno %d\n",errno);
	}

	/*
	 * Now prepare the argument to initialize the PMDs.
	 * the memset(pd) initialized the entire array to zero already, so
	 * we just have to fill in the register numbers from the pc[] array.
	 */
	for (i=0; i < evt.pfp_event_count; i++) {
		pd[i].reg_num = evt.pfp_pc[i].reg_num;
	}
	/*
	 * Now program the registers
	 *
	 * We don't use the save variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., contains more thann coutning monitors.
	 */
	if (perfmonctl(pid, PFM_WRITE_PMCS, evt.pfp_pc, evt.pfp_pc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}
	if (perfmonctl(pid, PFM_WRITE_PMDS, pd, evt.pfp_event_count) == -1) {
		{int i; for(i=0; i < evt.pfp_event_count; i++) printf("pmd%d: 0x%x\n", i, pd[i].reg_flags);}
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * Let's roll now
	 */
	pfm_start();

	noploop(10000000);

	pfm_stop();

	/* 
	 * now read the results
	 */
	if (perfmonctl(pid, PFM_READ_PMDS, pd, evt.pfp_event_count) == -1) {
		fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno);
		return -1;
	}
	/* 
	 * print the results
	 *
	 * It is important to realize, that the first event we specified may not
	 * be in PMD4. Not all events can be measured by any monitor. That's why
	 * we need to use the pc[] array to figure out where event i was allocated.
	 *
	 */
	for (i=0; i < evt.pfp_event_count; i++) {
		char *name;
		pfm_get_event_name(evt.pfp_events[i].event, &name);
		printf("PMD%u %20lu %s\n", 
			pd[i].reg_num, 
			pd[i].reg_value, 
			name);
	}
	/* 
	 * let's stop this now
	 */
	if (perfmonctl(pid, PFM_DESTROY_CONTEXT, NULL, 0) == -1) {
		fatal_error( "child: perfmonctl error PFM_DESTROY errno %d\n",errno);
	}

	return 0;
}
/*
 * Attach a counting session to an already running task and print the counts.
 *
 * pid   : task to monitor; it is stopped with ptrace() while the session is
 *         attached, then resumed
 * delay : maximum measurement time, in seconds
 *
 * Two events are requested through pfm_get_cycle_event() and
 * pfm_get_inst_retired_event(). Measurement ends when poll() reports a
 * message on the session descriptor (expected to be PFM_MSG_END) or when
 * the timeout expires, whichever comes first.
 *
 * Returns 0 on success; errors are reported through fatal_error().
 */
int
parent(pid_t pid, unsigned long delay)
{
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfarg_pmr_t pc[NUM_PMCS];
	pfarg_pmr_t pd[NUM_PMDS];
	pfarg_sinfo_t sif;
	struct pollfd pollfd;
	pfarg_msg_t msg;
	unsigned int i, num_counters;
	int status, ret;
	int ctx_fd;
	char name[MAX_EVT_NAME_LEN];

	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));
	memset(&sif,0, sizeof(sif));

	pfm_get_num_counters(&num_counters);

	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");
	i = 2;

	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level
	 * 	PFM_PLM0 : kernel level
	 */
	inp.pfp_dfl_plm   = PFM_PLM3;

	/* never ask for more events than there are counters */
	if (i > num_counters) {
		i = num_counters;
		printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i);
	}
	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * now create a session. It is attached to the target task later on.
	 */
	ctx_fd = pfm_create(0, &sif);
	if (ctx_fd == -1) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("cannot create session %s\n", strerror(errno));
	}
	/*
	 * build the pfp_unavail_pmcs bitmask by looking
	 * at what perfmon has available. It is not always
	 * the case that all PMU registers are actually available
	 * to applications. For instance, on IA-32 platforms, some
	 * registers may be reserved for the NMI watchdog timer.
	 *
	 * With this bitmap, the library knows which registers NOT to
	 * use. Of course, it is possible that no valid assignment
	 * exists if certain PMU registers are not available.
	 */
	detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL);

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * use our file descriptor for the poll.
	 * we are interested in read events only.
	 */
	pollfd.fd     = ctx_fd;
	pollfd.events = POLLIN;

	/*
	 * Copy the library's register assignment into the kernel argument
	 * arrays: pfp_pmc_count entries for the PMCs and pfp_pmd_count
	 * entries for the PMDs. The two counts are independent.
	 */
	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}
	for (i=0; i < outp.pfp_pmd_count; i++)
		pd[i].reg_num = outp.pfp_pmds[i].reg_num;

	/*
	 * Now program the registers. Sizes are passed in bytes.
	 */
	if (pfm_write(ctx_fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1)
		fatal_error("pfm_write error errno %d\n",errno);

	/*
	 * To be read, each PMD must be either written or declared
	 * as being part of a sample (reg_smpl_pmds)
	 */
	if (pfm_write(ctx_fd, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pd)) == -1)
		fatal_error("pfm_write(PMD) error errno %d\n",errno);

	ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
	if (ret == -1)
		fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));

	/*
	 * wait for the task to be actually stopped
	 */
	waitpid(pid, &status, WUNTRACED);

	/*
	 * check if process exited early
	 */
	if (WIFEXITED(status))
		fatal_error("command process %d exited too early with status %d\n", pid, WEXITSTATUS(status));

	/*
	 * the task is stopped at this point:
	 * attach the session to the target task
	 */
	if (pfm_attach(ctx_fd, 0, pid) == -1)
		fatal_error("pfm_attach error errno %d\n",errno);

	/*
	 * activate monitoring. The task is still STOPPED at this point. Monitoring
	 * will not take effect until the execution of the task is resumed.
	 */
	if (pfm_set_state(ctx_fd, 0, PFM_ST_START) == -1)
		fatal_error("pfm_set_state(start) error errno %d\n",errno);

	/*
	 * now resume execution of the task, effectively activating
	 * monitoring.
	 */
	ptrace(PTRACE_DETACH, pid, NULL, 0);
	printf("attached to [%d], timeout set to %lu seconds\n", pid, delay);

	/*
	 * We cannot simply do a waitpid() because we may be attached to a process
	 * totally unrelated to our program. Instead we wait on the session
	 * descriptor: when the monitored task exits, a PFM_MSG_END message
	 * becomes readable on it. poll() also bounds the wait to the timeout.
	 *
	 * NOTE(review): delay*1000 is converted to poll()'s int timeout;
	 * very large delays are not range-checked here.
	 */
	ret = poll(&pollfd, 1, delay*1000);
	switch( ret ) {
		case -1:
			fatal_error("cannot read from descriptor: %s\n", strerror(errno));
			/* only reached if fatal_error() returns; do not touch msg */
			break;
		case  1:
			/*
			 * there is a message, i.e., the program exited before our timeout.
			 * Validate the read before looking at msg, otherwise msg.type
			 * would be inspected uninitialized.
			 */
			ret = read(ctx_fd, &msg, sizeof(msg));
			if (ret == -1)
				fatal_error("cannot read perfmon message: %s\n", strerror(errno));
			if (ret != (int)sizeof(msg))
				fatal_error("short read on perfmon message: %d bytes\n", ret);

			if (msg.type != PFM_MSG_END)
				fatal_error("unexpected msg type : %d\n", msg.type);
			break;
		case   0:
			/*
			 * we timed out, we need to stop the task to unload
			 */
			ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
			if (ret == -1)
				fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));
			/*
			 * wait for task to be actually stopped
			 */
			waitpid(pid, &status, WUNTRACED);

			/*
			 * check if process exited, then no need to unload
			 */
			if (WIFEXITED(status)) goto read_results;

			/* detach the session from the task */
			if (pfm_attach(ctx_fd, 0, PFM_NO_TARGET) == -1)
				fatal_error("pfm_detach error errno %d\n",errno);

			/*
			 * let it run free again
			 */
			ptrace(PTRACE_DETACH, pid, NULL, 0);
			break;
		default:
			fatal_error("unexpected return from poll: %d\n", ret);
	}

read_results:
	/*
	 * now simply read the results.
	 */
	if (pfm_read(ctx_fd, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) {
		fatal_error("pfm_read(PMD) error errno %d\n",errno);
		return -1;
	}

	/*
	 * print the results
	 *
	 * The register number printed is the one stored in pd[i].reg_num,
	 * i.e. the register the library assigned, not an assumed fixed one.
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
		printf("PMD%-3u %20"PRIu64" %s\n",
			pd[i].reg_num,
			pd[i].reg_value,
			name);
	}
	/*
	 * free the session
	 */
	close(ctx_fd);

	return 0;
}
Example #20
0
/*
 * Group sampling example on top of perf_event with SIGIO notification.
 *
 * The events "cycles" and "instructions" are opened as one group led by
 * fds[0]. The leader samples with period SMPL_PERIOD into an mmap'ed
 * buffer and SIGIO is delivered to sigio_handler(). The group is enabled
 * for the duration of busyloop().
 *
 * Returns 0 on success; every failure terminates through err()/errx()/exit().
 */
int
main(int argc, char **argv)
{
	struct sigaction act;
	uint64_t *val;
	size_t sz, pgsz;
	int ret, i;

	setlocale(LC_ALL, "");

	ret = pfm_initialize();
	if (ret != PFM_SUCCESS)
		errx(1, "Cannot initialize library: %s", pfm_strerror(ret));

	pgsz = sysconf(_SC_PAGESIZE);

	/*
	 * Install the signal handler (SIGIO)
	 * need SA_SIGINFO because we need the fd
	 * in the signal handler
	 */
	memset(&act, 0, sizeof(act));
	act.sa_sigaction = sigio_handler;
	act.sa_flags = SA_SIGINFO;
	sigaction (SIGIO, &act, 0);

	/*
	 * allocates fd for us
	 */
	ret = perf_setup_list_events("cycles,"
				       "instructions",
					&fds, &num_fds);
	if (ret || (num_fds == 0))
		exit(1);

	/* the leader is opened with group fd -1, the others join fds[0] */
	fds[0].fd = -1;
	for(i=0; i < num_fds; i++) {

		/* only the leader (i == 0) starts disabled and carries the sampling setup */
		fds[i].hw.disabled = !i;
		if (!i) {
			/* want a notification for every sample added to the buffer */
			fds[i].hw.wakeup_events = 1;
			fds[i].hw.sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_READ|PERF_SAMPLE_PERIOD;
			fds[i].hw.sample_period = SMPL_PERIOD;

			/* read() returns event identification for signal handler */
			fds[i].hw.read_format = PERF_FORMAT_GROUP|PERF_FORMAT_ID|PERF_FORMAT_SCALE;
		}

		fds[i].fd = perf_event_open(&fds[i].hw, 0, -1, fds[0].fd, 0);
		if (fds[i].fd == -1)
			err(1, "cannot attach event %s", fds[i].name);
	}

	/* 3 header words (nr, time_enabled, time_running) + a (value, id) pair per event */
	sz = (3+2*num_fds)*sizeof(uint64_t);
	val = malloc(sz);
	if (!val)
		err(1, "cannot allocated memory");
	/*
	 * On overflow, the non lead events are stored in the sample.
	 * However we need some key to figure the order in which they
	 * were laid out in the buffer. The file descriptor does not
	 * work for this. Instead, we extract a unique ID for each event.
	 * That id will be part of the sample for each event value.
	 * Therefore we will be able to match value to events
	 *
	 * PERF_FORMAT_ID: returns unique 64-bit identifier in addition
	 * to event value.
	 */
	ret = read(fds[0].fd, val, sz);
	if (ret == -1)
		err(1, "cannot read id %zu", sz); /* report the buffer size, not sizeof(pointer) */
	/* a short read would leave some id slots of the malloc'ed buffer uninitialized */
	if ((size_t)ret < sz)
		errx(1, "short read of event ids: got %d of %zu bytes", ret, sz);

	/*
	 * we are using PERF_FORMAT_GROUP, therefore the structure
	 * of val is as follows:
	 *
	 *      { u64           nr;
	 *        { u64         time_enabled; } && PERF_FORMAT_ENABLED
	 *        { u64         time_running; } && PERF_FORMAT_RUNNING
	 *        { u64         value;
	 *          { u64       id;           } && PERF_FORMAT_ID
	 *        }             cntr[nr];
	 * We are skipping the first 3 values (nr, time_enabled, time_running)
	 * and then for each event we get a pair of values.
	 */
	for(i=0; i < num_fds; i++) {
		fds[i].id = val[2*i+1+3];
		printf("%"PRIu64"  %s\n", fds[i].id, fds[i].name);
	}

	/* one extra page on top of buffer_pages is mapped for the leader */
	fds[0].buf = mmap(NULL, (buffer_pages+1)*pgsz, PROT_READ|PROT_WRITE, MAP_SHARED, fds[0].fd, 0);
	if (fds[0].buf == MAP_FAILED)
		err(1, "cannot mmap buffer");

	fds[0].pgmsk = (buffer_pages * pgsz) - 1;

	/*
	 * setup asynchronous notification on the file descriptor
	 */
	ret = fcntl(fds[0].fd, F_SETFL, fcntl(fds[0].fd, F_GETFL, 0) | O_ASYNC);
	if (ret == -1)
		err(1, "cannot set ASYNC");

	/*
	 * necessary if we want to get the file descriptor for
	 * which the SIGIO is sent in siginfo->si_fd.
	 * SA_SIGINFO in itself is not enough
	 */
	ret = fcntl(fds[0].fd, F_SETSIG, SIGIO);
	if (ret == -1)
		err(1, "cannot setsig");

	/*
	 * get ownership of the descriptor
	 */
	ret = fcntl(fds[0].fd, F_SETOWN, getpid());
	if (ret == -1)
		err(1, "cannot setown");

	/*
	 * enable the group for one period
	 */
	ret = ioctl(fds[0].fd, PERF_EVENT_IOC_REFRESH , 1);
	if (ret == -1)
		err(1, "cannot refresh");

	busyloop();

	ret = ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 1);
	if (ret == -1)
		err(1, "cannot disable");

	/*
	 * destroy our session
	 */
	for(i=0; i < num_fds; i++)
		close(fds[i].fd);

	perf_free_fds(fds, num_fds);
	free(val);

	/* free libpfm resources cleanly */
	pfm_terminate();

	return 0;
}
int
main(void)
{
	int ret;
	int type = 0;
	pid_t pid = getpid();
	pfmlib_ita2_param_t ita_param;
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_context_t ctx[1];
	pfmlib_options_t pfmlib_options;
	struct sigaction act;

	/*
	 * Initialize pfm library (required before we can use it)
	 */
	if (pfm_initialize() != PFMLIB_SUCCESS) {
		fatal_error("Can't initialize library\n");
	}

	/*
	 * Let's make sure we run this on the right CPU
	 */
	pfm_get_pmu_type(&type);
	if (type != PFMLIB_ITANIUM2_PMU) {
		char *model; 
		pfm_get_pmu_name(&model);
		fatal_error("this program does not work with %s PMU\n", model);
	}

	/*
	 * Install the overflow handler (SIGPROF)
	 */
	memset(&act, 0, sizeof(act));
	act.sa_handler = (sig_t)overflow_handler;
	sigaction (SIGPROF, &act, 0);


	/*
	 * pass options to library (optional)
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
	pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */
	pfm_set_options(&pfmlib_options);



	memset(pd, 0, sizeof(pd));
	memset(ctx, 0, sizeof(ctx));

	/*
	 * prepare parameters to library. we don't use any Itanium
	 * specific features here. so the pfp_model is NULL.
	 */
	memset(&evt,0, sizeof(evt));
	memset(&ita_param,0, sizeof(ita_param));


	/*
	 * because we use a model specific feature, we must initialize the
	 * model specific pfmlib parameter structure and link it to the
	 * common structure.
	 * The magic number is a simple mechanism used by the library to check
	 * that the model specific data structure is decent. You must set it manually
	 * otherwise the model specific feature won't work.
	 */
	ita_param.pfp_magic = PFMLIB_ITA2_PARAM_MAGIC;
	evt.pfp_model       = &ita_param;

	/*
	 * Before calling pfm_find_dispatch(), we must specify what kind
	 * of branches we want to capture. We are interesteed in all the mispredicted branches, 
	 * therefore we program we set the various fields of the BTB config to:
	 */
	ita_param.pfp_ita2_btb.btb_used = 1;

	ita_param.pfp_ita2_btb.btb_ds  = 0;
	ita_param.pfp_ita2_btb.btb_tm  = 0x3;
	ita_param.pfp_ita2_btb.btb_ptm = 0x3;
	ita_param.pfp_ita2_btb.btb_ppm = 0x3;
	ita_param.pfp_ita2_btb.btb_brt = 0x0;
	ita_param.pfp_ita2_btb.btb_plm = PFM_PLM3;

	/*
	 * To count the number of occurence of this instruction, we must
	 * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8
	 * event.
	 */
	if (pfm_find_event_byname("BRANCH_EVENT", &evt.pfp_events[0].event) != PFMLIB_SUCCESS) {
		fatal_error("cannot find event BRANCH_EVENT\n");
	}

	/*
	 * set the (global) privilege mode:
	 * 	PFM_PLM3 : user level only
	 */
	evt.pfp_dfl_plm   = PFM_PLM3; 
	/*
	 * how many counters we use
	 */
	evt.pfp_event_count = 1;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&evt)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * for this example, we will get notified ONLY when the sampling
	 * buffer is full. The monitoring is not to be inherited
	 * in derived tasks
	 */
	ctx[0].ctx_flags        = PFM_FL_INHERIT_NONE;
	ctx[0].ctx_notify_pid   = getpid();
	ctx[0].ctx_smpl_entries = SMPL_BUF_NENTRIES;
	ctx[0].ctx_smpl_regs[0] = smpl_regs = BTB_REGS_MASK;


	/*
	 * now create the context for self monitoring/per-task
	 */
	if (perfmonctl(pid, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}

	printf("Sampling buffer mapped at %p\n", ctx[0].ctx_smpl_vaddr);

	smpl_vaddr = ctx[0].ctx_smpl_vaddr;

	/* 
	 * Must be done before any PMD/PMD calls (unfreeze PMU). Initialize
	 * PMC/PMD to safe values. psr.up is cleared.
	 */
	if (perfmonctl(pid, PFM_ENABLE, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_ENABLE errno %d\n",errno);
	}

	/*
	 * indicate we want notification when buffer is full
	 */
	evt.pfp_pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY;

	/*
	 * Now prepare the argument to initialize the PMD and the sampling period
	 */
	pd[0].reg_num         = evt.pfp_pc[0].reg_num;
	pd[0].reg_value       = (~0UL) - SMPL_PERIOD +1;
	pd[0].reg_long_reset  = (~0UL) - SMPL_PERIOD +1;
	pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD +1;

	/*
	 * When our counter overflows, we want to BTB index to be reset, so that we keep
	 * in sync. This is required to make it possible to interpret pmd16 on overflow
	 * to avoid repeating the same branch several times.
	 */
	evt.pfp_pc[0].reg_reset_pmds[0] = M_PMD(16);

	/*
	 * reset pmd16, short and long reset value are set to zero as well
	 */
	pd[1].reg_num         = 16;
	pd[1].reg_value       = 0UL;

	/*
	 * Now program the registers
	 *
	 * We don't use the save variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., contains more thann coutning monitors.
	 */
	if (perfmonctl(pid, PFM_WRITE_PMCS, evt.pfp_pc, evt.pfp_pc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}
	if (perfmonctl(pid, PFM_WRITE_PMDS, pd, 2) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * Let's roll now.
	 */

	do_test(100000);

	/*
	 * We must call the processing routine to cover the last entries recorded
	 * in the sampling buffer, i.e. which may not be full
	 */
	process_smpl_buffer();

	/* 
	 * let's stop this now
	 */
	if (perfmonctl(pid, PFM_DESTROY_CONTEXT, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_DESTROY errno %d\n",errno);
	}
	return 0;
}
Example #22
0
int
main(void)
{
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfmlib_ita2_input_param_t ita2_inp;
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_reg_t pc[NUM_PMCS];
	pfarg_context_t ctx[1];
	pfarg_load_t load_args;
	pfmlib_options_t pfmlib_options;
	int ret;
	int type = 0;
	int id;
	unsigned int i;
	char name[MAX_EVT_NAME_LEN];

	/*
	 * Initialize pfm library (required before we can use it)
	 */
	if (pfm_initialize() != PFMLIB_SUCCESS) {
		fatal_error("Can't initialize library\n");
	}

	/*
	 * Let's make sure we run this on the right CPU
	 */
	pfm_get_pmu_type(&type);
	if (type != PFMLIB_ITANIUM2_PMU) {
		char model[MAX_PMU_NAME_LEN];
		pfm_get_pmu_name(model, MAX_PMU_NAME_LEN);
		fatal_error("this program does not work with the %s PMU\n", model);
	}

	/*
	 * pass options to library (optional)
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug   = 0; /* set to 1 for debug */
	pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */
	pfm_set_options(&pfmlib_options);

	memset(pd, 0, sizeof(pd));
	memset(pc, 0, sizeof(pc));
	memset(ctx, 0, sizeof(ctx));
	memset(&load_args, 0, sizeof(load_args));

	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));
	memset(&ita2_inp,0, sizeof(ita2_inp));

	/*
	 * We indicate that we are using the PMC8 opcode matcher. This is required
	 * otherwise the library add PMC8 to the list of PMC to pogram during
	 * pfm_dispatch_events().
	 */
	ita2_inp.pfp_ita2_pmc8.opcm_used = 1;

	/*
	 * We want to match all the br.cloop in our test function.
	 * This branch is an IP-relative branch for which the major
	 * opcode (bits [40-37]=4) and the btype field is 5 (which represents
	 * bits[6-8]) so it is included in the match/mask fields of PMC8.
	 * It is necessarily in a B slot.
	 *
	 * We don't care which operands are used with br.cloop therefore
	 * the mask field of pmc8 is set such that only the 4 bits of the
	 * opcode and 3 bits of btype must match exactly. This is accomplished by
	 * clearing the top 4 bits and bits [6-8] of the mask field and setting the
	 * remaining bits.  Similarly, the match field only has the opcode value  and btype
	 * set according to the encoding of br.cloop, the
	 * remaining bits are zero. Bit 60 of PMC8 is set to indicate
	 * that we look only in B slots  (this is the only possibility for
	 * this instruction anyway).
	 *
	 * So the binary representation of the value for PMC8 is as follows:
	 *
	 * 6666555555555544444444443333333333222222222211111111110000000000
	 * 3210987654321098765432109876543210987654321098765432109876543210
	 * ----------------------------------------------------------------
	 * 0001010000000000000000101000000000000011111111111111000111111000
	 *
	 * which yields a value of 0x1400028003fff1f8.
	 *
	 * Depending on the level of optimization to compile this code, it may
	 * be that the count reported could be zero, if the compiler uses a br.cond
	 * instead of br.cloop.
	 *
	 *
	 * The 0x1 sets the ig_ad field to make sure we ignore any range restriction.
	 * Also bit 2 must always be set
	 */
	ita2_inp.pfp_ita2_pmc8.pmc_val = 0x1400028003fff1fa;

	/*
	 * To count the number of occurence of this instruction, we must
	 * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8
	 * event.
	 */
	if (pfm_find_full_event("IA64_TAGGED_INST_RETIRED_IBRP0_PMC8", &inp.pfp_events[0]) != PFMLIB_SUCCESS) {
		fatal_error("cannot find event IA64_TAGGED_INST_RETIRED_IBRP0_PMC8\n");
	}

	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level only
	 */
	inp.pfp_dfl_plm   = PFM_PLM3;
	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = 1;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, NULL)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}

	/*
	 * now create the context for self monitoring/per-task
	 */
	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}
	/*
	 * extract the unique identifier for our context, a regular file descriptor
	 */
	id = ctx[0].ctx_fd;

	/*
	 * Now prepare the argument to initialize the PMDs and PMCS.
	 * We must pfp_pmc_count to determine the number of PMC to intialize.
	 * We must use pfp_event_count to determine the number of PMD to initialize.
	 * Some events causes extra PMCs to be used, so  pfp_pmc_count may be >= pfp_event_count.
	 *
	 * This step is new compared to libpfm-2.x. It is necessary because the library no
	 * longer knows about the kernel data structures.
	 */

	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	/*
	 * the PMC controlling the event ALWAYS come first, that's why this loop
	 * is safe even when extra PMC are needed to support a particular event.
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pd[i].reg_num   = pc[i].reg_num;
	}
	printf("event_count=%d id=%d\n",  inp.pfp_event_count, id);

	/*
	 * Now program the registers
	 *
	 * We don't use the save variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., contains more thann coutning monitors.
	 */
	if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}
	if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}
	/*
	 * now we load (i.e., attach) the context to ourself
	 */
	load_args.load_pid = getpid();

	if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
	}

	/*
	 * Let's roll now.
	 */
	pfm_self_start(id);

	do_test(100UL);

	pfm_self_stop(id);

	/*
	 * now read the results
	 */
	if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error READ_PMDS errno %d\n",errno);
	}

	/*
	 * print the results
	 */
	pfm_get_full_event_name(&inp.pfp_events[0], name, MAX_EVT_NAME_LEN);
	printf("PMD%u %20lu %s\n",
			pd[0].reg_num,
			pd[0].reg_value,
			name);

	if (pd[0].reg_value != 0)
		printf("compiler used br.cloop\n");
	else
		printf("compiler did not use br.cloop\n");

	/*
	 * let's stop this now
	 */
	close(id);
	return 0;
}
Example #23
0
/*
 * System-wide counting example built on libpfms.
 *
 * argv[1], when present, is parsed with strtoul() (base auto-detected)
 * into cpu_list; the default is 0x3. The number of CPUs is taken as the
 * population count of cpu_list. Two events (cycles, instructions retired)
 * are counted at PFM_PLM3|PFM_PLM0 for 10 seconds, then one line per
 * CPU and per event is printed.
 *
 * Returns 0 on success; errors are reported through fatal_error().
 */
int
main(int argc, char **argv)
{
	pfarg_ctx_t ctx;
	pfarg_pmc_t pc[NUM_PMCS];
	pfarg_pmd_t *pd;
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	uint64_t cpu_list;
	void *desc;
	unsigned int num_counters;
	uint32_t i, j, k, l, ncpus, npmds;
	size_t len;
	int ret;
	char *name;

	if (pfm_initialize() != PFMLIB_SUCCESS)
		fatal_error("cannot initialize libpfm\n");

	if (pfms_initialize())
		fatal_error("cannot initialize libpfms\n");

	pfm_get_num_counters(&num_counters);
	pfm_get_max_event_name_len(&len);

	/* one extra byte on top of the maximum event name length */
	name = malloc(len+1);
	if (name == NULL)
		fatal_error("cannot allocate memory for event name\n");

	memset(&ctx, 0, sizeof(ctx));
	memset(pc, 0, sizeof(pc));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));

	cpu_list = argc > 1 ? strtoul(argv[1], NULL, 0) : 0x3;

	ncpus = popcount(cpu_list);

	if (pfm_get_cycle_event(&inp.pfp_events[0].event) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1].event) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");

	i = 2;

	inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0;

	/* never ask for more events than there are counters */
	if (i > num_counters) {
		i = num_counters;
		printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i);
	}
	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * indicate we are using the monitors for a system-wide session.
	 * This may impact the way the library sets up the PMC values.
	 */
	inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS)
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));

	/* one PMD entry per event and per CPU */
	npmds = ncpus * inp.pfp_event_count;
	dprint("ncpus=%u npmds=%u\n", ncpus, npmds);

	pd = calloc(npmds, sizeof(pfarg_pmd_t));
	if (pd == NULL)
		fatal_error("cannot allocate pd array\n");

	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	/*
	 * Fill pd[] CPU after CPU. For event i the PMD number comes from
	 * the first PMC entry of that event; j then skips the remaining
	 * PMC entries whose reg_evt_idx is still i.
	 */
	for(l=0, k = 0; l < ncpus; l++) {
		for (i=0, j=0; i < inp.pfp_event_count; i++, k++) {
			pd[k].reg_num   = outp.pfp_pmcs[j].reg_pmd_num;
			for(; j < outp.pfp_pmc_count; j++)  if (outp.pfp_pmcs[j].reg_evt_idx != i) break;
		}
	}

	/*
	 * create a context on all CPUs we asked for
	 *
	 * libpfms only works for system-wide, so we set the flag in
	 * the master context. the context argument is not modified by
	 * call.
	 *
	 * desc is an opaque descriptor used to identify session.
	 */
	ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;

	ret = pfms_create(&cpu_list, 1, &ctx, NULL, &desc);
	if (ret == -1)
		fatal_error("create error %d\n", ret);

	/*
	 * program the PMC registers on all CPUs of interest
	 */
	ret = pfms_write_pmcs(desc, pc, outp.pfp_pmc_count);
	if (ret == -1)
		fatal_error("write_pmcs error %d\n", ret);

	/*
	 * program the PMD registers on all CPUs of interest
	 */
	ret = pfms_write_pmds(desc, pd, inp.pfp_event_count);
	if (ret == -1)
		fatal_error("write_pmds error %d\n", ret);

	/*
	 * load context on all CPUs of interest
	 */
	ret = pfms_load(desc);
	if (ret == -1)
		fatal_error("load error %d\n", ret);

	/*
	 * start monitoring on all CPUs of interest
	 */
	ret = pfms_start(desc);
	if (ret == -1)
		fatal_error("start error %d\n", ret);

	/*
	 * simulate some work
	 */
	sleep(10);

	/*
	 * stop monitoring on all CPUs of interest
	 */
	ret = pfms_stop(desc);
	if (ret == -1)
		fatal_error("stop error %d\n", ret);

	/*
	 * read the PMD registers on all CPUs of interest.
	 * The pd[] array must be organized such that to
	 * read 2 PMDs on each CPU you need:
	 * 	- 2 * number of CPUs of interest
	 * 	- the first 2 elements of pd[] read on 1st CPU
	 * 	- the next  2 elements of pd[] read on the 2nd CPU
	 * 	- and so on
	 */
	ret = pfms_read_pmds(desc, pd, npmds);
	if (ret == -1)
		fatal_error("read_pmds error %d\n", ret);

	/*
	 * print per-CPU results
	 */
	for(j=0, k= 0; j < ncpus; j++) {
		for (i=0; i < inp.pfp_event_count; i++, k++) {
			pfm_get_full_event_name(&inp.pfp_events[i], name, len);
			printf("CPU%-3d PMD%u %20"PRIu64" %s\n",
			j,
			pd[k].reg_num,
			pd[k].reg_value,
			name);
		}
	}

	/*
	 * destroy context  on all CPUs of interest.
	 * After this call desc is invalid
	 */
	ret = pfms_close(desc);
	if (ret == -1)
		fatal_error("close error %d\n", ret);

	/* release both heap buffers; pd was previously leaked */
	free(pd);
	free(name);

	return 0;
}
/*
 * Append one event descriptor per string of the NULL-terminated argv[]
 * list to the array *fds, growing the array as needed.
 *
 * argv    : NULL-terminated list of event strings
 * fds     : in/out descriptor array; *fds must be NULL on the first call
 * num_fds : in/out number of used entries; *num_fds must be 0 on the
 *           first call
 *
 * The capacity of the array is kept in (*fds)[0].max_fds. All events added
 * by one call record the index of the first of them as group_leader.
 *
 * Returns 0 on success, -1 on failure. On a failure after the argument
 * checks, the whole array is released with perf_free_fds() and *fds /
 * *num_fds are reset to NULL / 0 so the caller is not left with a
 * dangling pointer.
 */
int
perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int *num_fds)
{
	perf_event_desc_t *fd, *grown;
	pfm_perf_encode_arg_t arg;
	int new_max, ret, num, max_fds;
	int group_leader;

	if (!(argv && fds && num_fds))
		return -1;

	fd = *fds;
	if (fd) {
		max_fds = fd[0].max_fds;
		if (max_fds < 2)
			return -1;
		num = *num_fds;
	} else {
		max_fds = num = 0; /* bootstrap */
	}
	group_leader = num;

	while(*argv) {
		if (num == max_fds) {
			/* grow geometrically, starting at 2 entries */
			if (max_fds == 0)
				new_max = 2;
			else
				new_max = max_fds << 1;

			if (new_max < max_fds) {
				/* not an errno condition, so no strerror suffix */
				warnx("too many entries");
				goto error;
			}
			/*
			 * Keep the old pointer until realloc() succeeds: on
			 * failure the original array is still valid and must
			 * be released by the error path.
			 */
			grown = realloc(fd, new_max * sizeof(*fd));
			if (!grown) {
				warn("cannot allocate memory");
				goto error;
			}
			fd = grown;

			/* reset newly allocated chunk */
			memset(fd + max_fds, 0, (new_max - max_fds) * sizeof(*fd));
			max_fds = new_max;

			/* update max size */
			fd[0].max_fds = max_fds;
		}
		/* ABI compatibility, set before calling libpfm */
		fd[num].hw.size = sizeof(fd[num].hw);

		memset(&arg, 0, sizeof(arg));
		arg.attr = &fd[num].hw;
		arg.fstr = &fd[num].fstr; /* fd[].fstr is NULL */

		ret = pfm_get_os_event_encoding(*argv, PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT_EXT, &arg);
		if (ret != PFM_SUCCESS) {
			warnx("event %s: %s", *argv, pfm_strerror(ret));
			goto error;
		}

		fd[num].name = strdup(*argv);
		if (!fd[num].name) {
			warn("cannot allocate memory");
			/*
			 * Entry num is not covered by perf_free_fds(fd, num)
			 * below, so drop the string libpfm returned for it.
			 * NOTE(review): assumes fstr is heap-allocated by
			 * libpfm, as its documentation states -- confirm.
			 */
			free(fd[num].fstr);
			fd[num].fstr = NULL;
			goto error;
		}
		fd[num].group_leader = group_leader;
		fd[num].idx = arg.idx;
		fd[num].cpu = arg.cpu;

		num++;
		argv++;
	}
	*num_fds = num;
	*fds = fd;
	return 0;
error:
	perf_free_fds(fd, num);
	/* the array is gone: put the caller back into the "first call" state */
	*fds = NULL;
	*num_fds = 0;
	return -1;
}
Example #25
0
/*
 * Entry point: fill the global options from the command line, apply the
 * defaults for anything left unset, initialize libpfm, run measure() and
 * release the library before exiting.
 */
int
main(int argc, char **argv)
{
	int opt, status;

	setlocale(LC_ALL, "");

	/* initial CPU value; replaced when -c is given */
	options.cpu = -1;

	for (;;) {
		opt = getopt(argc, argv,"hc:e:d:xPpG:");
		if (opt == -1)
			break;

		switch (opt) {
		case 'h':
			usage();
			exit(0);
		case 'c':
			options.cpu = atoi(optarg);
			break;
		case 'd':
			options.delay = atoi(optarg);
			break;
		case 'e':
			/* each -e adds one event group, up to MAX_GROUPS */
			if (options.num_groups >= MAX_GROUPS)
				errx(1, "you cannot specify more than %d groups.\n",
					MAX_GROUPS);
			options.events[options.num_groups++] = optarg;
			break;
		case 'G':
			options.cgroup_name = optarg;
			break;
		case 'P':
			options.pin = 1;
			break;
		case 'p':
			options.interval = 1;
			break;
		case 'x':
			options.excl = 1;
			break;
		default:
			errx(1, "unknown error");
		}
	}

	/* a delay of zero (or none given) falls back to 20 */
	if (options.delay == 0)
		options.delay = 20;

	/* no -e at all: use a single default group */
	if (!options.events[0]) {
		options.num_groups = 1;
		options.events[0] = "cycles,instructions";
	}

	status = pfm_initialize();
	if (status != PFM_SUCCESS)
		errx(1, "libpfm initialization failed: %s\n", pfm_strerror(status));

	measure();

	/* free libpfm resources cleanly */
	pfm_terminate();

	return 0;
}
/*
 * Self-monitoring example for the Itanium 2 PMU: programs the PMC8 opcode
 * matcher, counts the IA64_TAGGED_INST_RETIRED_IBRP0_PMC8 event around a
 * call to do_test(), prints the counter and reports whether it is non-zero.
 *
 * Every failure is reported through fatal_error(); the function returns 0
 * once the context has been destroyed.
 */
int
main(void)
{
	int ret;
	int type = 0;
	char *name;
	pid_t pid = getpid();
	pfmlib_param_t evt;
	pfmlib_ita2_param_t ita2_param;
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_context_t ctx[1];
	pfmlib_options_t pfmlib_options;

	/*
	 * Initialize pfm library (required before we can use it)
	 */
	if (pfm_initialize() != PFMLIB_SUCCESS) {
		fatal_error("Can't initialize library\n");
	}

	/*
	 * Let's make sure we run this on the right CPU
	 */
	pfm_get_pmu_type(&type);
	if (type != PFMLIB_ITANIUM2_PMU) {
		char *model; 
		/* NOTE(review): return value is not checked before model is printed */
		pfm_get_pmu_name(&model);
		fatal_error("this program does not work with the %s PMU\n", model);
	}

	/*
	 * pass options to library (optional)
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
	pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */
	pfm_set_options(&pfmlib_options);



	/* start from zeroed kernel and library argument structures */
	memset(pd, 0, sizeof(pd));
	memset(ctx, 0, sizeof(ctx));

	memset(&evt,0, sizeof(evt));
	memset(&ita2_param,0, sizeof(ita2_param));

	/*
	 * because we use a model specific feature, we must initialize the
	 * model specific pfmlib parameter structure and link it to the
	 * common structure.
	 * The magic number is a simple mechanism used by the library to check
	 * that the model specific data structure is decent. You must set it manually
	 * otherwise the model specific feature won't work.
	 */
	ita2_param.pfp_magic = PFMLIB_ITA2_PARAM_MAGIC;
	evt.pfp_model       = &ita2_param;

	/*
	 * We indicate that we are using the PMC8 opcode matcher. This is required
	 * otherwise the library adds PMC8 to the list of PMCs to program during
	 * pfm_dispatch_events().
	 */
	ita2_param.pfp_ita2_pmc8.opcm_used = 1;

	/*
	 * We want to match all the br.cloop in our test function.
	 * This branch is an IP-relative branch for which the major
	 * opcode (bits [40-37]=4) and the btype field is 5 (which represents
	 * bits[6-8]) so it is included in the match/mask fields of PMC8.
	 * It is necessarily in a B slot.
	 *
	 * We don't care which operands are used with br.cloop therefore
	 * the mask field of pmc8 is set such that only the 4 bits of the
	 * opcode and 3 bits of btype must match exactly. This is accomplished by
	 * clearing the top 4 bits and bits [6-8] of the mask field and setting the
	 * remaining bits.  Similarly, the match field only has the opcode value and btype
	 * set according to the encoding of br.cloop, the
	 * remaining bits are zero. Bit 60 of PMC8 is set to indicate
	 * that we look only in B slots  (this is the only possibility for
	 * this instruction anyway).
	 *
	 * So the binary representation of the value for PMC8 is as follows:
	 *
	 * 6666555555555544444444443333333333222222222211111111110000000000
	 * 3210987654321098765432109876543210987654321098765432109876543210
	 * ----------------------------------------------------------------
	 * 0001010000000000000000101000000000000011111111111111000111111000
	 *
	 * which yields a value of 0x1400028003fff1f8.
	 *
	 * Depending on the level of optimization to compile this code, it may
	 * be that the count reported could be zero, if the compiler uses a br.cond
	 * instead of br.cloop.
	 *
	 *
	 * The 0x1 sets the ig_ad field to make sure we ignore any range restriction.
	 * Also bit 2 must always be set
	 *
	 * NOTE(review): the value programmed below ends in ...1fa, i.e. the
	 * pattern shown above (...1f8) with bit 1 additionally set. The two
	 * remarks just above presumably account for that difference - confirm
	 * against the Itanium 2 PMC8 register layout.
	 */
	ita2_param.pfp_ita2_pmc8.pmc_val = 0x1400028003fff1fa;

	/*
	 * To count the number of occurrences of this instruction, we must
	 * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8
	 * event.
	 */
	if (pfm_find_event_byname("IA64_TAGGED_INST_RETIRED_IBRP0_PMC8", &evt.pfp_events[0].event) != PFMLIB_SUCCESS) {
		fatal_error("cannot find event IA64_TAGGED_INST_RETIRED_IBRP0_PMC8\n");
	}

	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level only
	 */
	evt.pfp_dfl_plm   = PFM_PLM3; 
	/*
	 * how many counters we use
	 */
	evt.pfp_event_count = 1;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&evt)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}
	/*
	 * for this example, we have decided not to get notified
	 * on counter overflows and the monitoring is not to be inherited
	 * in derived tasks
	 */
	ctx[0].ctx_flags = PFM_FL_INHERIT_NONE;

	/*
	 * now create the context for self monitoring/per-task
	 */
	if (perfmonctl(pid, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
		/* ENOSYS gets its own, more helpful, message */
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}
	/*
	 * Must be done before any PMC/PMD calls (unfreeze PMU). Initialize
	 * PMC/PMD to safe values. psr.up is cleared.
	 */
	if (perfmonctl(pid, PFM_ENABLE, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_ENABLE errno %d\n",errno);
	}

	/*
	 * Now prepare the argument to initialize the PMD.
	 * The PMD carries the same register number as the first PMC the
	 * library set up.
	 */
	pd[0].reg_num = evt.pfp_pc[0].reg_num;

	/*
	 * Now program the registers
	 *
	 * We don't use the same variable to indicate the number of elements passed to
	 * the kernel because pc may contain more elements than the number of events
	 * we specified, i.e., it may contain more than just counting monitors:
	 * PMCs are written with pfp_pc_count, PMDs with pfp_event_count.
	 */
	if (perfmonctl(pid, PFM_WRITE_PMCS, evt.pfp_pc, evt.pfp_pc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}
	if (perfmonctl(pid, PFM_WRITE_PMDS, pd, evt.pfp_event_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * Let's roll now: only do_test() runs between start and stop.
	 */
	pfm_start();

	do_test(100UL);

	pfm_stop();

	/*
	 * now read the results
	 */
	if (perfmonctl(pid, PFM_READ_PMDS, pd, evt.pfp_event_count) == -1) {
		fatal_error("perfmonctl error READ_PMDS errno %d\n",errno);
	}

	/*
	 * print the results
	 */
	pfm_get_event_name(evt.pfp_events[0].event, &name);
	printf("PMD%u %20lu %s\n", 
			pd[0].reg_num, 
			pd[0].reg_value, 
			name);

	/* a non-zero count means at least one instruction matched PMC8 */
	if (pd[0].reg_value != 0) 
		printf("compiler used br.cloop\n");
	else
		printf("compiler did not use br.cloop\n");

	/*
	 * let's stop this now
	 */
	if (perfmonctl(pid, PFM_DESTROY_CONTEXT, NULL, 0) == -1) {
		fatal_error("perfmonctl error PFM_DESTROY errno %d\n",errno);
	}
	return 0;
}
Example #27
0
/*
 * Initialize libpfm for a PAPI component.
 *
 * Initializes the library, records its version in
 * my_vector->cmp_info.support_version, rejects a major-version mismatch
 * against PFMLIB_VERSION, queries the PMU type and name, loads the preset
 * table for that PMU and fills in the native event / counter counts.
 *
 * my_vector: component vector whose cmp_info is filled in
 * cidx:      component index, forwarded to _papi_load_preset_table()
 *
 * Returns PAPI_OK on success, PAPI_ESYS when a libpfm call fails or the
 * major versions differ, or the non-zero value returned by
 * _papi_load_preset_table().
 */
int
_papi_libpfm_init(papi_vector_t *my_vector, int cidx) {

   int retval;
   unsigned int ncnt;
   unsigned int version;
   char pmu_name[PAPI_MIN_STR_LEN];


   /* The following checks the version of the PFM library
      against the version PAPI linked to... */
   SUBDBG( "pfm_initialize()\n" );
   if ( ( retval = pfm_initialize(  ) ) != PFMLIB_SUCCESS ) {
      PAPIERROR( "pfm_initialize(): %s", pfm_strerror( retval ) );
      return PAPI_ESYS;
   }

   /* Get the libpfm3 version.
      retval must capture this call's result: otherwise the error report
      would describe the (successful) pfm_initialize() above. */
   SUBDBG( "pfm_get_version(%p)\n", &version );
   if ( ( retval = pfm_get_version( &version ) ) != PFMLIB_SUCCESS ) {
      PAPIERROR( "pfm_get_version(%p): %s", ( void * ) &version,
				   pfm_strerror( retval ) );
      return PAPI_ESYS;
   }

   /* Set the version */
   sprintf( my_vector->cmp_info.support_version, "%d.%d",
	    PFM_VERSION_MAJOR( version ), PFM_VERSION_MINOR( version ) );

   /* Complain if the compiled-against version doesn't match current version */
   if ( PFM_VERSION_MAJOR( version ) != PFM_VERSION_MAJOR( PFMLIB_VERSION ) ) {
      PAPIERROR( "Version mismatch of libpfm: compiled %#x vs. installed %#x\n",
				   PFM_VERSION_MAJOR( PFMLIB_VERSION ),
				   PFM_VERSION_MAJOR( version ) );
      return PAPI_ESYS;
   }

   /* Always initialize globals dynamically to handle forks properly. */

   _perfmon2_pfm_pmu_type = -1;

   /* Opened once for all threads. */
   SUBDBG( "pfm_get_pmu_type(%p)\n", &_perfmon2_pfm_pmu_type );
   if ( ( retval = pfm_get_pmu_type( &_perfmon2_pfm_pmu_type ) ) != PFMLIB_SUCCESS ) {
      /* %p takes the address that was passed to the call, not the value */
      PAPIERROR( "pfm_get_pmu_type(%p): %s", ( void * ) &_perfmon2_pfm_pmu_type,
				   pfm_strerror( retval ) );
      return PAPI_ESYS;
   }

   pmu_name[0] = '\0';
   if ( ( retval = pfm_get_pmu_name( pmu_name, PAPI_MIN_STR_LEN ) ) != PFMLIB_SUCCESS ) {
      PAPIERROR( "pfm_get_pmu_name(%p,%d): %s", ( void * ) pmu_name, PAPI_MIN_STR_LEN,
				   pfm_strerror( retval ) );
      return PAPI_ESYS;
   }
   SUBDBG( "PMU is a %s, type %d\n", pmu_name, _perfmon2_pfm_pmu_type );

   /* Setup presets */
   retval = _papi_load_preset_table( pmu_name, _perfmon2_pfm_pmu_type, cidx );
   if ( retval )
      return retval;

   /* Fill in cmp_info */

   SUBDBG( "pfm_get_num_events(%p)\n", &ncnt );
   if ( ( retval = pfm_get_num_events( &ncnt ) ) != PFMLIB_SUCCESS ) {
      PAPIERROR( "pfm_get_num_events(%p): %s\n", &ncnt,
				   pfm_strerror( retval ) );
      return PAPI_ESYS;
   }
   SUBDBG( "pfm_get_num_events: %d\n", ncnt );
   my_vector->cmp_info.num_native_events = ncnt;
   num_native_events = ncnt;

   /* NOTE(review): the result of pfm_get_num_counters() is not checked */
   pfm_get_num_counters( ( unsigned int * ) &my_vector->cmp_info.num_cntrs );
   SUBDBG( "pfm_get_num_counters: %d\n", my_vector->cmp_info.num_cntrs );


   /* Choose how a native event code is split into event and unit-mask
      parts for the Intel families that need a non-default layout. */
   if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) {
     /* Pentium4 */
     if ( _papi_hwi_system_info.hw_info.cpuid_family == 15 ) {
       PAPI_NATIVE_EVENT_AND_MASK = 0x000000ff;
       PAPI_NATIVE_UMASK_AND_MASK = 0x0fffff00;
       PAPI_NATIVE_UMASK_SHIFT = 8;
       /* Itanium2 */
     } else if ( _papi_hwi_system_info.hw_info.cpuid_family == 31 ||
		 _papi_hwi_system_info.hw_info.cpuid_family == 32 ) {
       PAPI_NATIVE_EVENT_AND_MASK = 0x00000fff;
       PAPI_NATIVE_UMASK_AND_MASK = 0x0ffff000;
       PAPI_NATIVE_UMASK_SHIFT = 12;
     }
   }


   return PAPI_OK;
}
Example #28
0
/*
 * Open a file descriptor for perf events with `event_name', mmap it, and set
 * things up so that the calling thread receives SIGIO signals from it.
 *
 * `sample_freq' is stored in the attribute's sample_freq field with
 * frequency mode (attr.freq) turned on.
 *
 * Returns the perf_event_handle on success, else folly::none.  On every
 * failure after the descriptor has been opened, the descriptor is closed
 * (directly, or through close_event() once the handle exists).
 */
folly::Optional<perf_event_handle> enable_event(const char* event_name,
                                                uint64_t sample_freq) {
  struct perf_event_attr attr = {};
  pfm_perf_encode_arg_t arg = {};
  arg.attr = &attr;
  arg.size = sizeof(arg);

  // Populate the `type', `config', and `exclude_*' members on `attr'.
  auto const pfmr = pfm_get_os_event_encoding(event_name, PFM_PLM3,
                                              PFM_OS_PERF_EVENT, &arg);
  if (pfmr != PFM_SUCCESS) {
    Logger::Warning("perf_event: failed to get encoding for %s: %s",
                    event_name, pfm_strerror(pfmr));
    return folly::none;
  }

  // Finish setting up `attr' and open the event.
  attr.size = sizeof(attr);
  attr.disabled = 1;
  attr.sample_freq = sample_freq;
  attr.freq = 1;
  attr.watermark = 0;
  attr.wakeup_events = 1;
  attr.precise_ip = 2;  // request zero skid

  attr.sample_type = PERF_SAMPLE_IP
                   | PERF_SAMPLE_TID
                   | PERF_SAMPLE_ADDR
                   | PERF_SAMPLE_CALLCHAIN
                   ;

  auto const ret = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
  if (ret < 0) {
    // Some machines might not have PEBS support (needed for precise_ip > 0),
    // but then PERF_SAMPLE_ADDR will always return zeros instead of the target
    // memory address.  Just fail silently in this case.
    Logger::Verbose("perf_event: perf_event_open failed with: %s",
                    folly::errnoStr(errno).c_str());
    return folly::none;
  }
  auto const fd = safe_cast<int>(ret);

  // Recent versions of Linux have a CLOEXEC flag for perf_event_open(), but
  // use fcntl() for portability.  Note that since we do this after we open the
  // event, this could in theory race with an exec() from another thread---but
  // that shouldn't be happening anyway.
  //
  // F_SETFD takes file *descriptor* flags, i.e. FD_CLOEXEC.  O_CLOEXEC is an
  // open(2) flag with a different value and does not set close-on-exec here.
  fcntl(fd, F_SETFD, FD_CLOEXEC);

  // Make sure that any SIGIO sent from `fd' is handled by the calling thread.
  f_owner_ex owner;
  owner.type = F_OWNER_TID;
  owner.pid = syscall(__NR_gettid);

  // Set up `fd' to send SIGIO with sigaction info.
  if (fcntl(fd, F_SETFL, O_ASYNC) < 0 ||
      fcntl(fd, F_SETSIG, SIGIO) < 0 ||
      fcntl(fd, F_SETOWN_EX, &owner) < 0) {
    Logger::Warning("perf_event: failed to set up asynchronous I/O: %s",
                    folly::errnoStr(errno).c_str());
    close(fd);
    return folly::none;
  }

  // Map the ring buffer for our samples.
  auto const base = mmap(nullptr, mmap_sz(), PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);
  if (base == MAP_FAILED) {
    Logger::Warning("perf_event: failed to mmap perf_event: %s",
                    folly::errnoStr(errno).c_str());
    close(fd);
    return folly::none;
  }
  auto const meta = reinterpret_cast<struct perf_event_mmap_page*>(base);

  // From here on, failures are cleaned up through close_event(pe).
  auto const pe = perf_event_handle { fd, meta };

  // Reset the event.  This seems to be present in most examples, but it's
  // unclear if it's necessary or just good hygiene.  (It's possible that it's
  // necessary on successive opens.)
  if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) < 0) {
    Logger::Warning("perf_event: failed to reset perf_event: %s",
                    folly::errnoStr(errno).c_str());
    close_event(pe);
    return folly::none;
  }

  // Enable the event.  The man page and other examples of usage all suggest
  // that the right thing to do is to start with the event disabled and then
  // enable it manually afterwards, so we do the same here even though it seems
  // strange and circuitous.
  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    Logger::Warning("perf_event: failed to enable perf_event: %s",
                    folly::errnoStr(errno).c_str());
    close_event(pe);
    return folly::none;
  }

  return pe;
}
Example #29
0
/*
 * Sample a child task with a perfmon context that uses a sampling buffer.
 *
 * Programs the cycle and instructions-retired events, creates the context,
 * forks, runs child(arg) in the child, attaches the context to the stopped
 * child and then processes the sampling buffer each time an overflow
 * message is read from the context descriptor, until the end message
 * arrives.
 *
 * arg: forwarded unchanged to child(); arg[0] is also used as the task name
 *      in the "exited already" warning.
 *
 * Returns 0. Failures are reported through fatal_error().
 * NOTE(review): child() is expected not to return in the child process -
 * confirm, otherwise the child would continue into the parent's code below.
 */
int
mainloop(char **arg)
{
	ctx_arg_t ctx;
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_reg_t pc[NUM_PMCS];
	pfarg_load_t load_args;
	pfm_msg_t msg;
	unsigned long ovfl_count = 0UL;
	unsigned long sample_period;
	unsigned long smpl_pmd_mask = 0UL;
	pid_t pid;
	int status, ret, fd;
	unsigned int i, num_counters;

	/*
	 * initialize all locals
	 *
	 * load_args is handed to the kernel by PFM_LOAD_CONTEXT with only
	 * load_pid set explicitly, so its remaining fields must be zeroed
	 * rather than left as stack garbage.
	 */
	memset(&ctx, 0, sizeof(ctx));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));
	memset(pd, 0, sizeof(pd));
	memset(pc, 0, sizeof(pc));
	memset(&load_args, 0, sizeof(load_args));

	/*
	 * locate events
	 */
	pfm_get_num_counters(&num_counters);

	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
		fatal_error("cannot find inst retired event\n");

	i = 2;

	/* never ask for more events than there are counters */
	if (i > num_counters) {
		i = num_counters;
		printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i);
	}
	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level
	 * 	PFM_PLM0 : kernel level
	 */
	inp.pfp_dfl_plm   = PFM_PLM3;
	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}

	/*
	 * Now prepare the argument to initialize the PMDs and PMCS.
	 * We must use pfp_pmc_count to determine the number of PMC to initialize.
	 * We must use pfp_event_count to determine the number of PMD to initialize.
	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
	 *
	 * This step is new compared to libpfm-2.x. It is necessary because the library no
	 * longer knows about the kernel data structures.
	 */

	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	/*
	 * the PMC controlling the event ALWAYS come first, that's why this loop
	 * is safe even when extra PMC are needed to support a particular event.
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pd[i].reg_num   = pc[i].reg_num;
		/* build sampling mask */
		smpl_pmd_mask  |= 1UL << pc[i].reg_num;
	}

	printf("smpl_pmd_mask=0x%lx\n", smpl_pmd_mask);

	/*
	 * now we indicate what to record when each counter overflows.
	 * In our case, we only have one sampling period and it is set for the
	 * first event. Here we indicate that when the sampling period expires
	 * then we want to record the value of all the other counters.
	 *
	 * We exclude the first counter in this case.
	 */
	smpl_pmd_mask  &= ~(1UL << pc[0].reg_num);

	pc[0].reg_smpl_pmds[0] = smpl_pmd_mask;

	/*
	 * when our sampling counter overflows, we want to be notified.
	 * The notification will come ONLY when the sampling buffer
	 * becomes full.
	 *
	 * We also activate randomization of the sampling period.
	 */
	pc[0].reg_flags	|= PFM_REGFL_OVFL_NOTIFY | PFM_REGFL_RANDOM;

	/*
	 * we also want to reset the other PMDs on
	 * every overflow. If we do not set
	 * this, the non-overflowed counters
	 * will be untouched.
	 */
	pc[0].reg_reset_pmds[0] |= smpl_pmd_mask;

	sample_period = 1000000UL;

	/*
	 * the counter is loaded with the two's complement of the period
	 * (same value for the initial load and both reset values)
	 */
	pd[0].reg_value       = (~0) - sample_period + 1;
	pd[0].reg_short_reset = (~0) - sample_period + 1;
	pd[0].reg_long_reset  = (~0) - sample_period + 1;
	/*
	 * setup randomization parameters, we allow a range of up to +256 here.
	 */
	pd[0].reg_random_seed = 5;
	pd[0].reg_random_mask = 0xff;


	printf("programming %u PMCS and %u PMDS\n", outp.pfp_pmc_count, inp.pfp_event_count);

	/*
	 * prepare context structure.
	 *
	 * format specific parameters MUST be concatenated to the regular
	 * pfarg_context_t structure. For convenience, the default sampling
	 * format provides a data structure that already combines the pfarg_context_t
	 * with what is needed for this format.
	 */

	 /*
	  * We initialize the format specific information.
	  * The format is identified by its UUID which must be copied
	  * into the ctx_smpl_buf_id field.
	  */
	memcpy(ctx.ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t));

	/*
	 * the size of the buffer is indicated in bytes (not entries).
	 *
	 * The kernel will record into the buffer up to a certain point.
	 * No partial samples are ever recorded.
	 */
	ctx.buf_arg.buf_size = 8192;

	/*
	 * now create our perfmon context.
	 */
	if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1 ) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}

	/*
	 * extract the file descriptor we will use to
	 * identify this newly created context
	 */
	fd = ctx.ctx_arg.ctx_fd;

	/*
	 * retrieve the virtual address at which the sampling
	 * buffer has been mapped
	 */
	buf_addr = ctx.ctx_arg.ctx_smpl_vaddr;

	printf("context [%d] buffer mapped @%p\n", fd, buf_addr);

	/*
	 * Now program the registers
	 */
	if (perfmonctl(fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}
	/*
	 * initialize the PMDs
	 */
	if (perfmonctl(fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * Create the child task
	 */
	if ((pid=fork()) == -1) fatal_error("Cannot fork process\n");

	/*
	 * In order to get the PFM_END_MSG message, it is important
	 * to ensure that the child task does not inherit the file
	 * descriptor of the context. By default, file descriptor
	 * are inherited during exec(). We explicitly close it
	 * here. We could have set it up through fcntl(FD_CLOEXEC)
	 * to achieve the same thing.
	 */
	if (pid == 0) {
		close(fd);
		child(arg);
	}

	/*
	 * wait for the child to exec
	 */
	waitpid(pid, &status, WUNTRACED);

	/*
	 * process is stopped at this point
	 */
	if (WIFEXITED(status)) {
		warning("task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status));
		goto terminate_session;
	}

	/*
	 * attach context to stopped task
	 */
	load_args.load_pid = pid;
	if (perfmonctl(fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
	}
	/*
	 * activate monitoring for stopped task.
	 * (nothing will be measured at this point)
	 */
	if (perfmonctl(fd, PFM_START, NULL, 0) == -1) {
		fatal_error(" perfmonctl error PFM_START errno %d\n",errno);
	}
	/*
	 * detach child. Side effect includes
	 * activation of monitoring.
	 */
	ptrace(PTRACE_DETACH, pid, NULL, 0);

	/*
	 * core loop
	 */
	for(;;) {
		/*
		 * wait for overflow/end notification messages
		 */
		ret = read(fd, &msg, sizeof(msg));
		if (ret == -1) {
			fatal_error("cannot read perfmon msg: %s\n", strerror(errno));
		}
		switch(msg.type) {
			case PFM_MSG_OVFL: /* the sampling buffer is full */
				process_smpl_buf(fd, smpl_pmd_mask, 1);
				ovfl_count++;
				break;
			case PFM_MSG_END: /* monitored task terminated */
				printf("task terminated\n");
				goto terminate_session;
			default: fatal_error("unknown message type %d\n", msg.type);
		}
	}
terminate_session:
	/*
	 * cleanup child
	 */
	waitpid(pid, &status, 0);

	/*
	 * check for any leftover samples
	 */
	process_smpl_buf(fd, smpl_pmd_mask, 0);

	/*
	 * destroy perfmon context
	 */
	close(fd);

	printf("%lu samples collected in %lu buffer overflows\n", collect_samples, ovfl_count);

	return 0;
}
Example #30
0
/*
 * Count the "cycles" event for this process around a call to fib(N) using
 * the perf_event interface, then print the count scaled by the
 * enabled/running times.
 *
 * argc and argv are not used. Returns 0; every failure terminates the
 * program through err()/errx() with exit status 1.
 */
int
main(int argc, char **argv)
{
	struct perf_event_attr attr;
	int fd, ret;
	uint64_t count = 0, values[3];

	setlocale(LC_ALL, "");

	/*
	 * Initialize libpfm library (required before we can use it)
	 */
	ret = pfm_initialize();
	if (ret != PFM_SUCCESS)
		errx(1, "cannot initialize library: %s", pfm_strerror(ret));

	memset(&attr, 0, sizeof(attr));

	/*
	 * 1st argument: event string
	 * 2nd argument: default privilege level (used if not specified in the event string)
	 * 3rd argument: the perf_event_attr to initialize
	 */
	ret = pfm_get_perf_event_encoding("cycles", PFM_PLM0|PFM_PLM3, &attr, NULL, NULL);
	if (ret != PFM_SUCCESS)
		errx(1, "cannot find encoding: %s", pfm_strerror(ret));

	/*
	 * request timing information because event may be multiplexed
	 * and thus it may not count all the time. The scaling information
	 * will be used to scale the raw count as if the event had run all
	 * along
	 */
	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING;

	/* do not start immediately after perf_event_open() */
	attr.disabled = 1;

	/*
	 * create the event and attach to self
	 * Note that it attaches only to the main thread, there is no inheritance
	 * to threads that may be created subsequently.
	 *
	 * if multithreaded, then getpid() must be replaced by gettid()
	 */
	fd = perf_event_open(&attr, getpid(), -1, -1, 0);
	if (fd < 0)
		err(1, "cannot create event");

	/*
	 * start counting now
	 */
	ret = ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	if (ret)
		err(1, "ioctl(enable) failed");

	printf("Fibonacci(%d)=%lu\n", N, fib(N));

	/*
	 * stop counting
	 */
	ret = ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	if (ret)
		err(1, "ioctl(disable) failed");

	/*
	 * read the count + scaling values
	 *
	 * It is not necessary to stop an event to read its value
	 *
	 * err() appends strerror(errno) by itself, so it is used only when
	 * read() actually failed. A short read leaves errno untouched and is
	 * reported with errx() instead.
	 */
	ret = read(fd, values, sizeof(values));
	if (ret == -1)
		err(1, "cannot read results");
	if ((size_t)ret != sizeof(values))
		errx(1, "cannot read results: short read (%d of %zu bytes)",
		     ret, sizeof(values));

	/*
	 * scale count
	 *
	 * values[0] = raw count
	 * values[1] = TIME_ENABLED
	 * values[2] = TIME_RUNNING
	 *
	 * count stays 0 when the event never ran (TIME_RUNNING == 0)
	 */
	if (values[2])
		count = (uint64_t)((double)values[0] * values[1]/values[2]);

	printf("count=%'"PRIu64"\n", count);

	close(fd);

	/* free libpfm resources cleanly */
	pfm_terminate();

	return 0;
}