int _papi_libpfm_ntv_code_to_name( unsigned int EventCode, char *ntv_name, int len ) { int ret; unsigned int event, umask; pfmlib_event_t gete; memset( &gete, 0, sizeof ( gete ) ); if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK ) return ( PAPI_ENOEVNT ); gete.event = event; gete.num_masks = ( unsigned int ) prepare_umask( umask, gete.unit_masks ); if ( gete.num_masks == 0 ) ret = pfm_get_event_name( gete.event, ntv_name, ( size_t ) len ); else ret = pfm_get_full_event_name( &gete, ntv_name, ( size_t ) len ); if ( ret != PFMLIB_SUCCESS ) { char tmp[PAPI_2MAX_STR_LEN]; pfm_get_event_name( gete.event, tmp, sizeof ( tmp ) ); /* Skip error message if event is not supported by host cpu; * we don't need to give this info away for papi_native_avail util */ if ( ret != PFMLIB_ERR_BADHOST ) PAPIERROR ( "pfm_get_full_event_name(%p(event %d,%s,%d masks),%p,%d): %d -- %s", &gete, gete.event, tmp, gete.num_masks, ntv_name, len, ret, pfm_strerror( ret ) ); if ( ret == PFMLIB_ERR_FULL ) { return PAPI_EBUF; } return PAPI_EMISC; } return PAPI_OK; }
/*
 * Walk every event index reported by pfm_get_num_events() and call
 * show_event_info() for each event whose name matches the compiled regular
 * expression preg.
 *
 * The name is fetched into the buffer `name` (capacity max_len+1), both of
 * which are defined outside this function. Events whose name cannot be
 * retrieved are silently skipped.
 *
 * Returns the number of events that matched.
 */
static int
show_info(regex_t *preg)
{
	unsigned int idx = 0, total = 0, hits = 0;
	int status;

	pfm_get_num_events(&total);

	while (idx < total) {
		unsigned int ev = idx++;

		status = pfm_get_event_name(ev, name, max_len+1);

		/* unsupported events fail the name lookup and are ignored */
		if (status == PFMLIB_SUCCESS && regexec(preg, name, 0, NULL, 0) == 0) {
			show_event_info(name, ev);
			hits++;
		}
	}
	return hits;
}
static int show_info_sorted(regex_t *preg) { unsigned int i, n, count = 0, match = 0; int code, ret; code_info_t *codes = NULL; pfm_get_num_events(&count); codes = malloc(count * sizeof(*codes)); if (!codes) fatal_error("cannot allocate memory\n"); for(i=0, n = 0; i < count; i++, n++) { ret = pfm_get_event_code(i, &code); /* skip unsupported events */ if (ret != PFMLIB_SUCCESS) continue; codes[n].idx = i; codes[n].code = code; } qsort(codes, n, sizeof(*codes), compare_codes); for(i=0; i < n; i++) { ret = pfm_get_event_name(codes[i].idx, name, max_len+1); /* skip unsupported events */ if (ret != PFMLIB_SUCCESS) continue; if (regexec(preg, name, 0, NULL, 0) == 0) { show_event_info(name, codes[i].idx); match++; } } free(codes); return match; }
static int pfmon_ita_print_header(FILE *fp) { pfmon_event_set_t *set; pfmlib_ita_input_param_t *mod_in; unsigned int i, k, l; int isn; char *name; size_t len; static const char *insn_str[]={ "ia32/ia64", "ia32", "ia64" }; len = 1 + options.max_event_name_len; name = malloc(len); if (!name) fatal_error("cannot allocate string buffer"); for(k=0, set = options.sets; set; k++, set = set->next) { mod_in = (pfmlib_ita_input_param_t *)set->mod_inp; fprintf(fp, "#\n#\n# instruction sets for set%u:\n", k); for(i=0; i < set->event_count; i++) { pfm_get_event_name(set->inp.pfp_events[i].event, name, len); isn =mod_in->pfp_ita_counters[i].ism; fprintf(fp, "#\tPMD%d: %-*s = %s\n", set->outp.pfp_pmcs[i].reg_num, (int)options.max_event_name_len, name, insn_str[isn]); l--; } fprintf(fp, "#\n"); } free(name); return 0; }
/*
 * Verify that no event in the set is combined with a feature it cannot be
 * used with. The library performs the same validation, but doing it here
 * lets us report exactly which event and which feature are in conflict.
 *
 * Each violation is reported through fatal_error(). The event name is
 * fetched into the options.ev_name1 buffer for use in the messages.
 */
static void
check_ita_event_combinations(pfmon_event_set_t *set)
{
	pfmlib_ita_input_param_t *param = set->mod_inp;
	pfmon_ita_args_t *ita_args = set->mod_args;
	char *name = options.ev_name1;
	unsigned int idx, ev, opcm_in_use;

	/* opcode matching is active if either PMC8 or PMC9 is in use */
	opcm_in_use = param->pfp_ita_pmc8.opcm_used || param->pfp_ita_pmc9.opcm_used;

	for (idx = 0; idx < set->event_count; idx++) {

		ev = set->inp.pfp_events[idx].event;

		pfm_get_event_name(ev, name, options.max_event_name_len+1);

		if (opcm_in_use) {
			if (pfm_ita_support_opcm(ev) == 0)
				fatal_error("event %s does not support opcode matching\n", name);
		}

		if (param->pfp_ita_irange.rr_used) {
			if (pfm_ita_support_iarr(ev) == 0)
				fatal_error("event %s does not support instruction address range restrictions\n", name);
		}

		if (param->pfp_ita_drange.rr_used) {
			if (pfm_ita_support_darr(ev) == 0)
				fatal_error("event %s does not support data address range restrictions\n", name);
		}

		/* IA-32-only monitoring requested: BTB events are rejected */
		if (ita_args->opt_ia32 && ita_args->opt_ia64 == 0) {
			if (pfm_ita_is_btb(ev))
				fatal_error("cannot use BTB event (%s) when only monitoring IA-32 execution\n", name);
		}
	}
	/*
	 * check_counter_conflict() is deliberately not called: Itanium has no
	 * events that can only be measured on a single counter, so that routine
	 * would never catch anything.
	 */
}
int main(int argc, char **argv) { char **p; int i, ret; pid_t pid = getpid(); pfmlib_param_t evt; pfarg_reg_t pd[NUM_PMDS]; pfarg_context_t ctx[1]; pfmlib_options_t pfmlib_options; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) { printf("Can't initialize library\n"); exit(1); } /* * check that the user did not specify too many events */ if (argc-1 > pfm_get_num_counters()) { printf("Too many events specified\n"); exit(1); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(ctx, 0, sizeof(ctx)); /* * prepare parameters to library. we don't use any Itanium * specific features here. so the pfp_model is NULL. */ memset(&evt,0, sizeof(evt)); /* * be nice to user! */ p = argc > 1 ? argv+1 : event_list; for (i=0; *p ; i++, p++) { if (pfm_find_event(*p, &evt.pfp_events[i].event) != PFMLIB_SUCCESS) { fatal_error("Cannot find %s event\n", *p); } } /* * set the default privilege mode for all counters: * PFM_PLM3 : user level only */ evt.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ evt.pfp_event_count = i; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&evt)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * for this example, we have decided not to get notified * on counter overflows and the monitoring is not to be inherited * in derived tasks. */ ctx[0].ctx_flags = PFM_FL_INHERIT_NONE; /* * now create the context for self monitoring/per-task */ if (perfmonctl(pid, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * Must be done before any PMD/PMD calls (unfreeze PMU). Initialize * PMC/PMD to safe values. 
psr.up is cleared. */ if (perfmonctl(pid, PFM_ENABLE, NULL, 0) == -1) { fatal_error("perfmonctl error PFM_ENABLE errno %d\n",errno); } /* * Now prepare the argument to initialize the PMDs. * the memset(pd) initialized the entire array to zero already, so * we just have to fill in the register numbers from the pc[] array. */ for (i=0; i < evt.pfp_event_count; i++) { pd[i].reg_num = evt.pfp_pc[i].reg_num; } /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann coutning monitors. */ if (perfmonctl(pid, PFM_WRITE_PMCS, evt.pfp_pc, evt.pfp_pc_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); } if (perfmonctl(pid, PFM_WRITE_PMDS, pd, evt.pfp_event_count) == -1) { {int i; for(i=0; i < evt.pfp_event_count; i++) printf("pmd%d: 0x%x\n", i, pd[i].reg_flags);} fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); } /* * Let's roll now */ pfm_start(); noploop(10000000); pfm_stop(); /* * now read the results */ if (perfmonctl(pid, PFM_READ_PMDS, pd, evt.pfp_event_count) == -1) { fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno); return -1; } /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. * */ for (i=0; i < evt.pfp_event_count; i++) { char *name; pfm_get_event_name(evt.pfp_events[i].event, &name); printf("PMD%u %20lu %s\n", pd[i].reg_num, pd[i].reg_value, name); } /* * let's stop this now */ if (perfmonctl(pid, PFM_DESTROY_CONTEXT, NULL, 0) == -1) { fatal_error( "child: perfmonctl error PFM_DESTROY errno %d\n",errno); } return 0; }
int main(void) { int ret; int type = 0; char *name; pid_t pid = getpid(); pfmlib_param_t evt; pfmlib_ita2_param_t ita2_param; pfarg_reg_t pd[NUM_PMDS]; pfarg_context_t ctx[1]; pfmlib_options_t pfmlib_options; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) { fatal_error("Can't initialize library\n"); } /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM2_PMU) { char *model; pfm_get_pmu_name(&model); fatal_error("this program does not work with the %s PMU\n", model); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(ctx, 0, sizeof(ctx)); memset(&evt,0, sizeof(evt)); memset(&ita2_param,0, sizeof(ita2_param)); /* * because we use a model specific feature, we must initialize the * model specific pfmlib parameter structure and link it to the * common structure. * The magic number is a simple mechanism used by the library to check * that the model specific data structure is decent. You must set it manually * otherwise the model specific feature won't work. */ ita2_param.pfp_magic = PFMLIB_ITA2_PARAM_MAGIC; evt.pfp_model = &ita2_param; /* * We indicate that we are using the PMC8 opcode matcher. This is required * otherwise the library add PMC8 to the list of PMC to pogram during * pfm_dispatch_events(). */ ita2_param.pfp_ita2_pmc8.opcm_used = 1; /* * We want to match all the br.cloop in our test function. * This branch is an IP-relative branch for which the major * opcode (bits [40-37]=4) and the btype field is 5 (which represents * bits[6-8]) so it is included in the match/mask fields of PMC8. * It is necessarily in a B slot. 
* * We don't care which operands are used with br.cloop therefore * the mask field of pmc8 is set such that only the 4 bits of the * opcode and 3 bits of btype must match exactly. This is accomplished by * clearing the top 4 bits and bits [6-8] of the mask field and setting the * remaining bits. Similarly, the match field only has the opcode value and btype * set according to the encoding of br.cloop, the * remaining bits are zero. Bit 60 of PMC8 is set to indicate * that we look only in B slots (this is the only possibility for * this instruction anyway). * * So the binary representation of the value for PMC8 is as follows: * * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 * ---------------------------------------------------------------- * 0001010000000000000000101000000000000011111111111111000111111000 * * which yields a value of 0x1400028003fff1f8. * * Depending on the level of optimization to compile this code, it may * be that the count reported could be zero, if the compiler uses a br.cond * instead of br.cloop. * * * The 0x1 sets the ig_ad field to make sure we ignore any range restriction. * Also bit 2 must always be set */ ita2_param.pfp_ita2_pmc8.pmc_val = 0x1400028003fff1fa; /* * To count the number of occurence of this instruction, we must * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 * event. 
*/ if (pfm_find_event_byname("IA64_TAGGED_INST_RETIRED_IBRP0_PMC8", &evt.pfp_events[0].event) != PFMLIB_SUCCESS) { fatal_error("cannot find event IA64_TAGGED_INST_RETIRED_IBRP0_PMC8\n"); } /* * set the privilege mode: * PFM_PLM3 : user level only */ evt.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ evt.pfp_event_count = 1; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&evt)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * for this example, we have decided not to get notified * on counter overflows and the monitoring is not to be inherited * in derived tasks */ ctx[0].ctx_flags = PFM_FL_INHERIT_NONE; /* * now create the context for self monitoring/per-task */ if (perfmonctl(pid, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * Must be done before any PMD/PMD calls (unfreeze PMU). Initialize * PMC/PMD to safe values. psr.up is cleared. */ if (perfmonctl(pid, PFM_ENABLE, NULL, 0) == -1) { fatal_error("perfmonctl error PFM_ENABLE errno %d\n",errno); } /* * Now prepare the argument to initialize the PMD. */ pd[0].reg_num = evt.pfp_pc[0].reg_num; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann coutning monitors. */ if (perfmonctl(pid, PFM_WRITE_PMCS, evt.pfp_pc, evt.pfp_pc_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); } if (perfmonctl(pid, PFM_WRITE_PMDS, pd, evt.pfp_event_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); } /* * Let's roll now. 
*/ pfm_start(); do_test(100UL); pfm_stop(); /* * now read the results */ if (perfmonctl(pid, PFM_READ_PMDS, pd, evt.pfp_event_count) == -1) { fatal_error("perfmonctl error READ_PMDS errno %d\n",errno); } /* * print the results */ pfm_get_event_name(evt.pfp_events[0].event, &name); printf("PMD%u %20lu %s\n", pd[0].reg_num, pd[0].reg_value, name); if (pd[0].reg_value != 0) printf("compiler used br.cloop\n"); else printf("compiler did not use br.cloop\n"); /* * let's stop this now */ if (perfmonctl(pid, PFM_DESTROY_CONTEXT, NULL, 0) == -1) { fatal_error("perfmonctl error PFM_DESTROY errno %d\n",errno); } return 0; }