int main( int argc, char **argv ) { int i; tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ // if ( ( retval = // PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) // test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); printf( "\n----------------------------------\n" ); printf( "For PAPI Version: %d.%d.%d.%d\n", PAPI_VERSION_MAJOR( PAPI_VERSION ), PAPI_VERSION_MINOR( PAPI_VERSION ), PAPI_VERSION_REVISION( PAPI_VERSION ), PAPI_VERSION_INCREMENT( PAPI_VERSION ) ); printf( "There are %d error codes defined\n", PAPI_NUM_ERRORS ); printf( "----------------------------------\n" ); for (i = 0; i < PAPI_NUM_ERRORS; i++) { char *errstr, *errdescr; errstr = PAPI_strerror( -i ); errdescr = PAPI_descr_error( -i ); printf( "Error code %4d: %-15s | %s\n", -i, errstr, errdescr ); } printf( "----------------------------------\n\n" ); exit( 1 ); }
/* Support routine to display header information to the screen from the hardware info data structure. The same code was duplicated in a number of tests and utilities. Seems to make sense to refactor. This may not be the best place for it to live, but it works for now. */ int papi_print_header( char *prompt, const PAPI_hw_info_t ** hwinfo ) { if ( ( *hwinfo = PAPI_get_hardware_info( ) ) == NULL ) return ( PAPI_ESBSTR ); printf( "%s", prompt ); printf ( "--------------------------------------------------------------------------------\n" ); printf( "PAPI Version : %d.%d.%d.%d\n", PAPI_VERSION_MAJOR( PAPI_VERSION ), PAPI_VERSION_MINOR( PAPI_VERSION ), PAPI_VERSION_REVISION( PAPI_VERSION ), PAPI_VERSION_INCREMENT( PAPI_VERSION ) ); printf( "Vendor string and code : %s (%d)\n", ( *hwinfo )->vendor_string, ( *hwinfo )->vendor ); printf( "Model string and code : %s (%d)\n", ( *hwinfo )->model_string, ( *hwinfo )->model ); printf( "CPU Revision : %f\n", ( *hwinfo )->revision ); if ( ( *hwinfo )->cpuid_family > 0 ) printf ( "CPUID Info : Family: %d Model: %d Stepping: %d\n", ( *hwinfo )->cpuid_family, ( *hwinfo )->cpuid_model, ( *hwinfo )->cpuid_stepping ); printf( "CPU Megahertz : %f\n", ( *hwinfo )->mhz ); printf( "CPU Clock Megahertz : %d\n", ( *hwinfo )->clock_mhz ); if ( ( *hwinfo )->threads > 0 ) printf( "Hdw Threads per core : %d\n", ( *hwinfo )->threads ); if ( ( *hwinfo )->cores > 0 ) printf( "Cores per Socket : %d\n", ( *hwinfo )->cores ); if ( ( *hwinfo )->sockets > 0 ) printf( "Sockets : %d\n", ( *hwinfo )->sockets ); if ( ( *hwinfo )->nnodes > 0 ) printf( "NUMA Nodes : %d\n", ( *hwinfo )->nnodes ); printf( "CPUs per Node : %d\n", ( *hwinfo )->ncpu ); printf( "Total CPUs : %d\n", ( *hwinfo )->totalcpus ); printf( "Running in a VM : %s\n", ( *hwinfo )->virtualized? "yes":"no"); if ( (*hwinfo)->virtualized) { printf( "VM Vendor: : %s\n", (*hwinfo)->virtual_vendor_string); } printf( "Number Hardware Counters : %d\n", PAPI_get_opt( PAPI_MAX_HWCTRS, NULL ) ); printf( "Max Multiplex Counters : %d\n", PAPI_get_opt( PAPI_MAX_MPX_CTRS, NULL ) ); printf ( "--------------------------------------------------------------------------------\n" ); printf( "\n" ); return PAPI_OK; }
/* Support routine to display header information to the screen from the hardware info data structure. The same code was duplicated in a number of tests and utilities. Seems to make sense to refactor. This may not be the best place for it to live, but it works for now. */ int papi_print_header( char *prompt, int event_flag, const PAPI_hw_info_t ** hwinfo ) { if ( ( *hwinfo = PAPI_get_hardware_info( ) ) == NULL ) return ( PAPI_ESBSTR ); printf( "%s", prompt ); printf ( "--------------------------------------------------------------------------------\n" ); printf( "PAPI Version : %d.%d.%d.%d\n", PAPI_VERSION_MAJOR( PAPI_VERSION ), PAPI_VERSION_MINOR( PAPI_VERSION ), PAPI_VERSION_REVISION( PAPI_VERSION ), PAPI_VERSION_INCREMENT( PAPI_VERSION ) ); printf( "Vendor string and code : %s (%d)\n", ( *hwinfo )->vendor_string, ( *hwinfo )->vendor ); printf( "Model string and code : %s (%d)\n", ( *hwinfo )->model_string, ( *hwinfo )->model ); printf( "CPU Revision : %f\n", ( *hwinfo )->revision ); if ( ( *hwinfo )->cpuid_family > 0 ) printf ( "CPUID Info : Family: %d Model: %d Stepping: %d\n", ( *hwinfo )->cpuid_family, ( *hwinfo )->cpuid_model, ( *hwinfo )->cpuid_stepping ); printf( "CPU Megahertz : %f\n", ( *hwinfo )->mhz ); printf( "CPU Clock Megahertz : %d\n", ( *hwinfo )->clock_mhz ); if ( ( *hwinfo )->threads > 0 ) printf( "Hdw Threads per core : %d\n", ( *hwinfo )->threads ); if ( ( *hwinfo )->cores > 0 ) printf( "Cores per Socket : %d\n", ( *hwinfo )->cores ); if ( ( *hwinfo )->sockets > 0 ) printf( "Sockets : %d\n", ( *hwinfo )->sockets ); if ( ( *hwinfo )->nnodes > 0 ) printf( "NUMA Nodes : %d\n", ( *hwinfo )->nnodes ); printf( "CPU's per Node : %d\n", ( *hwinfo )->ncpu ); printf( "Total CPU's : %d\n", ( *hwinfo )->totalcpus ); printf( "Number Hardware Counters : %d\n", PAPI_get_opt( PAPI_MAX_HWCTRS, NULL ) ); printf( "Max Multiplex Counters : %d\n", PAPI_get_opt( PAPI_MAX_MPX_CTRS, NULL ) ); printf ( "--------------------------------------------------------------------------------\n" ); if ( event_flag ) printf ( "The following correspond to fields in the PAPI_event_info_t structure.\n" ); printf( "\n" ); return ( PAPI_OK ); }
/** * Called by the CBTF collector service in order to start data collection. */ void cbtf_collector_start(const CBTF_DataHeader* header) { /** * Start sampling. * * Starts hardware counter (HWC) sampling for the thread executing this * function. Initializes the appropriate thread-local data structures and * then enables the sampling counter. * * @param arguments Encoded function arguments. */ /* Create and access our thread-local storage */ #ifdef USE_EXPLICIT_TLS TLS* tls = malloc(sizeof(TLS)); Assert(tls != NULL); CBTF_SetTLS(TLSKey, tls); #else TLS* tls = &the_tls; #endif Assert(tls != NULL); tls->defer_sampling=false; #ifndef NDEBUG IsCollectorDebugEnabled = (getenv("CBTF_DEBUG_COLLECTOR") != NULL); IsCollectorDetailsDebugEnabled = (getenv("CBTF_DEBUG_COLLECTOR_DETAILS") != NULL); #if defined (HAVE_OMPT) IsOMPTDebugEnabled = (getenv("CBTF_DEBUG_COLLECTOR_OMPT") != NULL); #endif #endif /* Decode the passed function arguments */ // Need to handle the arguments... CBTF_hwcsamp_start_sampling_args args; memset(&args, 0, sizeof(args)); args.sampling_rate = 100; /* First set defaults */ int hwcsamp_rate = 100; char* hwcsamp_papi_event = "PAPI_TOT_CYC,PAPI_TOT_INS"; #if defined (CBTF_SERVICE_USE_OFFLINE) char* hwcsamp_event_param = getenv("CBTF_HWCSAMP_EVENTS"); if (hwcsamp_event_param != NULL) { hwcsamp_papi_event=hwcsamp_event_param; } const char* sampling_rate = getenv("CBTF_HWCSAMP_RATE"); if (sampling_rate != NULL) { hwcsamp_rate=atoi(sampling_rate); } args.collector = 1; args.experiment = 0; tls->data.interval = (uint64_t)(1000000000) / (uint64_t)(hwcsamp_rate);; #endif /* Initialize the actual data blob */ memcpy(&tls->header, header, sizeof(CBTF_DataHeader)); initialize_data(tls); /* We can not assign mpi rank in the header at this point as it may not * be set yet. assign an integer tid value. omp_tid is used regardless of * whether the application is using openmp threads. * libmonitor uses the same numbering scheme as openmp. */ tls->header.omp_tid = monitor_get_thread_num(); tls->header.id = strdup(cbtf_collector_unique_id); tls->header.time_begin = CBTF_GetTime(); #ifndef NDEBUG if (IsCollectorDebugEnabled) { fprintf(stderr,"[%ld,%d] ENTER cbtf_collector_start\n",tls->header.pid,tls->header.omp_tid); } #endif if(hwcsamp_papi_init_done == 0) { #ifndef NDEBUG if (IsCollectorDebugEnabled) { fprintf(stderr,"[%ld,%d] cbtf_collector_start: initialize papi\n",tls->header.pid,tls->header.omp_tid); } #endif CBTF_init_papi(); tls->EventSet = PAPI_NULL; tls->data.clock_mhz = (float) hw_info->mhz; hwcsamp_papi_init_done = 1; } else { tls->data.clock_mhz = (float) hw_info->mhz; } /* PAPI SETUP */ CBTF_Create_Eventset(&tls->EventSet); int rval = PAPI_OK; #ifndef NDEBUG if (IsCollectorDebugEnabled) { fprintf(stderr, "PAPI Version: %d.%d.%d.%d\n", PAPI_VERSION_MAJOR( PAPI_VERSION ), PAPI_VERSION_MINOR( PAPI_VERSION ), PAPI_VERSION_REVISION( PAPI_VERSION ), PAPI_VERSION_INCREMENT( PAPI_VERSION ) ); fprintf(stderr,"System has %d hardware counters.\n", PAPI_num_counters()); } #endif /* In Component PAPI, EventSets must be assigned a component index * before you can fiddle with their internals. 0 is always the cpu component */ #if (PAPI_VERSION_MAJOR(PAPI_VERSION)>=4) rval = PAPI_assign_eventset_component( tls->EventSet, 0 ); if (rval != PAPI_OK) { CBTF_PAPIerror(rval,"CBTF_Create_Eventset assign_eventset_component"); return; } #endif /* NOTE: if multiplex is turned on, papi internaly uses a SIGPROF handler. * Since we are sampling potentially with SIGPROF or now SIGRTMIN and we * prefer to limit our events to 6, we do not need multiplexing. */ if (getenv("CBTF_HWCSAMP_MULTIPLEX") != NULL) { #if !defined(RUNTIME_PLATFORM_BGP) rval = PAPI_set_multiplex( tls->EventSet ); if ( rval == PAPI_ENOSUPP) { fprintf(stderr,"CBTF_Create_Eventset: Multiplex not supported\n"); } else if (rval != PAPI_OK) { CBTF_PAPIerror(rval,"CBTF_Create_Eventset set_multiplex"); } #endif } /* TODO: check return values of direct PAPI calls * and handle them as needed. */ /* Rework the code here to call PAPI directly rather than * call any OPENSS helper functions due to inconsitent * behaviour seen on various lab systems */ int eventcode = 0; rval = PAPI_OK; if (hwcsamp_papi_event != NULL) { char *tfptr, *saveptr=NULL, *tf_token; tfptr = strdup(hwcsamp_papi_event); for (tf_token = strtok_r(tfptr, ",", &saveptr); tf_token != NULL; tf_token = strtok_r(NULL, ",", &saveptr) ) { PAPI_event_name_to_code(tf_token,&eventcode); rval = PAPI_add_event(tls->EventSet,eventcode); if (rval != PAPI_OK) { CBTF_PAPIerror(rval,"CBTF_Create_Eventset PAPI_event_name_to_code"); } } if (tfptr) free(tfptr); } else { PAPI_event_name_to_code("PAPI_TOT_CYC",&eventcode); rval = PAPI_add_event(tls->EventSet,eventcode); PAPI_event_name_to_code("PAPI_TOT_INS",&eventcode); rval = PAPI_add_event(tls->EventSet,eventcode); } #if defined (HAVE_OMPT) /* these are ompt specific.*/ /* initialize the flags and counts for idle,wait_barrier. */ tls->thread_idle = tls->thread_wait_barrier = tls->thread_barrier = false; #endif /* Begin sampling */ tls->header.time_begin = CBTF_GetTime(); CBTF_Start(tls->EventSet); CBTF_Timer(tls->data.interval, hwcsampTimerHandler); }