void Thread( int n ) { int retval, num_tests = 1, tmp; int EventSet1 = PAPI_NULL; int mask1 = 0x5; int num_events1; long long **values; long long elapsed_us, elapsed_cyc; EventSet1 = add_test_events( &num_events1, &mask1 ); /* num_events1 is greater than num_events2 so don't worry. */ values = allocate_test_space( num_tests, num_events1 ); elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); retval = PAPI_start( EventSet1 ); if ( retval >= PAPI_OK ) exit( 1 ); do_flops( n ); retval = PAPI_stop( EventSet1, values[0] ); if ( retval >= PAPI_OK ) exit( 1 ); elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; remove_test_events( &EventSet1, mask1 ); printf( "Thread %#x PAPI_FP_INS : \t%lld\n", pthread_self( ), ( values[0] )[0] ); printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", pthread_self( ), ( values[0] )[1] ); printf( "Thread %#x Real usec : \t%lld\n", pthread_self( ), elapsed_us ); printf( "Thread %#x Real cycles : \t%lld\n", pthread_self( ), elapsed_cyc ); free_test_space( values, num_tests ); }
int add_two_events( int *num_events, int *papi_event, int *mask ) { /* query and set up the right event to monitor */ int EventSet = PAPI_NULL; PAPI_event_info_t info; unsigned int potential_evt_to_add[3][2] = { {( unsigned int ) PAPI_FP_INS, MASK_FP_INS}, {( unsigned int ) PAPI_FP_OPS, MASK_FP_OPS}, {( unsigned int ) PAPI_TOT_INS, MASK_TOT_INS} }; int i = 0; int counters, event_found = 0; *mask = 0; counters = PAPI_num_hwctrs( ); if (counters<=0) { test_fail(__FILE__,__LINE__,"Zero Counters Available! PAPI Won't like this!\n",0); } /* This code tries to ensure that the event generated will fit in the */ /* number of available counters. It doesn't account for the number */ /* of counters used by the cycle counter. */ for(i=0;i<3;i++) { if ( PAPI_query_event( (int) potential_evt_to_add[i][0] ) == PAPI_OK ) { if ( PAPI_get_event_info( (int) potential_evt_to_add[i][0], &info ) == PAPI_OK ) { if ( ( info.count > 0 ) && ( (unsigned) counters > info.count ) ) { event_found = 1; break; } } } } if ( event_found ) { *papi_event = ( int ) potential_evt_to_add[i][0]; *mask = ( int ) potential_evt_to_add[i][1] | MASK_TOT_CYC; EventSet = add_test_events( num_events, mask, 1 ); } else { test_fail( __FILE__, __LINE__, "Not enough room to add an event!", 0 ); } return EventSet; }
int add_two_nonderived_events( int *num_events, int *papi_event, int *mask ) { /* query and set up the right event to monitor */ int EventSet = PAPI_NULL; #define POTENTIAL_EVENTS 3 unsigned int potential_evt_to_add[POTENTIAL_EVENTS][2] = { {( unsigned int ) PAPI_FP_INS, MASK_FP_INS}, {( unsigned int ) PAPI_FP_OPS, MASK_FP_OPS}, {( unsigned int ) PAPI_TOT_INS, MASK_TOT_INS} }; int event_found = 0,i; *mask = 0; for(i=0;i<POTENTIAL_EVENTS;i++) { if ( PAPI_query_event( ( int ) potential_evt_to_add[i][0] ) == PAPI_OK ) { if ( !is_event_derived(potential_evt_to_add[i][0])) { event_found = 1; break; } } } if ( event_found ) { *papi_event = ( int ) potential_evt_to_add[i][0]; *mask = ( int ) potential_evt_to_add[i][1] | MASK_TOT_CYC; EventSet = add_test_events( num_events, mask, 0 ); } else { test_fail( __FILE__, __LINE__, "Not enough room to add an event!", 0 ); } return EventSet; }
int add_two_nonderived_events( int *num_events, int *papi_event, int *mask ) { /* query and set up the right event to monitor */ int EventSet = PAPI_NULL; #define POTENTIAL_EVENTS 3 unsigned int potential_evt_to_add[POTENTIAL_EVENTS][2] = { {( unsigned int ) PAPI_FP_INS, MASK_FP_INS}, {( unsigned int ) PAPI_FP_OPS, MASK_FP_OPS}, {( unsigned int ) PAPI_TOT_INS, MASK_TOT_INS} }; int i; *mask = 0; /* could leak up to two event sets. */ for(i=0;i<POTENTIAL_EVENTS;i++) { if ( PAPI_query_event( ( int ) potential_evt_to_add[i][0] ) == PAPI_OK ) { if ( !is_event_derived(potential_evt_to_add[i][0])) { *papi_event = ( int ) potential_evt_to_add[i][0]; *mask = ( int ) potential_evt_to_add[i][1] | MASK_TOT_CYC; EventSet = add_test_events( num_events, mask, 0 ); if ( *num_events == 2 ) break; } } } if ( i == POTENTIAL_EVENTS ) { test_fail( __FILE__, __LINE__, "Can't find a non-derived event!", 0 ); } return EventSet; }
int main(int argc, char **argv) { int i, num_events, num_tests = 6, mask = 0x1; int EventSet = PAPI_NULL; unsigned short **buf = (unsigned short **)profbuf; unsigned long length, blength; int num_buckets; PAPI_sprofil_t sprof[3]; int retval; const PAPI_hw_info_t *hw_info; const PAPI_exe_info_t *prginfo; caddr_t start, end; prof_init(argc, argv, &hw_info, &prginfo); start = prginfo->address_info.text_start; end = prginfo->address_info.text_end; if (start > end) test_fail(__FILE__, __LINE__, "Profile length < 0!", PAPI_ESBSTR); length = end - start; prof_print_address("Test case sprofile: POSIX compatible profiling over multiple regions.\n",prginfo); blength = prof_size(length, FULL_SCALE, PAPI_PROFIL_BUCKET_16, &num_buckets); prof_alloc(3, blength); /* First half */ sprof[0].pr_base = buf[0]; sprof[0].pr_size = blength; sprof[0].pr_off = (caddr_t) DO_FLOPS; #if defined(linux) && defined(__ia64__) if (!TESTS_QUIET) fprintf(stderr, "do_flops is at %p %p\n", &do_flops, sprof[0].pr_off); #endif sprof[0].pr_scale = FULL_SCALE; /* Second half */ sprof[1].pr_base = buf[1]; sprof[1].pr_size = blength; sprof[1].pr_off = (caddr_t) DO_READS; #if defined(linux) && defined(__ia64__) if (!TESTS_QUIET) fprintf(stderr, "do_reads is at %p %p\n", &do_reads, sprof[1].pr_off); #endif sprof[1].pr_scale = FULL_SCALE; /* Overflow bin */ sprof[2].pr_base = buf[2]; sprof[2].pr_size = 1; sprof[2].pr_off = 0; sprof[2].pr_scale = 0x2; EventSet = add_test_events(&num_events, &mask); values = allocate_test_space(num_tests, num_events); if ((retval = PAPI_sprofil(sprof, 3, EventSet, PAPI_TOT_CYC, THRESHOLD, PAPI_PROFIL_POSIX | PAPI_PROFIL_BUCKET_16)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_sprofil", retval); do_stuff(); if ((retval = PAPI_start(EventSet)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_start", retval); do_stuff(); if ((retval = PAPI_stop(EventSet, values[1])) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_stop", retval); /* clear the profile flag before removing the event */ if ((retval = PAPI_sprofil(sprof, 3, EventSet, PAPI_TOT_CYC, 0, PAPI_PROFIL_POSIX | PAPI_PROFIL_BUCKET_16)) != PAPI_OK) test_fail(__FILE__, __LINE__, "PAPI_sprofil", retval); remove_test_events(&EventSet, mask); if (!TESTS_QUIET) { printf("Test case: PAPI_sprofil()\n"); printf("---------Buffer 1--------\n"); for (i = 0; i < length / 2; i++) { if (buf[0][i]) printf("0x%lx\t%d\n", DO_FLOPS + 2 * i, buf[0][i]); } printf("---------Buffer 2--------\n"); for (i = 0; i < length / 2; i++) { if (buf[1][i]) printf("0x%lx\t%d\n", DO_READS + 2 * i, buf[1][i]); } printf("-------------------------\n"); printf("%u samples fell outside the regions.\n", *buf[2]); } retval = prof_check(2, PAPI_PROFIL_BUCKET_16, num_buckets); for (i=0;i<3;i++) { free(profbuf[i]); } if (retval == 0) test_fail(__FILE__, __LINE__, "No information in buffers", 1); test_pass(__FILE__, values, num_tests); exit(1); }
int main( int argc, char **argv ) { long length; int mask; int retval; const PAPI_exe_info_t *prginfo; caddr_t start, end; prof_init( argc, argv, &prginfo ); hw_info = PAPI_get_hardware_info( ); if ( hw_info == NULL ) test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); mask = MASK_TOT_CYC | MASK_TOT_INS | MASK_FP_OPS | MASK_L2_TCM; #if defined(__powerpc__) if ( strcmp( hw_info->model_string, "POWER6" ) == 0 ) mask = MASK_TOT_CYC | MASK_FP_INS; else mask = MASK_TOT_CYC | MASK_TOT_INS | MASK_FP_INS; #endif #if defined(ITANIUM2) mask = MASK_TOT_CYC | MASK_FP_OPS | MASK_L2_TCM | MASK_L1_DCM; #endif EventSet = add_test_events( &num_events, &mask, 0 ); values = allocate_test_space( 1, num_events ); /* profile the cleara and my_main address space */ start = ( caddr_t ) cleara; end = ( caddr_t ) my_dummy; /* Itanium and PowerPC64 processors return function descriptors instead * of function addresses. You must dereference the descriptor to get the address. */ #if defined(ITANIUM1) || defined(ITANIUM2) || defined(__powerpc64__) start = ( caddr_t ) ( ( ( struct fdesc * ) start )->ip ); end = ( caddr_t ) ( ( ( struct fdesc * ) end )->ip ); #endif /* call dummy so it doesn't get optimized away */ retval = my_dummy( 1 ); length = end - start; if ( length < 0 ) test_fail( __FILE__, __LINE__, "Profile length < 0!", ( int ) length ); prof_print_address ( "Test case byte_profile: Multi-event profiling at byte resolution.\n", prginfo ); prof_print_prof_info( start, end, THRESHOLD, event_name ); retval = do_profile( start, ( unsigned ) length, FULL_SCALE * 2, THRESHOLD, PAPI_PROFIL_BUCKET_32, mask ); remove_test_events( &EventSet, mask ); if ( retval ) test_pass( __FILE__, values, 1 ); else test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); return 1; }
int main( int argc, char **argv ) { int retval, num_tests = 30, tmp; int EventSet1 = PAPI_NULL; int EventSet2 = PAPI_NULL; int EventSet3 = PAPI_NULL; int mask1 = MASK1; int mask2 = MASK2; int mask3 = MASK3; int num_events1; int num_events2; int num_events3; long long **values; int i, j; long long min[3]; long long max[3]; long long sum[3]; tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); /* Make sure that required resources are available */ /* Skip (don't fail!) if they are not */ retval = PAPI_query_event( EVT1 ); if ( retval != PAPI_OK ) test_skip( __FILE__, __LINE__, EVT1_STR, retval ); retval = PAPI_query_event( EVT2 ); if ( retval != PAPI_OK ) test_skip( __FILE__, __LINE__, EVT2_STR, retval ); retval = PAPI_query_event( EVT3 ); if ( retval != PAPI_OK ) test_skip( __FILE__, __LINE__, EVT3_STR, retval ); EventSet1 = add_test_events( &num_events1, &mask1, 1 ); EventSet2 = add_test_events( &num_events2, &mask2, 1 ); EventSet3 = add_test_events( &num_events3, &mask3, 1 ); values = allocate_test_space( num_tests, 1 ); /* Warm me up */ do_l1misses( ITERS ); do_misses( 1, 1024 * 1024 * 4 ); for ( i = 0; i < 10; i++ ) { retval = PAPI_start( EventSet1 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_l1misses( ITERS ); do_misses( 1, 1024 * 1024 * 4 ); retval = PAPI_stop( EventSet1, values[( i * 3 ) + 0] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); retval = PAPI_start( EventSet2 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_l1misses( ITERS ); do_misses( 1, 1024 * 1024 * 4 ); retval = PAPI_stop( EventSet2, values[( i * 3 ) + 1] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); retval = PAPI_start( EventSet3 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_l1misses( ITERS ); do_misses( 1, 1024 * 1024 * 4 ); retval = PAPI_stop( EventSet3, values[( i * 3 ) + 2] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } remove_test_events( &EventSet1, mask1 ); remove_test_events( &EventSet2, mask2 ); remove_test_events( &EventSet3, mask3 ); for ( j = 0; j < 3; j++ ) { min[j] = 65535; max[j] = sum[j] = 0; } for ( i = 0; i < 10; i++ ) { for ( j = 0; j < 3; j++ ) { if ( min[j] > values[( i * 3 ) + j][0] ) min[j] = values[( i * 3 ) + j][0]; if ( max[j] < values[( i * 3 ) + j][0] ) max[j] = values[( i * 3 ) + j][0]; sum[j] += values[( i * 3 ) + j][0]; } } if ( !TESTS_QUIET ) { printf( "Test case 10: start, stop for derived event %s.\n", CACHE_LEVEL ); printf( "--------------------------------------------------------\n" ); tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) ); tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); printf( "Default granularity is: %d (%s)\n", tmp, stringify_granularity( tmp ) ); printf( "Using %d iterations of c += a*b\n", ITERS ); printf( "Repeated 10 times\n" ); printf ( "-------------------------------------------------------------------------\n" ); /* for (i=0;i<10;i++) { printf("Test type : %12s%13s%13s\n", "1", "2", "3"); printf(TAB3, EVT1_STR, values[(i*3)+0][0], (long long)0, (long long)0); printf(TAB3, EVT2_STR, (long long)0, values[(i*3)+1][0], (long long)0); printf(TAB3, EVT3_STR, (long long)0, (long long)0, values[(i*3)+2][0]); printf ("-------------------------------------------------------------------------\n"); } */ printf( "Test type : %12s%13s%13s\n", "min", "max", "sum" ); printf( TAB3, EVT1_STR, min[0], max[0], sum[0] ); printf( TAB3, EVT2_STR, min[1], max[1], sum[1] ); printf( TAB3, EVT3_STR, min[2], max[2], sum[2] ); printf ( "-------------------------------------------------------------------------\n" ); printf( "Verification:\n" ); #if defined(sun) && defined(sparc) printf( TAB1, "Sum 1 approximately equals sum 2 - sum 3 or", ( sum[1] - sum[2] ) ); #else printf( TAB1, "Sum 1 approximately equals sum 2 + sum 3 or", ( sum[1] + sum[2] ) ); #endif } { long long tmin, tmax; #if defined(sun) && defined(sparc) tmax = ( long long ) ( sum[1] - sum[2] ); #else tmax = ( long long ) ( sum[1] + sum[2] ); #endif printf( "percent error: %f\n", ( float ) ( abs( ( int ) ( tmax - sum[0] ) ) * 100 / sum[0] ) ); tmin = ( long long ) ( ( double ) tmax * 0.8 ); tmax = ( long long ) ( ( double ) tmax * 1.2 ); if ( sum[0] > tmax || sum[0] < tmin ) test_fail( __FILE__, __LINE__, CACHE_LEVEL, 1 ); } test_pass( __FILE__, values, num_tests ); exit( 1 ); }