/**
 * Find the CpuInfo_t record for cpu_num, creating one if none exists,
 * and take a reference on it.
 *
 * The lookup, the optional creation and the use-count increment all
 * happen while CPUS_LOCK is held.
 *
 * @param here     Output pointer; written only when PAPI_OK is returned.
 * @param cpu_num  Number of the CPU to look up or create.
 * @return PAPI_OK on success, otherwise the error code returned by
 *         _papi_hwi_initialize_cpu().
 */
int
_papi_hwi_lookup_or_create_cpu( CpuInfo_t **here, unsigned int cpu_num )
{
	APIDBG("Entry: here: %p\n", here);

	CpuInfo_t *tmp = NULL;
	int retval = PAPI_OK;

	_papi_hwi_lock( CPUS_LOCK );

	tmp = _papi_hwi_lookup_cpu(cpu_num);
	if ( tmp == NULL ) {
		retval = _papi_hwi_initialize_cpu( &tmp, cpu_num );
	}

	/* Only count a user (and hand the record back) when we really have
	 * a record.  On an initialization failure tmp may still be NULL, so
	 * touching tmp->num_users unconditionally would dereference NULL. */
	if ( retval == PAPI_OK && tmp != NULL ) {
		tmp->num_users++;
		*here = tmp;
	}

	_papi_hwi_unlock( CPUS_LOCK );

	return retval;
}
int _papi_hwi_init_global_threads( void ) { int retval; ThreadInfo_t *tmp; _papi_hwi_lock( GLOBAL_LOCK ); #if defined(HAVE_THREAD_LOCAL_STORAGE) _papi_hwi_my_thread = NULL; #endif _papi_hwi_thread_head = NULL; _papi_hwi_thread_id_fn = NULL; #if defined(ANY_THREAD_GETS_SIGNAL) _papi_hwi_thread_kill_fn = NULL; #endif retval = _papi_hwi_initialize_thread( &tmp , 0); if ( retval == PAPI_OK ) retval = lookup_and_set_thread_symbols( ); _papi_hwi_unlock( GLOBAL_LOCK ); return ( retval ); }
/*
 * Link 'entry' into the circular CPU list and make it the new list head.
 * The list is modified while CPUS_LOCK is held.
 */
static void
insert_cpu( CpuInfo_t * entry )
{
	THRDBG("Entry: entry: %p\n", entry);

	_papi_hwi_lock( CPUS_LOCK );

	if ( _papi_hwi_cpu_head != NULL ) {
		if ( _papi_hwi_cpu_head->next != _papi_hwi_cpu_head ) {
			/* Two or more records: splice in right after the old head. */
			THRDBG( "_papi_hwi_cpu_head was cpu %d at %p\n",
					_papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head );
			entry->next = _papi_hwi_cpu_head->next;
			_papi_hwi_cpu_head->next = entry;
		} else {
			/* Exactly one record: the two records point at each other. */
			THRDBG( "_papi_hwi_cpu_head was cpu %d at %p\n",
					_papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head );
			_papi_hwi_cpu_head->next = entry;
			entry->next = ( CpuInfo_t * ) _papi_hwi_cpu_head;
		}
	} else {
		/* Empty list: the new record points at itself. */
		THRDBG( "_papi_hwi_cpu_head is NULL\n" );
		entry->next = entry;
	}

	/* The most recently inserted record always becomes the head. */
	_papi_hwi_cpu_head = entry;

	THRDBG( "_papi_hwi_cpu_head now cpu %d at %p\n",
			_papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head );

	_papi_hwi_unlock( CPUS_LOCK );
}
static int _papi_hwi_thread_free_eventsets(long tid) { EventSetInfo_t *ESI; ThreadInfo_t *master; DynamicArray_t *map = &_papi_hwi_system_info.global_eventset_map; int i; master = _papi_hwi_lookup_thread( tid ); _papi_hwi_lock( INTERNAL_LOCK ); for( i = 0; i < map->totalSlots; i++ ) { ESI = map->dataSlotArray[i]; if ( ( ESI ) && (ESI->master!=NULL) ) { if ( ESI->master == master ) { THRDBG("Attempting to remove %d from tid %ld\n",ESI->EventSetIndex,tid); /* Code copied from _papi_hwi_remove_EventSet(ESI); */ /* We can't just call that, as it uses INTERNAL_LOCK too */ _papi_hwi_free_EventSet( ESI ); map->dataSlotArray[i] = NULL; map->availSlots++; map->fullSlots--; } } } _papi_hwi_unlock( INTERNAL_LOCK ); return PAPI_OK; }
int _papi_hwi_broadcast_signal( unsigned int mytid ) { int i, retval, didsomething = 0; volatile ThreadInfo_t *foo = NULL; _papi_hwi_lock( THREADS_LOCK ); for ( foo = _papi_hwi_thread_head; foo != NULL; foo = foo->next ) { /* xxxx Should this be hardcoded to index 0 or walk the list or what? */ for ( i = 0; i < papi_num_components; i++ ) { if ( ( foo->tid != mytid ) && ( foo->running_eventset[i] ) && ( foo->running_eventset[i]-> state & ( PAPI_OVERFLOWING | PAPI_MULTIPLEXING ) ) ) { /* xxxx mpx_info inside _papi_mdi_t _papi_hwi_system_info is commented out. See papi_internal.h for details. The multiplex_timer_sig value is now part of that structure */ THRDBG("Thread %ld sending signal %d to thread %ld\n",mytid,foo->tid, (foo->running_eventset[i]->state & PAPI_OVERFLOWING ? _papi_hwd[i]->cmp_info.hardware_intr_sig : _papi_hwd[i]->cmp_info.itimer_sig)); retval = (*_papi_hwi_thread_kill_fn)(foo->tid, (foo->running_eventset[i]->state & PAPI_OVERFLOWING ? _papi_hwd[i]->cmp_info.hardware_intr_sig : _papi_hwd[i]->cmp_info.itimer_sig)); if (retval != 0) return(PAPI_EMISC); } } if ( foo->next == _papi_hwi_thread_head ) break; } _papi_hwi_unlock( THREADS_LOCK ); return ( PAPI_OK ); }
/*
 * Drop one reference on *cpu and destroy the record when the last user
 * goes away.
 *
 * The use count is decremented under CPUS_LOCK.  If users remain, the
 * function returns without touching anything else.  Otherwise the record
 * is unlinked from the CPU list, every enabled component's
 * shutdown_thread() is called on its context, the per-component contexts,
 * the context array and the running_eventset array are freed, the record
 * itself is cleared and freed, and *cpu is set to NULL.
 */
static void
free_cpu( CpuInfo_t **cpu )
{
	APIDBG( "Entry: *cpu: %p, cpu_num: %d, cpu_users: %d\n",
			*cpu, ( *cpu )->cpu_num, (*cpu)->num_users);

	int i,users,retval;

	_papi_hwi_lock( CPUS_LOCK );

	(*cpu)->num_users--;
	users=(*cpu)->num_users;

	/* Remove from linked list if no users */
	/* NOTE(review): remove_cpu() acquires CPUS_LOCK as well, so this call
	 * relies on the lock being re-entrant (or a no-op) here -- confirm. */
	if (!users) remove_cpu( *cpu );

	_papi_hwi_unlock( CPUS_LOCK );

	/* Exit early if still users of this CPU */
	if (users!=0) return;

	THRDBG( "Shutting down cpu %d at %p\n", (*cpu)->cpu_num, *cpu );

	/* Guard the whole context teardown: the array must exist before any
	 * element of it is read. */
	if ( ( *cpu )->context ) {
		for ( i = 0; i < papi_num_components; i++ ) {
			if (_papi_hwd[i]->cmp_info.disabled) continue;
			retval = _papi_hwd[i]->shutdown_thread( (*cpu)->context[i] );
			if ( retval != PAPI_OK ) {
				/* Best effort: report the failure and keep tearing down. */
				THRDBG( "shutdown_thread failed for component %d on cpu %d: %d\n",
						i, (*cpu)->cpu_num, retval );
			}
		}

		for ( i = 0; i < papi_num_components; i++ ) {
			if ( ( *cpu )->context[i] ) {
				papi_free( ( *cpu )->context[i] );
			}
		}

		papi_free( ( *cpu )->context );
	}

	if ( ( *cpu )->running_eventset ) {
		papi_free( ( *cpu )->running_eventset );
	}

	/* why do we clear this? */
	memset( *cpu, 0x00, sizeof ( CpuInfo_t ) );
	papi_free( *cpu );
	*cpu = NULL;
}
/*
 * Unlink 'entry' from the circular thread list.
 *
 * The list is searched and modified while THREADS_LOCK is held.  The
 * search walks the ring at most once, so an entry that is not on the list
 * is reported instead of looping forever, and an empty list is handled
 * without dereferencing the NULL head.
 *
 * When the list holds only 'entry', the head becomes NULL and entry->next
 * is cleared.  Otherwise the predecessor is linked to entry->next and, if
 * 'entry' was the head, the head advances to entry->next.
 *
 * On success the calling thread's _papi_hwi_my_thread is also reset to
 * NULL when HAVE_THREAD_LOCAL_STORAGE is defined.
 *
 * Returns PAPI_OK on success, PAPI_EBUG if 'entry' is not on the list.
 */
static int
remove_thread( ThreadInfo_t * entry )
{
	ThreadInfo_t *head, *prev;

	_papi_hwi_lock( THREADS_LOCK );

	head = ( ThreadInfo_t * ) _papi_hwi_thread_head;
	if ( head == NULL )
		goto not_found;

	THRDBG( "_papi_hwi_thread_head was thread %ld at %p\n",
			_papi_hwi_thread_head->tid, _papi_hwi_thread_head );

	/* Find the element that precedes 'entry'.  Give up once we have gone
	 * all the way around (or hit a broken link). */
	prev = head;
	while ( prev->next != entry ) {
		prev = prev->next;
		if ( prev == NULL || prev == head )
			goto not_found;
	}

	if ( prev == entry ) {
		/* Only 1 element in list: it pointed at itself. */
		_papi_hwi_thread_head = NULL;
		entry->next = NULL;
		THRDBG( "_papi_hwi_thread_head now NULL\n" );
	} else {
		prev->next = entry->next;
		/* If we're removing the head, better advance it! */
		if ( head == entry ) {
			_papi_hwi_thread_head = entry->next;
			THRDBG( "_papi_hwi_thread_head now thread %ld at %p\n",
					_papi_hwi_thread_head->tid, _papi_hwi_thread_head );
		}
		THRDBG( "Removed thread %p from list\n", entry );
	}

	_papi_hwi_unlock( THREADS_LOCK );

#if defined(HAVE_THREAD_LOCAL_STORAGE)
	_papi_hwi_my_thread = NULL;
	THRDBG( "TLS for thread %ld is now %p\n", entry->tid,
			_papi_hwi_my_thread );
#endif

	return PAPI_OK;

not_found:
	THRDBG( "Thread %ld at %p was not found in the thread list!\n",
			entry->tid, entry );
	/* Release the lock on the error path as well. */
	_papi_hwi_unlock( THREADS_LOCK );
	return ( PAPI_EBUG );
}
/*
 * Unlink 'entry' from the circular CPU list.
 *
 * The list is searched and modified while CPUS_LOCK is held.  The search
 * walks the ring at most once, so an entry that is not on the list is
 * reported instead of looping forever, and an empty list is handled
 * without dereferencing the NULL head.
 *
 * When the list holds only 'entry', the head becomes NULL and entry->next
 * is cleared.  Otherwise the predecessor is linked to entry->next and, if
 * 'entry' was the head, the head advances to entry->next.
 *
 * NOTE(review): this function takes CPUS_LOCK itself; a caller that
 * already holds CPUS_LOCK depends on the lock being re-entrant -- confirm.
 *
 * Returns PAPI_OK on success, PAPI_EBUG if 'entry' is not on the list.
 */
static int
remove_cpu( CpuInfo_t * entry )
{
	THRDBG("Entry: entry: %p\n", entry);

	CpuInfo_t *head, *prev;

	_papi_hwi_lock( CPUS_LOCK );

	head = ( CpuInfo_t * ) _papi_hwi_cpu_head;
	if ( head == NULL )
		goto not_found;

	THRDBG( "_papi_hwi_cpu_head was cpu %d at %p\n",
			_papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head );

	/* Find the element that precedes 'entry'.  Give up once we have gone
	 * all the way around (or hit a broken link). */
	prev = head;
	while ( prev->next != entry ) {
		prev = prev->next;
		if ( prev == NULL || prev == head )
			goto not_found;
	}

	if ( prev == entry ) {
		/* Only 1 element in list: it pointed at itself. */
		_papi_hwi_cpu_head = NULL;
		entry->next = NULL;
		THRDBG( "_papi_hwi_cpu_head now NULL\n" );
	} else {
		prev->next = entry->next;
		/* If we're removing the head, better advance it! */
		if ( head == entry ) {
			_papi_hwi_cpu_head = entry->next;
			THRDBG( "_papi_hwi_cpu_head now cpu %d at %p\n",
					_papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head );
		}
		THRDBG( "Removed cpu %p from list\n", entry );
	}

	_papi_hwi_unlock( CPUS_LOCK );

	return ( PAPI_OK );

not_found:
	THRDBG( "Cpu %d at %p was not found in the cpu list!\n",
			entry->cpu_num, entry );
	/* Release the lock on the error path as well. */
	_papi_hwi_unlock( CPUS_LOCK );
	return ( PAPI_EBUG );
}
/** @internal
 * Make sure the library and the calling thread's high-level state are
 * set up, and hand that state back to the caller.
 *
 * If init_level is PAPI_NOT_INITED the library is initialized with
 * PAPI_library_init() and init_level is advanced to
 * PAPI_HIGH_LEVEL_INITED.  If the thread has no HighLevelInfo stored under
 * PAPI_HIGH_LEVEL_TLS yet, one is allocated, zeroed, given a freshly
 * created EventSet and stored there.
 *
 * @param outgoing  Output; receives the thread's HighLevelInfo on success.
 * @return PAPI_OK on success; the value returned by PAPI_library_init()
 *         if it is not PAPI_VER_CURRENT; PAPI_ENOMEM if the state cannot
 *         be allocated; otherwise the error from PAPI_create_eventset()
 *         or PAPI_set_thr_specific().  Nothing is leaked on failure.
 */
int
_internal_check_state( HighLevelInfo ** outgoing )
{
	int retval;
	HighLevelInfo *state = NULL;

	/* Only the update of init_level is serialized (HIGHLEVEL_LOCK); the
	 * check and the library initialization themselves are not. */
	if ( init_level == PAPI_NOT_INITED ) {
		retval = PAPI_library_init( PAPI_VER_CURRENT );
		if ( retval != PAPI_VER_CURRENT ) {
			return ( retval );
		} else {
			_papi_hwi_lock( HIGHLEVEL_LOCK );
			init_level = PAPI_HIGH_LEVEL_INITED;
			_papi_hwi_unlock( HIGHLEVEL_LOCK );
		}
	}

	/*
	 * Do we have the thread specific data setup yet?
	 */
	if ( ( retval =
		   PAPI_get_thr_specific( PAPI_HIGH_LEVEL_TLS, ( void * ) &state ) )
		 != PAPI_OK || state == NULL ) {
		state = ( HighLevelInfo * ) papi_malloc( sizeof ( HighLevelInfo ) );
		if ( state == NULL )
			return ( PAPI_ENOMEM );

		memset( state, 0, sizeof ( HighLevelInfo ) );
		state->EventSet = -1;

		if ( ( retval = PAPI_create_eventset( &state->EventSet ) ) != PAPI_OK ) {
			/* Nobody else knows about 'state' yet; release it. */
			papi_free( state );
			return ( retval );
		}

		if ( ( retval =
			   PAPI_set_thr_specific( PAPI_HIGH_LEVEL_TLS,
									  state ) ) != PAPI_OK ) {
			/* The state was never published: undo both the EventSet
			 * and the allocation (best effort on the EventSet). */
			( void ) PAPI_destroy_eventset( &state->EventSet );
			papi_free( state );
			return ( retval );
		}
	}

	*outgoing = state;
	return ( PAPI_OK );
}
/*
 * Link 'entry' into the circular thread list and make it the new head.
 * The list is modified while THREADS_LOCK is held.
 *
 * When HAVE_THREAD_LOCAL_STORAGE is defined and 'tid' is 0, 'entry' is
 * also recorded as the calling thread's _papi_hwi_my_thread; a non-zero
 * 'tid' (a "fake attach" thread) leaves the thread-local pointer alone.
 */
static void
insert_thread( ThreadInfo_t * entry, int tid )
{
	_papi_hwi_lock( THREADS_LOCK );

	if ( _papi_hwi_thread_head != NULL ) {
		if ( _papi_hwi_thread_head->next != _papi_hwi_thread_head ) {
			/* Two or more records: splice in right after the old head. */
			THRDBG( "_papi_hwi_thread_head was thread %ld at %p\n",
					_papi_hwi_thread_head->tid, _papi_hwi_thread_head );
			entry->next = _papi_hwi_thread_head->next;
			_papi_hwi_thread_head->next = entry;
		} else {
			/* Exactly one record: the two records point at each other. */
			THRDBG( "_papi_hwi_thread_head was thread %ld at %p\n",
					_papi_hwi_thread_head->tid, _papi_hwi_thread_head );
			_papi_hwi_thread_head->next = entry;
			entry->next = ( ThreadInfo_t * ) _papi_hwi_thread_head;
		}
	} else {
		/* Empty list: the new record points at itself. */
		THRDBG( "_papi_hwi_thread_head is NULL\n" );
		entry->next = entry;
	}

	/* The most recently inserted record always becomes the head. */
	_papi_hwi_thread_head = entry;

	THRDBG( "_papi_hwi_thread_head now thread %ld at %p\n",
			_papi_hwi_thread_head->tid, _papi_hwi_thread_head );

	_papi_hwi_unlock( THREADS_LOCK );

#if defined(HAVE_THREAD_LOCAL_STORAGE)
	/* Don't set the current local thread if we are a fake attach thread */
	if (tid==0) {
		_papi_hwi_my_thread = entry;
		THRDBG( "TLS for thread %ld is now %p\n", entry->tid,
				_papi_hwi_my_thread );
	}
#endif
}
/**
 * Collect thread ids and/or one thread-specific data pointer from every
 * thread on the thread list.
 *
 * The list is walked once while THREADS_LOCK is held.  For each thread,
 * its tid is copied into where->id (if non-NULL) and its
 * thread_storage[tag] into where->data (if non-NULL).  When either output
 * array is supplied, at most where->num entries are written; the capacity
 * is checked before each write, so a capacity of zero or less writes
 * nothing.  When both arrays are NULL the threads are only counted.
 *
 * @param tag    Index into each thread's thread_storage array.
 * @param where  In: optional output arrays and their capacity (num).
 *               Out: num is set to the number of threads visited.
 * @return Always PAPI_OK.
 */
int
_papi_hwi_gather_all_thrspec_data( int tag, PAPI_all_thr_spec_t * where )
{
	int didsomething = 0;
	ThreadInfo_t *foo = NULL;

	_papi_hwi_lock( THREADS_LOCK );

	for ( foo = ( ThreadInfo_t * ) _papi_hwi_thread_head; foo != NULL;
		  foo = foo->next ) {
		/* Check capacity BEFORE writing so we never store past the end
		 * of a caller-supplied array. */
		if ( ( where->id ) || ( where->data ) ) {
			if ( didsomething >= where->num )
				break;
		}

		/* If we want thread ID's */
		if ( where->id )
			memcpy( &where->id[didsomething], &foo->tid,
					sizeof ( where->id[didsomething] ) );

		/* If we want data pointers */
		if ( where->data )
			where->data[didsomething] = foo->thread_storage[tag];

		didsomething++;

		/* The list is circular: stop once we are back at the head. */
		if ( foo->next == _papi_hwi_thread_head )
			break;
	}

	where->num = didsomething;
	_papi_hwi_unlock( THREADS_LOCK );

	return ( PAPI_OK );
}
/*
 * Signal handler that drives software multiplexing for the calling thread.
 *
 * Outline of what the code below does:
 *   1. (PTHREADS builds only) under MULTIPLEX_LOCK, either fan the signal
 *      out to the other threads on tlist or account for having been
 *      signalled by another thread, using threads_responding.
 *   2. Fetch this thread's master event list.  If the thread has a current
 *      event, stop it with PAPI_stop(), fold the counts into the event's
 *      running totals and estimates, choose the next active event in the
 *      list (wrapping to the head), and restart with PAPI_start().
 *   3. (ANY_THREAD_GETS_SIGNAL builds only) if this thread has no master
 *      event list, forward the signal to every other thread on tlist that
 *      has one.
 *   4. (REGENERATE builds only) re-arm the interval timer once the last
 *      thread has responded.
 *
 * The 'signal' argument is not used.
 *
 * NOTE(review): the PTHREADS and MPX_DEBUG_OVERHEAD sections use 't' and
 * 'self', which are not declared anywhere in this function as shown --
 * confirm those configurations still build.
 * NOTE(review): 'me' is still NULL at the "timer restarted" debug message
 * when this thread has no master event list -- confirm that combination
 * (REGENERATE + MPX_DEBUG_TIMER) cannot dereference it.
 */
static void
mpx_handler( int signal )
{
	int retval;
	MasterEvent *mev, *head;
	Threadlist *me = NULL;
#ifdef REGENERATE
	int lastthread;
#endif
#ifdef MPX_DEBUG_OVERHEAD
	long long usec;
	int didwork = 0;
	usec = PAPI_get_real_usec(  );
#endif
#ifdef MPX_DEBUG_TIMER
	long long thiscall;
#endif

	signal = signal;		 /* unused; self-assignment presumably silences a compiler warning */

	MPXDBG( "Handler in thread\n" );

	/* This handler can be invoked either when a timer expires
	 * or when another thread in this handler responding to the
	 * timer signals other threads.  We have to distinguish
	 * these two cases so that we don't get infinite loop of
	 * handler calls.  To do that, we look at the value of
	 * threads_responding.  We assume that only one thread can
	 * be active in this signal handler at a time, since the
	 * invoking signal is blocked while the handler is active.
	 * If threads_responding == 0, the current thread caught
	 * the original timer signal.  (This thread may not have
	 * any active event lists itself, though.)  This first
	 * thread sends a signal to each of the other threads in
	 * our list of threads that have master events lists.  If
	 * threads_responding != 0, then this thread was signaled
	 * by another thread.  We decrement that value and look
	 * for active events.  threads_responding should
	 * reach zero when all active threads have handled their
	 * signal.  It's probably possible for a thread to die
	 * before it responds to a signal; if that happens,
	 * threads_responding won't reach zero until the next
	 * timer signal happens.  Then the signalled thread won't
	 * signal any other threads.  If that happens only
	 * occasionally, there should be no harm.  Likewise if
	 * a new thread is added that fails to get signalled.
	 * As for locking, we have to lock this list to prevent
	 * another thread from modifying it, but if *this* thread
	 * is trying to update the list (from another function) and
	 * is signaled while it holds the lock, we will have deadlock.
	 * Therefore, noninterrupt functions that update *this* list
	 * must disable the signal that invokes this handler.
	 */

#ifdef PTHREADS
	_papi_hwi_lock( MULTIPLEX_LOCK );

	if ( threads_responding == 0 ) {	/* this thread caught the timer sig */
		/* Signal the other threads with event lists */
#ifdef MPX_DEBUG_TIMER
		thiscall = _papi_hwd_get_real_usec(  );
		MPXDBG( "last signal was %lld usec ago\n", thiscall - lastcall );
		lastcall = thiscall;
#endif
		MPXDBG( "%#x caught it, tlist is %p\n", self, tlist );
		for ( t = tlist; t != NULL; t = t->next ) {
			/* pthread_equal() == 0 means "a different thread" */
			if ( pthread_equal( t->thr, self ) == 0 ) {
				++threads_responding;
				retval = pthread_kill( t->thr, _papi_os_info.itimer_sig );
				assert( retval == 0 );
#ifdef MPX_DEBUG_SIGNALS
				MPXDBG( "%#x signaling %#x\n", self, t->thr );
#endif
			}
		}
	} else {
#ifdef MPX_DEBUG_SIGNALS
		MPXDBG( "%#x was tapped, tr = %d\n", self, threads_responding );
#endif
		--threads_responding;
	}
#ifdef REGENERATE
	/* Remember whether we are the last responder; used below to decide
	 * who re-arms the timer. */
	lastthread = ( threads_responding == 0 );
#endif
	_papi_hwi_unlock( MULTIPLEX_LOCK );
#endif

	/* See if this thread has an active event list */
	head = get_my_threads_master_event_list(  );
	if ( head != NULL ) {

		/* Get the thread header for this master event set.  It's
		 * always in the first record of the set (and maybe in others)
		 * if any record in the set is active.
		 */
		me = head->mythr;

		/* Find the event that's currently active, stop and read
		 * it, then start the next event in the list.
		 * No need to lock the list because other functions
		 * disable the timer interrupt before they update the list.
		 */
		if ( me != NULL && me->cur_event != NULL ) {
			long long counts[2];
			MasterEvent *cur_event = me->cur_event;
			long long cycles = 0, total_cycles = 0;

			retval = PAPI_stop( cur_event->papi_event, counts );
			MPXDBG( "retval=%d, cur_event=%p, I'm tid=%lx\n",
					retval, cur_event, me->tid );

			if ( retval == PAPI_OK ) {
				MPXDBG( "counts[0] = %lld counts[1] = %lld\n", counts[0],
						counts[1] );

				cur_event->count += counts[0];
				/* For a SCALE_EVENT the first counter is itself the
				 * scaling quantity; otherwise it is the second one. */
				cycles = ( cur_event->pi.event_type == SCALE_EVENT )
					? counts[0] : counts[1];

				/* total_cycles: scaling cycles accumulated by this thread
				 * since this event was last read. */
				me->total_c += cycles;
				total_cycles = me->total_c - cur_event->prev_total_c;
				cur_event->prev_total_c = me->total_c;

				/* If it's a rate, count occurrences & average later */
				if ( !cur_event->is_a_rate ) {
					cur_event->cycles += cycles;
					if ( cycles >= MPX_MINCYC ) {	/* Only update current rate on a decent slice */
						cur_event->rate_estimate =
							( double ) counts[0] / ( double ) cycles;
					}
					cur_event->count_estimate +=
						( long long ) ( ( double ) total_cycles *
										cur_event->rate_estimate );
					MPXDBG("New estimate = %lld (%lld cycles * %lf rate)\n",
						   cur_event->count_estimate,total_cycles,
						   cur_event->rate_estimate);
				} else {
					/* Make sure we ran long enough to get a useful measurement (otherwise
					 * potentially inaccurate rate measurements get averaged in with
					 * the same weight as longer, more accurate ones.)
					 */
					if ( cycles >= MPX_MINCYC ) {
						cur_event->cycles += 1;
					} else {
						/* Slice too short: undo the count added above. */
						cur_event->count -= counts[0];
					}
				}
			} else {
				MPXDBG( "%lx retval = %d, skipping\n", me->tid, retval );
				MPXDBG( "%lx value = %lld cycles = %lld\n\n",
						me->tid, cur_event->count, cur_event->cycles );
			}

			MPXDBG
				( "tid(%lx): value = %lld (%lld) cycles = %lld (%lld) rate = %lf\n\n",
				  me->tid, cur_event->count, cur_event->count_estimate,
				  cur_event->cycles, total_cycles, cur_event->rate_estimate );

			/* Start running the next event; look for the
			 * next one in the list that's marked active.
			 * It's possible that this event is the only
			 * one active; if so, we should restart it,
			 * but only after considering all the other
			 * possible events.
			 */
			if ( ( retval != PAPI_OK ) ||
				 ( ( retval == PAPI_OK ) && ( cycles >= MPX_MINCYC ) ) ) {
				/* Walk the list circularly (NULL next wraps to head)
				 * until we come back to the current event. */
				for ( mev =
					  ( cur_event->next == NULL ) ? head : cur_event->next;
					  mev != cur_event;
					  mev = ( mev->next == NULL ) ? head : mev->next ) {
					/* Found the next one to start */
					if ( mev->active ) {
						me->cur_event = mev;
						break;
					}
				}
			}

			/* The return value of PAPI_start() is stored but not checked. */
			if ( me->cur_event->active ) {
				retval = PAPI_start( me->cur_event->papi_event );
			}
#ifdef MPX_DEBUG_OVERHEAD
			didwork = 1;
#endif
		}
	}
#ifdef ANY_THREAD_GETS_SIGNAL
	else {
		/* This thread has no master event list: pass the signal on to
		 * every other thread on tlist that does have one. */
		Threadlist *t;
		for ( t = tlist; t != NULL; t = t->next ) {
			if ( ( t->tid == _papi_hwi_thread_id_fn(  ) ) ||
				 ( t->head == NULL ) )
				continue;
			MPXDBG( "forwarding signal to thread %lx\n", t->tid );
			retval =
				( *_papi_hwi_thread_kill_fn ) ( t->tid,
												_papi_os_info.itimer_sig );
			if ( retval != 0 ) {
				MPXDBG( "forwarding signal to thread %lx returned %d\n",
						t->tid, retval );
			}
		}
	}
#endif

#ifdef REGENERATE
	/* Regenerating the signal each time through has the
	 * disadvantage that if any thread ever drops a signal,
	 * the whole time slicing system will stop.  Using
	 * an automatically regenerated signal may have the
	 * disadvantage that a new signal can arrive very
	 * soon after all the threads have finished handling
	 * the last one, so the interval may be too small for
	 * accurate data collection.  However, using the
	 * MIN_CYCLES check above should alleviate this.
	 */
	/* Reset the timer once all threads have responded */
	if ( lastthread ) {
		retval = setitimer( _papi_os_info.itimer_num, &itime, NULL );
		assert( retval == 0 );
#ifdef MPX_DEBUG_TIMER
		MPXDBG( "timer restarted by %lx\n", me->tid );
#endif
	}
#endif

#ifdef MPX_DEBUG_OVERHEAD
	usec = _papi_hwd_get_real_usec(  ) - usec;
	MPXDBG( "handler %#x did %swork in %lld usec\n",
			self, ( didwork ? "" : "no " ), usec );
#endif
}
/*
 * Add one event to a multiplexed event set, creating the set (and the
 * calling thread's Threadlist entry) on demand.
 *
 * Under MULTIPLEX_LOCK the calling thread's Threadlist entry is located:
 * when a thread id function is installed the list is searched by tid,
 * otherwise the current list head is used.  If no entry is found a new
 * one is allocated and pushed onto the front of tlist.  If *mpx_events is
 * NULL a new MPX_EventSet is allocated for that entry.  The event is then
 * inserted between mpx_hold() and mpx_release().
 *
 * mpx_events   in/out: the event set to extend; on return holds the new
 *              or existing set, or NULL if a set allocated here had to be
 *              freed because the insert failed.
 * EventCode    event to add.
 * domain, granularity
 *              passed through to mpx_insert_events().
 *
 * Returns PAPI_ENOMEM if an allocation fails, otherwise the result of
 * mpx_insert_events().
 */
int
mpx_add_event( MPX_EventSet ** mpx_events, int EventCode, int domain,
			   int granularity )
{
	MPX_EventSet *newset = *mpx_events;
	int retval, alloced_newset = 0;
	Threadlist *mine;

	MPXDBG("Adding %p %#x\n",newset,EventCode);

	_papi_hwi_lock( MULTIPLEX_LOCK );

	/* Step 1: try to find an existing entry for this thread. */
	mine = tlist;
	if ( mine != NULL && _papi_hwi_thread_id_fn ) {
		unsigned long self_tid = _papi_hwi_thread_id_fn(  );

		for ( ; mine != NULL; mine = mine->next ) {
			if ( mine->tid == self_tid ) {
				MPXDBG( "Found thread %#lx\n", mine->tid );
				break;
			}
		}

		if ( mine == NULL ) {
			MPXDBG( "New thread %lx\n", self_tid );
		}
	}

	/* Step 2: nothing usable found, so create an entry and put it at the
	 * front of the global list. */
	if ( mine == NULL ) {
		mine = ( Threadlist * ) papi_malloc( sizeof ( Threadlist ) );
		if ( mine == NULL ) {
			_papi_hwi_unlock( MULTIPLEX_LOCK );
			return ( PAPI_ENOMEM );
		}

		/* Threaded: record the thread id.  Unthreaded: use the process
		 * id as a placeholder. */
		if ( _papi_hwi_thread_id_fn ) {
			MPXDBG( "New thread at %p\n", mine );
			mine->tid = _papi_hwi_thread_id_fn(  );
		} else {
			MPXDBG( "New process at %p\n", mine );
			mine->tid = ( unsigned long ) getpid(  );
		}

		mine->head = NULL;
		mine->cur_event = NULL;
		mine->next = tlist;
		tlist = mine;
		MPXDBG( "New head is at %p(%lu).\n", tlist,
				( long unsigned ) tlist->tid );
	}

	/* Step 3: make sure there is an event set attached to that entry. */
	if ( newset == NULL ) {
		newset = mpx_malloc( mine );
		if ( newset == NULL ) {
			_papi_hwi_unlock( MULTIPLEX_LOCK );
			return ( PAPI_ENOMEM );
		}
		alloced_newset = 1;
	}

	/* Done with the thread list. */
	_papi_hwi_unlock( MULTIPLEX_LOCK );

	/* Step 4: insert the event between mpx_hold() and mpx_release().
	 * The event count is maintained by mpx_insert_events(). */
	mpx_hold(  );

	retval = mpx_insert_events( newset, &EventCode, 1, domain, granularity );
	if ( retval != PAPI_OK && alloced_newset ) {
		/* Only throw away a set that was created by this call. */
		papi_free( newset );
		newset = NULL;
	}

	mpx_release(  );

	/* Output the new or existing EventSet */
	*mpx_events = newset;

	return retval;
}