static void tMPI_Init_initers(void)
{
    int state;

    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&init_inited) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &init_init );

        state = tMPI_Atomic_get(&init_inited);
        tMPI_Atomic_memory_barrier_acq();
        if (state == 0)
        {
            InitializeCriticalSection(&mutex_init);
            InitializeCriticalSection(&once_init);
            InitializeCriticalSection(&cond_init);
            InitializeCriticalSection(&barrier_init);

            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_set(&init_inited, 1);
        }

        tMPI_Spinlock_unlock( &init_init );
    }
}
void tMPI_Spinlock_wait(tMPI_Spinlock_t *x)
{
    tMPI_Spinlock_init_once(x);

    tMPI_Spinlock_lock(x);
    /* Got the lock now, so the waiting is over */
    tMPI_Spinlock_unlock(x);
}
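/* Illustrative usage sketch (not part of the library source): a one-shot
   "gate" built from a spinlock. The producer holds the lock until the shared
   data is ready; consumers call tMPI_Spinlock_wait() to block until it has
   been released, without acquiring it themselves for longer than an instant.
   The function and variable names here are hypothetical, and the example
   assumes the thread_mpi atomic header is available. */
#include "thread_mpi/atomic.h"

static tMPI_Spinlock_t data_ready_lock;

/* called once, before any worker threads are started */
static void publish_setup(void)
{
    tMPI_Spinlock_init(&data_ready_lock);
    tMPI_Spinlock_lock(&data_ready_lock);   /* data not ready yet */
}

/* called by the producer once the shared data is in place */
static void publish_data_ready(void)
{
    tMPI_Spinlock_unlock(&data_ready_lock); /* lets waiting threads through */
}

/* called by consumers; returns once the producer has unlocked the lock */
static void wait_for_data(void)
{
    tMPI_Spinlock_wait(&data_ready_lock);
}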
int tMPI_Thread_create_aff(tMPI_Thread_t *thread,
                           void *(*start_routine)(void *), void *arg)
{
    int ret;

    /* set the calling thread's affinity mask */
    if (tMPI_Atomic_get(&main_thread_aff_set) == 0)
    {
#ifdef HAVE_PTHREAD_SETAFFINITY
        cpu_set_t set;
#endif

        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &main_thread_aff_lock );
        tMPI_Atomic_set( &aff_thread_number, 0);
#ifdef HAVE_PTHREAD_SETAFFINITY
        CPU_ZERO(&set);
        CPU_SET(0, &set);
        pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
        /*fprintf(stderr, "Setting affinity.\n");*/
#endif
        tMPI_Atomic_set( &main_thread_aff_set, 1);
        tMPI_Spinlock_unlock( &main_thread_aff_lock );
    }

    if (thread == NULL)
    {
        tMPI_Fatal_error(TMPI_FARGS, "Invalid thread pointer.");
        return EINVAL;
    }

    *thread = (struct tMPI_Thread*)malloc(sizeof(struct tMPI_Thread)*1);
    ret     = pthread_create(&((*thread)->th), NULL, start_routine, arg);

    if (ret != 0)
    {
        /* Cannot use tMPI_error() since messages use threads for locking */
        tMPI_Fatal_error(TMPI_FARGS, "Failed to create POSIX thread, rc=%d",
                         ret);
        /* Use system memory allocation routines */
        return -1;
    }
    else
    {
#ifdef HAVE_PTHREAD_SETAFFINITY
        int       n;
        cpu_set_t set;

        n = tMPI_Atomic_add_return(&aff_thread_number, 1);
        CPU_ZERO(&set);
        CPU_SET(n, &set);
        return pthread_setaffinity_np((*thread)->th, sizeof(set), &set);
#else
        return 0;
#endif
    }
}
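/* Illustrative usage sketch (not part of the library source): spawn a single
   worker with tMPI_Thread_create_aff() and collect it with tMPI_Thread_join().
   The worker function, its argument, and the wrapper name are hypothetical;
   error handling is reduced to return-code checks. Assumes the thread_mpi
   threads header is available. */
#include "thread_mpi/threads.h"

static void *worker(void *arg)
{
    int *value = (int *)arg;
    *value    += 1;              /* trivial piece of work */
    return arg;
}

static int spawn_one_worker(void)
{
    tMPI_Thread_t th;
    int           data = 41;
    void         *result;

    if (tMPI_Thread_create_aff(&th, worker, &data) != 0)
    {
        return -1;               /* thread creation failed */
    }
    return tMPI_Thread_join(th, &result);
}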
static int tMPI_Init_initers(void)
{
    int state;
    int ret = 0;

    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&init_inited) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &init_init );

        state = tMPI_Atomic_get(&init_inited);
        tMPI_Atomic_memory_barrier_acq();
        if (state == 0)
        {
            InitializeCriticalSection(&mutex_init);
            InitializeCriticalSection(&once_init);
            InitializeCriticalSection(&cond_init);
            InitializeCriticalSection(&barrier_init);
            InitializeCriticalSection(&thread_id_list_lock);

            ret = tMPI_Init_NUMA();
            if (ret != 0)
            {
                goto err;
            }

            ret = tMPI_Thread_id_list_init();
            if (ret != 0)
            {
                goto err;
            }

            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_set(&init_inited, 1);
        }

        tMPI_Spinlock_unlock( &init_init );
    }
    return ret;
err:
    tMPI_Spinlock_unlock( &init_init );
    return ret;
}
int tMPI_Type_contiguous(int count, tMPI_Datatype oldtype,
                         tMPI_Datatype *newtype)
{
    struct tmpi_datatype_ *ntp;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Type_contiguous(%d, %p, %p)", count, oldtype,
                     newtype);
#endif
    ntp               = (struct tmpi_datatype_*)
        tMPI_Malloc(sizeof(struct tmpi_datatype_));
    ntp->size         = count*oldtype->size;
    ntp->op_functions = NULL;

    /* establish components */
    ntp->N_comp = 1;
    ntp->comps  = (struct tmpi_datatype_component*)
        tMPI_Malloc(sizeof(struct tmpi_datatype_component)*1);
    ntp->comps[0].type  = oldtype;
    ntp->comps[0].count = 1;
    ntp->committed      = FALSE;

    /* now add it to the list. */
    tMPI_Spinlock_lock(&(tmpi_global->datatype_lock));
    /* check whether there's space */
    if (tmpi_global->N_usertypes + 1 >= tmpi_global->Nalloc_usertypes)
    {
        /* make space */
        tmpi_global->Nalloc_usertypes = Nthreads*(tmpi_global->N_usertypes) + 1;
        tmpi_global->usertypes        = (struct tmpi_datatype_**)
            tMPI_Realloc(tmpi_global->usertypes,
                         (sizeof(struct tmpi_datatype_ *)*
                          tmpi_global->Nalloc_usertypes) );
    }
    /* add to the list */
    tmpi_global->usertypes[tmpi_global->N_usertypes] = ntp;
    tmpi_global->N_usertypes++;
    *newtype = ntp;
    tMPI_Spinlock_unlock(&(tmpi_global->datatype_lock));

    return TMPI_SUCCESS;
}
/* Set the main thread's affinity */
static int tMPI_Set_main_thread_affinity(void)
{
    /* calling thread PROCESSOR_NUMBER */
    PROCESSOR_NUMBER CurrentProcessorNumber;
    /* calling thread GROUP_AFFINITY */
    GROUP_AFFINITY   CurrentThreadGroupAffinity;
    /* calling thread NUMA node */
    USHORT           CurrentNumaNodeNumber;

    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&main_thread_aff_set) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &main_thread_aff_lock );
        if (g_ulHighestNumaNodeNumber != 0)
        {
            func_GetCurrentProcessorNumberEx(&CurrentProcessorNumber);

            /* set the NUMA node affinity for the current thread.
               failures to set the current thread affinity are ignored,
               as a fringe case can arise on >32 processor systems with
               a 32bit build/code. */
            func_SetThreadIdealProcessorEx(GetCurrentThread(),
                                           &CurrentProcessorNumber,
                                           NULL);

            if (func_GetNumaProcessorNodeEx(&CurrentProcessorNumber,
                                            &CurrentNumaNodeNumber))
            {
                /* for the NUMA node number associated with the current
                   processor number, get the group affinity mask */
                if (func_GetNumaNodeProcessorMaskEx(CurrentNumaNodeNumber,
                                                    &CurrentThreadGroupAffinity))
                {
                    /* set the current thread affinity to prevent it from
                       running on other NUMA nodes */
                    func_SetThreadGroupAffinity(GetCurrentThread(),
                                                &CurrentThreadGroupAffinity,
                                                NULL);
                }
            }
        }
        else
        {
            /* No NUMA. For now, we just do a similar thing. */
            if ( (func_GetCurrentProcessorNumberEx != NULL) &&
                 (func_SetThreadIdealProcessorEx) )
            {
                func_GetCurrentProcessorNumberEx(&CurrentProcessorNumber);
                func_SetThreadIdealProcessorEx(GetCurrentThread(),
                                               &CurrentProcessorNumber,
                                               NULL);
            }
        }
        tMPI_Atomic_set( &main_thread_aff_set, 1);
        tMPI_Spinlock_unlock( &main_thread_aff_lock );
    }
    return 0;
}
int tMPI_Type_commit(tMPI_Datatype *datatype)
{
    int                    i, j;
    struct tmpi_datatype_ *dt = *datatype;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Type_commit(%p)", datatype);
#endif
    if (dt->committed)
    {
        return TMPI_SUCCESS;
    }

    /* search the list for a matching committed type, because if there's
       already a committed type that has the same composition, we just make
       the datatype pointer point to it, ensuring we share datatype
       information across threads. */
    tMPI_Spinlock_lock(&(tmpi_global->datatype_lock));
    for (i = 0; i < tmpi_global->N_usertypes; i++)
    {
        struct tmpi_datatype_ *lt = tmpi_global->usertypes[i];
        if (lt->committed && lt->N_comp == dt->N_comp)
        {
            tmpi_bool found = TRUE;
            for (j = 0; j < lt->N_comp; j++)
            {
                if ( (lt->comps[j].type  != dt->comps[j].type) ||
                     (lt->comps[j].count != dt->comps[j].count) )
                {
                    found = FALSE;
                    break;
                }
            }
            if (found)
            {
                dt = lt;
            }
        }
    }
    if (dt != *datatype)
    {
        tmpi_bool found = FALSE;
        /* we remove the old one from the list */
        for (i = 0; i < tmpi_global->N_usertypes; i++)
        {
            if (tmpi_global->usertypes[i] == *datatype)
            {
                found = TRUE;
                break;
            }
        }
        if (found)
        {
            /* we put the last one in the list in our slot */
            tmpi_global->usertypes[i] =
                tmpi_global->usertypes[tmpi_global->N_usertypes-1];
            tmpi_global->N_usertypes--;
        }
        free( (*datatype)->comps );
        free( *datatype );

        /* and overwrite the pointer with the new data type */
        *datatype = dt;
    }
    else
    {
        /* it was the first one of its type */
        dt->committed = TRUE;
    }
    tMPI_Spinlock_unlock(&(tmpi_global->datatype_lock));
    return TMPI_SUCCESS;
}
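/* Illustrative usage sketch (not part of the library source): build a
   contiguous datatype of three ints with tMPI_Type_contiguous() and commit it
   with tMPI_Type_commit() before using it in communication calls. Note that
   committing may replace the handle with an already committed, structurally
   identical type, which is exactly the sharing behaviour implemented above.
   The wrapper name is hypothetical; assumes the thread_mpi tmpi.h header. */
#include "thread_mpi/tmpi.h"

static int make_int3_type(tMPI_Datatype *int3)
{
    int ret;

    ret = tMPI_Type_contiguous(3, TMPI_INT, int3);
    if (ret != TMPI_SUCCESS)
    {
        return ret;
    }
    /* after this call, *int3 may point at a shared, committed type */
    return tMPI_Type_commit(int3);
}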