Example #1: tMPI_Init_initers() (Windows threads version) performs one-time, thread-safe initialization of the library's critical sections using an atomic pre-check, a spinlock, and a re-check under the lock.
static void tMPI_Init_initers(void)
{
    int state;
    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&init_inited) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &init_init );

        state = tMPI_Atomic_get(&init_inited);
        tMPI_Atomic_memory_barrier_acq();
        if (state == 0)
        {
            InitializeCriticalSection(&mutex_init);
            InitializeCriticalSection(&once_init);
            InitializeCriticalSection(&cond_init);
            InitializeCriticalSection(&barrier_init);

            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_set(&init_inited, 1);
        }

        tMPI_Spinlock_unlock( &init_init );
    }
}
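The initializer above uses double-checked locking: a cheap atomic pre-check, a spinlock for the rare contended case, a re-check under the lock, and a release barrier before the "initialized" flag is published. Below is a minimal, self-contained sketch of the same pattern written with standard C11 atomics instead of the tMPI primitives; the names do_init and resource are illustrative only.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int  initialized = 0;
static atomic_flag init_lock   = ATOMIC_FLAG_INIT;
static int         resource;            /* stands in for the critical sections */

static void do_init(void)
{
    /* cheap pre-check: most callers never touch the lock */
    if (atomic_load_explicit(&initialized, memory_order_acquire) == 0)
    {
        /* spin: contention is rare and short-lived */
        while (atomic_flag_test_and_set_explicit(&init_lock,
                                                 memory_order_acquire))
        {
            /* busy-wait */
        }
        /* re-check under the lock so only one thread initializes */
        if (atomic_load_explicit(&initialized, memory_order_relaxed) == 0)
        {
            resource = 42;               /* the actual one-time setup */
            /* release store: threads that later read initialized == 1
               are guaranteed to also see the initialized resource */
            atomic_store_explicit(&initialized, 1, memory_order_release);
        }
        atomic_flag_clear_explicit(&init_lock, memory_order_release);
    }
}

int main(void)
{
    do_init();
    printf("resource = %d\n", resource);
    return 0;
}
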
Example #2: tMPI_Spinlock_wait() waits until a spinlock becomes free by briefly acquiring it and immediately releasing it.
void tMPI_Spinlock_wait(tMPI_Spinlock_t *x)
{
    tMPI_Spinlock_init_once(x);

    tMPI_Spinlock_lock(x);
    /* Got the lock now, so the waiting is over */
    tMPI_Spinlock_unlock(x);
}
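A hypothetical usage sketch of the lock-then-unlock wait shown above, assuming the usual thread_mpi spinlock API: a producer holds a spinlock while it prepares data, and consumers call tMPI_Spinlock_wait() to block until the lock is released. The include path and the helper names are assumptions, not part of the library.

#include "thread_mpi/threads.h"         /* include path is an assumption */

static tMPI_Spinlock_t ready_lock;

void producer_start(void)               /* hypothetical helper */
{
    tMPI_Spinlock_init(&ready_lock);
    tMPI_Spinlock_lock(&ready_lock);    /* hold while preparing data */
}

void producer_done(void)                /* hypothetical helper */
{
    tMPI_Spinlock_unlock(&ready_lock);  /* lets the waiters proceed */
}

void consumer_wait(void)                /* hypothetical helper */
{
    /* spins until the producer releases ready_lock, then returns */
    tMPI_Spinlock_wait(&ready_lock);
}
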
Example #3: a variant of tMPI_Init_initers() that additionally sets up NUMA support and the thread-id list, and returns an error code on failure.
static int tMPI_Init_initers(void)
{
    int state;
    int ret = 0;

    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&init_inited) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &init_init );

        state = tMPI_Atomic_get(&init_inited);
        tMPI_Atomic_memory_barrier_acq();
        if (state == 0)
        {
            InitializeCriticalSection(&mutex_init);
            InitializeCriticalSection(&once_init);
            InitializeCriticalSection(&cond_init);
            InitializeCriticalSection(&barrier_init);
            InitializeCriticalSection(&thread_id_list_lock);

            ret = tMPI_Init_NUMA();
            if (ret != 0)
            {
                goto err;
            }


            ret = tMPI_Thread_id_list_init();
            if (ret != 0)
            {
                goto err;
            }

            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_set(&init_inited, 1);
        }

        tMPI_Spinlock_unlock( &init_init );
    }
    return ret;
err:
    tMPI_Spinlock_unlock( &init_init );
    return ret;
}
Example #4: tMPI_Thread_create_aff() (POSIX threads version) creates a thread and, when pthread affinity support is available, pins it to the next logical CPU.
int tMPI_Thread_create_aff(tMPI_Thread_t *thread, 
                           void *(*start_routine)(void *),
                           void *arg)
{
    int ret;

    /* set the calling thread's affinity mask */
    if (tMPI_Atomic_get(&main_thread_aff_set) == 0)
    {
#ifdef HAVE_PTHREAD_SETAFFINITY
        cpu_set_t set;
#endif
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &main_thread_aff_lock );
        tMPI_Atomic_set( &aff_thread_number, 0);
#ifdef HAVE_PTHREAD_SETAFFINITY
        CPU_ZERO(&set);
        CPU_SET(0, &set);
        pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
        /*fprintf(stderr, "Setting affinity.\n");*/
#endif
        tMPI_Atomic_set( &main_thread_aff_set, 1);
        tMPI_Spinlock_unlock( &main_thread_aff_lock );
    }


    if (thread == NULL)
    {
        tMPI_Fatal_error(TMPI_FARGS, "Invalid thread pointer.");
        return EINVAL;
    }

    /* Use system memory allocation routines */
    *thread = (struct tMPI_Thread*)malloc(sizeof(struct tMPI_Thread));
    if (*thread == NULL)
    {
        /* Cannot use tMPI_error() since messages use threads for locking */
        tMPI_Fatal_error(TMPI_FARGS, "Failed to allocate thread storage.");
        return ENOMEM;
    }

    ret = pthread_create(&((*thread)->th), NULL, start_routine, arg);

    if (ret != 0)
    {
        /* Cannot use tMPI_error() since messages use threads for locking */
        tMPI_Fatal_error(TMPI_FARGS, "Failed to create POSIX thread, rc=%d", ret);
        return -1;
    }
    else
    {
#ifdef HAVE_PTHREAD_SETAFFINITY
        int n;
        cpu_set_t set;

        n = tMPI_Atomic_add_return(&aff_thread_number, 1);
        CPU_ZERO(&set);
        CPU_SET(n, &set);
        return pthread_setaffinity_np((*thread)->th, sizeof(set), &set);
#else
        return 0;
#endif
    }
}
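A hypothetical caller of tMPI_Thread_create_aff(), based only on the signature shown above; the include path and the tMPI_Thread_join() call are assumptions that follow the usual thread_mpi API.

#include <stdio.h>
#include "thread_mpi/threads.h"   /* include path is an assumption */

static void *worker(void *arg)
{
    printf("worker got %d\n", *(int *)arg);
    return NULL;
}

int main(void)
{
    tMPI_Thread_t th;
    int           value = 7;

    /* on platforms with pthread affinity support, the implementation
       above also pins the new thread to the next logical CPU */
    if (tMPI_Thread_create_aff(&th, worker, &value) != 0)
    {
        fprintf(stderr, "thread creation failed\n");
        return 1;
    }
    tMPI_Thread_join(th, NULL);
    return 0;
}
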
Example #5: tMPI_Type_contiguous() builds a contiguous user datatype and registers it in the global datatype list under a spinlock.
int tMPI_Type_contiguous(int count, tMPI_Datatype oldtype,
                         tMPI_Datatype *newtype)
{
    struct tmpi_datatype_ *ntp;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Type_contiguous(%d, %p, %p)", count, oldtype,
                     newtype);
#endif
    ntp               = (struct tmpi_datatype_*)tMPI_Malloc(sizeof(struct tmpi_datatype_));
    ntp->size         = count*oldtype->size;
    ntp->op_functions = NULL;

    /* establish components */
    ntp->N_comp = 1;
    ntp->comps  = (struct tmpi_datatype_component*)tMPI_Malloc(
                sizeof(struct tmpi_datatype_component)*1);
    ntp->comps[0].type  = oldtype;
    ntp->comps[0].count = 1;
    ntp->committed      = FALSE;

    /* now add it to the list.  */
    tMPI_Spinlock_lock(&(tmpi_global->datatype_lock));
    /* check whether there's space */
    if (tmpi_global->N_usertypes + 1 >= tmpi_global->Nalloc_usertypes)
    {
        /* make space */
        tmpi_global->Nalloc_usertypes = Nthreads*(tmpi_global->N_usertypes) + 1;
        tmpi_global->usertypes        = (struct tmpi_datatype_**)
            tMPI_Realloc(tmpi_global->usertypes,
                         (sizeof(struct tmpi_datatype_ *)*
                          tmpi_global->Nalloc_usertypes)
                         );

    }
    /* add to the list */
    tmpi_global->usertypes[tmpi_global->N_usertypes] = ntp;
    tmpi_global->N_usertypes++;
    *newtype = ntp;
    tMPI_Spinlock_unlock(&(tmpi_global->datatype_lock));

    return TMPI_SUCCESS;
}
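A hypothetical caller-side sketch that combines this function with tMPI_Type_commit() from Example #7 below; the include path, the TMPI_INT constant, and the helper name are assumptions.

#include "thread_mpi/tmpi.h"      /* include path is an assumption */

int make_vec4_type(tMPI_Datatype *vec4)   /* hypothetical helper */
{
    int ret;

    /* a datatype describing 4 contiguous ints */
    ret = tMPI_Type_contiguous(4, TMPI_INT, vec4);
    if (ret != TMPI_SUCCESS)
    {
        return ret;
    }
    /* committing deduplicates against already-committed types
       (see Example #7 below) so all threads share one definition */
    return tMPI_Type_commit(vec4);
}
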
Example #6: tMPI_Set_main_thread_affinity() (Windows) pins the main thread to its current processor and, on NUMA systems, restricts it to that processor's NUMA node.
/* Set the main thread's affinity */
static int tMPI_Set_main_thread_affinity(void)
{
    /* calling thread PROCESSOR_NUMBER */
    PROCESSOR_NUMBER CurrentProcessorNumber;      
    /* calling thread GROUP_AFFINITY */
    GROUP_AFFINITY CurrentThreadGroupAffinity; 
    /* calling thread NUMA node */
    USHORT CurrentNumaNodeNumber;


    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&main_thread_aff_set) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &main_thread_aff_lock );
        if( g_ulHighestNumaNodeNumber != 0 )
        {
            func_GetCurrentProcessorNumberEx(&CurrentProcessorNumber);


            /* Set the NUMA node affinity for the current thread.
               Failures to set the thread affinity are ignored, as a fringe
               case can arise on systems with more than 32 processors when
               running a 32-bit build. */
            func_SetThreadIdealProcessorEx(GetCurrentThread(), 
                                           &CurrentProcessorNumber, 
                                           NULL);

            if(func_GetNumaProcessorNodeEx(&CurrentProcessorNumber, 
                                           &CurrentNumaNodeNumber))
            {
                /* for the NUMA node number associated with the current processor 
                   number, get the group affinity mask */
                if(func_GetNumaNodeProcessorMaskEx(CurrentNumaNodeNumber, 
                                                   &CurrentThreadGroupAffinity))
                {
                    /* set the current thread affinity to prevent it from running on 
                       other NUMA nodes */
                    func_SetThreadGroupAffinity(GetCurrentThread(), 
                                                &CurrentThreadGroupAffinity, 
                                                NULL);
                }
            }
        }
        else
        {
            /* No NUMA. For now, we just do a similar thing. */
            if ( (func_GetCurrentProcessorNumberEx != NULL)  &&
                 (func_SetThreadIdealProcessorEx))
            {
                func_GetCurrentProcessorNumberEx(&CurrentProcessorNumber);
                func_SetThreadIdealProcessorEx(GetCurrentThread(), 
                                               &CurrentProcessorNumber, 
                                               NULL);
            }
        }
        tMPI_Atomic_set( &main_thread_aff_set, 1);
        tMPI_Spinlock_unlock( &main_thread_aff_lock );
    }
    return 0;
}
Example #7: tMPI_Type_commit() commits a user datatype, reusing an already-committed type with the same composition so that datatype information is shared across threads.
int tMPI_Type_commit(tMPI_Datatype *datatype)
{
    int                    i, j;
    struct tmpi_datatype_ *dt = *datatype;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Type_commit(%p)", datatype);
#endif
    if (dt->committed)
    {
        return TMPI_SUCCESS;
    }

    /* search the list for a matching committed type, because if there's
       already a committed type that has the same composition, we just
       make the datatype pointer point to it, ensuring we share datatype
       information across threads. */
    tMPI_Spinlock_lock(&(tmpi_global->datatype_lock));
    for (i = 0; i < tmpi_global->N_usertypes; i++)
    {
        struct tmpi_datatype_ *lt = tmpi_global->usertypes[i];
        if (lt->committed && lt->N_comp == dt->N_comp)
        {
            tmpi_bool found = TRUE;
            for (j = 0; j < lt->N_comp; j++)
            {
                if ( (lt->comps[j].type  != dt->comps[j].type) ||
                     (lt->comps[j].count != dt->comps[j].count) )
                {
                    found = FALSE;
                    break;
                }
            }
            if (found)
            {
                dt = lt;
            }
        }
    }
    if (dt != *datatype)
    {
        tmpi_bool found = FALSE;
        /* we remove the old one from the list */
        for (i = 0; i < tmpi_global->N_usertypes; i++)
        {
            if (tmpi_global->usertypes[i] == *datatype)
            {
                found = TRUE;
                break;
            }
        }
        if (found)
        {
            /* we put the last one in the list in our slot */
            tmpi_global->usertypes[i] = tmpi_global->
                    usertypes[tmpi_global->N_usertypes-1];
            tmpi_global->N_usertypes--;
        }
        free( (*datatype)->comps);
        free(  *datatype );

        /* and overwrite the pointer with the new data type */
        *datatype = dt;
    }
    else
    {
        /* it was the first one of its type */
        dt->committed = TRUE;
    }
    tMPI_Spinlock_unlock(&(tmpi_global->datatype_lock));
    return TMPI_SUCCESS;
}