Code example #1
File: winthreads.c Project: chenleo/gromacs453pf
static void tMPI_Init_initers(void)
{
    int state;
    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&init_inited) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &init_init );

        state=tMPI_Atomic_get(&init_inited);
        tMPI_Atomic_memory_barrier_acq();
        if (state == 0)
        {
            InitializeCriticalSection(&mutex_init);
            InitializeCriticalSection(&once_init);
            InitializeCriticalSection(&cond_init);
            InitializeCriticalSection(&barrier_init);

            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_set(&init_inited, 1);
        }

        tMPI_Spinlock_unlock( &init_init );
    }
}
Code example #2
File: once.c Project: TTarenzi/MMCG-HAdResS
/* once */
int tMPI_Once(tMPI_Comm comm, void (*function)(void*), void *param, 
                int *was_first)
{
    int myrank;
    int ret=TMPI_SUCCESS;
    struct coll_sync *csync;
    struct coll_env *cev;
    int syncs;


    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());

    /* we increase our counter, and determine which coll_env we get */
    csync=&(comm->csync[myrank]);
    csync->syncs++;
    cev=&(comm->cev[csync->syncs % N_COLL_ENV]);

    /* now do a compare-and-swap on the current_sync */
    syncs=tMPI_Atomic_get( &(cev->coll.current_sync));
    if ((csync->syncs - syncs > 0) && /* check if sync was an earlier number. 
                                         If it is a later number, we can't 
                                         have been the first to arrive here. */
        tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs))
    {
        /* we're the first! */
        function(param);
        if (was_first)
            *was_first=TRUE;
    }
    return ret;
}
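As a hedged usage sketch (not taken from the project above): every thread in the communicator calls tMPI_Once, only the first arrival executes the function, and the others return without waiting for it to finish. The header path "thread_mpi/tmpi.h", the init_shared_flag routine, and the setup wrapper are assumptions introduced for illustration.

#include <stdio.h>
#include "thread_mpi/tmpi.h"   /* assumed header path for the tMPI_* API */

static int shared_flag = 0;    /* hypothetical shared state */

/* hypothetical one-time initializer; runs in exactly one thread */
static void init_shared_flag(void *param)
{
    *(int*)param = 1;
}

/* called by every thread of the communicator */
void setup(tMPI_Comm comm)
{
    int was_first = 0;

    /* only the first thread to arrive runs init_shared_flag; the rest
       return immediately without waiting for it to complete */
    tMPI_Once(comm, init_shared_flag, &shared_flag, &was_first);
    if (was_first)
    {
        printf("this thread performed the one-time initialization\n");
    }
}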
Code example #3
File: winthreads.c Project: chenleo/gromacs453pf
int tMPI_Thread_once(tMPI_Thread_once_t *once_control, 
                     void (*init_routine)(void))
{
#if 0
    /* use once Vista is minimum required version */
    BOOL bStatus;
    bStatus = InitOnceExecuteOnce(once_control, InitHandleWrapperFunction, 
                                  init_routine, NULL);

    if (!bStatus)
    {
        tMPI_Fatal_error(TMPI_FARGS,"Failed to run thread_once routine");
        return -1;
    }
#else
    /* really ugly hack - and it's slow... */
    tMPI_Init_initers();
    EnterCriticalSection(&once_init);
    if (tMPI_Atomic_get(&(once_control->once)) == 0)
    {
        (*init_routine)();
        tMPI_Atomic_set(&(once_control->once), 1);
    }
    LeaveCriticalSection(&once_init);
#endif
    return 0;
}
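A minimal sketch of how tMPI_Thread_once is typically driven, assuming a TMPI_THREAD_ONCE_INIT static initializer and the "thread_mpi/threads.h" header exist, analogous to their pthreads counterparts; init_globals and ensure_initialized are hypothetical names.

#include "thread_mpi/threads.h"          /* assumed header for tMPI_Thread_* */

/* assumed static initializer, analogous to PTHREAD_ONCE_INIT */
static tMPI_Thread_once_t once_ctrl = TMPI_THREAD_ONCE_INIT;

/* hypothetical process-wide initialization routine */
static void init_globals(void)
{
    /* allocate lookup tables, open log files, ... */
}

void ensure_initialized(void)
{
    /* safe to call from any number of threads; init_globals runs exactly once */
    tMPI_Thread_once(&once_ctrl, init_globals);
}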
Code example #4
File: winthreads.c Project: chenleo/gromacs453pf
int tMPI_Thread_cond_broadcast(tMPI_Thread_cond_t *cond)
{
    /* check whether the condition is initialized */
    if (tMPI_Atomic_get( &(cond->initialized)  ) == 0)
    {
        tMPI_Thread_cond_init_once(cond);
    }
    /* The condition variable is now guaranteed to be valid. */
#if 0
    /* use this code once Vista is the minimum version required */
    WakeAllConditionVariable( &(cond->cv) );
#else
    EnterCriticalSection(&(cond->condp->wtr_lock));
    /* check whether there are any waiters */
    if (cond->condp->Nwaiters > 0)
    {
        cond->condp->Nrelease=cond->condp->Nwaiters;
        cond->condp->cycle++;
        if (!SetEvent(cond->condp->ev)) /* actually release the 
                                           waiting threads */
        {
            tMPI_Fatal_error(TMPI_FARGS,"Failed SetEvent, error code=%d",
                             GetLastError());
            return -1;
        }
    }
    LeaveCriticalSection(&(cond->condp->wtr_lock));
#endif
    return 0;
}
Code example #5
File: winthreads.c Project: yupinov/gromacs
static int tMPI_Init_initers(void)
{
    int state;
    int ret = 0;

    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&init_inited) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &init_init );

        state = tMPI_Atomic_get(&init_inited);
        tMPI_Atomic_memory_barrier_acq();
        if (state == 0)
        {
            InitializeCriticalSection(&mutex_init);
            InitializeCriticalSection(&once_init);
            InitializeCriticalSection(&cond_init);
            InitializeCriticalSection(&barrier_init);
            InitializeCriticalSection(&thread_id_list_lock);

            ret = tMPI_Init_NUMA();
            if (ret != 0)
            {
                goto err;
            }


            ret = tMPI_Thread_id_list_init();
            if (ret != 0)
            {
                goto err;
            }

            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_set(&init_inited, 1);
        }

        tMPI_Spinlock_unlock( &init_init );
    }
    return ret;
err:
    tMPI_Spinlock_unlock( &init_init );
    return ret;
}
Code example #6
int tMPI_Thread_create_aff(tMPI_Thread_t *thread, 
                           void *(*start_routine)(void *),
                           void *arg)
{
    int ret;

    /* set the calling thread's affinity mask */
    if (tMPI_Atomic_get(&main_thread_aff_set) == 0)
    {
#ifdef HAVE_PTHREAD_SETAFFINITY
        cpu_set_t set;
#endif
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &main_thread_aff_lock );
        tMPI_Atomic_set( &aff_thread_number, 0);
#ifdef HAVE_PTHREAD_SETAFFINITY
        CPU_ZERO(&set);
        CPU_SET(0, &set);
        pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
        /*fprintf(stderr, "Setting affinity.\n");*/
#endif
        tMPI_Atomic_set( &main_thread_aff_set, 1);
        tMPI_Spinlock_unlock( &main_thread_aff_lock );
    }


    if(thread==NULL)
    {
        tMPI_Fatal_error(TMPI_FARGS,"Invalid thread pointer.");
        return EINVAL;
    }

    *thread=(struct tMPI_Thread*)malloc(sizeof(struct tMPI_Thread)*1);
    ret=pthread_create(&((*thread)->th),NULL,start_routine,arg);

    if(ret!=0)
    {
        /* Cannot use tMPI_error() since messages use threads for locking */
        tMPI_Fatal_error(TMPI_FARGS,"Failed to create POSIX thread, rc=%d",ret);
        /* Use system memory allocation routines */
        return -1;
    }
    else
    {
#ifdef HAVE_PTHREAD_SETAFFINITY
        int n;
        cpu_set_t set;

        n=tMPI_Atomic_add_return(&aff_thread_number, 1);
        CPU_ZERO(&set);
        CPU_SET(n, &set);
        return pthread_setaffinity_np((*thread)->th, sizeof(set), &set);
#else
        return 0;
#endif
    }
}
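A hedged sketch of creating and joining a thread with tMPI_Thread_create_aff; tMPI_Thread_join is assumed to mirror pthread_join, the header path is assumed, and worker/run_one_worker are hypothetical names.

#include <stdio.h>
#include "thread_mpi/threads.h"              /* assumed header */

/* hypothetical worker routine */
static void *worker(void *arg)
{
    printf("worker got %d\n", *(int*)arg);
    return NULL;
}

int run_one_worker(void)
{
    tMPI_Thread_t thread;
    int           arg = 42;
    void         *retval;

    /* creates the thread and, when affinity support is compiled in,
       pins it to the next core in line */
    if (tMPI_Thread_create_aff(&thread, worker, &arg) != 0)
    {
        return -1;
    }
    /* assumed join call, analogous to pthread_join */
    return tMPI_Thread_join(thread, &retval);
}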
Code example #7
File: winthreads.c Project: chenleo/gromacs453pf
int tMPI_Thread_mutex_lock(tMPI_Thread_mutex_t *mtx)
{
    /* check whether the mutex is initialized */
    if (tMPI_Atomic_get( &(mtx->initialized)  ) == 0)
    {
        tMPI_Thread_mutex_init_once(mtx);
    }

    /* The mutex is now guaranteed to be valid. */
    EnterCriticalSection( &(mtx->mutex->cs) );

    return 0;
}
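A short sketch pairing tMPI_Thread_mutex_lock with tMPI_Thread_mutex_unlock around shared data; TMPI_THREAD_MUTEX_INITIALIZER is assumed to exist as a static initializer, and counter_mtx/counter are hypothetical names.

#include "thread_mpi/threads.h"                       /* assumed header */

/* assumed static initializer, analogous to PTHREAD_MUTEX_INITIALIZER */
static tMPI_Thread_mutex_t counter_mtx = TMPI_THREAD_MUTEX_INITIALIZER;
static long                counter     = 0;           /* hypothetical shared data */

void increment_counter(void)
{
    /* first use triggers the lazy init-once path shown above */
    tMPI_Thread_mutex_lock(&counter_mtx);
    counter++;
    tMPI_Thread_mutex_unlock(&counter_mtx);
}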
Code example #8
File: pthreads.c Project: alexholehouse/gromacs
int tMPI_Thread_cond_broadcast(tMPI_Thread_cond_t *cond)
{
    int ret;

    /* check whether the condition is initialized */
    if (tMPI_Atomic_get( &(cond->initialized)  ) == 0)
    {
        tMPI_Thread_cond_init_once(cond);
    }
   
    ret = pthread_cond_broadcast( &(cond->condp->cond) );
    
    return ret;
}
Code example #9
File: pthreads.c Project: alexholehouse/gromacs
int tMPI_Thread_cond_wait(tMPI_Thread_cond_t *cond, tMPI_Thread_mutex_t *mtx)
{
    int ret;

    /* check whether the condition is initialized */
    if (tMPI_Atomic_get( &(cond->initialized)  ) == 0)
    {
        tMPI_Thread_cond_init_once(cond);
    }
    /* the mutex must have been initialized because it should be locked here */
   
    ret = pthread_cond_wait( &(cond->condp->cond), &(mtx->mutex->mtx) );
    
    return ret;
}
Code example #10
File: pthreads.c Project: alexholehouse/gromacs
int tMPI_Thread_mutex_unlock(tMPI_Thread_mutex_t *mtx)
{
    int ret;
 
    /* check whether the mutex is initialized */
    if (tMPI_Atomic_get( &(mtx->initialized)  ) == 0)
    {
        ret=tMPI_Thread_mutex_init_once(mtx);
        if (ret)
            return ret;
    }
 
    ret = pthread_mutex_unlock(&(mtx->mutex->mtx));
    
    return ret;
}
Code example #11
File: pthreads.c Project: alexholehouse/gromacs
int tMPI_Thread_barrier_wait(tMPI_Thread_barrier_t *   barrier)
{
    int    cycle;
    int    rc;
    
    /* check whether the barrier is initialized */
    if (tMPI_Atomic_get( &(barrier->initialized)  ) == 0)
    {
        tMPI_Thread_barrier_init_once(barrier);
    }


    rc = pthread_mutex_lock(&barrier->barrierp->mutex);

    
    if(rc != 0)
        return EBUSY;

    cycle = barrier->cycle;
    
    /* Decrement the count atomically and check if it is zero.
     * This will only be true for the last thread calling us.
     */
    if( --barrier->count <= 0 )
    { 
        barrier->cycle = !barrier->cycle;
        barrier->count = barrier->threshold;
        rc = pthread_cond_broadcast(&barrier->barrierp->cv);
        
        if(rc == 0)
            rc = -1;
    }
    else
    {
        while(cycle == barrier->cycle)
        {
            rc = pthread_cond_wait(&barrier->barrierp->cv,
                                   &barrier->barrierp->mutex);
            if(rc != 0) break;
        }
    }
    
    pthread_mutex_unlock(&barrier->barrierp->mutex);
    return rc;
}
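A hedged sketch of initializing and using the barrier from several threads; the init signature tMPI_Thread_barrier_init(barrier, count) is assumed, and NTHREADS, do_phase_one, do_phase_two, setup_barrier, and run_phases are hypothetical names.

#include "thread_mpi/threads.h"         /* assumed header */

#define NTHREADS 4                      /* hypothetical thread count */

static tMPI_Thread_barrier_t barrier;

static void do_phase_one(void) { /* hypothetical per-thread work */ }
static void do_phase_two(void) { /* hypothetical per-thread work */ }

/* run once before the worker threads start */
void setup_barrier(void)
{
    /* assumed init signature: the barrier plus the number of participants */
    tMPI_Thread_barrier_init(&barrier, NTHREADS);
}

/* executed by each of the NTHREADS threads */
void run_phases(void)
{
    do_phase_one();

    /* no thread starts phase two before all have finished phase one */
    tMPI_Thread_barrier_wait(&barrier);

    do_phase_two();
}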
Code example #12
File: collective.c Project: TTarenzi/MMCG-HAdResS
void tMPI_Wait_for_others(struct coll_env *cev, int myrank)
{
#if defined(TMPI_PROFILE) 
    struct tmpi_thread *cur=tMPI_Get_current();
    tMPI_Profile_wait_start(cur);
#endif

#ifdef USE_COLLECTIVE_COPY_BUFFER
    if (! (cev->met[myrank].using_cb) )
#endif
    {
        /* wait until everybody else is done copying the buffer */
        tMPI_Event_wait( &(cev->met[myrank].send_ev));
        tMPI_Event_process( &(cev->met[myrank].send_ev), 1);
    }
#ifdef USE_COLLECTIVE_COPY_BUFFER
    else
    {
        /* wait until everybody else is done copying the original buffer. 
           We use fetch_add because we want to be sure of coherency.
           This wait is bound to be very short (otherwise it wouldn't 
           be double-buffering) so we always spin here. */
        /*tMPI_Atomic_memory_barrier_rel();*/
#if 0
        while (!tMPI_Atomic_cas( &(cev->met[myrank].buf_readcount), 0,
                                    -100000))
#endif
#if 0
        while (tMPI_Atomic_fetch_add( &(cev->met[myrank].buf_readcount), 0) 
               != 0)
#endif
#if 1
        while (tMPI_Atomic_get( &(cev->met[myrank].buf_readcount) )>0)
#endif
        {
        }
        tMPI_Atomic_memory_barrier_acq();
    }
#endif
#if defined(TMPI_PROFILE) 
    tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_send);
#endif
}
Code example #13
File: pthreads.c Project: alexholehouse/gromacs
int tMPI_Thread_once(tMPI_Thread_once_t *once_control,
                     void (*init_routine)(void))
{
    int ret;
    if (!once_control || !init_routine)
    {
        return EINVAL;
    }

    /* really ugly hack - and it's slow... */
    if ( (ret=pthread_mutex_lock( &once_init )) )
        return ret;
    if (tMPI_Atomic_get(&(once_control->once)) == 0)
    {
        (*init_routine)();
        tMPI_Atomic_set(&(once_control->once), 1);
    }
    pthread_mutex_unlock( &once_init );

    return 0;
}
Code example #14
File: alltoall.c Project: BradleyDickson/fABMACS
int tMPI_Alltoall(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                  void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                  tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    int                 i;
    size_t              sendsize = sendtype->size*sendcount;
    size_t              recvsize = recvtype->size*recvcount;
    int                 n_remaining;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Alltoall(%p, %d, %p, %p, %d, %p, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!sendbuf || !recvbuf) /* don't do pointer arithmetic on a NULL ptr */
    {
        return tMPI_Error(comm, TMPI_ERR_BUF);
    }

    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    /* post our pointers */
    /* we set up multiple posts, so no Post_multi */
    cev->met[myrank].tag      = TMPI_ALLTOALL_TAG;
    cev->met[myrank].datatype = sendtype;
    tMPI_Atomic_set( &(cev->met[myrank].n_remaining), cev->N-1 );
    for (i = 0; i < comm->grp.N; i++)
    {
        cev->met[myrank].bufsize[i]   = sendsize;
        cev->met[myrank].buf[i]       = (char*)sendbuf+sendsize*i;
        cev->met[myrank].read_data[i] = FALSE;
    }
    tMPI_Atomic_memory_barrier_rel();
    tMPI_Atomic_set(&(cev->met[myrank].current_sync), synct);

    /* post availability */
    for (i = 0; i < cev->N; i++)
    {
        if (i != myrank)
        {
            tMPI_Event_signal( &(cev->met[i].recv_ev) );
        }
    }

    /* we don't do the copy buffer thing here because it's pointless:
       the processes have to synchronize anyway, because they all
       send and receive. */

    /* do root transfer */
    tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                        sendsize, recvsize,
                        (char*)sendbuf+sendsize*myrank,
                        (char*)recvbuf+recvsize*myrank, &ret);
    cev->met[myrank].read_data[myrank] = TRUE;
    /* and poll data availability */
    n_remaining = cev->N-1;
    while (n_remaining > 0)
    {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_start(cur);
#endif
        tMPI_Event_wait( &(cev->met[myrank]).recv_ev );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
        for (i = 0; i < cev->N; i++)
        {
            if ((!cev->met[myrank].read_data[i]) &&
                (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct))
            {
                tMPI_Event_process( &(cev->met[myrank]).recv_ev, 1);
                tMPI_Mult_recv(comm, cev, i, myrank, TMPI_ALLTOALL_TAG,
                               recvtype, recvsize, (char*)recvbuf+recvsize*i,
                               &ret);
                if (ret != TMPI_SUCCESS)
                {
                    return ret;
                }
                cev->met[myrank].read_data[i] = TRUE;
                n_remaining--;
            }
        }
    }


    /* and wait until everybody is done copying our data */
    tMPI_Wait_for_others(cev, myrank);

#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Alltoall);
#endif
    return ret;
}
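A hedged usage sketch of tMPI_Alltoall from inside an already-running tMPI thread: every rank sends one int to every rank and receives one int from each. TMPI_INT, TMPI_COMM_WORLD, tMPI_Comm_size, and tMPI_Comm_rank are assumed constants and query calls, and exchange_ranks is a hypothetical name.

#include <stdlib.h>
#include "thread_mpi/tmpi.h"                   /* assumed header */

/* runs inside each tMPI thread after the environment has been started */
int exchange_ranks(void)
{
    int  nranks, myrank, i, ret;
    int *sendbuf, *recvbuf;

    tMPI_Comm_size(TMPI_COMM_WORLD, &nranks);  /* assumed query call */
    tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);  /* assumed query call */

    sendbuf = malloc(nranks*sizeof(int));
    recvbuf = malloc(nranks*sizeof(int));
    for (i = 0; i < nranks; i++)
    {
        sendbuf[i] = myrank*100 + i;           /* one int destined for rank i */
    }

    /* all-to-all exchange of one int per rank pair */
    ret = tMPI_Alltoall(sendbuf, 1, TMPI_INT, recvbuf, 1, TMPI_INT,
                        TMPI_COMM_WORLD);

    free(sendbuf);
    free(recvbuf);
    return ret;
}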
Code example #15
File: winthreads.c Project: chenleo/gromacs453pf
int tMPI_Thread_barrier_wait(tMPI_Thread_barrier_t *barrier)
{
    int    cycle;
    BOOL    rc=FALSE;
    int     ret=0;
    /*tMPI_Thread_pthread_barrier_t *p;*/

    /* check whether the barrier is initialized */
    if (tMPI_Atomic_get( &(barrier->initialized)  ) == 0)
    {
        tMPI_Thread_barrier_init_once(barrier,barrier->threshold);        
    }

#if 0
    EnterCriticalSection( &(barrier->barrierp->cs)  );
#else
    tMPI_Thread_mutex_lock( &(barrier->barrierp->cs) );
#endif



    cycle = barrier->cycle;

    /* Decrement the count atomically and check if it is zero.
     * This will only be true for the last thread calling us.
     */
    if( --(barrier->count) <= 0 )
    { 
        barrier->cycle = !barrier->cycle;
        barrier->count = barrier->threshold;
#if 0
        WakeAllConditionVariable( &(barrier->barrierp->cv) );
#else
        tMPI_Thread_cond_broadcast( &(barrier->barrierp->cv) );
#endif
    }
    else
    {
        while(cycle == barrier->cycle)
        {
#if 0
            rc=SleepConditionVariableCS (&(barrier->barrierp->cv), 
                                         &(barrier->barrierp->cs), 
                                         INFINITE);
            if(!rc) 
            {
                ret=-1;
                break;
            }
#else
            rc = tMPI_Thread_cond_wait(&barrier->barrierp->cv,
                                       &barrier->barrierp->cs);
            if(rc != 0) break;
#endif
        }
    }
#if 0
    LeaveCriticalSection( &(barrier->barrierp->cs)  );
#else
    tMPI_Thread_mutex_unlock( &(barrier->barrierp->cs) );
#endif
    return ret;
}
Code example #16
File: gather.c Project: BradleyDickson/fABMACS
int tMPI_Gather(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                int root, tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Gather(%p, %d, %p, %p, %d, %p, %d, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, root, comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    if (myrank == root)
    {
        int i;
        int n_remaining = comm->grp.N-1;
        /* do root transfer */
        if (sendbuf != TMPI_IN_PLACE)
        {
            tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                                sendtype->size*sendcount,
                                recvtype->size*recvcount,
                                sendbuf,
                                (char*)recvbuf+myrank*recvcount*recvtype->size,
                                &ret);
        }
        for (i = 0; i < comm->grp.N; i++)
        {
            cev->met[myrank].read_data[i] = FALSE;
        }
        cev->met[myrank].read_data[myrank] = TRUE;

        /* wait for data availability as long as there are xfers to be done */
        while (n_remaining > 0)
        {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_start(cur);
#endif
            tMPI_Event_wait( &(cev->met[myrank]).recv_ev );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
            /* now check all of them */
            for (i = 0; i < comm->grp.N; i++)
            {
                if (!cev->met[myrank].read_data[i] &&
                    (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct))
                {
                    tMPI_Mult_recv(comm, cev, i, 0, TMPI_GATHER_TAG, recvtype,
                                   recvcount*recvtype->size,
                                   (char*)recvbuf+i*recvcount*recvtype->size,
                                   &ret);
                    tMPI_Event_process( &(cev->met[myrank]).recv_ev, 1);
                    if (ret != TMPI_SUCCESS)
                    {
                        return ret;
                    }
                    cev->met[myrank].read_data[i] = TRUE;
                    n_remaining--;
                }
            }
        }
    }
    else
    {
        if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
        {
            return tMPI_Error(comm, TMPI_ERR_BUF);
        }

        /* first set up the data just to root. */
        ret = tMPI_Post_multi(cev, myrank, 0, TMPI_GATHER_TAG, sendtype,
                              sendcount*sendtype->size, sendbuf, 1, synct, root);
        if (ret != TMPI_SUCCESS)
        {
            return ret;
        }
        /* and wait until root is done copying */
        tMPI_Wait_for_others(cev, myrank);
    }
#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Gather);
#endif
    return ret;
}
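A similar hedged sketch for tMPI_Gather, collecting one int from every rank at root 0; the same assumed constants and query calls as in the tMPI_Alltoall sketch apply, and gather_ranks_at_root is a hypothetical name.

#include <stdlib.h>
#include "thread_mpi/tmpi.h"                    /* assumed header */

/* runs inside each tMPI thread after the environment has been started */
int gather_ranks_at_root(void)
{
    int  nranks, myrank, ret;
    int  myvalue;
    int *recvbuf = NULL;

    tMPI_Comm_size(TMPI_COMM_WORLD, &nranks);   /* assumed query call */
    tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);   /* assumed query call */

    myvalue = myrank;                           /* the single int we contribute */
    if (myrank == 0)
    {
        recvbuf = malloc(nranks*sizeof(int));   /* only the root needs recvbuf */
    }

    /* rank 0 ends up with one int from every rank, ordered by rank */
    ret = tMPI_Gather(&myvalue, 1, TMPI_INT, recvbuf, 1, TMPI_INT,
                      0, TMPI_COMM_WORLD);

    free(recvbuf);                              /* free(NULL) is a no-op */
    return ret;
}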
Code example #17
File: winthreads.c Project: chenleo/gromacs453pf
int tMPI_Thread_cond_wait(tMPI_Thread_cond_t *cond, tMPI_Thread_mutex_t *mtx)
{
    BOOL wait_done=FALSE;
    BOOL last_waiter=FALSE;
    int my_cycle;

    /* check whether the condition is initialized */
    if (tMPI_Atomic_get( &(cond->initialized)  ) == 0)
    {
        tMPI_Thread_cond_init_once(cond);
    }
    /* the mutex must have been initialized because it should be locked here */

#if 0
    /* use this code once Vista is the minimum version required */
    ret=SleepConditionVariableCS (&(cond->cv), &(mtx->cs), INFINITE);

    if (!ret)
    {
        tMPI_Fatal_error(TMPI_FARGS,"Failed wait for condition, error code=%d",
                         GetLastError());
        return -1;
    }
#else
    /* serially increase waiter count */
    EnterCriticalSection(&(cond->condp->wtr_lock));
    cond->condp->Nwaiters++;
    my_cycle = cond->condp->cycle;
    LeaveCriticalSection(&(cond->condp->wtr_lock));

    /* now it's safe to release the mutex from the fn call */
    LeaveCriticalSection(&(mtx->mutex->cs));

    /* Loop the wait until we find out we've waited for the right event.
       Note that this loop is potentially a busy-wait loop in bad
       circumstances (higher-priority threads, for example). */
    do
    {
        /* do the actual waiting */
        if (WaitForSingleObject( cond->condp->ev, INFINITE )== WAIT_FAILED)
        {
            tMPI_Fatal_error(TMPI_FARGS,"Failed event reset, error code=%d",
                             GetLastError());
            return -1;
        }

        /* serially check whether we got the right event.  */
        EnterCriticalSection(&(cond->condp->wtr_lock));
        wait_done = (cond->condp->Nrelease > 0) && 
                    (cond->condp->cycle!=my_cycle);
        LeaveCriticalSection(&(cond->condp->wtr_lock));
    }
    while(!wait_done);

    /* We obtain the mutex from the function call */
    EnterCriticalSection(&(mtx->mutex->cs));

    /* we serially decrease the waiter count and release count */
    EnterCriticalSection(&(cond->condp->wtr_lock));
    cond->condp->Nwaiters--;
    cond->condp->Nrelease--;
    last_waiter=(cond->condp->Nrelease==0);
    LeaveCriticalSection(&(cond->condp->wtr_lock));

    /* manually release the event if everybody's done with it */
    if (last_waiter)
    {
        if (!ResetEvent( cond->condp->ev ))
        {
            tMPI_Fatal_error(TMPI_FARGS,"Failed event reset, error code=%d",
                             GetLastError());
            return -1;
        }
    }
#endif

    return 0;
}
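A condensed producer/consumer sketch showing the usual pattern around tMPI_Thread_cond_wait: the predicate is re-checked in a loop while holding the mutex, because the wait releases the mutex while sleeping and may wake spuriously. TMPI_THREAD_MUTEX_INITIALIZER and TMPI_THREAD_COND_INITIALIZER are assumed static initializers; queue_ready, consumer, and producer are hypothetical names.

#include "thread_mpi/threads.h"                 /* assumed header */

/* assumed static initializers, analogous to their pthread counterparts */
static tMPI_Thread_mutex_t lock = TMPI_THREAD_MUTEX_INITIALIZER;
static tMPI_Thread_cond_t  cond = TMPI_THREAD_COND_INITIALIZER;
static int                 queue_ready = 0;     /* hypothetical predicate */

void consumer(void)
{
    tMPI_Thread_mutex_lock(&lock);
    /* the wait releases the mutex while sleeping and re-acquires it on
       wake-up; spurious wake-ups make the while loop mandatory */
    while (!queue_ready)
    {
        tMPI_Thread_cond_wait(&cond, &lock);
    }
    queue_ready = 0;
    tMPI_Thread_mutex_unlock(&lock);
}

void producer(void)
{
    tMPI_Thread_mutex_lock(&lock);
    queue_ready = 1;
    /* wake all waiters; each re-checks the predicate under the mutex */
    tMPI_Thread_cond_broadcast(&cond);
    tMPI_Thread_mutex_unlock(&lock);
}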
Code example #18
/* Set the main thread's affinity */
static int tMPI_Set_main_thread_affinity(void)
{
    /* calling thread PROCESSOR_NUMBER */
    PROCESSOR_NUMBER CurrentProcessorNumber;      
    /* calling thread GROUP_AFFINITY */
    GROUP_AFFINITY CurrentThreadGroupAffinity; 
    /* calling thread NUMA node */
    USHORT CurrentNumaNodeNumber;


    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&main_thread_aff_set) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &main_thread_aff_lock );
        if( g_ulHighestNumaNodeNumber != 0 )
        {
            func_GetCurrentProcessorNumberEx(&CurrentProcessorNumber);


            /* set the NUMA node affinity for the current thread.
               Failures to set the current thread affinity are ignored,
               as a fringe case can arise on >32 processor systems with
               a 32-bit build/code. */
            func_SetThreadIdealProcessorEx(GetCurrentThread(), 
                                           &CurrentProcessorNumber, 
                                           NULL);

            if(func_GetNumaProcessorNodeEx(&CurrentProcessorNumber, 
                                           &CurrentNumaNodeNumber))
            {
                /* for the NUMA node number associated with the current processor 
                   number, get the group affinity mask */
                if(func_GetNumaNodeProcessorMaskEx(CurrentNumaNodeNumber, 
                                                   &CurrentThreadGroupAffinity))
                {
                    /* set the current thread affinity to prevent it from running on 
                       other NUMA nodes */
                    func_SetThreadGroupAffinity(GetCurrentThread(), 
                                                &CurrentThreadGroupAffinity, 
                                                NULL);
                }
            }
        }
        else
        {
            /* No NUMA. For now, we just do a similar thing. */
            if ( (func_GetCurrentProcessorNumberEx != NULL)  &&
                 (func_SetThreadIdealProcessorEx))
            {
                func_GetCurrentProcessorNumberEx(&CurrentProcessorNumber);
                func_SetThreadIdealProcessorEx(GetCurrentThread(), 
                                               &CurrentProcessorNumber, 
                                               NULL);
            }
        }
        tMPI_Atomic_set( &main_thread_aff_set, 1);
        tMPI_Spinlock_unlock( &main_thread_aff_lock );
    }
    return 0;
}
Code example #19
File: once.c Project: TTarenzi/MMCG-HAdResS
void* tMPI_Once_wait(tMPI_Comm comm, void* (*function)(void*), void *param, 
                     int *was_first)
{
    int myrank;
    struct coll_sync *csync;
    struct coll_env *cev;
    int syncs;
    void *ret;


    if (!comm)
    {
        tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
        return NULL;
    }
    myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());

    /* we increase our counter, and determine which coll_env we get */
    csync=&(comm->csync[myrank]);
    csync->syncs++;
    cev=&(comm->cev[csync->syncs % N_COLL_ENV]);

    /* now do a compare-and-swap on the current_sync */
    syncs=tMPI_Atomic_get( &(cev->coll.current_sync));
    tMPI_Atomic_memory_barrier_acq();
    if ((csync->syncs - syncs > 0) && /* check if sync was an earlier number. 
                                         If it is a later number, we can't 
                                         have been the first to arrive here. 
                                         Calculating the difference instead
                                         of comparing directly avoids ABA 
                                         problems. */
        tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs))
    {
        /* we're the first! */
        ret=function(param);
        if (was_first)
            *was_first=TRUE;

        /* broadcast the output data */
        cev->coll.res=ret;

        tMPI_Atomic_memory_barrier_rel();
        /* signal that we're done */
        tMPI_Atomic_fetch_add(&(cev->coll.current_sync), 1);
        /* we need to keep being in sync */
        csync->syncs++;
    }
    else
    {
        /* we need to wait until the current_sync gets increased again */
        csync->syncs++;
        do
        {
            /*tMPI_Atomic_memory_barrier();*/
            syncs=tMPI_Atomic_get( &(cev->coll.current_sync) );
        } while (csync->syncs - syncs > 0); /* difference again due to ABA 
                                               problems */
        tMPI_Atomic_memory_barrier_acq();
        ret=cev->coll.res;
    }
    return ret;
}
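A hedged sketch of tMPI_Once_wait, which, unlike tMPI_Once above, also hands the first thread's return value to every caller. The header path is assumed, and alloc_shared_buffer and get_shared_buffer are hypothetical names introduced for illustration.

#include <stdlib.h>
#include "thread_mpi/tmpi.h"                    /* assumed header */

/* hypothetical allocator run by exactly one thread of the communicator */
static void *alloc_shared_buffer(void *param)
{
    return malloc(*(size_t*)param);
}

/* called by every thread; all of them get the same pointer back */
void *get_shared_buffer(tMPI_Comm comm, size_t bytes)
{
    int   was_first = 0;
    void *buf;

    /* the first arrival runs alloc_shared_buffer; everybody waits for
       the result and receives the same pointer */
    buf = tMPI_Once_wait(comm, alloc_shared_buffer, &bytes, &was_first);
    if (was_first)
    {
        /* this thread did the allocation; the others only received it */
    }
    return buf;
}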