Example #1
static void tMPI_Init_initers(void)
{
    int state;
    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&init_inited) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &init_init );

        state=tMPI_Atomic_get(&init_inited);
        tMPI_Atomic_memory_barrier_acq();
        if (state == 0)
        {
            InitializeCriticalSection(&mutex_init);
            InitializeCriticalSection(&once_init);
            InitializeCriticalSection(&cond_init);
            InitializeCriticalSection(&barrier_init);

            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_set(&init_inited, 1);
        }

        tMPI_Spinlock_unlock( &init_init );
    }
}
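Example #1 is a double-checked initialization: a cheap atomic pre-check, a spinlock for the slow path, a second check under the lock, and a release/acquire barrier pair so the store of the flag publishes the initialized critical sections. Below is a minimal sketch of the same pattern using standard C11 atomics rather than the tMPI primitives; the names init_state, init_lock and do_init are illustrative and not part of thread_mpi.

#include <stdatomic.h>

static atomic_int  init_state = 0;                /* 0 = not done, 1 = done */
static atomic_flag init_lock  = ATOMIC_FLAG_INIT; /* simple spinlock */

static void do_init(void)
{
    /* fast path: acquire load pairs with the release store below */
    if (atomic_load_explicit(&init_state, memory_order_acquire) == 1)
        return;

    /* slow path: spin until we own the lock; collisions are rare */
    while (atomic_flag_test_and_set_explicit(&init_lock, memory_order_acquire))
        ;

    /* re-check under the lock: another thread may have finished already */
    if (atomic_load_explicit(&init_state, memory_order_relaxed) == 0)
    {
        /* ... one-time initialization goes here ... */

        /* release store: the work above becomes visible before the flag */
        atomic_store_explicit(&init_state, 1, memory_order_release);
    }

    atomic_flag_clear_explicit(&init_lock, memory_order_release);
}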
Example #2
static int tMPI_Init_initers(void)
{
    int state;
    int ret = 0;

    /* we can pre-check because it's atomic */
    if (tMPI_Atomic_get(&init_inited) == 0)
    {
        /* this can be a spinlock because the chances of collision are low. */
        tMPI_Spinlock_lock( &init_init );

        state = tMPI_Atomic_get(&init_inited);
        tMPI_Atomic_memory_barrier_acq();
        if (state == 0)
        {
            InitializeCriticalSection(&mutex_init);
            InitializeCriticalSection(&once_init);
            InitializeCriticalSection(&cond_init);
            InitializeCriticalSection(&barrier_init);
            InitializeCriticalSection(&thread_id_list_lock);

            ret = tMPI_Init_NUMA();
            if (ret != 0)
            {
                goto err;
            }


            ret = tMPI_Thread_id_list_init();
            if (ret != 0)
            {
                goto err;
            }

            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_set(&init_inited, 1);
        }

        tMPI_Spinlock_unlock( &init_init );
    }
    return ret;
err:
    tMPI_Spinlock_unlock( &init_init );
    return ret;
}
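Example #2 extends the same initializer with fallible steps (NUMA setup, the thread-id list); the goto err path exists only so that the spinlock is released on every exit. A small sketch of that cleanup idiom in plain C11, with placeholder step functions standing in for tMPI_Init_NUMA() and tMPI_Thread_id_list_init():

#include <stdatomic.h>

static atomic_flag lock = ATOMIC_FLAG_INIT;

static int step_one(void) { return 0; }   /* placeholder fallible step */
static int step_two(void) { return 0; }   /* placeholder fallible step */

int init_with_cleanup(void)
{
    int ret = 0;

    while (atomic_flag_test_and_set_explicit(&lock, memory_order_acquire))
        ;                                  /* spin until we own the lock */

    ret = step_one();
    if (ret != 0)
        goto err;

    ret = step_two();
    if (ret != 0)
        goto err;

    atomic_flag_clear_explicit(&lock, memory_order_release);
    return ret;
err:
    /* the error path unlocks too, so no exit leaves the lock held */
    atomic_flag_clear_explicit(&lock, memory_order_release);
    return ret;
}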
Example #3
void* tMPI_Once_wait(tMPI_Comm comm, void* (*function)(void*), void *param, 
                     int *was_first)
{
    int myrank;
    struct coll_sync *csync;
    struct coll_env *cev;
    int syncs;
    void *ret;


    if (!comm)
    {
        tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
        return NULL;
    }
    myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());

    /* we increase our counter, and determine which coll_env we get */
    csync=&(comm->csync[myrank]);
    csync->syncs++;
    cev=&(comm->cev[csync->syncs % N_COLL_ENV]);

    /* now do a compare-and-swap on the current_sync */
    syncs=tMPI_Atomic_get( &(cev->coll.current_sync));
    tMPI_Atomic_memory_barrier_acq();
    if ((csync->syncs - syncs > 0) && /* check if sync was an earlier number. 
                                         If it is a later number, we can't 
                                         have been the first to arrive here. 
                                         Calculating the difference instead
                                         of comparing directly avoids ABA 
                                         problems. */
        tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs))
    {
        /* we're the first! */
        ret=function(param);
        if (was_first)
            *was_first=TRUE;

        /* broadcast the output data */
        cev->coll.res=ret;

        tMPI_Atomic_memory_barrier_rel();
        /* signal that we're done */
        tMPI_Atomic_fetch_add(&(cev->coll.current_sync), 1);
        /* we need to keep being in sync */
        csync->syncs++;
    }
    else
    {
        /* we need to wait until the current_sync gets increased again */
        csync->syncs++;
        do
        {
            /*tMPI_Atomic_memory_barrier();*/
            syncs=tMPI_Atomic_get( &(cev->coll.current_sync) );
        } while (csync->syncs - syncs > 0); /* difference again due to ABA 
                                               problems */
        tMPI_Atomic_memory_barrier_acq();
        ret=cev->coll.res;
    }
    return ret;
}
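Example #3 lets the first thread to arrive run function() and publishes its result to everyone else: a CAS on the sync counter elects the winner, the difference comparison (csync->syncs - syncs > 0) avoids ABA when counters wrap across rounds, and the release/acquire barrier pair orders the write of cev->coll.res against the counter update. A one-shot sketch of the same idea in C11 follows; it drops the per-round counter and ABA handling, and once_result_t, once_wait and the state values are illustrative names.

#include <stdatomic.h>

typedef struct {
    atomic_int state;    /* 0 = not started, 1 = running, 2 = done */
    void      *result;   /* written by the winner, read by the rest */
} once_result_t;         /* initialize with: once_result_t o = { 0, NULL }; */

void *once_wait(once_result_t *o, void *(*fn)(void *), void *arg, int *was_first)
{
    int expected = 0;

    if (atomic_compare_exchange_strong_explicit(&o->state, &expected, 1,
                                                memory_order_acquire,
                                                memory_order_acquire))
    {
        /* we won the race: run the function and publish its result */
        o->result = fn(arg);
        if (was_first)
            *was_first = 1;
        /* release store: result must be visible before state becomes 2 */
        atomic_store_explicit(&o->state, 2, memory_order_release);
        return o->result;
    }

    /* someone else is (or was) running fn: spin until it is done */
    while (atomic_load_explicit(&o->state, memory_order_acquire) != 2)
        ;                /* acquire load pairs with the release store above */
    return o->result;
}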
Example #4
void tMPI_Post_multi(struct coll_env *cev, int myrank, int index, 
                     int tag, tMPI_Datatype datatype, size_t bufsize, 
                     void *buf, int n_remaining, int synct, int dest)
{
    int i;
#ifdef USE_COLLECTIVE_COPY_BUFFER
    /* decide based on the number of waiting threads */
    tmpi_bool using_cb=(bufsize < (size_t)(n_remaining*COPY_BUFFER_SIZE));

    cev->met[myrank].using_cb=using_cb;
    if (using_cb)
    {
        /* we set it to NULL initially */
        /*cev->met[myrank].cpbuf[index]=NULL;*/
        tMPI_Atomic_ptr_set(&(cev->met[myrank].cpbuf[index]), NULL);

        tMPI_Atomic_set(&(cev->met[myrank].buf_readcount), 0);
    }
#endif
    cev->met[myrank].tag=tag;
    cev->met[myrank].datatype=datatype;
    cev->met[myrank].buf[index]=buf;
    cev->met[myrank].bufsize[index]=bufsize;
    tMPI_Atomic_set(&(cev->met[myrank].n_remaining), n_remaining);
    tMPI_Atomic_memory_barrier_rel();
    tMPI_Atomic_set(&(cev->met[myrank].current_sync), synct);

    /* publish availability. */
    if (dest<0)
    {
        for(i=0;i<cev->N;i++)
        {
            if (i != myrank)
                tMPI_Event_signal( &(cev->met[i].recv_ev) );
        }
    }
    else
    {
        tMPI_Event_signal( &(cev->met[dest].recv_ev) );
    }

#ifdef USE_COLLECTIVE_COPY_BUFFER
    /* because we've published availability, we can start copying --
       possibly in parallel with the receiver */
    if (using_cb)
    {
        struct tmpi_thread *cur=tMPI_Get_current();
         /* copy the buffer locally. First allocate */
        cev->met[myrank].cb=tMPI_Copy_buffer_list_get( &(cur->cbl_multi) );
        if (cev->met[myrank].cb->size < bufsize)
        {
            fprintf(stderr, "ERROR: cb size too small\n");
            exit(1);
        }
        /* copy to the new buf */
        memcpy(cev->met[myrank].cb->buf, buf, bufsize);

        /* post the new buf */
        tMPI_Atomic_memory_barrier_rel();
        /*cev->met[myrank].cpbuf[index]=cev->met[myrank].cb->buf;*/
        tMPI_Atomic_ptr_set(&(cev->met[myrank].cpbuf[index]), 
                            cev->met[myrank].cb->buf);
    }
#endif
}
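Example #4 follows a publish protocol: fill the plain metadata fields (tag, datatype, buffers, n_remaining), issue a release barrier, then store current_sync so that readers which acquire-load the counter see consistent metadata; the copy buffer is published the same way with an atomic pointer store. A reduced sketch of that release-publish / acquire-consume handshake with C11 atomics; mailbox_t, post and try_receive are illustrative names, not thread_mpi API.

#include <stdatomic.h>
#include <stddef.h>

typedef struct {
    void       *buf;           /* payload pointer */
    size_t      bufsize;       /* payload size in bytes */
    int         tag;           /* message tag */
    atomic_int  current_sync;  /* publication flag / sync counter */
} mailbox_t;

/* producer: fill the plain fields first, then release-store the counter */
void post(mailbox_t *m, void *buf, size_t bufsize, int tag, int synct)
{
    m->buf     = buf;
    m->bufsize = bufsize;
    m->tag     = tag;
    atomic_store_explicit(&m->current_sync, synct, memory_order_release);
}

/* consumer: acquire-load the counter; once it matches, the fields are valid */
int try_receive(mailbox_t *m, int synct, void **buf, size_t *bufsize, int *tag)
{
    if (atomic_load_explicit(&m->current_sync, memory_order_acquire) != synct)
        return 0;              /* not posted yet for this round */

    *buf     = m->buf;
    *bufsize = m->bufsize;
    *tag     = m->tag;
    return 1;
}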
Example #5
void tMPI_Mult_recv(tMPI_Comm comm, struct coll_env *cev, int rank,
                    int index, int expected_tag, tMPI_Datatype recvtype, 
                    size_t recvsize, void *recvbuf, int *ret)
{
    size_t sendsize=cev->met[rank].bufsize[index];

    /* check tags, types */
    if ((cev->met[rank].datatype != recvtype ) || 
        (cev->met[rank].tag != expected_tag))
    {
        *ret=tMPI_Error(comm, TMPI_ERR_MULTI_MISMATCH);
    }
  
    if (sendsize) /* we allow NULL ptrs if there's nothing to xmit */
    {
        void *srcbuf;
#ifdef USE_COLLECTIVE_COPY_BUFFER
        tmpi_bool decrease_ctr=FALSE;
#endif

        if ( sendsize > recvsize ) 
        {
            *ret=tMPI_Error(comm, TMPI_ERR_XFER_BUFSIZE);
            return;
        }

        if ( cev->met[rank].buf[index] == recvbuf )
        {
            *ret=tMPI_Error(TMPI_COMM_WORLD,TMPI_ERR_XFER_BUF_OVERLAP);
            return;
        }
        /* get source buffer */
#ifdef USE_COLLECTIVE_COPY_BUFFER
        if ( !(cev->met[rank].using_cb)) 
#endif
        {
            srcbuf=cev->met[rank].buf[index];
        }
#ifdef USE_COLLECTIVE_COPY_BUFFER
        else
        {
            srcbuf=tMPI_Atomic_ptr_get(&(cev->met[rank].cpbuf[index]));
            tMPI_Atomic_memory_barrier_acq();

            if(!srcbuf)
            { /* there was (as of yet) no copied buffer */
                void *try_again_srcbuf;
                /* we need to try checking the pointer again after we increase
                   the read counter, signaling that one more thread
                   is reading. */
                tMPI_Atomic_add_return(&(cev->met[rank].buf_readcount), 1);
                /* a full memory barrier */
                tMPI_Atomic_memory_barrier();
                try_again_srcbuf=tMPI_Atomic_ptr_get(
                                         &(cev->met[rank].cpbuf[index]));
                if (!try_again_srcbuf)
                {
                    /* apparently the copied buffer is not ready yet. We
                       just use the real source buffer. We have already
                       indicated we're reading from the regular buf. */
                    srcbuf=cev->met[rank].buf[index];
                    decrease_ctr=TRUE;

                }
                else
                {
                    /* We tried again, and this time there was a copied buffer. 
                       We use that, and indicate that we're not reading from the
                       regular buf. This case should be pretty rare.  */
                    tMPI_Atomic_fetch_add(&(cev->met[rank].buf_readcount),-1);
                    tMPI_Atomic_memory_barrier_acq();
                    srcbuf=try_again_srcbuf;
                }
            }

#ifdef TMPI_PROFILE
            if (srcbuf)
                tMPI_Profile_count_buffered_coll_xfer(tMPI_Get_current());
#endif
        }
#endif
        /* copy data */
        memcpy((char*)recvbuf, srcbuf, sendsize);
#ifdef TMPI_PROFILE
        tMPI_Profile_count_coll_xfer(tMPI_Get_current());
#endif

#ifdef USE_COLLECTIVE_COPY_BUFFER
        if (decrease_ctr)
        {
            /* we decrement the read count; potentially releasing the buffer. */
            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_fetch_add( &(cev->met[rank].buf_readcount), -1);
        }
#endif
    }
    /* signal that this thread is done; the last reader wakes the sender */
    {
        int reta;
        tMPI_Atomic_memory_barrier_rel();
        reta=tMPI_Atomic_add_return( &(cev->met[rank].n_remaining), -1);
        if (reta <= 0)
        {
            tMPI_Event_signal( &(cev->met[rank].send_ev) );
        }
    }
}
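Example #5 ends with a countdown: each receiver decrements n_remaining after a release barrier, and the thread that brings it to zero signals the sender's event so the posted buffer can be reused (buf_readcount guards the non-copied source buffer the same way). A bare-bones sketch of that release-decrement / acquire-wait pattern in C11; countdown_t and the function names are illustrative.

#include <stdatomic.h>

typedef struct {
    atomic_int n_remaining;    /* readers still using the buffer */
} countdown_t;

/* reader side: the release decrement orders all our reads of the buffer
   before the count change becomes visible to the waiting writer */
void reader_done(countdown_t *c)
{
    if (atomic_fetch_sub_explicit(&c->n_remaining, 1,
                                  memory_order_release) == 1)
    {
        /* last reader: thread_mpi signals the sender's event here;
           this sketch has nothing further to do */
    }
}

/* writer side: acquire loads pair with the readers' release decrements,
   so once the count hits zero the buffer may be reused safely */
void wait_for_readers(countdown_t *c)
{
    while (atomic_load_explicit(&c->n_remaining, memory_order_acquire) > 0)
        ;                      /* busy-wait; thread_mpi blocks on an event */
}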
Example #6
int tMPI_Alltoall(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                  void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                  tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    int                 i;
    size_t              sendsize = sendtype->size*sendcount;
    size_t              recvsize = recvtype->size*recvcount;
    int                 n_remaining;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Alltoall(%p, %d, %p, %p, %d, %p, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!sendbuf || !recvbuf) /* don't do pointer arithmetic on a NULL ptr */
    {
        return tMPI_Error(comm, TMPI_ERR_BUF);
    }

    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    /* post our pointers */
    /* we set up multiple posts, so no Post_multi */
    cev->met[myrank].tag      = TMPI_ALLTOALL_TAG;
    cev->met[myrank].datatype = sendtype;
    tMPI_Atomic_set( &(cev->met[myrank].n_remaining), cev->N-1 );
    for (i = 0; i < comm->grp.N; i++)
    {
        cev->met[myrank].bufsize[i]   = sendsize;
        cev->met[myrank].buf[i]       = (char*)sendbuf+sendsize*i;
        cev->met[myrank].read_data[i] = FALSE;
    }
    tMPI_Atomic_memory_barrier_rel();
    tMPI_Atomic_set(&(cev->met[myrank].current_sync), synct);

    /* post availability */
    for (i = 0; i < cev->N; i++)
    {
        if (i != myrank)
        {
            tMPI_Event_signal( &(cev->met[i].recv_ev) );
        }
    }

    /* we don't do the copy buffer thing here because it's pointless:
       the processes have to synchronize anyway, because they all
       send and receive. */

    /* do root transfer */
    tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                        sendsize, recvsize,
                        (char*)sendbuf+sendsize*myrank,
                        (char*)recvbuf+recvsize*myrank, &ret);
    cev->met[myrank].read_data[myrank] = TRUE;
    /* and poll data availability */
    n_remaining = cev->N-1;
    while (n_remaining > 0)
    {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_start(cur);
#endif
        tMPI_Event_wait( &(cev->met[myrank]).recv_ev );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
        for (i = 0; i < cev->N; i++)
        {
            if ((!cev->met[myrank].read_data[i]) &&
                (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct))
            {
                tMPI_Event_process( &(cev->met[myrank]).recv_ev, 1);
                tMPI_Mult_recv(comm, cev, i, myrank, TMPI_ALLTOALL_TAG,
                               recvtype, recvsize, (char*)recvbuf+recvsize*i,
                               &ret);
                if (ret != TMPI_SUCCESS)
                {
                    return ret;
                }
                cev->met[myrank].read_data[i] = TRUE;
                n_remaining--;
            }
        }
    }


    /* and wait until everybody is done copying our data */
    tMPI_Wait_for_others(cev, myrank);

#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Alltoall);
#endif
    return ret;
}
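For context, a hedged usage sketch of tMPI_Alltoall from inside an already-running thread_mpi rank. Only the tMPI_Alltoall signature, TMPI_COMM_WORLD, tMPI_Error and TMPI_ERR_BUF appear above; the header path, TMPI_INT, tMPI_Comm_size and tMPI_Comm_rank are assumptions about the rest of the MPI-like tMPI API, and alltoall_demo is purely illustrative.

#include <stdlib.h>
#include "thread_mpi/tmpi.h"           /* assumed header path */

/* Each rank sends one int to every other rank; assumes the calling thread
   was started by thread_mpi and the library is initialized. */
int alltoall_demo(void)
{
    int  nranks, myrank, i, ret;
    int *sendbuf, *recvbuf;

    tMPI_Comm_size(TMPI_COMM_WORLD, &nranks);  /* assumed MPI-like calls */
    tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);

    sendbuf = malloc(nranks*sizeof(int));
    recvbuf = malloc(nranks*sizeof(int));
    if (!sendbuf || !recvbuf)
    {
        free(sendbuf);
        free(recvbuf);
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_BUF);
    }

    for (i = 0; i < nranks; i++)
    {
        sendbuf[i] = myrank*1000 + i;          /* something recognizable */
    }

    /* one element of type int to and from every rank */
    ret = tMPI_Alltoall(sendbuf, 1, TMPI_INT, recvbuf, 1, TMPI_INT,
                        TMPI_COMM_WORLD);

    free(sendbuf);
    free(recvbuf);
    return ret;
}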