Example #1
int tMPI_Cart_get(tMPI_Comm comm, int maxdims, int *dims, int *periods,
                  int *coords)
{
    int i;
    int myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_get(%p, %d, %p, %p, %p)", comm, maxdims, 
                       dims, periods, coords);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!comm->cart || comm->cart->ndims==0)
        return TMPI_SUCCESS;

    tMPI_Cart_coords(comm, myrank, maxdims, coords);

    for(i=0;i<comm->cart->ndims;i++)
    {
        if (i>=maxdims)
        {
            return tMPI_Error(comm, TMPI_ERR_DIMS);
        }
        dims[i]=comm->cart->dims[i];
        periods[i]=comm->cart->periods[i];
    }

    return TMPI_SUCCESS;
}
Example #2
int tMPI_Cart_coords(tMPI_Comm comm, int rank, int maxdims, int *coords)
{
    int i;
    int rank_left=rank;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_coords(%p, %d, %d, %p)", comm, rank, maxdims, 
                     coords);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!comm->cart || comm->cart->ndims==0)
        return TMPI_SUCCESS;
    if (maxdims < comm->cart->ndims)
    {
        return tMPI_Error(comm, TMPI_ERR_DIMS);
    }

    /* again, row-major ordering */
    for(i=comm->cart->ndims-1;i>=0;i--)
    {
        coords[i]=rank_left%comm->cart->dims[i];
        rank_left /= comm->cart->dims[i];
    }   

    return TMPI_SUCCESS;
}
Example #3
int tMPI_Cart_create(tMPI_Comm comm_old, int ndims, int *dims, int *periods,
                     int reorder, tMPI_Comm *comm_cart)
{
    int myrank = tMPI_Comm_seek_rank(comm_old, tMPI_Get_current());
    int key    = myrank;
    int color  = 0;
    int Ntot   = 1;
    int i;


#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_create(%p, %d, %p, %p, %d, %p)", comm_old,
                     ndims, dims, periods, reorder, comm_cart);
#endif
    if (!comm_old)
    {
        return tMPI_Error(comm_old, TMPI_ERR_COMM);
    }
    /* calculate the total number of procs in cartesian comm */
    for (i = 0; i < ndims; i++)
    {
        Ntot *= dims[i];
    }
    /* refuse to create if there's not enough procs */
    if (comm_old->grp.N < Ntot)
    {
        *comm_cart = TMPI_COMM_NULL;
#if 1
        return tMPI_Error(comm_old, TMPI_ERR_CART_CREATE_NPROCS);
#endif
    }

    if (key >= Ntot)
    {
        key = TMPI_UNDEFINED;
    }

    if (reorder)
    {
        tMPI_Cart_map(comm_old, ndims, dims, periods, &key);
    }

    if (key == TMPI_UNDEFINED)
    {
        color = TMPI_UNDEFINED;
    }

    tMPI_Comm_split(comm_old, color, key, comm_cart);

    tMPI_Cart_init(comm_cart, ndims, dims, periods);

    return TMPI_SUCCESS;
}
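The following is a hedged usage sketch for the Cartesian topology calls (tMPI_Cart_create together with tMPI_Cart_get, tMPI_Cart_coords and tMPI_Cart_rank from Examples #1, #2 and #9). It assumes the MPI-style helper tMPI_Comm_size exists with its usual signature, that the "tmpi.h" include path is correct for the installation, and that the code runs inside an already started thread_mpi thread; it is an illustration, not part of the library source.

#include "tmpi.h"   /* assumed path to the thread_mpi public header */

/* Hedged sketch: build a periodic grid over all threads and find a neighbor. */
static void cart_demo(void)
{
    tMPI_Comm cart;
    int       nprocs, east_rank;
    int       dims[2], periods[2] = {1, 1}, coords[2], east[2];

    tMPI_Comm_size(TMPI_COMM_WORLD, &nprocs);
    dims[0] = nprocs;   /* trivial nprocs x 1 decomposition */
    dims[1] = 1;

    tMPI_Cart_create(TMPI_COMM_WORLD, 2, dims, periods, 0, &cart);
    if (cart == TMPI_COMM_NULL)
    {
        return;         /* this thread is not part of the grid */
    }

    /* dims, periods and our own coordinates in one call */
    tMPI_Cart_get(cart, 2, dims, periods, coords);

    /* rank of the eastern neighbor; periodicity wraps around the edge */
    east[0] = coords[0] + 1;
    east[1] = coords[1];
    tMPI_Cart_rank(cart, east, &east_rank);
}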
Example #4
int tMPI_Barrier(tMPI_Comm comm) 
{
#ifdef TMPI_PROFILE
    struct tmpi_thread *cur=tMPI_Get_current();
    tMPI_Profile_count_start(cur);
#endif

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Barrier(%p, %d, %p, %d, %d, %p, %p)", comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }

    if (comm->grp.N>1)
    {
#if defined(TMPI_PROFILE) 
        tMPI_Profile_wait_start(cur);
#endif

        tMPI_Barrier_wait( &(comm->barrier) );
#if defined(TMPI_PROFILE) 
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Barrier);
#endif
    }

#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Barrier);
#endif
    return TMPI_SUCCESS;
}
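A minimal usage sketch for tMPI_Barrier, assuming it is called from code that already runs inside a thread_mpi thread and that the thread_mpi declarations are in scope:

/* Hedged sketch: make sure every thread finished its setup phase before any
   thread starts the main work. */
static void synchronize_after_setup(tMPI_Comm comm)
{
    /* ... per-thread setup ... */

    if (tMPI_Barrier(comm) != TMPI_SUCCESS)
    {
        /* with the default TMPI_ERRORS_ARE_FATAL handler this is not reached */
    }

    /* ... it is now safe to read data prepared by the other threads ... */
}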
Example #5
/* once */
int tMPI_Once(tMPI_Comm comm, void (*function)(void*), void *param, 
                int *was_first)
{
    int myrank;
    int ret=TMPI_SUCCESS;
    struct coll_sync *csync;
    struct coll_env *cev;
    int syncs;


    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());

    /* we increase our counter, and determine which coll_env we get */
    csync=&(comm->csync[myrank]);
    csync->syncs++;
    cev=&(comm->cev[csync->syncs % N_COLL_ENV]);

    /* now do a compare-and-swap on the current_sync counter */
    syncs=tMPI_Atomic_get( &(cev->coll.current_sync));
    if ((csync->syncs - syncs > 0) && /* check if sync was an earlier number. 
                                         If it is a later number, we can't 
                                         have been the first to arrive here. */
        tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs))
    {
        /* we're the first! */
        function(param);
        if (was_first)
            *was_first=TRUE;
    }
    return ret;
}
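A hedged sketch of using tMPI_Once to run a piece of initialization exactly once per communicator; the table and its init function are hypothetical names used only for illustration, and the thread_mpi declarations are assumed to be in scope.

/* Hedged sketch: fill a shared lookup table once, no matter how many threads
   pass through this code. */
static double shared_table[256];

static void init_table(void *unused)
{
    int i;

    (void)unused;
    for (i = 0; i < 256; i++)
    {
        shared_table[i] = (double)(i*i);   /* any idempotent, one-off setup */
    }
}

static void ensure_table(tMPI_Comm comm)
{
    int was_first = 0;

    tMPI_Once(comm, init_table, NULL, &was_first);
    /* exactly one caller sees was_first == TRUE; note that callers other than
       the first do not wait for init_table to finish -- use tMPI_Once_wait
       (Example #19) when the other threads need the result before continuing. */
}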
Example #6
void *tMPI_Realloc(void *p, size_t size)
{
    void *ret=(void*)realloc(p, size);
    if (!ret)
    {
        tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_MALLOC);
    }
    return ret;
}
Example #7
void *tMPI_Malloc(size_t size)
{
    void *ret=(void*)malloc(size);

    if (!ret)
    {
        tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_MALLOC);
    }
    return ret;
}
Example #8
struct tmpi_thread *tMPI_Get_thread(tMPI_Comm comm, int rank)
{
    /* check destination */
    if ( (rank < 0) || (rank >= comm->grp.N) )
    {
        tMPI_Error(comm, TMPI_ERR_GROUP_RANK);
        return NULL;
    }
    return comm->grp.peers[rank];
}
Example #9
int tMPI_Cart_rank(tMPI_Comm comm, int *coords, int *rank)
{
    int i, mul = 1, ret = 0;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_get(%p, %p, %p)", comm, coords, rank);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!comm->cart || comm->cart->ndims == 0)
    {
        return TMPI_SUCCESS;
    }

    /* because of row-major ordering, we count the dimensions down */
    for (i = comm->cart->ndims-1; i >= 0; i--)
    {
        int rcoord = coords[i];
        if (comm->cart->periods[i])
        {
            /* apply periodic boundary conditions */
            rcoord = rcoord % comm->cart->dims[i];
            if (rcoord < 0)
            {
                rcoord += comm->cart->dims[i];
            }
        }
        else
        {
            if (rcoord < 0 || rcoord >= comm->cart->dims[i])
            {
                return tMPI_Error(comm, TMPI_ERR_DIMS);
            }
        }
        ret += mul*rcoord;
        mul *= comm->cart->dims[i];
    }
    *rank = ret;
    return TMPI_SUCCESS;
}
Example #10
int tMPI_Get_count(tMPI_Status *status, tMPI_Datatype datatype, int *count)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Get_count(%p, %p, %p)", status, datatype, count);
#endif
    if (!status)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_STATUS);
    }
    *count = (int)(status->transferred/datatype->size);
    return TMPI_SUCCESS;
}
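A hedged sketch of the receive-then-count pattern that tMPI_Get_count serves. tMPI_Recv, tMPI_Status, TMPI_INT and TMPI_ANY_TAG are assumed to exist with their MPI-like meanings; they do not appear in the examples shown here.

/* Hedged sketch: receive up to 64 ints from rank 0 and ask how many arrived. */
static int recv_and_count(tMPI_Comm comm)
{
    int         buf[64];
    int         count = 0;
    tMPI_Status status;

    if (tMPI_Recv(buf, 64, TMPI_INT, 0, TMPI_ANY_TAG, comm, &status)
        != TMPI_SUCCESS)
    {
        return -1;
    }
    /* the transferred field in the status is a byte count; tMPI_Get_count
       divides by the datatype size to turn it into an element count */
    tMPI_Get_count(&status, TMPI_INT, &count);
    return count;
}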
Example #11
int tMPI_Cart_map(tMPI_Comm comm, int ndims, int *dims, int *periods, 
                  int *newrank)
{
    /* this function doesn't actually do anything beyond returning the current
       rank (or TMPI_UNDEFINED if it doesn't fit in the new topology) */
    int myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());
    int Ntot=1;
    int i;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cart_map(%p, %d, %p, %p, %p)", comm, ndims, dims, 
                     periods, newrank);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!periods)
    {
        return tMPI_Error(comm, TMPI_ERR_DIMS);
    }
 
    /* calculate the total number of procs in cartesian comm */
    for(i=0;i<ndims;i++)
    {
        Ntot *= dims[i];
    }

    if (myrank >= Ntot)
    {
        *newrank=TMPI_UNDEFINED;
    }
    else
    {
        *newrank=myrank;
    }

    return TMPI_SUCCESS;
}
Example #12
void tMPI_Coll_root_xfer(tMPI_Comm comm, tMPI_Datatype sendtype, 
                         tMPI_Datatype recvtype, 
                         size_t sendsize, size_t recvsize, 
                         void* sendbuf, void* recvbuf, int *ret)
{
    /* do root transfer */
    if (recvsize < sendsize)
    {
        *ret=tMPI_Error(comm, TMPI_ERR_XFER_BUFSIZE);
        return;
    }
    if (recvtype != sendtype)
    {
        *ret=tMPI_Error(comm, TMPI_ERR_MULTI_MISMATCH);
        return;
    }
    if ( sendbuf == recvbuf )
    {
        *ret=tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_XFER_BUF_OVERLAP);
        return;
    }

    memcpy(recvbuf, sendbuf, sendsize);
}
Example #13
int tMPI_Cartdim_get(tMPI_Comm comm, int *ndims)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Cartdim_get(%p, %p)", comm, ndims);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!comm->cart || comm->cart->ndims==0)
    {
        return TMPI_SUCCESS;
    }
    *ndims=comm->cart->ndims;
    return TMPI_SUCCESS;
}
Example #14
/* topology functions */
int tMPI_Topo_test(tMPI_Comm comm, int *status)
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Topo_test(%p, %p)", comm, status);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }

    if (comm->cart)
        *status=TMPI_CART;
    /*else if (comm->graph)
        status=MPI_GRAPH;*/
    else 
        *status=TMPI_UNDEFINED;

    return TMPI_SUCCESS;
}
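A small hedged sketch for tMPI_Topo_test, using only names that appear in the examples shown here:

/* Hedged sketch: report whether a communicator carries a Cartesian topology. */
static int has_cart_topology(tMPI_Comm comm)
{
    int status = TMPI_UNDEFINED;

    tMPI_Topo_test(comm, &status);
    return (status == TMPI_CART);
}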
Example #15
int tMPI_Gather(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                int root, tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Gather(%p, %d, %p, %p, %d, %p, %d, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, root, comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    if (myrank == root)
    {
        int i;
        int n_remaining = comm->grp.N-1;
        /* do root transfer */
        if (sendbuf != TMPI_IN_PLACE)
        {
            tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                                sendtype->size*sendcount,
                                recvtype->size*recvcount,
                                sendbuf,
                                (char*)recvbuf+myrank*recvcount*recvtype->size,
                                &ret);
        }
        for (i = 0; i < comm->grp.N; i++)
        {
            cev->met[myrank].read_data[i] = FALSE;
        }
        cev->met[myrank].read_data[myrank] = TRUE;

        /* wait for data availability as long as there are xfers to be done */
        while (n_remaining > 0)
        {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_start(cur);
#endif
            tMPI_Event_wait( &(cev->met[myrank]).recv_ev );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
            /* now check all of them */
            for (i = 0; i < comm->grp.N; i++)
            {
                if (!cev->met[myrank].read_data[i] &&
                    (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct))
                {
                    tMPI_Mult_recv(comm, cev, i, 0, TMPI_GATHER_TAG, recvtype,
                                   recvcount*recvtype->size,
                                   (char*)recvbuf+i*recvcount*recvtype->size,
                                   &ret);
                    tMPI_Event_process( &(cev->met[myrank]).recv_ev, 1);
                    if (ret != TMPI_SUCCESS)
                    {
                        return ret;
                    }
                    cev->met[myrank].read_data[i] = TRUE;
                    n_remaining--;
                }
            }
        }
    }
    else
    {
        if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
        {
            return tMPI_Error(comm, TMPI_ERR_BUF);
        }

        /* first set up the data just to root. */
        ret = tMPI_Post_multi(cev, myrank, 0, TMPI_GATHER_TAG, sendtype,
                              sendcount*sendtype->size, sendbuf, 1, synct, root);
        if (ret != TMPI_SUCCESS)
        {
            return ret;
        }
        /* and wait until root is done copying */
        tMPI_Wait_for_others(cev, myrank);
    }
#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Gather);
#endif
    return ret;
}
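A hedged usage sketch for tMPI_Gather that collects one value per thread at the root; tMPI_Comm_rank, tMPI_Comm_size and TMPI_INT are assumed to exist with their usual MPI-style meanings, and the "tmpi.h" include path is an assumption.

#include <stdlib.h>   /* free() */
#include "tmpi.h"     /* assumed path to the thread_mpi public header */

/* Hedged sketch: every thread contributes its rank, rank 0 collects them all. */
static void gather_ranks(tMPI_Comm comm)
{
    int  myrank, nprocs, sendval;
    int *recvbuf = NULL;

    tMPI_Comm_rank(comm, &myrank);
    tMPI_Comm_size(comm, &nprocs);
    sendval = myrank;

    if (myrank == 0)
    {
        recvbuf = (int*)tMPI_Malloc(sizeof(int)*nprocs);
    }
    /* recvbuf, recvcount and recvtype are only significant at the root */
    tMPI_Gather(&sendval, 1, TMPI_INT, recvbuf, 1, TMPI_INT, 0, comm);

    if (myrank == 0)
    {
        /* ... use recvbuf[0..nprocs-1] ... */
        free(recvbuf);
    }
}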
Example #16
void tMPI_Mult_recv(tMPI_Comm comm, struct coll_env *cev, int rank,
                    int index, int expected_tag, tMPI_Datatype recvtype, 
                    size_t recvsize, void *recvbuf, int *ret)
{
    size_t sendsize=cev->met[rank].bufsize[index];

    /* check tags, types */
    if ((cev->met[rank].datatype != recvtype ) || 
        (cev->met[rank].tag != expected_tag))
    {
        *ret=tMPI_Error(comm, TMPI_ERR_MULTI_MISMATCH);
    }
  
    if (sendsize) /* we allow NULL ptrs if there's nothing to xmit */
    {
        void *srcbuf;
#ifdef USE_COLLECTIVE_COPY_BUFFER
        tmpi_bool decrease_ctr=FALSE;
#endif

        if ( sendsize > recvsize ) 
        {
            *ret=tMPI_Error(comm, TMPI_ERR_XFER_BUFSIZE);
            return;
        }

        if ( cev->met[rank].buf == recvbuf )
        {
            *ret=tMPI_Error(TMPI_COMM_WORLD,TMPI_ERR_XFER_BUF_OVERLAP);
            return;
        }
        /* get source buffer */
#ifdef USE_COLLECTIVE_COPY_BUFFER
        if ( !(cev->met[rank].using_cb)) 
#endif
        {
            srcbuf=cev->met[rank].buf[index];
        }
#ifdef USE_COLLECTIVE_COPY_BUFFER
        else
        {
            srcbuf=tMPI_Atomic_ptr_get(&(cev->met[rank].cpbuf[index]));
            tMPI_Atomic_memory_barrier_acq();

            if(!srcbuf)
            { /* there was (as of yet) no copied buffer */
                void *try_again_srcbuf;
                /* we need to try checking the pointer again after we increase
                   the read counter, signaling that one more thread
                   is reading. */
                tMPI_Atomic_add_return(&(cev->met[rank].buf_readcount), 1);
                /* a full memory barrier */
                tMPI_Atomic_memory_barrier();
                try_again_srcbuf=tMPI_Atomic_ptr_get(
                                         &(cev->met[rank].cpbuf[index]));
                if (!try_again_srcbuf)
                {
                    /* apparently the copied buffer is not ready yet. We
                       just use the real source buffer. We have already
                       indicated we're reading from the regular buf. */
                    srcbuf=cev->met[rank].buf[index];
                    decrease_ctr=TRUE;

                }
                else
                {
                    /* We tried again, and this time there was a copied buffer. 
                       We use that, and indicate that we're not reading from the
                       regular buf. This case should be pretty rare.  */
                    tMPI_Atomic_fetch_add(&(cev->met[rank].buf_readcount),-1);
                    tMPI_Atomic_memory_barrier_acq();
                    srcbuf=try_again_srcbuf;
                }
            }

#ifdef TMPI_PROFILE
            if (srcbuf)
                tMPI_Profile_count_buffered_coll_xfer(tMPI_Get_current());
#endif
        }
#endif
        /* copy data */
        memcpy((char*)recvbuf, srcbuf, sendsize);
#ifdef TMPI_PROFILE
        tMPI_Profile_count_coll_xfer(tMPI_Get_current());
#endif

#ifdef USE_COLLECTIVE_COPY_BUFFER
        if (decrease_ctr)
        {
            /* we decrement the read count; potentially releasing the buffer. */
            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_fetch_add( &(cev->met[rank].buf_readcount), -1);
        }
#endif
    }
    /* signal one thread ready */
    {
        int reta;
        tMPI_Atomic_memory_barrier_rel();
        reta=tMPI_Atomic_add_return( &(cev->met[rank].n_remaining), -1);
        if (reta <= 0)
        {
            tMPI_Event_signal( &(cev->met[rank].send_ev) );
        }
    }
}
Example #17
/* this is the main comm creation function. All other functions that create
    comms use this*/
int tMPI_Comm_split(tMPI_Comm comm, int color, int key, tMPI_Comm *newcomm)
{
    int                 i, j;
    int                 N = tMPI_Comm_N(comm);
    volatile tMPI_Comm *newcomm_list;
    volatile int        colors[MAX_PREALLOC_THREADS]; /* array with the colors
                                                         of each thread */
    volatile int        keys[MAX_PREALLOC_THREADS];   /* same for keys (only one of
                                                          the threads actually supplies
                                                          these arrays to the comm
                                                          structure) */
    tmpi_bool          i_am_first = FALSE;
    int                myrank     = tMPI_Comm_seek_rank(comm, tMPI_Get_current());
    struct tmpi_split *spl;
    int                ret;

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_split(%p, %d, %d, %p)", comm, color, key,
                     newcomm);
#endif
    if (!comm)
    {
        *newcomm = NULL;
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }

    ret = tMPI_Thread_mutex_lock(&(comm->comm_create_lock));
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }
    /* first get the colors */
    if (!comm->new_comm)
    {
        /* I am apparently first */
        comm->split    = (struct tmpi_split*)tMPI_Malloc(sizeof(struct tmpi_split));
        comm->new_comm = (tMPI_Comm*)tMPI_Malloc(N*sizeof(tMPI_Comm));
        if (N <= MAX_PREALLOC_THREADS)
        {
            comm->split->colors = colors;
            comm->split->keys   = keys;
        }
        else
        {
            comm->split->colors = (int*)tMPI_Malloc(N*sizeof(int));
            comm->split->keys   = (int*)tMPI_Malloc(N*sizeof(int));
        }
        comm->split->Ncol_init  = tMPI_Comm_N(comm);
        comm->split->can_finish = FALSE;
        i_am_first              = TRUE;
        /* the main communicator contains a list the size of grp.N */
    }
    newcomm_list        = comm->new_comm; /* we copy it to the local stacks because
                                             we can later erase comm->new_comm safely */
    spl                 = comm->split;    /* we do the same for spl */
    spl->colors[myrank] = color;
    spl->keys[myrank]   = key;
    spl->Ncol_init--;

    if (spl->Ncol_init == 0)
    {
        ret = tMPI_Thread_cond_signal(&(comm->comm_create_prep));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }

    if (!i_am_first)
    {
        /* all other threads can just wait until the creator thread is
           finished */
        while (!spl->can_finish)
        {
            ret = tMPI_Thread_cond_wait(&(comm->comm_create_finish),
                                        &(comm->comm_create_lock) );
            if (ret != 0)
            {
                return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
            }
        }
    }
    else
    {
        int        Ncomms = 0;
        int        comm_color_[MAX_PREALLOC_THREADS];
        int        comm_N_[MAX_PREALLOC_THREADS];
        int       *comm_color = comm_color_; /* there can't be more comms than N */
        int       *comm_N     = comm_N_;     /* the number of procs in a group */

        int       *comm_groups;              /* the groups */
        tMPI_Comm *comms;                    /* the communicators */

        /* wait for the colors to be done */
        /*if (N>1)*/
        while (spl->Ncol_init > 0)
        {
            ret = tMPI_Thread_cond_wait(&(comm->comm_create_prep),
                                        &(comm->comm_create_lock));
            if (ret != 0)
            {
                return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
            }
        }

        /* reset the state so that a new comm creating function can run */
        spl->Ncol_destroy = N;
        comm->new_comm    = 0;
        comm->split       = 0;

        comm_groups = (int*)tMPI_Malloc(N*N*sizeof(int));
        if (N > MAX_PREALLOC_THREADS)
        {
            comm_color = (int*)tMPI_Malloc(N*sizeof(int));
            comm_N     = (int*)tMPI_Malloc(N*sizeof(int));
        }

        /* count colors, allocate and split up communicators */
        tMPI_Split_colors(N, (int*)spl->colors,
                          (int*)spl->keys,
                          &Ncomms,
                          comm_N, comm_color, comm_groups);


        /* allocate a bunch of communicators */
        comms = (tMPI_Comm*)tMPI_Malloc(Ncomms*sizeof(tMPI_Comm));
        for (i = 0; i < Ncomms; i++)
        {
            ret = tMPI_Comm_alloc(&(comms[i]), comm, comm_N[i]);
            if (ret != TMPI_SUCCESS)
            {
                return ret;
            }
        }

        /* now distribute the comms */
        for (i = 0; i < Ncomms; i++)
        {
            comms[i]->grp.N = comm_N[i];
            for (j = 0; j < comm_N[i]; j++)
            {
                comms[i]->grp.peers[j] =
                    comm->grp.peers[comm_groups[i*comm->grp.N + j]];
            }
        }
        /* and put them into the newcomm_list */
        for (i = 0; i < N; i++)
        {
            newcomm_list[i] = TMPI_COMM_NULL;
            for (j = 0; j < Ncomms; j++)
            {
                if (spl->colors[i] == comm_color[j])
                {
                    newcomm_list[i] = comms[j];
                    break;
                }
            }
        }

#ifdef TMPI_DEBUG
        /* output */
        for (i = 0; i < Ncomms; i++)
        {
            printf("Group %d (color %d) has %d members: ",
                   i, comm_color[i], comm_N[i]);
            for (j = 0; j < comm_N[i]; j++)
            {
                printf(" %d ", comm_groups[comm->grp.N*i + j]);
            }

            printf(" rank: ");
            for (j = 0; j < comm_N[i]; j++)
            {
                printf(" %d ", spl->keys[comm_groups[N*i + j]]);
            }
            printf(" color: ");
            for (j = 0; j < comm_N[i]; j++)
            {
                printf(" %d ", spl->colors[comm_groups[N*i + j]]);
            }
            printf("\n");
        }
#endif
        if (N > MAX_PREALLOC_THREADS)
        {
            free((int*)spl->colors);
            free((int*)spl->keys);
            free(comm_color);
            free(comm_N);
        }
        free(comm_groups);
        free(comms);
        spl->can_finish = TRUE;

        /* tell the waiting threads that there's a comm ready */
        ret = tMPI_Thread_cond_broadcast(&(comm->comm_create_finish));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }
    /* here the individual threads get their comm object */
    *newcomm = newcomm_list[myrank];

    /* free when we have assigned them all, so we can reuse the object*/
    spl->Ncol_destroy--;
    if (spl->Ncol_destroy == 0)
    {
        free((void*)newcomm_list);
        free(spl);
    }

    ret = tMPI_Thread_mutex_unlock(&(comm->comm_create_lock));
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }

    return TMPI_SUCCESS;
}
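A hedged sketch of splitting a communicator into two halves with tMPI_Comm_split. tMPI_Comm_rank and tMPI_Comm_size are assumed to behave as in MPI, the thread_mpi header is assumed to be included, and the resulting communicator is released with tMPI_Comm_free (Example #22).

/* Hedged sketch: split the communicator into a "low" and a "high" half.
   Every thread of 'comm' must make this call, since the split is collective. */
static void split_in_halves(tMPI_Comm comm)
{
    int       myrank, nprocs, color;
    tMPI_Comm half;

    tMPI_Comm_rank(comm, &myrank);
    tMPI_Comm_size(comm, &nprocs);

    color = (myrank < nprocs/2) ? 0 : 1;   /* two colors -> two new comms */
    tMPI_Comm_split(comm, color, myrank, &half);

    /* ... collectives on 'half' now only involve our half of the threads ... */

    tMPI_Comm_free(&half);
}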
Example #18
int tMPI_Comm_destroy(tMPI_Comm comm, tmpi_bool do_link_lock)
{
    int i;
    int ret;

    free(comm->grp.peers);
    for (i = 0; i < comm->N_reduce_iter; i++)
    {
        free(comm->reduce_barrier[i]);
    }
    free(comm->reduce_barrier);
    free(comm->N_reduce);

    for (i = 0; i < N_COLL_ENV; i++)
    {
        tMPI_Coll_env_destroy( &(comm->cev[i]) );
    }
    for (i = 0; i < comm->grp.N; i++)
    {
        tMPI_Coll_sync_destroy( &(comm->csync[i]) );
    }
    free(comm->cev);
    free(comm->csync);

    ret = tMPI_Thread_mutex_destroy( &(comm->comm_create_lock) );
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }
    ret = tMPI_Thread_cond_destroy( &(comm->comm_create_prep) );
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }
    ret = tMPI_Thread_cond_destroy( &(comm->comm_create_finish) );
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }

    free((void*)comm->reduce_sendbuf);
    free((void*)comm->reduce_recvbuf);

    if (comm->cart)
    {
        tMPI_Cart_destroy( comm->cart );
        free(comm->cart);
    }

    /* remove ourselves from the circular list */
    if (do_link_lock)
    {
        ret = tMPI_Thread_mutex_lock( &(tmpi_global->comm_link_lock) );
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }
    if (comm->next)
    {
        comm->next->prev = comm->prev;
    }
    if (comm->prev)
    {
        comm->prev->next = comm->next;
    }
    free(comm);
    if (do_link_lock)
    {
        ret = tMPI_Thread_mutex_unlock( &(tmpi_global->comm_link_lock) );
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }
    return TMPI_SUCCESS;
}
Example #19
void* tMPI_Once_wait(tMPI_Comm comm, void* (*function)(void*), void *param, 
                     int *was_first)
{
    int myrank;
    struct coll_sync *csync;
    struct coll_env *cev;
    int syncs;
    void *ret;


    if (!comm)
    {
        tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
        return NULL;
    }
    myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());

    /* we increase our counter, and determine which coll_env we get */
    csync=&(comm->csync[myrank]);
    csync->syncs++;
    cev=&(comm->cev[csync->syncs % N_COLL_ENV]);

    /* now do a compare-and-swap on the current_sync counter */
    syncs=tMPI_Atomic_get( &(cev->coll.current_sync));
    tMPI_Atomic_memory_barrier_acq();
    if ((csync->syncs - syncs > 0) && /* check if sync was an earlier number. 
                                         If it is a later number, we can't 
                                         have been the first to arrive here. 
                                         Calculating the difference instead
                                         of comparing directly avoids ABA 
                                         problems. */
        tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs))
    {
        /* we're the first! */
        ret=function(param);
        if (was_first)
            *was_first=TRUE;

        /* broadcast the output data */
        cev->coll.res=ret;

        tMPI_Atomic_memory_barrier_rel();
        /* signal that we're done */
        tMPI_Atomic_fetch_add(&(cev->coll.current_sync), 1);
        /* we need to keep being in sync */
        csync->syncs++;
    }
    else
    {
        /* we need to wait until current_sync gets increased again */
        csync->syncs++;
        do
        {
            /*tMPI_Atomic_memory_barrier();*/
            syncs=tMPI_Atomic_get( &(cev->coll.current_sync) );
        } while (csync->syncs - syncs > 0); /* difference again due to ABA 
                                               problems */
        tMPI_Atomic_memory_barrier_acq();
        ret=cev->coll.res;
    }
    return ret;
}
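A hedged sketch for tMPI_Once_wait, which, unlike tMPI_Once, lets every caller wait for and receive the result of the single invocation. The allocator callback is a hypothetical helper, not part of the library; all threads are expected to pass the same size, and the thread_mpi declarations are assumed to be in scope.

/* Hedged sketch: allocate one shared scratch buffer per communicator; every
   thread gets the same pointer back. */
static void *alloc_scratch(void *size_ptr)
{
    return tMPI_Malloc(*(size_t*)size_ptr);
}

static void *get_shared_scratch(tMPI_Comm comm, size_t size)
{
    int was_first = 0;

    /* only the first arrival actually calls alloc_scratch(); the others wait
       and receive the pointer it returned */
    return tMPI_Once_wait(comm, alloc_scratch, &size, &was_first);
}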
Example #20
int tMPI_Comm_alloc(tMPI_Comm *newcomm, tMPI_Comm parent, int N)
{
    struct tmpi_comm_ *retc;
    int                i;
    int                ret;

    retc = (struct tmpi_comm_*)tMPI_Malloc(sizeof(struct tmpi_comm_));
    if (retc == NULL)
    {
        return TMPI_ERR_NO_MEM;
    }

    retc->grp.peers = (struct tmpi_thread**)tMPI_Malloc(
                sizeof(struct tmpi_thread*)*Nthreads);
    if (retc->grp.peers == NULL)
    {
        return TMPI_ERR_NO_MEM;
    }
    retc->grp.N = N;

    ret = tMPI_Thread_mutex_init( &(retc->comm_create_lock) );
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }
    ret = tMPI_Thread_cond_init( &(retc->comm_create_prep) );
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }
    ret = tMPI_Thread_cond_init( &(retc->comm_create_finish) );
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }

    retc->split    = NULL;
    retc->new_comm = NULL;
    /* we have no topology to start out with */
    retc->cart = NULL;
    /*retc->graph=NULL;*/

    /* we start counting at 0 */
    tMPI_Atomic_set( &(retc->destroy_counter), 0);

    /* initialize the main barrier */
    tMPI_Barrier_init(&(retc->barrier), N);

    /* the reduce barriers */
    {
        /* First calculate the number of reduce barriers */
        int Niter = 0; /* the iteration number */
        int Nred  = N; /* the number of reduce barriers for this iteration */
        while (Nred > 1)
        {
            /* Nred is now Nred/2 + a remainder, because a solitary
               process at the end of the list must still be accounted for */
            Nred   = Nred/2 + Nred%2;
            Niter += 1;
        }

        retc->N_reduce_iter = Niter;
        /* allocate the list */
        retc->reduce_barrier = (tMPI_Barrier_t**)
            tMPI_Malloc(sizeof(tMPI_Barrier_t*)*(Niter+1));
        if (retc->reduce_barrier == NULL)
        {
            return TMPI_ERR_NO_MEM;
        }
        retc->N_reduce = (int*)tMPI_Malloc(sizeof(int)*(Niter+1));
        if (retc->N_reduce == NULL)
        {
            return TMPI_ERR_NO_MEM;
        }

        /* we re-set Nred to N */
        Nred = N;
        for (i = 0; i < Niter; i++)
        {
            int j;

            Nred              = Nred/2 + Nred%2;
            retc->N_reduce[i] = Nred;
            /* allocate the sub-list */
            retc->reduce_barrier[i] = (tMPI_Barrier_t*)
                tMPI_Malloc(sizeof(tMPI_Barrier_t)*(Nred));
            if (retc->reduce_barrier[i] == NULL)
            {
                return TMPI_ERR_NO_MEM;
            }
            for (j = 0; j < Nred; j++)
            {
                tMPI_Barrier_init(&(retc->reduce_barrier[i][j]), 2);
            }
        }
    }

    /* the reduce buffers */
    retc->reduce_sendbuf = (tMPI_Atomic_ptr_t*)
        tMPI_Malloc(sizeof(tMPI_Atomic_ptr_t)*Nthreads);
    if (retc->reduce_sendbuf == NULL)
    {
        return TMPI_ERR_NO_MEM;
    }
    retc->reduce_recvbuf = (tMPI_Atomic_ptr_t*)
        tMPI_Malloc(sizeof(tMPI_Atomic_ptr_t)*Nthreads);
    if (retc->reduce_recvbuf == NULL)
    {
        return TMPI_ERR_NO_MEM;
    }

    if (parent)
    {
        retc->erh = parent->erh;
    }
    else
    {
        retc->erh = TMPI_ERRORS_ARE_FATAL;
    }

    /* coll_env objects */
    retc->cev = (struct coll_env*)tMPI_Malloc(sizeof(struct coll_env)*
                                              N_COLL_ENV);
    if (retc->cev == NULL)
    {
        return TMPI_ERR_NO_MEM;
    }

    for (i = 0; i < N_COLL_ENV; i++)
    {
        ret = tMPI_Coll_env_init( &(retc->cev[i]), N);
        if (ret != TMPI_SUCCESS)
        {
            return ret;
        }
    }
    /* multi_sync objects */
    retc->csync = (struct coll_sync*)tMPI_Malloc(sizeof(struct coll_sync)*N);
    if (retc->csync == NULL)
    {
        return TMPI_ERR_NO_MEM;
    }

    for (i = 0; i < N; i++)
    {
        ret = tMPI_Coll_sync_init( &(retc->csync[i]), N);
        if (ret != TMPI_SUCCESS)
        {
            return ret;
        }
    }

    ret = tMPI_Thread_mutex_lock( &(tmpi_global->comm_link_lock) );
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }
    /* we insert ourselves in the circular list, after TMPI_COMM_WORLD */
    if (TMPI_COMM_WORLD)
    {
        retc->next = TMPI_COMM_WORLD;
        retc->prev = TMPI_COMM_WORLD->prev;

        TMPI_COMM_WORLD->prev->next = retc;
        TMPI_COMM_WORLD->prev       = retc;
    }
    else
    {
        retc->prev = retc->next = retc;
    }
    ret = tMPI_Thread_mutex_unlock( &(tmpi_global->comm_link_lock) );
    if (ret != 0)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
    }
    *newcomm = retc;
    return TMPI_SUCCESS;
}
Example #21
void tMPI_Start_threads(tmpi_bool main_returns, int N, int *argc, char ***argv, 
                        void (*start_fn)(void*), void *start_arg,
                        int (*start_fn_main)(int, char**))
{
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Start_threads(%d, %p, %p, %p, %p)", N, argc,
                       argv, start_fn, start_arg);
#endif
    if (N>0) 
    {
        int i;
        int set_affinity=FALSE;

        tmpi_finalized=FALSE;
        Nthreads=N;

        /* allocate global data */
        tmpi_global=(struct tmpi_global*)
                        tMPI_Malloc(sizeof(struct tmpi_global));
        tMPI_Global_init(tmpi_global, N);

        /* allocate world and thread data */
        threads=(struct tmpi_thread*)tMPI_Malloc(sizeof(struct tmpi_thread)*N);
        tMPI_Comm_alloc(&TMPI_COMM_WORLD, NULL, N);
        TMPI_GROUP_EMPTY=tMPI_Group_alloc();

        if (tMPI_Thread_key_create(&id_key, NULL))
        {
            tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_INIT);
        }
        for(i=0;i<N;i++)
        {
            TMPI_COMM_WORLD->grp.peers[i]=&(threads[i]);

            /* copy argc, argv */
            if (argc && argv)
            {
                int j;
                threads[i].argc=*argc;
                threads[i].argv=(char**)tMPI_Malloc(threads[i].argc*
                                                   sizeof(char*));
                for(j=0;j<threads[i].argc;j++)
                {
#if ! (defined( _WIN32 ) || defined( _WIN64 ) )
                    threads[i].argv[j]=strdup( (*argv)[j] );
#else
                    threads[i].argv[j]=_strdup( (*argv)[j] );
#endif
                }
            }
            else
            {
                threads[i].argc=0;
                threads[i].argv=NULL;
            }
            threads[i].start_fn=start_fn;
            threads[i].start_fn_main=start_fn_main;
            threads[i].start_arg=start_arg;
        }

        /* now check whether to set affinity */
#ifdef TMPI_THREAD_AFFINITY
        {
            int nhw=tMPI_Thread_get_hw_number();
            if ((nhw > 1) && (nhw == N))
            {
                set_affinity=TRUE;
            }
        }
#endif

        for(i=1;i<N;i++) /* zero is the main thread */
        {
            int ret;

            if (set_affinity)
            {
                ret=tMPI_Thread_create_aff(&(threads[i].thread_id), 
                                           tMPI_Thread_starter,
                                           (void*)&(threads[i]) ) ;
            }
            else
            {
                ret=tMPI_Thread_create(&(threads[i].thread_id), 
                                       tMPI_Thread_starter,
                                       (void*)&(threads[i]) ) ;
            }

            if(ret)
            {
                tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_INIT);
            }
        }
        /* the main thread now also runs start_fn if we don't want
           it to return */
        if (!main_returns)
            tMPI_Thread_starter((void*)&(threads[0]));
        else
            tMPI_Thread_init(&(threads[0]));
    }
}
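A hedged sketch of a typical entry point built around tMPI_Start_threads: every spawned thread runs the supplied start function and ends with tMPI_Finalize (Example #23). The thread count of 4 and the function names are illustrative, tMPI_Comm_rank is assumed to exist as in MPI, and the "tmpi.h" include path is an assumption.

#include "tmpi.h"   /* assumed path to the thread_mpi public header */

/* Hedged sketch: start 4 thread_mpi threads that all run thread_main().
   With main_returns==FALSE the calling thread becomes one of the workers,
   so tMPI_Start_threads only returns after thread_main() has finished. */
static void thread_main(void *arg)
{
    int myrank;

    (void)arg;
    tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);
    /* ... parallel work using TMPI_COMM_WORLD ... */
    tMPI_Finalize();
}

int main(int argc, char **argv)
{
    tMPI_Start_threads(FALSE, 4, &argc, &argv, thread_main, NULL, NULL);
    return 0;
}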
Example #22
int tMPI_Comm_free(tMPI_Comm *comm)
{
    int size;
    int sum;
    int ret;
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_free(%p)", comm);
#endif
#ifndef TMPI_STRICT
    if (!*comm)
    {
        return TMPI_SUCCESS;
    }

    if ((*comm)->grp.N > 1)
    {
        /* we remove ourselves from the comm. */
        int myrank = tMPI_Comm_seek_rank(*comm, tMPI_Get_current());

        ret = tMPI_Thread_mutex_lock(&((*comm)->comm_create_lock));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
        (*comm)->grp.peers[myrank] = (*comm)->grp.peers[(*comm)->grp.N-1];
        (*comm)->grp.N--;
        ret = tMPI_Thread_mutex_unlock(&((*comm)->comm_create_lock));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }
    else
    {
        /* we're the last one so we can safely destroy it */
        ret = tMPI_Comm_destroy(*comm, TRUE);
        if (ret != 0)
        {
            return ret;
        }
    }
#else
    /* This is correct if programs actually treat Comm_free as a collective
       call */
    if (!*comm)
    {
        return TMPI_SUCCESS;
    }

    size = (*comm)->grp.N;

    /* we add 1 to the destroy counter and actually deallocate if the counter
       reaches N. */
    sum = tMPI_Atomic_fetch_add( &((*comm)->destroy_counter), 1) + 1;
    /* this is a collective call on a shared data structure, so only
       one process (the last one in this case) should do anything */
    if (sum == size)
    {
        ret = tMPI_Comm_destroy(*comm, TRUE);
        if (ret != 0)
        {
            return ret;
        }
    }
#endif
    return TMPI_SUCCESS;
}
Example #23
int tMPI_Finalize(void)
{
    int i;
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Finalize()");
#endif
#ifdef TMPI_DEBUG
    printf("%5d: tMPI_Finalize called\n", tMPI_This_threadnr());
    fflush(stdout);
#endif

#ifdef TMPI_PROFILE
    {
        struct tmpi_thread *cur=tMPI_Get_current();

        tMPI_Profile_stop( &(cur->profile) );
        tMPI_Thread_barrier_wait( &(tmpi_global->barrier) );

        if (tMPI_Is_master())
        {
            tMPI_Profiles_summarize(Nthreads, threads);
        }
    }
#endif
    tMPI_Thread_barrier_wait( &(tmpi_global->barrier) );

    if (tMPI_Is_master())
    {

        /* we just wait for all threads to finish; the order isn't very 
           relevant, as all threads should arrive at their endpoints soon. */
        for(i=1;i<Nthreads;i++)
        {
            if (tMPI_Thread_join(threads[i].thread_id, NULL))
            {
                tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_FINALIZE);
            }
            tMPI_Thread_destroy(&(threads[i]));
        }
        /* at this point, we are the only thread left, so we can 
           destroy the global structures with impunity. */
        tMPI_Thread_destroy(&(threads[0]));
        free(threads);

        tMPI_Thread_key_delete(id_key);
        /* de-allocate all the comm structures. */
        {
            tMPI_Comm cur=TMPI_COMM_WORLD->next;
            while(cur && (cur!=TMPI_COMM_WORLD) )
            {
                tMPI_Comm next=cur->next;
                tMPI_Comm_destroy(cur, FALSE); /* only this thread is left */
                cur=next;
            }
            tMPI_Comm_destroy(TMPI_COMM_WORLD, FALSE);
        }

        tMPI_Group_free(&TMPI_GROUP_EMPTY);
        threads=0;
        TMPI_COMM_WORLD=NULL;
        TMPI_GROUP_EMPTY=NULL;
        Nthreads=0;

        /* deallocate the 'global' structure */
        tMPI_Global_destroy(tmpi_global);
        free(tmpi_global);

        tmpi_finalized=TRUE;
    }
    else
    {
        tMPI_Thread_exit(0);
    }
    return TMPI_SUCCESS;
}
Example #24
int tMPI_Scan(void* sendbuf, void* recvbuf, int count,
              tMPI_Datatype datatype, tMPI_Op op, tMPI_Comm comm)
{
    struct tmpi_thread *cur=tMPI_Get_current();
    int myrank=tMPI_Comm_seek_rank(comm, cur);
    int N=tMPI_Comm_N(comm);
    int prev=myrank - 1; /* my previous neighbor */
    int next=myrank + 1; /* my next neighbor */

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Scan(%p, %p, %d, %p, %p, %p)",
                     sendbuf, recvbuf, count, datatype, op, comm);
#endif
    if (count==0)
        return TMPI_SUCCESS;
    if (!recvbuf)
    {
        return tMPI_Error(comm, TMPI_ERR_BUF);
    }
    if (sendbuf==TMPI_IN_PLACE) 
    {
        sendbuf=recvbuf;
    }

    /* we set our send and recv buffers */
    tMPI_Atomic_ptr_set(&(comm->reduce_sendbuf[myrank]),sendbuf);
    tMPI_Atomic_ptr_set(&(comm->reduce_recvbuf[myrank]),recvbuf);

    /* now wait for the previous rank to finish */
    if (myrank > 0)
    {
        void *a, *b;
        int ret;

#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_start(cur);
#endif
        /* wait for the previous neighbor's data to be ready */
        tMPI_Event_wait( &(comm->csync[myrank].events[prev]) );
        tMPI_Event_process( &(comm->csync[myrank].events[prev]), 1);
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Reduce);
#endif
#ifdef TMPI_DEBUG
        printf("%d: scanning with %d \n", myrank, prev, iteration);
        fflush(stdout);
#endif
        /* now do the reduction */
        if (prev > 0)
        {
            a = (void*)tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[prev]));
        }
        else
        {
            a = (void*)tMPI_Atomic_ptr_get(&(comm->reduce_sendbuf[prev]));
        }
        b = sendbuf;

        if ((ret=tMPI_Reduce_run_op(recvbuf, a, b, datatype,
                                    count, op, comm)) != TMPI_SUCCESS)
        {
            return ret;
        }

        /* signal to my previous neighbor that I'm done with the data */
        tMPI_Event_signal( &(comm->csync[prev].events[prev]) );
    }
    else
    {
        if (sendbuf != recvbuf)
        {
            /* copy the data if this is rank 0, and not MPI_IN_PLACE */
            memcpy(recvbuf, sendbuf, count*datatype->size);
        }
    }

    if (myrank < N-1)
    {
        /* signal to my next neighbor that I have the data */
        tMPI_Event_signal( &(comm->csync[next].events[myrank]) );
        /* and wait for my next neighbor to finish */
        tMPI_Event_wait( &(comm->csync[myrank].events[myrank]) );
        tMPI_Event_process( &(comm->csync[myrank].events[myrank]), 1);
    }


#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
    tMPI_Profile_wait_start(cur);
#endif
    /*tMPI_Barrier_wait( &(comm->barrier));*/
#if defined(TMPI_PROFILE)
    /*tMPI_Profile_wait_stop(cur, TMPIWAIT_Reduce);*/
    tMPI_Profile_count_stop(cur, TMPIFN_Scan);
#endif
    return TMPI_SUCCESS;
}
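A hedged sketch of an inclusive prefix sum using tMPI_Scan; TMPI_INT, TMPI_SUM and tMPI_Comm_rank are assumed to exist with their MPI-like meanings, and the thread_mpi declarations are assumed to be in scope.

/* Hedged sketch: each rank contributes myrank+1; afterwards recv holds
   1 + 2 + ... + (myrank+1), i.e. the inclusive prefix sum up to this rank. */
static int prefix_sum(tMPI_Comm comm)
{
    int myrank, send, recv = 0;

    tMPI_Comm_rank(comm, &myrank);
    send = myrank + 1;
    tMPI_Scan(&send, &recv, 1, TMPI_INT, TMPI_SUM, comm);
    return recv;
}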
Example #25
int tMPI_Alltoall(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                  void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                  tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    int                 i;
    size_t              sendsize = sendtype->size*sendcount;
    size_t              recvsize = recvtype->size*recvcount;
    int                 n_remaining;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Alltoall(%p, %d, %p, %p, %d, %p, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!sendbuf || !recvbuf) /* don't do pointer arithmetic on a NULL ptr */
    {
        return tMPI_Error(comm, TMPI_ERR_BUF);
    }

    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    /* post our pointers */
    /* we set up multiple posts, so no Post_multi */
    cev->met[myrank].tag      = TMPI_ALLTOALL_TAG;
    cev->met[myrank].datatype = sendtype;
    tMPI_Atomic_set( &(cev->met[myrank].n_remaining), cev->N-1 );
    for (i = 0; i < comm->grp.N; i++)
    {
        cev->met[myrank].bufsize[i]   = sendsize;
        cev->met[myrank].buf[i]       = (char*)sendbuf+sendsize*i;
        cev->met[myrank].read_data[i] = FALSE;
    }
    tMPI_Atomic_memory_barrier_rel();
    tMPI_Atomic_set(&(cev->met[myrank].current_sync), synct);

    /* post availability */
    for (i = 0; i < cev->N; i++)
    {
        if (i != myrank)
        {
            tMPI_Event_signal( &(cev->met[i].recv_ev) );
        }
    }

    /* we don't do the copy buffer thing here because it's pointless:
       the processes have to synchronize anyway, because they all
       send and receive. */

    /* do root transfer */
    tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                        sendsize, recvsize,
                        (char*)sendbuf+sendsize*myrank,
                        (char*)recvbuf+recvsize*myrank, &ret);
    cev->met[myrank].read_data[myrank] = TRUE;
    /* and poll data availability */
    n_remaining = cev->N-1;
    while (n_remaining > 0)
    {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_start(cur);
#endif
        tMPI_Event_wait( &(cev->met[myrank]).recv_ev );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
        for (i = 0; i < cev->N; i++)
        {
            if ((!cev->met[myrank].read_data[i]) &&
                (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct))
            {
                tMPI_Event_process( &(cev->met[myrank]).recv_ev, 1);
                tMPI_Mult_recv(comm, cev, i, myrank, TMPI_ALLTOALL_TAG,
                               recvtype, recvsize, (char*)recvbuf+recvsize*i,
                               &ret);
                if (ret != TMPI_SUCCESS)
                {
                    return ret;
                }
                cev->met[myrank].read_data[i] = TRUE;
                n_remaining--;
            }
        }
    }


    /* and wait until everybody is done copying our data */
    tMPI_Wait_for_others(cev, myrank);

#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Alltoall);
#endif
    return ret;
}
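Finally, a hedged sketch for tMPI_Alltoall that exchanges one int with every peer; tMPI_Comm_rank, tMPI_Comm_size and TMPI_INT are assumed to exist with their MPI-like meanings, and the "tmpi.h" include path is an assumption.

#include <stdlib.h>   /* free() */
#include "tmpi.h"     /* assumed path to the thread_mpi public header */

/* Hedged sketch: thread r sends the value 100*r + i to thread i and receives
   100*i + r back from it. */
static void exchange_one_int_each(tMPI_Comm comm)
{
    int  myrank, nprocs, i;
    int *sendbuf, *recvbuf;

    tMPI_Comm_rank(comm, &myrank);
    tMPI_Comm_size(comm, &nprocs);

    sendbuf = (int*)tMPI_Malloc(sizeof(int)*nprocs);
    recvbuf = (int*)tMPI_Malloc(sizeof(int)*nprocs);
    for (i = 0; i < nprocs; i++)
    {
        sendbuf[i] = 100*myrank + i;
    }

    tMPI_Alltoall(sendbuf, 1, TMPI_INT, recvbuf, 1, TMPI_INT, comm);

    /* recvbuf[i] now holds 100*i + myrank */
    free(sendbuf);
    free(recvbuf);
}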