Exemplo n.º 1
0
/* get a pointer the next coll_env once it's ready. */
struct coll_env *tMPI_Get_cev(tMPI_Comm comm, int myrank, int *counter)
{
    struct coll_sync *csync=&(comm->csync[myrank]);
    struct coll_env *cev;
#ifdef USE_COLLECTIVE_COPY_BUFFER
    int N;
#endif

    /* we increase our counter, and determine which coll_env we get */
    csync->synct++;
    *counter=csync->synct;
    cev=&(comm->cev[csync->synct % N_COLL_ENV]);


#ifdef USE_COLLECTIVE_COPY_BUFFER
    if (cev->met[myrank].using_cb)
    {
        N=tMPI_Event_wait( &(cev->met[myrank].send_ev));
        tMPI_Event_process( &(cev->met[myrank].send_ev), 1);
    }
#endif
#ifdef USE_COLLECTIVE_COPY_BUFFER
    /* clean up old copy_buffer pointers */
    if (cev->met[myrank].cb)  
    {
        tMPI_Copy_buffer_list_return(&(tMPI_Get_current()->cbl_multi),
                                     cev->met[myrank].cb);
        cev->met[myrank].cb=NULL;
        cev->met[myrank].using_cb=FALSE;
    }
#endif

    return cev;
}
Exemplo n.º 2
0
void tMPI_Wait_for_data(struct tmpi_thread *cur, struct coll_env *cev, 
                        int myrank)
{
#if defined(TMPI_PROFILE) 
    tMPI_Profile_wait_start(cur);
#endif
    tMPI_Event_wait( &(cev->met[myrank].recv_ev));
    tMPI_Event_process( &(cev->met[myrank].recv_ev), 1);
#if defined(TMPI_PROFILE) 
    tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
}
Exemplo n.º 3
0
void tMPI_Wait_for_others(struct coll_env *cev, int myrank)
{
#if defined(TMPI_PROFILE) 
    struct tmpi_thread *cur=tMPI_Get_current();
    tMPI_Profile_wait_start(cur);
#endif

#ifdef USE_COLLECTIVE_COPY_BUFFER
    if (! (cev->met[myrank].using_cb) )
#endif
    {
        /* wait until everybody else is done copying the buffer */
        tMPI_Event_wait( &(cev->met[myrank].send_ev));
        tMPI_Event_process( &(cev->met[myrank].send_ev), 1);
    }
#ifdef USE_COLLECTIVE_COPY_BUFFER
    else
    {
        /* wait until everybody else is done copying the original buffer. 
           We use fetch_add because we want to be sure of coherency.
           This wait is bound to be very short (otherwise it wouldn't 
           be double-buffering) so we always spin here. */
        /*tMPI_Atomic_memory_barrier_rel();*/
#if 0
        while (!tMPI_Atomic_cas( &(cev->met[rank].buf_readcount), 0,
                                    -100000))
#endif
#if 0
        while (tMPI_Atomic_fetch_add( &(cev->met[myrank].buf_readcount), 0) 
               != 0)
#endif
#if 1
        while (tMPI_Atomic_get( &(cev->met[rank].buf_readcount) )>0)
#endif
        {
        }
        tMPI_Atomic_memory_barrier_acq();
    }
#endif
#if defined(TMPI_PROFILE) 
    tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_send);
#endif
}
Exemplo n.º 4
0
int tMPI_Scan(void* sendbuf, void* recvbuf, int count,
              tMPI_Datatype datatype, tMPI_Op op, tMPI_Comm comm)
{
    struct tmpi_thread *cur=tMPI_Get_current();
    int myrank=tMPI_Comm_seek_rank(comm, cur);
    int N=tMPI_Comm_N(comm);
    int prev=myrank - 1; /* my previous neighbor */
    int next=myrank + 1; /* my next neighbor */

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Scan(%p, %p, %d, %p, %p, %p)",
                     sendbuf, recvbuf, count, datatype, op, comm);
#endif
    if (count==0)
        return TMPI_SUCCESS;
    if (!recvbuf)
    {
        return tMPI_Error(comm, TMPI_ERR_BUF);
    }
    if (sendbuf==TMPI_IN_PLACE) 
    {
        sendbuf=recvbuf;
    }

    /* we set our send and recv buffers */
    tMPI_Atomic_ptr_set(&(comm->reduce_sendbuf[myrank]),sendbuf);
    tMPI_Atomic_ptr_set(&(comm->reduce_recvbuf[myrank]),recvbuf);

    /* now wait for the previous rank to finish */
    if (myrank > 0)
    {
        void *a, *b;
        int ret;

#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_start(cur);
#endif
        /* wait for the previous neighbor's data to be ready */
        tMPI_Event_wait( &(comm->csync[myrank].events[prev]) );
        tMPI_Event_process( &(comm->csync[myrank].events[prev]), 1);
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Reduce);
#endif
#ifdef TMPI_DEBUG
        printf("%d: scanning with %d \n", myrank, prev, iteration);
        fflush(stdout);
#endif
        /* now do the reduction */
        if (prev > 0)
        {
            a = (void*)tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[prev]));
        }
        else
        {
            a = (void*)tMPI_Atomic_ptr_get(&(comm->reduce_sendbuf[prev]));
        }
        b = sendbuf;

        if ((ret=tMPI_Reduce_run_op(recvbuf, a, b, datatype,
                                    count, op, comm)) != TMPI_SUCCESS)
        {
            return ret;
        }

        /* signal to my previous neighbor that I'm done with the data */
        tMPI_Event_signal( &(comm->csync[prev].events[prev]) );
    }
    else
    {
        if (sendbuf != recvbuf)
        {
            /* copy the data if this is rank 0, and not MPI_IN_PLACE */
            memcpy(recvbuf, sendbuf, count*datatype->size);
        }
    }

    if (myrank < N-1)
    {
        /* signal to my next neighbor that I have the data */
        tMPI_Event_signal( &(comm->csync[next].events[myrank]) );
        /* and wait for my next neighbor to finish */
        tMPI_Event_wait( &(comm->csync[myrank].events[myrank]) );
        tMPI_Event_process( &(comm->csync[myrank].events[myrank]), 1);
    }


#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
    tMPI_Profile_wait_start(cur);
#endif
    /*tMPI_Barrier_wait( &(comm->barrier));*/
#if defined(TMPI_PROFILE)
    /*tMPI_Profile_wait_stop(cur, TMPIWAIT_Reduce);*/
    tMPI_Profile_count_stop(cur, TMPIFN_Scan);
#endif
    return TMPI_SUCCESS;
}
Exemplo n.º 5
0
int tMPI_Gather(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                int root, tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Gather(%p, %d, %p, %p, %d, %p, %d, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, root, comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    if (myrank == root)
    {
        int i;
        int n_remaining = comm->grp.N-1;
        /* do root transfer */
        if (sendbuf != TMPI_IN_PLACE)
        {
            tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                                sendtype->size*sendcount,
                                recvtype->size*recvcount,
                                sendbuf,
                                (char*)recvbuf+myrank*recvcount*recvtype->size,
                                &ret);
        }
        for (i = 0; i < comm->grp.N; i++)
        {
            cev->met[myrank].read_data[i] = FALSE;
        }
        cev->met[myrank].read_data[myrank] = TRUE;

        /* wait for data availability as long as there are xfers to be done */
        while (n_remaining > 0)
        {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_start(cur);
#endif
            tMPI_Event_wait( &(cev->met[myrank]).recv_ev );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
            /* now check all of them */
            for (i = 0; i < comm->grp.N; i++)
            {
                if (!cev->met[myrank].read_data[i] &&
                    (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct))
                {
                    tMPI_Mult_recv(comm, cev, i, 0, TMPI_GATHER_TAG, recvtype,
                                   recvcount*recvtype->size,
                                   (char*)recvbuf+i*recvcount*recvtype->size,
                                   &ret);
                    tMPI_Event_process( &(cev->met[myrank]).recv_ev, 1);
                    if (ret != TMPI_SUCCESS)
                    {
                        return ret;
                    }
                    cev->met[myrank].read_data[i] = TRUE;
                    n_remaining--;
                }
            }
        }
    }
    else
    {
        if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
        {
            return tMPI_Error(comm, TMPI_ERR_BUF);
        }

        /* first set up the data just to root. */
        ret = tMPI_Post_multi(cev, myrank, 0, TMPI_GATHER_TAG, sendtype,
                              sendcount*sendtype->size, sendbuf, 1, synct, root);
        if (ret != TMPI_SUCCESS)
        {
            return ret;
        }
        /* and wait until root is done copying */
        tMPI_Wait_for_others(cev, myrank);
    }
#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Gather);
#endif
    return ret;
}
Exemplo n.º 6
0
int tMPI_Alltoall(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                  void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                  tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    int                 i;
    size_t              sendsize = sendtype->size*sendcount;
    size_t              recvsize = recvtype->size*recvcount;
    int                 n_remaining;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Alltoall(%p, %d, %p, %p, %d, %p, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!sendbuf || !recvbuf) /* don't do pointer arithmetic on a NULL ptr */
    {
        return tMPI_Error(comm, TMPI_ERR_BUF);
    }

    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    /* post our pointers */
    /* we set up multiple posts, so no Post_multi */
    cev->met[myrank].tag      = TMPI_ALLTOALL_TAG;
    cev->met[myrank].datatype = sendtype;
    tMPI_Atomic_set( &(cev->met[myrank].n_remaining), cev->N-1 );
    for (i = 0; i < comm->grp.N; i++)
    {
        cev->met[myrank].bufsize[i]   = sendsize;
        cev->met[myrank].buf[i]       = (char*)sendbuf+sendsize*i;
        cev->met[myrank].read_data[i] = FALSE;
    }
    tMPI_Atomic_memory_barrier_rel();
    tMPI_Atomic_set(&(cev->met[myrank].current_sync), synct);

    /* post availability */
    for (i = 0; i < cev->N; i++)
    {
        if (i != myrank)
        {
            tMPI_Event_signal( &(cev->met[i].recv_ev) );
        }
    }

    /* we don't do the copy buffer thing here because it's pointless:
       the processes have to synchronize anyway, because they all
       send and receive. */

    /* do root transfer */
    tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                        sendsize, recvsize,
                        (char*)sendbuf+sendsize*myrank,
                        (char*)recvbuf+recvsize*myrank, &ret);
    cev->met[myrank].read_data[myrank] = TRUE;
    /* and poll data availability */
    n_remaining = cev->N-1;
    while (n_remaining > 0)
    {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_start(cur);
#endif
        tMPI_Event_wait( &(cev->met[myrank]).recv_ev );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
        for (i = 0; i < cev->N; i++)
        {
            if ((!cev->met[myrank].read_data[i]) &&
                (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct))
            {
                tMPI_Event_process( &(cev->met[myrank]).recv_ev, 1);
                tMPI_Mult_recv(comm, cev, i, myrank, TMPI_ALLTOALL_TAG,
                               recvtype, recvsize, (char*)recvbuf+recvsize*i,
                               &ret);
                if (ret != TMPI_SUCCESS)
                {
                    return ret;
                }
                cev->met[myrank].read_data[i] = TRUE;
                n_remaining--;
            }
        }
    }


    /* and wait until everybody is done copying our data */
    tMPI_Wait_for_others(cev, myrank);

#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Alltoall);
#endif
    return ret;
}