/* get a pointer to the next coll_env once it's ready. */
struct coll_env *tMPI_Get_cev(tMPI_Comm comm, int myrank, int *counter)
{
    struct coll_sync *csync = &(comm->csync[myrank]);
    struct coll_env  *cev;
#ifdef USE_COLLECTIVE_COPY_BUFFER
    int               N;
#endif

    /* we increase our counter, and determine which coll_env we get */
    csync->synct++;
    *counter = csync->synct;
    cev      = &(comm->cev[csync->synct % N_COLL_ENV]);

#ifdef USE_COLLECTIVE_COPY_BUFFER
    if (cev->met[myrank].using_cb)
    {
        N = tMPI_Event_wait( &(cev->met[myrank].send_ev));
        tMPI_Event_process( &(cev->met[myrank].send_ev), 1);
    }
#endif
#ifdef USE_COLLECTIVE_COPY_BUFFER
    /* clean up old copy_buffer pointers */
    if (cev->met[myrank].cb)
    {
        tMPI_Copy_buffer_list_return(&(tMPI_Get_current()->cbl_multi),
                                     cev->met[myrank].cb);
        cev->met[myrank].cb       = NULL;
        cev->met[myrank].using_cb = FALSE;
    }
#endif
    return cev;
}
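/* Illustration (not part of the library; assumes N_COLL_ENV >= 2): because
   tMPI_Get_cev() rotates through the envelopes via synct % N_COLL_ENV, two
   consecutive collective calls on the same rank never get the same
   coll_env, which is what makes the double-buffering below possible. */
#if 0
int              counter1, counter2;
struct coll_env *first  = tMPI_Get_cev(comm, myrank, &counter1);
struct coll_env *second = tMPI_Get_cev(comm, myrank, &counter2);
/* counter2 == counter1 + 1, and first != second */
#endif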
void tMPI_Wait_for_data(struct tmpi_thread *cur, struct coll_env *cev,
                        int myrank)
{
#if defined(TMPI_PROFILE)
    tMPI_Profile_wait_start(cur);
#endif
    tMPI_Event_wait( &(cev->met[myrank].recv_ev));
    tMPI_Event_process( &(cev->met[myrank].recv_ev), 1);
#if defined(TMPI_PROFILE)
    tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
}
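/* Pairing sketch (not part of the library): a posting rank signals the
   receiver's recv_ev after publishing its buffer pointers, and the
   receiver blocks in tMPI_Wait_for_data() until that signal arrives.
   'recvrank' is a hypothetical receiver rank used for illustration. */
#if 0
/* poster:   */ tMPI_Event_signal( &(cev->met[recvrank].recv_ev) );
/* receiver: */ tMPI_Wait_for_data(tMPI_Get_current(), cev, recvrank);
#endif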
void tMPI_Wait_for_others(struct coll_env *cev, int myrank)
{
#if defined(TMPI_PROFILE)
    struct tmpi_thread *cur = tMPI_Get_current();
    tMPI_Profile_wait_start(cur);
#endif

#ifdef USE_COLLECTIVE_COPY_BUFFER
    if (!(cev->met[myrank].using_cb))
#endif
    {
        /* wait until everybody else is done copying the buffer */
        tMPI_Event_wait( &(cev->met[myrank].send_ev));
        tMPI_Event_process( &(cev->met[myrank].send_ev), 1);
    }
#ifdef USE_COLLECTIVE_COPY_BUFFER
    else
    {
        /* wait until everybody else is done copying the original buffer.
           This wait is bound to be very short (otherwise it wouldn't be
           double-buffering), so we always spin here; the acquire barrier
           afterwards makes sure we observe the readers' writes. */
        /*tMPI_Atomic_memory_barrier_rel();*/
#if 0
        while (!tMPI_Atomic_cas( &(cev->met[myrank].buf_readcount), 0,
                                 -100000))
#endif
#if 0
        while (tMPI_Atomic_fetch_add( &(cev->met[myrank].buf_readcount), 0)
               != 0)
#endif
#if 1
        while (tMPI_Atomic_get( &(cev->met[myrank].buf_readcount) ) > 0)
#endif
        {
        }
        tMPI_Atomic_memory_barrier_acq();
    }
#endif
#if defined(TMPI_PROFILE)
    tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_send);
#endif
}
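/* Reader-side sketch (hedged; the real logic lives in tMPI_Mult_recv):
   each reader increments buf_readcount before copying out of the
   sender's original buffer and decrements it when done, so the spin
   loop in tMPI_Wait_for_others() exits once the last reader finishes.
   'srcrank', 'dest', 'src' and 'size' are illustrative names only. */
#if 0
tMPI_Atomic_fetch_add( &(cev->met[srcrank].buf_readcount), 1 );
memcpy(dest, src, size); /* copy from the sender's original buffer */
tMPI_Atomic_fetch_add( &(cev->met[srcrank].buf_readcount), -1 );
#endif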
int tMPI_Scan(void* sendbuf, void* recvbuf, int count, tMPI_Datatype datatype,
              tMPI_Op op, tMPI_Comm comm)
{
    struct tmpi_thread *cur    = tMPI_Get_current();
    int                 myrank = tMPI_Comm_seek_rank(comm, cur);
    int                 N      = tMPI_Comm_N(comm);
    int                 prev   = myrank - 1; /* my previous neighbor */
    int                 next   = myrank + 1; /* my next neighbor */

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Scan(%p, %p, %d, %p, %p, %p)", sendbuf, recvbuf,
                     count, datatype, op, comm);
#endif
    if (count == 0)
    {
        return TMPI_SUCCESS;
    }
    if (!recvbuf)
    {
        return tMPI_Error(comm, TMPI_ERR_BUF);
    }
    if (sendbuf == TMPI_IN_PLACE)
    {
        sendbuf = recvbuf;
    }

    /* we set our send and recv buffers */
    tMPI_Atomic_ptr_set(&(comm->reduce_sendbuf[myrank]), sendbuf);
    tMPI_Atomic_ptr_set(&(comm->reduce_recvbuf[myrank]), recvbuf);

    /* now wait for the previous rank to finish */
    if (myrank > 0)
    {
        void *a, *b;
        int   ret;

#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_start(cur);
#endif
        /* wait for the previous neighbor's data to be ready */
        tMPI_Event_wait( &(comm->csync[myrank].events[prev]) );
        tMPI_Event_process( &(comm->csync[myrank].events[prev]), 1);
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Reduce);
#endif
#ifdef TMPI_DEBUG
        printf("%d: scanning with %d\n", myrank, prev);
        fflush(stdout);
#endif
        /* now do the reduction */
        if (prev > 0)
        {
            a = (void*)tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[prev]));
        }
        else
        {
            a = (void*)tMPI_Atomic_ptr_get(&(comm->reduce_sendbuf[prev]));
        }
        b = sendbuf;

        if ((ret = tMPI_Reduce_run_op(recvbuf, a, b, datatype,
                                      count, op, comm)) != TMPI_SUCCESS)
        {
            return ret;
        }

        /* signal to my previous neighbor that I'm done with the data */
        tMPI_Event_signal( &(comm->csync[prev].events[prev]) );
    }
    else
    {
        if (sendbuf != recvbuf)
        {
            /* copy the data if this is rank 0, and not TMPI_IN_PLACE */
            memcpy(recvbuf, sendbuf, count*datatype->size);
        }
    }

    if (myrank < N-1)
    {
        /* signal to my next neighbor that I have the data */
        tMPI_Event_signal( &(comm->csync[next].events[myrank]) );
        /* and wait for my next neighbor to finish */
        tMPI_Event_wait( &(comm->csync[myrank].events[myrank]) );
        tMPI_Event_process( &(comm->csync[myrank].events[myrank]), 1);
    }

#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
    /*tMPI_Profile_wait_start(cur);*/
#endif
    /*tMPI_Barrier_wait( &(comm->barrier));*/
#if defined(TMPI_PROFILE)
    /*tMPI_Profile_wait_stop(cur, TMPIWAIT_Reduce);*/
    tMPI_Profile_count_stop(cur, TMPIFN_Scan);
#endif
    return TMPI_SUCCESS;
}
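/* Usage sketch (not part of the library; assumes tMPI is already
   initialized and every rank in TMPI_COMM_WORLD calls this): computes
   an inclusive prefix sum, so rank r ends up with 1 + 2 + ... + (r+1). */
#if 0
static void example_scan(void)
{
    int myrank, val, psum;

    tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);
    val = myrank + 1;
    tMPI_Scan(&val, &psum, 1, TMPI_INT, TMPI_SUM, TMPI_COMM_WORLD);
    /* psum now holds the sum over ranks 0..myrank */
}
#endif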
int tMPI_Gather(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                int root, tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Gather(%p, %d, %p, %p, %d, %p, %d, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, root, comm);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    if (myrank == root)
    {
        int i;
        int n_remaining = comm->grp.N-1;
        /* do root transfer */
        if (sendbuf != TMPI_IN_PLACE)
        {
            tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                                sendtype->size*sendcount,
                                recvtype->size*recvcount,
                                sendbuf,
                                (char*)recvbuf+myrank*recvcount*recvtype->size,
                                &ret);
        }
        for (i = 0; i < comm->grp.N; i++)
        {
            cev->met[myrank].read_data[i] = FALSE;
        }
        cev->met[myrank].read_data[myrank] = TRUE;

        /* wait for data availability as long as there are xfers to be done */
        while (n_remaining > 0)
        {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_start(cur);
#endif
            tMPI_Event_wait( &(cev->met[myrank].recv_ev) );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
            /* now check all of them */
            for (i = 0; i < comm->grp.N; i++)
            {
                if (!cev->met[myrank].read_data[i] &&
                    (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct))
                {
                    tMPI_Mult_recv(comm, cev, i, 0, TMPI_GATHER_TAG, recvtype,
                                   recvcount*recvtype->size,
                                   (char*)recvbuf+i*recvcount*recvtype->size,
                                   &ret);
                    tMPI_Event_process( &(cev->met[myrank].recv_ev), 1);
                    if (ret != TMPI_SUCCESS)
                    {
                        return ret;
                    }
                    cev->met[myrank].read_data[i] = TRUE;
                    n_remaining--;
                }
            }
        }
    }
    else
    {
        if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
        {
            return tMPI_Error(comm, TMPI_ERR_BUF);
        }

        /* first set up the data just to root. */
        ret = tMPI_Post_multi(cev, myrank, 0, TMPI_GATHER_TAG, sendtype,
                              sendcount*sendtype->size, sendbuf, 1, synct,
                              root);
        if (ret != TMPI_SUCCESS)
        {
            return ret;
        }
        /* and wait until root is done copying */
        tMPI_Wait_for_others(cev, myrank);
    }
#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Gather);
#endif
    return ret;
}
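/* Usage sketch (not part of the library; assumes tMPI is already
   initialized): every rank contributes one int, and root 0 receives
   them in rank order, so recvvals[i] == i afterwards. */
#if 0
static void example_gather(void)
{
    int  myrank, nprocs;
    int  sendval;
    int *recvvals = NULL;

    tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);
    tMPI_Comm_size(TMPI_COMM_WORLD, &nprocs);
    sendval = myrank;
    if (myrank == 0)
    {
        recvvals = (int*)malloc(nprocs*sizeof(int));
    }
    tMPI_Gather(&sendval, 1, TMPI_INT, recvvals, 1, TMPI_INT, 0,
                TMPI_COMM_WORLD);
    free(recvvals);
}
#endif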
int tMPI_Alltoall(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                  void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                  tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    int                 i;
    size_t              sendsize = sendtype->size*sendcount;
    size_t              recvsize = recvtype->size*recvcount;
    int                 n_remaining;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Alltoall(%p, %d, %p, %p, %d, %p, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, comm);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    if (!sendbuf || !recvbuf) /* don't do pointer arithmetic on a NULL ptr */
    {
        return tMPI_Error(comm, TMPI_ERR_BUF);
    }

    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    /* post our pointers */
    /* we set up multiple posts, so no Post_multi */
    cev->met[myrank].tag      = TMPI_ALLTOALL_TAG;
    cev->met[myrank].datatype = sendtype;
    tMPI_Atomic_set( &(cev->met[myrank].n_remaining), cev->N-1 );
    for (i = 0; i < comm->grp.N; i++)
    {
        cev->met[myrank].bufsize[i]   = sendsize;
        cev->met[myrank].buf[i]       = (char*)sendbuf+sendsize*i;
        cev->met[myrank].read_data[i] = FALSE;
    }
    tMPI_Atomic_memory_barrier_rel();
    tMPI_Atomic_set(&(cev->met[myrank].current_sync), synct);

    /* post availability */
    for (i = 0; i < cev->N; i++)
    {
        if (i != myrank)
        {
            tMPI_Event_signal( &(cev->met[i].recv_ev) );
        }
    }

    /* we don't do the copy buffer thing here because it's pointless:
       the processes have to synchronize anyway, because they all
       send and receive. */

    /* do root transfer */
    tMPI_Coll_root_xfer(comm, sendtype, recvtype, sendsize, recvsize,
                        (char*)sendbuf+sendsize*myrank,
                        (char*)recvbuf+recvsize*myrank,
                        &ret);
    cev->met[myrank].read_data[myrank] = TRUE;

    /* and poll data availability */
    n_remaining = cev->N-1;
    while (n_remaining > 0)
    {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_start(cur);
#endif
        tMPI_Event_wait( &(cev->met[myrank].recv_ev) );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
        tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
        for (i = 0; i < cev->N; i++)
        {
            if ((!cev->met[myrank].read_data[i]) &&
                (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct))
            {
                tMPI_Event_process( &(cev->met[myrank].recv_ev), 1);
                tMPI_Mult_recv(comm, cev, i, myrank, TMPI_ALLTOALL_TAG,
                               recvtype, recvsize,
                               (char*)recvbuf+recvsize*i, &ret);
                if (ret != TMPI_SUCCESS)
                {
                    return ret;
                }
                cev->met[myrank].read_data[i] = TRUE;
                n_remaining--;
            }
        }
    }

    /* and wait until everybody is done copying our data */
    tMPI_Wait_for_others(cev, myrank);

#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Alltoall);
#endif
    return ret;
}
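/* Usage sketch (not part of the library; assumes tMPI is already
   initialized): rank r sends the value 100*r + i to rank i, so after
   the call recv[i] == 100*i + myrank on every rank. */
#if 0
static void example_alltoall(void)
{
    int  myrank, nprocs, i;
    int *send, *recv;

    tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);
    tMPI_Comm_size(TMPI_COMM_WORLD, &nprocs);
    send = (int*)malloc(nprocs*sizeof(int));
    recv = (int*)malloc(nprocs*sizeof(int));
    for (i = 0; i < nprocs; i++)
    {
        send[i] = 100*myrank + i;
    }
    tMPI_Alltoall(send, 1, TMPI_INT, recv, 1, TMPI_INT, TMPI_COMM_WORLD);
    free(send);
    free(recv);
}
#endif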