void tMPI_Wait_for_others(struct coll_env *cev, int myrank)
{
#if defined(TMPI_PROFILE)
    struct tmpi_thread *cur=tMPI_Get_current();
    tMPI_Profile_wait_start(cur);
#endif

#ifdef USE_COLLECTIVE_COPY_BUFFER
    if (! (cev->met[myrank].using_cb) )
#endif
    {
        /* wait until everybody else is done copying the buffer */
        tMPI_Event_wait( &(cev->met[myrank].send_ev));
        tMPI_Event_process( &(cev->met[myrank].send_ev), 1);
    }
#ifdef USE_COLLECTIVE_COPY_BUFFER
    else
    {
        /* wait until everybody else is done copying the original buffer.
           We use fetch_add because we want to be sure of coherency.
           This wait is bound to be very short (otherwise it wouldn't
           be double-buffering) so we always spin here. */
        /*tMPI_Atomic_memory_barrier_rel();*/
#if 0
        while (!tMPI_Atomic_cas( &(cev->met[myrank].buf_readcount), 0,
                                 -100000))
#endif
#if 0
        while (tMPI_Atomic_fetch_add( &(cev->met[myrank].buf_readcount), 0)
               != 0)
#endif
#if 1
        while (tMPI_Atomic_get( &(cev->met[myrank].buf_readcount) ) > 0)
#endif
        {
        }
        tMPI_Atomic_memory_barrier_acq();
    }
#endif
#if defined(TMPI_PROFILE)
    tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_send);
#endif
}
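
/* Illustrative sketch, not part of thread_mpi: the "spin until all readers
   have left the buffer" idea from the copy-buffer branch above, expressed
   with standard C11 atomics so it is self-contained. Readers increment
   read_count before touching the shared buffer and decrement it when done;
   the owner spins until the count drops to zero, and the acquire fence then
   orders any reuse of the buffer after the readers' accesses. The names
   read_count and shared_buf_owner_wait are hypothetical, and a C11 compiler
   with <stdatomic.h> is assumed. */
#include <stdatomic.h>

static atomic_int read_count;

static void shared_buf_owner_wait(void)
{
    /* this wait is expected to be very short, so spinning is acceptable */
    while (atomic_load_explicit(&read_count, memory_order_relaxed) > 0)
    {
        /* spin */
    }
    /* make all reader accesses visible before the owner reuses the buffer */
    atomic_thread_fence(memory_order_acquire);
}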
int tMPI_Comm_free(tMPI_Comm *comm)
{
    int size;
    int sum;
    int ret;
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Comm_free(%p)", comm);
#endif
#ifndef TMPI_STRICT
    if (!*comm)
    {
        return TMPI_SUCCESS;
    }

    if ((*comm)->grp.N > 1)
    {
        /* we remove ourselves from the comm. */
        int myrank = tMPI_Comm_seek_rank(*comm, tMPI_Get_current());

        ret = tMPI_Thread_mutex_lock(&((*comm)->comm_create_lock));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
        (*comm)->grp.peers[myrank] = (*comm)->grp.peers[(*comm)->grp.N-1];
        (*comm)->grp.N--;
        ret = tMPI_Thread_mutex_unlock(&((*comm)->comm_create_lock));
        if (ret != 0)
        {
            return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
        }
    }
    else
    {
        /* we're the last one so we can safely destroy it */
        ret = tMPI_Comm_destroy(*comm, TRUE);
        if (ret != 0)
        {
            return ret;
        }
    }
#else
    /* This is correct if programs actually treat Comm_free as a collective
       call */
    if (!*comm)
    {
        return TMPI_SUCCESS;
    }

    size = (*comm)->grp.N;

    /* we add 1 to the destroy counter and actually deallocate if the
       counter reaches N. */
    sum = tMPI_Atomic_fetch_add( &((*comm)->destroy_counter), 1) + 1;
    /* this is a collective call on a shared data structure, so only one
       process (the last one in this case) should do anything */
    if (sum == size)
    {
        ret = tMPI_Comm_destroy(*comm, TRUE);
        if (ret != 0)
        {
            return ret;
        }
    }
#endif
    return TMPI_SUCCESS;
}
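
/* Illustrative sketch, not part of thread_mpi: the TMPI_STRICT branch's
   "collective free" idea reduced to C11 atomics. Each of the N participants
   adds 1 to a destroy counter; only the caller that brings the counter to N
   performs the actual deallocation, so the shared object is freed exactly
   once without a lock. The names shared_obj, shared_free, destroy_counter
   and nusers are hypothetical. */
#include <stdatomic.h>
#include <stdlib.h>

struct shared_obj
{
    int        nusers;          /* number of threads sharing the object   */
    atomic_int destroy_counter; /* how many threads have called shared_free */
    /* ... payload ... */
};

static void shared_free(struct shared_obj *obj)
{
    /* fetch_add returns the previous value, so adding 1 gives our count */
    int sum = atomic_fetch_add(&obj->destroy_counter, 1) + 1;
    if (sum == obj->nusers)
    {
        free(obj); /* the last caller deallocates */
    }
}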
void* tMPI_Once_wait(tMPI_Comm comm, void* (*function)(void*), void *param,
                     int *was_first)
{
    int myrank;
    struct coll_sync *csync;
    struct coll_env *cev;
    int syncs;
    void *ret;

    if (!comm)
    {
        tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
        return NULL;
    }
    myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());

    /* we increase our counter, and determine which coll_env we get */
    csync=&(comm->csync[myrank]);
    csync->syncs++;
    cev=&(comm->cev[csync->syncs % N_COLL_ENV]);

    /* now do a compare-and-swap on the current_sync */
    syncs=tMPI_Atomic_get( &(cev->coll.current_sync));
    tMPI_Atomic_memory_barrier_acq();
    if ((csync->syncs - syncs > 0) && /* check if sync was an earlier number.
                                         If it is a later number, we can't
                                         have been the first to arrive here.
                                         Calculating the difference instead
                                         of comparing directly avoids ABA
                                         problems. */
        tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs))
    {
        /* we're the first! */
        ret=function(param);
        if (was_first)
        {
            *was_first=TRUE;
        }
        /* broadcast the output data */
        cev->coll.res=ret;

        tMPI_Atomic_memory_barrier_rel();
        /* signal that we're done */
        tMPI_Atomic_fetch_add(&(cev->coll.current_sync), 1);
        /* we need to keep being in sync */
        csync->syncs++;
    }
    else
    {
        /* we need to wait until the current_sync gets increased again */
        csync->syncs++;
        do
        {
            /*tMPI_Atomic_memory_barrier();*/
            syncs=tMPI_Atomic_get( &(cev->coll.current_sync) );
        } while (csync->syncs - syncs > 0); /* difference again due to ABA
                                               problems */
        tMPI_Atomic_memory_barrier_acq();
        ret=cev->coll.res;
    }
    return ret;
}
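
/* Illustrative sketch, not part of thread_mpi: the core "first thread to
   arrive does the work, the others wait for and reuse the result" pattern
   from tMPI_Once_wait, expressed with C11 atomics. It deliberately omits
   the per-thread sync counters and the ABA-avoiding difference comparison
   that the real code needs because its sync slots are reused. The first
   thread to win the CAS on `state` runs the function and publishes the
   result with release semantics; everyone else spins and reads it with
   acquire semantics. The names once_cell and once_call are hypothetical. */
#include <stdatomic.h>

struct once_cell
{
    atomic_int  state; /* 0 = idle, 1 = running, 2 = done */
    void       *res;   /* result published by the winner  */
};

static void *once_call(struct once_cell *c, void *(*fn)(void *), void *arg,
                       int *was_first)
{
    int expected = 0;
    if (atomic_compare_exchange_strong(&c->state, &expected, 1))
    {
        /* we're the first: do the work and publish the result */
        c->res = fn(arg);
        atomic_store_explicit(&c->state, 2, memory_order_release);
        if (was_first)
        {
            *was_first = 1;
        }
        return c->res;
    }
    /* somebody else got there first: wait until the result is published */
    while (atomic_load_explicit(&c->state, memory_order_acquire) != 2)
    {
        /* spin */
    }
    if (was_first)
    {
        *was_first = 0;
    }
    return c->res;
}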
void tMPI_Mult_recv(tMPI_Comm comm, struct coll_env *cev, int rank,
                    int index, int expected_tag, tMPI_Datatype recvtype,
                    size_t recvsize, void *recvbuf, int *ret)
{
    size_t sendsize=cev->met[rank].bufsize[index];

    /* check tags, types */
    if ((cev->met[rank].datatype != recvtype ) ||
        (cev->met[rank].tag != expected_tag))
    {
        *ret=tMPI_Error(comm, TMPI_ERR_MULTI_MISMATCH);
    }

    if (sendsize) /* we allow NULL ptrs if there's nothing to xmit */
    {
        void *srcbuf;
#ifdef USE_COLLECTIVE_COPY_BUFFER
        tmpi_bool decrease_ctr=FALSE;
#endif

        if ( sendsize > recvsize )
        {
            *ret=tMPI_Error(comm, TMPI_ERR_XFER_BUFSIZE);
            return;
        }

        if ( cev->met[rank].buf == recvbuf )
        {
            *ret=tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_XFER_BUF_OVERLAP);
            return;
        }

        /* get source buffer */
#ifdef USE_COLLECTIVE_COPY_BUFFER
        if ( !(cev->met[rank].using_cb))
#endif
        {
            srcbuf=cev->met[rank].buf[index];
        }
#ifdef USE_COLLECTIVE_COPY_BUFFER
        else
        {
            srcbuf=tMPI_Atomic_ptr_get(&(cev->met[rank].cpbuf[index]));
            tMPI_Atomic_memory_barrier_acq();

            if (!srcbuf)
            {
                /* there was (as of yet) no copied buffer */
                void *try_again_srcbuf;
                /* we need to try checking the pointer again after we
                   increase the read counter, signaling that one more
                   thread is reading. */
                tMPI_Atomic_add_return(&(cev->met[rank].buf_readcount), 1);
                /* a full memory barrier */
                tMPI_Atomic_memory_barrier();
                try_again_srcbuf=tMPI_Atomic_ptr_get(
                                        &(cev->met[rank].cpbuf[index]));
                if (!try_again_srcbuf)
                {
                    /* apparently the copied buffer is not ready yet. We
                       just use the real source buffer. We have already
                       indicated we're reading from the regular buf. */
                    srcbuf=cev->met[rank].buf[index];
                    decrease_ctr=TRUE;
                }
                else
                {
                    /* We tried again, and this time there was a copied
                       buffer. We use that, and indicate that we're not
                       reading from the regular buf. This case should be
                       pretty rare. */
                    tMPI_Atomic_fetch_add(&(cev->met[rank].buf_readcount),
                                          -1);
                    tMPI_Atomic_memory_barrier_acq();
                    srcbuf=try_again_srcbuf;
                }
            }
#ifdef TMPI_PROFILE
            if (srcbuf)
            {
                tMPI_Profile_count_buffered_coll_xfer(tMPI_Get_current());
            }
#endif
        }
#endif
        /* copy data */
        memcpy((char*)recvbuf, srcbuf, sendsize);
#ifdef TMPI_PROFILE
        tMPI_Profile_count_coll_xfer(tMPI_Get_current());
#endif

#ifdef USE_COLLECTIVE_COPY_BUFFER
        if (decrease_ctr)
        {
            /* we decrement the read count; potentially releasing the
               buffer. */
            tMPI_Atomic_memory_barrier_rel();
            tMPI_Atomic_fetch_add( &(cev->met[rank].buf_readcount), -1);
        }
#endif
    }
    /* signal one thread ready */
    {
        int reta;
        tMPI_Atomic_memory_barrier_rel();
        reta=tMPI_Atomic_add_return( &(cev->met[rank].n_remaining), -1);
        if (reta <= 0)
        {
            tMPI_Event_signal( &(cev->met[rank].send_ev) );
        }
    }
}
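
/* Illustrative sketch, not part of thread_mpi: the reader-side source
   selection from tMPI_Mult_recv's copy-buffer branch, reduced to C11
   atomics. The reader prefers the published copy buffer; if it is not yet
   available it registers itself in read_count, re-checks after a full
   fence, and only then falls back to the sender's original buffer,
   unregistering after its copy completes. This pairs with the owner-side
   spin shown after tMPI_Wait_for_others. The names pick_source_buffer,
   cpbuf, origbuf, read_count and must_unregister are hypothetical. */
#include <stdatomic.h>
#include <stdbool.h>

static void *pick_source_buffer(_Atomic(void *) *cpbuf, void *origbuf,
                                atomic_int *read_count,
                                bool *must_unregister)
{
    void *src = atomic_load_explicit(cpbuf, memory_order_acquire);

    *must_unregister = false;
    if (src == NULL)
    {
        /* announce that we may read the original buffer ... */
        atomic_fetch_add(read_count, 1);
        atomic_thread_fence(memory_order_seq_cst);
        /* ... and check once more whether the copy appeared meanwhile */
        src = atomic_load_explicit(cpbuf, memory_order_acquire);
        if (src == NULL)
        {
            src              = origbuf;
            *must_unregister = true; /* decrement read_count after copying */
        }
        else
        {
            atomic_fetch_sub(read_count, 1); /* copy is there after all */
        }
    }
    return src;
}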