/*
 * Tree barrier in which both the arrival and the release phase are "pull"
 * based: a thread only ever writes its own flags and polls the flags of
 * other threads.
 *
 * Flag slot `phase` of a thread is its arrival ("up") flag and slot
 * `2+phase` is its release ("down") flag, where `phase` is the value of
 * handle->barrier_flag_set on entry.  The slots of the opposite phase are
 * cleared here so they are ready for the next barrier.
 *
 * handle - collective handle of the calling thread
 * flags  - not used by this implementation
 */
void smp_coll_barrier_tree_pull_pull(smp_coll_t handle, int flags) {
  int child;
  int phase = handle->barrier_flag_set;

  gasnett_local_wmb();

  /* arrival: poll every child's up flag until it has been raised */
  child = 0;
  while (child < handle->barrier_num_children) {
    gasneti_waitwhile(SMP_COLL_GET_BARRIER_FLAG(handle, handle->barrier_children[child], phase)==0);
    child++;
  }

  /* retire the up flag of the other phase, then raise mine for this phase */
  SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, !phase, 0);
  SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, phase, 1);

  /* release: everyone but the root polls the parent's down flag */
  if (handle->MYTHREAD!=handle->barrier_root) {
    gasneti_waitwhile(SMP_COLL_GET_BARRIER_FLAG(handle, handle->barrier_parent, 2+phase)==0);
  }

  /* retire the down flag of the other phase ... */
  SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, 2+(!phase), 0);
  /* ... and raise my down flag so that my own children are released */
  SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, 2+phase, 1);

  /* the next barrier runs on the other set of flags */
  handle->barrier_flag_set = !handle->barrier_flag_set;
  gasnett_local_rmb();
}
/*
 * Tree barrier with a "push" arrival phase and a "pull" release phase.
 *
 * Arrival: every child increments atomic counter 0 (in the current atomic
 * set) of its parent; a thread waits until its own counter equals its
 * number of children and then resets it.
 * Release: every non-root thread polls its parent's barrier flag for the
 * current phase; each thread then raises its own flag for this phase.
 *
 * handle - collective handle of the calling thread
 * flags  - not used by this implementation
 */
void smp_coll_barrier_tree_push_pull(smp_coll_t handle, int flags) {
  int phase = handle->barrier_flag_set;

  gasnett_local_wmb();

  /* arrival: wait until all of my children have bumped my counter */
  gasneti_waitwhile(SMP_COLL_READ_ATOMIC(handle, handle->MYTHREAD, 0, handle->curr_atomic_set)!=handle->barrier_num_children);
  SMP_COLL_RESET_ATOMIC(handle, handle->MYTHREAD, 0, handle->curr_atomic_set);

  if (handle->MYTHREAD!=handle->barrier_root) {
    /* report my subtree to the parent, then wait for the parent's release flag */
    SMP_COLL_INC_ATOMIC(handle, handle->barrier_parent, 0, handle->curr_atomic_set);
    gasneti_waitwhile(SMP_COLL_GET_BARRIER_FLAG(handle, handle->barrier_parent, phase)==0);
  }

  /* lower my flag for this phase */
  SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, phase, 0);
  /* lower the flag left over from the other phase */
  SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, (!phase), 0);
  /* raise my flag for this phase so that my children are released */
  SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, phase, 1);

  /* switch to the other flag set and the other atomic set for the next barrier */
  handle->barrier_flag_set = !handle->barrier_flag_set;
  handle->curr_atomic_set = !handle->curr_atomic_set;
  gasnett_local_rmb();
}
/*
 * Tree barrier with a "pull" arrival phase and a "push" release phase.
 *
 * Arrival: a thread polls the flag `phase` of each child, then raises its
 * own flag `phase` (after clearing the one of the other phase).
 * Release: a non-root thread waits until its own flag `2+phase` has been
 * raised and clears it again; every thread then raises flag `2+phase` on
 * each of its children.
 *
 * handle - collective handle of the calling thread
 * flags  - not used by this implementation
 */
void smp_coll_barrier_tree_pull_push(smp_coll_t handle, int flags) {
  int child;
  int phase = handle->barrier_flag_set;

  gasnett_local_wmb();

  /* arrival: wait for each child's flag of this phase to be raised */
  child = 0;
  while (child < handle->barrier_num_children) {
    gasneti_waitwhile(SMP_COLL_GET_BARRIER_FLAG(handle, handle->barrier_children[child], phase)==0);
    child++;
  }

  /* clear the arrival flag of the other phase */
  SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, !phase, 0);
  /* announce that my whole subtree has arrived */
  SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, phase, 1);

  if (handle->MYTHREAD!=handle->barrier_root) {
    /* wait until my release flag gets raised, then consume it */
    gasneti_waitwhile(SMP_COLL_GET_BARRIER_FLAG(handle, handle->MYTHREAD, 2+phase)==0);
    SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, 2+phase, 0);
  }

  /* release: raise the release flag on every child */
  child = 0;
  while (child < handle->barrier_num_children) {
    SMP_COLL_SET_BARRIER_FLAG(handle, handle->barrier_children[child], 2+phase, 1);
    child++;
  }

  handle->barrier_flag_set = !handle->barrier_flag_set;
  gasnett_local_rmb();
}
/*
 * Send one AM Long request with a random payload size to a random peer
 * thread and block until tdata->flag becomes 0.
 *
 * The payload is taken from this thread's entry in tt_addr_map and is
 * addressed to the peer's entry.  The size is re-drawn until it exceeds
 * none of gasnet_AMMaxLongRequest(), gasnet_AMMaxLongReply() and
 * TEST_SEGZ_PER_THREAD.
 */
void test_amlong(threaddata_t *tdata) {
  int peer = RANDOM_PEER(tdata);
  int node = tt_thread_map[peer];
  void *local_buf = tt_addr_map[tdata->tid];
  void *remote_buf = tt_addr_map[peer];
  size_t len;

  /* keep drawing sizes until one satisfies all three limits */
  for (;;) {
    len = RANDOM_SIZE();
    if ((len <= gasnet_AMMaxLongRequest()) &&
        (len <= gasnet_AMMaxLongReply()) &&
        (len <= TEST_SEGZ_PER_THREAD)) {
      break;
    }
  }

  /* arm the completion flag before the request is issued */
  tdata->flag = -1;
  gasnett_local_wmb();
  ACTION_PRINTF("tid=%3d> AMLongRequest (sz=%7d) to tid=%3d", tdata->tid, (int)len, peer);

  GASNET_Safe(gasnet_AMRequestLong2(node, hidx_ping_longhandler,
                                    local_buf, len, remote_buf,
                                    tdata->ltid, peer));

  /* wait until the flag has been set to 0, then re-arm it */
  GASNET_BLOCKUNTIL(tdata->flag == 0);
  tdata->flag = -1;

  ACTION_PRINTF("tid=%3d> AMLongRequest to tid=%3d complete.", tdata->tid, peer);
}
/*
 * Tree barrier in which both phases are "push" based: a thread signals by
 * writing to state owned by another thread and only polls its own state.
 *
 * Arrival: every child increments atomic counter 0 of its parent; a thread
 * waits until its own counter equals its number of children, then resets it.
 * Release: a non-root thread waits for its own barrier flag to be raised and
 * clears it; every thread then raises the flag of each of its children.
 *
 * handle - collective handle of the calling thread
 * flags  - not used by this implementation
 */
void smp_coll_barrier_tree_push_push(smp_coll_t handle, int flags) {
  int child;
  int phase = handle->barrier_flag_set;
  int aset = handle->curr_atomic_set;

  gasnett_local_wmb();

  /* arrival: wait until every child has incremented my counter */
  gasneti_waitwhile(SMP_COLL_READ_ATOMIC(handle, handle->MYTHREAD, 0, aset)!=handle->barrier_num_children);
  SMP_COLL_RESET_ATOMIC(handle, handle->MYTHREAD, 0, aset);

  if (handle->MYTHREAD!=handle->barrier_root) {
    /* tell the parent that my subtree has arrived ... */
    SMP_COLL_INC_ATOMIC(handle, handle->barrier_parent, 0, aset);
    /* ... and wait for my flag to be raised, consuming it afterwards */
    gasneti_waitwhile(SMP_COLL_GET_BARRIER_FLAG(handle, handle->MYTHREAD, phase)==0);
    SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, phase, 0);
  }

  /* release: raise the flag of each child */
  child = 0;
  while (child < handle->barrier_num_children) {
    SMP_COLL_SET_BARRIER_FLAG(handle, handle->barrier_children[child], phase, 1);
    child++;
  }

  /* alternate both the flag set and the atomic set for the next barrier */
  handle->barrier_flag_set = !handle->barrier_flag_set;
  handle->curr_atomic_set = !handle->curr_atomic_set;
  gasnett_local_rmb();
}
/*
 * Tree barrier that uses a single barrier flag (slot 0) per thread.
 *
 * The tree is not taken from handle->barrier_children; it is derived from
 * the base-`radix` digits of the thread ids.  At level i a thread acts as a
 * parent when SMP_COLL_GET_ITH_DIGIT_POWER2RADIX and
 * SMP_COLL_GET_LOWER_K_DIGITS_POWER2RADIX both yield 0 for it; its children
 * at that level are the ids produced by SMP_COLL_MAKE_NUM_POWER2RADIX for
 * k = 1 .. radix-1 that are smaller than handle->THREADS.
 *
 * Protocol:
 *   1. wait until flag 0 of every child is non-zero (child subtree arrived)
 *   2. set my own flag 0 to 1
 *   3. unless I am the root, wait until my flag 0 is reset to 0
 *   4. reset flag 0 of every child to 0, releasing it
 *
 * handle - collective handle of the calling thread
 * flags  - not used by this implementation
 *
 * NOTE(review): the digit-derived tree appears to be rooted at thread 0 (the
 * only id whose digits are all zero) -- confirm that handle->barrier_root is
 * always 0 when this algorithm is selected.
 */
void smp_coll_barrier_tree_flag(smp_coll_t handle, int flags) {
  int num_digits = handle->barrier_log_radix_THREADS;
  int radixlog2 = handle->barrier_log_2_radix;
  int radix = handle->barrier_radix;
  int i,k;

  gasnett_local_wmb();

  /* arrival: wait for all the children, one tree level at a time */
  for(i=num_digits-1; i>=0; i--) {
    /* digit i and the digits below it are 0: I am a parent at this level */
    if(SMP_COLL_GET_ITH_DIGIT_POWER2RADIX(handle->MYTHREAD, i, radix, radixlog2)==0 &&
       SMP_COLL_GET_LOWER_K_DIGITS_POWER2RADIX(handle->MYTHREAD, i, radix, radixlog2)==0) {
      for(k=1;k<radix;k++) {
        int dest = SMP_COLL_MAKE_NUM_POWER2RADIX(handle->MYTHREAD, i, k, radix, radixlog2);
        if(dest<handle->THREADS) {
          /* wait for this child to raise its flag */
          gasneti_waitwhile(SMP_COLL_GET_BARRIER_FLAG(handle, dest, 0)==0);
        }
      }
    }
  }

  /* raise my flag: my entire subtree has arrived */
  SMP_COLL_SET_BARRIER_FLAG(handle, handle->MYTHREAD, 0, 1);

  /* the root field lives in the handle, as in the other tree barriers */
  if(handle->MYTHREAD!=handle->barrier_root) {
    /* wait for my flag to be reset, which signals that the whole tree has arrived */
    gasneti_waitwhile(SMP_COLL_GET_BARRIER_FLAG(handle, handle->MYTHREAD, 0)!=0);
  }

  /* release: reset the flag of every child, walking the same tree */
  for(i=num_digits-1; i>=0; i--) {
    if(SMP_COLL_GET_ITH_DIGIT_POWER2RADIX(handle->MYTHREAD, i, radix, radixlog2)==0 &&
       SMP_COLL_GET_LOWER_K_DIGITS_POWER2RADIX(handle->MYTHREAD, i, radix, radixlog2)==0) {
      for(k=1;k<radix;k++) {
        int dest = SMP_COLL_MAKE_NUM_POWER2RADIX(handle->MYTHREAD, i, k, radix, radixlog2);
        if(dest<handle->THREADS) {
          SMP_COLL_SET_BARRIER_FLAG(handle, dest, 0, 0);
        }
      }
    }
  }

  gasnett_local_rmb();
}
/*
 * Flag-based tree broadcast.
 *
 * Every thread first raises its own broadcast flag 0 ("I have arrived").
 * Thread 0 copies `src` into dstlist[0]; every other thread waits until its
 * flag has been reset to 0.  A thread then copies its own destination buffer
 * into the buffer of each of its children, resetting the child's flag once
 * the copy has been issued.  The tree is derived from the base-`radix`
 * digits of the thread ids, so no explicit parent is needed here.
 *
 * handle    - collective handle of the calling thread
 * num_addrs - not used by this implementation
 * dstlist   - destination buffers, indexed by thread id
 * src       - source buffer, read by thread 0 only
 * nbytes    - number of bytes to broadcast
 * flags     - when SMP_COLL_ALL_SYNC is set, a barrier is run before and after
 * radix     - fan-out of the tree; passed to the *_POWER2RADIX helper macros
 */
void smp_coll_broadcast_tree_flag(smp_coll_t handle, int num_addrs,
                                  void * const dstlist[], const void *src,
                                  size_t nbytes, int flags, int radix){
  int num_digits = smp_coll_mylogn(handle->THREADS, radix);
  int radixlog2 = smp_coll_mylogn(radix,2);
  int level, k;

  if((flags & SMP_COLL_ALL_SYNC)) {
    smp_coll_barrier(handle, flags);
  }

  /* announce my arrival */
  SMP_COLL_SET_BCAST_FLAG(handle, handle->MYTHREAD, 0, 1);

  if(handle->MYTHREAD!=0) {
    /* my flag going back to 0 means that my data has been delivered */
    gasneti_waitwhile(SMP_COLL_GET_BCAST_FLAG(handle, handle->MYTHREAD,0)!=0);
  } else {
    /* thread 0 seeds the broadcast from the source buffer */
    memcpy(dstlist[0], src, nbytes);
  }

  for(level=num_digits-1; level>=0; level--) {
    /* I send at this level when this digit and the lower ones are all 0 */
    if(SMP_COLL_GET_ITH_DIGIT_POWER2RADIX(handle->MYTHREAD, level, radix, radixlog2)==0 &&
       SMP_COLL_GET_LOWER_K_DIGITS_POWER2RADIX(handle->MYTHREAD, level, radix, radixlog2)==0) {
      for(k=1;k<radix;k++) {
        int dest = SMP_COLL_MAKE_NUM_POWER2RADIX(handle->MYTHREAD, level, k, radix, radixlog2);
        if(dest >= handle->THREADS) {
          continue;
        }
        /* the child has to have arrived before its buffer is written */
        gasneti_waitwhile(SMP_COLL_GET_BCAST_FLAG(handle, dest, 0)==0);
        memcpy(dstlist[dest], dstlist[handle->MYTHREAD], nbytes);
        /* write barrier between the copy and the flag reset */
        gasnett_local_wmb();
        SMP_COLL_SET_BCAST_FLAG(handle, dest, 0, 0);
      }
    }
  }

  if((flags & SMP_COLL_ALL_SYNC)) {
    smp_coll_barrier(handle, flags);
  }
}
/*
 * Send one AM Short request to a random peer thread and block until
 * tdata->flag becomes 0.
 */
void test_amshort(threaddata_t *tdata) {
  int peer = RANDOM_PEER(tdata);
  int node = tt_thread_map[peer];

  ACTION_PRINTF("tid=%3d> AMShortRequest to tid=%3d", tdata->tid, peer);

  /* arm the completion flag before the request is issued */
  tdata->flag = -1;
  gasnett_local_wmb();

  GASNET_Safe(gasnet_AMRequestShort1(node, hidx_ping_shorthandler,
                                     tdata->ltid));

  /* wait until the flag has been set to 0, then re-arm it */
  GASNET_BLOCKUNTIL(tdata->flag == 0);
  tdata->flag = -1;

  ACTION_PRINTF("tid=%3d> AMShortRequest to tid=%3d complete.", tdata->tid, peer);
}
/*
 * Tree broadcast that signals data arrival through atomic counters.
 *
 * A thread that has a parent in the digit-derived tree waits until its
 * atomic counter 0 (in the current atomic set) reads 1 and decrements it
 * again; the thread without a parent copies `src` into dstlist[0].  Each
 * thread then copies its destination buffer into the buffers of its
 * children and increments the child's counter after each copy.
 *
 * handle    - collective handle of the calling thread
 * num_addrs - not used by this implementation
 * dstlist   - destination buffers, indexed by thread id
 * src       - source buffer, read only by the thread without a parent
 * nbytes    - number of bytes to broadcast
 * flags     - when SMP_COLL_ALL_SYNC is set, a barrier is run before and after
 * radix     - fan-out of the tree; passed to the *_POWER2RADIX helper macros
 */
void smp_coll_broadcast_tree_atomic(smp_coll_t handle, int num_addrs,
                                    void * const dstlist[], const void *src,
                                    size_t nbytes, int flags, int radix){
  int num_digits = smp_coll_mylogn(handle->THREADS, radix);
  int radixlog2 = smp_coll_mylogn(radix,2);
  int level, k;
  int parent = -1;

  if((flags & SMP_COLL_ALL_SYNC)) {
    smp_coll_barrier(handle, flags);
  }

  /* my parent is my id with its lowest non-zero digit replaced by zero */
  for(level=0; level<num_digits; level++) {
    if(SMP_COLL_GET_ITH_DIGIT_POWER2RADIX(handle->MYTHREAD, level, radix, radixlog2)!=0) {
      parent = SMP_COLL_MAKE_NUM_POWER2RADIX(handle->MYTHREAD, level, 0, radix, radixlog2);
      break;
    }
  }

  if(parent!=-1) {
    /* wait for the single increment that announces my data, then consume it */
    gasneti_waitwhile(SMP_COLL_READ_ATOMIC(handle, handle->MYTHREAD, 0, handle->curr_atomic_set)!=1);
    SMP_COLL_DEC_ATOMIC(handle, handle->MYTHREAD, 0, handle->curr_atomic_set);
    gasnett_local_rmb();
  } else {
    /* no parent: seed the broadcast from the source buffer */
    memcpy(dstlist[0], src, nbytes);
  }

  for(level=num_digits-1; level>=0; level--) {
    /* I send at this level when this digit and the lower ones are all 0 */
    if(SMP_COLL_GET_ITH_DIGIT_POWER2RADIX(handle->MYTHREAD, level, radix, radixlog2)==0 &&
       SMP_COLL_GET_LOWER_K_DIGITS_POWER2RADIX(handle->MYTHREAD, level, radix, radixlog2)==0) {
      for(k=1;k<radix;k++) {
        int dest = SMP_COLL_MAKE_NUM_POWER2RADIX(handle->MYTHREAD, level, k, radix, radixlog2);
        if(dest >= handle->THREADS) {
          continue;
        }
        GASNETE_FAST_UNALIGNED_MEMCPY(dstlist[dest], dstlist[handle->MYTHREAD], nbytes);
        /* write barrier between the copy and the counter increment */
        gasnett_local_wmb();
        SMP_COLL_INC_ATOMIC(handle, dest, 0, handle->curr_atomic_set);
      }
    }
  }

  if((flags & SMP_COLL_ALL_SYNC)) {
    smp_coll_barrier(handle, flags);
  }

  /* the next collective uses the other set of counters */
  handle->curr_atomic_set = !handle->curr_atomic_set;
}
/*
 * Dissemination barrier built on atomic counters: log_r(n) rounds with O(n)
 * messages per round.
 *
 * In phase i a thread increments counter i on every peer listed in
 * barrier_order[i].elem_list, then waits until its own counter i equals
 * barrier_order[i].n and resets it.  Phases with an empty peer list are
 * skipped.
 *
 * handle - collective handle of the calling thread
 * flags  - not used by this implementation
 */
void smp_coll_barrier_dissem_atomic(smp_coll_t handle, int flags) {
  const dissem_info_t *dissem = handle->dissem_info;
  const dissem_vector_t *barrier_order = dissem->barrier_order;
  int phase, idx;

  gasnett_local_wmb();

  for(phase=0; phase<dissem->dissemination_phases; phase++) {
    const int *peers;

    if(!(barrier_order[phase].n > 0)) {
      continue;
    }
    peers = barrier_order[phase].elem_list;

    /* signal every peer of this phase by bumping its counter for the phase */
    for(idx=0; idx<barrier_order[phase].n; idx++) {
      int dest = peers[idx];
      SMP_COLL_INC_ATOMIC(handle, dest, phase, handle->curr_atomic_set);
    }

    /* wait until my counter for this phase has reached the expected count */
    gasneti_waitwhile(SMP_COLL_READ_ATOMIC(handle, handle->MYTHREAD, phase, handle->curr_atomic_set)!=barrier_order[phase].n);
    SMP_COLL_RESET_ATOMIC(handle, handle->MYTHREAD, phase, handle->curr_atomic_set);
  }

  /* the next barrier uses the other set of counters */
  handle->curr_atomic_set = !handle->curr_atomic_set;
  gasnett_local_rmb();
}
/*
 * Send one AM Medium request with a random payload size to a random peer
 * thread and block until tdata->flag becomes 0.
 *
 * The payload is taken from this thread's entry in tt_addr_map; the size is
 * re-drawn until it does not exceed gasnet_AMMaxMedium().
 */
void test_ammedium(threaddata_t *tdata) {
  int peer = RANDOM_PEER(tdata);
  int node = tt_thread_map[peer];
  void *payload = tt_addr_map[tdata->tid];
  size_t len;

  /* keep drawing sizes until one fits into a medium message */
  for (;;) {
    len = RANDOM_SIZE();
    if (len <= gasnet_AMMaxMedium()) {
      break;
    }
  }

  ACTION_PRINTF("tid=%3d> AMMediumRequest (sz=%7d) to tid=%3d", tdata->tid, (int)len, peer);

  /* arm the completion flag before the request is issued */
  tdata->flag = -1;
  gasnett_local_wmb();

  GASNET_Safe(gasnet_AMRequestMedium1(node, hidx_ping_medhandler,
                                      payload, len, tdata->ltid));

  /* wait until the flag has been set to 0, then re-arm it */
  GASNET_BLOCKUNTIL(tdata->flag == 0);
  tdata->flag = -1;

  ACTION_PRINTF("tid=%3d> AMMediumRequest to tid=%3d complete.", tdata->tid, peer);
}
/*
 * Flag-based tree broadcast in which the last tree level fetches the data.
 *
 * Every thread raises its broadcast flag 0.  Thread 0 copies `src` into
 * dstlist[0]; all other threads spin until their flag 0 is reset.  For tree
 * levels num_digits-1 down to 1 a sending thread copies its buffer into each
 * child's buffer and resets the child's flag 0.  At level 0 the parent only
 * resets the child's flag 0; the child copies the data out of the parent's
 * buffer itself and raises its flag 1, which the parent waits for and clears.
 *
 * The spin loops perform a dummy floating point operation per iteration; the
 * resulting value is never used.
 *
 * handle    - collective handle of the calling thread
 * num_addrs - not used by this implementation
 * dstlist   - destination buffers, indexed by thread id
 * src       - source buffer, read by thread 0 only
 * nbytes    - number of bytes to broadcast
 * flags     - when SMP_COLL_ALL_SYNC is set, a barrier is run before and after
 */
void smp_coll_broadcast_tree_leaf_get_flag(smp_coll_t handle, int num_addrs,
                                           void * const dstlist[], const void *src,
                                           size_t nbytes, int flags){
  int num_digits = handle->broadcast_log_radix_THREADS;
  int radixlog2 = handle->broadcast_log_2_radix;
  int radix = handle->broadcast_radix;
  int level, k;
  double spin = 2.0;

  if((flags & SMP_COLL_ALL_SYNC)) {
    smp_coll_barrier(handle, flags);
  }

  /* announce my arrival */
  SMP_COLL_SET_BCAST_FLAG(handle, handle->MYTHREAD, 0, 1);

  if(handle->MYTHREAD!=0) {
    /* my flag going back to 0 means that my data is available */
    while(SMP_COLL_GET_BCAST_FLAG(handle, handle->MYTHREAD,0)!=0) {
      spin = 1.0/spin;
    }
    /* read barrier before the data is used */
    gasnett_local_rmb();
  } else {
    /* thread 0 seeds the broadcast from the source buffer */
    memcpy(dstlist[0], src, nbytes);
  }

  /* push the data down the tree, excluding the last level */
  for(level=num_digits-1; level>=1; level--) {
    /* I send at this level when this digit and the lower ones are all 0 */
    if(SMP_COLL_GET_ITH_DIGIT_POWER2RADIX(handle->MYTHREAD, level, radix, radixlog2)==0 &&
       SMP_COLL_GET_LOWER_K_DIGITS_POWER2RADIX(handle->MYTHREAD, level, radix, radixlog2)==0) {
      for(k=1;k<radix;k++) {
        int dest = SMP_COLL_MAKE_NUM_POWER2RADIX(handle->MYTHREAD, level, k, radix, radixlog2);
        if(dest >= handle->THREADS) {
          continue;
        }
        /* the child has to have arrived before its buffer is written */
        while(SMP_COLL_GET_BCAST_FLAG(handle, dest, 0)==0) {
          spin = 1.0/spin;
        }
        memcpy(dstlist[dest], dstlist[handle->MYTHREAD], nbytes);
        /* write barrier between the copy and the flag reset */
        gasnett_local_wmb();
        SMP_COLL_SET_BCAST_FLAG(handle, dest, 0, 0);
      }
    }
  }

  /* last level: the leaves get the data from their parent */
  if(SMP_COLL_GET_ITH_DIGIT_POWER2RADIX(handle->MYTHREAD, 0, radix, radixlog2)==0) {
    /* I am the parent of leaves: tell each one that my buffer is ready */
    for(k=1;k<radix;k++) {
      int dest = SMP_COLL_MAKE_NUM_POWER2RADIX(handle->MYTHREAD, 0, k, radix, radixlog2);
      if(dest<handle->THREADS) {
        double leaf_spin = 2.0;
        /* wait for the leaf to arrive */
        while(SMP_COLL_GET_BCAST_FLAG(handle, dest, 0)==0) {
          leaf_spin = 1.0/leaf_spin;
        }
        /* write barrier before the flag reset */
        gasnett_local_wmb();
        SMP_COLL_SET_BCAST_FLAG(handle, dest, 0, 0);
      }
    }

    /* wait until every leaf has finished reading my buffer */
    for(k=1;k<radix;k++) {
      int dest = SMP_COLL_MAKE_NUM_POWER2RADIX(handle->MYTHREAD, 0, k, radix, radixlog2);
      if(dest<handle->THREADS) {
        while(SMP_COLL_GET_BCAST_FLAG(handle, dest, 1)==0) {
          spin = 1.0/spin;
        }
        SMP_COLL_SET_BCAST_FLAG(handle, dest, 1, 0);
      }
    }
  } else {
    /* I am a leaf; my flag 0 was already seen reset in the wait above */
    int leaf_parent = SMP_COLL_MAKE_NUM_POWER2RADIX(handle->MYTHREAD, 0, 0, radix, radixlog2);

    /* fetch the data from the parent's buffer */
    gasnett_local_rmb();
    memcpy(dstlist[handle->MYTHREAD], dstlist[leaf_parent], nbytes);

    /* raise my second flag: the read of the parent's buffer is done */
    gasnett_local_wmb();
    SMP_COLL_SET_BCAST_FLAG(handle, handle->MYTHREAD, 1, 1);
  }

  if((flags & SMP_COLL_ALL_SYNC)) {
    smp_coll_barrier(handle, flags);
  }
}
/*
 * MPI ping-pong test driven through GASNet active messages.
 *
 * Posts a non-blocking receive (tag 10000+tid) and a non-blocking send
 * (tag tid) of a random, non-zero number of bytes to the MPI rank of the
 * peer thread's node, then sends an AM Short request to that node.  While
 * tdata->flag is non-zero the thread keeps sending probe requests, polls
 * GASNet and tests both MPI requests.  Finally both requests are completed
 * and the received bytes are compared with the bytes that were sent.
 */
void test_mpi(threaddata_t *tdata) {
  MPI_Request sendhandle = MPI_REQUEST_NULL;
  MPI_Request recvhandle = MPI_REQUEST_NULL;
  int peer = tdata->tid_peer;
  int node = tt_thread_map[peer];
  int mpipeer = gasnetnode_to_mpirank[node];
  int sz;
  char *sendbuf;
  char *recvbuf;
  int tag = tdata->tid;
  int pos;

  /* some MPI's may barf on 0 byte send/recv */
  do {
    sz = RANDOM_SIZE();
  } while (sz == 0);

  sendbuf = (char*)test_malloc(sz);
  recvbuf = (char*)test_malloc(sz);

  /* randomize at least the first 4 KB */
  for (pos = 0; pos < MIN(sz,4096); pos++) {
    sendbuf[pos] = (char)rand();
  }

  ACTION_PRINTF("tid=%3d> starting MPI ping-pong with tid=%3d.\n", tdata->tid, peer);

  /* both non-blocking operations are posted under the MPI lock */
  MPI_LOCK();

    ACTION_PRINTF("tid=%3d> setting MPI_Irecv, %i bytes\n", tdata->tid, sz);
    MPI_SAFE(MPI_Irecv(recvbuf, sz, MPI_BYTE, mpipeer, 10000+tag, MPI_COMM_WORLD, &recvhandle));
    assert(recvhandle != MPI_REQUEST_NULL);

    ACTION_PRINTF("tid=%3d> sending MPI message, %i bytes\n", tdata->tid, sz);
    MPI_SAFE(MPI_Isend(sendbuf, sz, MPI_BYTE, mpipeer, tag, MPI_COMM_WORLD, &sendhandle));
    assert(sendhandle != MPI_REQUEST_NULL);

  MPI_UNLOCK();

  /* arm the completion flag before the request is issued */
  tdata->flag = -1;
  gasnett_local_wmb();

  ACTION_PRINTF("tid=%3d> MPI AMShortRequest to tid=%3d\n", tdata->tid, peer);
  GASNET_Safe(gasnet_AMRequestShort2(node, hidx_mpi_handler, tdata->tid, sz));

  /* keep probing until the flag has been set to 0 */
  while (tdata->flag != 0) {
    ACTION_PRINTF("tid=%3d> MPI probe AMShortRequest to tid=%3d\n", tdata->tid, peer);
    GASNET_Safe(gasnet_AMRequestShort1(node, hidx_mpi_probehandler, tdata->tid));

    gasnett_sched_yield();
    test_sleep(tdata);
    GASNET_Safe(gasnet_AMPoll());

    /* occasional testing may be required for progress */
    mpi_test(&sendhandle);
    mpi_test(&recvhandle);
  }
  tdata->flag = -1;

  mpi_complete(&sendhandle);
  mpi_complete(&recvhandle);

  /* verify that the received bytes match the bytes that were sent */
  for (pos = 0; pos < sz; pos++) {
    if (sendbuf[pos] != recvbuf[pos]) {
      FATALERR("mismatch at element %i in MPI test.", pos);
    }
  }

  test_free(sendbuf);
  test_free(recvbuf);

  ACTION_PRINTF("tid=%3d> MPI ping-pong with tid=%3d complete.\n", tdata->tid, peer);
}