/*
 * Linear (all-to-all) barrier over the active set described by
 * PE_start / logPE_stride / PE_size.
 *
 * The exchange is performed twice, once on pSync[0] and once on pSync[1].
 * In each pass this PE increments the pass's pSync word on every other PE
 * of the set, then blocks until its own copy of that word has reached
 * _SHMEM_SYNC_VALUE + PE_size - 1 (one increment from each peer), and
 * finally puts the word back to _SHMEM_SYNC_VALUE.
 */
void
__shmem_barrier_linear (int PE_start, int logPE_stride, int PE_size,
                        long *pSync)
{
  const int self = _my_pe ();
  const int stride = 1 << logPE_stride;
  /* value our own flag holds once every other member has checked in */
  const long expected = _SHMEM_SYNC_VALUE + PE_size - 1;
  int phase;

  for (phase = 0; phase < 2; phase += 1)
    {
      long *flag = &pSync[phase];
      int k;

      for (k = 0; k < PE_size; k += 1)
        {
          const int peer = PE_start + k * stride;

          if (peer == self)
            {
              continue;         /* never signal ourselves */
            }

          shmem_long_inc (flag, peer);
          __shmem_trace (SHMEM_LOG_BARRIER,
                         "round = %d, sent increment to PE %d", phase, peer);
        }

      shmem_long_wait_until (flag, _SHMEM_CMP_EQ, expected);

      /* restore the flag for the next user of pSync */
      *flag = _SHMEM_SYNC_VALUE;
    }
}
/*
 * FORTRANIFY-named entry point for the 8-byte integer increment.
 * The PE number arrives through a pointer; it is dereferenced and the
 * request is handed straight to shmem_long_inc().
 */
void
FORTRANIFY (shmem_int8_inc) (long *target, int *pe)
{
  const int remote_pe = *pe;

  shmem_long_inc (target, remote_pe);
}
void shmemi_broadcast32_tree (void *target, const void *source, size_t nlong, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync) { int child_l, child_r, parent; const int step = 1 << logPE_stride; int my_pe = GET_STATE (mype); int *target_ptr, *source_ptr; int no_children; long is_ready, lchild_ready, rchild_ready; is_ready = 1; lchild_ready = -1; rchild_ready = -1; shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE); shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE); pSync[0] = 0; pSync[1] = 0; target_ptr = (int *) target; source_ptr = (int *) source; set_2tree (PE_start, step, PE_size, &parent, &child_l, &child_r, my_pe); no_children = 0; build_tree (PE_start, step, PE_root, PE_size, &parent, &child_l, &child_r, my_pe); shmemi_trace (SHMEM_LOG_BROADCAST, "before broadcast, R_child = %d L_child = %d", child_r, child_l); /* The actual broadcast */ if (PE_size > 1) { if (my_pe == (PE_start + step * PE_root)) { pSync[0] = SHMEM_SYNC_VALUE; if (child_l != -1) { shmem_long_get (&lchild_ready, (const long *) &pSync[0], 1, child_l); while (lchild_ready != 0) shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); shmem_int_put (target_ptr, source_ptr, nlong, child_l); shmem_fence (); shmem_long_put (&pSync[0], &is_ready, 1, child_l); no_children = 1; } if (child_r != -1) { shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); while (rchild_ready != 0) shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); shmem_int_put (target_ptr, source_ptr, nlong, child_r); shmem_fence (); shmem_long_put (&pSync[0], &is_ready, 1, child_r); no_children = 2; } shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, (long) no_children); pSync[1] = SHMEM_SYNC_VALUE; } else { shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, is_ready); pSync[0] = SHMEM_SYNC_VALUE; shmemi_trace (SHMEM_LOG_BROADCAST, "inside else"); memcpy (source_ptr, target_ptr, nlong * sizeof (int)); if (child_l != -1) { shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); while 
(lchild_ready != 0) shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); shmem_int_put (target_ptr, source_ptr, nlong, child_l); shmem_fence (); shmem_long_put (&pSync[0], &is_ready, 1, child_l); no_children = 1; } if (child_r != -1) { shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); while (rchild_ready != 0) shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); shmem_int_put (target_ptr, source_ptr, nlong, child_r); shmem_fence (); shmem_long_put (&pSync[0], &is_ready, 1, child_r); no_children = 2; } pSync[0] = SHMEM_SYNC_VALUE; if (no_children == 0) { pSync[1] = SHMEM_SYNC_VALUE; /* TO DO: Is check for parents pSync required? */ shmem_long_inc (&pSync[1], parent); } else { shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, (long) no_children); pSync[1] = SHMEM_SYNC_VALUE; /* printf("PE %d incrementing child count on PE %d\n",my_pe,parent); */ shmem_long_inc (&pSync[1], parent); } } shmemi_trace (SHMEM_LOG_BROADCAST, "at the end of bcast32"); /* shmem_barrier(PE_start, logPE_stride, PE_size, pSync); */ } }
void shmemi_barrier_tree (int PE_start, int logPE_stride, int PE_size, long *pSync) { int child_l, child_r, parent; const int step = 1 << logPE_stride; int my_pe = GET_STATE (mype); int no_children; long is_ready, lchild_ready, rchild_ready; is_ready = 1; lchild_ready = -1; rchild_ready = -1; shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE); shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE); /* printf("Tree barrier\n"); */ set_2tree (PE_start, step, PE_size, &parent, &child_l, &child_r, my_pe); no_children = 0; shmemi_trace (SHMEM_LOG_BARRIER, "before barrier, R_child = %d L_child = %d", child_r, child_l); /* The actual barrier */ if (PE_size > 1) { pSync[0] = 0; pSync[1] = 0; if (my_pe == PE_start) { pSync[0] = SHMEM_SYNC_VALUE; if (child_l != -1) { shmem_long_get (&lchild_ready, (const long *) &pSync[0], 1, child_l); while (lchild_ready != 0) shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); shmem_long_put (&pSync[0], &is_ready, 1, child_l); no_children = 1; } if (child_r != -1) { shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); while (rchild_ready != 0) shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); shmem_long_put (&pSync[0], &is_ready, 1, child_r); no_children = 2; } shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, (long) no_children); pSync[1] = SHMEM_SYNC_VALUE; } else { shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, is_ready); shmemi_trace (SHMEM_LOG_BARRIER, "inside else"); if (child_l != -1) { shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); while (lchild_ready != 0) shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); shmem_long_put (&pSync[0], &is_ready, 1, child_l); no_children = 1; } if (child_r != -1) { shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); while (rchild_ready != 0) shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); shmem_long_put (&pSync[0], &is_ready, 1, child_r); no_children = 2; } pSync[0] = SHMEM_SYNC_VALUE; if (no_children == 0) { pSync[1] = SHMEM_SYNC_VALUE; 
shmem_long_inc (&pSync[1], parent); } else { shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, (long) no_children); pSync[1] = SHMEM_SYNC_VALUE; shmem_long_inc (&pSync[1], parent); } } shmemi_trace (SHMEM_LOG_BARRIER, "at the end of barrier"); } }