/**
 * Allgather inside a group of consecutive ranks by exchanging one message
 * with every other group member.
 *
 * The buffer \a data is treated as \a groupsize chunks of \a datasize bytes.
 * This process sends the chunk at index \a myoffset to each peer and receives
 * the peer's chunk into the slot that matches the peer's offset.  Peer ranks
 * are computed as myrank - myoffset + j for j in [0, groupsize).
 *
 * \param [in]     mpicomm    Communicator used for all messages.
 * \param [in,out] data       Chunked buffer; slot \a myoffset is read, all
 *                            other slots are overwritten.
 * \param [in]     datasize   Number of bytes in one chunk.
 * \param [in]     groupsize  Number of chunks / group members.
 * \param [in]     myoffset   Index of this process inside the group; must
 *                            satisfy 0 <= myoffset < groupsize.
 * \param [in]     myrank     Rank from which the peer ranks are derived.
 */
void
sc_allgather_alltoall (sc_MPI_Comm mpicomm, char *data, int datasize,
                       int groupsize, int myoffset, int myrank)
{
  int                 slot;
  int                 mpiret;
  sc_MPI_Request     *all_reqs;
  sc_MPI_Request     *recv_reqs;
  sc_MPI_Request     *send_reqs;

  SC_ASSERT (myoffset >= 0 && myoffset < groupsize);

  /* one allocation: first half for receives, second half for sends */
  all_reqs = SC_ALLOC (sc_MPI_Request, 2 * groupsize);
  recv_reqs = all_reqs;
  send_reqs = all_reqs + groupsize;

  for (slot = 0; slot < groupsize; ++slot) {
    if (slot != myoffset) {
      const int           peer = myrank - (myoffset - slot);

      /* the peer's contribution lands in the slot of its group offset */
      mpiret = sc_MPI_Irecv (data + slot * datasize, datasize, sc_MPI_BYTE,
                             peer, SC_TAG_AG_ALLTOALL, mpicomm,
                             &recv_reqs[slot]);
      SC_CHECK_MPI (mpiret);

      /* every peer gets a copy of our own chunk */
      mpiret = sc_MPI_Isend (data + myoffset * datasize, datasize,
                             sc_MPI_BYTE, peer, SC_TAG_AG_ALLTOALL, mpicomm,
                             &send_reqs[slot]);
      SC_CHECK_MPI (mpiret);
    }
    else {
      /* no message to ourselves; keep the request entries inactive */
      send_reqs[slot] = sc_MPI_REQUEST_NULL;
      recv_reqs[slot] = send_reqs[slot];
    }
  }

  /* complete all receives and sends together */
  mpiret = sc_MPI_Waitall (2 * groupsize, all_reqs, sc_MPI_STATUSES_IGNORE);
  SC_CHECK_MPI (mpiret);

  SC_FREE (all_reqs);
}
/**
 * Exchange the column data of shared lnodes with every other sharer rank and
 * merge what is received into the local profile.
 *
 * For each local node, profile->lnode_ranges holds a pair that this function
 * uses as (start index into profile->lnode_columns, number of int8_t
 * entries).  The pairs are exchanged first (via the p4est_lnodes share_all
 * calls) so that both sides know the message sizes, then the int8_t column
 * entries themselves are sent with non-blocking point-to-point messages.
 * Each received column is combined with the local one using
 * p6est_profile_union() when profile->ptype == P6EST_PROFILE_UNION and
 * p6est_profile_intersection() otherwise.
 *
 * \param [in,out] profile  Profile whose lnode_ranges, lnode_columns and
 *                          lnode_changed[evenodd] entries may be modified.
 * \return  The logical OR over lnodes->mpicomm of the local change flag.
 *          The local flag is set only when a union produced a column with
 *          more entries than the previous local column; the intersection
 *          branch does not set it.
 */
int
p6est_profile_sync (p6est_profile_t * profile)
{
  p4est_lnodes_t *lnodes = profile->lnodes;
  p4est_locidx_t nln = lnodes->num_local_nodes;
  sc_array_t lrview;
  p4est_lnodes_buffer_t *countbuf;
  sc_array_t *sharers;
  size_t zz, nsharers;
  int nleft;
  int8_t *recv, *send;
  int *array_of_indices;
  p4est_locidx_t recv_total;
  p4est_locidx_t *recv_offsets, recv_offset;
  p4est_locidx_t send_total;
  p4est_locidx_t *send_offsets, send_offset;
  p4est_locidx_t (*lr)[2];
  sc_array_t *lc = profile->lnode_columns;
  sc_MPI_Request *recv_request, *send_request;
  sc_array_t *work;
  int any_change = 0;
  int any_global_change;
  int mpiret, mpirank;
  int evenodd = profile->evenodd;

  /* view the ranges as an array of [2] pairs: lr[n][0] start, lr[n][1] count */
  lr = (p4est_locidx_t (*)[2]) profile->lnode_ranges;
  sharers = lnodes->sharers;
  nsharers = sharers->elem_count;

  mpiret = sc_MPI_Comm_rank (lnodes->mpicomm, &mpirank);
  SC_CHECK_MPI (mpiret);

  /* wrap the pairs (one per local node) without copying them */
  sc_array_init_data (&lrview, lr, 2 * sizeof (p4est_locidx_t), nln);

  /* NOTE(review): presumably starts the exchange of the (start, count) pairs
   * with all sharers; the send buffers are read right below -- confirm
   * against the p4est_lnodes documentation */
  countbuf = p4est_lnodes_share_all_begin (&lrview, lnodes);

  /* prefix sums of the number of int8_t entries to send to each sharer */
  send_offsets = P4EST_ALLOC (p4est_locidx_t, nsharers + 1);
  send_offset = 0;
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer;
    sc_array_t *send_buf;
    size_t zy, nnodes;

    /* recorded before the self check, so our own entry has zero length */
    send_offsets[zz] = send_offset;
    sharer = p4est_lnodes_rank_array_index (sharers, zz);
    if (sharer->rank == mpirank) {
      continue;
    }
    send_buf = (sc_array_t *) sc_array_index (countbuf->send_buffers, zz);
    nnodes = sharer->shared_nodes.elem_count;
    P4EST_ASSERT (nnodes == send_buf->elem_count);
    P4EST_ASSERT (send_buf->elem_size == 2 * sizeof (p4est_locidx_t));
    for (zy = 0; zy < nnodes; zy++) {
      p4est_locidx_t *lp = (p4est_locidx_t *) sc_array_index (send_buf, zy);

      P4EST_ASSERT (lp[0] >= 0);
      P4EST_ASSERT (lp[1] >= 0);
      /* only the count (second entry of the pair) matters for sizing */
      send_offset += lp[1];
    }
  }
  send_total = send_offsets[nsharers] = send_offset;

  /* NOTE(review): presumably completes the pair exchange so that the receive
   * buffers read below are valid -- confirm */
  p4est_lnodes_share_all_end (countbuf);

  /* same prefix sums for the receive side, from the pairs the peers sent */
  recv_offsets = P4EST_ALLOC (p4est_locidx_t, nsharers + 1);
  recv_offset = 0;
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer;
    sc_array_t *recv_buf;
    size_t zy, nnodes;

    recv_offsets[zz] = recv_offset;
    sharer = p4est_lnodes_rank_array_index (sharers, zz);
    if (sharer->rank == mpirank) {
      continue;
    }
    recv_buf = (sc_array_t *) sc_array_index (countbuf->recv_buffers, zz);
    nnodes = sharer->shared_nodes.elem_count;
    P4EST_ASSERT (nnodes == recv_buf->elem_count);
    P4EST_ASSERT (recv_buf->elem_size == 2 * sizeof (p4est_locidx_t));
    for (zy = 0; zy < nnodes; zy++) {
      p4est_locidx_t *lp = (p4est_locidx_t *) sc_array_index (recv_buf, zy);

      P4EST_ASSERT (lp[0] >= 0);
      P4EST_ASSERT (lp[1] >= 0);
      recv_offset += lp[1];
    }
  }
  recv_total = recv_offsets[nsharers] = recv_offset;

  /* flat message buffers, sliced per sharer by the offset arrays */
  recv = P4EST_ALLOC (int8_t, recv_total);
  recv_request = P4EST_ALLOC (sc_MPI_Request, nsharers);
  send = P4EST_ALLOC (int8_t, send_total);
  send_request = P4EST_ALLOC (sc_MPI_Request, nsharers);

  /* post receives; nleft counts the receives that were actually posted */
  nleft = 0;
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer;
    int icount = recv_offsets[zz + 1] - recv_offsets[zz];

    sharer = p4est_lnodes_rank_array_index (sharers, zz);
    if (sharer->rank == mpirank) {
      recv_request[zz] = sc_MPI_REQUEST_NULL;
      continue;
    }
    if (icount) {
      mpiret = sc_MPI_Irecv (recv + recv_offsets[zz],
                             icount * sizeof (int8_t), sc_MPI_BYTE,
                             sharer->rank, P6EST_COMM_BALANCE,
                             lnodes->mpicomm, recv_request + zz);
      SC_CHECK_MPI (mpiret);
      nleft++;
    }
    else {
      /* nothing expected from this sharer: no message is posted */
      recv_request[zz] = sc_MPI_REQUEST_NULL;
    }
  }

  /* post sends: pack the columns of all nodes shared with each peer */
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer;
    size_t zy, nnodes;
    int icount;
    sc_array_t *shared_nodes;

    sharer = p4est_lnodes_rank_array_index (sharers, zz);
    if (sharer->rank == mpirank) {
      send_request[zz] = sc_MPI_REQUEST_NULL;
      continue;
    }
    shared_nodes = &sharer->shared_nodes;
    nnodes = shared_nodes->elem_count;
    icount = 0;
    for (zy = 0; zy < nnodes; zy++) {
      p4est_locidx_t nidx;
      int8_t *c;

      nidx = *((p4est_locidx_t *) sc_array_index (shared_nodes, zy));
      if (lr[nidx][1]) {
        /* copy this node's column entries behind those already packed */
        c = (int8_t *) sc_array_index (lc, lr[nidx][0]);
        memcpy (send + send_offsets[zz] + icount, c,
                lr[nidx][1] * sizeof (int8_t));
        icount += lr[nidx][1];
      }
      else {
        /* an empty column is expected to have a zero start index */
        P4EST_ASSERT (!lr[nidx][0]);
      }
    }
    /* the packed size must agree with the size derived from the pairs */
    P4EST_ASSERT (icount == send_offsets[zz + 1] - send_offsets[zz]);
    if (icount) {
      mpiret = sc_MPI_Isend (send + send_offsets[zz],
                             icount * sizeof (int8_t), sc_MPI_BYTE,
                             sharer->rank, P6EST_COMM_BALANCE,
                             lnodes->mpicomm, send_request + zz);
      SC_CHECK_MPI (mpiret);
    }
    else {
      send_request[zz] = sc_MPI_REQUEST_NULL;
    }
  }

  /* process receives in whatever order they complete */
  work = sc_array_new (sizeof (int8_t));
  array_of_indices = P4EST_ALLOC (int, nsharers);
  while (nleft) {
    int outcount;
    int i;

    mpiret = sc_MPI_Waitsome (nsharers, recv_request, &outcount,
                              array_of_indices, sc_MPI_STATUSES_IGNORE);
    SC_CHECK_MPI (mpiret);

    for (i = 0; i < outcount; i++) {
      p4est_lnodes_rank_t *sharer;
      size_t zy, nnode;
      sc_array_t *shared_nodes;
      sc_array_t *recv_buf;

      /* index of the sharer whose message just arrived */
      zz = array_of_indices[i];
      sharer = p4est_lnodes_rank_array_index (sharers, zz);
      shared_nodes = &sharer->shared_nodes;
      recv_buf = (sc_array_t *) sc_array_index (countbuf->recv_buffers, zz);
      nnode = shared_nodes->elem_count;
      P4EST_ASSERT (nnode == recv_buf->elem_count);

      /* walk this sharer's slice of the receive buffer node by node */
      recv_offset = recv_offsets[zz];
      for (zy = 0; zy < nnode; zy++) {
        p4est_locidx_t *lp;
        p4est_locidx_t nidx;
        sc_array_t oldview, newview;

        nidx = *((p4est_locidx_t *) sc_array_index (shared_nodes, zy));
        lp = (p4est_locidx_t *) sc_array_index (recv_buf, zy);

        /* oldview: current local column; newview: lp[1] received entries */
        sc_array_init_view (&oldview, lc, lr[nidx][0], lr[nidx][1]);
        sc_array_init_data (&newview, recv + recv_offset, sizeof (int8_t),
                            lp[1]);
        if (profile->ptype == P6EST_PROFILE_UNION) {
          p6est_profile_union (&oldview, &newview, work);

          if (work->elem_count > oldview.elem_count) {
            int8_t *c;

            any_change = 1;
            /* the merged column is appended at the end of lc and the range
             * is redirected to it; the old entries are left where they are */
            lr[nidx][0] = lc->elem_count;
            lr[nidx][1] = work->elem_count;
            profile->lnode_changed[evenodd][nidx] = 1;

            c = (int8_t *) sc_array_push_count (lc, work->elem_count);
            memcpy (c, work->array, work->elem_count * work->elem_size);
          }
        }
        else {
          p6est_profile_intersection (&oldview, &newview, work);
          P4EST_ASSERT (work->elem_count <= oldview.elem_count);
          if (work->elem_count < oldview.elem_count) {
            /* shorter result overwrites the start of the old column in
             * place; any_change is not touched in this branch */
            lr[nidx][1] = work->elem_count;
            memcpy (oldview.array, work->array,
                    work->elem_count * work->elem_size);
          }
        }
        recv_offset += lp[1];
      }
      P4EST_ASSERT (recv_offset == recv_offsets[zz + 1]);
    }

    nleft -= outcount;
    P4EST_ASSERT (nleft >= 0);
  }
  P4EST_FREE (array_of_indices);
  sc_array_destroy (work);

  /* NOTE(review): presumably repacks lnode_columns after columns were
   * appended or shortened above -- confirm against p6est_profile_compress */
  p6est_profile_compress (profile);
  p4est_lnodes_buffer_destroy (countbuf);

  P4EST_FREE (recv_request);
  P4EST_FREE (recv_offsets);
  P4EST_FREE (recv);

  {
    /* all sends are waited on before the send buffer is freed */
    mpiret = sc_MPI_Waitall (nsharers, send_request, sc_MPI_STATUSES_IGNORE);
    SC_CHECK_MPI (mpiret);
    P4EST_FREE (send_request);
    P4EST_FREE (send_offsets);
    P4EST_FREE (send);

    /* combine the local flags: nonzero if any rank recorded a change */
    any_global_change = any_change;
    mpiret = sc_MPI_Allreduce (&any_change, &any_global_change, 1,
                               sc_MPI_INT, sc_MPI_LOR, lnodes->mpicomm);
    SC_CHECK_MPI (mpiret);
  }

  return any_global_change;
}
/**
 * Allgather inside a group of consecutive ranks by recursive halving.
 *
 * Groups of at most SC_AG_ALLTOALL_MAX members are delegated to
 * sc_allgather_alltoall().  Larger groups are split into a lower part of
 * groupsize / 2 members and an upper part holding the remainder.  Each part
 * first gathers recursively among its own members, then every process swaps
 * its completed part with the partner at the same position in the other
 * part.  When the two parts differ in size, the last member of the upper
 * part has no such partner; it receives the lower part from the last member
 * of the lower part instead (tag SC_TAG_AG_RECURSIVE_C).
 *
 * \param [in]     mpicomm    Communicator used for all messages.
 * \param [in,out] data       Buffer of \a groupsize chunks of \a datasize
 *                            bytes each.
 * \param [in]     datasize   Number of bytes in one chunk.
 * \param [in]     groupsize  Number of chunks / group members.
 * \param [in]     myoffset   Index of this process inside the group; must
 *                            satisfy 0 <= myoffset < groupsize.
 * \param [in]     myrank     Rank from which the partner ranks are derived.
 */
void
sc_allgather_recursive (sc_MPI_Comm mpicomm, char *data, int datasize,
                        int groupsize, int myoffset, int myrank)
{
  const int           nlower = groupsize / 2;
  const int           nupper = groupsize - nlower;
  int                 uneven;
  int                 k;
  int                 mpiret;
  char               *upper_data;
  sc_MPI_Request      pending[3];

  SC_ASSERT (myoffset >= 0 && myoffset < groupsize);

  /* small groups are handled by direct pairwise exchange */
  if (groupsize <= SC_AG_ALLTOALL_MAX) {
    sc_allgather_alltoall (mpicomm, data, datasize, groupsize, myoffset,
                           myrank);
    return;
  }

  /* entries not overwritten below stay inactive for the final wait */
  for (k = 0; k < 3; ++k) {
    pending[k] = sc_MPI_REQUEST_NULL;
  }
  uneven = (nlower != nupper);
  upper_data = data + nlower * datasize;

  if (myoffset >= nlower) {
    /* member of the upper part: gather the upper chunks first */
    sc_allgather_recursive (mpicomm, upper_data, datasize, nupper,
                            myoffset - nlower, myrank);

    if (myoffset == groupsize - 1 && uneven) {
      /* unpaired last member: only fetch the lower part */
      mpiret = sc_MPI_Irecv (data, nlower * datasize, sc_MPI_BYTE,
                             myrank - nupper, SC_TAG_AG_RECURSIVE_C,
                             mpicomm, &pending[2]);
      SC_CHECK_MPI (mpiret);
    }
    else {
      /* swap parts with the partner in the lower part */
      mpiret = sc_MPI_Irecv (data, nlower * datasize, sc_MPI_BYTE,
                             myrank - nlower, SC_TAG_AG_RECURSIVE_A,
                             mpicomm, &pending[0]);
      SC_CHECK_MPI (mpiret);

      mpiret = sc_MPI_Isend (upper_data, nupper * datasize, sc_MPI_BYTE,
                             myrank - nlower, SC_TAG_AG_RECURSIVE_B,
                             mpicomm, &pending[1]);
      SC_CHECK_MPI (mpiret);
    }
  }
  else {
    /* member of the lower part: gather the lower chunks first */
    sc_allgather_recursive (mpicomm, data, datasize, nlower, myoffset,
                            myrank);

    /* swap parts with the partner in the upper part */
    mpiret = sc_MPI_Irecv (upper_data, nupper * datasize, sc_MPI_BYTE,
                           myrank + nlower, SC_TAG_AG_RECURSIVE_B,
                           mpicomm, &pending[0]);
    SC_CHECK_MPI (mpiret);

    mpiret = sc_MPI_Isend (data, nlower * datasize, sc_MPI_BYTE,
                           myrank + nlower, SC_TAG_AG_RECURSIVE_A,
                           mpicomm, &pending[1]);
    SC_CHECK_MPI (mpiret);

    if (myoffset == nlower - 1 && uneven) {
      /* additionally serve the unpaired last member of the upper part */
      mpiret = sc_MPI_Isend (data, nlower * datasize, sc_MPI_BYTE,
                             myrank + nupper, SC_TAG_AG_RECURSIVE_C,
                             mpicomm, &pending[2]);
      SC_CHECK_MPI (mpiret);
    }
  }

  mpiret = sc_MPI_Waitall (3, pending, sc_MPI_STATUSES_IGNORE);
  SC_CHECK_MPI (mpiret);
}