Пример #1
0
/** Allgather inside a group by exchanging directly with every other member.
 *
 * For each group offset j other than \a myoffset, one nonblocking receive
 * and one nonblocking send are posted with the rank
 * myrank - (myoffset - j); the function returns once all of them finished.
 *
 * \param [in] mpicomm    Communicator used for every message.
 * \param [in,out] data   Buffer holding groupsize slots of datasize bytes.
 *                        Slot \a myoffset is the one that is sent; slot j
 *                        is overwritten by the message received for j.
 * \param [in] datasize   Number of bytes in one slot.
 * \param [in] groupsize  Number of slots, and of request pairs allocated.
 * \param [in] myoffset   Offset of the calling process, in [0, groupsize).
 * \param [in] myrank     Rank of the calling process, used to derive peers.
 */
void
sc_allgather_alltoall (sc_MPI_Comm mpicomm, char *data, int datasize,
                       int groupsize, int myoffset, int myrank)
{
  int                 slot;
  int                 partner;
  int                 mpiret;
  sc_MPI_Request     *request;
  sc_MPI_Request     *recv_req;
  sc_MPI_Request     *send_req;

  SC_ASSERT (myoffset >= 0 && myoffset < groupsize);

  /* one allocation: first half for receives, second half for sends */
  request = SC_ALLOC (sc_MPI_Request, 2 * groupsize);
  recv_req = request;
  send_req = request + groupsize;

  for (slot = 0; slot < groupsize; ++slot) {
    if (slot != myoffset) {
      partner = myrank - (myoffset - slot);

      mpiret = sc_MPI_Irecv (data + slot * datasize, datasize, sc_MPI_BYTE,
                             partner, SC_TAG_AG_ALLTOALL, mpicomm,
                             recv_req + slot);
      SC_CHECK_MPI (mpiret);

      mpiret = sc_MPI_Isend (data + myoffset * datasize, datasize,
                             sc_MPI_BYTE, partner, SC_TAG_AG_ALLTOALL,
                             mpicomm, send_req + slot);
      SC_CHECK_MPI (mpiret);
    }
    else {
      /* nothing to exchange with ourselves; keep Waitall happy */
      recv_req[slot] = sc_MPI_REQUEST_NULL;
      send_req[slot] = sc_MPI_REQUEST_NULL;
    }
  }

  mpiret = sc_MPI_Waitall (2 * groupsize, request, sc_MPI_STATUSES_IGNORE);
  SC_CHECK_MPI (mpiret);

  SC_FREE (request);
}
Пример #2
0
/** Exchange the column profiles of shared nodes with all sharer processes
 * and merge what is received into the local profile.
 *
 * The function works in two rounds.  First the (offset, count) pairs in
 * profile->lnode_ranges are exchanged through p4est_lnodes_share_all_begin /
 * p4est_lnodes_share_all_end so that the byte size of every message is
 * known.  Then the int8_t profile entries themselves are sent and received
 * with nonblocking point-to-point messages tagged P6EST_COMM_BALANCE.
 * Each received profile is combined with the local one by
 * p6est_profile_union when profile->ptype == P6EST_PROFILE_UNION and by
 * p6est_profile_intersection otherwise.
 *
 * \param [in,out] profile  Profile whose lnode_ranges, lnode_columns and
 *                          lnode_changed[evenodd] entries may be updated.
 * \return                  Result of a logical-OR reduction over
 *                          lnodes->mpicomm of the local change flag.  The
 *                          local flag is only set in the union case, when
 *                          a merged profile is longer than the old one.
 */
int
p6est_profile_sync (p6est_profile_t * profile)
{
  p4est_lnodes_t     *lnodes = profile->lnodes;
  p4est_locidx_t      nln = lnodes->num_local_nodes;
  sc_array_t          lrview;
  p4est_lnodes_buffer_t *countbuf;
  sc_array_t         *sharers;
  size_t              zz, nsharers;
  int                 nleft;
  int8_t             *recv, *send;
  int                *array_of_indices;
  p4est_locidx_t      recv_total;
  p4est_locidx_t     *recv_offsets, recv_offset;
  p4est_locidx_t      send_total;
  p4est_locidx_t     *send_offsets, send_offset;
  p4est_locidx_t (*lr)[2];
  sc_array_t         *lc = profile->lnode_columns;
  sc_MPI_Request     *recv_request, *send_request;
  sc_array_t         *work;
  int                 any_change = 0;
  int                 any_global_change;
  int                 mpiret, mpirank;
  int                 evenodd = profile->evenodd;

  /* lr[n][0] is used below as a start index into lc and lr[n][1] as the
   * number of int8_t entries belonging to node n */
  lr = (p4est_locidx_t (*)[2]) profile->lnode_ranges;
  sharers = lnodes->sharers;
  nsharers = sharers->elem_count;

  mpiret = sc_MPI_Comm_rank (lnodes->mpicomm, &mpirank);
  SC_CHECK_MPI (mpiret);

  /* wrap the range pairs in an array view with one element per local node */
  sc_array_init_data (&lrview, lr, 2 * sizeof (p4est_locidx_t), nln);

  /* start exchanging the range pairs; meanwhile inspect the send buffers
   * to compute, per sharer, the offset into the flat send array */
  countbuf = p4est_lnodes_share_all_begin (&lrview, lnodes);
  send_offsets = P4EST_ALLOC (p4est_locidx_t, nsharers + 1);
  send_offset = 0;
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer;
    sc_array_t         *send_buf;
    size_t              zy, nnodes;

    send_offsets[zz] = send_offset;
    sharer = p4est_lnodes_rank_array_index (sharers, zz);
    /* the entry for this process itself contributes nothing */
    if (sharer->rank == mpirank) {
      continue;
    }
    send_buf = (sc_array_t *) sc_array_index (countbuf->send_buffers, zz);
    nnodes = sharer->shared_nodes.elem_count;

    P4EST_ASSERT (nnodes == send_buf->elem_count);

    P4EST_ASSERT (send_buf->elem_size == 2 * sizeof (p4est_locidx_t));
    /* accumulate the counts (second entry of each pair) for this sharer */
    for (zy = 0; zy < nnodes; zy++) {
      p4est_locidx_t     *lp =
        (p4est_locidx_t *) sc_array_index (send_buf, zy);
      P4EST_ASSERT (lp[0] >= 0);
      P4EST_ASSERT (lp[1] >= 0);
      send_offset += lp[1];
    }
  }
  send_total = send_offsets[nsharers] = send_offset;

  /* finish the range exchange; the receive buffers are read from here on */
  p4est_lnodes_share_all_end (countbuf);
  recv_offsets = P4EST_ALLOC (p4est_locidx_t, nsharers + 1);
  recv_offset = 0;
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer;
    sc_array_t         *recv_buf;
    size_t              zy, nnodes;

    recv_offsets[zz] = recv_offset;
    sharer = p4est_lnodes_rank_array_index (sharers, zz);
    if (sharer->rank == mpirank) {
      continue;
    }
    recv_buf = (sc_array_t *) sc_array_index (countbuf->recv_buffers, zz);
    nnodes = sharer->shared_nodes.elem_count;

    P4EST_ASSERT (nnodes == recv_buf->elem_count);

    P4EST_ASSERT (recv_buf->elem_size == 2 * sizeof (p4est_locidx_t));
    /* same accumulation as above, now with the counts the sharer sent us */
    for (zy = 0; zy < nnodes; zy++) {
      p4est_locidx_t     *lp =
        (p4est_locidx_t *) sc_array_index (recv_buf, zy);
      P4EST_ASSERT (lp[0] >= 0);
      P4EST_ASSERT (lp[1] >= 0);
      recv_offset += lp[1];
    }
  }
  recv_total = recv_offsets[nsharers] = recv_offset;

  /* flat byte buffers for all sharers plus one request slot per sharer */
  recv = P4EST_ALLOC (int8_t, recv_total);
  recv_request = P4EST_ALLOC (sc_MPI_Request, nsharers);
  send = P4EST_ALLOC (int8_t, send_total);
  send_request = P4EST_ALLOC (sc_MPI_Request, nsharers);

  /* post receives; nleft counts how many were actually posted */
  nleft = 0;
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer;
    int                 icount = recv_offsets[zz + 1] - recv_offsets[zz];

    sharer = p4est_lnodes_rank_array_index (sharers, zz);
    if (sharer->rank == mpirank) {
      recv_request[zz] = sc_MPI_REQUEST_NULL;
      continue;
    }
    /* empty messages are not sent at all, so none is expected either */
    if (icount) {
      mpiret =
        sc_MPI_Irecv (recv + recv_offsets[zz], icount * sizeof (int8_t),
                      sc_MPI_BYTE, sharer->rank, P6EST_COMM_BALANCE,
                      lnodes->mpicomm, recv_request + zz);
      SC_CHECK_MPI (mpiret);
      nleft++;
    }
    else {
      recv_request[zz] = sc_MPI_REQUEST_NULL;
    }
  }

  /* post sends: pack the profiles of all shared nodes for each sharer
   * contiguously into its segment of the send array */
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer;
    size_t              zy, nnodes;
    int                 icount;
    sc_array_t         *shared_nodes;

    sharer = p4est_lnodes_rank_array_index (sharers, zz);
    if (sharer->rank == mpirank) {
      send_request[zz] = sc_MPI_REQUEST_NULL;
      continue;
    }
    shared_nodes = &sharer->shared_nodes;
    nnodes = shared_nodes->elem_count;
    icount = 0;
    for (zy = 0; zy < nnodes; zy++) {
      p4est_locidx_t      nidx;
      int8_t             *c;

      /* local node index of the zy-th node shared with this sharer */
      nidx = *((p4est_locidx_t *) sc_array_index (shared_nodes, zy));

      if (lr[nidx][1]) {
        c = (int8_t *) sc_array_index (lc, lr[nidx][0]);
        memcpy (send + send_offsets[zz] + icount, c,
                lr[nidx][1] * sizeof (int8_t));
        icount += lr[nidx][1];
      }
      else {
        /* a node with zero count is expected to have zero offset too */
        P4EST_ASSERT (!lr[nidx][0]);
      }
    }
    /* the packed size must agree with the size derived from the counts */
    P4EST_ASSERT (icount == send_offsets[zz + 1] - send_offsets[zz]);
    if (icount) {
      mpiret =
        sc_MPI_Isend (send + send_offsets[zz], icount * sizeof (int8_t),
                      sc_MPI_BYTE, sharer->rank, P6EST_COMM_BALANCE,
                      lnodes->mpicomm, send_request + zz);
      SC_CHECK_MPI (mpiret);
    }
    else {
      send_request[zz] = sc_MPI_REQUEST_NULL;
    }
  }

  /* process the receives in whatever order they complete; work is a
   * scratch array reused for every merge result */
  work = sc_array_new (sizeof (int8_t));
  array_of_indices = P4EST_ALLOC (int, nsharers);
  while (nleft) {
    int                 outcount;
    int                 i;

    mpiret = sc_MPI_Waitsome (nsharers, recv_request, &outcount,
                              array_of_indices, sc_MPI_STATUSES_IGNORE);
    SC_CHECK_MPI (mpiret);

    for (i = 0; i < outcount; i++) {
      p4est_lnodes_rank_t *sharer;
      size_t              zy, nnode;
      sc_array_t         *shared_nodes;
      sc_array_t         *recv_buf;

      /* index of the sharer whose message has arrived */
      zz = array_of_indices[i];
      sharer = p4est_lnodes_rank_array_index (sharers, zz);
      shared_nodes = &sharer->shared_nodes;
      recv_buf = (sc_array_t *) sc_array_index (countbuf->recv_buffers, zz);
      nnode = shared_nodes->elem_count;
      P4EST_ASSERT (nnode == recv_buf->elem_count);

      /* walk through this sharer's segment of the receive array */
      recv_offset = recv_offsets[zz];
      for (zy = 0; zy < nnode; zy++) {
        p4est_locidx_t     *lp;
        p4est_locidx_t      nidx;
        sc_array_t          oldview, newview;

        nidx = *((p4est_locidx_t *) sc_array_index (shared_nodes, zy));
        lp = (p4est_locidx_t *) sc_array_index (recv_buf, zy);

        /* oldview: current local profile of the node inside lc;
         * newview: the lp[1] entries the sharer sent for the same node */
        sc_array_init_view (&oldview, lc, lr[nidx][0], lr[nidx][1]);
        sc_array_init_data (&newview, recv + recv_offset, sizeof (int8_t),
                            lp[1]);
        if (profile->ptype == P6EST_PROFILE_UNION) {
          p6est_profile_union (&oldview, &newview, work);

          /* a longer result is appended at the end of lc and the node's
           * range is redirected to it; the old entries stay in lc */
          if (work->elem_count > oldview.elem_count) {
            int8_t             *c;

            any_change = 1;
            lr[nidx][0] = lc->elem_count;
            lr[nidx][1] = work->elem_count;
            profile->lnode_changed[evenodd][nidx] = 1;

            c = (int8_t *) sc_array_push_count (lc, work->elem_count);
            memcpy (c, work->array, work->elem_count * work->elem_size);
          }
        }
        else {
          p6est_profile_intersection (&oldview, &newview, work);
          P4EST_ASSERT (work->elem_count <= oldview.elem_count);
          /* a shorter result overwrites the old profile in place;
           * any_change and lnode_changed are not touched in this branch */
          if (work->elem_count < oldview.elem_count) {
            lr[nidx][1] = work->elem_count;
            memcpy (oldview.array, work->array,
                    work->elem_count * work->elem_size);
          }
        }

        recv_offset += lp[1];
      }
      P4EST_ASSERT (recv_offset == recv_offsets[zz + 1]);
    }

    nleft -= outcount;
    P4EST_ASSERT (nleft >= 0);
  }
  P4EST_FREE (array_of_indices);
  sc_array_destroy (work);

  /* NOTE(review): presumably removes the stale entries left behind in lc
   * by the union case -- confirm against p6est_profile_compress */
  p6est_profile_compress (profile);
  /* the count buffers were still read in the loop above, so they are
   * released only now */
  p4est_lnodes_buffer_destroy (countbuf);

  P4EST_FREE (recv_request);
  P4EST_FREE (recv_offsets);
  P4EST_FREE (recv);

  {
    /* the send array must stay alive until all sends have completed */
    mpiret = sc_MPI_Waitall (nsharers, send_request, sc_MPI_STATUSES_IGNORE);

    SC_CHECK_MPI (mpiret);
    P4EST_FREE (send_request);
    P4EST_FREE (send_offsets);
    P4EST_FREE (send);

    /* combine the local change flags of all processes by logical or */
    any_global_change = any_change;
    mpiret = sc_MPI_Allreduce (&any_change, &any_global_change, 1, sc_MPI_INT,
                               sc_MPI_LOR, lnodes->mpicomm);

    SC_CHECK_MPI (mpiret);
  }

  return any_global_change;
}
Пример #3
0
/** Allgather inside a group by recursive halving.
 *
 * Groups of at most SC_AG_ALLTOALL_MAX members are handed to
 * sc_allgather_alltoall.  Larger groups are split into a lower part of
 * groupsize / 2 members and an upper part holding the rest.  Each part is
 * gathered recursively, after which a process of the lower part swaps its
 * half with the rank myrank + groupsize / 2 and a process of the upper
 * part swaps with the rank myrank - groupsize / 2.  When the two parts
 * differ in size, the last process of the upper part has no such partner;
 * it instead receives the lower half from the last process of the lower
 * part through an extra message.
 *
 * \param [in] mpicomm    Communicator used for every message.
 * \param [in,out] data   Buffer holding groupsize slots of datasize bytes.
 * \param [in] datasize   Number of bytes in one slot.
 * \param [in] groupsize  Number of members in the current group.
 * \param [in] myoffset   Offset of the calling process, in [0, groupsize).
 * \param [in] myrank     Rank of the calling process, used to derive peers.
 */
void
sc_allgather_recursive (sc_MPI_Comm mpicomm, char *data, int datasize,
                        int groupsize, int myoffset, int myrank)
{
  const int           nlow = groupsize / 2;
  const int           nhigh = groupsize - nlow;
  const int           uneven = (nlow != nhigh);
  char               *highdata;
  int                 mpiret;
  sc_MPI_Request      request[3];

  SC_ASSERT (myoffset >= 0 && myoffset < groupsize);

  /* small groups: no further splitting */
  if (groupsize <= SC_AG_ALLTOALL_MAX) {
    sc_allgather_alltoall (mpicomm, data, datasize, groupsize, myoffset,
                           myrank);
    return;
  }

  /* start of the slots that belong to the upper part */
  highdata = data + nlow * datasize;

  if (myoffset >= nlow) {
    /* this process is in the upper part */
    sc_allgather_recursive (mpicomm, highdata, datasize, nhigh,
                            myoffset - nlow, myrank);

    if (uneven && myoffset == groupsize - 1) {
      /* unpaired last process: only the extra message arrives */
      request[0] = sc_MPI_REQUEST_NULL;
      request[1] = sc_MPI_REQUEST_NULL;

      mpiret = sc_MPI_Irecv (data, nlow * datasize, sc_MPI_BYTE,
                             myrank - nhigh, SC_TAG_AG_RECURSIVE_C,
                             mpicomm, &request[2]);
      SC_CHECK_MPI (mpiret);
    }
    else {
      mpiret = sc_MPI_Irecv (data, nlow * datasize, sc_MPI_BYTE,
                             myrank - nlow, SC_TAG_AG_RECURSIVE_A,
                             mpicomm, &request[0]);
      SC_CHECK_MPI (mpiret);

      mpiret = sc_MPI_Isend (highdata, nhigh * datasize, sc_MPI_BYTE,
                             myrank - nlow, SC_TAG_AG_RECURSIVE_B,
                             mpicomm, &request[1]);
      SC_CHECK_MPI (mpiret);

      request[2] = sc_MPI_REQUEST_NULL;
    }
  }
  else {
    /* this process is in the lower part */
    sc_allgather_recursive (mpicomm, data, datasize, nlow, myoffset, myrank);

    mpiret = sc_MPI_Irecv (highdata, nhigh * datasize, sc_MPI_BYTE,
                           myrank + nlow, SC_TAG_AG_RECURSIVE_B,
                           mpicomm, &request[0]);
    SC_CHECK_MPI (mpiret);

    mpiret = sc_MPI_Isend (data, nlow * datasize, sc_MPI_BYTE,
                           myrank + nlow, SC_TAG_AG_RECURSIVE_A,
                           mpicomm, &request[1]);
    SC_CHECK_MPI (mpiret);

    if (uneven && myoffset == nlow - 1) {
      /* serve the unpaired last process of the upper part as well */
      mpiret = sc_MPI_Isend (data, nlow * datasize, sc_MPI_BYTE,
                             myrank + nhigh, SC_TAG_AG_RECURSIVE_C,
                             mpicomm, &request[2]);
      SC_CHECK_MPI (mpiret);
    }
    else {
      request[2] = sc_MPI_REQUEST_NULL;
    }
  }

  mpiret = sc_MPI_Waitall (3, request, sc_MPI_STATUSES_IGNORE);
  SC_CHECK_MPI (mpiret);
}