Example #1
/**
 * Alltoall basic_linear (STARMPI:alltoall-simple)
 **/
int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount,
                                          MPI_Datatype sendtype,
                                          void *recvbuf, int recvcount,
                                          MPI_Datatype recvtype,
                                          MPI_Comm comm)
{
  int system_tag = 888;
  int i, rank, size, err, count;
  MPI_Aint lb = 0, sendext = 0, recvext = 0;
  MPI_Request *requests;

  /* Initialize. */
  rank = smpi_comm_rank(comm);
  size = smpi_comm_size(comm);
  XBT_DEBUG("<%d> algorithm alltoall_basic_linear() called.", rank);
  smpi_datatype_extent(sendtype, &lb, &sendext);
  smpi_datatype_extent(recvtype, &lb, &recvext);
  /* simple optimization */
  err = smpi_datatype_copy((char *)sendbuf + rank * sendcount * sendext, 
                           sendcount, sendtype, 
                           (char *)recvbuf + rank * recvcount * recvext, 
                           recvcount, recvtype);
  if (err == MPI_SUCCESS && size > 1) {
    /* Initiate all send/recv to/from others. */
    requests = xbt_new(MPI_Request, 2 * (size - 1));
    /* Post all receives first -- a simple optimization */
    count = 0;
    for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) {
      requests[count] =
          smpi_irecv_init((char *)recvbuf + i * recvcount * recvext, recvcount, 
                          recvtype, i, system_tag, comm);
      count++;
    }
    /* Now post all sends in reverse order
     *   - We would like to minimize the search time through message queue
     *     when messages actually arrive in the order in which they were posted.
     * TODO: check the previous assertion
     */
    for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) {
      requests[count] =
          smpi_isend_init((char *)sendbuf + i * sendcount * sendext, sendcount,
                          sendtype, i, system_tag, comm);
      count++;
    }
    /* Start all requests, then wait for them all. */
    smpi_mpi_startall(count, requests);
    XBT_DEBUG("<%d> wait for %d requests", rank, count);
    smpi_mpi_waitall(count, requests, MPI_STATUSES_IGNORE);
    /* Release the persistent requests before freeing the array. */
    for (i = 0; i < count; i++) {
      if (requests[i] != MPI_REQUEST_NULL)
        smpi_mpi_request_free(&requests[i]);
    }
    xbt_free(requests);
  }
  return err;
}
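/*
 * For context: a minimal caller sketch, not part of the examples above.
 * It is a plain MPI program whose MPI_Alltoall call SMPI would dispatch to
 * one of its tuned algorithms (e.g. the basic_linear variant shown above,
 * depending on the configured selector). Buffer sizes and payload values
 * are illustrative assumptions.
 */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
  int rank, size, i;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  int *sendbuf = malloc(size * sizeof(int));
  int *recvbuf = malloc(size * sizeof(int));
  for (i = 0; i < size; i++)
    sendbuf[i] = rank * 100 + i;   /* value destined for rank i */

  /* One int per peer: after the call, recvbuf[j] on rank i holds the
   * element that rank j had stored in its sendbuf[i]. */
  MPI_Alltoall(sendbuf, 1, MPI_INT, recvbuf, 1, MPI_INT, MPI_COMM_WORLD);

  for (i = 0; i < size; i++)
    printf("rank %d received %d from rank %d\n", rank, recvbuf[i], i);

  free(sendbuf);
  free(recvbuf);
  MPI_Finalize();
  return 0;
}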
/*  
 * Linear functions are copied from the basic coll module.  For
 * some small number of nodes and/or small data sizes they are just as
 * fast as tuned/tree based segmenting operations and as such may be
 * selected by the decision functions.  These are copied into this module
 * due to the way we select modules in V1. i.e. in V2 we will handle this
 * differently and so will not have to duplicate code.  
 * GEF Oct05 after asking Jeff.  
 */
int
smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisps,
                                            MPI_Datatype sdtype,
                                            void *rbuf, int *rcounts, int *rdisps,
                                            MPI_Datatype rdtype,
                                            MPI_Comm comm)
{
    int i, size, rank;
    char *psnd, *prcv;
    int nreqs;
    ptrdiff_t sext, rext;
    MPI_Request *preq;
    size = smpi_comm_size(comm);
    rank = smpi_comm_rank(comm);
    MPI_Request *ireqs = xbt_malloc(sizeof(MPI_Request) * size * 2);
    XBT_DEBUG("coll:tuned:alltoallv_intra_basic_linear rank %d", rank);

    sext=smpi_datatype_get_extent(sdtype);
    rext=smpi_datatype_get_extent(rdtype);

    /* Simple optimization - handle send to self first */
    psnd = ((char *) sbuf) + (sdisps[rank] * sext);
    prcv = ((char *) rbuf) + (rdisps[rank] * rext);
    if (0 != scounts[rank]) {
        smpi_datatype_copy(psnd, scounts[rank], sdtype,
                              prcv, rcounts[rank], rdtype);
    }

    /* If only one process, we're done. */
    if (1 == size) {
        xbt_free(ireqs);
        return MPI_SUCCESS;
    }

    /* Now, initiate all send/recv to/from others. */
    nreqs = 0;
    preq = ireqs;

    /* Post all receives first */
    for (i = 0; i < size; ++i) {
        if (i == rank || 0 == rcounts[i]) {
            continue;
        }

        prcv = ((char *) rbuf) + (rdisps[i] * rext);

        *preq = smpi_irecv_init(prcv, rcounts[i], rdtype,
                                i, COLL_TAG_ALLTOALLV, comm);
        preq++;
        ++nreqs;
    }

    /* Now post all sends */
    for (i = 0; i < size; ++i) {
        if (i == rank || 0 == scounts[i]) {
            continue;
        }

        psnd = ((char *) sbuf) + (sdisps[i] * sext);
        *preq = smpi_isend_init(psnd, scounts[i], sdtype,
                                i, COLL_TAG_ALLTOALLV, comm);
        preq++;
        ++nreqs;
    }

    /* Start your engines.  This will never return an error. */
    smpi_mpi_startall(nreqs, ireqs);

    /* Wait for them all.  If there's an error, note that we don't care
     * what the error was -- just that there *was* an error.  The PML
     * will finish all requests, even if one or more of them fail.
     * i.e., by the end of this call, all the requests are free-able.
     * So free them anyway -- even if there was an error, and return the
     * error after we free everything. */
    smpi_mpi_waitall(nreqs, ireqs, MPI_STATUSES_IGNORE);

    /* Free the requests. */
    for (i = 0; i < nreqs; ++i) {
      if (ireqs[i] != MPI_REQUEST_NULL)
        smpi_mpi_request_free(&ireqs[i]);
    }
    xbt_free(ireqs);

    return MPI_SUCCESS;
}
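/*
 * For context: a minimal MPI_Alltoallv caller sketch, not part of the
 * examples above; counts, displacements and payloads are illustrative
 * assumptions. Rank r sends (r + 1) ints to every peer, so each rank
 * receives (src + 1) ints from rank src; displacements are prefix sums
 * of the counts, matching the sdisps/rdisps layout the routine above
 * expects.
 */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
  int rank, size, i;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  int *scounts = malloc(size * sizeof(int));
  int *rcounts = malloc(size * sizeof(int));
  int *sdisps  = malloc(size * sizeof(int));
  int *rdisps  = malloc(size * sizeof(int));
  int stotal = 0, rtotal = 0;
  for (i = 0; i < size; i++) {
    scounts[i] = rank + 1;   /* same amount to every destination */
    rcounts[i] = i + 1;      /* amount expected from rank i */
    sdisps[i]  = stotal;
    rdisps[i]  = rtotal;
    stotal += scounts[i];
    rtotal += rcounts[i];
  }

  int *sbuf = malloc(stotal * sizeof(int));
  int *rbuf = malloc(rtotal * sizeof(int));
  for (i = 0; i < stotal; i++)
    sbuf[i] = rank;          /* tag the payload with the sender's rank */

  MPI_Alltoallv(sbuf, scounts, sdisps, MPI_INT,
                rbuf, rcounts, rdisps, MPI_INT, MPI_COMM_WORLD);

  free(scounts); free(rcounts); free(sdisps); free(rdisps);
  free(sbuf); free(rbuf);
  MPI_Finalize();
  return 0;
}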
Example #3
/**
 * Alltoall Bruck
 *
 * Open MPI calls this routine when the message size sent to each rank is < 2000 bytes and the communicator size is < 12.
 * FIXME: uh, check smpi_pmpi again, but this routine is called for > 12, not
 * less...
 **/
int smpi_coll_tuned_alltoallv_bruck(void *sendbuf, int *sendcounts, int *senddisps,
                                   MPI_Datatype sendtype, void *recvbuf,
                                   int *recvcounts, int *recvdisps, MPI_Datatype recvtype,
                                   MPI_Comm comm)
{
  int system_tag = 777;
  int i, rank, size, err, count;
  MPI_Aint lb;
  MPI_Aint sendext = 0;
  MPI_Aint recvext = 0;
  MPI_Request *requests;

  // FIXME: check implementation
  rank = smpi_comm_rank(comm);
  size = smpi_comm_size(comm);
  XBT_DEBUG("<%d> algorithm alltoall_bruck() called.", rank);

  err = smpi_datatype_extent(sendtype, &lb, &sendext);
  err = smpi_datatype_extent(recvtype, &lb, &recvext);
  /* Local copy from self */
  err =
      smpi_datatype_copy((char *)sendbuf + senddisps[rank] * sendext, 
                         sendcounts[rank], sendtype, 
                         (char *)recvbuf + recvdisps[rank] * recvext,
                         recvcounts[rank], recvtype);
  if (err == MPI_SUCCESS && size > 1) {
    /* Initiate all send/recv to/from others. */
    requests = xbt_new(MPI_Request, 2 * (size - 1));
    count = 0;
    /* Create all receives that will be posted first */
    for (i = 0; i < size; ++i) {
      if (i == rank) {
        XBT_DEBUG("<%d> skip request creation [src = %d, recvcount = %d]",
               rank, i, recvcounts[i]);
        continue;
      }
      requests[count] =
          smpi_irecv_init((char *)recvbuf + recvdisps[i] * recvext, recvcounts[i],
                          recvtype, i, system_tag, comm);
      count++;
    }
    /* Now create all sends  */
    for (i = 0; i < size; ++i) {
      if (i == rank) {
        XBT_DEBUG("<%d> skip request creation [dst = %d, sendcount = %d]",
               rank, i, sendcounts[i]);
        continue;
      }
      requests[count] =
          smpi_isend_init((char *)sendbuf + senddisps[i] * sendext, sendcounts[i],
                          sendtype, i, system_tag, comm);
      count++;
    }
    /* Start all requests, then wait for them all. */
    smpi_mpi_startall(count, requests);
    XBT_DEBUG("<%d> wait for %d requests", rank, count);
    smpi_mpi_waitall(count, requests, MPI_STATUSES_IGNORE);
    /* Release the persistent requests before freeing the array. */
    for (i = 0; i < count; i++) {
      if (requests[i] != MPI_REQUEST_NULL)
        smpi_mpi_request_free(&requests[i]);
    }
    xbt_free(requests);
  }
  return err;
}
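/*
 * For context: the header comment of this example describes the condition
 * under which Open MPI's tuned module would pick the Bruck variant. A
 * sketch of a decision function of that shape is given below; the exact
 * thresholds and the direction of the size comparison are assumptions
 * (the FIXME in the header itself questions them), not taken from this
 * file.
 */
#include <stddef.h>

/* Assumed predicate: small per-rank messages on a small communicator. */
int alltoallv_use_bruck(int comm_size, size_t bytes_per_rank)
{
  return bytes_per_rank < 2000 && comm_size < 12;
}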