/**
 * Alltoall basic_linear (STARMPI:alltoall-simple)
 **/
int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount,
                                          MPI_Datatype sendtype,
                                          void *recvbuf, int recvcount,
                                          MPI_Datatype recvtype, MPI_Comm comm)
{
  int system_tag = 888;
  int i, rank, size, err, count;
  MPI_Aint lb = 0, sendext = 0, recvext = 0;
  MPI_Request *requests;

  /* Initialize. */
  rank = smpi_comm_rank(comm);
  size = smpi_comm_size(comm);
  XBT_DEBUG("<%d> algorithm alltoall_basic_linear() called.", rank);
  smpi_datatype_extent(sendtype, &lb, &sendext);
  smpi_datatype_extent(recvtype, &lb, &recvext);

  /* Simple optimization: copy our own block locally. */
  err = smpi_datatype_copy((char *)sendbuf + rank * sendcount * sendext,
                           sendcount, sendtype,
                           (char *)recvbuf + rank * recvcount * recvext,
                           recvcount, recvtype);
  if (err == MPI_SUCCESS && size > 1) {
    /* Initiate all send/recv to/from others. */
    requests = xbt_new(MPI_Request, 2 * (size - 1));

    /* Post all receives first -- a simple optimization */
    count = 0;
    for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) {
      requests[count] = smpi_irecv_init((char *)recvbuf + i * recvcount * recvext,
                                        recvcount, recvtype, i, system_tag, comm);
      count++;
    }

    /* Now post all sends in reverse order
     * - We would like to minimize the search time through the message queue
     *   when messages actually arrive in the order in which they were posted.
     * TODO: check the previous assertion
     */
    for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) {
      requests[count] = smpi_isend_init((char *)sendbuf + i * sendcount * sendext,
                                        sendcount, sendtype, i, system_tag, comm);
      count++;
    }

    /* Wait for them all. */
    smpi_mpi_startall(count, requests);
    XBT_DEBUG("<%d> wait for %d requests", rank, count);
    smpi_mpi_waitall(count, requests, MPI_STATUSES_IGNORE);
    for (i = 0; i < count; i++) {
      if (requests[i] != MPI_REQUEST_NULL)
        smpi_mpi_request_free(&requests[i]);
    }
    xbt_free(requests);
  }
  return err;
}
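/*
 * Illustrative sketch (not part of SMPI): the same "post all receives, then
 * post all sends in ring order, then wait" pattern written with standard MPI
 * point-to-point calls.  The helper name (simple_alltoall_sketch) and tag are
 * hypothetical; it assumes <mpi.h>, <stdlib.h> and <string.h> are available,
 * and that send/recv types are contiguous and signature-compatible so the
 * local block can be memcpy'd.  It only clarifies the algorithm above, it is
 * not a drop-in replacement.
 */
static int simple_alltoall_sketch(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                  void *recvbuf, int recvcount, MPI_Datatype recvtype,
                                  MPI_Comm comm)
{
  const int TAG = 888;              /* arbitrary tag for this illustration */
  int rank, size, i, count = 0;
  MPI_Aint lb, sendext, recvext;

  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);
  MPI_Type_get_extent(sendtype, &lb, &sendext);
  MPI_Type_get_extent(recvtype, &lb, &recvext);

  /* Copy the block addressed to ourselves locally (contiguous types assumed). */
  memcpy((char *)recvbuf + rank * recvcount * recvext,
         (char *)sendbuf + rank * sendcount * sendext,
         (size_t)(sendcount * sendext));

  if (size == 1)
    return MPI_SUCCESS;

  MPI_Request *reqs = malloc(2 * (size - 1) * sizeof(MPI_Request));

  /* Receives first, walking the ring upward from rank+1 ... */
  for (i = (rank + 1) % size; i != rank; i = (i + 1) % size)
    MPI_Irecv((char *)recvbuf + i * recvcount * recvext, recvcount, recvtype,
              i, TAG, comm, &reqs[count++]);

  /* ... then sends, walking the ring downward from rank-1. */
  for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size)
    MPI_Isend((char *)sendbuf + i * sendcount * sendext, sendcount, sendtype,
              i, TAG, comm, &reqs[count++]);

  MPI_Waitall(count, reqs, MPI_STATUSES_IGNORE);
  free(reqs);
  return MPI_SUCCESS;
}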
/*
 * Linear functions are copied from the basic coll module.  For
 * some small number of nodes and/or small data sizes they are just as
 * fast as tuned/tree-based segmenting operations and as such may be
 * selected by the decision functions.  They are copied into this module
 * because of the way modules are selected in V1; in V2 this will be
 * handled differently so the code will not have to be duplicated.
 * GEF Oct05 after asking Jeff.
 */
int smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisps,
                                                MPI_Datatype sdtype,
                                                void *rbuf, int *rcounts, int *rdisps,
                                                MPI_Datatype rdtype, MPI_Comm comm)
{
  int i, size, rank;
  char *psnd, *prcv;
  int nreqs;
  ptrdiff_t sext, rext;
  MPI_Request *preq;

  size = smpi_comm_size(comm);
  rank = smpi_comm_rank(comm);
  MPI_Request *ireqs = xbt_malloc(sizeof(MPI_Request) * size * 2);
  XBT_DEBUG("coll:tuned:alltoallv_intra_basic_linear rank %d", rank);

  sext = smpi_datatype_get_extent(sdtype);
  rext = smpi_datatype_get_extent(rdtype);

  /* Simple optimization - handle send to self first */
  psnd = ((char *) sbuf) + (sdisps[rank] * sext);
  prcv = ((char *) rbuf) + (rdisps[rank] * rext);
  if (0 != scounts[rank]) {
    smpi_datatype_copy(psnd, scounts[rank], sdtype,
                       prcv, rcounts[rank], rdtype);
  }

  /* If only one process, we're done. */
  if (1 == size) {
    xbt_free(ireqs);
    return MPI_SUCCESS;
  }

  /* Now, initiate all send/recv to/from others. */
  nreqs = 0;
  preq = ireqs;

  /* Post all receives first */
  for (i = 0; i < size; ++i) {
    if (i == rank || 0 == rcounts[i]) {
      continue;
    }
    prcv = ((char *) rbuf) + (rdisps[i] * rext);
    *preq = smpi_irecv_init(prcv, rcounts[i], rdtype, i,
                            COLL_TAG_ALLTOALLV, comm);
    preq++;
    ++nreqs;
  }

  /* Now post all sends */
  for (i = 0; i < size; ++i) {
    if (i == rank || 0 == scounts[i]) {
      continue;
    }
    psnd = ((char *) sbuf) + (sdisps[i] * sext);
    *preq = smpi_isend_init(psnd, scounts[i], sdtype, i,
                            COLL_TAG_ALLTOALLV, comm);
    preq++;
    ++nreqs;
  }

  /* Start your engines.  This will never return an error. */
  smpi_mpi_startall(nreqs, ireqs);

  /* Wait for them all.  If there's an error, note that we don't care
   * what the error was -- just that there *was* an error.  The PML
   * will finish all requests, even if one or more of them fail.
   * i.e., by the end of this call, all the requests are free-able.
   * So free them anyway -- even if there was an error, and return the
   * error after we free everything. */
  smpi_mpi_waitall(nreqs, ireqs, MPI_STATUSES_IGNORE);

  /* Free the requests and the request array (otherwise it would leak). */
  for (i = 0; i < nreqs; ++i) {
    if (ireqs[i] != MPI_REQUEST_NULL)
      smpi_mpi_request_free(&ireqs[i]);
  }
  xbt_free(ireqs);

  return MPI_SUCCESS;
}
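/*
 * Illustrative sketch (not part of SMPI): the init/startall/waitall idiom
 * used above maps onto persistent requests in standard MPI.  The helper
 * below (persistent_alltoallv_sketch) is hypothetical and shows one exchange
 * round only; zero-count peers are skipped exactly as in the function above,
 * and the local self-copy is omitted for brevity.
 */
static int persistent_alltoallv_sketch(void *sbuf, int *scounts, int *sdisps, MPI_Datatype sdtype,
                                       void *rbuf, int *rcounts, int *rdisps, MPI_Datatype rdtype,
                                       MPI_Comm comm)
{
  int i, rank, size, nreqs = 0;
  MPI_Aint lb, sext, rext;

  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);
  MPI_Type_get_extent(sdtype, &lb, &sext);
  MPI_Type_get_extent(rdtype, &lb, &rext);

  MPI_Request *reqs = malloc(2 * size * sizeof(MPI_Request));

  /* Create (but do not yet start) one persistent receive per peer... */
  for (i = 0; i < size; ++i) {
    if (i == rank || rcounts[i] == 0)
      continue;
    MPI_Recv_init((char *)rbuf + rdisps[i] * rext, rcounts[i], rdtype,
                  i, 0, comm, &reqs[nreqs++]);
  }
  /* ...then one persistent send per peer. */
  for (i = 0; i < size; ++i) {
    if (i == rank || scounts[i] == 0)
      continue;
    MPI_Send_init((char *)sbuf + sdisps[i] * sext, scounts[i], sdtype,
                  i, 0, comm, &reqs[nreqs++]);
  }

  /* Start every prepared request at once, wait for completion, then release. */
  MPI_Startall(nreqs, reqs);
  MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE);
  for (i = 0; i < nreqs; ++i)
    MPI_Request_free(&reqs[i]);
  free(reqs);
  return MPI_SUCCESS;
}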
int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps,
                              MPI_Datatype sendtype,
                              void *recvbuf, int *recvcounts, int *recvdisps,
                              MPI_Datatype recvtype, MPI_Comm comm)
{
  int system_tag = 889;
  int i, rank, size, err, count;
  MPI_Aint lb = 0, sendext = 0, recvext = 0;
  MPI_Request *requests;

  /* Initialize. */
  rank = smpi_comm_rank(comm);
  size = smpi_comm_size(comm);
  XBT_DEBUG("<%d> algorithm basic_alltoallv() called.", rank);
  smpi_datatype_extent(sendtype, &lb, &sendext);
  smpi_datatype_extent(recvtype, &lb, &recvext);

  /* Local copy from self */
  err = smpi_datatype_copy((char *)sendbuf + senddisps[rank] * sendext,
                           sendcounts[rank], sendtype,
                           (char *)recvbuf + recvdisps[rank] * recvext,
                           recvcounts[rank], recvtype);
  if (err == MPI_SUCCESS && size > 1) {
    /* Initiate all send/recv to/from others. */
    requests = xbt_new(MPI_Request, 2 * (size - 1));
    count = 0;

    /* Create all receives that will be posted first */
    for (i = 0; i < size; ++i) {
      if (i == rank || recvcounts[i] == 0) {
        XBT_DEBUG("<%d> skip request creation [src = %d, recvcounts[src] = %d]",
                  rank, i, recvcounts[i]);
        continue;
      }
      requests[count] = smpi_irecv_init((char *)recvbuf + recvdisps[i] * recvext,
                                        recvcounts[i], recvtype, i, system_tag, comm);
      count++;
    }

    /* Now create all sends */
    for (i = 0; i < size; ++i) {
      if (i == rank || sendcounts[i] == 0) {
        XBT_DEBUG("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]",
                  rank, i, sendcounts[i]);
        continue;
      }
      requests[count] = smpi_isend_init((char *)sendbuf + senddisps[i] * sendext,
                                        sendcounts[i], sendtype, i, system_tag, comm);
      count++;
    }

    /* Wait for them all. */
    smpi_mpi_startall(count, requests);
    XBT_DEBUG("<%d> wait for %d requests", rank, count);
    smpi_mpi_waitall(count, requests, MPI_STATUSES_IGNORE);
    for (i = 0; i < count; i++) {
      if (requests[i] != MPI_REQUEST_NULL)
        smpi_mpi_request_free(&requests[i]);
    }
    xbt_free(requests);
  }
  return err;
}
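/*
 * Caller-side sketch (hypothetical, not part of SMPI): how an application
 * typically builds the count/displacement arrays consumed by MPI_Alltoallv,
 * which the linear implementations above service.  Here rank r sends r+1
 * ints to every peer, so rank r receives i+1 ints from each rank i;
 * displacements are element offsets, which the implementation multiplies by
 * the datatype extent as seen above.
 */
static void alltoallv_setup_example(MPI_Comm comm)
{
  int rank, size, i;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);

  int *scounts = malloc(size * sizeof(int));
  int *sdisps  = malloc(size * sizeof(int));
  int *rcounts = malloc(size * sizeof(int));
  int *rdisps  = malloc(size * sizeof(int));

  int stotal = 0, rtotal = 0;
  for (i = 0; i < size; ++i) {
    scounts[i] = rank + 1;          /* same block size for every destination  */
    rcounts[i] = i + 1;             /* rank i sends i+1 elements to everybody */
    sdisps[i] = stotal;             /* blocks packed back to back             */
    rdisps[i] = rtotal;
    stotal += scounts[i];
    rtotal += rcounts[i];
  }

  int *sbuf = malloc(stotal * sizeof(int));
  int *rbuf = malloc(rtotal * sizeof(int));
  for (i = 0; i < stotal; ++i)
    sbuf[i] = rank;                 /* tag outgoing data with the sender rank */

  MPI_Alltoallv(sbuf, scounts, sdisps, MPI_INT,
                rbuf, rcounts, rdisps, MPI_INT, comm);

  free(sbuf); free(rbuf);
  free(scounts); free(sdisps); free(rcounts); free(rdisps);
}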