Exemple #1
0
/*
 * CFI-descriptor front end for MPI_Iscan.
 *
 * x0 / x1 : descriptors of the send and receive buffers
 * x2 / x3 : element count and datatype
 * x4 / x5 : reduction operation and communicator
 * x6      : receives the request handle
 *
 * The MPIR_F08_MPI_BOTTOM / MPIR_F08_MPI_IN_PLACE sentinel addresses are
 * translated to MPI_BOTTOM / MPI_IN_PLACE.  When the receive descriptor is a
 * non-contiguous array, a datatype is built from it and used with count 1.
 *
 * Returns the MPI error code of the first step that fails, or the result of
 * MPI_Iscan.
 */
int MPIR_Iscan_cdesc(CFI_cdesc_t* x0, CFI_cdesc_t* x1, int x2, MPI_Datatype x3, MPI_Op x4, MPI_Comm x5, MPI_Request * x6)
{
    int err = MPI_SUCCESS;
    void *buf0 = x0->base_addr;
    void *buf1 = x1->base_addr;
    int count1 = x2;
    MPI_Datatype dtype1 = x3;

    if (buf0 == &MPIR_F08_MPI_BOTTOM) {
        buf0 = MPI_BOTTOM;
    } else if (buf0 == &MPIR_F08_MPI_IN_PLACE) {
        buf0 = MPI_IN_PLACE;
    }

    if (buf1 == &MPIR_F08_MPI_BOTTOM) {
        buf1 = MPI_BOTTOM;
    }

    if (x1->rank != 0 && !CFI_is_contiguous(x1)) {
        err = cdesc_create_datatype(x1, x2, x3, &dtype1);
        if (err != MPI_SUCCESS) {
            /* Do not start the scan with a datatype that could not be built;
             * previously this error was overwritten by the MPI_Iscan result. */
            return err;
        }
        count1 = 1;
    }

    err = MPI_Iscan(buf0, buf1, count1, dtype1, x4, x5, x6);

    /* Release the temporary datatype if one was created above. */
    if (dtype1 != x3) {
        MPI_Type_free(&dtype1);
    }
    return err;
}
  // Starts an MPI_Iscan over `communicator` using `operation`.
  //
  // The data described by `send_buffer` is the local contribution; the result
  // is written starting at the element `first` refers to.  On success the raw
  // MPI request is handed to `request`; on failure a ::yampi::error carrying
  // the MPI error code is thrown.
  inline void inclusive_scan(
    ::yampi::request& request,
    ::yampi::buffer<SendValue> const& send_buffer,
    ContiguousIterator const first,
    ::yampi::binary_operation const& operation,
    ::yampi::communicator const& communicator,
    ::yampi::environment const& environment)
  {
    typedef typename std::iterator_traits<ContiguousIterator>::value_type receive_value_type;
    static_assert(
      (YAMPI_is_same<receive_value_type, SendValue>::value),
      "value_type of ContiguousIterator must be the same to SendValue");

    // MPI_Iscan is called with non-const buffer pointers, hence the const_casts.
    SendValue* const send_address = const_cast<SendValue*>(send_buffer.data());
    SendValue* const receive_address = const_cast<SendValue*>(YAMPI_addressof(*first));

    MPI_Request raw_request;
    int const status
      = MPI_Iscan(
          send_address, receive_address,
          send_buffer.count(), send_buffer.datatype().mpi_datatype(),
          operation.mpi_op(), communicator.mpi_comm(),
          YAMPI_addressof(raw_request));

    if (status != MPI_SUCCESS)
    {
      throw ::yampi::error(status, "yampi::inclusive_scan", environment);
    }

    request.reset(raw_request, environment);
  }
Exemple #3
0
/* Fortran entry point for MPI_Iscan.  All handles and the count arrive by
 * reference as MPI_Fint and are cast to their C types; the MPI_Iscan return
 * code is stored in *ierr. */
FORT_DLL_SPEC void FORT_CALL mpi_iscan_ ( void*v1, void*v2, MPI_Fint *v3, MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *v6, MPI_Fint *v7, MPI_Fint *ierr ){
    void *sendbuf = v1;

#ifndef HAVE_MPI_F_INIT_WORKS_WITH_C
    /* One-time initialisation of the Fortran support layer. */
    if (MPIR_F_NeedInit) {
        mpirinitf_();
        MPIR_F_NeedInit = 0;
    }
#endif

    /* Translate the Fortran MPI_IN_PLACE sentinel to the C one. */
    if (sendbuf == MPIR_F_MPI_IN_PLACE) {
        sendbuf = MPI_IN_PLACE;
    }

    *ierr = MPI_Iscan( sendbuf, v2,
                       (int)*v3,
                       (MPI_Datatype)(*v4),
                       (MPI_Op)*v5,
                       (MPI_Comm)(*v6),
                       (MPI_Request *)(v7) );
}
Exemple #4
0
int main(int argc, char **argv)
{
    int errs = 0;
    int i;
    int rank, size;
    int *sbuf = NULL;
    int *rbuf = NULL;
    int *scounts = NULL;
    int *rcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    int *types = NULL;
    MPI_Comm comm;
    MPI_Request req;

    /* intentionally not using MTest_Init/MTest_Finalize in order to make it
     * easy to take this test and use it as an NBC sanity test outside of the
     * MPICH test suite */
    MPI_Init(&argc, &argv);

    comm = MPI_COMM_WORLD;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    /* enough space for every process to contribute at least NUM_INTS ints to any
     * collective operation */
    sbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(sbuf);
    rbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(rbuf);
    scounts = malloc(size * sizeof(int));
    my_assert(scounts);
    rcounts = malloc(size * sizeof(int));
    my_assert(rcounts);
    sdispls = malloc(size * sizeof(int));
    my_assert(sdispls);
    rdispls = malloc(size * sizeof(int));
    my_assert(rdispls);
    types = malloc(size * sizeof(int));
    my_assert(types);

    for (i = 0; i < size; ++i) {
        sbuf[2 * i] = i;
        sbuf[2 * i + 1] = i;
        rbuf[2 * i] = i;
        rbuf[2 * i + 1] = i;
        scounts[i] = NUM_INTS;
        rcounts[i] = NUM_INTS;
        sdispls[i] = i * NUM_INTS;
        rdispls[i] = i * NUM_INTS;
        types[i] = MPI_INT;
    }

    MPI_Ibarrier(comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ibcast(sbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Igather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Igather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    else
        MPI_Igather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Igatherv(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, 0, comm,
                     &req);
    else
        MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 0, comm, &req);
    else
        MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 0, comm,
                      &req);
    else
        MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgatherv(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, comm,
                    &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoall(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoall(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallv(sbuf, scounts, sdispls, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallv(MPI_IN_PLACE, NULL, NULL, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT,
                   comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallw(sbuf, scounts, sdispls, types, rbuf, rcounts, rdispls, types, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallw(MPI_IN_PLACE, NULL, NULL, NULL, rbuf, rcounts, rdispls, types, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Ireduce(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    else
        MPI_Ireduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallreduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallreduce(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter(sbuf, rbuf, rcounts, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter(MPI_IN_PLACE, rbuf, rcounts, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter_block(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter_block(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscan(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscan(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iexscan(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iexscan(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (sbuf)
        free(sbuf);
    if (rbuf)
        free(rbuf);
    if (scounts)
        free(scounts);
    if (rcounts)
        free(rcounts);
    if (sdispls)
        free(sdispls);
    if (rdispls)
        free(rdispls);

    if (rank == 0) {
        if (errs)
            fprintf(stderr, "Found %d errors\n", errs);
        else
            printf(" No errors\n");
    }
    MPI_Finalize();
    return 0;
}
Exemple #5
0
/* Starts a "random" operation on "comm" corresponding to "rndnum" and returns
 * in (*req) a request handle corresponding to that operation.  This call should
 * be considered collective over comm (with a consistent value for "rndnum"),
 * even though the operation may only be a point-to-point request.
 *
 * All scratch buffers are allocated here and stored in "*l" together with the
 * communicator and the selected case number; nothing is freed in this
 * function (presumably the caller releases them once the request has
 * completed -- confirm against the users of struct laundry).
 *
 * In case 20, the last rank of an odd-sized communicator starts nothing and
 * (*req) stays MPI_REQUEST_NULL. */
static void start_random_nonblocking(MPI_Comm comm, unsigned int rndnum, MPI_Request *req, struct laundry *l)
{
    int i, j;
    int rank, size;
    int *buf = NULL;
    int *recvbuf = NULL;
    int *sendcounts = NULL;
    int *recvcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    /* NOTE(review): these hold MPI_Datatype values (see the malloc sizes and
     * case 19) but are declared int*; the field types of struct laundry are
     * not visible here, so the declarations are left unchanged -- confirm. */
    int *sendtypes = NULL;
    int *recvtypes = NULL;
    signed char *buf_alias = NULL;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    *req = MPI_REQUEST_NULL;

    l->case_num = -1;
    l->comm = comm;

    l->buf        = buf        = malloc(COUNT*size*sizeof(int));
    l->recvbuf    = recvbuf    = malloc(COUNT*size*sizeof(int));
    l->sendcounts = sendcounts = malloc(size*sizeof(int));
    l->recvcounts = recvcounts = malloc(size*sizeof(int));
    l->sdispls    = sdispls    = malloc(size*sizeof(int));
    l->rdispls    = rdispls    = malloc(size*sizeof(int));
    l->sendtypes  = sendtypes  = malloc(size*sizeof(MPI_Datatype));
    l->recvtypes  = recvtypes  = malloc(size*sizeof(MPI_Datatype));

    /* Every case below writes through these pointers without further checks,
     * so fail here rather than dereference NULL. */
    my_assert(buf);
    my_assert(recvbuf);
    my_assert(sendcounts);
    my_assert(recvcounts);
    my_assert(sdispls);
    my_assert(rdispls);
    my_assert(sendtypes);
    my_assert(recvtypes);

#define NUM_CASES (21)
    /* presumably rand_range maps rndnum into [0, NUM_CASES) -- confirm against
     * its definition; any other value lands in the default branch and aborts */
    l->case_num = rand_range(rndnum, 0, NUM_CASES);
    switch (l->case_num) {
        case 0: /* MPI_Ibcast */
            for (i = 0; i < COUNT; ++i) {
                if (rank == 0) {
                    buf[i] = i;
                }
                else {
                    buf[i] = 0xdeadbeef;
                }
            }
            MPI_Ibcast(buf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 1: /* MPI_Ibcast (again, but designed to stress scatter/allgather impls) */
            /* FIXME fiddle with PRIME and buffer allocation s.t. PRIME is much larger (1021?) */
            buf_alias = (signed char *)buf;
            my_assert(COUNT*size*sizeof(int) > PRIME); /* sanity */
            for (i = 0; i < PRIME; ++i) {
                if (rank == 0)
                    buf_alias[i] = i;
                else
                    buf_alias[i] = 0xdb;
            }
            /* fill the rest of the allocation, which is not part of the broadcast */
            for (i = PRIME; i < COUNT * size * sizeof(int); ++i) {
                buf_alias[i] = 0xbf;
            }
            MPI_Ibcast(buf_alias, PRIME, MPI_SIGNED_CHAR, 0, comm, req);
            break;

        case 2: /* MPI_Ibarrier */
            MPI_Ibarrier(comm, req);
            break;

        case 3: /* MPI_Ireduce */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, 0, comm, req);
            break;

        case 4: /* same again, use a user op and free it before the wait */
            {
                MPI_Op op = MPI_OP_NULL;
                MPI_Op_create(sum_fn, /*commute=*/1, &op);
                for (i = 0; i < COUNT; ++i) {
                    buf[i] = rank + i;
                    recvbuf[i] = 0xdeadbeef;
                }
                MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, op, 0, comm, req);
                MPI_Op_free(&op);
            }
            break;

        case 5: /* MPI_Iallreduce */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iallreduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 6: /* MPI_Ialltoallv (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                sendcounts[i] = COUNT;
                recvcounts[i] = COUNT;
                sdispls[i] = COUNT * i;
                rdispls[i] = COUNT * i;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoallv(buf, sendcounts, sdispls, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req);
            break;

        case 7: /* MPI_Igather */
            for (i = 0; i < size*COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 8: /* same test again, just use a dup'ed datatype and free it before the wait */
            {
                MPI_Datatype type = MPI_DATATYPE_NULL;
                MPI_Type_dup(MPI_INT, &type);
                for (i = 0; i < size*COUNT; ++i) {
                    buf[i] = rank + i;
                    recvbuf[i] = 0xdeadbeef;
                }
                MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, type, 0, comm, req);
                MPI_Type_free(&type); /* should cause implementations that don't refcount
                                         correctly to blow up or hang in the wait */
            }
            break;

        case 9: /* MPI_Iscatter */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    if (rank == 0)
                        buf[i*COUNT+j] = i + j;
                    else
                        buf[i*COUNT+j] = 0xdeadbeef;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Iscatter(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 10: /* MPI_Iscatterv */
            for (i = 0; i < size; ++i) {
                /* weak test, just test the regular case where all counts are equal */
                sendcounts[i] = COUNT;
                sdispls[i] = i * COUNT;
                for (j = 0; j < COUNT; ++j) {
                    if (rank == 0)
                        buf[i*COUNT+j] = i + j;
                    else
                        buf[i*COUNT+j] = 0xdeadbeef;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Iscatterv(buf, sendcounts, sdispls, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 11: /* MPI_Ireduce_scatter */
            for (i = 0; i < size; ++i) {
                recvcounts[i] = COUNT;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + i;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ireduce_scatter(buf, recvbuf, recvcounts, MPI_INT, MPI_SUM, comm, req);
            break;

        case 12: /* MPI_Ireduce_scatter_block */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + i;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ireduce_scatter_block(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 13: /* MPI_Igatherv */
            for (i = 0; i < size*COUNT; ++i) {
                buf[i] = 0xdeadbeef;
                recvbuf[i] = 0xdeadbeef;
            }
            /* only the first COUNT entries are this rank's contribution */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
            }
            for (i = 0; i < size; ++i) {
                recvcounts[i] = COUNT;
                rdispls[i] = i * COUNT;
            }
            MPI_Igatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, 0, comm, req);
            break;

        case 14: /* MPI_Ialltoall */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoall(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req);
            break;

        case 15: /* MPI_Iallgather */
            for (i = 0; i < size*COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iallgather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req);
            break;

        case 16: /* MPI_Iallgatherv */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
                recvcounts[i] = COUNT;
                rdispls[i] = i * COUNT;
            }
            for (i = 0; i < COUNT; ++i)
                buf[i] = rank + i;
            MPI_Iallgatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req);
            break;

        case 17: /* MPI_Iscan */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 18: /* MPI_Iexscan */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iexscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 19: /* MPI_Ialltoallw (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                sendcounts[i] = COUNT;
                recvcounts[i] = COUNT;
                /* unlike case 6, displacements are scaled by sizeof(int) here */
                sdispls[i] = COUNT * i * sizeof(int);
                rdispls[i] = COUNT * i * sizeof(int);
                sendtypes[i] = MPI_INT;
                recvtypes[i] = MPI_INT;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoallw(buf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, req);
            break;

        case 20: /* basic pt2pt MPI_Isend/MPI_Irecv pairing */
            /* even ranks send to odd ranks, but only if we have a full pair */
            if ((rank % 2 != 0) || (rank != size-1)) {
                for (j = 0; j < COUNT; ++j) {
                    buf[j] = j;
                    recvbuf[j] = 0xdeadbeef;
                }
                if (rank % 2 == 0)
                    MPI_Isend(buf, COUNT, MPI_INT, rank+1, 5, comm, req);
                else
                    MPI_Irecv(recvbuf, COUNT, MPI_INT, rank-1, 5, comm, req);
            }
            break;

        default:
            fprintf(stderr, "unexpected value for l->case_num=%d)\n", (l->case_num));
            MPI_Abort(comm, 1);
            exit(1);
            break;
    }
}
Exemple #6
0
/*
 * Negative test for buffer aliasing in the nonblocking collectives: every call
 * below passes the same buffer (or an overlapping part of it) as both send and
 * receive buffer, with MPI_ERRORS_RETURN installed on MPI_COMM_WORLD.  A call
 * that returns MPI_SUCCESS is counted as an error.  Rooted collectives are
 * only attempted on rank 0.  Only rank 0 prints a verdict, based on its own
 * error count.
 */
int main(int argc, char **argv)
{
    int errs = 0;
    int i, j;
    int rank, size;
    int *sbuf = NULL;
    int *rbuf = NULL;
    int *scounts = NULL;
    int *rcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *types = NULL;
    MPI_Comm comm;
    MPI_Request req;

    /* intentionally not using MTest_Init/MTest_Finalize in order to make it
     * easy to take this test and use it as an NBC sanity test outside of the
     * MPICH test suite */
    MPI_Init(&argc, &argv);

    comm = MPI_COMM_WORLD;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    /* report errors through return codes so they can be inspected below */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    /* enough space for every process to contribute at least NUM_INTS ints to any
     * collective operation */
    sbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(sbuf);
    rbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(rbuf);
    scounts = malloc(size * sizeof(int));
    my_assert(scounts);
    rcounts = malloc(size * sizeof(int));
    my_assert(rcounts);
    sdispls = malloc(size * sizeof(int));
    my_assert(sdispls);
    rdispls = malloc(size * sizeof(int));
    my_assert(rdispls);
    types = malloc(size * sizeof(MPI_Datatype));
    my_assert(types);

    for (i = 0; i < size; ++i) {
        /* Fill the whole NUM_INTS-wide slot of peer i instead of assuming
         * NUM_INTS == 2. */
        for (j = 0; j < NUM_INTS; ++j) {
            sbuf[NUM_INTS * i + j] = i;
            rbuf[NUM_INTS * i + j] = i;
        }
        scounts[i] = NUM_INTS;
        rcounts[i] = NUM_INTS;
        sdispls[i] = i * NUM_INTS;
        rdispls[i] = i * NUM_INTS;
        types[i] = MPI_INT;
    }

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Igather(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm, &req))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, sbuf, rcounts, rdispls, MPI_INT, 0, comm, &req))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm, &req))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm, &req))
        errs++;

    /* send buffer points into this rank's own slot of the receive buffer */
    if (MPI_SUCCESS == MPI_Iallgather(&sbuf[rank], 1, MPI_INT, sbuf, 1, MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS ==
        MPI_Iallgatherv(&sbuf[rank * rcounts[rank]], rcounts[rank], MPI_INT, sbuf, rcounts, rdispls,
                        MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ialltoall(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS ==
        MPI_Ialltoallv(sbuf, scounts, sdispls, MPI_INT, sbuf, scounts, sdispls, MPI_INT, comm,
                       &req))
        errs++;

    if (MPI_SUCCESS ==
        MPI_Ialltoallw(sbuf, scounts, sdispls, types, sbuf, scounts, sdispls, types, comm, &req))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Ireduce(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iallreduce(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ireduce_scatter(sbuf, sbuf, rcounts, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS ==
        MPI_Ireduce_scatter_block(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iscan(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iexscan(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    /* free(NULL) is a no-op, so no NULL guards are needed. */
    free(sbuf);
    free(rbuf);
    free(scounts);
    free(rcounts);
    free(sdispls);
    free(rdispls);
    free(types);

    if (rank == 0) {
        if (errs)
            fprintf(stderr, "Found %d errors\n", errs);
        else
            printf(" No errors\n");
    }
    MPI_Finalize();
    return 0;
}