int main(int argc, char **argv)
{
    int errs = 0;
    int i;
    int rank, size;
    int *sbuf = NULL;
    int *rbuf = NULL;
    int *scounts = NULL;
    int *rcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *types = NULL;
    MPI_Comm comm;
    MPI_Request req;

    /* intentionally not using MTest_Init/MTest_Finalize in order to make it
     * easy to take this test and use it as an NBC sanity test outside of the
     * MPICH test suite */
    MPI_Init(&argc, &argv);

    comm = MPI_COMM_WORLD;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    /* enough space for every process to contribute at least NUM_INTS ints to any
     * collective operation */
    sbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(sbuf);
    rbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(rbuf);
    scounts = malloc(size * sizeof(int));
    my_assert(scounts);
    rcounts = malloc(size * sizeof(int));
    my_assert(rcounts);
    sdispls = malloc(size * sizeof(int));
    my_assert(sdispls);
    rdispls = malloc(size * sizeof(int));
    my_assert(rdispls);
    types = malloc(size * sizeof(MPI_Datatype));
    my_assert(types);

    for (i = 0; i < size; ++i) {
        sbuf[2 * i] = i;
        sbuf[2 * i + 1] = i;
        rbuf[2 * i] = i;
        rbuf[2 * i + 1] = i;
        scounts[i] = NUM_INTS;
        rcounts[i] = NUM_INTS;
        sdispls[i] = i * NUM_INTS;
        rdispls[i] = i * NUM_INTS;
        types[i] = MPI_INT;
    }

    MPI_Ibarrier(comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ibcast(sbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Igather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Igather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    else
        MPI_Igather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Igatherv(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req);
    else
        MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 0, comm, &req);
    else
        MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 0, comm, &req);
    else
        MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgatherv(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoall(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoall(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallv(sbuf, scounts, sdispls, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallv(MPI_IN_PLACE, NULL, NULL, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallw(sbuf, scounts, sdispls, types, rbuf, rcounts, rdispls, types, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallw(MPI_IN_PLACE, NULL, NULL, NULL, rbuf, rcounts, rdispls, types, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Ireduce(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    else
        MPI_Ireduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallreduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallreduce(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter(sbuf, rbuf, rcounts, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter(MPI_IN_PLACE, rbuf, rcounts, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter_block(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter_block(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscan(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscan(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iexscan(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iexscan(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (sbuf)
        free(sbuf);
    if (rbuf)
        free(rbuf);
    if (scounts)
        free(scounts);
    if (rcounts)
        free(rcounts);
    if (sdispls)
        free(sdispls);
    if (rdispls)
        free(rdispls);
    if (types)
        free(types);

    if (rank == 0) {
        if (errs)
            fprintf(stderr, "Found %d errors\n", errs);
        else
            printf(" No errors\n");
    }
    MPI_Finalize();
    return 0;
}
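/* Illustrative sketch, not part of the test above: the test completes each
 * nonblocking collective immediately with MPI_Wait, but the MPI standard also
 * permits several nonblocking collectives to be outstanding on the same
 * communicator at once, as long as every rank starts them in the same order
 * and each operation uses its own buffers.  The helper name and buffer
 * parameters below are hypothetical. */
static void sketch_overlap_two_collectives(MPI_Comm comm, int *bcast_buf,
                                           int *red_in, int *red_out, int count)
{
    MPI_Request reqs[2];

    /* start two independent collectives back to back ... */
    MPI_Ibcast(bcast_buf, count, MPI_INT, 0, comm, &reqs[0]);
    MPI_Iallreduce(red_in, red_out, count, MPI_INT, MPI_SUM, comm, &reqs[1]);

    /* ... and complete both of them together */
    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
}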
/* Starts a "random" operation on "comm" corresponding to "rndnum" and returns * in (*req) a request handle corresonding to that operation. This call should * be considered collective over comm (with a consistent value for "rndnum"), * even though the operation may only be a point-to-point request. */ static void start_random_nonblocking(MPI_Comm comm, unsigned int rndnum, MPI_Request *req, struct laundry *l) { int i, j; int rank, size; int *buf = NULL; int *recvbuf = NULL; int *sendcounts = NULL; int *recvcounts = NULL; int *sdispls = NULL; int *rdispls = NULL; int *sendtypes = NULL; int *recvtypes = NULL; signed char *buf_alias = NULL; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); *req = MPI_REQUEST_NULL; l->case_num = -1; l->comm = comm; l->buf = buf = malloc(COUNT*size*sizeof(int)); l->recvbuf = recvbuf = malloc(COUNT*size*sizeof(int)); l->sendcounts = sendcounts = malloc(size*sizeof(int)); l->recvcounts = recvcounts = malloc(size*sizeof(int)); l->sdispls = sdispls = malloc(size*sizeof(int)); l->rdispls = rdispls = malloc(size*sizeof(int)); l->sendtypes = sendtypes = malloc(size*sizeof(MPI_Datatype)); l->recvtypes = recvtypes = malloc(size*sizeof(MPI_Datatype)); #define NUM_CASES (21) l->case_num = rand_range(rndnum, 0, NUM_CASES); switch (l->case_num) { case 0: /* MPI_Ibcast */ for (i = 0; i < COUNT; ++i) { if (rank == 0) { buf[i] = i; } else { buf[i] = 0xdeadbeef; } } MPI_Ibcast(buf, COUNT, MPI_INT, 0, comm, req); break; case 1: /* MPI_Ibcast (again, but designed to stress scatter/allgather impls) */ /* FIXME fiddle with PRIME and buffer allocation s.t. PRIME is much larger (1021?) */ buf_alias = (signed char *)buf; my_assert(COUNT*size*sizeof(int) > PRIME); /* sanity */ for (i = 0; i < PRIME; ++i) { if (rank == 0) buf_alias[i] = i; else buf_alias[i] = 0xdb; } for (i = PRIME; i < COUNT * size * sizeof(int); ++i) { buf_alias[i] = 0xbf; } MPI_Ibcast(buf_alias, PRIME, MPI_SIGNED_CHAR, 0, comm, req); break; case 2: /* MPI_Ibarrier */ MPI_Ibarrier(comm, req); break; case 3: /* MPI_Ireduce */ for (i = 0; i < COUNT; ++i) { buf[i] = rank + i; recvbuf[i] = 0xdeadbeef; } MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, 0, comm, req); break; case 4: /* same again, use a user op and free it before the wait */ { MPI_Op op = MPI_OP_NULL; MPI_Op_create(sum_fn, /*commute=*/1, &op); for (i = 0; i < COUNT; ++i) { buf[i] = rank + i; recvbuf[i] = 0xdeadbeef; } MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, op, 0, comm, req); MPI_Op_free(&op); } break; case 5: /* MPI_Iallreduce */ for (i = 0; i < COUNT; ++i) { buf[i] = rank + i; recvbuf[i] = 0xdeadbeef; } MPI_Iallreduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req); break; case 6: /* MPI_Ialltoallv (a weak test, neither irregular nor sparse) */ for (i = 0; i < size; ++i) { sendcounts[i] = COUNT; recvcounts[i] = COUNT; sdispls[i] = COUNT * i; rdispls[i] = COUNT * i; for (j = 0; j < COUNT; ++j) { buf[i*COUNT+j] = rank + (i * j); recvbuf[i*COUNT+j] = 0xdeadbeef; } } MPI_Ialltoallv(buf, sendcounts, sdispls, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req); break; case 7: /* MPI_Igather */ for (i = 0; i < size*COUNT; ++i) { buf[i] = rank + i; recvbuf[i] = 0xdeadbeef; } MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req); break; case 8: /* same test again, just use a dup'ed datatype and free it before the wait */ { MPI_Datatype type = MPI_DATATYPE_NULL; MPI_Type_dup(MPI_INT, &type); for (i = 0; i < size*COUNT; ++i) { buf[i] = rank + i; recvbuf[i] = 0xdeadbeef; } MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, type, 0, 
comm, req); MPI_Type_free(&type); /* should cause implementations that don't refcount correctly to blow up or hang in the wait */ } break; case 9: /* MPI_Iscatter */ for (i = 0; i < size; ++i) { for (j = 0; j < COUNT; ++j) { if (rank == 0) buf[i*COUNT+j] = i + j; else buf[i*COUNT+j] = 0xdeadbeef; recvbuf[i*COUNT+j] = 0xdeadbeef; } } MPI_Iscatter(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req); break; case 10: /* MPI_Iscatterv */ for (i = 0; i < size; ++i) { /* weak test, just test the regular case where all counts are equal */ sendcounts[i] = COUNT; sdispls[i] = i * COUNT; for (j = 0; j < COUNT; ++j) { if (rank == 0) buf[i*COUNT+j] = i + j; else buf[i*COUNT+j] = 0xdeadbeef; recvbuf[i*COUNT+j] = 0xdeadbeef; } } MPI_Iscatterv(buf, sendcounts, sdispls, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req); break; case 11: /* MPI_Ireduce_scatter */ for (i = 0; i < size; ++i) { recvcounts[i] = COUNT; for (j = 0; j < COUNT; ++j) { buf[i*COUNT+j] = rank + i; recvbuf[i*COUNT+j] = 0xdeadbeef; } } MPI_Ireduce_scatter(buf, recvbuf, recvcounts, MPI_INT, MPI_SUM, comm, req); break; case 12: /* MPI_Ireduce_scatter_block */ for (i = 0; i < size; ++i) { for (j = 0; j < COUNT; ++j) { buf[i*COUNT+j] = rank + i; recvbuf[i*COUNT+j] = 0xdeadbeef; } } MPI_Ireduce_scatter_block(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req); break; case 13: /* MPI_Igatherv */ for (i = 0; i < size*COUNT; ++i) { buf[i] = 0xdeadbeef; recvbuf[i] = 0xdeadbeef; } for (i = 0; i < COUNT; ++i) { buf[i] = rank + i; } for (i = 0; i < size; ++i) { recvcounts[i] = COUNT; rdispls[i] = i * COUNT; } MPI_Igatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, 0, comm, req); break; case 14: /* MPI_Ialltoall */ for (i = 0; i < size; ++i) { for (j = 0; j < COUNT; ++j) { buf[i*COUNT+j] = rank + (i * j); recvbuf[i*COUNT+j] = 0xdeadbeef; } } MPI_Ialltoall(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req); break; case 15: /* MPI_Iallgather */ for (i = 0; i < size*COUNT; ++i) { buf[i] = rank + i; recvbuf[i] = 0xdeadbeef; } MPI_Iallgather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req); break; case 16: /* MPI_Iallgatherv */ for (i = 0; i < size; ++i) { for (j = 0; j < COUNT; ++j) { recvbuf[i*COUNT+j] = 0xdeadbeef; } recvcounts[i] = COUNT; rdispls[i] = i * COUNT; } for (i = 0; i < COUNT; ++i) buf[i] = rank + i; MPI_Iallgatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req); break; case 17: /* MPI_Iscan */ for (i = 0; i < COUNT; ++i) { buf[i] = rank + i; recvbuf[i] = 0xdeadbeef; } MPI_Iscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req); break; case 18: /* MPI_Iexscan */ for (i = 0; i < COUNT; ++i) { buf[i] = rank + i; recvbuf[i] = 0xdeadbeef; } MPI_Iexscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req); break; case 19: /* MPI_Ialltoallw (a weak test, neither irregular nor sparse) */ for (i = 0; i < size; ++i) { sendcounts[i] = COUNT; recvcounts[i] = COUNT; sdispls[i] = COUNT * i * sizeof(int); rdispls[i] = COUNT * i * sizeof(int); sendtypes[i] = MPI_INT; recvtypes[i] = MPI_INT; for (j = 0; j < COUNT; ++j) { buf[i*COUNT+j] = rank + (i * j); recvbuf[i*COUNT+j] = 0xdeadbeef; } } MPI_Ialltoallw(buf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, req); break; case 20: /* basic pt2pt MPI_Isend/MPI_Irecv pairing */ /* even ranks send to odd ranks, but only if we have a full pair */ if ((rank % 2 != 0) || (rank != size-1)) { for (j = 0; j < COUNT; ++j) { buf[j] = j; recvbuf[j] = 0xdeadbeef; } if (rank % 2 == 0) MPI_Isend(buf, COUNT, MPI_INT, rank+1, 5, comm, req); 
else MPI_Irecv(recvbuf, COUNT, MPI_INT, rank-1, 5, comm, req); } break; default: fprintf(stderr, "unexpected value for l->case_num=%d)\n", (l->case_num)); MPI_Abort(comm, 1); exit(1); break; } }
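/* Illustrative sketch, not part of the original test: one way the helper above
 * could be driven.  It assumes the surrounding harness provides struct laundry,
 * COUNT, and some routine that frees the malloc'ed fields of a laundry entry;
 * "cleanup_laundry" is a hypothetical name for that routine.  Each slot uses a
 * random value broadcast from rank 0 so that all ranks pick the same case. */
static void sketch_driver(MPI_Comm comm, int window)
{
    int i;
    MPI_Request *reqs = malloc(window * sizeof(MPI_Request));
    struct laundry *larr = malloc(window * sizeof(struct laundry));

    for (i = 0; i < window; ++i) {
        unsigned int rndnum = (unsigned int) rand();
        /* all ranks must agree on rndnum for a given slot */
        MPI_Bcast(&rndnum, 1, MPI_UNSIGNED, 0, comm);
        start_random_nonblocking(comm, rndnum, &reqs[i], &larr[i]);
    }

    MPI_Waitall(window, reqs, MPI_STATUSES_IGNORE);

    for (i = 0; i < window; ++i) {
        cleanup_laundry(&larr[i]);      /* hypothetical: frees larr[i].buf etc. */
    }
    free(larr);
    free(reqs);
}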
void IMB_iallgatherv(struct comm_info* c_info,
                     int size,
                     struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE,
                     double* time)
/*

                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Iallgatherv

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample

*/
{
    int         i = 0;
    Type_Size   s_size, r_size;
    int         s_num = 0,
                r_num = 0;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.,
                t_comp = 0.,
                t_ovrlp = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    if (c_info->rank != -1) {
        /* GET PURE TIME. DISPLACEMENT AND RECEIVE COUNT WILL BE INITIALIZED HERE */
        IMB_iallgatherv_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);

        /* INITIALIZATION CALL */
        IMB_cpu_exploit(t_pure, 1);

        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_ovrlp = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Iallgatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                   s_num,
                                   c_info->s_data_type,
                                   (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                   c_info->reccnt,
                                   c_info->rdispl,
                                   c_info->r_data_type,
                                   c_info->communicator,
                                   &request);
            MPI_ERRHAND(ierr);

            t_comp -= MPI_Wtime();
            IMB_cpu_exploit(t_pure, 0);
            t_comp += MPI_Wtime();

            MPI_Wait(&request, &status);
            CHK_DIFF("Iallgatherv", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     0, 0, ((size_t)c_info->num_procs * (size_t)size), 1,
                     put, 0, ITERATIONS->n_sample, i, -2, &defect);
        }
        t_ovrlp = (MPI_Wtime() - t_ovrlp) / ITERATIONS->n_sample;
        t_comp /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
    time[1] = t_ovrlp;
    time[2] = t_comp;
}
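/* Sketch for clarity, not part of IMB itself: from the three times reported
 * above (t_pure, t_ovrlp, t_comp), IMB-NBC derives an overlap figure roughly as
 *   overlap = 100 * max(0, min(1, (t_pure + t_comp - t_ovrlp) / min(t_pure, t_comp))).
 * The helper name below is ours; treat the exact formula as an assumption and
 * check the IMB documentation for the authoritative definition. */
static double sketch_overlap_percent(double t_pure, double t_ovrlp, double t_comp)
{
    double denom = (t_pure < t_comp) ? t_pure : t_comp;
    double frac;

    if (denom <= 0.0)
        return 0.0;

    frac = (t_pure + t_comp - t_ovrlp) / denom;
    if (frac < 0.0)
        frac = 0.0;
    if (frac > 1.0)
        frac = 1.0;
    return 100.0 * frac;
}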
void IMB_iallgatherv_pure(struct comm_info* c_info,
                          int size,
                          struct iter_schedule* ITERATIONS,
                          MODES RUN_MODE,
                          double* time)
/*

                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Iallgatherv (pure version, without
                      concurrent CPU activity)

Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])

Output variables:

-time                 (type double*)
                      Timing result per sample

*/
{
    int         i = 0;
    Type_Size   s_size, r_size;
    int         s_num = 0,
                r_num = 0;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    MPI_Type_size(c_info->r_data_type, &r_size);
    if ((s_size != 0) && (r_size != 0)) {
        s_num = size / s_size;
        r_num = size / r_size;
    }

    for (i = 0; i < c_info->num_procs; ++i) {
        c_info->rdispl[i] = r_num * i;
        c_info->reccnt[i] = r_num;
    }

    if (c_info->rank != -1) {
        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_pure = MPI_Wtime();
        for (i = 0; i < ITERATIONS->n_sample; i++) {
            ierr = MPI_Iallgatherv((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                   s_num,
                                   c_info->s_data_type,
                                   (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                   c_info->reccnt,
                                   c_info->rdispl,
                                   c_info->r_data_type,
                                   c_info->communicator,
                                   &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);
            CHK_DIFF("Iallgatherv_pure", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     0, 0, ((size_t)c_info->num_procs * (size_t)size), 1,
                     put, 0, ITERATIONS->n_sample, i, -2, &defect);
        }
        t_pure = (MPI_Wtime() - t_pure) / ITERATIONS->n_sample;
    }

    time[0] = t_pure;
}
int main(int argc, char **argv)
{
    int errs = 0;
    int i;
    int rank, size;
    int *sbuf = NULL;
    int *rbuf = NULL;
    int *scounts = NULL;
    int *rcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *types = NULL;
    MPI_Comm comm;
    MPI_Request req;

    /* intentionally not using MTest_Init/MTest_Finalize in order to make it
     * easy to take this test and use it as an NBC sanity test outside of the
     * MPICH test suite */
    MPI_Init(&argc, &argv);

    comm = MPI_COMM_WORLD;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    /* enough space for every process to contribute at least NUM_INTS ints to any
     * collective operation */
    sbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(sbuf);
    rbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(rbuf);
    scounts = malloc(size * sizeof(int));
    my_assert(scounts);
    rcounts = malloc(size * sizeof(int));
    my_assert(rcounts);
    sdispls = malloc(size * sizeof(int));
    my_assert(sdispls);
    rdispls = malloc(size * sizeof(int));
    my_assert(rdispls);
    types = malloc(size * sizeof(MPI_Datatype));
    my_assert(types);

    for (i = 0; i < size; ++i) {
        sbuf[2 * i] = i;
        sbuf[2 * i + 1] = i;
        rbuf[2 * i] = i;
        rbuf[2 * i + 1] = i;
        scounts[i] = NUM_INTS;
        rcounts[i] = NUM_INTS;
        sdispls[i] = i * NUM_INTS;
        rdispls[i] = i * NUM_INTS;
        types[i] = MPI_INT;
    }

    if (rank == 0 &&
        MPI_SUCCESS == MPI_Igather(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm, &req))
        errs++;

    if (rank == 0 &&
        MPI_SUCCESS == MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, sbuf, rcounts, rdispls, MPI_INT, 0, comm, &req))
        errs++;

    if (rank == 0 &&
        MPI_SUCCESS == MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm, &req))
        errs++;

    if (rank == 0 &&
        MPI_SUCCESS == MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iallgather(&sbuf[rank], 1, MPI_INT, sbuf, 1, MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iallgatherv(&sbuf[rank * rcounts[rank]], rcounts[rank], MPI_INT,
                                       sbuf, rcounts, rdispls, MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ialltoall(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ialltoallv(sbuf, scounts, sdispls, MPI_INT,
                                      sbuf, scounts, sdispls, MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ialltoallw(sbuf, scounts, sdispls, types,
                                      sbuf, scounts, sdispls, types, comm, &req))
        errs++;

    if (rank == 0 &&
        MPI_SUCCESS == MPI_Ireduce(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iallreduce(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ireduce_scatter(sbuf, sbuf, rcounts, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ireduce_scatter_block(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iscan(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iexscan(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (sbuf)
        free(sbuf);
    if (rbuf)
        free(rbuf);
    if (scounts)
        free(scounts);
    if (rcounts)
        free(rcounts);
    if (sdispls)
        free(sdispls);
    if (rdispls)
        free(rdispls);
    if (types)
        free(types);

    if (rank == 0) {
        if (errs)
            fprintf(stderr, "Found %d errors\n", errs);
        else
            printf(" No errors\n");
    }
    MPI_Finalize();
    return 0;
}