int MPIR_Iallreduce_cdesc(CFI_cdesc_t* x0, CFI_cdesc_t* x1, int x2, MPI_Datatype x3,
                          MPI_Op x4, MPI_Comm x5, MPI_Request * x6)
{
    int err = MPI_SUCCESS;
    void *buf0 = x0->base_addr;
    void *buf1 = x1->base_addr;
    int count1 = x2;
    MPI_Datatype dtype1 = x3;

    if (buf0 == &MPIR_F08_MPI_BOTTOM) {
        buf0 = MPI_BOTTOM;
    } else if (buf0 == &MPIR_F08_MPI_IN_PLACE) {
        buf0 = MPI_IN_PLACE;
    }

    if (buf1 == &MPIR_F08_MPI_BOTTOM) {
        buf1 = MPI_BOTTOM;
    }

    if (x1->rank != 0 && !CFI_is_contiguous(x1)) {
        err = cdesc_create_datatype(x1, x2, x3, &dtype1);
        count1 = 1;
    }

    err = MPI_Iallreduce(buf0, buf1, count1, dtype1, x4, x5, x6);

    if (dtype1 != x3)
        MPI_Type_free(&dtype1);
    return err;
}
void IMB_iallreduce_pure(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                         MODES RUN_MODE, double* time)
{
    int i = 0;
    Type_Size s_size;
    int s_num = 0;
    MPI_Request request;
    MPI_Status status;
    double t_pure = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->red_data_type, &s_size);
    if (s_size != 0) {
        s_num = size / s_size;
    }

    if (c_info->rank != -1) {
        IMB_do_n_barriers(c_info->communicator, N_BARR);

        for (i = 0; i < ITERATIONS->n_sample; i++) {
            t_pure -= MPI_Wtime();
            ierr = MPI_Iallreduce((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                                  (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                                  s_num,
                                  c_info->red_data_type,
                                  c_info->op_type,
                                  c_info->communicator,
                                  &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);
            t_pure += MPI_Wtime();
            CHK_DIFF("Iallreduce_pure", c_info,
                     (char*)c_info->r_buffer + i % ITERATIONS->r_cache_iter * ITERATIONS->r_offs,
                     0, size, size, asize, put, 0, ITERATIONS->n_sample, i, -1, &defect);
            IMB_do_n_barriers(c_info->communicator, c_info->sync);
        }
        t_pure /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
}
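The routine above times the "pure" cost: MPI_Wait immediately follows MPI_Iallreduce. As a minimal, self-contained sketch (not part of IMB; N, BUSY_USEC and the spin loop are arbitrary assumptions for illustration), the complementary measurement places independent work between the start of the reduction and the wait:

#include <mpi.h>
#include <stdio.h>

#define N 4096
#define BUSY_USEC 100.0

static void busy_wait(double usec)
{
    double t0 = MPI_Wtime();
    while ((MPI_Wtime() - t0) * 1.0e6 < usec)
        ;   /* spin to emulate computation that is independent of the reduction */
}

int main(int argc, char **argv)
{
    double sbuf[N], rbuf[N], t_overlap;
    int rank;
    MPI_Request req;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    for (int i = 0; i < N; i++)
        sbuf[i] = i;

    MPI_Barrier(MPI_COMM_WORLD);
    t_overlap = -MPI_Wtime();
    MPI_Iallreduce(sbuf, rbuf, N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD, &req);
    busy_wait(BUSY_USEC);             /* must not read or write sbuf/rbuf while pending */
    MPI_Wait(&req, MPI_STATUS_IGNORE);
    t_overlap += MPI_Wtime();

    if (rank == 0)
        printf("iallreduce + %.0f usec of work: %f sec\n", BUSY_USEC, t_overlap);
    MPI_Finalize();
    return 0;
}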
MTEST_THREAD_RETURN_TYPE test_iallred(void *arg)
{
    MPI_Request req;
    int tid = *(int *) arg;
    int buf[BUF_SIZE];

    MTEST_VG_MEM_INIT(buf, BUF_SIZE * sizeof(int));

    if (tid == rank)
        MTestSleep(1);
    MPI_Iallreduce(MPI_IN_PLACE, buf, BUF_SIZE, MPI_INT, MPI_BAND, comms[tid], &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);
    return (MTEST_THREAD_RETURN_TYPE) 0;
}
static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count,
                                          MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
                                          MPI_Request *request)
{
    PETSC_UNUSED PetscErrorCode ierr;

    PetscFunctionBegin;
#if defined(PETSC_HAVE_MPI_IALLREDUCE)
    ierr = MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);CHKERRQ(ierr);
#elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
    ierr = MPIX_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);CHKERRQ(ierr);
#else
    ierr = MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);CHKERRQ(ierr);
    *request = MPI_REQUEST_NULL;
#endif
    PetscFunctionReturn(0);
}
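A hypothetical caller-side sketch (plain MPI, not PETSc code) of why the blocking fallback sets *request to MPI_REQUEST_NULL: MPI_Wait on a null request returns immediately, so callers can overlap work and then wait unconditionally, regardless of which branch was compiled in.

#include <mpi.h>
#include <stdio.h>

/* assumed helper mirroring the fallback idea above, not a library routine */
static int iallreduce_or_block(const void *sendbuf, void *recvbuf, int count,
                               MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
                               MPI_Request *request)
{
#if MPI_VERSION >= 3
    return MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);
#else
    int err = MPI_Allreduce((void *) sendbuf, recvbuf, count, datatype, op, comm);
    *request = MPI_REQUEST_NULL;   /* makes the later MPI_Wait a no-op */
    return err;
#endif
}

int main(int argc, char **argv)
{
    double local = 1.0, global = 0.0;
    MPI_Request req;

    MPI_Init(&argc, &argv);
    iallreduce_or_block(&local, &global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD, &req);
    /* ... independent work that touches neither buffer goes here ... */
    MPI_Wait(&req, MPI_STATUS_IGNORE);   /* returns immediately if the blocking path ran */
    printf("global sum = %f\n", global);
    MPI_Finalize();
    return 0;
}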
int main(int argc, char *argv[])
{
    MPI_Request request;
    int size, rank;
    int one = 1, two = 2, isum, sum;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    assert(size == 2);
    MPI_Iallreduce(&one, &isum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &request);
    MPI_Allreduce(&two, &sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    MPI_Wait(&request, MPI_STATUS_IGNORE);
    assert(isum == 2);
    assert(sum == 4);
    if (rank == 0)
        printf(" No errors\n");

    MPI_Finalize();
    return 0;
}
int main(int argc, char **argv)
{
    int errs = 0;
    int i;
    int rank, size;
    int *sbuf = NULL;
    int *rbuf = NULL;
    int *scounts = NULL;
    int *rcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *types = NULL;
    MPI_Comm comm;
    MPI_Request req;

    /* intentionally not using MTest_Init/MTest_Finalize in order to make it
     * easy to take this test and use it as an NBC sanity test outside of the
     * MPICH test suite */
    MPI_Init(&argc, &argv);

    comm = MPI_COMM_WORLD;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    /* enough space for every process to contribute at least NUM_INTS ints to any
     * collective operation */
    sbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(sbuf);
    rbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(rbuf);
    scounts = malloc(size * sizeof(int));
    my_assert(scounts);
    rcounts = malloc(size * sizeof(int));
    my_assert(rcounts);
    sdispls = malloc(size * sizeof(int));
    my_assert(sdispls);
    rdispls = malloc(size * sizeof(int));
    my_assert(rdispls);
    types = malloc(size * sizeof(MPI_Datatype));
    my_assert(types);

    for (i = 0; i < size; ++i) {
        sbuf[2 * i] = i;
        sbuf[2 * i + 1] = i;
        rbuf[2 * i] = i;
        rbuf[2 * i + 1] = i;
        scounts[i] = NUM_INTS;
        rcounts[i] = NUM_INTS;
        sdispls[i] = i * NUM_INTS;
        rdispls[i] = i * NUM_INTS;
        types[i] = MPI_INT;
    }

    MPI_Ibarrier(comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ibcast(sbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Igather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Igather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    else
        MPI_Igather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Igatherv(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req);
    else
        MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 0, comm, &req);
    else
        MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 0, comm, &req);
    else
        MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgatherv(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoall(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoall(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);
    MPI_Ialltoallv(sbuf, scounts, sdispls, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallv(MPI_IN_PLACE, NULL, NULL, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallw(sbuf, scounts, sdispls, types, rbuf, rcounts, rdispls, types, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallw(MPI_IN_PLACE, NULL, NULL, NULL, rbuf, rcounts, rdispls, types, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Ireduce(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    else
        MPI_Ireduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallreduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallreduce(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter(sbuf, rbuf, rcounts, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter(MPI_IN_PLACE, rbuf, rcounts, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter_block(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter_block(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscan(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscan(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iexscan(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iexscan(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (sbuf)
        free(sbuf);
    if (rbuf)
        free(rbuf);
    if (scounts)
        free(scounts);
    if (rcounts)
        free(rcounts);
    if (sdispls)
        free(sdispls);
    if (rdispls)
        free(rdispls);
    if (types)
        free(types);

    if (rank == 0) {
        if (errs)
            fprintf(stderr, "Found %d errors\n", errs);
        else
            printf(" No errors\n");
    }

    MPI_Finalize();
    return 0;
}
/* Starts a "random" operation on "comm" corresponding to "rndnum" and returns
 * in (*req) a request handle corresponding to that operation.  This call should
 * be considered collective over comm (with a consistent value for "rndnum"),
 * even though the operation may only be a point-to-point request. */
static void start_random_nonblocking(MPI_Comm comm, unsigned int rndnum, MPI_Request *req, struct laundry *l)
{
    int i, j;
    int rank, size;
    int *buf = NULL;
    int *recvbuf = NULL;
    int *sendcounts = NULL;
    int *recvcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *sendtypes = NULL;
    MPI_Datatype *recvtypes = NULL;
    signed char *buf_alias = NULL;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    *req = MPI_REQUEST_NULL;

    l->case_num = -1;
    l->comm = comm;

    l->buf = buf = malloc(COUNT * size * sizeof(int));
    l->recvbuf = recvbuf = malloc(COUNT * size * sizeof(int));
    l->sendcounts = sendcounts = malloc(size * sizeof(int));
    l->recvcounts = recvcounts = malloc(size * sizeof(int));
    l->sdispls = sdispls = malloc(size * sizeof(int));
    l->rdispls = rdispls = malloc(size * sizeof(int));
    l->sendtypes = sendtypes = malloc(size * sizeof(MPI_Datatype));
    l->recvtypes = recvtypes = malloc(size * sizeof(MPI_Datatype));

#define NUM_CASES (21)
    l->case_num = rand_range(rndnum, 0, NUM_CASES);
    switch (l->case_num) {
        case 0:        /* MPI_Ibcast */
            for (i = 0; i < COUNT; ++i) {
                if (rank == 0) {
                    buf[i] = i;
                } else {
                    buf[i] = 0xdeadbeef;
                }
            }
            MPI_Ibcast(buf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 1:        /* MPI_Ibcast (again, but designed to stress scatter/allgather impls) */
            /* FIXME fiddle with PRIME and buffer allocation s.t. PRIME is much larger (1021?) */
            buf_alias = (signed char *) buf;
            my_assert(COUNT * size * sizeof(int) > PRIME);      /* sanity */
            for (i = 0; i < PRIME; ++i) {
                if (rank == 0)
                    buf_alias[i] = i;
                else
                    buf_alias[i] = 0xdb;
            }
            for (i = PRIME; i < COUNT * size * sizeof(int); ++i) {
                buf_alias[i] = 0xbf;
            }
            MPI_Ibcast(buf_alias, PRIME, MPI_SIGNED_CHAR, 0, comm, req);
            break;

        case 2:        /* MPI_Ibarrier */
            MPI_Ibarrier(comm, req);
            break;

        case 3:        /* MPI_Ireduce */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, 0, comm, req);
            break;

        case 4:        /* same again, use a user op and free it before the wait */
            {
                MPI_Op op = MPI_OP_NULL;
                MPI_Op_create(sum_fn, /*commute= */ 1, &op);
                for (i = 0; i < COUNT; ++i) {
                    buf[i] = rank + i;
                    recvbuf[i] = 0xdeadbeef;
                }
                MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, op, 0, comm, req);
                MPI_Op_free(&op);
            }
            break;

        case 5:        /* MPI_Iallreduce */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iallreduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 6:        /* MPI_Ialltoallv (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                sendcounts[i] = COUNT;
                recvcounts[i] = COUNT;
                sdispls[i] = COUNT * i;
                rdispls[i] = COUNT * i;
                for (j = 0; j < COUNT; ++j) {
                    buf[i * COUNT + j] = rank + (i * j);
                    recvbuf[i * COUNT + j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoallv(buf, sendcounts, sdispls, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req);
            break;

        case 7:        /* MPI_Igather */
            for (i = 0; i < size * COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 8:        /* same test again, just use a dup'ed datatype and free it before the wait */
            {
                MPI_Datatype type = MPI_DATATYPE_NULL;
                MPI_Type_dup(MPI_INT, &type);
                for (i = 0; i < size * COUNT; ++i) {
                    buf[i] = rank + i;
                    recvbuf[i] = 0xdeadbeef;
                }
                MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, type, 0, comm, req);
                MPI_Type_free(&type);   /* should cause implementations that don't refcount
                                         * correctly to blow up or hang in the wait */
            }
            break;

        case 9:        /* MPI_Iscatter */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    if (rank == 0)
                        buf[i * COUNT + j] = i + j;
                    else
                        buf[i * COUNT + j] = 0xdeadbeef;
                    recvbuf[i * COUNT + j] = 0xdeadbeef;
                }
            }
            MPI_Iscatter(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 10:       /* MPI_Iscatterv */
            for (i = 0; i < size; ++i) {
                /* weak test, just test the regular case where all counts are equal */
                sendcounts[i] = COUNT;
                sdispls[i] = i * COUNT;
                for (j = 0; j < COUNT; ++j) {
                    if (rank == 0)
                        buf[i * COUNT + j] = i + j;
                    else
                        buf[i * COUNT + j] = 0xdeadbeef;
                    recvbuf[i * COUNT + j] = 0xdeadbeef;
                }
            }
            MPI_Iscatterv(buf, sendcounts, sdispls, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 11:       /* MPI_Ireduce_scatter */
            for (i = 0; i < size; ++i) {
                recvcounts[i] = COUNT;
                for (j = 0; j < COUNT; ++j) {
                    buf[i * COUNT + j] = rank + i;
                    recvbuf[i * COUNT + j] = 0xdeadbeef;
                }
            }
            MPI_Ireduce_scatter(buf, recvbuf, recvcounts, MPI_INT, MPI_SUM, comm, req);
            break;

        case 12:       /* MPI_Ireduce_scatter_block */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    buf[i * COUNT + j] = rank + i;
                    recvbuf[i * COUNT + j] = 0xdeadbeef;
                }
            }
            MPI_Ireduce_scatter_block(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 13:       /* MPI_Igatherv */
            for (i = 0; i < size * COUNT; ++i) {
                buf[i] = 0xdeadbeef;
                recvbuf[i] = 0xdeadbeef;
            }
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
            }
            for (i = 0; i < size; ++i) {
                recvcounts[i] = COUNT;
                rdispls[i] = i * COUNT;
            }
            MPI_Igatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, 0, comm, req);
            break;

        case 14:       /* MPI_Ialltoall */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    buf[i * COUNT + j] = rank + (i * j);
                    recvbuf[i * COUNT + j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoall(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req);
            break;

        case 15:       /* MPI_Iallgather */
            for (i = 0; i < size * COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iallgather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req);
            break;

        case 16:       /* MPI_Iallgatherv */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    recvbuf[i * COUNT + j] = 0xdeadbeef;
                }
                recvcounts[i] = COUNT;
                rdispls[i] = i * COUNT;
            }
            for (i = 0; i < COUNT; ++i)
                buf[i] = rank + i;
            MPI_Iallgatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req);
            break;

        case 17:       /* MPI_Iscan */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 18:       /* MPI_Iexscan */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iexscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 19:       /* MPI_Ialltoallw (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                sendcounts[i] = COUNT;
                recvcounts[i] = COUNT;
                sdispls[i] = COUNT * i * sizeof(int);
                rdispls[i] = COUNT * i * sizeof(int);
                sendtypes[i] = MPI_INT;
                recvtypes[i] = MPI_INT;
                for (j = 0; j < COUNT; ++j) {
                    buf[i * COUNT + j] = rank + (i * j);
                    recvbuf[i * COUNT + j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoallw(buf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, req);
            break;

        case 20:       /* basic pt2pt MPI_Isend/MPI_Irecv pairing */
            /* even ranks send to odd ranks, but only if we have a full pair */
            if ((rank % 2 != 0) || (rank != size - 1)) {
                for (j = 0; j < COUNT; ++j) {
                    buf[j] = j;
                    recvbuf[j] = 0xdeadbeef;
                }
                if (rank % 2 == 0)
                    MPI_Isend(buf, COUNT, MPI_INT, rank + 1, 5, comm, req);
                else
                    MPI_Irecv(recvbuf, COUNT, MPI_INT, rank - 1, 5, comm, req);
            }
            break;

        default:
            fprintf(stderr, "unexpected value for l->case_num=%d\n", (l->case_num));
            MPI_Abort(comm, 1);
            exit(1);
            break;
    }
}
static void ADIOI_Iread_and_exch(ADIOI_NBC_Request *nbc_req, int *error_code)
{
    ADIOI_Iread_and_exch_vars *vars = nbc_req->data.rd.rae_vars;
    ADIO_File fd = vars->fd;
    MPI_Datatype datatype = vars->datatype;
    int nprocs = vars->nprocs;
    ADIOI_Access *others_req = vars->others_req;

    /* Read in sizes of no more than coll_bufsize, an info parameter.
       Send data to appropriate processes.
       Place recd. data in user buf.
       The idea is to reduce the amount of extra memory required for
       collective I/O. If all data were read all at once, which is much
       easier, it would require temp space more than the size of user_buf,
       which is often unacceptable. For example, to read a distributed
       array from a file, where each local array is 8Mbytes, requiring
       at least another 8Mbytes of temp space is unacceptable. */

    int i, j;
    ADIO_Offset st_loc = -1, end_loc = -1;
    ADIOI_Flatlist_node *flat_buf = NULL;
    int coll_bufsize;

    *error_code = MPI_SUCCESS;  /* changed below if error */
    /* only I/O errors are currently reported */

    /* calculate the number of reads of size coll_bufsize to be done by
       each process and the max among all processes. That gives the no.
       of communication phases as well.
       coll_bufsize is obtained from the hints object. */

    coll_bufsize = fd->hints->cb_buffer_size;
    vars->coll_bufsize = coll_bufsize;

    /* grab some initial values for st_loc and end_loc */
    for (i = 0; i < nprocs; i++) {
        if (others_req[i].count) {
            st_loc = others_req[i].offsets[0];
            end_loc = others_req[i].offsets[0];
            break;
        }
    }

    /* now find the real values */
    for (i = 0; i < nprocs; i++)
        for (j = 0; j < others_req[i].count; j++) {
            st_loc = ADIOI_MIN(st_loc, others_req[i].offsets[j]);
            end_loc = ADIOI_MAX(end_loc, (others_req[i].offsets[j]
                                          + others_req[i].lens[j] - 1));
        }

    vars->st_loc = st_loc;
    vars->end_loc = end_loc;

    /* calculate ntimes, the number of times this process must perform I/O
     * operations in order to complete all the requests it has received.
     * the need for multiple I/O operations comes from the restriction that
     * we only use coll_bufsize bytes of memory for internal buffering. */
    if ((st_loc == -1) && (end_loc == -1)) {
        /* this process does no I/O. */
        vars->ntimes = 0;
    } else {
        /* ntimes = ceiling_div(end_loc - st_loc + 1, coll_bufsize) */
        vars->ntimes = (int) ((end_loc - st_loc + coll_bufsize) / coll_bufsize);
    }

    *error_code = MPI_Iallreduce(&vars->ntimes, &vars->max_ntimes, 1, MPI_INT,
                                 MPI_MAX, fd->comm, &vars->req1);

    vars->read_buf = fd->io_buf;        /* Allocated at open time */

    vars->curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* its use is explained below. calloc initializes to 0. */

    vars->count = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* to store count of how many off-len pairs per proc are satisfied
       in an iteration. */

    vars->partial_send = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* if only a portion of the last off-len pair is sent to a process
       in a particular iteration, the length sent is stored here.
       calloc initializes to 0. */

    vars->send_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* total size of data to be sent to each proc. in an iteration */

    vars->recv_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* total size of data to be recd. from each proc. in an iteration.
       Of size nprocs so that I can use MPI_Alltoall later. */

    vars->recd_from_proc = (int *) ADIOI_Calloc(nprocs, sizeof(int));
    /* amount of data recd. so far from each proc. Used in
       ADIOI_Fill_user_buffer. initialized to 0 here. */

    vars->start_pos = (int *) ADIOI_Malloc(nprocs * sizeof(int));
    /* used to store the starting value of curr_offlen_ptr[i] in
       this iteration */

    ADIOI_Datatype_iscontig(datatype, &vars->buftype_is_contig);
    if (!vars->buftype_is_contig) {
        ADIOI_Flatten_datatype(datatype);
        flat_buf = ADIOI_Flatlist;
        while (flat_buf->type != datatype)
            flat_buf = flat_buf->next;
        vars->flat_buf = flat_buf;
    }
    MPI_Type_extent(datatype, &vars->buftype_extent);

    vars->done = 0;
    vars->off = st_loc;
    vars->for_curr_iter = vars->for_next_iter = 0;

    /* set the state to wait until the MPI_Iallreduce issued above finishes. */
    nbc_req->data.rd.state = ADIOI_IRC_STATE_IREAD_AND_EXCH;
}
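A minimal stand-alone sketch (invented sizes and polling loop, not ROMIO code) of the pattern the routine above relies on: issue MPI_Iallreduce, return to other work, and complete the request later, here by polling with MPI_Test.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int my_ntimes, max_ntimes = 0, done = 0;
    int rank;
    MPI_Request req;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    my_ntimes = rank + 1;          /* stand-in for a per-process iteration count */

    MPI_Iallreduce(&my_ntimes, &max_ntimes, 1, MPI_INT, MPI_MAX,
                   MPI_COMM_WORLD, &req);

    while (!done) {
        /* local bookkeeping that does not depend on max_ntimes would go here */
        MPI_Test(&req, &done, MPI_STATUS_IGNORE);   /* also drives MPI progress */
    }

    printf("rank %d: max_ntimes = %d\n", rank, max_ntimes);
    MPI_Finalize();
    return 0;
}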
int mpsort_mpi_histogram_sort(struct crstruct d, struct crmpistruct o, struct TIMER * tmr)
{
    char Pmax[d.rsize];
    char Pmin[d.rsize];

    char P[d.rsize * (o.NTask - 1)];

    ptrdiff_t C[o.NTask + 1];      /* desired counts */

    ptrdiff_t myCLT[o.NTask + 1];  /* counts of less than P */
    ptrdiff_t CLT[o.NTask + 1];

    ptrdiff_t myCLE[o.NTask + 1];  /* counts of less than or equal to P */
    ptrdiff_t CLE[o.NTask + 1];

    int SendCount[o.NTask];
    int SendDispl[o.NTask];
    int RecvCount[o.NTask];
    int RecvDispl[o.NTask];

    ptrdiff_t myT_CLT[o.NTask];
    ptrdiff_t myT_CLE[o.NTask];
    ptrdiff_t myT_C[o.NTask];
    ptrdiff_t myC[o.NTask + 1];

    int iter = 0;
    int done = 0;
    char * buffer;
    int i;

    (tmr->time = MPI_Wtime(), strcpy(tmr->name, "START"), tmr++);

    /* sort the local array */
    radix_sort(d.base, d.nmemb, d.size, d.radix, d.rsize, d.arg);

    MPI_Barrier(o.comm);

    (tmr->time = MPI_Wtime(), strcpy(tmr->name, "FirstSort"), tmr++);

    _find_Pmax_Pmin_C(o.mybase, o.mynmemb, o.myoutnmemb, Pmax, Pmin, C, &d, &o);

    (tmr->time = MPI_Wtime(), strcpy(tmr->name, "PmaxPmin"), tmr++);

    memset(P, 0, d.rsize * (o.NTask - 1));

    struct piter pi;

    piter_init(&pi, Pmin, Pmax, o.NTask - 1, &d);

    while (!done) {
        iter++;
        piter_bisect(&pi, P);

#if MPI_VERSION >= 3
        /* note: the "1 ||" below disables the pipelined MPI_Iallreduce branch,
         * so the blocking path is always taken */
        if (1 || mpsort_mpi_has_options(MPSORT_DISABLE_IALLREDUCE)) {
            _histogram(P, o.NTask - 1, o.mybase, o.mynmemb, myCLT, myCLE, &d);

            MPI_Allreduce(myCLT, CLT, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm);
            MPI_Allreduce(myCLE, CLE, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm);
        } else {
            /* overlap allreduce with histogramming by pipelining */
            MPI_Request r[1];

            _histogram(P, o.NTask - 1, o.mybase, o.mynmemb, myCLT, NULL, &d);

            /* reduce the bins just calculated */
            MPI_Iallreduce(myCLT, CLT, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm, &r[0]);

            _histogram(P, o.NTask - 1, o.mybase, o.mynmemb, myCLE, NULL, &d);

            MPI_Waitall(1, r, MPI_STATUSES_IGNORE);

            MPI_Allreduce(myCLE, CLE, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm);
        }
#else
        _histogram(P, o.NTask - 1, o.mybase, o.mynmemb, myCLT, myCLE, &d);

        MPI_Allreduce(myCLT, CLT, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm);
        MPI_Allreduce(myCLE, CLE, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm);
#endif

        (iter > 10 ? tmr-- : 0, tmr->time = MPI_Wtime(), sprintf(tmr->name, "bisect%04d", iter), tmr++);

        piter_accept(&pi, P, C, CLT, CLE);
#if 0
        {
            int k;
            for (k = 0; k < o.NTask; k++) {
                MPI_Barrier(o.comm);
                int i;
                if (o.ThisTask != k) continue;
                printf("P (%d): PMin %d PMax %d P ", o.ThisTask, *(int*) Pmin, *(int*) Pmax);
                for (i = 0; i < o.NTask - 1; i++) { printf(" %d ", ((int*) P)[i]); }
                printf("\n");
                printf("C (%d): ", o.ThisTask);
                for (i = 0; i < o.NTask + 1; i++) { printf("%d ", C[i]); }
                printf("\n");
                printf("CLT (%d): ", o.ThisTask);
                for (i = 0; i < o.NTask + 1; i++) { printf("%d ", CLT[i]); }
                printf("\n");
                printf("CLE (%d): ", o.ThisTask);
                for (i = 0; i < o.NTask + 1; i++) { printf("%d ", CLE[i]); }
                printf("\n");
            }
        }
#endif
        done = piter_all_done(&pi);
    }

    piter_destroy(&pi);

    _histogram(P, o.NTask - 1, o.mybase, o.mynmemb, myCLT, myCLE, &d);

    (tmr->time = MPI_Wtime(), strcpy(tmr->name, "findP"), tmr++);

    /* transpose the matrix, could have been done with a new datatype */
    /* MPI_Alltoall(myCLT, 1, MPI_TYPE_PTRDIFF, myT_CLT, 1, MPI_TYPE_PTRDIFF, o.comm); */
    MPI_Alltoall(myCLT + 1, 1, MPI_TYPE_PTRDIFF, myT_CLT, 1, MPI_TYPE_PTRDIFF, o.comm);

    /* MPI_Alltoall(myCLE, 1, MPI_TYPE_PTRDIFF, myT_CLE, 1, MPI_TYPE_PTRDIFF, o.comm); */
    MPI_Alltoall(myCLE + 1, 1, MPI_TYPE_PTRDIFF, myT_CLE, 1, MPI_TYPE_PTRDIFF, o.comm);

    (tmr->time = MPI_Wtime(), strcpy(tmr->name, "LayDistr"), tmr++);

    _solve_for_layout_mpi(o.NTask, C, myT_CLT, myT_CLE, myT_C, o.comm);

    myC[0] = 0;
    MPI_Alltoall(myT_C, 1, MPI_TYPE_PTRDIFF, myC + 1, 1, MPI_TYPE_PTRDIFF, o.comm);
#if 0
    for (i = 0; i < o.NTask; i++) {
        int j;
        MPI_Barrier(o.comm);
        if (o.ThisTask != i) continue;
        for (j = 0; j < o.NTask + 1; j++) {
            printf("%d %d %d, ", myCLT[j], myC[j], myCLE[j]);
        }
        printf("\n");
    }
#endif

    (tmr->time = MPI_Wtime(), strcpy(tmr->name, "LaySolve"), tmr++);

    for (i = 0; i < o.NTask; i++) {
        SendCount[i] = myC[i + 1] - myC[i];
    }

    MPI_Alltoall(SendCount, 1, MPI_INT, RecvCount, 1, MPI_INT, o.comm);

    SendDispl[0] = 0;
    RecvDispl[0] = 0;
    size_t totrecv = RecvCount[0];
    for (i = 1; i < o.NTask; i++) {
        SendDispl[i] = SendDispl[i - 1] + SendCount[i - 1];
        RecvDispl[i] = RecvDispl[i - 1] + RecvCount[i - 1];
        if (SendDispl[i] != myC[i]) {
            fprintf(stderr, "SendDispl error\n");
            abort();
        }
        totrecv += RecvCount[i];
    }
    if (totrecv != o.myoutnmemb) {
        fprintf(stderr, "totrecv = %td, mismatch with %td\n", totrecv, o.myoutnmemb);
        abort();
    }
#if 0
    {
        int k;
        for (k = 0; k < o.NTask; k++) {
            MPI_Barrier(o.comm);
            if (o.ThisTask != k) continue;
            printf("P (%d): ", o.ThisTask);
            for (i = 0; i < o.NTask - 1; i++) { printf("%d ", ((int*) P)[i]); }
            printf("\n");
            printf("C (%d): ", o.ThisTask);
            for (i = 0; i < o.NTask + 1; i++) { printf("%d ", C[i]); }
            printf("\n");
            printf("CLT (%d): ", o.ThisTask);
            for (i = 0; i < o.NTask + 1; i++) { printf("%d ", CLT[i]); }
            printf("\n");
            printf("CLE (%d): ", o.ThisTask);
            for (i = 0; i < o.NTask + 1; i++) { printf("%d ", CLE[i]); }
            printf("\n");
            printf("MyC (%d): ", o.ThisTask);
            for (i = 0; i < o.NTask + 1; i++) { printf("%d ", myC[i]); }
            printf("\n");
            printf("MyCLT (%d): ", o.ThisTask);
            for (i = 0; i < o.NTask + 1; i++) { printf("%d ", myCLT[i]); }
            printf("\n");
            printf("MyCLE (%d): ", o.ThisTask);
            for (i = 0; i < o.NTask + 1; i++) { printf("%d ", myCLE[i]); }
            printf("\n");
            printf("Send Count(%d): ", o.ThisTask);
            for (i = 0; i < o.NTask; i++) { printf("%d ", SendCount[i]); }
            printf("\n");
            printf("My data(%d): ", o.ThisTask);
            for (i = 0; i < mynmemb; i++) { printf("%d ", ((int*) mybase)[i]); }
            printf("\n");
        }
    }
#endif

    if (o.myoutbase == o.mybase)
        buffer = malloc(d.size * o.myoutnmemb);
    else
        buffer = o.myoutbase;

    MPI_Alltoallv_smart(o.mybase, SendCount, SendDispl, o.MPI_TYPE_DATA,
                        buffer, RecvCount, RecvDispl, o.MPI_TYPE_DATA,
                        o.comm);

    if (o.myoutbase == o.mybase) {
        memcpy(o.myoutbase, buffer, o.myoutnmemb * d.size);
        free(buffer);
    }

    MPI_Barrier(o.comm);
    (tmr->time = MPI_Wtime(), strcpy(tmr->name, "Exchange"), tmr++);

    radix_sort(o.myoutbase, o.myoutnmemb, d.size, d.radix, d.rsize, d.arg);

    MPI_Barrier(o.comm);
    (tmr->time = MPI_Wtime(), strcpy(tmr->name, "SecondSort"), tmr++);

    (tmr->time = MPI_Wtime(), strcpy(tmr->name, "END"), tmr++);
    return 0;
}
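A stand-alone sketch of the pipelining idea that the disabled branch above implements (NBINS and fill_histogram are hypothetical stand-ins, not mpsort code): reduce the first histogram with MPI_Iallreduce while the second histogram is computed locally, then wait.

#include <mpi.h>
#include <stdio.h>

#define NBINS 8

static void fill_histogram(long *h, int shift, int rank)
{
    for (int i = 0; i < NBINS; i++)
        h[i] = (long) rank + i + shift;   /* placeholder for real bin counting */
}

int main(int argc, char **argv)
{
    long my_clt[NBINS], clt[NBINS], my_cle[NBINS], cle[NBINS];
    int rank;
    MPI_Request req;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    fill_histogram(my_clt, 0, rank);                      /* first histogram */
    MPI_Iallreduce(my_clt, clt, NBINS, MPI_LONG, MPI_SUM,
                   MPI_COMM_WORLD, &req);                 /* reduce it in the background */
    fill_histogram(my_cle, 1, rank);                      /* overlapped: second histogram */
    MPI_Wait(&req, MPI_STATUS_IGNORE);
    MPI_Allreduce(my_cle, cle, NBINS, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);

    if (rank == 0)
        printf("clt[0] = %ld, cle[0] = %ld\n", clt[0], cle[0]);
    MPI_Finalize();
    return 0;
}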
int main(int argc, char **argv)
{
    int errs = 0;
    int i;
    int rank, size;
    int *sbuf = NULL;
    int *rbuf = NULL;
    int *scounts = NULL;
    int *rcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *types = NULL;
    MPI_Comm comm;
    MPI_Request req;

    /* intentionally not using MTest_Init/MTest_Finalize in order to make it
     * easy to take this test and use it as an NBC sanity test outside of the
     * MPICH test suite */
    MPI_Init(&argc, &argv);

    comm = MPI_COMM_WORLD;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    /* enough space for every process to contribute at least NUM_INTS ints to any
     * collective operation */
    sbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(sbuf);
    rbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(rbuf);
    scounts = malloc(size * sizeof(int));
    my_assert(scounts);
    rcounts = malloc(size * sizeof(int));
    my_assert(rcounts);
    sdispls = malloc(size * sizeof(int));
    my_assert(sdispls);
    rdispls = malloc(size * sizeof(int));
    my_assert(rdispls);
    types = malloc(size * sizeof(MPI_Datatype));
    my_assert(types);

    for (i = 0; i < size; ++i) {
        sbuf[2 * i] = i;
        sbuf[2 * i + 1] = i;
        rbuf[2 * i] = i;
        rbuf[2 * i + 1] = i;
        scounts[i] = NUM_INTS;
        rcounts[i] = NUM_INTS;
        sdispls[i] = i * NUM_INTS;
        rdispls[i] = i * NUM_INTS;
        types[i] = MPI_INT;
    }

    /* each call below aliases the send and receive buffers without using
     * MPI_IN_PLACE, which the MPI standard forbids; with MPI_ERRORS_RETURN set,
     * a returned MPI_SUCCESS is therefore counted as an error here */
    if (rank == 0 && MPI_SUCCESS ==
        MPI_Igather(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm, &req))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, sbuf, rcounts, rdispls, MPI_INT, 0, comm, &req))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm, &req))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iallgather(&sbuf[rank], 1, MPI_INT, sbuf, 1, MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iallgatherv(&sbuf[rank * rcounts[rank]], rcounts[rank], MPI_INT,
                                       sbuf, rcounts, rdispls, MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ialltoall(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ialltoallv(sbuf, scounts, sdispls, MPI_INT,
                                      sbuf, scounts, sdispls, MPI_INT, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ialltoallw(sbuf, scounts, sdispls, types,
                                      sbuf, scounts, sdispls, types, comm, &req))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Ireduce(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iallreduce(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ireduce_scatter(sbuf, sbuf, rcounts, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Ireduce_scatter_block(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iscan(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (MPI_SUCCESS == MPI_Iexscan(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req))
        errs++;

    if (sbuf)
        free(sbuf);
    if (rbuf)
        free(rbuf);
    if (scounts)
        free(scounts);
    if (rcounts)
        free(rcounts);
    if (sdispls)
        free(sdispls);
    if (rdispls)
        free(rdispls);
    if (types)
        free(types);

    if (rank == 0) {
        if (errs)
            fprintf(stderr, "Found %d errors\n", errs);
        else
            printf(" No errors\n");
    }

    MPI_Finalize();
    return 0;
}