/*
 * Pure (non-overlapped) MPI_Ibarrier benchmark: issues the nonblocking
 * barrier and immediately waits on it, averaging over the configured
 * sample count.  The average seconds per barrier is stored in time[0].
 * Ranks with c_info->rank == -1 do not participate and report 0.
 * (size and RUN_MODE are unused here; kept for the common benchmark
 * function signature.)
 */
void IMB_ibarrier_pure(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
    MPI_Request request;
    MPI_Status  status;
    double      elapsed = 0.;
    int         sample;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    if (c_info->rank != -1) {
        /* synchronize all ranks before starting the clock */
        IMB_do_n_barriers(c_info->communicator, N_BARR);

        elapsed = MPI_Wtime();
        for (sample = 0; sample < ITERATIONS->n_sample; sample++) {
            ierr = MPI_Ibarrier(c_info->communicator, &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);
        }
        elapsed = (MPI_Wtime() - elapsed) / ITERATIONS->n_sample;
    }

    time[0] = elapsed;
}
/*
 * Measures how well MPI_Ibarrier overlaps with computation: records the
 * initiation time of MPI_Ibarrier, simulates computation while the barrier
 * progresses, then records the remaining MPI_Wait time.  The total slot
 * time is stored in result->totaltime.
 *
 * Returns MEASURE_SUCCESS when the wait succeeded and both timeslot syncs
 * produced valid (positive) timestamps; MEASURE_FAILURE otherwise, or
 * always when compiled without HAVE_NBC.
 */
int measure_ibarrier_overlap(nbctest_params_t *params, nbctest_result_t *result)
{
#ifdef HAVE_NBC
    static MPI_Request req;   /* static: request may outlive one call's frame */
    double slot_start, slot_end;
    int rc;

    slot_start = timeslot_startsync();

    result->inittime = hpctimer_wtime();
    rc = MPI_Ibarrier(params->comm, &req);
    result->inittime = hpctimer_wtime() - result->inittime;

    /* overlap window: burn CPU while the barrier progresses */
    nbcbench_simulate_computing(params, &req, result);

    result->waittime = hpctimer_wtime();
    rc = MPI_Wait(&req, MPI_STATUS_IGNORE);
    result->waittime = hpctimer_wtime() - result->waittime;

    slot_end = timeslot_stopsync();

    /* rc here is the MPI_Wait result (the MPI_Ibarrier rc is overwritten) */
    if (rc == MPI_SUCCESS && slot_start > 0.0 && slot_end > 0.0) {
        result->totaltime = slot_end - slot_start;
        return MEASURE_SUCCESS;
    }
#endif
    return MEASURE_FAILURE;
}
/*
 * Dynamic sparse data exchange: every rank knows whom it sends to (nto,
 * toranks, todata) but not whom it receives from; on return *nfrom,
 * *fromranks, and fromdata describe the received messages (both arrays
 * allocated here; ownership transfers to the caller).
 *
 * Protocol (consensus-style, presumably the NBX algorithm of Hoefler et
 * al. -- confirm against PETSc docs): post synchronous sends, poll for
 * incoming messages, and once the local sends complete enter a nonblocking
 * barrier; when the barrier completes, all messages system-wide have been
 * received and the probe loop can stop.
 */
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;

  PetscFunctionBegin;
  /* Duplicate the communicator and get a private tag so our wildcard probes
     cannot match unrelated traffic. */
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  tdata = (char*)todata;
  ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
  /* Synchronous sends: MPI_Issend completion implies the matching receive
     has started, which is what makes the barrier a valid termination test. */
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;   /* NULL request doubles as "barrier not yet started" */
  for (done=0; !done; ) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      nrecvs++;
    }
    if (barrier == MPI_REQUEST_NULL) {
      /* Barrier not started: test whether all of our own sends completed. */
      PetscMPIInt sent,nsends;
      ierr = PetscPMPIIntCast
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
#if defined(PETSC_HAVE_MPI_IBARRIER)
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#elif defined(PETSC_HAVE_MPIX_IBARRIER)
        ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#endif
        ierr = PetscFree(sendreqs);CHKERRQ(ierr);
      }
    } else {
      /* Barrier started: done once every rank has entered it, i.e. all
         messages everywhere have been received. */
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  /* Hand the accumulated ranks/data to the caller, then release everything. */
  ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
 * Older variant of the same dynamic sparse exchange as above (Issend +
 * probe + nonblocking barrier).  Differences from the other copy in this
 * file: PetscInt counts, MPI_Type_size instead of extent (so nonzero lower
 * bounds are not rejected), unconditional MPI_Ibarrier (no feature guard),
 * and the PetscSegBuffer calls pass &segrank/&segdata where the other copy
 * passes the buffer by value.
 * NOTE(review): the by-address PetscSegBuffer calls suggest an older API
 * revision -- confirm against the PetscSegBuffer prototypes in use before
 * mixing these two versions in one build.
 */
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,unitbytes,done;
  PetscInt       i;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;

  PetscFunctionBegin;
  /* Private tag so wildcard probes only match this exchange. */
  ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
  /* Synchronous sends: completion implies the receive was posted. */
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;   /* also serves as "barrier not started" flag */
  for (done=0; !done; ) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      ierr = PetscSegBufferGet(&segrank,1,&recvrank);CHKERRQ(ierr);
      ierr = PetscSegBufferGet(&segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      nrecvs++;
    }
    if (barrier == MPI_REQUEST_NULL) {
      /* Start the barrier only after all local sends complete. */
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
        ierr = PetscFree(sendreqs);CHKERRQ(ierr);
      }
    } else {
      /* Barrier completion implies all messages have been received. */
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  /* Transfer results to the caller and release the segmented buffers. */
  ierr = PetscSegBufferExtractAlloc(&segrank,fromranks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractAlloc(&segdata,fromdata);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
// Nonblocking barrier on this communicator: returns a request object whose
// underlying MPI request completes once every rank has entered the barrier.
// Without MPI support this call is unsupported and aborts via the macro.
diy::mpi::request
diy::mpi::communicator::ibarrier() const
{
#ifndef DIY_NO_MPI
  request result;
  MPI_Ibarrier(comm_, &result.r);
  return result;
#else
  // this is not the ideal fix; in principle we should just return a status
  // that tests true, but this requires redesigning some parts of our no-mpi
  // handling
  DIY_UNSUPPORTED_MPI_CALL(MPI_Ibarrier);
#endif
}
/*
 * Overlap benchmark for MPI_Ibarrier: measures the pure barrier time
 * (time[0], via IMB_ibarrier_pure), the overlapped time of barrier plus
 * computation (time[1]), and the computation time alone (time[2]).
 * The computation is calibrated to last about as long as a pure barrier,
 * so time[1] vs time[0]+time[2] exposes the achievable overlap.
 */
void IMB_ibarrier(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS, MODES RUN_MODE, double* time) {
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0., t_comp = 0., t_ovrlp = 0.;
    int         sample;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    if (c_info->rank != -1) {
        /* baseline: how long does a pure nonblocking barrier take? */
        IMB_ibarrier_pure(c_info, size, ITERATIONS, RUN_MODE, &t_pure);

        /* INITIALIZATION CALL: calibrate the CPU burn loop to t_pure */
        IMB_cpu_exploit(t_pure, 1);

        IMB_do_n_barriers(c_info->communicator, N_BARR);

        t_ovrlp = MPI_Wtime();
        for (sample = 0; sample < ITERATIONS->n_sample; sample++) {
            ierr = MPI_Ibarrier(c_info->communicator, &request);
            MPI_ERRHAND(ierr);

            /* burn CPU while the barrier progresses; time it separately */
            t_comp -= MPI_Wtime();
            IMB_cpu_exploit(t_pure, 0);
            t_comp += MPI_Wtime();

            MPI_Wait(&request, &status);
        }
        t_ovrlp = (MPI_Wtime() - t_ovrlp) / ITERATIONS->n_sample;
        t_comp /= ITERATIONS->n_sample;
    }

    time[0] = t_pure;
    time[1] = t_ovrlp;
    time[2] = t_comp;
}
/*
 * Smoke test: start an MPI_Ibarrier and poll it to completion with
 * MPI_Test, sleeping 1ms between polls.  Prints " No Errors" from rank 0.
 * The nonblocking-collective part is compiled only under TEST_NBC_ROUTINES.
 */
int main(int argc, char *argv[])
{
#if defined(TEST_NBC_ROUTINES)
    MPI_Request barrier;
    int i, done;
#endif
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

#if defined(TEST_NBC_ROUTINES)
    MPI_Ibarrier(MPI_COMM_WORLD, &barrier);
    done = 0;
    for (i = 0; !done; i++) {
        usleep(1000);
        /*printf("[%d] MPI_Test: %d\n",rank,i);*/
        MPI_Test(&barrier, &done, MPI_STATUS_IGNORE);
    }
#endif

    if (rank == 0)
        printf(" No Errors\n");

    MPI_Finalize();
    return 0;
}
int main(int argc, char **argv) { int errs = 0; int i; int rank, size; int *sbuf = NULL; int *rbuf = NULL; int *scounts = NULL; int *rcounts = NULL; int *sdispls = NULL; int *rdispls = NULL; int *types = NULL; MPI_Comm comm; MPI_Request req; /* intentionally not using MTest_Init/MTest_Finalize in order to make it * easy to take this test and use it as an NBC sanity test outside of the * MPICH test suite */ MPI_Init(&argc, &argv); comm = MPI_COMM_WORLD; MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank); /* enough space for every process to contribute at least NUM_INTS ints to any * collective operation */ sbuf = malloc(NUM_INTS * size * sizeof(int)); my_assert(sbuf); rbuf = malloc(NUM_INTS * size * sizeof(int)); my_assert(rbuf); scounts = malloc(size * sizeof(int)); my_assert(scounts); rcounts = malloc(size * sizeof(int)); my_assert(rcounts); sdispls = malloc(size * sizeof(int)); my_assert(sdispls); rdispls = malloc(size * sizeof(int)); my_assert(rdispls); types = malloc(size * sizeof(int)); my_assert(types); for (i = 0; i < size; ++i) { sbuf[2 * i] = i; sbuf[2 * i + 1] = i; rbuf[2 * i] = i; rbuf[2 * i + 1] = i; scounts[i] = NUM_INTS; rcounts[i] = NUM_INTS; sdispls[i] = i * NUM_INTS; rdispls[i] = i * NUM_INTS; types[i] = MPI_INT; } MPI_Ibarrier(comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ibcast(sbuf, NUM_INTS, MPI_INT, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Igather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); if (0 == rank) MPI_Igather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, 0, comm, &req); else MPI_Igather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); if (0 == rank) MPI_Igatherv(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req); else MPI_Igatherv(sbuf, 
NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); if (0 == rank) MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 0, comm, &req); else MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); if (0 == rank) MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 0, comm, &req); else MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iallgather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iallgather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iallgatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iallgatherv(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ialltoall(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ialltoall(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ialltoallv(sbuf, scounts, sdispls, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ialltoallv(MPI_IN_PLACE, NULL, NULL, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ialltoallw(sbuf, scounts, sdispls, types, rbuf, rcounts, rdispls, types, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ialltoallw(MPI_IN_PLACE, NULL, NULL, NULL, rbuf, rcounts, 
rdispls, types, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ireduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); if (0 == rank) MPI_Ireduce(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req); else MPI_Ireduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iallreduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iallreduce(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ireduce_scatter(sbuf, rbuf, rcounts, MPI_INT, MPI_SUM, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ireduce_scatter(MPI_IN_PLACE, rbuf, rcounts, MPI_INT, MPI_SUM, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ireduce_scatter_block(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Ireduce_scatter_block(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iscan(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iscan(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iexscan(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Iexscan(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req); MPI_Wait(&req, MPI_STATUS_IGNORE); if (sbuf) free(sbuf); if (rbuf) free(rbuf); if (scounts) free(scounts); if (rcounts) free(rcounts); if (sdispls) free(sdispls); if (rdispls) free(rdispls); if (rank == 0) { if (errs) fprintf(stderr, "Found %d errors\n", errs); else printf(" No errors\n"); } MPI_Finalize(); return 0; }
/* Starts a "random" operation on "comm" corresponding to "rndnum" and returns
 * in (*req) a request handle corresonding to that operation.  This call should
 * be considered collective over comm (with a consistent value for "rndnum"),
 * even though the operation may only be a point-to-point request.
 *
 * All buffers are allocated here and stashed in "l" (the laundry) so the
 * caller can free them after the request completes.
 *
 * Fix relative to the previous revision: sendtypes/recvtypes are now
 * MPI_Datatype* rather than int*.  They were already allocated with
 * size*sizeof(MPI_Datatype) and are passed as the array_of_types arguments
 * of MPI_Ialltoallw (which require MPI_Datatype[]); indexing that storage
 * as int[] only worked where MPI_Datatype is an int (e.g. MPICH) and wrote
 * garbage elements where it is a pointer type (e.g. Open MPI).
 * NOTE(review): struct laundry's sendtypes/recvtypes members must likewise
 * be MPI_Datatype* -- confirm the struct definition matches. */
static void start_random_nonblocking(MPI_Comm comm, unsigned int rndnum, MPI_Request *req, struct laundry *l)
{
    int i, j;
    int rank, size;
    int *buf = NULL;
    int *recvbuf = NULL;
    int *sendcounts = NULL;
    int *recvcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *sendtypes = NULL;   /* was int*: wrong element type for MPI_Ialltoallw */
    MPI_Datatype *recvtypes = NULL;   /* was int*: wrong element type for MPI_Ialltoallw */
    signed char *buf_alias = NULL;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    *req = MPI_REQUEST_NULL;

    l->case_num = -1;
    l->comm = comm;

    /* all allocations are recorded in the laundry for later cleanup */
    l->buf = buf = malloc(COUNT*size*sizeof(int));
    l->recvbuf = recvbuf = malloc(COUNT*size*sizeof(int));
    l->sendcounts = sendcounts = malloc(size*sizeof(int));
    l->recvcounts = recvcounts = malloc(size*sizeof(int));
    l->sdispls = sdispls = malloc(size*sizeof(int));
    l->rdispls = rdispls = malloc(size*sizeof(int));
    l->sendtypes = sendtypes = malloc(size*sizeof(MPI_Datatype));
    l->recvtypes = recvtypes = malloc(size*sizeof(MPI_Datatype));

#define NUM_CASES (21)
    l->case_num = rand_range(rndnum, 0, NUM_CASES);
    switch (l->case_num) {
        case 0: /* MPI_Ibcast */
            for (i = 0; i < COUNT; ++i) {
                if (rank == 0) {
                    buf[i] = i;
                }
                else {
                    buf[i] = 0xdeadbeef;
                }
            }
            MPI_Ibcast(buf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 1: /* MPI_Ibcast (again, but designed to stress scatter/allgather impls) */
            /* FIXME fiddle with PRIME and buffer allocation s.t. PRIME is much larger (1021?) */
            buf_alias = (signed char *)buf;
            my_assert(COUNT*size*sizeof(int) > PRIME); /* sanity */
            for (i = 0; i < PRIME; ++i) {
                if (rank == 0)
                    buf_alias[i] = i;
                else
                    buf_alias[i] = 0xdb;
            }
            for (i = PRIME; i < COUNT * size * sizeof(int); ++i) {
                buf_alias[i] = 0xbf;
            }
            MPI_Ibcast(buf_alias, PRIME, MPI_SIGNED_CHAR, 0, comm, req);
            break;

        case 2: /* MPI_Ibarrier */
            MPI_Ibarrier(comm, req);
            break;

        case 3: /* MPI_Ireduce */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, 0, comm, req);
            break;

        case 4: /* same again, use a user op and free it before the wait */
            {
                MPI_Op op = MPI_OP_NULL;
                MPI_Op_create(sum_fn, /*commute=*/1, &op);
                for (i = 0; i < COUNT; ++i) {
                    buf[i] = rank + i;
                    recvbuf[i] = 0xdeadbeef;
                }
                MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, op, 0, comm, req);
                MPI_Op_free(&op);
            }
            break;

        case 5: /* MPI_Iallreduce */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iallreduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 6: /* MPI_Ialltoallv (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                sendcounts[i] = COUNT;
                recvcounts[i] = COUNT;
                sdispls[i] = COUNT * i;
                rdispls[i] = COUNT * i;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoallv(buf, sendcounts, sdispls, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req);
            break;

        case 7: /* MPI_Igather */
            for (i = 0; i < size*COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 8: /* same test again, just use a dup'ed datatype and free it before the wait */
            {
                MPI_Datatype type = MPI_DATATYPE_NULL;
                MPI_Type_dup(MPI_INT, &type);
                for (i = 0; i < size*COUNT; ++i) {
                    buf[i] = rank + i;
                    recvbuf[i] = 0xdeadbeef;
                }
                MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, type, 0, comm, req);
                MPI_Type_free(&type); /* should cause implementations that don't refcount
                                         correctly to blow up or hang in the wait */
            }
            break;

        case 9: /* MPI_Iscatter */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    if (rank == 0)
                        buf[i*COUNT+j] = i + j;
                    else
                        buf[i*COUNT+j] = 0xdeadbeef;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Iscatter(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 10: /* MPI_Iscatterv */
            for (i = 0; i < size; ++i) {
                /* weak test, just test the regular case where all counts are equal */
                sendcounts[i] = COUNT;
                sdispls[i] = i * COUNT;
                for (j = 0; j < COUNT; ++j) {
                    if (rank == 0)
                        buf[i*COUNT+j] = i + j;
                    else
                        buf[i*COUNT+j] = 0xdeadbeef;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Iscatterv(buf, sendcounts, sdispls, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 11: /* MPI_Ireduce_scatter */
            for (i = 0; i < size; ++i) {
                recvcounts[i] = COUNT;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + i;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ireduce_scatter(buf, recvbuf, recvcounts, MPI_INT, MPI_SUM, comm, req);
            break;

        case 12: /* MPI_Ireduce_scatter_block */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + i;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ireduce_scatter_block(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 13: /* MPI_Igatherv */
            for (i = 0; i < size*COUNT; ++i) {
                buf[i] = 0xdeadbeef;
                recvbuf[i] = 0xdeadbeef;
            }
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
            }
            for (i = 0; i < size; ++i) {
                recvcounts[i] = COUNT;
                rdispls[i] = i * COUNT;
            }
            MPI_Igatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, 0, comm, req);
            break;

        case 14: /* MPI_Ialltoall */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoall(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req);
            break;

        case 15: /* MPI_Iallgather */
            for (i = 0; i < size*COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iallgather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req);
            break;

        case 16: /* MPI_Iallgatherv */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
                recvcounts[i] = COUNT;
                rdispls[i] = i * COUNT;
            }
            for (i = 0; i < COUNT; ++i)
                buf[i] = rank + i;
            MPI_Iallgatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req);
            break;

        case 17: /* MPI_Iscan */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 18: /* MPI_Iexscan */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iexscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 19: /* MPI_Ialltoallw (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                sendcounts[i] = COUNT;
                recvcounts[i] = COUNT;
                /* Ialltoallw displacements are in bytes, unlike Ialltoallv */
                sdispls[i] = COUNT * i * sizeof(int);
                rdispls[i] = COUNT * i * sizeof(int);
                sendtypes[i] = MPI_INT;
                recvtypes[i] = MPI_INT;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoallw(buf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, req);
            break;

        case 20: /* basic pt2pt MPI_Isend/MPI_Irecv pairing */
            /* even ranks send to odd ranks, but only if we have a full pair */
            if ((rank % 2 != 0) || (rank != size-1)) {
                for (j = 0; j < COUNT; ++j) {
                    buf[j] = j;
                    recvbuf[j] = 0xdeadbeef;
                }
                if (rank % 2 == 0)
                    MPI_Isend(buf, COUNT, MPI_INT, rank+1, 5, comm, req);
                else
                    MPI_Irecv(recvbuf, COUNT, MPI_INT, rank-1, 5, comm, req);
            }
            break;

        default:
            fprintf(stderr, "unexpected value for l->case_num=%d)\n", (l->case_num));
            MPI_Abort(comm, 1);
            exit(1);
            break;
    }
}
/*
 * Dynamic sparse exchange (same Issend + probe + nonblocking-barrier scheme
 * as PetscCommBuildTwoSided_Ibarrier above) extended with user callbacks:
 * after the small synchronous "header" sends, (*send) posts up to ntags
 * payload requests per destination and (*recv) posts up to ntags payload
 * requests per discovered source.  On return the caller owns *fromranks,
 * fromdata, *toreqs (nto*ntags requests) and *fromreqs (nrecvs*ntags
 * requests) and is responsible for waiting on and freeing them.
 */
static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,*tags,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,*usendreqs,*req,barrier;
  PetscSegBuffer segrank,segdata,segreq;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  /* Private communicator/tags so wildcard probes only match this exchange;
     one extra tag per payload slot for the callbacks. */
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr);
  for (i=0; i<ntags; i++) {
    ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr);
  }
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  tdata = (char*)todata;
  ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
  ierr = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr);
  /* Post synchronous sends */
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  /* Post actual payloads.  These are typically larger messages.  Hopefully sending these later does not slow down the
   * synchronous messages above. */
  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr);
  }

  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;   /* tracked separately from barrier != MPI_REQUEST_NULL for that reason */
  for (done=0; !done; ) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank,k;
      void        *buf;
      ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      /* Reserve ntags payload-request slots and let the user post receives. */
      ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr);
      for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
      ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr);
      nrecvs++;
    }
    if (!barrier_started) {
      /* Enter the barrier once all our own header sends have completed. */
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
#if defined(PETSC_HAVE_MPI_IBARRIER)
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#elif defined(PETSC_HAVE_MPIX_IBARRIER)
        ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#endif
        barrier_started = PETSC_TRUE;
      }
    } else {
      /* Barrier completion implies all header messages were received. */
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  /* Transfer ownership of results to the caller; free internal state. */
  ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  *toreqs = usendreqs;
  ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr);
  ierr = PetscFree(sendreqs);CHKERRQ(ierr);
  ierr = PetscFree(tags);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}