int main(int argc, char **argv)
{
    int *sendbuf, *recvbuf;
    int block_size;
    int size, rank, i;
    int err = 0;
    MPI_Comm comm;
    MPI_Op left_op, right_op, nc_sum_op;

    MTest_Init(&argc, &argv);

    comm = MPI_COMM_WORLD;
    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
    /* MPI_Reduce_scatter_block was added in MPI-2.2 */
    MPI_Op_create(&left, 0 /*non-commutative*/, &left_op);
    MPI_Op_create(&right, 0 /*non-commutative*/, &right_op);
    MPI_Op_create(&nc_sum, 0 /*non-commutative*/, &nc_sum_op);

    for (block_size = 1; block_size < MAX_BLOCK_SIZE; block_size *= 2) {
        sendbuf = (int *) malloc(block_size * size * sizeof(int));
        recvbuf = (int *) malloc(block_size * sizeof(int));

        for (i = 0; i < (size * block_size); i++)
            sendbuf[i] = rank + i;
        for (i = 0; i < block_size; i++)
            recvbuf[i] = 0xdeadbeef;

        /* "left" keeps the lowest rank's contribution */
        MPI_Reduce_scatter_block(sendbuf, recvbuf, block_size, MPI_INT, left_op, comm);
        for (i = 0; i < block_size; ++i)
            if (recvbuf[i] != (rank * block_size + i))
                ++err;

        /* "right" keeps the highest rank's contribution */
        MPI_Reduce_scatter_block(sendbuf, recvbuf, block_size, MPI_INT, right_op, comm);
        for (i = 0; i < block_size; ++i)
            if (recvbuf[i] != ((size - 1) + (rank * block_size) + i))
                ++err;

        /* a non-commutative sum must still match the ordinary sum */
        MPI_Reduce_scatter_block(sendbuf, recvbuf, block_size, MPI_INT, nc_sum_op, comm);
        for (i = 0; i < block_size; ++i) {
            int x = rank * block_size + i;
            if (recvbuf[i] != (size * x + (size - 1) * size / 2))
                ++err;
        }

        free(recvbuf);
        free(sendbuf);
    }

    MPI_Op_free(&left_op);
    MPI_Op_free(&right_op);
    MPI_Op_free(&nc_sum_op);
#endif

    MTest_Finalize(err);
    MPI_Finalize();
    return err;
}
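/* The fragment above uses three user-defined, non-commutative operations
 * (left, right, nc_sum) that are not shown; MAX_BLOCK_SIZE and the MTest_*
 * harness also come from the surrounding test file.  A minimal sketch of
 * what the operations could look like, assuming the semantics implied by the
 * checks above: left keeps the lower rank's value, right keeps the higher
 * rank's value, and nc_sum is an ordinary sum registered as non-commutative. */
static void left(void *a, void *b, int *cnt, MPI_Datatype *dtype)
{
    int *in = (int *) a, *inout = (int *) b;
    int i;
    for (i = 0; i < *cnt; i++)
        inout[i] = in[i];       /* keep the "left" (lower-rank) operand */
}

static void right(void *a, void *b, int *cnt, MPI_Datatype *dtype)
{
    /* keep the "right" (higher-rank) operand: inoutvec already holds it */
    (void) a; (void) b; (void) cnt; (void) dtype;
}

static void nc_sum(void *a, void *b, int *cnt, MPI_Datatype *dtype)
{
    int *in = (int *) a, *inout = (int *) b;
    int i;
    for (i = 0; i < *cnt; i++)
        inout[i] += in[i];      /* plain sum, but declared non-commutative */
}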
int main(int argc, char **argv)
{
    int vals_array[DIM_VALS_ARR], resultant_lcm, res[DIM_VALS_ARR];
    int root = 0;
    MPI_Op LCM_OP;

    MPI_Init(&argc, &argv);
    /* ... initialize vals_array ... */

    MPI_Op_create((MPI_User_function *) LCM_Op_function, 1, &LCM_OP);

    /* An alternative to doing a local LCM after the reduce is to use
     * MPI_Reduce_scatter_block instead of MPI_Reduce and then run
     * MPI_Reduce on the scattered values. */

    /* Possible implementation with a local operation */
    MPI_Reduce(vals_array, res, DIM_VALS_ARR, MPI_INT, LCM_OP, root, MPI_COMM_WORLD);
    resultant_lcm = lcm_local_operation(res, DIM_VALS_ARR);    /* only root's res is meaningful */
    MPI_Bcast(&resultant_lcm, 1, MPI_INT, root, MPI_COMM_WORLD);

    /* Possible implementation with the reduce-scatter approach
     * (assumes DIM_VALS_ARR equals the number of processes, so each
     * rank receives exactly one reduced element) */
    MPI_Reduce_scatter_block(vals_array, res, 1, MPI_INT, LCM_OP, MPI_COMM_WORLD);
    MPI_Reduce(res, &resultant_lcm, 1, MPI_INT, LCM_OP, root, MPI_COMM_WORLD);

    MPI_Op_free(&LCM_OP);
    MPI_Finalize();
    return 0;
}
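/* Neither LCM_Op_function nor lcm_local_operation is defined in the fragment
 * above.  A hedged sketch of what they could look like, assuming an
 * element-wise integer LCM reduction (the names come from the fragment, the
 * bodies are an assumption): */
static int gcd(int a, int b)
{
    while (b) {
        int t = a % b;
        a = b;
        b = t;
    }
    return a;
}

static int lcm(int a, int b)
{
    return a / gcd(a, b) * b;
}

/* MPI_User_function: element-wise LCM of invec and inoutvec */
static void LCM_Op_function(void *invec, void *inoutvec, int *len, MPI_Datatype *dtype)
{
    int *in = (int *) invec;
    int *inout = (int *) inoutvec;
    int i;
    (void) dtype;               /* only MPI_INT is expected here */
    for (i = 0; i < *len; i++)
        inout[i] = lcm(in[i], inout[i]);
}

/* Fold the reduced array into a single LCM on the root */
static int lcm_local_operation(const int *vals, int n)
{
    int i, acc = vals[0];
    for (i = 1; i < n; i++)
        acc = lcm(acc, vals[i]);
    return acc;
}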
// MPI_Allreduce implemented as Reduce_scatter_block + Allgather
inline void execute_GL_Allreduce_as_ReducescatterblockAllgather(collective_params_t *params)
{
    MPI_Reduce_scatter_block(params->sbuf, params->tmp_buf, params->count,
                             params->datatype, params->op, MPI_COMM_WORLD);

    MPI_Allgather(params->tmp_buf, params->count, params->datatype,
                  params->rbuf, params->count, params->datatype, MPI_COMM_WORLD);
}
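/* A minimal, self-contained sketch of the count bookkeeping the rewrite above
 * relies on: an allreduce over size * block integers is equivalent to
 * MPI_Reduce_scatter_block with recvcount = block followed by an MPI_Allgather
 * of each rank's reduced block.  The buffer names and the choice of block = 4
 * are illustrative only. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size, i, block = 4;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int *sbuf = malloc(block * size * sizeof(int));
    int *tmp = malloc(block * sizeof(int));
    int *rbuf = malloc(block * size * sizeof(int));
    for (i = 0; i < block * size; i++)
        sbuf[i] = rank + i;

    /* step 1: every rank ends up with the reduced block it "owns" */
    MPI_Reduce_scatter_block(sbuf, tmp, block, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    /* step 2: gathering all reduced blocks reproduces the full allreduce result */
    MPI_Allgather(tmp, block, MPI_INT, rbuf, block, MPI_INT, MPI_COMM_WORLD);

    if (rank == 0)
        printf("rbuf[0] = %d (expected %d)\n", rbuf[0], size * (size - 1) / 2);

    free(sbuf);
    free(tmp);
    free(rbuf);
    MPI_Finalize();
    return 0;
}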
static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata,*fdata;
  MPI_Request    *reqs,*sendreqs;
  MPI_Status     *statuses;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr);
  ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  ierr = PetscFree(iflags);CHKERRQ(ierr);

  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
  sendreqs = reqs + nrecvs;
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) {
    ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata;
  PetscFunctionReturn(0);
}
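/* A stripped-down sketch of the counting trick used above (names are
 * illustrative, the send pattern is a toy example): every rank marks the ranks
 * it intends to send to, and MPI_Reduce_scatter_block with recvcount = 1 and
 * MPI_SUM hands each rank the number of messages it should expect. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size, dest, nrecvs;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int *iflags = calloc(size, sizeof(int));
    dest = (rank + 1) % size;   /* toy pattern: everyone sends to one neighbor */
    iflags[dest] = 1;

    /* column sums of the "who sends to whom" matrix, one entry per rank */
    MPI_Reduce_scatter_block(iflags, &nrecvs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    printf("[%d] expecting %d incoming message(s)\n", rank, nrecvs);

    free(iflags);
    MPI_Finalize();
    return 0;
}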
int ZMPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) /* zmpi_func ZMPI_Reduce_scatter_block */
{
#if MPI_VERSION >= 2 && MPI_SUBVERSION >= 2
  return MPI_Reduce_scatter_block((void *) sendbuf, recvbuf, recvcount, datatype, op, comm);
#else
  int comm_size, *recvcounts, i, exit_code;

  MPI_Comm_size(comm, &comm_size);

  recvcounts = z_alloc(comm_size, sizeof(int));
  for (i = 0; i < comm_size; ++i) recvcounts[i] = recvcount;

  exit_code = MPI_Reduce_scatter((void *) sendbuf, recvbuf, recvcounts, datatype, op, comm);

  z_free(recvcounts);

  return exit_code;
#endif
}
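/* For reference, a small self-contained check of the equivalence the fallback
 * path relies on: MPI_Reduce_scatter with all recvcounts equal to recvcount
 * yields the same result as MPI_Reduce_scatter_block.  Buffer names and the
 * recvcount value are illustrative. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size, i, recvcount = 2;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int *sendbuf = malloc(recvcount * size * sizeof(int));
    int *a = malloc(recvcount * sizeof(int));
    int *b = malloc(recvcount * sizeof(int));
    int *recvcounts = malloc(size * sizeof(int));
    for (i = 0; i < recvcount * size; i++)
        sendbuf[i] = rank + i;
    for (i = 0; i < size; i++)
        recvcounts[i] = recvcount;

    /* the block variant ... */
    MPI_Reduce_scatter_block(sendbuf, a, recvcount, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    /* ... and the equal-counts emulation used by the pre-2.2 fallback */
    MPI_Reduce_scatter(sendbuf, b, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    for (i = 0; i < recvcount; i++)
        if (a[i] != b[i])
            printf("[%d] mismatch at element %d\n", rank, i);

    free(sendbuf);
    free(a);
    free(b);
    free(recvcounts);
    MPI_Finalize();
    return 0;
}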
int main(int argc, char **argv)
{
    int errs = 0;
    int i;
    int rank, size;
    int *sbuf = NULL;
    int *rbuf = NULL;
    int *scounts = NULL;
    int *rcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *types = NULL;
    MPI_Comm comm;

    /* intentionally not using MTest_Init/MTest_Finalize in order to make it
     * easy to take this test and use it as an NBC sanity test outside of the
     * MPICH test suite */
    MPI_Init(&argc, &argv);
    comm = MPI_COMM_WORLD;
    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    /* enough space for every process to contribute at least NUM_INTS ints to any
     * collective operation */
    sbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(sbuf);
    rbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(rbuf);
    scounts = malloc(size * sizeof(int));
    my_assert(scounts);
    rcounts = malloc(size * sizeof(int));
    my_assert(rcounts);
    sdispls = malloc(size * sizeof(int));
    my_assert(sdispls);
    rdispls = malloc(size * sizeof(int));
    my_assert(rdispls);
    types = malloc(size * sizeof(MPI_Datatype));
    my_assert(types);

    for (i = 0; i < size; ++i) {
        sbuf[2 * i] = i;
        sbuf[2 * i + 1] = i;
        rbuf[2 * i] = i;
        rbuf[2 * i + 1] = i;
        scounts[i] = NUM_INTS;
        rcounts[i] = NUM_INTS;
        sdispls[i] = i * NUM_INTS;
        rdispls[i] = i * NUM_INTS;
        types[i] = MPI_INT;
    }

    /* each call below passes aliased send/recv buffers, which is erroneous;
     * getting MPI_SUCCESS back therefore counts as an error */
    if (rank == 0 &&
        MPI_SUCCESS == MPI_Gather(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm))
        errs++;

    if (rank == 0 &&
        MPI_SUCCESS == MPI_Gatherv(sbuf, NUM_INTS, MPI_INT, sbuf, rcounts, rdispls, MPI_INT, 0, comm))
        errs++;

    if (rank == 0 &&
        MPI_SUCCESS == MPI_Scatter(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm))
        errs++;

    if (rank == 0 &&
        MPI_SUCCESS == MPI_Scatterv(sbuf, scounts, sdispls, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Allgather(&sbuf[rank], 1, MPI_INT, sbuf, 1, MPI_INT, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Allgatherv(&sbuf[rank * rcounts[rank]], rcounts[rank], MPI_INT,
                                      sbuf, rcounts, rdispls, MPI_INT, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Alltoall(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Alltoallv(sbuf, scounts, sdispls, MPI_INT,
                                     sbuf, scounts, sdispls, MPI_INT, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Alltoallw(sbuf, scounts, sdispls, types,
                                     sbuf, scounts, sdispls, types, comm))
        errs++;

    if (rank == 0 && MPI_SUCCESS == MPI_Reduce(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Allreduce(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Reduce_scatter(sbuf, sbuf, rcounts, MPI_INT, MPI_SUM, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Reduce_scatter_block(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Scan(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Exscan(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm))
        errs++;

    if (sbuf)
        free(sbuf);
    if (rbuf)
        free(rbuf);
    if (scounts)
        free(scounts);
    if (rcounts)
        free(rcounts);
    if (sdispls)
        free(sdispls);
    if (rdispls)
        free(rdispls);
    if (types)
        free(types);

    if (rank == 0) {
        if (errs)
            fprintf(stderr, "Found %d errors\n", errs);
        else
            printf(" No errors\n");
    }

    MPI_Finalize();
    return 0;
}
int main(int argc, char **argv)
{
    int err = 0;
    int size, rsize, rank, i;
    int recvcount,              /* Each process receives this much data */
        sendcount,              /* Each process contributes this much data */
        basecount;              /* Unit of elements - basecount * rsize is recvcount, etc. */
    int isLeftGroup;
    long long *sendbuf, *recvbuf;
    long long sumval;
    MPI_Comm comm;

    MTest_Init(&argc, &argv);
    comm = MPI_COMM_WORLD;

    basecount = 1024;

    while (MTestGetIntercomm(&comm, &isLeftGroup, 2)) {
        if (comm == MPI_COMM_NULL)
            continue;

        MPI_Comm_remote_size(comm, &rsize);
        MPI_Comm_size(comm, &size);
        MPI_Comm_rank(comm, &rank);

        if (0) {
            printf("[%d] %s (%d,%d) remote %d\n", rank, isLeftGroup ? "L" : "R",
                   rank, size, rsize);
        }

        recvcount = basecount * rsize;
        sendcount = basecount * rsize * size;

        sendbuf = (long long *) malloc(sendcount * sizeof(long long));
        if (!sendbuf) {
            fprintf(stderr, "Could not allocate %d ints for sendbuf\n", sendcount);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        for (i = 0; i < sendcount; i++) {
            sendbuf[i] = (long long) (rank * sendcount + i);
        }

        recvbuf = (long long *) malloc(recvcount * sizeof(long long));
        if (!recvbuf) {
            fprintf(stderr, "Could not allocate %d ints for recvbuf\n", recvcount);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        for (i = 0; i < recvcount; i++) {
            recvbuf[i] = (long long) (-i);
        }

        MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, MPI_LONG_LONG, MPI_SUM, comm);

        /* Check received data */
        for (i = 0; i < recvcount; i++) {
            sumval = (long long) (sendcount) * (long long) ((rsize * (rsize - 1)) / 2) +
                (long long) (i + rank * rsize * basecount) * (long long) rsize;
            if (recvbuf[i] != sumval) {
                err++;
                if (err < 4) {
                    fprintf(stdout, "Did not get expected value for reduce scatter\n");
                    fprintf(stdout, "[%d] %s recvbuf[%d] = %lld, expected %lld\n",
                            rank, isLeftGroup ? "L" : "R", i, recvbuf[i], sumval);
                }
            }
        }

        free(sendbuf);
        free(recvbuf);
        MTestFreeComm(&comm);
    }

    MTest_Finalize(err);
    MPI_Finalize();
    return 0;
}
FORT_DLL_SPEC void FORT_CALL mpi_reduce_scatter_block_ ( void *v1, void *v2, MPI_Fint *v3, MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *v6, MPI_Fint *ierr )
{
    *ierr = MPI_Reduce_scatter_block( v1, v2, (int)(*v3), (MPI_Datatype)(*v4), (MPI_Op)(*v5), (MPI_Comm)(*v6) );
}
int main(int argc, char **argv)
{
    int err = 0;
    int toterr, size, rank;
#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
    int i, sumval;
    int *sendbuf;
    int *recvbuf;
#endif
    MPI_Comm comm;

    MPI_Init(&argc, &argv);
    comm = MPI_COMM_WORLD;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
    /* MPI_Reduce_scatter_block was added in MPI-2.2 */
    sendbuf = (int *) malloc(size * sizeof(int));
    recvbuf = (int *) malloc(size * sizeof(int));
    if (!sendbuf || !recvbuf) {
        err++;
        fprintf(stderr, "unable to allocate send/recv buffers, aborting");
        MPI_Abort(MPI_COMM_WORLD, 1);
        exit(1);
    }
    for (i = 0; i < size; i++)
        sendbuf[i] = rank + i;

    MPI_Reduce_scatter_block(sendbuf, recvbuf, 1, MPI_INT, MPI_SUM, comm);

    sumval = size * rank + ((size - 1) * size) / 2;
    if (recvbuf[0] != sumval) {
        err++;
        fprintf(stdout, "Did not get expected value for reduce scatter block\n");
        fprintf(stdout, "[%d] Got %d expected %d\n", rank, recvbuf[0], sumval);
    }

    free(sendbuf);

    /* let's try it again with MPI_IN_PLACE this time */
    for (i = 0; i < size; i++)
        recvbuf[i] = rank + i;

    MPI_Reduce_scatter_block(MPI_IN_PLACE, recvbuf, 1, MPI_INT, MPI_SUM, comm);

    sumval = size * rank + ((size - 1) * size) / 2;
    if (recvbuf[0] != sumval) {
        err++;
        fprintf(stdout, "Did not get expected value for reduce scatter block\n");
        fprintf(stdout, "[%d] Got %d expected %d\n", rank, recvbuf[0], sumval);
    }

    /* aliased send/recv buffers are erroneous and must be rejected */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
    if (MPI_SUCCESS == MPI_Reduce_scatter_block(recvbuf, recvbuf, 1, MPI_INT, MPI_SUM, comm))
        err++;

    free(recvbuf);
#endif

    MPI_Allreduce(&err, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    if (rank == 0 && toterr == 0) {
        printf(" No Errors\n");
    }

    MPI_Finalize();

    return toterr;
}
#include <mpi.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size != 4) {
        if (rank == 0) printf("Use 4 processes\n");
        MPI_Finalize();
        return size;
    }

    {
        if (rank == 0) printf("MPI_Reduce_scatter(sendbuf, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);
        int junk = rank + 1;
        int sendbuf[4] = {junk, junk * 2, junk * 3, junk * 4};
        int recvbuf[1] = {0};
        int recvcounts[4] = {1, 1, 1, 1};
        MPI_Reduce_scatter(sendbuf, recvbuf, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n",
               rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]);
    }
    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) printf("===================\n");

    {
        if (rank == 0) printf("MPI_Reduce_scatter(MPI_IN_PLACE, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);
        int junk = rank + 1;
        int recvbuf[4] = {junk, junk * 2, junk * 3, junk * 4};
        int recvcounts[4] = {1, 1, 1, 1};
        MPI_Reduce_scatter(MPI_IN_PLACE, recvbuf, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: recvbuf = {%d,%d,%d,%d} \n",
               rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]);
    }
    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) printf("===================\n");

    {
        if (rank == 0) printf("MPI_Reduce_scatter_block(sendbuf, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);
        int junk = rank + 1;
        int sendbuf[4] = {junk, junk * 2, junk * 3, junk * 4};
        int recvbuf[1] = {0};
        int recvcount = 1;
        MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n",
               rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]);
    }
    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) printf("===================\n");

    {
        if (rank == 0) printf("MPI_Reduce_scatter_block(MPI_IN_PLACE, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);
        int junk = rank + 1;
        int recvbuf[4] = {junk, junk * 2, junk * 3, junk * 4};
        int recvcount = 1;
        MPI_Reduce_scatter_block(MPI_IN_PLACE, recvbuf, recvcount, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: recvbuf = {%d,%d,%d,%d} \n",
               rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]);
    }
    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) printf("===================\n");

    {
        if (rank == 0) printf("MPI_Reduce(sendbuf, tempbuf... + MPI_Scatter(tempbuf, recvcount...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);
        int junk = rank + 1;
        int sendbuf[4] = {junk, junk * 2, junk * 3, junk * 4};
        int tempbuf[4] = {0, 0, 0, 0};
        int recvbuf[1] = {0};
        int recvcount = 1;
        MPI_Reduce(sendbuf, tempbuf, 4 * recvcount, MPI_INT, MPI_SUM, 0 /* root */, MPI_COMM_WORLD);
        MPI_Scatter(tempbuf, recvcount, MPI_INT, recvbuf, recvcount, MPI_INT, 0 /* root */, MPI_COMM_WORLD);
        printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n",
               rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]);
    }
    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) printf("===================\n");

    {
        if (rank == 0) printf("MPI_Reduce(MPI_IN_PLACE, recvbuf... + MPI_Scatter(MPI_IN_PLACE, recvcount...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);
        int junk = rank + 1;
        int recvbuf[4] = {junk, junk * 2, junk * 3, junk * 4};
        int recvcount = 1;
        MPI_Reduce(rank == 0 ? MPI_IN_PLACE : recvbuf, rank == 0 ? recvbuf : NULL,
                   4 * recvcount, MPI_INT, MPI_SUM, 0 /* root */, MPI_COMM_WORLD);
        MPI_Scatter(recvbuf, recvcount, MPI_INT, rank == 0 ? MPI_IN_PLACE : recvbuf,
                    recvcount, MPI_INT, 0 /* root */, MPI_COMM_WORLD);
        printf("%d: recvbuf = {%d,%d,%d,%d} \n",
               rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]);
    }

    MPI_Finalize();
    return 0;
}