int MPIR_Reduce_intra_reduce_scatter_gather ( const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag ) { int mpi_errno = MPI_SUCCESS; int mpi_errno_ret = MPI_SUCCESS; int comm_size, rank, type_size ATTRIBUTE((unused)), pof2, rem, newrank; int mask, *cnts, *disps, i, j, send_idx=0; int recv_idx, last_idx=0, newdst; int dst, send_cnt, recv_cnt, newroot, newdst_tree_root, newroot_tree_root; MPI_Aint true_lb, true_extent, extent; void *tmp_buf; MPIR_CHKLMEM_DECL(4); comm_size = comm_ptr->local_size; rank = comm_ptr->rank; /* set op_errno to 0. stored in perthread structure */ { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIR_Assert(err == 0); per_thread->op_errno = 0; } /* Create a temporary buffer */ MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); MPIR_Datatype_get_extent_macro(datatype, extent); /* I think this is the worse case, so we can avoid an assert() * inside the for loop */ /* should be buf+{this}? */ MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent)); MPIR_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "temporary buffer", MPL_MEM_BUFFER); /* adjust for potential negative lower bound in datatype */ tmp_buf = (void *)((char*)tmp_buf - true_lb); /* If I'm not the root, then my recvbuf may not be valid, therefore I have to allocate a temporary one */ if (rank != root) { MPIR_CHKLMEM_MALLOC(recvbuf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "receive buffer", MPL_MEM_BUFFER); recvbuf = (void *)((char*)recvbuf - true_lb); }
int MPIR_Ireduce_sched_intra_binomial(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPIR_Comm * comm_ptr, MPIR_Sched_t s) { int mpi_errno = MPI_SUCCESS; int comm_size, rank, is_commutative; int mask, relrank, source, lroot; MPI_Aint true_lb, true_extent, extent; void *tmp_buf; MPIR_SCHED_CHKPMEM_DECL(2); MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM); if (count == 0) return MPI_SUCCESS; comm_size = comm_ptr->local_size; rank = comm_ptr->rank; /* set op_errno to 0. stored in perthread structure */ { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIR_Assert(err == 0); per_thread->op_errno = 0; } /* Create a temporary buffer */ MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); MPIR_Datatype_get_extent_macro(datatype, extent); is_commutative = MPIR_Op_is_commutative(op); /* I think this is the worse case, so we can avoid an assert() * inside the for loop */ /* should be buf+{this}? */ MPIR_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent)); MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, count * (MPL_MAX(extent, true_extent)), mpi_errno, "temporary buffer", MPL_MEM_BUFFER); /* adjust for potential negative lower bound in datatype */ tmp_buf = (void *) ((char *) tmp_buf - true_lb); /* If I'm not the root, then my recvbuf may not be valid, therefore * I have to allocate a temporary one */ if (rank != root) { MPIR_SCHED_CHKPMEM_MALLOC(recvbuf, void *, count * (MPL_MAX(extent, true_extent)), mpi_errno, "receive buffer", MPL_MEM_BUFFER); recvbuf = (void *) ((char *) recvbuf - true_lb); }
void MPIR_MINLOC( void *invec, void *inoutvec, int *Len, MPI_Datatype *type ) { int mpi_errno = MPI_SUCCESS; int i, len = *Len; #ifdef HAVE_FORTRAN_BINDING #ifndef HAVE_NO_FORTRAN_MPI_TYPES_IN_C int flen = len * 2; /* used for Fortran types */ #endif #endif switch (*type) { /* first the C types */ case MPI_2INT: MPIR_MINLOC_C_CASE(MPIR_2int_loctype); case MPI_FLOAT_INT: MPIR_MINLOC_C_CASE(MPIR_floatint_loctype); case MPI_LONG_INT: MPIR_MINLOC_C_CASE(MPIR_longint_loctype); case MPI_SHORT_INT: MPIR_MINLOC_C_CASE(MPIR_shortint_loctype); case MPI_DOUBLE_INT: MPIR_MINLOC_C_CASE(MPIR_doubleint_loctype); #if defined(HAVE_LONG_DOUBLE) case MPI_LONG_DOUBLE_INT: MPIR_MINLOC_C_CASE(MPIR_longdoubleint_loctype); #endif /* now the Fortran types */ #ifdef HAVE_FORTRAN_BINDING #ifndef HAVE_NO_FORTRAN_MPI_TYPES_IN_C case MPI_2INTEGER: MPIR_MINLOC_F_CASE(MPI_Fint); case MPI_2REAL: MPIR_MINLOC_F_CASE(MPIR_FC_REAL_CTYPE); case MPI_2DOUBLE_PRECISION: MPIR_MINLOC_F_CASE(MPIR_FC_DOUBLE_CTYPE); #endif #endif /* --BEGIN ERROR HANDLING-- */ default: { MPIR_ERR_SET1(mpi_errno, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_MINLOC" ); { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIR_Assert(err == 0); per_thread->op_errno = mpi_errno; } break; } /* --END ERROR HANDLING-- */ } }
int MPIR_Exscan ( const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag ) { MPI_Status status; int rank, comm_size; int mpi_errno = MPI_SUCCESS; int mpi_errno_ret = MPI_SUCCESS; int mask, dst, is_commutative, flag; MPI_Aint true_extent, true_lb, extent; void *partial_scan, *tmp_buf; MPIR_Op *op_ptr; MPIR_CHKLMEM_DECL(2); if (count == 0) return MPI_SUCCESS; comm_size = comm_ptr->local_size; rank = comm_ptr->rank; /* set op_errno to 0. stored in perthread structure */ { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIR_Assert(err == 0); per_thread->op_errno = 0; } if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) { is_commutative = 1; } else { MPIR_Op_get_ptr(op, op_ptr); if (op_ptr->kind == MPIR_OP_KIND__USER_NONCOMMUTE) is_commutative = 0; else is_commutative = 1; } /* need to allocate temporary buffer to store partial scan*/ MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); MPID_Datatype_get_extent_macro( datatype, extent ); MPIR_CHKLMEM_MALLOC(partial_scan, void *, (count*(MPL_MAX(true_extent,extent))), mpi_errno, "partial_scan"); /* adjust for potential negative lower bound in datatype */ partial_scan = (void *)((char*)partial_scan - true_lb); /* need to allocate temporary buffer to store incoming data*/ MPIR_CHKLMEM_MALLOC(tmp_buf, void *, (count*(MPL_MAX(true_extent,extent))), mpi_errno, "tmp_buf"); /* adjust for potential negative lower bound in datatype */ tmp_buf = (void *)((char*)tmp_buf - true_lb); mpi_errno = MPIR_Localcopy((sendbuf == MPI_IN_PLACE ? (const void *)recvbuf : sendbuf), count, datatype, partial_scan, count, datatype); if (mpi_errno) MPIR_ERR_POP(mpi_errno); flag = 0; mask = 0x1; while (mask < comm_size) { dst = rank ^ mask; if (dst < comm_size) { /* Send partial_scan to dst. Recv into tmp_buf */ mpi_errno = MPIC_Sendrecv(partial_scan, count, datatype, dst, MPIR_EXSCAN_TAG, tmp_buf, count, datatype, dst, MPIR_EXSCAN_TAG, comm_ptr, &status, errflag); if (mpi_errno) { /* for communication errors, just record the error but continue */ *errflag = MPIR_ERR_GET_CLASS(mpi_errno); MPIR_ERR_SET(mpi_errno, *errflag, "**fail"); MPIR_ERR_ADD(mpi_errno_ret, mpi_errno); } if (rank > dst) { mpi_errno = MPIR_Reduce_local_impl( tmp_buf, partial_scan, count, datatype, op ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* On rank 0, recvbuf is not defined. For sendbuf==MPI_IN_PLACE recvbuf must not change (per MPI-2.2). On rank 1, recvbuf is to be set equal to the value in sendbuf on rank 0. On others, recvbuf is the scan of values in the sendbufs on lower ranks. */ if (rank != 0) { if (flag == 0) { /* simply copy data recd from rank 0 into recvbuf */ mpi_errno = MPIR_Localcopy(tmp_buf, count, datatype, recvbuf, count, datatype); if (mpi_errno) MPIR_ERR_POP(mpi_errno); flag = 1; } else { mpi_errno = MPIR_Reduce_local_impl( tmp_buf, recvbuf, count, datatype, op ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } } } else { if (is_commutative) { mpi_errno = MPIR_Reduce_local_impl( tmp_buf, partial_scan, count, datatype, op ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } else { mpi_errno = MPIR_Reduce_local_impl( partial_scan, tmp_buf, count, datatype, op ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIR_Localcopy(tmp_buf, count, datatype, partial_scan, count, datatype); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } } } mask <<= 1; } { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIR_Assert(err == 0); if (per_thread->op_errno) mpi_errno = per_thread->op_errno; } fn_exit: MPIR_CHKLMEM_FREEALL(); if (mpi_errno_ret) mpi_errno = mpi_errno_ret; else if (*errflag != MPIR_ERR_NONE) MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail"); return mpi_errno; fn_fail: goto fn_exit; }
static int MPIR_Scan_generic ( const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, MPIR_Errflag_t *errflag ) { MPI_Status status; int rank, comm_size; int mpi_errno = MPI_SUCCESS; int mpi_errno_ret = MPI_SUCCESS; int mask, dst, is_commutative; MPI_Aint true_extent, true_lb, extent; void *partial_scan, *tmp_buf; MPID_Op *op_ptr; MPIU_CHKLMEM_DECL(2); if (count == 0) return MPI_SUCCESS; /* check if multiple threads are calling this collective function */ MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr ); comm_size = comm_ptr->local_size; rank = comm_ptr->rank; /* set op_errno to 0. stored in perthread structure */ { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIU_Assert(err == 0); per_thread->op_errno = 0; } if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) { is_commutative = 1; } else { MPID_Op_get_ptr(op, op_ptr); if (op_ptr->kind == MPID_OP_USER_NONCOMMUTE) is_commutative = 0; else is_commutative = 1; } /* need to allocate temporary buffer to store partial scan*/ MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); MPID_Datatype_get_extent_macro(datatype, extent); MPIU_CHKLMEM_MALLOC(partial_scan, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "partial_scan"); /* This eventually gets malloc()ed as a temp buffer, not added to * any user buffers */ MPIU_Ensure_Aint_fits_in_pointer(count * MPL_MAX(extent, true_extent)); /* adjust for potential negative lower bound in datatype */ partial_scan = (void *)((char*)partial_scan - true_lb); /* need to allocate temporary buffer to store incoming data*/ MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPL_MAX(extent,true_extent)), mpi_errno, "tmp_buf"); /* adjust for potential negative lower bound in datatype */ tmp_buf = (void *)((char*)tmp_buf - true_lb); /* Since this is an inclusive scan, copy local contribution into recvbuf. */ if (sendbuf != MPI_IN_PLACE) { mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, recvbuf, count, datatype); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } if (sendbuf != MPI_IN_PLACE) mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, partial_scan, count, datatype); else mpi_errno = MPIR_Localcopy(recvbuf, count, datatype, partial_scan, count, datatype); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mask = 0x1; while (mask < comm_size) { dst = rank ^ mask; if (dst < comm_size) { /* Send partial_scan to dst. Recv into tmp_buf */ mpi_errno = MPIC_Sendrecv(partial_scan, count, datatype, dst, MPIR_SCAN_TAG, tmp_buf, count, datatype, dst, MPIR_SCAN_TAG, comm_ptr, &status, errflag); if (mpi_errno) { /* for communication errors, just record the error but continue */ *errflag = MPIR_ERR_GET_CLASS(mpi_errno); MPIR_ERR_SET(mpi_errno, *errflag, "**fail"); MPIR_ERR_ADD(mpi_errno_ret, mpi_errno); } if (rank > dst) { mpi_errno = MPIR_Reduce_local_impl( tmp_buf, partial_scan, count, datatype, op); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIR_Reduce_local_impl( tmp_buf, recvbuf, count, datatype, op); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } else { if (is_commutative) { mpi_errno = MPIR_Reduce_local_impl( tmp_buf, partial_scan, count, datatype, op); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } else { mpi_errno = MPIR_Reduce_local_impl( partial_scan, tmp_buf, count, datatype, op); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIR_Localcopy(tmp_buf, count, datatype, partial_scan, count, datatype); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } } } mask <<= 1; } { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIU_Assert(err == 0); if (per_thread->op_errno) { mpi_errno = per_thread->op_errno; if (mpi_errno) MPIR_ERR_POP(mpi_errno); } } fn_exit: MPIU_CHKLMEM_FREEALL(); /* check if multiple threads are calling this collective function */ MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr ); if (mpi_errno_ret) mpi_errno = mpi_errno_ret; else if (*errflag != MPIR_ERR_NONE) MPIR_ERR_SET(mpi_errno, *errflag, "**coll_fail"); return mpi_errno; fn_fail: goto fn_exit; }
int MPIR_Reduce_local(const void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op) { int mpi_errno = MPI_SUCCESS; MPIR_Op *op_ptr; MPI_User_function *uop; #ifdef HAVE_CXX_BINDING int is_cxx_uop = 0; #endif #if defined(HAVE_FORTRAN_BINDING) && !defined(HAVE_FINT_IS_INT) int is_f77_uop = 0; #endif if (count == 0) goto fn_exit; { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIR_Assert(err == 0); per_thread->op_errno = MPI_SUCCESS; } if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) { /* get the function by indexing into the op table */ uop = MPIR_OP_HDL_TO_FN(op); } else { MPIR_Op_get_ptr(op, op_ptr); #ifdef HAVE_CXX_BINDING if (op_ptr->language == MPIR_LANG__CXX) { uop = (MPI_User_function *) op_ptr->function.c_function; is_cxx_uop = 1; } else #endif { if (op_ptr->language == MPIR_LANG__C) { uop = (MPI_User_function *) op_ptr->function.c_function; } else { uop = (MPI_User_function *) op_ptr->function.f77_function; #if defined(HAVE_FORTRAN_BINDING) && !defined(HAVE_FINT_IS_INT) is_f77_uop = 1; #endif } } } /* actually perform the reduction */ #ifdef HAVE_CXX_BINDING if (is_cxx_uop) { (*MPIR_Process.cxx_call_op_fn) (inbuf, inoutbuf, count, datatype, uop); } else #endif { #if defined(HAVE_FORTRAN_BINDING) && !defined(HAVE_FINT_IS_INT) if (is_f77_uop) { MPI_Fint lcount = (MPI_Fint) count; MPI_Fint ldtype = (MPI_Fint) datatype; MPII_F77_User_function *uop_f77 = (MPII_F77_User_function *) uop; (*uop_f77) ((void *) inbuf, inoutbuf, &lcount, &ldtype); } else { (*uop) ((void *) inbuf, inoutbuf, &count, &datatype); } #else (*uop) ((void *) inbuf, inoutbuf, &count, &datatype); #endif } /* --BEGIN ERROR HANDLING-- */ { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIR_Assert(err == 0); if (per_thread->op_errno) mpi_errno = per_thread->op_errno; } /* --END ERROR HANDLING-- */ fn_exit: return mpi_errno; }
/* Algorithm: Recursive halving * * This is a recursive-halving algorithm in which the first p/2 processes send * the second n/2 data to their counterparts in the other half and receive the * first n/2 data from them. This procedure continues recursively, halving the * data communicated at each step, for a total of lgp steps. If the number of * processes is not a power-of-two, we convert it to the nearest lower * power-of-two by having the first few even-numbered processes send their data * to the neighboring odd-numbered process at (rank+1). Those odd-numbered * processes compute the result for their left neighbor as well in the * recursive halving algorithm, and then at the end send the result back to * the processes that didn't participate. Therefore, if p is a power-of-two: * * Cost = lgp.alpha + n.((p-1)/p).beta + n.((p-1)/p).gamma * * If p is not a power-of-two: * * Cost = (floor(lgp)+2).alpha + n.(1+(p-1+n)/p).beta + n.(1+(p-1)/p).gamma * * The above cost in the non power-of-two case is approximate because there is * some imbalance in the amount of work each process does because some * processes do the work of their neighbors as well. */ int MPIR_Reduce_scatter_block_intra_recursive_halving ( const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr, MPIR_Errflag_t *errflag ) { int rank, comm_size, i; MPI_Aint extent, true_extent, true_lb; int *disps; void *tmp_recvbuf, *tmp_results; int mpi_errno = MPI_SUCCESS; int mpi_errno_ret = MPI_SUCCESS; int total_count, dst; int mask; int *newcnts, *newdisps, rem, newdst, send_idx, recv_idx, last_idx, send_cnt, recv_cnt; int pof2, old_i, newrank; MPIR_CHKLMEM_DECL(5); comm_size = comm_ptr->local_size; rank = comm_ptr->rank; #ifdef HAVE_ERROR_CHECKING { int is_commutative; is_commutative = MPIR_Op_is_commutative(op); MPIR_Assert(is_commutative); } #endif /* HAVE_ERROR_CHECKING */ /* set op_errno to 0. stored in perthread structure */ { MPIR_Per_thread_t *per_thread = NULL; int err = 0; MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, MPIR_Per_thread, per_thread, &err); MPIR_Assert(err == 0); per_thread->op_errno = 0; } if (recvcount == 0) { goto fn_exit; } MPIR_Datatype_get_extent_macro(datatype, extent); MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); MPIR_CHKLMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps", MPL_MEM_BUFFER); total_count = comm_size*recvcount; for (i=0; i<comm_size; i++) { disps[i] = i*recvcount; } /* total_count*extent eventually gets malloced. it isn't added to * a user-passed in buffer */ MPIR_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent)); /* commutative and short. use recursive halving algorithm */ /* allocate temp. buffer to receive incoming data */ MPIR_CHKLMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf", MPL_MEM_BUFFER); /* adjust for potential negative lower bound in datatype */ tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb); /* need to allocate another temporary buffer to accumulate results because recvbuf may not be big enough */ MPIR_CHKLMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results", MPL_MEM_BUFFER); /* adjust for potential negative lower bound in datatype */ tmp_results = (void *)((char*)tmp_results - true_lb); /* copy sendbuf into tmp_results */ if (sendbuf != MPI_IN_PLACE) mpi_errno = MPIR_Localcopy(sendbuf, total_count, datatype, tmp_results, total_count, datatype); else mpi_errno = MPIR_Localcopy(recvbuf, total_count, datatype, tmp_results, total_count, datatype); if (mpi_errno) MPIR_ERR_POP(mpi_errno); pof2 = comm_ptr->pof2; rem = comm_size - pof2; /* In the non-power-of-two case, all even-numbered processes of rank < 2*rem send their data to (rank+1). These even-numbered processes no longer participate in the algorithm until the very end. The remaining processes form a nice power-of-two. */ if (rank < 2*rem) { if (rank % 2 == 0) { /* even */ mpi_errno = MPIC_Send(tmp_results, total_count, datatype, rank+1, MPIR_REDUCE_SCATTER_BLOCK_TAG, comm_ptr, errflag); if (mpi_errno) { /* for communication errors, just record the error but continue */ *errflag = MPIR_ERR_GET_CLASS(mpi_errno); MPIR_ERR_SET(mpi_errno, *errflag, "**fail"); MPIR_ERR_ADD(mpi_errno_ret, mpi_errno); } /* temporarily set the rank to -1 so that this process does not pariticipate in recursive doubling */ newrank = -1; } else { /* odd */ mpi_errno = MPIC_Recv(tmp_recvbuf, total_count, datatype, rank-1, MPIR_REDUCE_SCATTER_BLOCK_TAG, comm_ptr, MPI_STATUS_IGNORE, errflag); if (mpi_errno) { /* for communication errors, just record the error but continue */ *errflag = MPIR_ERR_GET_CLASS(mpi_errno); MPIR_ERR_SET(mpi_errno, *errflag, "**fail"); MPIR_ERR_ADD(mpi_errno_ret, mpi_errno); } /* do the reduction on received data. since the ordering is right, it doesn't matter whether the operation is commutative or not. */ mpi_errno = MPIR_Reduce_local( tmp_recvbuf, tmp_results, total_count, datatype, op); /* change the rank */ newrank = rank / 2; } } else /* rank >= 2*rem */ newrank = rank - rem; if (newrank != -1) { /* recalculate the recvcnts and disps arrays because the even-numbered processes who no longer participate will have their result calculated by the process to their right (rank+1). */ MPIR_CHKLMEM_MALLOC(newcnts, int *, pof2*sizeof(int), mpi_errno, "newcnts", MPL_MEM_BUFFER); MPIR_CHKLMEM_MALLOC(newdisps, int *, pof2*sizeof(int), mpi_errno, "newdisps", MPL_MEM_BUFFER); for (i=0; i<pof2; i++) { /* what does i map to in the old ranking? */ old_i = (i < rem) ? i*2 + 1 : i + rem; if (old_i < 2*rem) { /* This process has to also do its left neighbor's work */ newcnts[i] = 2 * recvcount; } else newcnts[i] = recvcount; } newdisps[0] = 0; for (i=1; i<pof2; i++) newdisps[i] = newdisps[i-1] + newcnts[i-1]; mask = pof2 >> 1; send_idx = recv_idx = 0; last_idx = pof2; while (mask > 0) { newdst = newrank ^ mask; /* find real rank of dest */ dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem; send_cnt = recv_cnt = 0; if (newrank < newdst) { send_idx = recv_idx + mask; for (i=send_idx; i<last_idx; i++) send_cnt += newcnts[i]; for (i=recv_idx; i<send_idx; i++) recv_cnt += newcnts[i]; } else { recv_idx = send_idx + mask; for (i=send_idx; i<recv_idx; i++) send_cnt += newcnts[i]; for (i=recv_idx; i<last_idx; i++) recv_cnt += newcnts[i]; } /* printf("Rank %d, send_idx %d, recv_idx %d, send_cnt %d, recv_cnt %d, last_idx %d\n", newrank, send_idx, recv_idx, send_cnt, recv_cnt, last_idx); */ /* Send data from tmp_results. Recv into tmp_recvbuf */ if ((send_cnt != 0) && (recv_cnt != 0)) mpi_errno = MPIC_Sendrecv((char *) tmp_results + newdisps[send_idx]*extent, send_cnt, datatype, dst, MPIR_REDUCE_SCATTER_BLOCK_TAG, (char *) tmp_recvbuf + newdisps[recv_idx]*extent, recv_cnt, datatype, dst, MPIR_REDUCE_SCATTER_BLOCK_TAG, comm_ptr, MPI_STATUS_IGNORE, errflag); else if ((send_cnt == 0) && (recv_cnt != 0)) mpi_errno = MPIC_Recv((char *) tmp_recvbuf + newdisps[recv_idx]*extent, recv_cnt, datatype, dst, MPIR_REDUCE_SCATTER_BLOCK_TAG, comm_ptr, MPI_STATUS_IGNORE, errflag); else if ((recv_cnt == 0) && (send_cnt != 0)) mpi_errno = MPIC_Send((char *) tmp_results + newdisps[send_idx]*extent, send_cnt, datatype, dst, MPIR_REDUCE_SCATTER_BLOCK_TAG, comm_ptr, errflag); if (mpi_errno) { /* for communication errors, just record the error but continue */ *errflag = MPIR_ERR_GET_CLASS(mpi_errno); MPIR_ERR_SET(mpi_errno, *errflag, "**fail"); MPIR_ERR_ADD(mpi_errno_ret, mpi_errno); } /* tmp_recvbuf contains data received in this step. tmp_results contains data accumulated so far */ if (recv_cnt) { mpi_errno = MPIR_Reduce_local( (char *) tmp_recvbuf + newdisps[recv_idx]*extent, (char *) tmp_results + newdisps[recv_idx]*extent, recv_cnt, datatype, op); } /* update send_idx for next iteration */ send_idx = recv_idx; last_idx = recv_idx + mask; mask >>= 1; } /* copy this process's result from tmp_results to recvbuf */ mpi_errno = MPIR_Localcopy((char *)tmp_results + disps[rank]*extent, recvcount, datatype, recvbuf, recvcount, datatype); if (mpi_errno) MPIR_ERR_POP(mpi_errno); }