void MPIR_MAXLOC( void *invec, void *inoutvec, int *Len, MPI_Datatype *type ) { int mpi_errno = MPI_SUCCESS; int i, len = *Len, flen; flen = len * 2; /* used for Fortran types */ switch (*type) { /* first the C types */ case MPI_2INT: MPIR_MAXLOC_C_CASE(MPIR_2int_loctype); case MPI_FLOAT_INT: MPIR_MAXLOC_C_CASE(MPIR_floatint_loctype); case MPI_LONG_INT: MPIR_MAXLOC_C_CASE(MPIR_longint_loctype); case MPI_SHORT_INT: MPIR_MAXLOC_C_CASE(MPIR_shortint_loctype); case MPI_DOUBLE_INT: MPIR_MAXLOC_C_CASE(MPIR_doubleint_loctype); #if defined(HAVE_LONG_DOUBLE) case MPI_LONG_DOUBLE_INT: MPIR_MAXLOC_C_CASE(MPIR_longdoubleint_loctype); #endif /* now the Fortran types */ #ifdef HAVE_FORTRAN_BINDING #ifndef HAVE_NO_FORTRAN_MPI_TYPES_IN_C case MPI_2INTEGER: MPIR_MAXLOC_F_CASE(int); case MPI_2REAL: MPIR_MAXLOC_F_CASE(float); case MPI_2DOUBLE_PRECISION: MPIR_MAXLOC_F_CASE(double); #endif #endif /* --BEGIN ERROR HANDLING-- */ default: { MPIU_THREADPRIV_DECL; MPIU_THREADPRIV_GET; MPIU_ERR_SET1(mpi_errno, MPI_ERR_OP, "**opundefined","**opundefined %s", "MPI_MAXLOC" ); MPIU_THREADPRIV_FIELD(op_errno) = mpi_errno; break; } /* --END ERROR HANDLING-- */ } }
static int MPIR_Scan_generic ( const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag ) { MPI_Status status; int rank, comm_size; int mpi_errno = MPI_SUCCESS; int mpi_errno_ret = MPI_SUCCESS; int mask, dst, is_commutative; MPI_Aint true_extent, true_lb, extent; void *partial_scan, *tmp_buf; MPID_Op *op_ptr; MPI_Comm comm; MPIU_THREADPRIV_DECL; MPIU_CHKLMEM_DECL(2); if (count == 0) return MPI_SUCCESS; /* check if multiple threads are calling this collective function */ MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr ); comm = comm_ptr->handle; comm_size = comm_ptr->local_size; rank = comm_ptr->rank; MPIU_THREADPRIV_GET; /* set op_errno to 0. stored in perthread structure */ MPIU_THREADPRIV_FIELD(op_errno) = 0; if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) { is_commutative = 1; } else { MPID_Op_get_ptr(op, op_ptr); if (op_ptr->kind == MPID_OP_USER_NONCOMMUTE) is_commutative = 0; else is_commutative = 1; } /* need to allocate temporary buffer to store partial scan*/ MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); MPID_Datatype_get_extent_macro(datatype, extent); MPIU_CHKLMEM_MALLOC(partial_scan, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "partial_scan"); /* This eventually gets malloc()ed as a temp buffer, not added to * any user buffers */ MPID_Ensure_Aint_fits_in_pointer(count * MPIR_MAX(extent, true_extent)); /* adjust for potential negative lower bound in datatype */ partial_scan = (void *)((char*)partial_scan - true_lb); /* need to allocate temporary buffer to store incoming data*/ MPIU_CHKLMEM_MALLOC(tmp_buf, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "tmp_buf"); /* adjust for potential negative lower bound in datatype */ tmp_buf = (void *)((char*)tmp_buf - true_lb); /* Since this is an inclusive scan, copy local contribution into recvbuf. */ if (sendbuf != MPI_IN_PLACE) { mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, recvbuf, count, datatype); if (mpi_errno) MPIU_ERR_POP(mpi_errno); } if (sendbuf != MPI_IN_PLACE) mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, partial_scan, count, datatype); else mpi_errno = MPIR_Localcopy(recvbuf, count, datatype, partial_scan, count, datatype); if (mpi_errno) MPIU_ERR_POP(mpi_errno); mask = 0x1; while (mask < comm_size) { dst = rank ^ mask; if (dst < comm_size) { /* Send partial_scan to dst. Recv into tmp_buf */ mpi_errno = MPIC_Sendrecv(partial_scan, count, datatype, dst, MPIR_SCAN_TAG, tmp_buf, count, datatype, dst, MPIR_SCAN_TAG, comm, &status, errflag); if (mpi_errno) { /* for communication errors, just record the error but continue */ *errflag = TRUE; MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail"); MPIU_ERR_ADD(mpi_errno_ret, mpi_errno); } if (rank > dst) { mpi_errno = MPIR_Reduce_local_impl( tmp_buf, partial_scan, count, datatype, op); if (mpi_errno) MPIU_ERR_POP(mpi_errno); mpi_errno = MPIR_Reduce_local_impl( tmp_buf, recvbuf, count, datatype, op); if (mpi_errno) MPIU_ERR_POP(mpi_errno); } else { if (is_commutative) { mpi_errno = MPIR_Reduce_local_impl( tmp_buf, partial_scan, count, datatype, op); if (mpi_errno) MPIU_ERR_POP(mpi_errno); } else { mpi_errno = MPIR_Reduce_local_impl( partial_scan, tmp_buf, count, datatype, op); if (mpi_errno) MPIU_ERR_POP(mpi_errno); mpi_errno = MPIR_Localcopy(tmp_buf, count, datatype, partial_scan, count, datatype); if (mpi_errno) MPIU_ERR_POP(mpi_errno); } } } mask <<= 1; } if (MPIU_THREADPRIV_FIELD(op_errno)) { mpi_errno = MPIU_THREADPRIV_FIELD(op_errno); if (mpi_errno) MPIU_ERR_POP(mpi_errno); } fn_exit: MPIU_CHKLMEM_FREEALL(); /* check if multiple threads are calling this collective function */ MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr ); if (mpi_errno_ret) mpi_errno = mpi_errno_ret; else if (*errflag) MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail"); return mpi_errno; fn_fail: goto fn_exit; }
int MPIR_Reduce_local_impl(void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op) { int mpi_errno = MPI_SUCCESS; MPID_Op *op_ptr; MPI_User_function *uop; #ifdef HAVE_CXX_BINDING int is_cxx_uop = 0; #endif #if defined(HAVE_FORTRAN_BINDING) && !defined(HAVE_FINT_IS_INT) int is_f77_uop = 0; #endif MPIU_THREADPRIV_DECL; if (count == 0) goto fn_exit; MPIU_THREADPRIV_GET; MPIU_THREADPRIV_FIELD(op_errno) = MPI_SUCCESS; if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) { /* get the function by indexing into the op table */ uop = MPIR_Op_table[op%16 - 1]; } else { MPID_Op_get_ptr(op, op_ptr); #ifdef HAVE_CXX_BINDING if (op_ptr->language == MPID_LANG_CXX) { uop = (MPI_User_function *) op_ptr->function.c_function; is_cxx_uop = 1; } else #endif { if ((op_ptr->language == MPID_LANG_C)) { uop = (MPI_User_function *) op_ptr->function.c_function; } else { uop = (MPI_User_function *) op_ptr->function.f77_function; #if defined(HAVE_FORTRAN_BINDING) && !defined(HAVE_FINT_IS_INT) is_f77_uop = 1; #endif } } } /* actually perform the reduction */ #ifdef HAVE_CXX_BINDING if (is_cxx_uop) { (*MPIR_Process.cxx_call_op_fn)(inbuf, inoutbuf, count, datatype, uop); } else #endif { #if defined(HAVE_FORTRAN_BINDING) && !defined(HAVE_FINT_IS_INT) if (is_f77_uop) { MPI_Fint lcount = (MPI_Fint)count; MPI_Fint ldtype = (MPI_Fint)datatype; (*uop)(inbuf, inoutbuf, &lcount, &ldtype); } else { (*uop)(inbuf, inoutbuf, &count, &datatype); } #else (*uop)(inbuf, inoutbuf, &count, &datatype); #endif } /* --BEGIN ERROR HANDLING-- */ if (MPIU_THREADPRIV_FIELD(op_errno)) mpi_errno = MPIU_THREADPRIV_FIELD(op_errno); /* --END ERROR HANDLING-- */ fn_exit: return mpi_errno; fn_fail: goto fn_exit; }
int MPIR_Exscan ( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag ) { MPI_Status status; int rank, comm_size; int mpi_errno = MPI_SUCCESS; int mpi_errno_ret = MPI_SUCCESS; int mask, dst, is_commutative, flag; MPI_Aint true_extent, true_lb, extent; void *partial_scan, *tmp_buf; MPI_User_function *uop; MPID_Op *op_ptr; MPI_Comm comm; MPIU_CHKLMEM_DECL(2); MPIU_THREADPRIV_DECL; #ifdef HAVE_CXX_BINDING int is_cxx_uop = 0; #endif if (count == 0) return MPI_SUCCESS; MPIU_THREADPRIV_GET; comm = comm_ptr->handle; comm_size = comm_ptr->local_size; rank = comm_ptr->rank; /* set op_errno to 0. stored in perthread structure */ MPIU_THREADPRIV_FIELD(op_errno) = 0; if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) { is_commutative = 1; /* get the function by indexing into the op table */ uop = MPIR_Op_table[op%16 - 1]; } else { MPID_Op_get_ptr(op, op_ptr); if (op_ptr->kind == MPID_OP_USER_NONCOMMUTE) is_commutative = 0; else is_commutative = 1; #ifdef HAVE_CXX_BINDING if (op_ptr->language == MPID_LANG_CXX) { uop = (MPI_User_function *) op_ptr->function.c_function; is_cxx_uop = 1; } else #endif if ((op_ptr->language == MPID_LANG_C)) uop = (MPI_User_function *) op_ptr->function.c_function; else uop = (MPI_User_function *) op_ptr->function.f77_function; } /* need to allocate temporary buffer to store partial scan*/ MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); MPID_Datatype_get_extent_macro( datatype, extent ); MPIU_CHKLMEM_MALLOC(partial_scan, void *, (count*(MPIR_MAX(true_extent,extent))), mpi_errno, "partial_scan"); /* adjust for potential negative lower bound in datatype */ partial_scan = (void *)((char*)partial_scan - true_lb); /* need to allocate temporary buffer to store incoming data*/ MPIU_CHKLMEM_MALLOC(tmp_buf, void *, (count*(MPIR_MAX(true_extent,extent))), mpi_errno, "tmp_buf"); /* adjust for potential negative lower bound in datatype */ tmp_buf = (void *)((char*)tmp_buf - true_lb); mpi_errno = MPIR_Localcopy((sendbuf == MPI_IN_PLACE ? recvbuf : sendbuf), count, datatype, partial_scan, count, datatype); if (mpi_errno) MPIU_ERR_POP(mpi_errno); /* check if multiple threads are calling this collective function */ MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr ); flag = 0; mask = 0x1; while (mask < comm_size) { dst = rank ^ mask; if (dst < comm_size) { /* Send partial_scan to dst. Recv into tmp_buf */ mpi_errno = MPIC_Sendrecv_ft(partial_scan, count, datatype, dst, MPIR_EXSCAN_TAG, tmp_buf, count, datatype, dst, MPIR_EXSCAN_TAG, comm, &status, errflag); if (mpi_errno) { /* for communication errors, just record the error but continue */ *errflag = TRUE; MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail"); MPIU_ERR_ADD(mpi_errno_ret, mpi_errno); } if (rank > dst) { call_uop(tmp_buf, partial_scan, count, datatype); /* On rank 0, recvbuf is not defined. For sendbuf==MPI_IN_PLACE recvbuf must not change (per MPI-2.2). On rank 1, recvbuf is to be set equal to the value in sendbuf on rank 0. On others, recvbuf is the scan of values in the sendbufs on lower ranks. */ if (rank != 0) { if (flag == 0) { /* simply copy data recd from rank 0 into recvbuf */ mpi_errno = MPIR_Localcopy(tmp_buf, count, datatype, recvbuf, count, datatype); if (mpi_errno) MPIU_ERR_POP(mpi_errno); flag = 1; } else { call_uop(tmp_buf, recvbuf, count, datatype); } } } else { if (is_commutative) { call_uop(tmp_buf, partial_scan, count, datatype); } else { call_uop(partial_scan, tmp_buf, count, datatype); mpi_errno = MPIR_Localcopy(tmp_buf, count, datatype, partial_scan, count, datatype); if (mpi_errno) MPIU_ERR_POP(mpi_errno); } } } mask <<= 1; } /* check if multiple threads are calling this collective function */ MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr ); if (MPIU_THREADPRIV_FIELD(op_errno)) mpi_errno = MPIU_THREADPRIV_FIELD(op_errno); fn_exit: MPIU_CHKLMEM_FREEALL(); if (mpi_errno_ret) mpi_errno = mpi_errno_ret; else if (*errflag) MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail"); return mpi_errno; fn_fail: goto fn_exit; }
/* not declared static because a machine-specific function may call this one in some cases */ int MPIR_Reduce_scatter_block_intra ( const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPID_Comm *comm_ptr, int *errflag ) { int rank, comm_size, i; MPI_Aint extent, true_extent, true_lb; int *disps; void *tmp_recvbuf, *tmp_results; int mpi_errno = MPI_SUCCESS; int mpi_errno_ret = MPI_SUCCESS; int type_size, dis[2], blklens[2], total_count, nbytes, src, dst; int mask, dst_tree_root, my_tree_root, j, k; int *newcnts, *newdisps, rem, newdst, send_idx, recv_idx, last_idx, send_cnt, recv_cnt; int pof2, old_i, newrank, received; MPI_Datatype sendtype, recvtype; int nprocs_completed, tmp_mask, tree_root, is_commutative; MPID_Op *op_ptr; MPI_Comm comm; MPIU_THREADPRIV_DECL; MPIU_CHKLMEM_DECL(5); comm = comm_ptr->handle; comm_size = comm_ptr->local_size; rank = comm_ptr->rank; /* set op_errno to 0. stored in perthread structure */ MPIU_THREADPRIV_GET; MPIU_THREADPRIV_FIELD(op_errno) = 0; if (recvcount == 0) { goto fn_exit; } MPID_Datatype_get_extent_macro(datatype, extent); MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) { is_commutative = 1; } else { MPID_Op_get_ptr(op, op_ptr); if (op_ptr->kind == MPID_OP_USER_NONCOMMUTE) is_commutative = 0; else is_commutative = 1; } MPIU_CHKLMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps"); total_count = comm_size*recvcount; for (i=0; i<comm_size; i++) { disps[i] = i*recvcount; } MPID_Datatype_get_size_macro(datatype, type_size); nbytes = total_count * type_size; /* check if multiple threads are calling this collective function */ MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr ); /* total_count*extent eventually gets malloced. it isn't added to * a user-passed in buffer */ MPID_Ensure_Aint_fits_in_pointer(total_count * MPIR_MAX(true_extent, extent)); if ((is_commutative) && (nbytes < MPIR_PARAM_REDSCAT_COMMUTATIVE_LONG_MSG_SIZE)) { /* commutative and short. use recursive halving algorithm */ /* allocate temp. buffer to receive incoming data */ MPIU_CHKLMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPIR_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf"); /* adjust for potential negative lower bound in datatype */ tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb); /* need to allocate another temporary buffer to accumulate results because recvbuf may not be big enough */ MPIU_CHKLMEM_MALLOC(tmp_results, void *, total_count*(MPIR_MAX(true_extent,extent)), mpi_errno, "tmp_results"); /* adjust for potential negative lower bound in datatype */ tmp_results = (void *)((char*)tmp_results - true_lb); /* copy sendbuf into tmp_results */ if (sendbuf != MPI_IN_PLACE) mpi_errno = MPIR_Localcopy(sendbuf, total_count, datatype, tmp_results, total_count, datatype); else mpi_errno = MPIR_Localcopy(recvbuf, total_count, datatype, tmp_results, total_count, datatype); if (mpi_errno) MPIU_ERR_POP(mpi_errno); pof2 = 1; while (pof2 <= comm_size) pof2 <<= 1; pof2 >>=1; rem = comm_size - pof2; /* In the non-power-of-two case, all even-numbered processes of rank < 2*rem send their data to (rank+1). These even-numbered processes no longer participate in the algorithm until the very end. The remaining processes form a nice power-of-two. */ if (rank < 2*rem) { if (rank % 2 == 0) { /* even */ mpi_errno = MPIC_Send_ft(tmp_results, total_count, datatype, rank+1, MPIR_REDUCE_SCATTER_BLOCK_TAG, comm, errflag); if (mpi_errno) { /* for communication errors, just record the error but continue */ *errflag = TRUE; MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail"); MPIU_ERR_ADD(mpi_errno_ret, mpi_errno); } /* temporarily set the rank to -1 so that this process does not pariticipate in recursive doubling */ newrank = -1; } else { /* odd */ mpi_errno = MPIC_Recv_ft(tmp_recvbuf, total_count, datatype, rank-1, MPIR_REDUCE_SCATTER_BLOCK_TAG, comm, MPI_STATUS_IGNORE, errflag); if (mpi_errno) { /* for communication errors, just record the error but continue */ *errflag = TRUE; MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail"); MPIU_ERR_ADD(mpi_errno_ret, mpi_errno); } /* do the reduction on received data. since the ordering is right, it doesn't matter whether the operation is commutative or not. */ mpi_errno = MPIR_Reduce_local_impl( tmp_recvbuf, tmp_results, total_count, datatype, op); /* change the rank */ newrank = rank / 2; } } else /* rank >= 2*rem */ newrank = rank - rem; if (newrank != -1) { /* recalculate the recvcnts and disps arrays because the even-numbered processes who no longer participate will have their result calculated by the process to their right (rank+1). */ MPIU_CHKLMEM_MALLOC(newcnts, int *, pof2*sizeof(int), mpi_errno, "newcnts"); MPIU_CHKLMEM_MALLOC(newdisps, int *, pof2*sizeof(int), mpi_errno, "newdisps"); for (i=0; i<pof2; i++) { /* what does i map to in the old ranking? */ old_i = (i < rem) ? i*2 + 1 : i + rem; if (old_i < 2*rem) { /* This process has to also do its left neighbor's work */ newcnts[i] = 2 * recvcount; } else newcnts[i] = recvcount; } newdisps[0] = 0; for (i=1; i<pof2; i++) newdisps[i] = newdisps[i-1] + newcnts[i-1]; mask = pof2 >> 1; send_idx = recv_idx = 0; last_idx = pof2; while (mask > 0) { newdst = newrank ^ mask; /* find real rank of dest */ dst = (newdst < rem) ? newdst*2 + 1 : newdst + rem; send_cnt = recv_cnt = 0; if (newrank < newdst) { send_idx = recv_idx + mask; for (i=send_idx; i<last_idx; i++) send_cnt += newcnts[i]; for (i=recv_idx; i<send_idx; i++) recv_cnt += newcnts[i]; } else { recv_idx = send_idx + mask; for (i=send_idx; i<recv_idx; i++) send_cnt += newcnts[i]; for (i=recv_idx; i<last_idx; i++) recv_cnt += newcnts[i]; } /* printf("Rank %d, send_idx %d, recv_idx %d, send_cnt %d, recv_cnt %d, last_idx %d\n", newrank, send_idx, recv_idx, send_cnt, recv_cnt, last_idx); */ /* Send data from tmp_results. Recv into tmp_recvbuf */ if ((send_cnt != 0) && (recv_cnt != 0)) mpi_errno = MPIC_Sendrecv_ft((char *) tmp_results + newdisps[send_idx]*extent, send_cnt, datatype, dst, MPIR_REDUCE_SCATTER_BLOCK_TAG, (char *) tmp_recvbuf + newdisps[recv_idx]*extent, recv_cnt, datatype, dst, MPIR_REDUCE_SCATTER_BLOCK_TAG, comm, MPI_STATUS_IGNORE, errflag); else if ((send_cnt == 0) && (recv_cnt != 0)) mpi_errno = MPIC_Recv_ft((char *) tmp_recvbuf + newdisps[recv_idx]*extent, recv_cnt, datatype, dst, MPIR_REDUCE_SCATTER_BLOCK_TAG, comm, MPI_STATUS_IGNORE, errflag); else if ((recv_cnt == 0) && (send_cnt != 0)) mpi_errno = MPIC_Send_ft((char *) tmp_results + newdisps[send_idx]*extent, send_cnt, datatype, dst, MPIR_REDUCE_SCATTER_BLOCK_TAG, comm, errflag); if (mpi_errno) { /* for communication errors, just record the error but continue */ *errflag = TRUE; MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail"); MPIU_ERR_ADD(mpi_errno_ret, mpi_errno); } /* tmp_recvbuf contains data received in this step. tmp_results contains data accumulated so far */ if (recv_cnt) { mpi_errno = MPIR_Reduce_local_impl( (char *) tmp_recvbuf + newdisps[recv_idx]*extent, (char *) tmp_results + newdisps[recv_idx]*extent, recv_cnt, datatype, op); } /* update send_idx for next iteration */ send_idx = recv_idx; last_idx = recv_idx + mask; mask >>= 1; } /* copy this process's result from tmp_results to recvbuf */ mpi_errno = MPIR_Localcopy((char *)tmp_results + disps[rank]*extent, recvcount, datatype, recvbuf, recvcount, datatype); if (mpi_errno) MPIU_ERR_POP(mpi_errno); }