/*@

MPI_Reduce - Reduces values on all processes to a single value

Input Parameters:
+ sendbuf - address of send buffer (choice)
. count - number of elements in send buffer (integer)
. datatype - data type of elements of send buffer (handle)
. op - reduce operation (handle)
. root - rank of root process (integer)
- comm - communicator (handle)

Output Parameters:
. recvbuf - address of receive buffer (choice,
 significant only at 'root')

.N ThreadSafe

.N Fortran

.N collops

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_COUNT
.N MPI_ERR_TYPE
.N MPI_ERR_BUFFER
.N MPI_ERR_BUFFER_ALIAS

@*/
int MPI_Reduce(const void *sendbuf, void *recvbuf, int count,
               MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Comm *comm_ptr = NULL;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_REDUCE);

    MPIR_ERRTEST_INITIALIZED_ORDIE();

    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_REDUCE);

    /* Stage 1: check the raw communicator handle before it is converted
     * to an object pointer. */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_ERRTEST_COMM(comm, mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* Stage 2: turn the communicator handle into its object pointer. */
    MPIR_Comm_get_ptr(comm, comm_ptr);

    /* Stage 3: checks that need the converted communicator object. */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_Datatype *datatype_ptr = NULL;
            MPIR_Op *op_ptr = NULL;
            int my_rank;

            MPIR_Comm_valid_ptr(comm_ptr, mpi_errno, FALSE);
            if (mpi_errno != MPI_SUCCESS) {
                goto fn_fail;
            }

            if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
                MPIR_ERRTEST_INTRA_ROOT(comm_ptr, root, mpi_errno);

                MPIR_ERRTEST_COUNT(count, mpi_errno);
                MPIR_ERRTEST_DATATYPE(datatype, "datatype", mpi_errno);
                /* Only non-builtin datatypes get the pointer-validity and
                 * committed checks. */
                if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
                    MPIR_Datatype_get_ptr(datatype, datatype_ptr);
                    MPIR_Datatype_valid_ptr(datatype_ptr, mpi_errno);
                    if (mpi_errno != MPI_SUCCESS) {
                        goto fn_fail;
                    }
                    MPIR_Datatype_committed_ptr(datatype_ptr, mpi_errno);
                    if (mpi_errno != MPI_SUCCESS) {
                        goto fn_fail;
                    }
                }

                /* The send buffer is checked on every rank unless the
                 * caller passed MPI_IN_PLACE. */
                if (sendbuf != MPI_IN_PLACE) {
                    MPIR_ERRTEST_USERBUFFER(sendbuf, count, datatype, mpi_errno);
                }

                my_rank = comm_ptr->rank;
                if (my_rank != root) {
                    /* Non-root ranks: in-place test on the send buffer. */
                    MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, count, mpi_errno);
                } else {
                    /* Root: receive buffer checks, plus the aliasing test
                     * when there is data and a distinct send buffer. */
                    MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, count, mpi_errno);
                    MPIR_ERRTEST_USERBUFFER(recvbuf, count, datatype, mpi_errno);
                    if (count != 0 && sendbuf != MPI_IN_PLACE) {
                        MPIR_ERRTEST_ALIAS_COLL(sendbuf, recvbuf, mpi_errno);
                    }
                }
            } else if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
                MPIR_ERRTEST_INTER_ROOT(comm_ptr, root, mpi_errno);

                if (root == MPI_ROOT) {
                    /* root == MPI_ROOT: count, datatype and receive buffer
                     * are checked. */
                    MPIR_ERRTEST_COUNT(count, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(datatype, "datatype", mpi_errno);
                    if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
                        MPIR_Datatype_get_ptr(datatype, datatype_ptr);
                        MPIR_Datatype_valid_ptr(datatype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) {
                            goto fn_fail;
                        }
                        MPIR_Datatype_committed_ptr(datatype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) {
                            goto fn_fail;
                        }
                    }
                    MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, count, mpi_errno);
                    MPIR_ERRTEST_USERBUFFER(recvbuf, count, datatype, mpi_errno);
                } else if (root != MPI_PROC_NULL) {
                    /* Any other root value except MPI_PROC_NULL: count,
                     * datatype and send buffer are checked.  With
                     * MPI_PROC_NULL none of these checks run. */
                    MPIR_ERRTEST_COUNT(count, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(datatype, "datatype", mpi_errno);
                    if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
                        MPIR_Datatype_get_ptr(datatype, datatype_ptr);
                        MPIR_Datatype_valid_ptr(datatype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) {
                            goto fn_fail;
                        }
                        MPIR_Datatype_committed_ptr(datatype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS) {
                            goto fn_fail;
                        }
                    }
                    MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, count, mpi_errno);
                    MPIR_ERRTEST_USERBUFFER(sendbuf, count, datatype, mpi_errno);
                }
            }

            MPIR_ERRTEST_OP(op, mpi_errno);
            if (mpi_errno != MPI_SUCCESS) {
                goto fn_fail;
            }

            if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) {
                /* Builtin op: the function obtained from
                 * MPIR_OP_HDL_TO_DTYPE_FN(op) is called on the datatype and
                 * its return value becomes the error code. */
                mpi_errno = (*MPIR_OP_HDL_TO_DTYPE_FN(op))(datatype);
            } else {
                /* Non-builtin op: convert the handle and validate the
                 * resulting pointer. */
                MPIR_Op_get_ptr(op, op_ptr);
                MPIR_Op_valid_ptr(op_ptr, mpi_errno);
            }
            if (mpi_errno != MPI_SUCCESS) {
                goto fn_fail;
            }
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    mpi_errno = MPIR_Reduce(sendbuf, recvbuf, count, datatype, op, root,
                            comm_ptr, &errflag);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    /* ... end of body of routine ... */

  fn_exit:
    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_REDUCE);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#ifdef HAVE_ERROR_CHECKING
    {
        /* Wrap the error with this routine's name and argument values. */
        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE,
                                         FCNAME, __LINE__, MPI_ERR_OTHER,
                                         "**mpi_reduce",
                                         "**mpi_reduce %p %p %d %D %O %d %C",
                                         sendbuf, recvbuf, count, datatype,
                                         op, root, comm);
    }
#endif
    mpi_errno = MPIR_Err_return_comm(comm_ptr, FCNAME, mpi_errno);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
int MPIDO_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPID_Comm *comm_ptr, int *mpierrno) { #ifndef HAVE_PAMI_IN_PLACE if (sendbuf == MPI_IN_PLACE) { MPID_Abort (NULL, 0, 1, "'MPI_IN_PLACE' requries support for `PAMI_IN_PLACE`"); return -1; } #endif MPID_Datatype *dt_null = NULL; MPI_Aint true_lb = 0; int dt_contig ATTRIBUTE((unused)), tsize; int mu; char *sbuf, *rbuf; pami_data_function pop; pami_type_t pdt; int rc; int alg_selected = 0; const int rank = comm_ptr->rank; #if ASSERT_LEVEL==0 /* We can't afford the tracing in ndebug/performance libraries */ const unsigned verbose = 0; #else const unsigned verbose = (MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL) && (rank == 0); #endif const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid); const int selected_type = mpid->user_selected_type[PAMI_XFER_REDUCE]; rc = MPIDI_Datatype_to_pami(datatype, &pdt, op, &pop, &mu); if(unlikely(verbose)) fprintf(stderr,"reduce - rc %u, root %u, count %d, dt: %p, op: %p, mu: %u, selectedvar %u != %u (MPICH) sendbuf %p, recvbuf %p\n", rc, root, count, pdt, pop, mu, (unsigned)selected_type, MPID_COLL_USE_MPICH,sendbuf, recvbuf); pami_xfer_t reduce; pami_algorithm_t my_reduce=0; const pami_metadata_t *my_md = (pami_metadata_t *)NULL; int queryreq = 0; volatile unsigned reduce_active = 1; MPIDI_Datatype_get_info(count, datatype, dt_contig, tsize, dt_null, true_lb); rbuf = (char *)recvbuf + true_lb; sbuf = (char *)sendbuf + true_lb; if(sendbuf == MPI_IN_PLACE) { if(unlikely(verbose)) fprintf(stderr,"reduce MPI_IN_PLACE send buffering (%d,%d)\n",count,tsize); sbuf = PAMI_IN_PLACE; } reduce.cb_done = reduce_cb_done; reduce.cookie = (void *)&reduce_active; if(mpid->optreduce) /* GLUE_ALLREDUCE */ { char* tbuf = NULL; if(unlikely(verbose)) fprintf(stderr,"Using protocol GLUE_ALLREDUCE for reduce (%d,%d)\n",count,tsize); MPIDI_Update_last_algorithm(comm_ptr, "REDUCE_OPT_ALLREDUCE"); void *destbuf = recvbuf; if(rank != root) /* temp 
buffer for non-root destbuf */ { tbuf = destbuf = MPL_malloc(tsize); } /* Switch to comm->coll_fns->fn() */ MPIDO_Allreduce(sendbuf, destbuf, count, datatype, op, comm_ptr, mpierrno); if(tbuf) MPL_free(tbuf); return 0; } if(selected_type == MPID_COLL_USE_MPICH || rc != MPI_SUCCESS) { if(unlikely(verbose)) fprintf(stderr,"Using MPICH reduce algorithm\n"); #if CUDA_AWARE_SUPPORT if(MPIDI_Process.cuda_aware_support_on) { MPI_Aint dt_extent; MPID_Datatype_get_extent_macro(datatype, dt_extent); char *scbuf = NULL; char *rcbuf = NULL; int is_send_dev_buf = MPIDI_cuda_is_device_buf(sendbuf); int is_recv_dev_buf = MPIDI_cuda_is_device_buf(recvbuf); if(is_send_dev_buf) { scbuf = MPL_malloc(dt_extent * count); cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost); if (cudaSuccess != cudaerr) fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr)); } else scbuf = sendbuf; if(is_recv_dev_buf) { rcbuf = MPL_malloc(dt_extent * count); if(sendbuf == MPI_IN_PLACE) { cudaError_t cudaerr = CudaMemcpy(rcbuf, recvbuf, dt_extent * count, cudaMemcpyDeviceToHost); if (cudaSuccess != cudaerr) fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr)); } else memset(rcbuf, 0, dt_extent * count); } else rcbuf = recvbuf; int cuda_res = MPIR_Reduce(scbuf, rcbuf, count, datatype, op, root, comm_ptr, mpierrno); if(is_send_dev_buf)MPL_free(scbuf); if(is_recv_dev_buf) { cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, dt_extent * count, cudaMemcpyHostToDevice); if (cudaSuccess != cudaerr) fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr)); MPL_free(rcbuf); } return cuda_res; } else #endif return MPIR_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, mpierrno); } if(selected_type == MPID_COLL_OPTIMIZED) { if((mpid->cutoff_size[PAMI_XFER_REDUCE][0] == 0) || (mpid->cutoff_size[PAMI_XFER_REDUCE][0] >= tsize && mpid->cutoff_size[PAMI_XFER_REDUCE][0] > 0)) { TRACE_ERR("Optimized Reduce (%s) was 
pre-selected\n", mpid->opt_protocol_md[PAMI_XFER_REDUCE][0].name); my_reduce = mpid->opt_protocol[PAMI_XFER_REDUCE][0]; my_md = &mpid->opt_protocol_md[PAMI_XFER_REDUCE][0]; queryreq = mpid->must_query[PAMI_XFER_REDUCE][0]; } } else { TRACE_ERR("Optimized reduce (%s) was specified by user\n", mpid->user_metadata[PAMI_XFER_REDUCE].name); my_reduce = mpid->user_selected[PAMI_XFER_REDUCE]; my_md = &mpid->user_metadata[PAMI_XFER_REDUCE]; queryreq = selected_type; } reduce.algorithm = my_reduce; reduce.cmd.xfer_reduce.sndbuf = sbuf; reduce.cmd.xfer_reduce.rcvbuf = rbuf; reduce.cmd.xfer_reduce.stype = pdt; reduce.cmd.xfer_reduce.rtype = pdt; reduce.cmd.xfer_reduce.stypecount = count; reduce.cmd.xfer_reduce.rtypecount = count; reduce.cmd.xfer_reduce.op = pop; reduce.cmd.xfer_reduce.root = MPIDI_Task_to_endpoint(MPID_VCR_GET_LPID(comm_ptr->vcr, root), 0); if(unlikely(queryreq == MPID_COLL_ALWAYS_QUERY || queryreq == MPID_COLL_CHECK_FN_REQUIRED)) { metadata_result_t result = {0}; TRACE_ERR("Querying reduce protocol %s, type was %d\n", my_md->name, queryreq); if(my_md->check_fn == NULL) { /* process metadata bits */ if((!my_md->check_correct.values.inplace) && (sendbuf == MPI_IN_PLACE)) result.check.unspecified = 1; if(my_md->check_correct.values.rangeminmax) { MPI_Aint data_true_lb ATTRIBUTE((unused)); MPID_Datatype *data_ptr; int data_size, data_contig ATTRIBUTE((unused)); MPIDI_Datatype_get_info(count, datatype, data_contig, data_size, data_ptr, data_true_lb); if((my_md->range_lo <= data_size) && (my_md->range_hi >= data_size)) ; /* ok, algorithm selected */ else { result.check.range = 1; if(unlikely(verbose)) { fprintf(stderr,"message size (%u) outside range (%zu<->%zu) for %s.\n", data_size, my_md->range_lo, my_md->range_hi, my_md->name); } } } }