/*
 *   reduce_intra_binomial
 *
 *   Function:      Reduce over the communicator's cached in-order binomial
 *                  tree by delegating to ompi_coll_base_reduce_generic().
 *   Accepts:       same as MPI_Reduce(), plus segsize (combined with the
 *                  datatype size to derive the per-segment element count)
 *                  and max_outstanding_reqs (forwarded unchanged).
 *   Returns:       the return value of ompi_coll_base_reduce_generic()
 */
int ompi_coll_base_reduce_intra_binomial( const void *sendbuf, void *recvbuf,
                                          int count, ompi_datatype_t* datatype,
                                          ompi_op_t* op, int root,
                                          ompi_communicator_t* comm,
                                          mca_coll_base_module_t *module,
                                          uint32_t segsize,
                                          int max_outstanding_reqs )
{
    /* Per-communicator collective state; holds the cached tree used below. */
    mca_coll_base_comm_t *coll_data = module->base_data;
    size_t dtype_size;
    int seg_elems = count;

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_binomial rank %d ss %5d",
                 ompi_comm_rank(comm), segsize));

    /* Make sure the cached in-order binomial tree matches this root. */
    COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, module, root );

    /* Derive the number of elements sent per operation from the requested
     * segment size and the size of one datatype element. */
    ompi_datatype_type_size( datatype, &dtype_size );
    COLL_BASE_COMPUTED_SEGCOUNT( segsize, dtype_size, seg_elems );

    return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
                                          op, root, comm, module,
                                          coll_data->cached_in_order_bmtree,
                                          seg_elems, max_outstanding_reqs );
}
/*
 *   reduce_intra_in_order_binary
 *
 *   Function:      Logarithmic reduce operation for non-commutative operations.
 *   Accepts:       same as MPI_Reduce(), plus segsize (used together with the
 *                  datatype size to compute the per-segment element count)
 *                  and max_outstanding_reqs (forwarded to the generic reduce)
 *   Returns:       MPI_SUCCESS or error code
 *
 *   The reduction itself always runs towards rank (size - 1).  When the
 *   caller's root is a different rank, the result is forwarded from rank
 *   (size - 1) to the real root with one extra point-to-point message.
 *   Any temporary buffer allocated here is released on every exit path.
 */
int ompi_coll_base_reduce_intra_in_order_binary( const void *sendbuf, void *recvbuf,
                                                 int count,
                                                 ompi_datatype_t* datatype,
                                                 ompi_op_t* op, int root,
                                                 ompi_communicator_t* comm,
                                                 mca_coll_base_module_t *module,
                                                 uint32_t segsize,
                                                 int max_outstanding_reqs )
{
    int ret, rank, size, io_root, segcount = count;
    void *use_this_sendbuf = NULL;
    void *use_this_recvbuf = NULL;
    char *tmpbuf_free = NULL;   /* base of the temporary allocation, if any */
    size_t typelng;
    mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
    mca_coll_base_comm_t *data = base_module->base_data;

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);
    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_in_order_binary rank %d ss %5d",
                 rank, segsize));

    COLL_BASE_UPDATE_IN_ORDER_BINTREE( comm, base_module );

    /**
     * Determine number of segments and number of elements
     * sent per operation
     */
    ompi_datatype_type_size( datatype, &typelng );
    COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );

    /* An in-order binary tree must use root (size-1) to preserve the order of
       operations.  Thus, if root is not rank (size - 1), then we must handle
       1. MPI_IN_PLACE option on real root, and
       2. we must allocate temporary recvbuf on rank (size - 1).
       Note that generic function must be careful not to switch order of
       operations for non-commutative ops.
    */
    io_root = size - 1;
    use_this_sendbuf = (void *)sendbuf;
    use_this_recvbuf = recvbuf;
    if (io_root != root) {
        ptrdiff_t dsize, gap = 0;
        char *tmpbuf;

        dsize = opal_datatype_span(&datatype->super, count, &gap);

        if ((root == rank) && (MPI_IN_PLACE == sendbuf)) {
            /* The real root contributes through recvbuf (in place); copy
               that contribution aside so recvbuf can later receive the
               final result from io_root. */
            tmpbuf_free = malloc(dsize);
            if (NULL == tmpbuf_free) {
                return MPI_ERR_INTERN;
            }
            /* Buffer pointer is the allocation shifted back by the gap
               reported by opal_datatype_span(). */
            tmpbuf = tmpbuf_free - gap;
            ompi_datatype_copy_content_same_ddt(datatype, count,
                                                (char*)tmpbuf,
                                                (char*)recvbuf);
            use_this_sendbuf = tmpbuf;
        } else if (io_root == rank) {
            /* io_root is not the caller's root, so it needs a scratch
               receive buffer for the reduction result. */
            tmpbuf_free = malloc(dsize);
            if (NULL == tmpbuf_free) {
                return MPI_ERR_INTERN;
            }
            tmpbuf = tmpbuf_free - gap;
            use_this_recvbuf = tmpbuf;
        }
    }

    /* Use generic reduce with in-order binary tree topology and io_root */
    ret = ompi_coll_base_reduce_generic( use_this_sendbuf, use_this_recvbuf,
                                         count, datatype, op, io_root, comm,
                                         module,
                                         data->cached_in_order_bintree,
                                         segcount, max_outstanding_reqs );
    if (MPI_SUCCESS != ret) {
        goto cleanup;
    }

    /* Forward the result to the real root when it is not io_root */
    if (io_root != root) {
        if (root == rank) {
            /* Receive result from rank io_root to recvbuf */
            ret = MCA_PML_CALL(recv(recvbuf, count, datatype, io_root,
                                    MCA_COLL_BASE_TAG_REDUCE, comm,
                                    MPI_STATUS_IGNORE));
        } else if (io_root == rank) {
            /* Send result from use_this_recvbuf to root */
            ret = MCA_PML_CALL(send(use_this_recvbuf, count, datatype, root,
                                    MCA_COLL_BASE_TAG_REDUCE,
                                    MCA_PML_BASE_SEND_STANDARD, comm));
        }
    }

 cleanup:
    /* Single exit after the allocation point: ret is MPI_SUCCESS on the
       normal path and the first failing call's code otherwise.
       free(NULL) is a no-op, so no guard is required. */
    free(tmpbuf_free);
    return ret;
}
/*
 *   gather_intra_linear_sync
 *
 *   Function:      - synchronized linear gather: the root signals each
 *                    non-root rank with a zero-byte message before that rank
 *                    sends its data in two segments
 *   Accepts:       - same arguments as MPI_Gather(), first segment size
 *   Returns:       - MPI_SUCCESS or error code
 *
 *   On failure the root releases every receive request it has already
 *   posted before freeing the request array.
 */
int ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount,
                                            struct ompi_datatype_t *sdtype,
                                            void *rbuf, int rcount,
                                            struct ompi_datatype_t *rdtype,
                                            int root,
                                            struct ompi_communicator_t *comm,
                                            mca_coll_base_module_t *module,
                                            int first_segment_size)
{
    int i, ret, line, rank, size, first_segment_count;
    ompi_request_t **reqs = NULL;
    /* Kept at function scope so the error handler can release it. */
    ompi_request_t *first_segment_req = MPI_REQUEST_NULL;
    MPI_Aint extent, lb;
    size_t typelng;

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "ompi_coll_base_gather_intra_linear_sync rank %d, segment %d", rank, first_segment_size));

    if (rank != root) {
        /* Non-root processes:
           - receive zero byte message from the root,
           - send the first segment of the data (standard-mode send),
           - send the second segment of the data.
        */

        ompi_datatype_type_size(sdtype, &typelng);
        ompi_datatype_get_extent(sdtype, &lb, &extent);
        first_segment_count = scount;
        COLL_BASE_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng,
                                     first_segment_count );

        /* Zero-byte receive: the buffer is only a placeholder, so the
           const qualifier is cast away explicitly rather than silently. */
        ret = MCA_PML_CALL(recv((void *)sbuf, 0, MPI_BYTE, root,
                                MCA_COLL_BASE_TAG_GATHER,
                                comm, MPI_STATUS_IGNORE));
        if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }

        ret = MCA_PML_CALL(send(sbuf, first_segment_count, sdtype, root,
                                MCA_COLL_BASE_TAG_GATHER,
                                MCA_PML_BASE_SEND_STANDARD, comm));
        if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }

        ret = MCA_PML_CALL(send((char*)sbuf + extent * first_segment_count,
                                (scount - first_segment_count), sdtype,
                                root, MCA_COLL_BASE_TAG_GATHER,
                                MCA_PML_BASE_SEND_STANDARD, comm));
        if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }

    } else {
        /* Root process,
           - For every non-root node:
           - post irecv for the first segment of the message
           - send zero byte message to signal node to send the message
           - post irecv for the second segment of the message
           - wait for the first segment to complete
           - Copy local data if necessary
           - Waitall for all the second segments to complete.
        */
        char *ptmp;

        /* calloc leaves untouched slots as NULL; the error handler relies
           on that to skip entries that never received a request. */
        reqs = (ompi_request_t**) calloc(size, sizeof(ompi_request_t*));
        if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; }

        ompi_datatype_type_size(rdtype, &typelng);
        ompi_datatype_get_extent(rdtype, &lb, &extent);
        first_segment_count = rcount;
        COLL_BASE_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng,
                                     first_segment_count );

        ptmp = (char *) rbuf;
        for (i = 0; i < size; ++i) {
            if (i == rank) {
                /* skip myself */
                reqs[i] = MPI_REQUEST_NULL;
                continue;
            }

            /* irecv for the first segment from i */
            ptmp = (char*)rbuf + (ptrdiff_t)i * (ptrdiff_t)rcount * extent;
            ret = MCA_PML_CALL(irecv(ptmp, first_segment_count, rdtype, i,
                                     MCA_COLL_BASE_TAG_GATHER, comm,
                                     &first_segment_req));
            if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }

            /* send sync message */
            ret = MCA_PML_CALL(send(rbuf, 0, MPI_BYTE, i,
                                    MCA_COLL_BASE_TAG_GATHER,
                                    MCA_PML_BASE_SEND_STANDARD, comm));
            if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }

            /* irecv for the second segment */
            ptmp = (char*)rbuf + ((ptrdiff_t)i * (ptrdiff_t)rcount + first_segment_count) * extent;
            ret = MCA_PML_CALL(irecv(ptmp, (rcount - first_segment_count),
                                     rdtype, i, MCA_COLL_BASE_TAG_GATHER, comm,
                                     &reqs[i]));
            if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }

            /* wait on the first segment to complete */
            ret = ompi_request_wait(&first_segment_req, MPI_STATUS_IGNORE);
            if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
        }

        /* copy local data if necessary */
        if (MPI_IN_PLACE != sbuf) {
            ret = ompi_datatype_sndrcv((void *)sbuf, scount, sdtype,
                                       (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * extent,
                                       rcount, rdtype);
            if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
        }

        /* wait all second segments to complete */
        ret = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE);
        if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }

        free(reqs);
    }

    /* All done */

    return MPI_SUCCESS;
 error_hndl:
    /* Release any receive that is still posted so the request objects are
       not abandoned when the array that tracks them is freed.
       NOTE(review): assumes ompi_request_wait()/ompi_request_wait_all()
       reset the handles they complete to MPI_REQUEST_NULL - confirm. */
    if (NULL != first_segment_req && MPI_REQUEST_NULL != first_segment_req) {
        ompi_request_free(&first_segment_req);
    }
    if (NULL != reqs) {
        for (i = 0; i < size; ++i) {
            if (NULL != reqs[i] && MPI_REQUEST_NULL != reqs[i]) {
                ompi_request_free(&reqs[i]);
            }
        }
        free(reqs);
    }
    OPAL_OUTPUT (( ompi_coll_base_framework.framework_output,
                   "ERROR_HNDL: node %d file %s line %d error %d\n",
                   rank, __FILE__, line, ret ));
    return ret;
}