/* * Finalize module on the communicator */ int ompi_coll_tuned_module_finalize(struct ompi_communicator_t *comm) { if (NULL == comm->c_coll_selected_module) { return OMPI_SUCCESS; } #if OMPI_ENABLE_DEBUG /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing the generel c_coll_selected_data */ comm->c_coll_selected_data->mcct_reqs = NULL; comm->c_coll_selected_data->mcct_num_reqs = 0; #endif /* free any cached information that has been allocated */ if (comm->c_coll_selected_data->cached_ntree) { /* destroy general tree if defined */ ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_ntree); } if (comm->c_coll_selected_data->cached_bintree) { /* destroy bintree if defined */ ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree); } if (comm->c_coll_selected_data->cached_bmtree) { /* destroy bmtree if defined */ ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bmtree); } if (comm->c_coll_selected_data->cached_chain) { /* destroy general chain if defined */ ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_chain); } if (comm->c_coll_selected_data->cached_pipeline) { /* destroy pipeline if defined */ ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_pipeline); } /* if any algorithm rules are cached on the communicator, only free them if its MCW */ /* as this is the only place they are allocated by reading the decision configure file */ if ((ompi_coll_tuned_use_dynamic_rules)&&(&ompi_mpi_comm_world==comm)) { if (comm->c_coll_selected_data->all_base_rules) { ompi_coll_tuned_free_all_rules (comm->c_coll_selected_data->all_base_rules, COLLCOUNT); } } /* if allocated memory free it */ if (comm->c_coll_selected_data) { free(comm->c_coll_selected_data); comm->c_coll_selected_data = NULL; } return OMPI_SUCCESS; }
/*
 * Destructor for a tuned collective module.
 *
 * Tears down the data block reachable through module->tuned_data: each
 * cached topology that is set gets destroyed, then the block itself is
 * freed.  Does nothing when no data block is attached.
 */
static void mca_coll_tuned_module_destruct(mca_coll_tuned_module_t *module)
{
    mca_coll_tuned_comm_t *tuned = module->tuned_data;

    /* No per-module data: nothing to release. */
    if (NULL == tuned) {
        return;
    }

#if OPAL_ENABLE_DEBUG
    /* Clear the request bookkeeping; the storage goes away together with
       the data block freed at the end of this function. */
    tuned->mcct_reqs = NULL;
    tuned->mcct_num_reqs = 0;
#endif

    /* Drop whichever cached topologies were built. */

    /* general n-ary tree */
    if (NULL != tuned->cached_ntree) {
        ompi_coll_tuned_topo_destroy_tree(&tuned->cached_ntree);
    }
    /* binary tree */
    if (NULL != tuned->cached_bintree) {
        ompi_coll_tuned_topo_destroy_tree(&tuned->cached_bintree);
    }
    /* binomial tree */
    if (NULL != tuned->cached_bmtree) {
        ompi_coll_tuned_topo_destroy_tree(&tuned->cached_bmtree);
    }
    /* in-order binomial tree */
    if (NULL != tuned->cached_in_order_bmtree) {
        ompi_coll_tuned_topo_destroy_tree(&tuned->cached_in_order_bmtree);
    }
    /* general chain */
    if (NULL != tuned->cached_chain) {
        ompi_coll_tuned_topo_destroy_tree(&tuned->cached_chain);
    }
    /* pipeline */
    if (NULL != tuned->cached_pipeline) {
        ompi_coll_tuned_topo_destroy_tree(&tuned->cached_pipeline);
    }
    /* in-order binary tree */
    if (NULL != tuned->cached_in_order_bintree) {
        ompi_coll_tuned_topo_destroy_tree(&tuned->cached_in_order_bintree);
    }

    free(tuned);
}
int Coll_gather_ompi_binomial::gather(void* sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, int root, MPI_Comm comm) { int line = -1; int i; int rank; int vrank; int size; int total_recv = 0; char *ptmp = NULL; char *tempbuf = NULL; int err; ompi_coll_tree_t* bmtree; MPI_Status status; MPI_Aint sextent, slb, strue_lb, strue_extent; MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent; size = comm->size(); rank = comm->rank(); XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d", rank); /* create the binomial tree */ // COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root ); bmtree = ompi_coll_tuned_topo_build_in_order_bmtree(comm, root); // data->cached_in_order_bmtree; sdtype->extent(&slb, &sextent); sdtype->extent(&strue_lb, &strue_extent); vrank = (rank - root + size) % size; if (rank == root) { rdtype->extent(&rlb, &rextent); rdtype->extent(&rtrue_lb, &rtrue_extent); if (0 == root) { /* root on 0, just use the recv buffer */ ptmp = (char*)rbuf; if (sbuf != MPI_IN_PLACE) { err = Datatype::copy(sbuf, scount, sdtype, ptmp, rcount, rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } } else { /* root is not on 0, allocate temp buffer for recv, * rotate data at the end */ tempbuf = (char*)smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent); if (NULL == tempbuf) { err = MPI_ERR_OTHER; line = __LINE__; goto err_hndl; } ptmp = tempbuf - rlb; if (sbuf != MPI_IN_PLACE) { /* copy from sbuf to temp buffer */ err = Datatype::copy(sbuf, scount, sdtype, ptmp, rcount, rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } else { /* copy from rbuf to temp buffer */ err = Datatype::copy((char*)rbuf + rank * rextent * rcount, rcount, rdtype, ptmp, rcount, rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } } total_recv = rcount; } else if (!(vrank % 2)) { /* other non-leaf nodes, allocate temp buffer for data received from * children, the most we need is half of the 
total data elements due * to the property of binimoal tree */ tempbuf = (char*)smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent); if (NULL == tempbuf) { err = MPI_ERR_OTHER; line = __LINE__; goto err_hndl; } ptmp = tempbuf - slb; /* local copy to tempbuf */ err = Datatype::copy(sbuf, scount, sdtype, ptmp, scount, sdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } /* use sdtype,scount as rdtype,rdcount since they are ignored on * non-root procs */ rdtype = sdtype; rcount = scount; rextent = sextent; total_recv = rcount; } else { /* leaf nodes, no temp buffer needed, use sdtype,scount as * rdtype,rdcount since they are ignored on non-root procs */ ptmp = (char*)sbuf; total_recv = scount; } if (!(vrank % 2)) { /* all non-leaf nodes recv from children */ for (i = 0; i < bmtree->tree_nextsize; i++) { int mycount = 0, vkid; /* figure out how much data I have to send to this child */ vkid = (bmtree->tree_next[i] - root + size) % size; mycount = vkid - vrank; if (mycount > (size - vkid)) mycount = size - vkid; mycount *= rcount; XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d recv %d mycount = %d", rank, bmtree->tree_next[i], mycount); Request::recv(ptmp + total_recv * rextent, mycount, rdtype, bmtree->tree_next[i], COLL_TAG_GATHER, comm, &status); total_recv += mycount; } } if (rank != root) { /* all nodes except root send to parents */ XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d send %d count %d\n", rank, bmtree->tree_prev, total_recv); Request::send(ptmp, total_recv, sdtype, bmtree->tree_prev, COLL_TAG_GATHER, comm); } if (rank == root) { if (root != 0) { /* rotate received data on root if root != 0 */ err = Datatype::copy(ptmp, rcount * (size - root), rdtype, (char*)rbuf + rextent * root * rcount, rcount * (size - root), rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } err = Datatype::copy(ptmp + rextent * rcount * (size - root), rcount * root, rdtype, (char*)rbuf, rcount * root, rdtype); if 
(MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } smpi_free_tmp_buffer(tempbuf); } } else if (!(vrank % 2)) { /* other non-leaf nodes */ smpi_free_tmp_buffer(tempbuf); } ompi_coll_tuned_topo_destroy_tree(&bmtree); return MPI_SUCCESS; err_hndl: if (NULL != tempbuf) smpi_free_tmp_buffer(tempbuf); XBT_DEBUG("%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank); return err; }