int MPIR_Intercomm_merge_impl(MPID_Comm *comm_ptr, int high, MPID_Comm **new_intracomm_ptr) { int mpi_errno = MPI_SUCCESS; int local_high, remote_high, new_size; MPIU_Context_id_t new_context_id; MPIR_Errflag_t errflag = MPIR_ERR_NONE; MPID_MPI_STATE_DECL(MPID_STATE_MPIR_INTERCOMM_MERGE_IMPL); MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_INTERCOMM_MERGE_IMPL); /* Make sure that we have a local intercommunicator */ if (!comm_ptr->local_comm) { /* Manufacture the local communicator */ mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } /* Find the "high" value of the other group of processes. This will be used to determine which group is ordered first in the generated communicator. high is logical */ local_high = high; if (comm_ptr->rank == 0) { /* This routine allows use to use the collective communication context rather than the point-to-point context. */ mpi_errno = MPIC_Sendrecv( &local_high, 1, MPI_INT, 0, 0, &remote_high, 1, MPI_INT, 0, 0, comm_ptr, MPI_STATUS_IGNORE, &errflag ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* If local_high and remote_high are the same, then order is arbitrary. we use the gpids of the rank 0 member of the local and remote groups to choose an order in this case. */ if (local_high == remote_high) { MPID_Gpid ingpid, outgpid; mpi_errno = MPID_GPID_Get( comm_ptr, 0, &ingpid ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIC_Sendrecv( &ingpid, sizeof(MPID_Gpid), MPI_BYTE, 0, 1, &outgpid, sizeof(MPID_Gpid), MPI_BYTE, 0, 1, comm_ptr, MPI_STATUS_IGNORE, &errflag ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* Note that the gpids cannot be the same because we are starting from a valid intercomm */ int rc = memcmp(&ingpid,&outgpid,sizeof(MPID_Gpid)); if(rc < 0) local_high = 1; else if(rc > 0) local_high = 0; else { /* req#3930: The merge algorithm will deadlock if the gpids are inadvertently the same due to implementation bugs in the MPID_GPID_Get() function */ MPIU_Assert(rc != 0); } } } /* All processes in the local group now need to get the value of local_high, which may have changed if both groups of processes had the same value for high */ mpi_errno = MPIR_Bcast_impl( &local_high, 1, MPI_INT, 0, comm_ptr->local_comm, &errflag ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail"); mpi_errno = MPIR_Comm_create( new_intracomm_ptr ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); new_size = comm_ptr->local_size + comm_ptr->remote_size; /* FIXME: For the intracomm, we need a consistent context id. That means that one of the two groups needs to use the recvcontext_id and the other must use the context_id */ if (local_high) { (*new_intracomm_ptr)->context_id = comm_ptr->recvcontext_id + 2; /* See below */ } else { (*new_intracomm_ptr)->context_id = comm_ptr->context_id + 2; /* See below */ } (*new_intracomm_ptr)->recvcontext_id = (*new_intracomm_ptr)->context_id; (*new_intracomm_ptr)->remote_size = (*new_intracomm_ptr)->local_size = new_size; (*new_intracomm_ptr)->rank = -1; (*new_intracomm_ptr)->comm_kind = MPID_INTRACOMM; /* Now we know which group comes first. Build the new mapping from the existing comm */ mpi_errno = create_and_map(comm_ptr, local_high, (*new_intracomm_ptr)); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* We've setup a temporary context id, based on the context id used by the intercomm. This allows us to perform the allreduce operations within the context id algorithm, since we already have a valid (almost - see comm_create_hook) communicator. */ mpi_errno = MPIR_Comm_commit((*new_intracomm_ptr)); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* printf( "About to get context id \n" ); fflush( stdout ); */ /* In the multi-threaded case, MPIR_Get_contextid_sparse assumes that the calling routine already holds the single criticial section */ new_context_id = 0; mpi_errno = MPIR_Get_contextid_sparse( (*new_intracomm_ptr), &new_context_id, FALSE ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); MPIU_Assert(new_context_id != 0); /* We release this communicator that was involved just to * get valid context id and create true one */ mpi_errno = MPIR_Comm_release(*new_intracomm_ptr); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIR_Comm_create( new_intracomm_ptr ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); (*new_intracomm_ptr)->remote_size = (*new_intracomm_ptr)->local_size = new_size; (*new_intracomm_ptr)->rank = -1; (*new_intracomm_ptr)->comm_kind = MPID_INTRACOMM; (*new_intracomm_ptr)->context_id = new_context_id; (*new_intracomm_ptr)->recvcontext_id = new_context_id; mpi_errno = create_and_map(comm_ptr, local_high, (*new_intracomm_ptr)); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIR_Comm_commit((*new_intracomm_ptr)); if (mpi_errno) MPIR_ERR_POP(mpi_errno); fn_exit: MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_INTERCOMM_MERGE_IMPL); return mpi_errno; fn_fail: goto fn_exit; }
int MPIR_Intercomm_merge_impl(MPIR_Comm * comm_ptr, int high, MPIR_Comm ** new_intracomm_ptr) { int mpi_errno = MPI_SUCCESS; int local_high, remote_high, new_size; MPIR_Context_id_t new_context_id; MPIR_Errflag_t errflag = MPIR_ERR_NONE; MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL); MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL); /* Make sure that we have a local intercommunicator */ if (!comm_ptr->local_comm) { /* Manufacture the local communicator */ mpi_errno = MPII_Setup_intercomm_localcomm(comm_ptr); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } /* Find the "high" value of the other group of processes. This * will be used to determine which group is ordered first in * the generated communicator. high is logical */ local_high = high; if (comm_ptr->rank == 0) { /* This routine allows use to use the collective communication * context rather than the point-to-point context. */ mpi_errno = MPIC_Sendrecv(&local_high, 1, MPI_INT, 0, 0, &remote_high, 1, MPI_INT, 0, 0, comm_ptr, MPI_STATUS_IGNORE, &errflag); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* If local_high and remote_high are the same, then order is arbitrary. * we use the is_low_group in the intercomm in this case. */ MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST, "local_high=%d remote_high=%d is_low_group=%d", local_high, remote_high, comm_ptr->is_low_group)); if (local_high == remote_high) { local_high = !(comm_ptr->is_low_group); } } /* * All processes in the local group now need to get the * value of local_high, which may have changed if both groups * of processes had the same value for high */ mpi_errno = MPIR_Bcast(&local_high, 1, MPI_INT, 0, comm_ptr->local_comm, &errflag); if (mpi_errno) MPIR_ERR_POP(mpi_errno); MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail"); /* * For the intracomm, we need a consistent context id. * That means that one of the two groups needs to use * the recvcontext_id and the other must use the context_id * The recvcontext_id is unique on each process, but another * communicator may use the context_id. Therefore, we do a small hack. * We set both flags indicating a sub-communicator (intra-node and * inter-node) to one. This is normally not possible (a communicator * is either intra- or inter-node) - which makes this context_id unique. * */ new_size = comm_ptr->local_size + comm_ptr->remote_size; mpi_errno = MPIR_Comm_create(new_intracomm_ptr); if (mpi_errno) MPIR_ERR_POP(mpi_errno); if (local_high) { (*new_intracomm_ptr)->context_id = MPIR_CONTEXT_SET_FIELD(SUBCOMM, comm_ptr->recvcontext_id, 3); } else { (*new_intracomm_ptr)->context_id = MPIR_CONTEXT_SET_FIELD(SUBCOMM, comm_ptr->context_id, 3); } (*new_intracomm_ptr)->recvcontext_id = (*new_intracomm_ptr)->context_id; (*new_intracomm_ptr)->remote_size = (*new_intracomm_ptr)->local_size = new_size; (*new_intracomm_ptr)->pof2 = MPL_pof2(new_size); (*new_intracomm_ptr)->rank = -1; (*new_intracomm_ptr)->comm_kind = MPIR_COMM_KIND__INTRACOMM; /* Now we know which group comes first. Build the new mapping * from the existing comm */ mpi_errno = create_and_map(comm_ptr, local_high, (*new_intracomm_ptr)); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* We've setup a temporary context id, based on the context id * used by the intercomm. This allows us to perform the allreduce * operations within the context id algorithm, since we already * have a valid (almost - see comm_create_hook) communicator. */ mpi_errno = MPIR_Comm_commit((*new_intracomm_ptr)); if (mpi_errno) MPIR_ERR_POP(mpi_errno); /* printf("About to get context id \n"); fflush(stdout); */ /* In the multi-threaded case, MPIR_Get_contextid_sparse assumes that the * calling routine already holds the single criticial section */ new_context_id = 0; mpi_errno = MPIR_Get_contextid_sparse((*new_intracomm_ptr), &new_context_id, FALSE); if (mpi_errno) MPIR_ERR_POP(mpi_errno); MPIR_Assert(new_context_id != 0); /* We release this communicator that was involved just to * get valid context id and create true one */ mpi_errno = MPIR_Comm_release(*new_intracomm_ptr); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIR_Comm_create(new_intracomm_ptr); if (mpi_errno) MPIR_ERR_POP(mpi_errno); (*new_intracomm_ptr)->remote_size = (*new_intracomm_ptr)->local_size = new_size; (*new_intracomm_ptr)->pof2 = MPL_pof2(new_size); (*new_intracomm_ptr)->rank = -1; (*new_intracomm_ptr)->comm_kind = MPIR_COMM_KIND__INTRACOMM; (*new_intracomm_ptr)->context_id = new_context_id; (*new_intracomm_ptr)->recvcontext_id = new_context_id; mpi_errno = create_and_map(comm_ptr, local_high, (*new_intracomm_ptr)); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIR_Comm_commit((*new_intracomm_ptr)); if (mpi_errno) MPIR_ERR_POP(mpi_errno); fn_exit: MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL); return mpi_errno; fn_fail: goto fn_exit; }