/* comm shrink impl; assumes that standard error checking has already taken
 * place in the calling function */
int MPIR_Comm_shrink(MPID_Comm * comm_ptr, MPID_Comm ** newcomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Group *global_failed, *comm_grp, *new_group_ptr;
    int attempts = 0;
    int errflag = 0, tmp_errflag = 0;

    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK);
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_SHRINK);

    /* TODO - Implement this function for intercommunicators */
    MPIR_Comm_group_impl(comm_ptr, &comm_grp);

    do {
        mpi_errno = MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_SHRINK_TAG);
        /* Ignore the mpi_errno value here as it will definitely communicate
         * with failed procs */

        mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);
        if (MPID_Group_empty != global_failed)
            MPIR_Group_release(global_failed);

        mpi_errno = MPIR_Comm_create_group(comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, newcomm_ptr);
        errflag = mpi_errno || *newcomm_ptr == NULL;

        mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, &errflag, 1, MPI_INT, MPI_MAX,
                                         comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, &tmp_errflag);
        MPIR_Group_release(new_group_ptr);

        if (errflag)
            MPIU_Object_set_ref(new_group_ptr, 0);
    } while (errflag && ++attempts < 5);

    if (errflag && attempts >= 5)
        goto fn_fail;
    else
        mpi_errno = MPI_SUCCESS;

  fn_exit:
    MPIR_Group_release(comm_grp);
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_SHRINK);
    return mpi_errno;
  fn_fail:
    if (*newcomm_ptr)
        MPIU_Object_set_ref(*newcomm_ptr, 0);
    MPIU_Object_set_ref(global_failed, 0);
    MPIU_Object_set_ref(new_group_ptr, 0);
    goto fn_exit;
}
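/* Usage sketch (illustrative, not part of this file): driving the shrink path
 * above through the public MPIX_Comm_shrink binding after an operation fails
 * with a process-failure error. Assumes the application installed
 * MPI_ERRORS_RETURN so errors are reported rather than aborting; the helper
 * name and the repair policy are made-up examples, not MPICH requirements. */
#if 0
#include <mpi.h>

static int repair_after_failure(MPI_Comm * comm)
{
    MPI_Comm shrunk = MPI_COMM_NULL;
    int rc;

    /* Collectively build a communicator containing only the survivors.
     * All surviving processes must call this; each receives an equivalent
     * new communicator. */
    rc = MPIX_Comm_shrink(*comm, &shrunk);
    if (rc != MPI_SUCCESS)
        return rc;

    MPI_Comm_free(comm);        /* discard the broken communicator */
    *comm = shrunk;             /* continue on the repaired one */
    return MPI_SUCCESS;
}
#endif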
int MPIR_Comm_agree(MPIR_Comm * comm_ptr, int *flag)
{
    int mpi_errno = MPI_SUCCESS, mpi_errno_tmp = MPI_SUCCESS;
    MPIR_Group *comm_grp, *failed_grp, *new_group_ptr, *global_failed;
    int result, success = 1;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    int values[2];

    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_AGREE);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_AGREE);

    MPIR_Comm_group_impl(comm_ptr, &comm_grp);

    /* Get the locally known (not acknowledged) group of failed procs */
    mpi_errno = MPID_Comm_failure_get_acked(comm_ptr, &failed_grp);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /* First decide on the group of failed procs. */
    mpi_errno = MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_AGREE_TAG);
    if (mpi_errno)
        errflag = MPIR_ERR_PROC_FAILED;

    mpi_errno = MPIR_Group_compare_impl(failed_grp, global_failed, &result);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /* Create a subgroup without the failed procs */
    mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /* If that group isn't the same as what we think is failed locally, then
     * mark it as such. */
    if (result == MPI_UNEQUAL || errflag)
        success = 0;

    /* Do an allreduce to decide whether or not anyone thinks the group
     * has changed */
    mpi_errno_tmp = MPIR_Allreduce_group(MPI_IN_PLACE, &success, 1, MPI_INT, MPI_MIN,
                                         comm_ptr, new_group_ptr, MPIR_AGREE_TAG, &errflag);
    if (!success || errflag || mpi_errno_tmp)
        success = 0;

    values[0] = success;
    values[1] = *flag;

    /* Determine both the result of this function (mpi_errno) and the result
     * of flag that will be returned to the user. */
    MPIR_Allreduce_group(MPI_IN_PLACE, values, 2, MPI_INT, MPI_BAND,
                         comm_ptr, new_group_ptr, MPIR_AGREE_TAG, &errflag);
    /* Ignore the result of the operation this time. Everyone will either
     * return a failure because of !success earlier or they will return
     * something useful for flag because of this operation. If there was a new
     * failure in between the first allreduce and the second one, it's ignored
     * here. */

    if (failed_grp != MPIR_Group_empty)
        MPIR_Group_release(failed_grp);
    MPIR_Group_release(new_group_ptr);
    MPIR_Group_release(comm_grp);
    if (global_failed != MPIR_Group_empty)
        MPIR_Group_release(global_failed);

    success = values[0];
    *flag = values[1];

    if (!success) {
        MPIR_ERR_SET(mpi_errno_tmp, MPIX_ERR_PROC_FAILED, "**mpix_comm_agree");
        MPIR_ERR_ADD(mpi_errno, mpi_errno_tmp);
    }

  fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_AGREE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
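/* Usage sketch (illustrative, not part of this file): the public
 * MPIX_Comm_agree binding computes a bitwise AND of flag across all surviving
 * processes, even in the presence of failures. A common pattern is agreeing
 * on whether every survivor completed a phase successfully; the helper name
 * and the notion of a "phase" are assumptions for the example. */
#if 0
#include <mpi.h>

static int phase_completed_everywhere(MPI_Comm comm, int my_phase_ok)
{
    int flag = my_phase_ok ? 1 : 0;

    /* After this call, flag holds the AND of every survivor's input, so it
     * is 1 only if all surviving processes succeeded locally. The return
     * code is ignored here for brevity; a real caller should check it. */
    (void) MPIX_Comm_agree(comm, &flag);
    return flag;
}
#endif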
int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag,
                                    MPIR_Context_id_t * context_id, int ignore_id)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    struct gcn_state st;
    struct gcn_state *tmp;

    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);

    st.first_iter = 1;
    st.comm_ptr = comm_ptr;
    st.tag = tag;
    st.own_mask = 0;
    st.own_eager_mask = 0;

    /* Group-collective and ignore_id should never be combined */
    MPIR_Assert(!(group_ptr != NULL && ignore_id));

    *context_id = 0;

    MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST,
                                             "Entering; shared state is %d:%d, my ctx id is %d, tag=%d",
                                             mask_in_use, eager_in_use, comm_ptr->context_id, tag));

    while (*context_id == 0) {
        /* We lock only around access to the mask (except in the global locking
         * case). If another thread is using the mask, we take a mask of zero. */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        if (initialize_context_mask) {
            context_id_init();
        }

        if (eager_nelem < 0) {
            /* Ensure that at least one word of deadlock-free context IDs is
             * always set aside for the base protocol */
            MPIR_Assert(MPIR_CVAR_CTXID_EAGER_SIZE >= 0 &&
                        MPIR_CVAR_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK - 1);
            eager_nelem = MPIR_CVAR_CTXID_EAGER_SIZE;
        }

        if (ignore_id) {
            /* We are not participating in the resulting communicator, so our
             * context ID space doesn't matter. Set the mask to "all available". */
            memset(st.local_mask, 0xff, MPIR_MAX_CONTEXT_MASK * sizeof(int));
            st.own_mask = 0;
            /* don't need to touch mask_in_use/lowest_context_id b/c our thread
             * doesn't ever need to "win" the mask */
        }
        /* Deadlock avoidance: Only participate in the context id loop when all
         * processes have called this routine. On the first iteration, use the
         * "eager" allocation protocol. */
        else if (st.first_iter) {
            memset(st.local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
            st.own_eager_mask = 0;
            /* Attempt to reserve the eager mask segment */
            if (!eager_in_use && eager_nelem > 0) {
                int i;
                for (i = 0; i < eager_nelem; i++)
                    st.local_mask[i] = context_mask[i];
                eager_in_use = 1;
                st.own_eager_mask = 1;
            }
        }
        else {
            /* If we are here, at least one element (at least ourselves) must
             * be in the list. */
            MPIR_Assert(next_gcn != NULL);
            /* Only the first element in the list can own the mask. However,
             * the mask may be in use by another thread that added an
             * allocation to the list before us, so we have to check whether
             * the mask is in use and record whether we own it. */
            if (mask_in_use || &st != next_gcn) {
                memset(st.local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
                st.own_mask = 0;
                MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST,
                                                         "Mask is in use, my context_id is %d, owner context id is %d",
                                                         st.comm_ptr->context_id,
                                                         next_gcn->comm_ptr->context_id));
            }
            else {
                int i;
                /* Copy the safe mask segment to local_mask */
                for (i = 0; i < eager_nelem; i++)
                    st.local_mask[i] = 0;
                for (i = eager_nelem; i < MPIR_MAX_CONTEXT_MASK; i++)
                    st.local_mask[i] = context_mask[i];

                mask_in_use = 1;
                st.own_mask = 1;
                MPL_DBG_MSG(MPIR_DBG_COMM, VERBOSE, "Copied local_mask");
            }
        }
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        /* Note: MPIR_MAX_CONTEXT_MASK elements of local_mask are used by the
         * context ID allocation algorithm. The additional element is ignored
         * by the context ID mask access routines and is used as a flag for
         * detecting context ID exhaustion (explained below).
         */
        if (st.own_mask || ignore_id)
            st.local_mask[ALL_OWN_MASK_FLAG] = 1;
        else
            st.local_mask[ALL_OWN_MASK_FLAG] = 0;

        /* Now, try to get a context id */
        MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
        /* In the global and brief-global cases, note that this routine will
         * release the global lock when it needs to wait. That will allow
         * other processes to enter the global or brief global critical section. */
        if (group_ptr != NULL) {
            int coll_tag = tag | MPIR_Process.tagged_coll_mask; /* Shift tag into the tagged coll space */
            mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, st.local_mask,
                                             MPIR_MAX_CONTEXT_MASK + 1, MPI_INT, MPI_BAND,
                                             comm_ptr, group_ptr, coll_tag, &errflag);
        }
        else {
            mpi_errno = MPID_Allreduce(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1,
                                       MPI_INT, MPI_BAND, comm_ptr, &errflag);
        }
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

        /* MT FIXME 2/3 cases don't seem to need the CONTEXTID CS, check and
         * narrow this region */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
        if (ignore_id) {
            /* we don't care what the value was, but make sure that everyone
             * who did care agreed on a value */
            *context_id = locate_context_bit(st.local_mask);
            /* used later in the out-of-context-ids check and the outer while
             * loop condition */
        }
        else if (st.own_eager_mask) {
            /* There is a chance that we've found a context id */
            /* find_and_allocate_context_id updates the context_mask if it
             * finds a match */
            *context_id = find_and_allocate_context_id(st.local_mask);
            MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Context id is now %hd", *context_id);

            st.own_eager_mask = 0;
            eager_in_use = 0;

            if (*context_id <= 0) {
                /* else we did not find a context id. Give up the mask in case
                 * there is another thread (with a lower input context id)
                 * waiting for it. We need to ensure that any other threads
                 * have the opportunity to run, hence the yields. */
                /* FIXME: Do we need to do a GLOBAL yield here? When we do a
                 * collective operation, we yield for others anyway. */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
                MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
        else if (st.own_mask) {
            /* There is a chance that we've found a context id */
            /* find_and_allocate_context_id updates the context_mask if it
             * finds a match */
            *context_id = find_and_allocate_context_id(st.local_mask);
            MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Context id is now %hd", *context_id);

            mask_in_use = 0;

            if (*context_id > 0) {
                /* If we found a new context id, we have to remove the element
                 * from the list so the next allocation can own the mask. */
                if (next_gcn == &st) {
                    next_gcn = st.next;
                }
                else {
                    for (tmp = next_gcn; tmp->next != &st; tmp = tmp->next); /* avoid compiler warnings */
                    tmp->next = st.next;
                }
            }
            else {
                /* else we did not find a context id. Give up the mask in case
                 * there is another thread in the gcn_next_list waiting for it.
                 * We need to ensure that any other threads have the
                 * opportunity to run, hence the yields. */
                /* FIXME: Do we need to do a GLOBAL yield here? When we do a
                 * collective operation, we yield for others anyway. */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
                MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
        else {
            /* As above, force this thread to yield. */
            /* FIXME: Do we need to do a GLOBAL yield here? When we do a
             * collective operation, we yield for others anyway. */
            MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
        }
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        /* Test for context ID exhaustion: All threads that will participate in
         * the new communicator owned the mask and could not allocate a context
         * ID. This indicates that either some process has no context IDs
         * available, or that some are available, but the allocation cannot
         * succeed because there is no common context ID. */
        if (*context_id == 0 && st.local_mask[ALL_OWN_MASK_FLAG] == 1) {
            /* --BEGIN ERROR HANDLING-- */
            int nfree = 0;
            int ntotal = 0;
            int minfree;

            if (st.own_mask) {
                MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
                mask_in_use = 0;
                MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }

            context_mask_stats(&nfree, &ntotal);
            if (ignore_id)
                minfree = INT_MAX;
            else
                minfree = nfree;

            if (group_ptr != NULL) {
                int coll_tag = tag | MPIR_Process.tagged_coll_mask; /* Shift tag into the tagged coll space */
                mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, &minfree, 1, MPI_INT, MPI_MIN,
                                                 comm_ptr, group_ptr, coll_tag, &errflag);
            }
            else {
                mpi_errno = MPID_Allreduce(MPI_IN_PLACE, &minfree, 1, MPI_INT, MPI_MIN,
                                           comm_ptr, &errflag);
            }

            if (minfree > 0) {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycommfrag", "**toomanycommfrag %d %d %d",
                                     nfree, ntotal, ignore_id);
            }
            else {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycomm", "**toomanycomm %d %d %d",
                                     nfree, ntotal, ignore_id);
            }
            /* --END ERROR HANDLING-- */
        }

        if (st.first_iter == 1) {
            st.first_iter = 0;
            /* To avoid deadlocks, the element is not added to the list before
             * the first iteration. */
            if (!ignore_id && *context_id == 0) {
                MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
                add_gcn_to_list(&st);
                MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
    }

  fn_exit:
    if (ignore_id)
        *context_id = MPIR_INVALID_CONTEXT_ID;
    MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "Context mask = %s", context_mask_to_str());
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GET_CONTEXTID);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    /* Release the masks */
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
    if (st.own_mask) {
        mask_in_use = 0;
    }
    /* If the element is in the list, remove it. */
    if (!st.first_iter && !ignore_id) {
        if (next_gcn == &st) {
            next_gcn = st.next;
        }
        else {
            for (tmp = next_gcn; tmp->next != &st; tmp = tmp->next);
            tmp->next = st.next;
        }
    }
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
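/* Illustrative sketch (not part of this file): the core of the allocation
 * scheme above, stripped of the threading, eager-segment, and retry logic.
 * Each process contributes a bitmask of its locally free context IDs; an
 * MPI_BAND allreduce intersects them, and the lowest bit still set in the
 * result is an ID that is free everywhere. The names toy_mask_size and
 * toy_alloc_context_id are hypothetical. */
#if 0
#include <mpi.h>

#define toy_mask_size 64        /* words of 32 IDs each; a made-up size */

static int toy_alloc_context_id(MPI_Comm comm, unsigned local_mask[toy_mask_size])
{
    int i, j;

    /* Intersect the free sets of all processes: a bit survives only if the
     * corresponding ID is free on every process. */
    MPI_Allreduce(MPI_IN_PLACE, local_mask, toy_mask_size,
                  MPI_UNSIGNED, MPI_BAND, comm);

    /* Pick the lowest commonly free ID and mark it allocated locally;
     * every process computes the same answer from the same reduced mask. */
    for (i = 0; i < toy_mask_size; i++) {
        for (j = 0; j < 32; j++) {
            if (local_mask[i] & (1u << j)) {
                local_mask[i] &= ~(1u << j);
                return i * 32 + j;
            }
        }
    }
    return -1;  /* no common free ID: the exhaustion case handled above */
}
#endif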