int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag,
                                    MPIR_Context_id_t * context_id, int ignore_id)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    struct gcn_state st;
    struct gcn_state *tmp;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);

    st.first_iter = 1;
    st.comm_ptr = comm_ptr;
    st.tag = tag;
    st.own_mask = 0;
    st.own_eager_mask = 0;

    /* Group-collective and ignore_id should never be combined */
    MPIR_Assert(!(group_ptr != NULL && ignore_id));

    *context_id = 0;

    MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST,
                                             "Entering; shared state is %d:%d, my ctx id is %d, tag=%d",
                                             mask_in_use, eager_in_use, comm_ptr->context_id, tag));

    while (*context_id == 0) {
        /* We lock only around access to the mask (except in the global locking
         * case).  If another thread is using the mask, we take a mask of zero. */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        if (initialize_context_mask) {
            context_id_init();
        }

        if (eager_nelem < 0) {
            /* Ensure that at least one word of deadlock-free context IDs is
             * always set aside for the base protocol */
            MPIR_Assert(MPIR_CVAR_CTXID_EAGER_SIZE >= 0 &&
                        MPIR_CVAR_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK - 1);
            eager_nelem = MPIR_CVAR_CTXID_EAGER_SIZE;
        }

        if (ignore_id) {
            /* We are not participating in the resulting communicator, so our
             * context ID space doesn't matter.  Set the mask to "all available". */
            memset(st.local_mask, 0xff, MPIR_MAX_CONTEXT_MASK * sizeof(int));
            st.own_mask = 0;
            /* don't need to touch mask_in_use/lowest_context_id b/c our thread
             * doesn't ever need to "win" the mask */
        }
        /* Deadlock avoidance: only participate in the context id loop when all
         * processes have called this routine.  On the first iteration, use the
         * "eager" allocation protocol. */
        else if (st.first_iter) {
            memset(st.local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
            st.own_eager_mask = 0;
            /* Attempt to reserve the eager mask segment */
            if (!eager_in_use && eager_nelem > 0) {
                int i;
                for (i = 0; i < eager_nelem; i++)
                    st.local_mask[i] = context_mask[i];
                eager_in_use = 1;
                st.own_eager_mask = 1;
            }
        } else {
            /* If we are here, at least one element (ours) must be in the list */
            MPIR_Assert(next_gcn != NULL);
            /* Only the first element in the list can own the mask.  However, the
             * mask may still be in use by another thread that added an allocation
             * to the list before us, so check whether the mask is in use and
             * record whether we own it. */
            if (mask_in_use || &st != next_gcn) {
                memset(st.local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
                st.own_mask = 0;
                MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST,
                                                         "Mask is in use, my context_id is %d, owner context id is %d",
                                                         st.comm_ptr->context_id,
                                                         next_gcn->comm_ptr->context_id));
            } else {
                int i;
                /* Copy the safe mask segment to local_mask */
                for (i = 0; i < eager_nelem; i++)
                    st.local_mask[i] = 0;
                for (i = eager_nelem; i < MPIR_MAX_CONTEXT_MASK; i++)
                    st.local_mask[i] = context_mask[i];

                mask_in_use = 1;
                st.own_mask = 1;
                MPL_DBG_MSG(MPIR_DBG_COMM, VERBOSE, "Copied local_mask");
            }
        }
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        /* Note: MPIR_MAX_CONTEXT_MASK elements of local_mask are used by the
         * context ID allocation algorithm.  The additional element is ignored
         * by the context ID mask access routines and is used as a flag for
         * detecting context ID exhaustion (explained below). */
        if (st.own_mask || ignore_id)
            st.local_mask[ALL_OWN_MASK_FLAG] = 1;
        else
            st.local_mask[ALL_OWN_MASK_FLAG] = 0;

        /* Now, try to get a context id */
        MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
        /* In the global and brief-global cases, note that this routine will
         * release the global lock when it needs to wait.  That will allow
         * other processes to enter the global or brief-global critical section. */
        if (group_ptr != NULL) {
            int coll_tag = tag | MPIR_Process.tagged_coll_mask; /* Shift tag into the tagged coll space */
            mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, st.local_mask,
                                             MPIR_MAX_CONTEXT_MASK + 1, MPI_INT, MPI_BAND,
                                             comm_ptr, group_ptr, coll_tag, &errflag);
        } else {
            mpi_errno = MPID_Allreduce(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1,
                                       MPI_INT, MPI_BAND, comm_ptr, &errflag);
        }
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

        /* MT FIXME 2/3 cases don't seem to need the CONTEXTID CS, check and
         * narrow this region */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
        if (ignore_id) {
            /* we don't care what the value was, but make sure that everyone
             * who did care agreed on a value */
            *context_id = locate_context_bit(st.local_mask);
            /* used later in the out-of-context-ids check and outer while loop condition */
        } else if (st.own_eager_mask) {
            /* There is a chance that we've found a context id;
             * find_and_allocate_context_id updates the context_mask if it finds a match */
            *context_id = find_and_allocate_context_id(st.local_mask);
            MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Context id is now %hd", *context_id);

            st.own_eager_mask = 0;
            eager_in_use = 0;
            if (*context_id <= 0) {
                /* else we did not find a context id.  Give up the mask in case
                 * there is another thread (with a lower input context id)
                 * waiting for it.  We need to ensure that any other threads
                 * have the opportunity to run, hence yielding */
                /* FIXME: Do we need to do a GLOBAL yield here?  When we do a
                 * collective operation, we yield for other threads anyway. */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
                MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        } else if (st.own_mask) {
            /* There is a chance that we've found a context id;
             * find_and_allocate_context_id updates the context_mask if it finds a match */
            *context_id = find_and_allocate_context_id(st.local_mask);
            MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Context id is now %hd", *context_id);

            mask_in_use = 0;

            if (*context_id > 0) {
                /* If we found a new context id, remove the element from the list
                 * so the next allocation can own the mask */
                if (next_gcn == &st) {
                    next_gcn = st.next;
                } else {
                    for (tmp = next_gcn; tmp->next != &st; tmp = tmp->next);    /* avoid compiler warnings */
                    tmp->next = st.next;
                }
            } else {
                /* else we did not find a context id.  Give up the mask in case
                 * there is another thread in the gcn_next_list waiting for it.
                 * We need to ensure that any other threads have the opportunity
                 * to run, hence yielding */
                /* FIXME: Do we need to do a GLOBAL yield here?  When we do a
                 * collective operation, we yield for other threads anyway. */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
                MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        } else {
            /* As above, force this thread to yield */
            /* FIXME: Do we need to do a GLOBAL yield here?  When we do a
             * collective operation, we yield for other threads anyway. */
            MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
        }
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        /* Test for context ID exhaustion: All threads that will participate in
         * the new communicator owned the mask and could not allocate a context
         * ID.  This indicates that either some process has no context IDs
         * available, or that some are available, but the allocation cannot
         * succeed because there is no common context ID. */
        if (*context_id == 0 && st.local_mask[ALL_OWN_MASK_FLAG] == 1) {
            /* --BEGIN ERROR HANDLING-- */
            int nfree = 0;
            int ntotal = 0;
            int minfree;

            if (st.own_mask) {
                MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
                mask_in_use = 0;
                MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }

            context_mask_stats(&nfree, &ntotal);
            if (ignore_id)
                minfree = INT_MAX;
            else
                minfree = nfree;

            if (group_ptr != NULL) {
                int coll_tag = tag | MPIR_Process.tagged_coll_mask;     /* Shift tag into the tagged coll space */
                mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, &minfree, 1, MPI_INT, MPI_MIN,
                                                 comm_ptr, group_ptr, coll_tag, &errflag);
            } else {
                mpi_errno = MPID_Allreduce(MPI_IN_PLACE, &minfree, 1, MPI_INT, MPI_MIN,
                                           comm_ptr, &errflag);
            }

            if (minfree > 0) {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycommfrag", "**toomanycommfrag %d %d %d",
                                     nfree, ntotal, ignore_id);
            } else {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycomm", "**toomanycomm %d %d %d",
                                     nfree, ntotal, ignore_id);
            }
            /* --END ERROR HANDLING-- */
        }

        if (st.first_iter == 1) {
            st.first_iter = 0;
            /* to avoid deadlocks, the element is not added to the list before
             * the first iteration */
            if (!ignore_id && *context_id == 0) {
                MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
                add_gcn_to_list(&st);
                MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
    }

  fn_exit:
    if (ignore_id)
        *context_id = MPIR_INVALID_CONTEXT_ID;
    MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "Context mask = %s", context_mask_to_str());
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GET_CONTEXTID);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    /* Release the masks */
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
    if (st.own_mask) {
        mask_in_use = 0;
    }
    /* If in the list, remove it */
    if (!st.first_iter && !ignore_id) {
        if (next_gcn == &st) {
            next_gcn = st.next;
        } else {
            for (tmp = next_gcn; tmp->next != &st; tmp = tmp->next);
            tmp->next = st.next;
        }
    }
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
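/*
 * Illustrative sketch, excluded from compilation: the core of the allocation
 * protocol above is a bitwise-AND allreduce over per-process availability
 * masks, after which the lowest surviving bit identifies a context ID that is
 * free on every process.  pick_common_context_id and its parameters are
 * hypothetical stand-ins (the real code uses MPIR_MAX_CONTEXT_MASK, the
 * locate/find_and_allocate helpers, and the mask-ownership and yielding
 * machinery shown above).
 */
#if 0
static int pick_common_context_id(unsigned local_mask[], int mask_words, MPI_Comm comm)
{
    int i, b;

    /* A bit survives the MPI_BAND reduction only if the corresponding ID is
     * marked free on every rank. */
    MPI_Allreduce(MPI_IN_PLACE, local_mask, mask_words, MPI_UNSIGNED, MPI_BAND, comm);

    for (i = 0; i < mask_words; i++) {
        if (local_mask[i] == 0)
            continue;
        for (b = 0; b < 32; b++) {
            if (local_mask[i] & (1u << b))
                return i * 32 + b;      /* lowest globally free ID */
        }
    }
    return -1;  /* exhaustion: no ID is free on all processes at once */
}
#endif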
void MPIR_Free_contextid(MPIR_Context_id_t context_id)
{
    int idx, bitpos, raw_prefix;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_FREE_CONTEXTID);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_FREE_CONTEXTID);

    /* Convert the context id to the bit position */
    raw_prefix = MPIR_CONTEXT_READ_FIELD(PREFIX, context_id);
    idx = raw_prefix / MPIR_CONTEXT_INT_BITS;
    bitpos = raw_prefix % MPIR_CONTEXT_INT_BITS;

    /* --BEGIN ERROR HANDLING-- */
    if (idx < 0 || idx >= MPIR_MAX_CONTEXT_MASK) {
        MPID_Abort(0, MPI_ERR_INTERN, 1, "In MPIR_Free_contextid, idx is out of range");
    }
    /* --END ERROR HANDLING-- */

    /* The low-order bits of dynamic context IDs don't have the same meaning as
     * the low bits of non-dynamic context IDs, so we have to check the dynamic
     * case first. */
    if (MPIR_CONTEXT_READ_FIELD(DYNAMIC_PROC, context_id)) {
        MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "skipping dynamic process ctx id, context_id=%d",
                      context_id);
        goto fn_exit;
    } else {    /* non-dynamic context ID */
        /* In terms of the context ID bit vector, intercomms and their constituent
         * localcomms have the same value.  To avoid a double-free situation we just
         * don't free the context ID for localcomms and assume it will be cleaned up
         * when the parent intercomm is itself completely freed. */
        if (MPIR_CONTEXT_READ_FIELD(IS_LOCALCOMM, context_id)) {
#ifdef MPL_USE_DBG_LOGGING
            char dump_str[1024];
            dump_context_id(context_id, dump_str, sizeof(dump_str));
            MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "skipping localcomm id: %s", dump_str);
#endif
            goto fn_exit;
        } else if (MPIR_CONTEXT_READ_FIELD(SUBCOMM, context_id)) {
            MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE,
                          "skipping non-parent communicator ctx id, context_id=%d", context_id);
            goto fn_exit;
        }
    }

    /* --BEGIN ERROR HANDLING-- */
    /* Check that this context id has been allocated */
    if ((context_mask[idx] & (0x1 << bitpos)) != 0) {
#ifdef MPL_USE_DBG_LOGGING
        char dump_str[1024];
        dump_context_id(context_id, dump_str, sizeof(dump_str));
        MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "context dump: %s", dump_str);
        MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "context mask = %s", context_mask_to_str());
#endif
        MPID_Abort(0, MPI_ERR_INTERN, 1, "In MPIR_Free_contextid, the context id is not in use");
    }
    /* --END ERROR HANDLING-- */

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
    /* MT: Note that this update must be done atomically in the multithreaded
     * case.  In the "one, single lock" implementation, that lock is indeed
     * held when this operation is called. */
    context_mask[idx] |= (0x1 << bitpos);
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

    MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE,
                    (MPL_DBG_FDEST, "Freed context %d, mask[%d] bit %d (prefix=%#x)",
                     context_id, idx, bitpos, raw_prefix));
  fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_FREE_CONTEXTID);
}
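/*
 * Illustrative sketch, excluded from compilation: the prefix-to-bit mapping
 * used by MPIR_Free_contextid above.  WORD_BITS stands in for
 * MPIR_CONTEXT_INT_BITS; in this scheme a set bit means "available", so
 * freeing an ID is an OR, and allocating one clears the bit.
 */
#if 0
#define WORD_BITS 32

static void mark_context_id_free(unsigned mask[], int prefix)
{
    int idx = prefix / WORD_BITS;       /* which word of the mask */
    int bitpos = prefix % WORD_BITS;    /* which bit within that word */
    mask[idx] |= (1u << bitpos);
}
#endif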
int MPIDI_CH3U_Check_for_failed_procs(void)
{
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int len;
    char *kvsname;
    MPIR_Group *prev_failed_group, *new_failed_group;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);

    /* FIXME: Currently this only handles failed processes in comm_world.  We
     * need to fix hydra to include the pgid along with the rank, then we need
     * to create the failed group from something bigger than comm_world. */
    mpi_errno = MPIDI_PG_GetConnKVSname(&kvsname);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

#ifdef USE_PMI2_API
    {
        int vallen = 0;
        pmi_errno = PMI2_KVS_Get(kvsname, PMI2_ID_NULL, "PMI_dead_processes",
                                 MPIDI_failed_procs_string, PMI2_MAX_VALLEN, &vallen);
        MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
    }
#else
    pmi_errno = PMI_KVS_Get_value_length_max(&len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_value_length_max");
    pmi_errno = PMI_KVS_Get(kvsname, "PMI_dead_processes", MPIDI_failed_procs_string, len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
#endif

    if (*MPIDI_failed_procs_string == '\0') {
        /* there are no failed processes */
        MPIDI_Failed_procs_group = MPIR_Group_empty;
        goto fn_exit;
    }

    MPL_DBG_MSG_S(MPIDI_CH3_DBG_OTHER, TYPICAL, "Received proc fail notification: %s",
                  MPIDI_failed_procs_string);

    /* save a reference to the previous group so we can identify new failures */
    prev_failed_group = MPIDI_Failed_procs_group;

    /* Parse the list of failed processes */
    MPIDI_CH3U_Get_failed_group(-2, &MPIDI_Failed_procs_group);

    /* get the group of newly failed processes */
    mpi_errno = MPIR_Group_difference_impl(MPIDI_Failed_procs_group, prev_failed_group,
                                           &new_failed_group);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    if (new_failed_group != MPIR_Group_empty) {
        mpi_errno = MPIDI_CH3I_Comm_handle_failed_procs(new_failed_group);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = terminate_failed_VCs(new_failed_group);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Group_release(new_failed_group);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    /* free the previous group */
    if (prev_failed_group != MPIR_Group_empty) {
        mpi_errno = MPIR_Group_release(prev_failed_group);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
    return mpi_errno;
  fn_oom:      /* out-of-memory handler for utarray operations */
    MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "utarray");
  fn_fail:
    goto fn_exit;
}
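/*
 * Illustrative sketch, excluded from compilation: one plausible way to pull
 * individual ranks out of a failure-notification string, assuming a plain
 * comma-separated list such as "3,17,42".  The actual parsing (and whatever
 * richer syntax the process manager emits) is handled by
 * MPIDI_CH3U_Get_failed_group; parse_failed_ranks is a hypothetical helper.
 */
#if 0
#include <stdlib.h>

static int parse_failed_ranks(const char *str, int ranks[], int max_ranks)
{
    int n = 0;
    char *end;

    while (*str != '\0' && n < max_ranks) {
        ranks[n++] = (int) strtol(str, &end, 10);
        if (*end != ',')
            break;      /* end of list (or unexpected character) */
        str = end + 1;
    }
    return n;   /* number of ranks parsed */
}
#endif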
int MPIR_Group_translate_ranks_impl(MPIR_Group * gp1, int n, const int ranks1[],
                                    MPIR_Group * gp2, int ranks2[])
{
    int mpi_errno = MPI_SUCCESS;
    int i, g2_idx, l1_pid, l2_pid;

    MPL_DBG_MSG_S(MPIR_DBG_OTHER, VERBOSE, "gp2->is_local_dense_monotonic=%s",
                  (gp2->is_local_dense_monotonic ? "TRUE" : "FALSE"));

    /* Initialize the output ranks */
    for (i = 0; i < n; i++)
        ranks2[i] = MPI_UNDEFINED;

    if (gp2->size > 0 && gp2->is_local_dense_monotonic) {
        /* gp2 is probably group_of(MPI_COMM_WORLD); use the fast, constant-time lookup */
        int lpid_offset = gp2->lrank_to_lpid[0].lpid;

        MPIR_Assert(lpid_offset >= 0);
        for (i = 0; i < n; ++i) {
            int g1_lpid;

            if (ranks1[i] == MPI_PROC_NULL) {
                ranks2[i] = MPI_PROC_NULL;
                continue;
            }
            /* "adjusted" lpid from gp1 */
            g1_lpid = gp1->lrank_to_lpid[ranks1[i]].lpid - lpid_offset;

            if ((g1_lpid >= 0) && (g1_lpid < gp2->size)) {
                ranks2[i] = g1_lpid;
            }
            /* else leave UNDEFINED */
        }
    } else {
        /* general, slow path; lookup time depends on the user-provided rank values! */
        g2_idx = gp2->idx_of_first_lpid;
        if (g2_idx < 0) {
            MPII_Group_setup_lpid_list(gp2);
            g2_idx = gp2->idx_of_first_lpid;
        }
        if (g2_idx >= 0) {
            /* g2_idx can be < 0 if the gp2 group is empty */
            l2_pid = gp2->lrank_to_lpid[g2_idx].lpid;
            for (i = 0; i < n; i++) {
                if (ranks1[i] == MPI_PROC_NULL) {
                    ranks2[i] = MPI_PROC_NULL;
                    continue;
                }
                l1_pid = gp1->lrank_to_lpid[ranks1[i]].lpid;
                /* Search for this l1_pid in group 2.  Use the following
                 * optimization: start from the last position in the lpid list
                 * if possible.  A more sophisticated version could use a
                 * tree-based or even hashed search to speed the translation. */
                if (l1_pid < l2_pid || g2_idx < 0) {
                    /* Start over from the beginning */
                    g2_idx = gp2->idx_of_first_lpid;
                    l2_pid = gp2->lrank_to_lpid[g2_idx].lpid;
                }
                while (g2_idx >= 0 && l1_pid > l2_pid) {
                    g2_idx = gp2->lrank_to_lpid[g2_idx].next_lpid;
                    if (g2_idx >= 0)
                        l2_pid = gp2->lrank_to_lpid[g2_idx].lpid;
                    else
                        l2_pid = -1;
                }
                if (l1_pid == l2_pid)
                    ranks2[i] = g2_idx;
            }
        }
    }
    return mpi_errno;
}
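/*
 * Illustrative sketch, excluded from compilation: the fast path above works
 * because a "local dense monotonic" group's lpids form the contiguous run
 * [offset, offset + size), so translation collapses to an offset subtraction
 * plus a range check.  translate_rank_dense is a hypothetical helper; the
 * real code leaves the output as MPI_UNDEFINED rather than returning -1.
 */
#if 0
static int translate_rank_dense(int g1_lpid, int g2_lpid_offset, int g2_size)
{
    int r = g1_lpid - g2_lpid_offset;
    return (r >= 0 && r < g2_size) ? r : -1;    /* -1 marks "not in group 2" */
}
#endif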
int MPIDI_CH3_PktPrint_Close(FILE * fp, MPIDI_CH3_Pkt_t * pkt)
{
    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, TERSE, " type ......... MPIDI_CH3_PKT_CLOSE\n");
    MPL_DBG_MSG_S(MPIDI_CH3_DBG_OTHER, TERSE, " ack ......... %s\n",
                  pkt->close.ack ? "TRUE" : "FALSE");
    return MPI_SUCCESS;
}