/* This routine is for a thread to yield control when the thread is waiting for
 * the completion of communication inside a ROMIO routine but the progress
 * engine is blocked by another thread.
 *
 * Yields the GLOBAL critical section so that the thread currently holding
 * MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX (and driving the progress engine) can run.
 * No-op in thread models where the GLOBAL mutex is not used. */
void MPIR_Ext_cs_yield(void)
{
    /* TODO: check whether the progress engine is blocked */
    MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
}
/* Allocate a context ID collectively over comm_ptr (or, when group_ptr is
 * non-NULL, over the subgroup group_ptr using tagged group collectives).
 *
 * Parameters:
 *   comm_ptr   - communicator over which the allocation is performed
 *   group_ptr  - optional subgroup; when non-NULL, only group members
 *                participate (via MPIR_Allreduce_group with a tagged
 *                collective tag)
 *   tag        - tag used to distinguish concurrent group-collective
 *                allocations
 *   context_id - [out] receives the newly allocated context ID; set to
 *                MPIR_INVALID_CONTEXT_ID when ignore_id is true
 *   ignore_id  - if true, this process participates in the collective but
 *                does not consume a context ID itself (its mask is "all
 *                available")
 *
 * Returns MPI_SUCCESS or an MPI error code (e.g. **toomanycomm /
 * **toomanycommfrag on context-ID exhaustion).
 *
 * Thread safety: access to the shared context mask and the gcn_state list
 * (next_gcn) is protected by MPIR_THREAD_POBJ_CTX_MUTEX.  The algorithm
 * loops until an ID is found: a first "eager" iteration uses a reserved
 * deadlock-free mask segment; later iterations serialize threads through
 * the next_gcn list so only the list head may own the full mask. */
int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag,
                                    MPIR_Context_id_t * context_id, int ignore_id)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    struct gcn_state st;
    struct gcn_state *tmp;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);

    st.first_iter = 1;
    st.comm_ptr = comm_ptr;
    st.tag = tag;
    st.own_mask = 0;
    st.own_eager_mask = 0;
    /* Group-collective and ignore_id should never be combined */
    MPIR_Assert(!(group_ptr != NULL && ignore_id));

    *context_id = 0;

    MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST,
                                             "Entering; shared state is %d:%d, my ctx id is %d, tag=%d",
                                             mask_in_use, eager_in_use, comm_ptr->context_id,
                                             tag));

    while (*context_id == 0) {
        /* We lock only around access to the mask (except in the global locking
         * case).  If another thread is using the mask, we take a mask of zero. */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        if (initialize_context_mask) {
            context_id_init();
        }

        if (eager_nelem < 0) {
            /* Ensure that at least one word of deadlock-free context IDs is
             * always set aside for the base protocol */
            MPIR_Assert(MPIR_CVAR_CTXID_EAGER_SIZE >= 0 &&
                        MPIR_CVAR_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK - 1);
            eager_nelem = MPIR_CVAR_CTXID_EAGER_SIZE;
        }

        if (ignore_id) {
            /* We are not participating in the resulting communicator, so our
             * context ID space doesn't matter.  Set the mask to "all available". */
            memset(st.local_mask, 0xff, MPIR_MAX_CONTEXT_MASK * sizeof(int));
            st.own_mask = 0;
            /* don't need to touch mask_in_use/lowest_context_id b/c our thread
             * doesn't ever need to "win" the mask */
        }
        /* Deadlock avoidance: Only participate in the context id loop when all
         * processes have called this routine.  On the first iteration, use the
         * "eager" allocation protocol. */
        else if (st.first_iter) {
            memset(st.local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
            st.own_eager_mask = 0;
            /* Attempt to reserve the eager mask segment */
            if (!eager_in_use && eager_nelem > 0) {
                int i;
                for (i = 0; i < eager_nelem; i++)
                    st.local_mask[i] = context_mask[i];

                eager_in_use = 1;
                st.own_eager_mask = 1;
            }
        }
        else {
            MPIR_Assert(next_gcn != NULL);
            /* If we are here, at least one element must be in the list — at
             * least ourselves.  Only the first element in the list may own the
             * mask.  However, the mask may still be in use by another thread
             * that added an allocation to the list before us, so check whether
             * the mask is in use and record whether we own it. */
            if (mask_in_use || &st != next_gcn) {
                memset(st.local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
                st.own_mask = 0;
                MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST,
                                                         "Mask is in use, my context_id is %d, owner context id is %d",
                                                         st.comm_ptr->context_id,
                                                         next_gcn->comm_ptr->context_id));
            }
            else {
                int i;
                /* Copy the safe (non-eager) mask segment to local_mask; the
                 * eager words are zeroed so the base protocol keeps them. */
                for (i = 0; i < eager_nelem; i++)
                    st.local_mask[i] = 0;
                for (i = eager_nelem; i < MPIR_MAX_CONTEXT_MASK; i++)
                    st.local_mask[i] = context_mask[i];

                mask_in_use = 1;
                st.own_mask = 1;
                MPL_DBG_MSG(MPIR_DBG_COMM, VERBOSE, "Copied local_mask");
            }
        }
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        /* Note: MPIR_MAX_CONTEXT_MASK elements of local_mask are used by the
         * context ID allocation algorithm.  The additional element is ignored
         * by the context ID mask access routines and is used as a flag for
         * detecting context ID exhaustion (explained below). */
        if (st.own_mask || ignore_id)
            st.local_mask[ALL_OWN_MASK_FLAG] = 1;
        else
            st.local_mask[ALL_OWN_MASK_FLAG] = 0;

        /* Now, try to get a context id */
        MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
        /* In the global and brief-global cases, note that this routine will
         * release the global lock when it needs to wait.  That will allow
         * other processes to enter the global or brief global critical
         * section. */
        if (group_ptr != NULL) {
            int coll_tag = tag | MPIR_Process.tagged_coll_mask;    /* Shift tag into the tagged coll space */
            mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, st.local_mask,
                                             MPIR_MAX_CONTEXT_MASK + 1, MPI_INT, MPI_BAND,
                                             comm_ptr, group_ptr, coll_tag, &errflag);
        }
        else {
            mpi_errno = MPID_Allreduce(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1,
                                       MPI_INT, MPI_BAND, comm_ptr, &errflag);
        }
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

        /* MT FIXME 2/3 cases don't seem to need the CONTEXTID CS, check and
         * narrow this region */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
        if (ignore_id) {
            /* we don't care what the value was, but make sure that everyone
             * who did care agreed on a value */
            *context_id = locate_context_bit(st.local_mask);
            /* used later in out-of-context ids check and outer while loop condition */
        }
        else if (st.own_eager_mask) {
            /* There is a chance that we've found a context id.
             * find_and_allocate_context_id updates the context_mask if it
             * finds a match. */
            *context_id = find_and_allocate_context_id(st.local_mask);
            MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Context id is now %hd", *context_id);

            st.own_eager_mask = 0;
            eager_in_use = 0;
            if (*context_id <= 0) {
                /* else we did not find a context id.  Give up the mask in case
                 * there is another thread (with a lower input context id)
                 * waiting for it.  We need to ensure that any other threads
                 * have the opportunity to run, hence yielding. */
                /* FIXME: Do we need to do a GLOBAL yield here?  When we do a
                 * collective operation, we yield to other threads anyway. */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
                MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
        else if (st.own_mask) {
            /* There is a chance that we've found a context id.
             * find_and_allocate_context_id updates the context_mask if it
             * finds a match. */
            *context_id = find_and_allocate_context_id(st.local_mask);
            MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Context id is now %hd", *context_id);

            mask_in_use = 0;

            if (*context_id > 0) {
                /* If we found a new context id, we have to remove the element
                 * from the list so the next allocation can own the mask. */
                if (next_gcn == &st) {
                    next_gcn = st.next;
                }
                else {
                    for (tmp = next_gcn; tmp->next != &st; tmp = tmp->next);    /* avoid compiler warnings */
                    tmp->next = st.next;
                }
            }
            else {
                /* else we did not find a context id.  Give up the mask in case
                 * there is another thread in the gcn_next_list waiting for it.
                 * We need to ensure that any other threads have the
                 * opportunity to run, hence yielding. */
                /* FIXME: Do we need to do a GLOBAL yield here?  When we do a
                 * collective operation, we yield to other threads anyway. */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
                MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
        else {
            /* As above, force this thread to yield. */
            /* FIXME: Do we need to do a GLOBAL yield here?  When we do a
             * collective operation, we yield to other threads anyway. */
            MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
        }
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        /* Test for context ID exhaustion: All threads that will participate in
         * the new communicator owned the mask and could not allocate a context
         * ID.  This indicates that either some process has no context IDs
         * available, or that some are available, but the allocation cannot
         * succeed because there is no common context ID. */
        if (*context_id == 0 && st.local_mask[ALL_OWN_MASK_FLAG] == 1) {
            /* --BEGIN ERROR HANDLING-- */
            int nfree = 0;
            int ntotal = 0;
            int minfree;

            if (st.own_mask) {
                MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
                mask_in_use = 0;
                MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }

            context_mask_stats(&nfree, &ntotal);
            if (ignore_id)
                minfree = INT_MAX;
            else
                minfree = nfree;

            if (group_ptr != NULL) {
                int coll_tag = tag | MPIR_Process.tagged_coll_mask;     /* Shift tag into the tagged coll space */
                mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, &minfree, 1, MPI_INT, MPI_MIN,
                                                 comm_ptr, group_ptr, coll_tag, &errflag);
            }
            else {
                mpi_errno = MPID_Allreduce(MPI_IN_PLACE, &minfree, 1, MPI_INT, MPI_MIN,
                                           comm_ptr, &errflag);
            }

            if (minfree > 0) {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycommfrag", "**toomanycommfrag %d %d %d",
                                     nfree, ntotal, ignore_id);
            }
            else {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycomm", "**toomanycomm %d %d %d",
                                     nfree, ntotal, ignore_id);
            }
            /* --END ERROR HANDLING-- */
        }
        if (st.first_iter == 1) {
            st.first_iter = 0;
            /* to avoid deadlocks, the element is not added to the list before
             * the first iteration */
            if (!ignore_id && *context_id == 0) {
                MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
                add_gcn_to_list(&st);
                MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
    }

  fn_exit:
    if (ignore_id)
        *context_id = MPIR_INVALID_CONTEXT_ID;
    MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "Context mask = %s", context_mask_to_str());
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GET_CONTEXTID);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    /* Release the masks */
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
    if (st.own_mask) {
        mask_in_use = 0;
    }
    /* If in list, remove it */
    if (!st.first_iter && !ignore_id) {
        if (next_gcn == &st) {
            next_gcn = st.next;
        }
        else {
            for (tmp = next_gcn; tmp->next != &st; tmp = tmp->next);
            tmp->next = st.next;
        }
    }
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
/* Blocking matched probe: wait until a message matching (source, tag,
 * context) arrives, dequeue it from the unexpected queue, and return it as a
 * message handle for a later MPI_Mrecv/MPI_Imrecv.
 *
 * Parameters:
 *   source         - matching source rank, MPI_ANY_SOURCE, or MPI_PROC_NULL
 *   tag            - matching tag
 *   comm           - communicator to probe on
 *   context_offset - offset added to comm->recvcontext_id to select the
 *                    context (e.g. pt2pt vs collective context)
 *   message        - [out] dequeued request (kind MPID_REQUEST_MPROBE), or
 *                    NULL for MPI_PROC_NULL (interpreted as
 *                    MPI_MESSAGE_NO_PROC)
 *   status         - [out] status of the matched message
 *
 * Returns MPI_SUCCESS or an MPI error code (including MPIX_ERR_REVOKED when
 * the communicator has been revoked).
 *
 * NOTE(review): blocks inside the CH3 progress engine until a match is found;
 * access to the receive queues is serialized by MPIR_THREAD_MSGQ_MUTEX. */
int MPID_Mprobe(int source, int tag, MPID_Comm *comm, int context_offset,
                MPID_Request **message, MPI_Status *status)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Progress_state progress_state;
    int found = FALSE;
    int context_id = comm->recvcontext_id + context_offset;

    *message = NULL;

    if (source == MPI_PROC_NULL)
    {
        MPIR_Status_set_procnull(status);
        found = TRUE;
        *message = NULL; /* should be interpreted as MPI_MESSAGE_NO_PROC */
        goto fn_exit;
    }

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }

#ifdef ENABLE_COMM_OVERRIDES
    if (MPIDI_Anysource_improbe_fn) {
        if (source == MPI_ANY_SOURCE) {
            /* if it's anysource, loop while checking the shm recv queue and
               improbing the netmod, then do a progress test to make some
               progress. */
            do {
                MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_MSGQ_MUTEX);
                *message = MPIDI_CH3U_Recvq_FDU_matchonly(source, tag, context_id, comm,&found);
                MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_MSGQ_MUTEX);
                if (found)
                    goto fn_exit;

                mpi_errno = MPIDI_Anysource_improbe_fn(tag, comm, context_offset, &found, message, status);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                if (found)
                    goto fn_exit;

                /* Yield the GLOBAL CS so a thread holding it can drive progress */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);

                /* FIXME could this be replaced with a progress_wait? */
                mpi_errno = MPIDI_CH3_Progress_test();
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            } while (1);
        }
        else {
            /* it's not anysource, see if this is for the netmod */
            MPIDI_VC_t * vc;
            MPIDI_Comm_get_vc_set_active(comm, source, &vc);

            if (vc->comm_ops && vc->comm_ops->improbe) {
                /* netmod has overridden improbe */
                do {
                    mpi_errno = vc->comm_ops->improbe(vc, source, tag, comm, context_offset,
                                                      &found, message, status);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    if (found)
                        goto fn_exit;

                    /* Yield the GLOBAL CS so a thread holding it can drive progress */
                    MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);

                    /* FIXME could this be replaced with a progress_wait? */
                    mpi_errno = MPIDI_CH3_Progress_test();
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                } while (1);
            }
            /* fall-through to shm case */
        }
    }
#endif

    /* Inefficient implementation: we poll the unexpected queue looking for a
     * matching request, interleaved with calls to progress.  If there are many
     * non-matching unexpected messages in the queue then we will end up
     * needlessly scanning the UQ.
     *
     * A smarter implementation would enqueue a partial request (one lacking
     * the recv buffer triple) onto the PQ.  Unfortunately, this is a lot
     * harder to do than it seems at first because of the spread-out nature of
     * callers to various CH3U_Recvq routines and especially because of the
     * enqueue/dequeue hooks for native MX tag matching support. */
    MPIDI_CH3_Progress_start(&progress_state);
    do
    {
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_MSGQ_MUTEX);
        *message = MPIDI_CH3U_Recvq_FDU_matchonly(source, tag, context_id, comm, &found);
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_MSGQ_MUTEX);
        if (found)
            break;

        mpi_errno = MPIDI_CH3_Progress_wait(&progress_state);
    }
    while(mpi_errno == MPI_SUCCESS);
    MPIDI_CH3_Progress_end(&progress_state);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    if (*message) {
        /* Mark the dequeued request as an mprobe message and fill in status */
        (*message)->kind = MPID_REQUEST_MPROBE;
        MPIR_Request_extract_status((*message), status);
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}