Example 1
int MPIR_Get_contextid_sparse_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag,
                                    MPIR_Context_id_t * context_id, int ignore_id)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    struct gcn_state st;
    struct gcn_state *tmp;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);

    st.first_iter = 1;
    st.comm_ptr = comm_ptr;
    st.tag = tag;
    st.own_mask = 0;
    st.own_eager_mask = 0;
    /* Group-collective and ignore_id should never be combined */
    MPIR_Assert(!(group_ptr != NULL && ignore_id));

    *context_id = 0;

    MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST,
                                     "Entering; shared state is %d:%d, my ctx id is %d, tag=%d",
                                     mask_in_use, eager_in_use, comm_ptr->context_id, tag));

    while (*context_id == 0) {
        /* We lock only around access to the mask (except in the global locking
         * case).  If another thread is using the mask, we take a mask of zero. */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        if (initialize_context_mask) {
            context_id_init();
        }

        if (eager_nelem < 0) {
            /* Ensure that at least one word of deadlock-free context IDs is
             * always set aside for the base protocol */
            MPIR_Assert(MPIR_CVAR_CTXID_EAGER_SIZE >= 0 &&
                        MPIR_CVAR_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK - 1);
            eager_nelem = MPIR_CVAR_CTXID_EAGER_SIZE;
        }

        if (ignore_id) {
            /* We are not participating in the resulting communicator, so our
             * context ID space doesn't matter.  Set the mask to "all available". */
            memset(st.local_mask, 0xff, MPIR_MAX_CONTEXT_MASK * sizeof(int));
            st.own_mask = 0;
            /* No need to touch mask_in_use/lowest_context_id because our thread
             * never needs to "win" the mask */
        }

        /* Deadlock avoidance: Only participate in context id loop when all
         * processes have called this routine.  On the first iteration, use the
         * "eager" allocation protocol.
         */
        else if (st.first_iter) {
            memset(st.local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
            st.own_eager_mask = 0;
            /* Attempt to reserve the eager mask segment */
            if (!eager_in_use && eager_nelem > 0) {
                int i;
                for (i = 0; i < eager_nelem; i++)
                    st.local_mask[i] = context_mask[i];

                eager_in_use = 1;
                st.own_eager_mask = 1;
            }
        }

        else {
            MPIR_Assert(next_gcn != NULL);
            /* If we are here, the list must contain at least one element: ourselves */

            /* Only the first element in the list can own the mask.  However, the mask
             * may still be in use by another thread that added an earlier allocation
             * to the list, so check whether the mask is in use and record whether we
             * own it */
            if (mask_in_use || &st != next_gcn) {
                memset(st.local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
                st.own_mask = 0;
                MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE, (MPL_DBG_FDEST,
                                                 "Mask is in use, my context_id is %d, owner context id is %d",
                                                 st.comm_ptr->context_id,
                                                 next_gcn->comm_ptr->context_id));
            }
            else {
                int i;
                /* Copy safe mask segment to local_mask */
                for (i = 0; i < eager_nelem; i++)
                    st.local_mask[i] = 0;
                for (i = eager_nelem; i < MPIR_MAX_CONTEXT_MASK; i++)
                    st.local_mask[i] = context_mask[i];

                mask_in_use = 1;
                st.own_mask = 1;
                MPL_DBG_MSG(MPIR_DBG_COMM, VERBOSE, "Copied local_mask");
            }
        }
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        /* Note: MPIR_MAX_CONTEXT_MASK elements of local_mask are used by the
         * context ID allocation algorithm.  The additional element is ignored
         * by the context ID mask access routines and is used as a flag for
         * detecting context ID exhaustion (explained below). */
        if (st.own_mask || ignore_id)
            st.local_mask[ALL_OWN_MASK_FLAG] = 1;
        else
            st.local_mask[ALL_OWN_MASK_FLAG] = 0;

        /* Now, try to get a context id */
        MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);
        /* In the global and brief-global cases, note that this routine will
         * release that global lock when it needs to wait.  That will allow
         * other processes to enter the global or brief global critical section.
         */
        if (group_ptr != NULL) {
            int coll_tag = tag | MPIR_Process.tagged_coll_mask; /* Shift tag into the tagged coll space */
            mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1,
                                             MPI_INT, MPI_BAND, comm_ptr, group_ptr, coll_tag,
                                             &errflag);
        }
        else {
            mpi_errno = MPID_Allreduce(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1,
                                            MPI_INT, MPI_BAND, comm_ptr, &errflag);
        }
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

        /* MT FIXME 2/3 cases don't seem to need the CONTEXTID CS, check and
         * narrow this region */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
        if (ignore_id) {
            /* we don't care what the value was, but make sure that everyone
             * who did care agreed on a value */
            *context_id = locate_context_bit(st.local_mask);
            /* used later in out-of-context ids check and outer while loop condition */
        }
        else if (st.own_eager_mask) {
            /* There is a chance that we've found a context id */
            /* Find_and_allocate_context_id updates the context_mask if it finds a match */
            *context_id = find_and_allocate_context_id(st.local_mask);
            MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Context id is now %hd", *context_id);

            st.own_eager_mask = 0;
            eager_in_use = 0;
            if (*context_id <= 0) {
                /* else we did not find a context id. Give up the mask in case
                 * there is another thread (with a lower input context id)
                 * waiting for it.  We need to ensure that any other threads
                 * have the opportunity to run, hence yielding */
                /* FIXME: Do we need a GLOBAL yield here?  When we do a
                 * collective operation, we yield for other threads anyway */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
                MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
        else if (st.own_mask) {
            /* There is a chance that we've found a context id */
            /* Find_and_allocate_context_id updates the context_mask if it finds a match */
            *context_id = find_and_allocate_context_id(st.local_mask);
            MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "Context id is now %hd", *context_id);

            mask_in_use = 0;

            if (*context_id > 0) {
                /* If we found a new context id, we have to remove the element from the list, so the
                 * next allocation can own the mask */
                if (next_gcn == &st) {
                    next_gcn = st.next;
                }
                else {
                    for (tmp = next_gcn; tmp->next != &st; tmp = tmp->next);    /* avoid compiler warnings */
                    tmp->next = st.next;
                }
            }
            else {
                /* else we did not find a context id. Give up the mask in case
                 * there is another thread in the gcn_next_list
                 * waiting for it.  We need to ensure that any other threads
                 * have the opportunity to run, hence yielding */
                /* FIXME: Do we need a GLOBAL yield here?  When we do a
                 * collective operation, we yield for other threads anyway */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
                MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
        else {
            /* As above, force this thread to yield */
            /* FIXME: Do we need a GLOBAL yield here?  When we do a
             * collective operation, we yield for other threads anyway */
            MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
        }
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        /* Test for context ID exhaustion: All threads that will participate in
         * the new communicator owned the mask and could not allocate a context
         * ID.  This indicates that either some process has no context IDs
         * available, or that some are available, but the allocation cannot
         * succeed because there is no common context ID. */
        if (*context_id == 0 && st.local_mask[ALL_OWN_MASK_FLAG] == 1) {
            /* --BEGIN ERROR HANDLING-- */
            int nfree = 0;
            int ntotal = 0;
            int minfree;

            if (st.own_mask) {
                MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
                mask_in_use = 0;
                MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }

            context_mask_stats(&nfree, &ntotal);
            if (ignore_id)
                minfree = INT_MAX;
            else
                minfree = nfree;

            if (group_ptr != NULL) {
                int coll_tag = tag | MPIR_Process.tagged_coll_mask;     /* Shift tag into the tagged coll space */
                mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, &minfree, 1, MPI_INT, MPI_MIN,
                                                 comm_ptr, group_ptr, coll_tag, &errflag);
            }
            else {
                mpi_errno = MPID_Allreduce(MPI_IN_PLACE, &minfree, 1, MPI_INT,
                                                MPI_MIN, comm_ptr, &errflag);
            }

            if (minfree > 0) {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycommfrag", "**toomanycommfrag %d %d %d",
                                     nfree, ntotal, ignore_id);
            }
            else {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycomm", "**toomanycomm %d %d %d",
                                     nfree, ntotal, ignore_id);
            }
            /* --END ERROR HANDLING-- */
        }
        if (st.first_iter == 1) {
            st.first_iter = 0;
            /* to avoid deadlocks, the element is not added to the list before the first iteration */
            if (!ignore_id && *context_id == 0) {
                MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
                add_gcn_to_list(&st);
                MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
    }

  fn_exit:
    if (ignore_id)
        *context_id = MPIR_INVALID_CONTEXT_ID;
    MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "Context mask = %s", context_mask_to_str());
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_GET_CONTEXTID);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    /* Release the masks */
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
    if (st.own_mask) {
        mask_in_use = 0;
    }
    /* If it is in the list, remove it */
    if (!st.first_iter && !ignore_id) {
        if (next_gcn == &st) {
            next_gcn = st.next;
        }
        else {
            for (tmp = next_gcn; tmp->next != &st; tmp = tmp->next);
            tmp->next = st.next;
        }
    }
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);


    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
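At its core, the allocation step above reduces to scanning an agreed-upon bit vector for a free slot. The following standalone sketch illustrates that idea with made-up names (MASK_WORDS, INT_BITS, and alloc_context_bit are illustrative stand-ins, not the MPICH internals such as find_and_allocate_context_id or MPIR_MAX_CONTEXT_MASK): a set bit means the context ID is free; allocation clears the lowest set bit and returns its position.

/* Simplified, self-contained sketch of the bit-vector allocation idea
 * (assumed names, not the MPICH internals).  A set bit means "this context
 * ID is free"; allocation finds the lowest set bit, clears it, and returns
 * its absolute position. */
#include <stdio.h>

#define MASK_WORDS 4            /* stand-in for MPIR_MAX_CONTEXT_MASK */
#define INT_BITS   32           /* stand-in for MPIR_CONTEXT_INT_BITS */

static unsigned mask[MASK_WORDS] = { 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu };

/* Find the lowest free bit, mark it allocated, return its position (or -1). */
static int alloc_context_bit(unsigned *m)
{
    for (int i = 0; i < MASK_WORDS; i++) {
        if (m[i] == 0)
            continue;                       /* no free IDs in this word */
        for (int j = 0; j < INT_BITS; j++) {
            if (m[i] & (1u << j)) {
                m[i] &= ~(1u << j);         /* clear bit: ID is now in use */
                return i * INT_BITS + j;
            }
        }
    }
    return -1;                              /* context IDs exhausted */
}

int main(void)
{
    int id1 = alloc_context_bit(mask);
    int id2 = alloc_context_bit(mask);
    printf("allocated context bits %d and %d\n", id1, id2);
    return 0;
}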
Example 2
void MPIR_Free_contextid(MPIR_Context_id_t context_id)
{
    int idx, bitpos, raw_prefix;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_FREE_CONTEXTID);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_FREE_CONTEXTID);

    /* Convert the context id to the bit position */
    raw_prefix = MPIR_CONTEXT_READ_FIELD(PREFIX, context_id);
    idx = raw_prefix / MPIR_CONTEXT_INT_BITS;
    bitpos = raw_prefix % MPIR_CONTEXT_INT_BITS;

    /* --BEGIN ERROR HANDLING-- */
    if (idx < 0 || idx >= MPIR_MAX_CONTEXT_MASK) {
        MPID_Abort(0, MPI_ERR_INTERN, 1, "In MPIR_Free_contextid, idx is out of range");
    }
    /* --END ERROR HANDLING-- */

    /* The low order bits for dynamic context IDs don't have meaning the
     * same way that low bits of non-dynamic ctx IDs do.  So we have to
     * check the dynamic case first. */
    if (MPIR_CONTEXT_READ_FIELD(DYNAMIC_PROC, context_id)) {
        MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "skipping dynamic process ctx id, context_id=%d", context_id);
        goto fn_exit;
    }
    else {      /* non-dynamic context ID */
        /* In terms of the context ID bit vector, intercomms and their constituent
         * localcomms have the same value.  To avoid a double-free situation we just
         * don't free the context ID for localcomms and assume it will be cleaned up
         * when the parent intercomm is itself completely freed. */
        if (MPIR_CONTEXT_READ_FIELD(IS_LOCALCOMM, context_id)) {
#ifdef MPL_USE_DBG_LOGGING
            char dump_str[1024];
            dump_context_id(context_id, dump_str, sizeof(dump_str));
            MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "skipping localcomm id: %s", dump_str);
#endif
            goto fn_exit;
        }
        else if (MPIR_CONTEXT_READ_FIELD(SUBCOMM, context_id)) {
            MPL_DBG_MSG_D(MPIR_DBG_COMM, VERBOSE, "skipping non-parent communicator ctx id, context_id=%d",
                           context_id);
            goto fn_exit;
        }
    }

    /* --BEGIN ERROR HANDLING-- */
    /* Check that this context id has been allocated */
    if ((context_mask[idx] & (0x1 << bitpos)) != 0) {
#ifdef MPL_USE_DBG_LOGGING
        char dump_str[1024];
        dump_context_id(context_id, dump_str, sizeof(dump_str));
        MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "context dump: %s", dump_str);
        MPL_DBG_MSG_S(MPIR_DBG_COMM, VERBOSE, "context mask = %s", context_mask_to_str());
#endif
        MPID_Abort(0, MPI_ERR_INTERN, 1, "In MPIR_Free_contextid, the context id is not in use");
    }
    /* --END ERROR HANDLING-- */

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
    /* MT: Note that this update must be done atomically in the multithreaded
     * case.  In the "one, single lock" implementation, that lock is indeed
     * held when this operation is called. */
    context_mask[idx] |= (0x1 << bitpos);
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

    MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE,
                     (MPL_DBG_FDEST,
                      "Freed context %d, mask[%d] bit %d (prefix=%#x)",
                      context_id, idx, bitpos, raw_prefix));
  fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_FREE_CONTEXTID);
}
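The prefix-to-(word, bit) arithmetic and the bit-set that frees an ID fit in a few lines. A minimal sketch, assuming illustrative constants in place of MPIR_CONTEXT_INT_BITS and MPIR_MAX_CONTEXT_MASK (free_context_bit is a hypothetical name, not the MPICH routine):

/* Sketch of the prefix -> (word index, bit position) mapping used when
 * freeing a context ID (constants are illustrative).  Freeing sets the
 * bit again, marking the ID as available. */
#include <assert.h>
#include <stdio.h>

#define INT_BITS   32   /* stand-in for MPIR_CONTEXT_INT_BITS */
#define MASK_WORDS 4    /* stand-in for MPIR_MAX_CONTEXT_MASK */

/* Zero-initialized: every ID starts out "in use" for this demonstration. */
static unsigned mask[MASK_WORDS];

/* Map a context-ID prefix to its word/bit pair and mark the ID free again. */
static void free_context_bit(int prefix)
{
    int idx = prefix / INT_BITS;
    int bitpos = prefix % INT_BITS;

    assert(idx >= 0 && idx < MASK_WORDS);
    assert((mask[idx] & (1u << bitpos)) == 0);  /* must currently be allocated */
    mask[idx] |= (1u << bitpos);                /* set bit: ID is available */
}

int main(void)
{
    free_context_bit(5);
    printf("mask[0] after freeing prefix 5: %#x\n", mask[0]);
    return 0;
}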
Example 3
int MPIDI_CH3U_Check_for_failed_procs(void)
{
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int len;
    char *kvsname;
    MPIR_Group *prev_failed_group, *new_failed_group;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);

    /* FIXME: Currently this only handles failed processes in
       comm_world.  We need to fix hydra to include the pgid along
       with the rank, then we need to create the failed group from
       something bigger than comm_world. */
    mpi_errno = MPIDI_PG_GetConnKVSname(&kvsname);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
#ifdef USE_PMI2_API
    {
        int vallen = 0;
        pmi_errno = PMI2_KVS_Get(kvsname, PMI2_ID_NULL, "PMI_dead_processes", MPIDI_failed_procs_string, PMI2_MAX_VALLEN, &vallen);
        MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
    }
#else
    pmi_errno = PMI_KVS_Get_value_length_max(&len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_value_length_max");
    pmi_errno = PMI_KVS_Get(kvsname, "PMI_dead_processes", MPIDI_failed_procs_string, len);
    MPIR_ERR_CHKANDJUMP(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get");
#endif

    if (*MPIDI_failed_procs_string == '\0') {
        /* there are no failed processes */
        MPIDI_Failed_procs_group = MPIR_Group_empty;
        goto fn_exit;
    }

    MPL_DBG_MSG_S(MPIDI_CH3_DBG_OTHER, TYPICAL, "Received proc fail notification: %s", MPIDI_failed_procs_string);

    /* save reference to previous group so we can identify new failures */
    prev_failed_group = MPIDI_Failed_procs_group;

    /* Parse the list of failed processes */
    MPIDI_CH3U_Get_failed_group(-2, &MPIDI_Failed_procs_group);

    /* get group of newly failed processes */
    mpi_errno = MPIR_Group_difference_impl(MPIDI_Failed_procs_group, prev_failed_group, &new_failed_group);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    if (new_failed_group != MPIR_Group_empty) {
        mpi_errno = MPIDI_CH3I_Comm_handle_failed_procs(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = terminate_failed_VCs(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Group_release(new_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    /* free prev group */
    if (prev_failed_group != MPIR_Group_empty) {
        mpi_errno = MPIR_Group_release(prev_failed_group);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_CHECK_FOR_FAILED_PROCS);
    return mpi_errno;

 fn_oom: /* out-of-memory handler for utarray operations */
    MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "utarray");
 fn_fail:
    goto fn_exit;
}
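The "newly failed" computation above is simply a set difference on process groups. A hedged usage sketch with the public MPI group API (the ranks marked as failed are arbitrary examples, not real failure data; run with at least two processes):

/* Standalone illustration of the "newly failed = current \ previous" step
 * using the public MPI group API.  The ranks chosen here are arbitrary
 * examples.  Requires an MPI job with at least two processes. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    MPI_Group world_group, prev_failed, curr_failed, new_failed;
    int prev_ranks[] = { 0 };        /* previously known failures (example) */
    int curr_ranks[] = { 0, 1 };     /* failures reported now (example) */
    int nnew;

    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    MPI_Group_incl(world_group, 1, prev_ranks, &prev_failed);
    MPI_Group_incl(world_group, 2, curr_ranks, &curr_failed);

    /* Ranks in curr_failed but not in prev_failed are the new failures. */
    MPI_Group_difference(curr_failed, prev_failed, &new_failed);
    MPI_Group_size(new_failed, &nnew);
    printf("newly failed processes: %d\n", nnew);

    MPI_Group_free(&world_group);
    MPI_Group_free(&prev_failed);
    MPI_Group_free(&curr_failed);
    MPI_Group_free(&new_failed);
    MPI_Finalize();
    return 0;
}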
Example 4
int MPIR_Group_translate_ranks_impl(MPIR_Group * gp1, int n, const int ranks1[],
                                    MPIR_Group * gp2, int ranks2[])
{
    int mpi_errno = MPI_SUCCESS;
    int i, g2_idx, l1_pid, l2_pid;

    MPL_DBG_MSG_S(MPIR_DBG_OTHER, VERBOSE, "gp2->is_local_dense_monotonic=%s",
                  (gp2->is_local_dense_monotonic ? "TRUE" : "FALSE"));

    /* Initialize the output ranks */
    for (i = 0; i < n; i++)
        ranks2[i] = MPI_UNDEFINED;

    if (gp2->size > 0 && gp2->is_local_dense_monotonic) {
        /* g2 probably == group_of(MPI_COMM_WORLD); use fast, constant-time lookup */
        int lpid_offset = gp2->lrank_to_lpid[0].lpid;

        MPIR_Assert(lpid_offset >= 0);
        for (i = 0; i < n; ++i) {
            int g1_lpid;

            if (ranks1[i] == MPI_PROC_NULL) {
                ranks2[i] = MPI_PROC_NULL;
                continue;
            }
            /* "adjusted" lpid from g1 */
            g1_lpid = gp1->lrank_to_lpid[ranks1[i]].lpid - lpid_offset;
            if ((g1_lpid >= 0) && (g1_lpid < gp2->size)) {
                ranks2[i] = g1_lpid;
            }
            /* else leave UNDEFINED */
        }
    } else {
        /* general, slow path; lookup time is dependent on the user-provided rank values! */
        g2_idx = gp2->idx_of_first_lpid;
        if (g2_idx < 0) {
            MPII_Group_setup_lpid_list(gp2);
            g2_idx = gp2->idx_of_first_lpid;
        }
        if (g2_idx >= 0) {
            /* g2_idx can be < 0 if the g2 group is empty */
            l2_pid = gp2->lrank_to_lpid[g2_idx].lpid;
            for (i = 0; i < n; i++) {
                if (ranks1[i] == MPI_PROC_NULL) {
                    ranks2[i] = MPI_PROC_NULL;
                    continue;
                }
                l1_pid = gp1->lrank_to_lpid[ranks1[i]].lpid;
                /* Search for this l1_pid in group2.  Use the following
                 * optimization: start from the last position in the lpid list
                 * if possible.  A more sophisticated version could use a
                 * tree based or even hashed search to speed the translation. */
                if (l1_pid < l2_pid || g2_idx < 0) {
                    /* Start over from the beginning */
                    g2_idx = gp2->idx_of_first_lpid;
                    l2_pid = gp2->lrank_to_lpid[g2_idx].lpid;
                }
                while (g2_idx >= 0 && l1_pid > l2_pid) {
                    g2_idx = gp2->lrank_to_lpid[g2_idx].next_lpid;
                    if (g2_idx >= 0)
                        l2_pid = gp2->lrank_to_lpid[g2_idx].lpid;
                    else
                        l2_pid = -1;
                }
                if (l1_pid == l2_pid)
                    ranks2[i] = g2_idx;
            }
        }
    }
    return mpi_errno;
}
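For comparison, the public binding MPI_Group_translate_ranks exposes this translation to applications. A minimal caller, assuming at least two processes, that builds a reversed two-member sub-group and maps its ranks back to MPI_COMM_WORLD ranks:

/* Minimal caller of MPI_Group_translate_ranks: map ranks of a sub-group
 * back to their ranks in the MPI_COMM_WORLD group.  Run with >= 2 processes. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    MPI_Group world_group, sub_group;
    int sub_ranks[] = { 1, 0 };      /* two-member group in reversed order */
    int query[] = { 0, 1 };          /* ranks within sub_group to translate */
    int translated[2];

    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    MPI_Group_incl(world_group, 2, sub_ranks, &sub_group);

    /* translated[i] is the world rank of sub_group rank query[i],
     * or MPI_UNDEFINED if that process is not in the target group. */
    MPI_Group_translate_ranks(sub_group, 2, query, world_group, translated);
    printf("sub rank 0 -> world %d, sub rank 1 -> world %d\n",
           translated[0], translated[1]);

    MPI_Group_free(&sub_group);
    MPI_Group_free(&world_group);
    MPI_Finalize();
    return 0;
}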
Example 5
int MPIDI_CH3_PktPrint_Close( FILE *fp, MPIDI_CH3_Pkt_t *pkt )
{
    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,TERSE," type ......... MPIDI_CH3_PKT_CLOSE\n");
    MPL_DBG_MSG_S(MPIDI_CH3_DBG_OTHER,TERSE," ack ......... %s\n", pkt->close.ack ? "TRUE" : "FALSE");
    return MPI_SUCCESS;
}