Example #1
static int MPIDI_CH3I_SHM_Wins_match(MPIR_Win ** win_ptr, MPIR_Win ** matched_win,
                                     MPI_Aint ** base_shm_offs_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int i, comm_size;
    int node_size, node_rank, shm_node_size;
    int group_diff;
    int base_diff;

    MPIR_Comm *node_comm_ptr = NULL, *shm_node_comm_ptr = NULL;
    int *node_ranks = NULL, *node_ranks_in_shm_node = NULL;
    MPIR_Group *node_group_ptr = NULL, *shm_node_group_ptr = NULL;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPI_Aint *base_shm_offs;

    MPIDI_SHM_Win_t *elem = shm_wins_list;

    MPIR_CHKLMEM_DECL(2);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);
    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);

    *matched_win = NULL;
    base_shm_offs = *base_shm_offs_ptr;
    node_comm_ptr = (*win_ptr)->comm_ptr->node_comm;
    MPIR_Assert(node_comm_ptr != NULL);
    node_size = node_comm_ptr->local_size;
    node_rank = node_comm_ptr->rank;

    comm_size = (*win_ptr)->comm_ptr->local_size;

    MPIR_CHKLMEM_MALLOC(node_ranks, int *, node_size * sizeof(int), mpi_errno, "node_ranks");
    MPIR_CHKLMEM_MALLOC(node_ranks_in_shm_node, int *, node_size * sizeof(int),
                        mpi_errno, "node_ranks_in_shm_comm");

    for (i = 0; i < node_size; i++) {
        node_ranks[i] = i;
    }

    mpi_errno = MPIR_Comm_group_impl(node_comm_ptr, &node_group_ptr);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    while (elem != NULL) {
        MPIR_Win *shm_win = elem->win;
        if (!shm_win)
            MPIDI_SHM_Wins_next_and_continue(elem);

        /* Compare node_comm.
         *
         * Shm is supported only when the new node_comm is equal to, or a
         * subset of, the shm node_comm.  The reverse case (shm node_comm
         * being a subset of node_comm) is not supported, because it would
         * mean some processes of node_comm cannot be reached through shm,
         * while the RMA path only checks the node_id of a target process to
         * decide whether it is a shm target.  (A standalone sketch of this
         * subset check appears after this function.) */
        shm_node_comm_ptr = shm_win->comm_ptr->node_comm;
        shm_node_size = shm_node_comm_ptr->local_size;

        if (node_size > shm_node_size)
            MPIDI_SHM_Wins_next_and_continue(elem);

        mpi_errno = MPIR_Comm_group_impl(shm_win->comm_ptr, &shm_node_group_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Group_translate_ranks_impl(node_group_ptr, node_size,
                                                    node_ranks, shm_node_group_ptr,
                                                    node_ranks_in_shm_node);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Group_free_impl(shm_node_group_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        shm_node_group_ptr = NULL;

        group_diff = 0;
        for (i = 0; i < node_size; i++) {
            /* this rank has no counterpart in shm_win's node_comm */
            if (node_ranks_in_shm_node[i] == MPI_UNDEFINED) {
                group_diff = 1;
                break;
            }
        }
        if (group_diff)
            MPIDI_SHM_Wins_next_and_continue(elem);

        /* Gather the offset of base_addr from all local processes. Match only
         * when all of them are included in the shm segment in current shm_win.
         *
         * Note that this collective call must be called after checking the
         * group match in order to guarantee all the local processes can perform
         * this call. */
        base_shm_offs[node_rank] = (MPI_Aint) ((*win_ptr)->base)
            - (MPI_Aint) (shm_win->shm_base_addr);
        mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                                        base_shm_offs, 1, MPI_AINT, node_comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

        base_diff = 0;
        for (i = 0; i < comm_size; ++i) {
            int i_node_rank = (*win_ptr)->comm_ptr->intranode_table[i];
            if (i_node_rank >= 0) {
                MPIR_Assert(i_node_rank < node_size);

                if (base_shm_offs[i_node_rank] < 0 ||
                    base_shm_offs[i_node_rank] + (*win_ptr)->basic_info_table[i].size >
                    shm_win->shm_segment_len) {
                    base_diff = 1;
                    break;
                }
            }
        }

        if (base_diff)
            MPIDI_SHM_Wins_next_and_continue(elem);

        /* Found the first matched shm_win */
        *matched_win = shm_win;
        break;
    }

  fn_exit:
    if (node_group_ptr != NULL)
        mpi_errno = MPIR_Group_free_impl(node_group_ptr);
    /* Only free it here when group_translate_ranks fails. */
    if (shm_node_group_ptr != NULL)
        mpi_errno = MPIR_Group_free_impl(shm_node_group_ptr);

    MPIR_CHKLMEM_FREEALL();
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
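
The group-match step in the loop above relies on MPIR_Group_translate_ranks_impl returning MPI_UNDEFINED for every rank that has no counterpart in the target group. Below is a minimal standalone sketch of the same subset check written against the public MPI interface rather than the internal *_impl routines; the helper name and communicator arguments are illustrative assumptions, not part of the example above.

#include <stdlib.h>
#include <mpi.h>

/* Return 1 if every member of sub_comm is also a member of super_comm.
 * Hypothetical helper for illustration; not part of MPICH. */
static int group_is_subset(MPI_Comm sub_comm, MPI_Comm super_comm)
{
    MPI_Group sub_group, super_group;
    int i, sub_size, is_subset = 1;
    int *ranks, *translated;

    MPI_Comm_group(sub_comm, &sub_group);
    MPI_Comm_group(super_comm, &super_group);
    MPI_Group_size(sub_group, &sub_size);

    ranks = (int *) malloc(sub_size * sizeof(int));
    translated = (int *) malloc(sub_size * sizeof(int));
    for (i = 0; i < sub_size; i++)
        ranks[i] = i;

    /* MPI_UNDEFINED in the output marks a rank with no counterpart. */
    MPI_Group_translate_ranks(sub_group, sub_size, ranks, super_group, translated);
    for (i = 0; i < sub_size; i++) {
        if (translated[i] == MPI_UNDEFINED) {
            is_subset = 0;
            break;
        }
    }

    free(ranks);
    free(translated);
    MPI_Group_free(&sub_group);
    MPI_Group_free(&super_group);
    return is_subset;
}

In the example above, the same pattern is applied to node_comm and the shm window's node_comm; a single MPI_UNDEFINED entry sets group_diff and moves the search on to the next cached window.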
Example #2
/*@
 MPI_Group_translate_ranks - Translates the ranks of processes in one group to
                             those in another group

Input Parameters:
+ group1 - group1 (handle)
. n - number of ranks in 'ranks1' and 'ranks2' arrays (integer)
. ranks1 - array of zero or more valid ranks in 'group1'
- group2 - group2 (handle)

Output Parameters:
. ranks2 - array of corresponding ranks in group2, 'MPI_UNDEFINED' when no
  correspondence exists.

  As a special case (see the MPI-2 errata), if the input rank is
  'MPI_PROC_NULL', 'MPI_PROC_NULL' is given as the output rank.

.N ThreadSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
@*/
int MPI_Group_translate_ranks(MPI_Group group1, int n, const int ranks1[],
                              MPI_Group group2, int ranks2[])
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Group *group_ptr1 = NULL;
    MPIR_Group *group_ptr2 = NULL;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GROUP_TRANSLATE_RANKS);

    MPIR_ERRTEST_INITIALIZED_ORDIE();

    /* The routines that set up the group data structures must be executed
     * within a mutex.  As most of the group routines are not performance
     * critical, we simply run these routines within the SINGLE_CS */
    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_GROUP_TRANSLATE_RANKS);

    /* Validate parameters, especially handles needing to be converted */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_ERRTEST_GROUP(group1, mpi_errno);
            MPIR_ERRTEST_GROUP(group2, mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#endif

    /* Convert MPI object handles to object pointers */
    MPIR_Group_get_ptr(group1, group_ptr1);
    MPIR_Group_get_ptr(group2, group_ptr2);

    /* Validate parameters and objects (post conversion) */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            /* Validate group_ptr */
            MPIR_Group_valid_ptr(group_ptr1, mpi_errno);
            MPIR_Group_valid_ptr(group_ptr2, mpi_errno);
            /* If either group_ptr is not valid, it will be reset to null */

            MPIR_ERRTEST_ARGNEG(n, "n", mpi_errno);
            if (group_ptr1) {
                /* Check that the rank entries are valid */
                int size1 = group_ptr1->size;
                int i;
                for (i = 0; i < n; i++) {
                    if ((ranks1[i] < 0 && ranks1[i] != MPI_PROC_NULL) || ranks1[i] >= size1) {
                        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                                         MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
                                                         MPI_ERR_RANK,
                                                         "**rank", "**rank %d %d",
                                                         ranks1[i], size1);
                        goto fn_fail;
                    }
                }
            }
            MPIR_ERRTEST_ARGNULL(ranks2, "ranks2", mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    mpi_errno = MPIR_Group_translate_ranks_impl(group_ptr1, n, ranks1, group_ptr2, ranks2);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /* ... end of body of routine ... */

  fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_GROUP_TRANSLATE_RANKS);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    {
        mpi_errno =
            MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
                                 "**mpi_group_translate_ranks",
                                 "**mpi_group_translate_ranks %G %d %p %G %p", group1, n, ranks1,
                                 group2, ranks2);
    }
    mpi_errno = MPIR_Err_return_comm(NULL, FCNAME, mpi_errno);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
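
A short usage sketch for MPI_Group_translate_ranks as documented above, exercising both the MPI_UNDEFINED output and the MPI_PROC_NULL special case. The even-rank subgroup is only an illustration, and the program assumes it is launched with at least two processes.

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Group world_group, even_group;
    int world_size, i;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);

    /* Build a subgroup containing the even world ranks.
     * Assumes the program is run with at least 2 processes. */
    int n_even = (world_size + 1) / 2;
    int *even_ranks = (int *) malloc(n_even * sizeof(int));
    for (i = 0; i < n_even; i++)
        even_ranks[i] = 2 * i;
    MPI_Group_incl(world_group, n_even, even_ranks, &even_group);

    /* Translate world ranks 0 and 1 plus MPI_PROC_NULL into the even group:
     * 0 maps to 0, 1 has no counterpart (MPI_UNDEFINED), and MPI_PROC_NULL
     * maps to MPI_PROC_NULL per the MPI-2 errata quoted above. */
    int in_ranks[3] = { 0, 1, MPI_PROC_NULL };
    int out_ranks[3];
    MPI_Group_translate_ranks(world_group, 3, in_ranks, even_group, out_ranks);

    printf("0 -> %d, 1 -> %d, MPI_PROC_NULL -> %d\n",
           out_ranks[0], out_ranks[1], out_ranks[2]);

    free(even_ranks);
    MPI_Group_free(&even_group);
    MPI_Group_free(&world_group);
    MPI_Finalize();
    return 0;
}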
int MPID_Comm_failed_bitarray(MPID_Comm *comm_ptr, uint32_t **bitarray, int acked)
{
    int mpi_errno = MPI_SUCCESS;
    int size, i;
    uint32_t bit;
    int *failed_procs, *group_procs;
    MPID_Group *failed_group, *comm_group;
    MPIU_CHKLMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_COMM_FAILED_BITARRAY);

    MPIDI_FUNC_ENTER(MPID_STATE_COMM_FAILED_BITARRAY);

    /* TODO - Fix this for intercommunicators */
    size = comm_ptr->local_size;

    /* We can fit sizeof(uint32_t) * 8 ranks in one uint32_t, so divide the
     * size by that */
    /* This buffer will be handed back to the calling function so we use a
     * "real" malloc here and expect the caller to free the buffer later. The
     * other buffers in this function are temporary and will be automatically
     * cleaned up at the end of the function. */
    *bitarray = (uint32_t *) MPIU_Malloc(sizeof(uint32_t) * (size / (sizeof(uint32_t) * 8) + 1));
    if (!(*bitarray)) {
        fprintf(stderr, "Could not allocate space for bitarray\n");
        PMPI_Abort(MPI_COMM_WORLD, 1);
    }
    for (i = 0; i <= size / (sizeof(uint32_t) * 8); i++) (*bitarray)[i] = 0;

    mpi_errno = MPIDI_CH3U_Check_for_failed_procs();
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    if (acked)
        MPIDI_CH3U_Get_failed_group(comm_ptr->dev.last_ack_rank, &failed_group);
    else
        MPIDI_CH3U_Get_failed_group(-2, &failed_group);

    if (failed_group == MPID_Group_empty) goto fn_exit;


    MPIU_CHKLMEM_MALLOC(group_procs, int *, sizeof(int)*failed_group->size, mpi_errno, "group_procs");
    for (i = 0; i < failed_group->size; i++) group_procs[i] = i;
    MPIU_CHKLMEM_MALLOC(failed_procs, int *, sizeof(int)*failed_group->size, mpi_errno, "failed_procs");

    /* Translate the failed group's local ranks into the corresponding ranks
     * of comm_ptr, so that the bit positions set below refer to ranks in
     * this communicator. */
    MPIR_Comm_group_impl(comm_ptr, &comm_group);

    MPIR_Group_translate_ranks_impl(failed_group, failed_group->size, group_procs, comm_group, failed_procs);

    /* The bits will actually be ordered in descending order rather than
     * ascending. This is purely for readability since it makes no practical
     * difference. So if the bits look like this:
     *
     * 10001100 01001000 00000000 00000001
     *
     * Then ranks 0, 4, 5, 9, 12, and 31 have failed. */
    for (i = 0; i < failed_group->size; i++) {
        bit = 0x80000000;
        bit >>= failed_procs[i] % (sizeof(uint32_t) * 8);

        (*bitarray)[failed_procs[i] / (sizeof(uint32_t) * 8)] |= bit;
    }

    MPIR_Group_free_impl(comm_group);

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_FUNC_EXIT(MPID_STATE_COMM_FAILED_BITARRAY);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
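
To make the bit layout described in the comment above concrete (rank 0 sits in the most significant bit of word 0), here is a small illustrative helper, not part of MPICH, that decodes a bitarray produced this way back into a list of failed ranks:

#include <stdint.h>
#include <stdio.h>

/* Print the ranks whose bits are set in a bitarray laid out as in
 * MPID_Comm_failed_bitarray above: rank r lives in word r / 32 at bit
 * position 0x80000000 >> (r % 32).  Illustrative helper only. */
static void print_failed_ranks(const uint32_t *bitarray, int comm_size)
{
    int r;
    for (r = 0; r < comm_size; r++) {
        uint32_t bit = 0x80000000u >> (r % (sizeof(uint32_t) * 8));
        if (bitarray[r / (sizeof(uint32_t) * 8)] & bit)
            printf("rank %d failed\n", r);
    }
}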