Example #1
int MPIDI_CH3U_Win_allocate_shared(MPI_Aint size, int disp_unit, MPID_Info *info, MPID_Comm *comm_ptr,
                                  void **base_ptr, MPID_Win **win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *comm_self_ptr = NULL;
    MPID_Group *group_comm, *group_self;
    int result;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE_SHARED);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE_SHARED);

#ifdef HAVE_ERROR_CHECKING
    /* The baseline CH3 implementation only works with MPI_COMM_SELF */
    MPID_Comm_get_ptr( MPI_COMM_SELF, comm_self_ptr );

    mpi_errno = MPIR_Comm_group_impl(comm_ptr, &group_comm);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPIR_Comm_group_impl(comm_self_ptr, &group_self);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPIR_Group_compare_impl(group_comm, group_self, &result);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPIR_Group_free_impl(group_comm);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPIR_Group_free_impl(group_self);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    if (result != MPI_IDENT) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_RMA_SHARED, "**ch3|win_shared_comm");
    }
#endif

    mpi_errno = MPIDI_CH3U_Win_allocate(size, disp_unit, info, comm_ptr,
                                        base_ptr, win_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    MPIU_CHKPMEM_MALLOC((*win_ptr)->shm_base_addrs, void **,
                        1 /* comm_size */ * sizeof(void *),
                        mpi_errno, "(*win_ptr)->shm_base_addrs");

    (*win_ptr)->shm_base_addrs[0] = *base_ptr;

    /* Register the shared memory window free function, which will free the
       memory allocated here. */
    (*win_ptr)->RMAFns.Win_free = MPIDI_SHM_Win_free;

fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_WIN_ALLOCATE_SHARED);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
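The error-checking block above restricts this baseline CH3 path to MPI_COMM_SELF. For orientation, a minimal user-level call that would reach it might look like the following sketch (standard MPI-3 API; the buffer size and the use of MPI_COMM_SELF are illustrative assumptions, not taken from the example):

/* Hedged user-level sketch, not part of the example above. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Win win;
    double *base = NULL;

    MPI_Init(&argc, &argv);

    /* Allocate 1024 doubles of shared window memory; the baseline CH3 path
     * only accepts MPI_COMM_SELF, so that communicator is used here. */
    MPI_Win_allocate_shared(1024 * sizeof(double), sizeof(double),
                            MPI_INFO_NULL, MPI_COMM_SELF, &base, &win);

    base[0] = 42.0;                  /* the window memory is directly usable */
    printf("base[0] = %f\n", base[0]);

    MPI_Win_free(&win);              /* invokes the registered Win_free hook */
    MPI_Finalize();
    return 0;
}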
int MPID_Comm_failure_get_acked(MPID_Comm *comm_ptr, MPID_Group **group_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Group *failed_group, *comm_group;
    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);

    /* Get the group of all failed processes */
    MPIDI_CH3U_Check_for_failed_procs();
    MPIDI_CH3U_Get_failed_group(comm_ptr->dev.last_ack_rank, &failed_group);
    if (failed_group == MPID_Group_empty) {
        *group_ptr = MPID_Group_empty;
        goto fn_exit;
    }

    MPIR_Comm_group_impl(comm_ptr, &comm_group);

    /* Get the intersection of the failed processes with this communicator's group */
    MPIR_Group_intersection_impl(failed_group, comm_group, group_ptr);

    MPIR_Group_release(comm_group);
    MPIR_Group_release(failed_group);

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_FAILURE_GET_ACKED);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
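A hedged sketch of how a user could reach this routine through the MPIX_ fault-tolerance extensions, assuming MPIX_Comm_failure_ack and MPIX_Comm_failure_get_acked are exposed by this MPICH build (prototypes assumed visible from mpi.h):

#include <mpi.h>
#include <stdio.h>

static void report_acked_failures(MPI_Comm comm)
{
    MPI_Group failed;
    int nfailed;

    /* Acknowledge the failures known so far, then fetch them as a group. */
    MPIX_Comm_failure_ack(comm);
    MPIX_Comm_failure_get_acked(comm, &failed);

    MPI_Group_size(failed, &nfailed);
    printf("%d acknowledged failed process(es)\n", nfailed);

    if (failed != MPI_GROUP_EMPTY)
        MPI_Group_free(&failed);
}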
Example #3
/* comm shrink impl; assumes that standard error checking has already taken
 * place in the calling function */
int MPIR_Comm_shrink(MPID_Comm *comm_ptr, MPID_Comm **newcomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Group *global_failed, *comm_grp, *new_group_ptr;
    int attempts = 0;
    int errflag = 0, tmp_errflag = 0;

    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK);
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_SHRINK);

    /* TODO - Implement this function for intercommunicators */
    MPIR_Comm_group_impl(comm_ptr, &comm_grp);

    do {
        mpi_errno = MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_SHRINK_TAG);
        /* Ignore the mpi_errno value here as it will definitely communicate
         * with failed procs */

        mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        if (MPID_Group_empty != global_failed) MPIR_Group_release(global_failed);

        mpi_errno = MPIR_Comm_create_group(comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, newcomm_ptr);
        errflag = mpi_errno || *newcomm_ptr == NULL;

        mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, &errflag, 1, MPI_INT, MPI_MAX, comm_ptr,
            new_group_ptr, MPIR_SHRINK_TAG, &tmp_errflag);
        MPIR_Group_release(new_group_ptr);

        if (errflag) MPIU_Object_set_ref(new_group_ptr, 0);
    } while (errflag && ++attempts < 5);

    if (errflag && attempts >= 5) goto fn_fail;
    else mpi_errno = MPI_SUCCESS;

  fn_exit:
    MPIR_Group_release(comm_grp);
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_SHRINK);
    return mpi_errno;
  fn_fail:
    if (*newcomm_ptr) MPIU_Object_set_ref(*newcomm_ptr, 0);
    MPIU_Object_set_ref(global_failed, 0);
    MPIU_Object_set_ref(new_group_ptr, 0);
    goto fn_exit;
}
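At the user level this implementation sits behind MPIX_Comm_shrink; a minimal sketch, assuming the MPIX_ extension with signature int MPIX_Comm_shrink(MPI_Comm, MPI_Comm *):

#include <mpi.h>

/* Build a communicator containing only the surviving processes of comm. */
static MPI_Comm shrink_survivors(MPI_Comm comm)
{
    MPI_Comm survivors = MPI_COMM_NULL;
    MPIX_Comm_shrink(comm, &survivors);
    return survivors;
}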
Example #4
pami_result_t MPIDI_Comm_create_from_pami_geom(pami_geometry_range_t  *task_slices,
                                               size_t                  slice_count,
                                               pami_geometry_t        *geometry,
                                               void                  **cookie)
{
  int         mpi_errno = MPI_SUCCESS;
  int         num_tasks = 0;
  int        *ranks     = NULL;
  MPID_Comm  *comm_ptr  = NULL,  *new_comm_ptr  = NULL;
  MPID_Group *group_ptr = NULL,  *new_group_ptr = NULL;
  int i = 0, j = 0;

  /* Get comm_ptr for MPI_COMM_WORLD and get the group_ptr for it */
  MPID_Comm_get_ptr( MPI_COMM_WORLD, comm_ptr );
  mpi_errno = MPIR_Comm_group_impl(comm_ptr, &group_ptr);
  if (mpi_errno) 
  {
    TRACE_ERR("Error while creating group_ptr from MPI_COMM_WORLD in MPIDI_Comm_create_from_pami_geom\n");
    return PAMI_ERROR;
  }

  /* Create the ranks list from the pami_geometry_range_t array */
  for(i = 0; i < slice_count; i++)
  {
    num_tasks += (task_slices[i].hi - task_slices[i].lo) + 1;
  }
  ranks = MPIU_Calloc0(num_tasks, int);
  for(i = 0; i < slice_count; i++)
  {
    int slice_sz = (task_slices[i].hi - task_slices[i].lo) + 1;
    int k = 0;
    for(k = 0; k < slice_sz; k++)
    {
      ranks[j] = task_slices[i].lo + k;
      j++;
    }
  }

  /* Now we have all we need to create the new group. Create it */
  mpi_errno = MPIR_Group_incl_impl(group_ptr, num_tasks, ranks, &new_group_ptr);
  if (mpi_errno) 
  {
    TRACE_ERR("Error while creating new_group_ptr from group_ptr in MPIDI_Comm_create_from_pami_geom\n");
    return PAMI_ERROR;
  }

  /* Now create the communicator using the new_group_ptr */
  mpi_errno = MPIR_Comm_create_intra(comm_ptr, new_group_ptr, &new_comm_ptr);
  if (mpi_errno)
  {
    TRACE_ERR("Error while creating new_comm_ptr from group_ptr in MPIDI_Comm_create_from_pami_geom\n");
    return PAMI_ERROR;
  }

  if(new_comm_ptr)
  {
    /* Get the geometry from the communicator and set the out parameters */
    *geometry = new_comm_ptr->mpid.geometry;
    *cookie   = new_comm_ptr;
  }
  else
  {
    *geometry = PAMI_GEOMETRY_NULL;
    *cookie   = NULL;
  }

  /* Cleanup */
  MPIU_TestFree(&ranks);

  return PAMI_SUCCESS;
}
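The two loops above flatten [lo, hi] task slices into a flat rank list. A standalone sketch of the same expansion with hypothetical input values (struct slice stands in for pami_geometry_range_t):

#include <stdio.h>
#include <stdlib.h>

struct slice { int lo; int hi; };   /* stand-in for pami_geometry_range_t */

int main(void)
{
    struct slice task_slices[] = { {0, 3}, {8, 9} };   /* illustrative input */
    size_t slice_count = sizeof(task_slices) / sizeof(task_slices[0]);
    int num_tasks = 0, j = 0;

    for (size_t i = 0; i < slice_count; i++)
        num_tasks += task_slices[i].hi - task_slices[i].lo + 1;

    int *ranks = calloc(num_tasks, sizeof(int));
    for (size_t i = 0; i < slice_count; i++)
        for (int r = task_slices[i].lo; r <= task_slices[i].hi; r++)
            ranks[j++] = r;

    for (int k = 0; k < num_tasks; k++)
        printf("%d ", ranks[k]);     /* prints: 0 1 2 3 8 9 */
    printf("\n");
    free(ranks);
    return 0;
}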
Example #5
int MPIR_Comm_agree(MPIR_Comm *comm_ptr, int *flag)
{
    int mpi_errno = MPI_SUCCESS, mpi_errno_tmp = MPI_SUCCESS;
    MPIR_Group *comm_grp, *failed_grp, *new_group_ptr, *global_failed;
    int result, success = 1;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    int values[2];

    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_AGREE);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_AGREE);

    MPIR_Comm_group_impl(comm_ptr, &comm_grp);

    /* Get the locally acknowledged group of failed procs */
    mpi_errno = MPID_Comm_failure_get_acked(comm_ptr, &failed_grp);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* First decide on the group of failed procs. */
    mpi_errno = MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_AGREE_TAG);
    if (mpi_errno) errflag = MPIR_ERR_PROC_FAILED;

    mpi_errno = MPIR_Group_compare_impl(failed_grp, global_failed, &result);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* Create a subgroup without the failed procs */
    mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* If that group isn't the same as what we think is failed locally, then
     * mark it as such. */
    if (result == MPI_UNEQUAL || errflag)
        success = 0;

    /* Do an allreduce to decide whether or not anyone thinks the group
     * has changed */
    mpi_errno_tmp = MPIR_Allreduce_group(MPI_IN_PLACE, &success, 1, MPI_INT, MPI_MIN, comm_ptr,
                                         new_group_ptr, MPIR_AGREE_TAG, &errflag);
    if (!success || errflag || mpi_errno_tmp)
        success = 0;

    values[0] = success;
    values[1] = *flag;

    /* Determine both the result of this function (mpi_errno) and the result
     * of flag that will be returned to the user. */
    MPIR_Allreduce_group(MPI_IN_PLACE, values, 2, MPI_INT, MPI_BAND, comm_ptr,
                         new_group_ptr, MPIR_AGREE_TAG, &errflag);
    /* Ignore the result of the operation this time. Everyone will either
     * return a failure because of !success earlier or they will return
     * something useful for flag because of this operation. If there was a new
     * failure in between the first allreduce and the second one, it's ignored
     * here. */

    if (failed_grp != MPIR_Group_empty)
        MPIR_Group_release(failed_grp);
    MPIR_Group_release(new_group_ptr);
    MPIR_Group_release(comm_grp);
    if (global_failed != MPIR_Group_empty)
        MPIR_Group_release(global_failed);

    success = values[0];
    *flag = values[1];

    if (!success) {
        MPIR_ERR_SET(mpi_errno_tmp, MPIX_ERR_PROC_FAILED, "**mpix_comm_agree");
        MPIR_ERR_ADD(mpi_errno, mpi_errno_tmp);
    }

  fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_AGREE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
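Seen from the user side, this routine backs MPIX_Comm_agree, which combines an integer flag across the surviving processes with a bitwise AND, as the second allreduce does. A minimal sketch, assuming the MPIX_ extension with signature int MPIX_Comm_agree(MPI_Comm, int *):

#include <mpi.h>

/* Returns 1 only if every surviving rank passed my_ok != 0. */
static int all_ranks_succeeded(MPI_Comm comm, int my_ok)
{
    int flag = my_ok ? 1 : 0;
    int rc = MPIX_Comm_agree(comm, &flag);
    return (rc == MPI_SUCCESS) && flag;
}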
Example #6
/* comm shrink impl; assumes that standard error checking has already taken
 * place in the calling function */
int MPIR_Comm_shrink(MPIR_Comm * comm_ptr, MPIR_Comm ** newcomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Group *global_failed = NULL, *comm_grp = NULL, *new_group_ptr = NULL;
    int attempts = 0;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;

    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_SHRINK);

    /* TODO - Implement this function for intercommunicators */
    MPIR_Comm_group_impl(comm_ptr, &comm_grp);

    do {
        errflag = MPIR_ERR_NONE;

        MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_SHRINK_TAG);
        /* Ignore the mpi_errno value here as it will definitely communicate
         * with failed procs */

        mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        if (MPIR_Group_empty != global_failed)
            MPIR_Group_release(global_failed);

        mpi_errno = MPIR_Comm_create_group(comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, newcomm_ptr);
        if (*newcomm_ptr == NULL) {
            errflag = MPIR_ERR_PROC_FAILED;
        } else if (mpi_errno) {
            errflag =
                MPIX_ERR_PROC_FAILED ==
                MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
            MPIR_Comm_release(*newcomm_ptr);
        }

        mpi_errno = MPII_Allreduce_group(MPI_IN_PLACE, &errflag, 1, MPI_INT, MPI_MAX, comm_ptr,
                                         new_group_ptr, MPIR_SHRINK_TAG, &errflag);
        MPIR_Group_release(new_group_ptr);

        if (errflag) {
            if (*newcomm_ptr != NULL && MPIR_Object_get_ref(*newcomm_ptr) > 0) {
                MPIR_Object_set_ref(*newcomm_ptr, 1);
                MPIR_Comm_release(*newcomm_ptr);
            }
            if (MPIR_Object_get_ref(new_group_ptr) > 0) {
                MPIR_Object_set_ref(new_group_ptr, 1);
                MPIR_Group_release(new_group_ptr);
            }
        }
    } while (errflag && ++attempts < 5);

    if (errflag && attempts >= 5)
        goto fn_fail;
    else
        mpi_errno = MPI_SUCCESS;

  fn_exit:
    MPIR_Group_release(comm_grp);
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_SHRINK);
    return mpi_errno;
  fn_fail:
    if (*newcomm_ptr)
        MPIR_Object_set_ref(*newcomm_ptr, 0);
    MPIR_Object_set_ref(global_failed, 0);
    MPIR_Object_set_ref(new_group_ptr, 0);
    goto fn_exit;
}
Example #7
int MPIDI_CH3U_Get_failed_group(int last_rank, MPIR_Group **failed_group)
{
    char *c;
    int i, mpi_errno = MPI_SUCCESS, rank;
    UT_array *failed_procs = NULL;
    MPIR_Group *world_group;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_GET_FAILED_GROUP);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_GET_FAILED_GROUP);

    MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER, VERBOSE, "Getting failed group with %d as last acknowledged\n", last_rank);

    if (-1 == last_rank) {
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "No failure acknowledged");
        *failed_group = MPIR_Group_empty;
        goto fn_exit;
    }

    if (*MPIDI_failed_procs_string == '\0') {
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "Found no failed ranks");
        *failed_group = MPIR_Group_empty;
        goto fn_exit;
    }

    utarray_new(failed_procs, &ut_int_icd);

    /* parse list of failed processes.  This is a comma separated list
       of ranks or ranges of ranks (e.g., "1, 3-5, 11") */
    i = 0;
    c = MPIDI_failed_procs_string;
    while(1) {
        parse_rank(&rank);
        ++i;
        MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER, VERBOSE, "Found failed rank: %d", rank);
        utarray_push_back(failed_procs, &rank);
        MPIDI_last_known_failed = rank;
        MPIR_ERR_CHKINTERNAL(*c != ',' && *c != '\0', mpi_errno, "error parsing failed process list");
        if (*c == '\0' || last_rank == rank)
            break;
        ++c; /* skip ',' */
    }

    /* Create group of failed processes for comm_world.  Failed groups for other
       communicators can be created from this one using group_intersection. */
    mpi_errno = MPIR_Comm_group_impl(MPIR_Process.comm_world, &world_group);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_incl_impl(world_group, i, ut_int_array(failed_procs), failed_group);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Group_release(world_group);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_GET_FAILED_GROUP);
    if (failed_procs)
        utarray_free(failed_procs);
    return mpi_errno;
fn_oom:
    MPIR_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "utarray");
fn_fail:
    goto fn_exit;
}
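The parse_rank() helper used above is not shown. A hypothetical standalone parser for the same comma-separated list format ("1, 3-5, 11") might look like this sketch; it is not the helper the example actually calls:

#include <stdio.h>
#include <stdlib.h>

static void parse_failed_list(const char *s)
{
    const char *c = s;
    while (*c != '\0') {
        char *end;
        long lo = strtol(c, &end, 10);   /* strtol skips leading blanks */
        long hi = lo;
        if (end == c)
            break;                       /* malformed input: stop parsing */
        if (*end == '-')                 /* a range such as "3-5" */
            hi = strtol(end + 1, &end, 10);
        for (long r = lo; r <= hi; r++)
            printf("failed rank: %ld\n", r);
        c = (*end == ',') ? end + 1 : end;   /* skip the separator */
    }
}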
Example #8
static int MPIDI_CH3I_SHM_Wins_match(MPIR_Win ** win_ptr, MPIR_Win ** matched_win,
                                     MPI_Aint ** base_shm_offs_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int i, comm_size;
    int node_size, node_rank, shm_node_size;
    int group_diff;
    int base_diff;

    MPIR_Comm *node_comm_ptr = NULL, *shm_node_comm_ptr = NULL;
    int *node_ranks = NULL, *node_ranks_in_shm_node = NULL;
    MPIR_Group *node_group_ptr = NULL, *shm_node_group_ptr = NULL;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPI_Aint *base_shm_offs;

    MPIDI_SHM_Win_t *elem = shm_wins_list;

    MPIR_CHKLMEM_DECL(2);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);
    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);

    *matched_win = NULL;
    base_shm_offs = *base_shm_offs_ptr;
    node_comm_ptr = (*win_ptr)->comm_ptr->node_comm;
    MPIR_Assert(node_comm_ptr != NULL);
    node_size = node_comm_ptr->local_size;
    node_rank = node_comm_ptr->rank;

    comm_size = (*win_ptr)->comm_ptr->local_size;

    MPIR_CHKLMEM_MALLOC(node_ranks, int *, node_size * sizeof(int), mpi_errno, "node_ranks");
    MPIR_CHKLMEM_MALLOC(node_ranks_in_shm_node, int *, node_size * sizeof(int),
                        mpi_errno, "node_ranks_in_shm_comm");

    for (i = 0; i < node_size; i++) {
        node_ranks[i] = i;
    }

    mpi_errno = MPIR_Comm_group_impl(node_comm_ptr, &node_group_ptr);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    while (elem != NULL) {
        MPIR_Win *shm_win = elem->win;
        if (!shm_win)
            MPIDI_SHM_Wins_next_and_continue(elem);

        /* Compare node_comm.
         *
         * Shared memory is only supported when the new node_comm is equal to,
         * or a subset of, the shm window's node_comm.  The reverse (the shm
         * node_comm being a subset of the new node_comm) is not supported,
         * because some processes of the new node_comm would then have no
         * shared segment, while the RMA path only checks the node_id of a
         * target process to decide whether it is a shm target.  */
        shm_node_comm_ptr = shm_win->comm_ptr->node_comm;
        shm_node_size = shm_node_comm_ptr->local_size;

        if (node_size > shm_node_size)
            MPIDI_SHM_Wins_next_and_continue(elem);

        mpi_errno = MPIR_Comm_group_impl(shm_win->comm_ptr, &shm_node_group_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Group_translate_ranks_impl(node_group_ptr, node_size,
                                                    node_ranks, shm_node_group_ptr,
                                                    node_ranks_in_shm_node);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Group_free_impl(shm_node_group_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        shm_node_group_ptr = NULL;

        group_diff = 0;
        for (i = 0; i < node_size; i++) {
            /* not exist in shm_comm->node_comm */
            if (node_ranks_in_shm_node[i] == MPI_UNDEFINED) {
                group_diff = 1;
                break;
            }
        }
        if (group_diff)
            MPIDI_SHM_Wins_next_and_continue(elem);

        /* Gather the offset of base_addr from all local processes.  Match only
         * when all of them fall inside the shm segment of the current shm_win.
         *
         * Note that this collective call must be made after the group-match
         * check, to guarantee that every local process can participate in
         * it. */
        base_shm_offs[node_rank] = (MPI_Aint) ((*win_ptr)->base)
            - (MPI_Aint) (shm_win->shm_base_addr);
        mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                                        base_shm_offs, 1, MPI_AINT, node_comm_ptr, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

        base_diff = 0;
        for (i = 0; i < comm_size; ++i) {
            int i_node_rank = (*win_ptr)->comm_ptr->intranode_table[i];
            if (i_node_rank >= 0) {
                MPIR_Assert(i_node_rank < node_size);

                if (base_shm_offs[i_node_rank] < 0 ||
                    base_shm_offs[i_node_rank] + (*win_ptr)->basic_info_table[i].size >
                    shm_win->shm_segment_len) {
                    base_diff = 1;
                    break;
                }
            }
        }

        if (base_diff)
            MPIDI_SHM_Wins_next_and_continue(elem);

        /* Found the first matched shm_win */
        *matched_win = shm_win;
        break;
    }

  fn_exit:
    if (node_group_ptr != NULL)
        mpi_errno = MPIR_Group_free_impl(node_group_ptr);
    /* Only free it here when group_translate_ranks fails. */
    if (shm_node_group_ptr != NULL)
        mpi_errno = MPIR_Group_free_impl(shm_node_group_ptr);

    MPIR_CHKLMEM_FREEALL();
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_SHM_WINS_MATCH);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
Example #9
/*@

MPI_Comm_group - Accesses the group associated with given communicator

Input Parameters:
. comm - Communicator (handle)

Output Parameters:
. group - Group in communicator (handle)

Notes:
.N COMMNULL

.N ThreadSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
@*/
int MPI_Comm_group(MPI_Comm comm, MPI_Group *group)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *comm_ptr = NULL;
    MPID_Group *group_ptr;
    MPID_MPI_STATE_DECL(MPID_STATE_MPI_COMM_GROUP);

    MPIR_ERRTEST_INITIALIZED_ORDIE();
    
    MPIU_THREAD_CS_ENTER(ALLFUNC,);
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_COMM_GROUP);
    
    /* Validate parameters, especially handles needing to be converted */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
	    MPIR_ERRTEST_COMM(comm, mpi_errno);
	}
        MPID_END_ERROR_CHECKS;
    }
    
#   endif /* HAVE_ERROR_CHECKING */

    /* Convert MPI object handles to object pointers */
    MPID_Comm_get_ptr( comm, comm_ptr );
    
    /* Validate parameters and objects (post conversion) */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            /* Validate comm_ptr */
            MPID_Comm_valid_ptr( comm_ptr, mpi_errno );
	    /* If comm_ptr is not valid, it will be reset to null */
            if (mpi_errno) goto fn_fail;
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */
    
    mpi_errno = MPIR_Comm_group_impl(comm_ptr, &group_ptr);
    if (mpi_errno) goto fn_fail;
    *group = group_ptr->handle;
    /* ... end of body of routine ... */

  fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_COMM_GROUP);
    MPIU_THREAD_CS_EXIT(ALLFUNC,);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
    {
	mpi_errno = MPIR_Err_create_code(
	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**mpi_comm_group",
	    "**mpi_comm_group %C %p", comm, group);
    }
#   endif
    mpi_errno = MPIR_Err_return_comm( comm_ptr, FCNAME, mpi_errno );
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
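A hedged usage sketch matching the man-page block above (standard MPI calls only; the printed message is illustrative):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Group world_group;
    int grp_rank, grp_size;

    MPI_Init(&argc, &argv);

    /* Retrieve the group underlying MPI_COMM_WORLD. */
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    MPI_Group_rank(world_group, &grp_rank);
    MPI_Group_size(world_group, &grp_size);
    printf("rank %d of %d in the world group\n", grp_rank, grp_size);

    MPI_Group_free(&world_group);
    MPI_Finalize();
    return 0;
}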
int MPID_Comm_failed_bitarray(MPID_Comm *comm_ptr, uint32_t **bitarray, int acked)
{
    int mpi_errno = MPI_SUCCESS;
    int size, i;
    uint32_t bit;
    int *failed_procs, *group_procs;
    MPID_Group *failed_group, *comm_group;
    MPIU_CHKLMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_COMM_FAILED_BITARRAY);

    MPIDI_FUNC_ENTER(MPID_STATE_COMM_FAILED_BITARRAY);

    /* TODO - Fix this for intercommunicators */
    size = comm_ptr->local_size;

    /* We can fit sizeof(uint32_t) * 8 ranks in one uint32_t word, so divide
     * the size by that */
    /* This buffer will be handed back to the calling function so we use a
     * "real" malloc here and expect the caller to free the buffer later. The
     * other buffers in this function are temporary and will be automatically
     * cleaned up at the end of the function. */
    *bitarray = (uint32_t *) MPIU_Malloc(sizeof(uint32_t) * (size / (sizeof(uint32_t) * 8)+1));
    if (!(*bitarray)) {
        fprintf(stderr, "Could not allocate space for bitarray\n");
        PMPI_Abort(MPI_COMM_WORLD, 1);
    }
    for (i = 0; i <= size/(sizeof(uint32_t)*8); i++) (*bitarray)[i] = 0;

    mpi_errno = MPIDI_CH3U_Check_for_failed_procs();
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    if (acked)
        MPIDI_CH3U_Get_failed_group(comm_ptr->dev.last_ack_rank, &failed_group);
    else
        MPIDI_CH3U_Get_failed_group(-2, &failed_group);

    if (failed_group == MPID_Group_empty) goto fn_exit;


    MPIU_CHKLMEM_MALLOC(group_procs, int *, sizeof(int)*failed_group->size, mpi_errno, "group_procs");
    for (i = 0; i < failed_group->size; i++) group_procs[i] = i;
    MPIU_CHKLMEM_MALLOC(failed_procs, int *, sizeof(int)*failed_group->size, mpi_errno, "failed_procs");

    MPIR_Comm_group_impl(comm_ptr, &comm_group);

    MPIR_Group_translate_ranks_impl(failed_group, failed_group->size, group_procs, comm_group, failed_procs);

    /* The bits will actually be ordered in descending order rather than
     * ascending. This is purely for readability since it makes no practical
     * difference. So if the bits look like this:
     *
     * 10001100 01001000 00000000 00000001
     *
     * Then processes 1, 5, 6, 9, 12, and 32 have failed. */
    for (i = 0; i < failed_group->size; i++) {
        bit = 0x80000000;
        bit >>= failed_procs[i] % (sizeof(uint32_t) * 8);

        (*bitarray)[failed_procs[i] / (sizeof(uint32_t) * 8)] |= bit;
    }

    MPIR_Group_free_impl(comm_group);

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_FUNC_EXIT(MPID_STATE_COMM_FAILED_BITARRAY);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
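For reference, a hypothetical helper (not part of the example) that decodes the bitarray built above, using the same layout in which rank 0 maps to the most significant bit of word 0:

#include <stdint.h>
#include <stdio.h>

static void print_failed_from_bitarray(const uint32_t *bitarray, int comm_size)
{
    int rank;
    for (rank = 0; rank < comm_size; rank++) {
        uint32_t bit = 0x80000000u >> (rank % 32);
        if (bitarray[rank / 32] & bit)
            printf("rank %d has failed\n", rank);
    }
}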