예제 #1
0
int MPIR_Comm_commit(MPID_Comm * comm)
{
    int mpi_errno = MPI_SUCCESS;
    int num_local = -1, num_external = -1;
    int local_rank = -1, external_rank = -1;
    int *local_procs = NULL, *external_procs = NULL;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_COMMIT);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_COMMIT);

    /* It's OK to relax these assertions, but we should do so very
     * intentionally.  For now this function is the only place that we create
     * our hierarchy of communicators */
    MPIU_Assert(comm->node_comm == NULL);
    MPIU_Assert(comm->node_roots_comm == NULL);

    mpi_errno = set_collops(comm);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /* Notify device of communicator creation */
    mpi_errno = MPID_Dev_comm_create_hook(comm);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    MPIR_Comm_map_free(comm);

    if (comm->comm_kind == MPID_INTRACOMM) {

        mpi_errno = MPIU_Find_local_and_external(comm,
                                                 &num_local, &local_rank, &local_procs,
                                                 &num_external, &external_rank, &external_procs,
                                                 &comm->intranode_table, &comm->internode_table);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno) {
            if (MPIR_Err_is_fatal(mpi_errno))
                MPIR_ERR_POP(mpi_errno);

            /* Non-fatal errors simply mean that this communicator will not have
             * any node awareness.  Node-aware collectives are an optimization. */
            MPIU_DBG_MSG_P(COMM, VERBOSE, "MPIU_Find_local_and_external failed for comm_ptr=%p",
                           comm);
            if (comm->intranode_table)
                MPIU_Free(comm->intranode_table);
            if (comm->internode_table)
                MPIU_Free(comm->internode_table);

            mpi_errno = MPI_SUCCESS;
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */

        /* defensive checks */
        MPIU_Assert(num_local > 0);
        MPIU_Assert(num_local > 1 || external_rank >= 0);
        MPIU_Assert(external_rank < 0 || external_procs != NULL);

        /* if the node_roots_comm and comm would be the same size, then creating
         * the second communicator is useless and wasteful. */
        if (num_external == comm->remote_size) {
            MPIU_Assert(num_local == 1);
            goto fn_exit;
        }

        /* we don't need a local comm if this process is the only one on this node */
        if (num_local > 1) {
            mpi_errno = MPIR_Comm_create(&comm->node_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);

            comm->node_comm->context_id = comm->context_id + MPID_CONTEXT_INTRANODE_OFFSET;
            comm->node_comm->recvcontext_id = comm->node_comm->context_id;
            comm->node_comm->rank = local_rank;
            comm->node_comm->comm_kind = MPID_INTRACOMM;
            comm->node_comm->hierarchy_kind = MPID_HIERARCHY_NODE;
            comm->node_comm->local_comm = NULL;
            MPIU_DBG_MSG_D(CH3_OTHER, VERBOSE, "Create node_comm=%p\n", comm->node_comm);

            comm->node_comm->local_size = num_local;
            comm->node_comm->remote_size = num_local;

            MPIR_Comm_map_irregular(comm->node_comm, comm, local_procs,
                                    num_local, MPIR_COMM_MAP_DIR_L2L, NULL);

            mpi_errno = set_collops(comm->node_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);

            /* Notify device of communicator creation */
            mpi_errno = MPID_Dev_comm_create_hook(comm->node_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            /* don't call MPIR_Comm_commit here */

            MPIR_Comm_map_free(comm->node_comm);
        }


        /* this process may not be a member of the node_roots_comm */
        if (local_rank == 0) {
            mpi_errno = MPIR_Comm_create(&comm->node_roots_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);

            comm->node_roots_comm->context_id = comm->context_id + MPID_CONTEXT_INTERNODE_OFFSET;
            comm->node_roots_comm->recvcontext_id = comm->node_roots_comm->context_id;
            comm->node_roots_comm->rank = external_rank;
            comm->node_roots_comm->comm_kind = MPID_INTRACOMM;
            comm->node_roots_comm->hierarchy_kind = MPID_HIERARCHY_NODE_ROOTS;
            comm->node_roots_comm->local_comm = NULL;

            comm->node_roots_comm->local_size = num_external;
            comm->node_roots_comm->remote_size = num_external;

            MPIR_Comm_map_irregular(comm->node_roots_comm, comm,
                                    external_procs, num_external, MPIR_COMM_MAP_DIR_L2L, NULL);

            mpi_errno = set_collops(comm->node_roots_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);

            /* Notify device of communicator creation */
            mpi_errno = MPID_Dev_comm_create_hook(comm->node_roots_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            /* don't call MPIR_Comm_commit here */

            MPIR_Comm_map_free(comm->node_roots_comm);
        }

        comm->hierarchy_kind = MPID_HIERARCHY_PARENT;
    }

  fn_exit:
    if (external_procs != NULL)
        MPIU_Free(external_procs);
    if (local_procs != NULL)
        MPIU_Free(local_procs);

    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_COMMIT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
예제 #2
0
static int MPIDI_CH3I_Initialize_tmp_comm(MPID_Comm **comm_pptr, 
					  MPIDI_VC_t *vc_ptr, int is_low_group, int context_id_offset)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *tmp_comm, *commself_ptr;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_INITIALIZE_TMP_COMM);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_INITIALIZE_TMP_COMM);

    MPID_Comm_get_ptr( MPI_COMM_SELF, commself_ptr );

    /* WDG-old code allocated a context id that was then discarded */
    mpi_errno = MPIR_Comm_create(&tmp_comm);
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_POP(mpi_errno);
    }
    /* fill in all the fields of tmp_comm. */

    /* We use the second half of the context ID bits for dynamic
     * processes. This assumes that the context ID mask array is made
     * up of uint32_t's. */
    /* FIXME: This code is still broken for the following case:
     * If the same process opens connections to the multiple
     * processes, this context ID might get out of sync.
     */
    tmp_comm->context_id     = MPID_CONTEXT_SET_FIELD(DYNAMIC_PROC, context_id_offset, 1);
    tmp_comm->recvcontext_id = tmp_comm->context_id;

    /* sanity: the INVALID context ID value could potentially conflict with the
     * dynamic proccess space */
    MPIU_Assert(tmp_comm->context_id     != MPIR_INVALID_CONTEXT_ID);
    MPIU_Assert(tmp_comm->recvcontext_id != MPIR_INVALID_CONTEXT_ID);

    /* FIXME - we probably need a unique context_id. */
    tmp_comm->remote_size = 1;

    /* Fill in new intercomm */
    tmp_comm->local_size   = 1;
    tmp_comm->rank         = 0;
    tmp_comm->comm_kind    = MPID_INTERCOMM;
    tmp_comm->local_comm   = NULL;
    tmp_comm->is_low_group = is_low_group;

    /* No pg structure needed since vc has already been set up 
       (connection has been established). */

    /* Point local vcr, vcrt at those of commself_ptr */
    /* FIXME: Explain why */
    tmp_comm->local_vcrt = commself_ptr->vcrt;
    MPID_VCRT_Add_ref(commself_ptr->vcrt);
    tmp_comm->local_vcr  = commself_ptr->vcr;

    /* No pg needed since connection has already been formed. 
       FIXME - ensure that the comm_release code does not try to
       free an unallocated pg */

    /* Set up VC reference table */
    mpi_errno = MPID_VCRT_Create(tmp_comm->remote_size, &tmp_comm->vcrt);
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**init_vcrt");
    }
    mpi_errno = MPID_VCRT_Get_ptr(tmp_comm->vcrt, &tmp_comm->vcr);
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**init_getptr");
    }

    /* FIXME: Why do we do a dup here? */
    MPID_VCR_Dup(vc_ptr, tmp_comm->vcr);

    /* Even though this is a tmp comm and we don't call
       MPI_Comm_commit, we still need to call the creation hook
       because the destruction hook will be called in comm_release */
    mpi_errno = MPID_Dev_comm_create_hook(tmp_comm);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    
    *comm_pptr = tmp_comm;

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_INITIALIZE_TMP_COMM);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
예제 #3
0
/* comm create impl for intracommunicators, assumes that the standard error
 * checking has already taken place in the calling function */
PMPI_LOCAL int MPIR_Comm_create_intra(MPID_Comm *comm_ptr, MPID_Group *group_ptr, MPI_Comm *newcomm)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Context_id_t new_context_id = 0;
    MPID_Comm *newcomm_ptr = NULL;
    int *mapping = NULL;
    int n;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_INTRA);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_CREATE_INTRA);

    MPIU_Assert(comm_ptr->comm_kind == MPID_INTRACOMM);

    n = group_ptr->size;
    *newcomm = MPI_COMM_NULL;

    /* Create a new communicator from the specified group members */

    /* Creating the context id is collective over the *input* communicator,
       so it must be created before we decide if this process is a
       member of the group */
    /* In the multi-threaded case, MPIR_Get_contextid assumes that the
       calling routine already holds the single criticial section */
    /* TODO should be converted to use MPIR_Get_contextid_sparse instead */
    mpi_errno = MPIR_Get_contextid( comm_ptr, &new_context_id );
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    MPIU_Assert(new_context_id != 0);

    if (group_ptr->rank != MPI_UNDEFINED) {
        MPID_VCR *mapping_vcr = NULL;

        mpi_errno = MPIR_Comm_create_calculate_mapping(group_ptr, comm_ptr, 
						       &mapping_vcr, &mapping);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        /* Get the new communicator structure and context id */

        mpi_errno = MPIR_Comm_create( &newcomm_ptr );
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        newcomm_ptr->recvcontext_id = new_context_id;
        newcomm_ptr->rank           = group_ptr->rank;
        newcomm_ptr->comm_kind      = comm_ptr->comm_kind;
        /* Since the group has been provided, let the new communicator know
           about the group */
        newcomm_ptr->local_comm     = 0;
        newcomm_ptr->local_group    = group_ptr;
        MPIR_Group_add_ref( group_ptr );

        newcomm_ptr->remote_group   = group_ptr;
        MPIR_Group_add_ref( group_ptr );
        newcomm_ptr->context_id     = newcomm_ptr->recvcontext_id;
        newcomm_ptr->remote_size    = newcomm_ptr->local_size = n;

        /* Setup the communicator's vc table.  This is for the remote group,
           which is the same as the local group for intracommunicators */
        mpi_errno = MPIR_Comm_create_create_and_map_vcrt(n,
                                                         mapping,
                                                         mapping_vcr,
                                                         &newcomm_ptr->vcrt,
                                                         &newcomm_ptr->vcr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        /* Notify the device of this new communicator */
        MPID_Dev_comm_create_hook( newcomm_ptr );
        mpi_errno = MPIR_Comm_commit(newcomm_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        MPIU_OBJ_PUBLISH_HANDLE(*newcomm, newcomm_ptr->handle);
    }
    else {
        /* This process is not in the group */
        MPIR_Free_contextid( new_context_id );
        new_context_id = 0;
    }

fn_exit:
    if (mapping)
        MPIU_Free(mapping);

    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_CREATE_INTRA);
    return mpi_errno;
fn_fail:
    if (newcomm_ptr != NULL) {
        MPIR_Comm_release(newcomm_ptr, 0/*isDisconnect*/);
        new_context_id = 0; /* MPIR_Comm_release frees the new ctx id */
    }
    if (new_context_id != 0)
        MPIR_Free_contextid(new_context_id);
    *newcomm = MPI_COMM_NULL;

    goto fn_exit;
}