Beispiel #1
0
int free_2level_comm (MPID_Comm* comm_ptr)
{
    MPID_Comm *shmem_comm_ptr=NULL; 
    MPID_Comm *leader_comm_ptr=NULL;
    MPID_Comm *allgather_comm_ptr=NULL;
    int local_rank=0;
    int mpi_errno=MPI_SUCCESS;

    if (comm_ptr->ch.leader_map != NULL)  { 
        MPIU_Free(comm_ptr->ch.leader_map);  
    }
    if (comm_ptr->ch.leader_rank != NULL) { 
        MPIU_Free(comm_ptr->ch.leader_rank); 
    }

    MPID_Comm_get_ptr((comm_ptr->ch.shmem_comm), shmem_comm_ptr );
    MPID_Comm_get_ptr((comm_ptr->ch.leader_comm), leader_comm_ptr );
    if(comm_ptr->ch.allgather_comm_ok == 1)  { 
       MPID_Comm_get_ptr((comm_ptr->ch.allgather_comm), allgather_comm_ptr );
       MPIU_Free(comm_ptr->ch.allgather_new_ranks); 
    } 

    local_rank = shmem_comm_ptr->rank; 

    if(local_rank == 0) { 
        if(comm_ptr->ch.node_sizes != NULL) { 
            MPIU_Free(comm_ptr->ch.node_sizes); 
        } 
    } 
    if (local_rank == 0 && leader_comm_ptr != NULL) { 
        mpi_errno = MPIR_Comm_release(leader_comm_ptr, 0);
        if (mpi_errno != MPI_SUCCESS) { 
            goto fn_fail;
        } 
    }
    if (shmem_comm_ptr != NULL)  { 
        mpi_errno = MPIR_Comm_release(shmem_comm_ptr, 0);
        if (mpi_errno != MPI_SUCCESS) { 
            goto fn_fail;
        } 
     }
    if (allgather_comm_ptr != NULL)  { 
        mpi_errno = MPIR_Comm_release(allgather_comm_ptr, 0);
        if (mpi_errno != MPI_SUCCESS) { 
            goto fn_fail;
        } 
     }

    clear_2level_comm(comm_ptr);
    fn_exit:
       return mpi_errno;
    fn_fail:
       goto fn_exit;
}
Beispiel #2
0
void MPIDI_CH3_Request_destroy(MPID_Request * req)
{
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQUEST_DESTROY);
    
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQUEST_DESTROY);
    MPIU_DBG_MSG_P(CH3_CHANNEL,VERBOSE,
		   "freeing request, handle=0x%08x", req->handle);
    
#ifdef MPICH_DBG_OUTPUT
    /*MPIU_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPID_REQUEST);*/
    if (HANDLE_GET_MPI_KIND(req->handle) != MPID_REQUEST)
    {
	int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
                      FCNAME, __LINE__, MPI_ERR_OTHER, 
                      "**invalid_handle", "**invalid_handle %d", req->handle);
	MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
    }
    /* XXX DJG FIXME should we be checking this? */
    /*MPIU_Assert(req->ref_count == 0);*/
    if (req->ref_count != 0)
    {
	int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                       FCNAME, __LINE__, MPI_ERR_OTHER, 
              "**invalid_refcount", "**invalid_refcount %d", req->ref_count);
	MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
    }
#endif

    /* FIXME: We need a better way to handle these so that we
       do not always need to initialize these fields and check them
       when we destroy a request */
    /* FIXME: We need a way to call these routines ONLY when the 
       related ref count has become zero. */
    if (req->comm != NULL) {
	MPIR_Comm_release(req->comm, 0);
    }

    if (req->greq_fns != NULL) {
        MPIU_Free(req->greq_fns);
    }

    if (req->dev.datatype_ptr != NULL) {
	MPID_Datatype_release(req->dev.datatype_ptr);
    }

    if (req->dev.segment_ptr != NULL) {
	MPID_Segment_free(req->dev.segment_ptr);
    }

    if (MPIDI_Request_get_srbuf_flag(req)) {
	MPIDI_CH3U_SRBuf_free(req);
    }

    MPIU_Handle_obj_free(&MPID_Request_mem, req);
    
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQUEST_DESTROY);
}
Beispiel #3
0
/*
 * This function does all of the work or either revoking the communciator for
 * the first time or keeping track of an ongoing revocation.
 *
 * comm_ptr  - The communicator being revoked
 * is_remote - If we received the revocation from a remote process, this should
 *             be set to true. This way we'll know to decrement the counter twice
 *             (once for our local revocation and once for the remote).
 */
int MPID_Comm_revoke(MPIR_Comm *comm_ptr, int is_remote)
{
    MPIDI_VC_t *vc;
    MPL_IOV iov[MPL_IOV_LIMIT];
    int mpi_errno = MPI_SUCCESS;
    int i, size, my_rank;
    MPIR_Request *request;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_revoke_t *revoke_pkt = &upkt.revoke;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_COMM_REVOKE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_COMM_REVOKE);

    if (0 == comm_ptr->revoked) {
        /* Mark the communicator as revoked locally */
        comm_ptr->revoked = 1;
        if (comm_ptr->node_comm) comm_ptr->node_comm->revoked = 1;
        if (comm_ptr->node_roots_comm) comm_ptr->node_roots_comm->revoked = 1;

        /* Start a counter to track how many revoke messages we've received from
         * other ranks */
        comm_ptr->dev.waiting_for_revoke = comm_ptr->local_size - 1 - is_remote; /* Subtract the processes who already know about the revoke */
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, VERBOSE, (MPL_DBG_FDEST, "Comm %08x waiting_for_revoke: %d", comm_ptr->handle, comm_ptr->dev.waiting_for_revoke));

        /* Keep a reference to this comm so it doesn't get destroyed while
         * it's being revoked */
        MPIR_Comm_add_ref(comm_ptr);

        /* Send out the revoke message */
        MPIDI_Pkt_init(revoke_pkt, MPIDI_CH3_PKT_REVOKE);
        revoke_pkt->revoked_comm = comm_ptr->context_id;

        size = comm_ptr->remote_size;
        my_rank = comm_ptr->rank;
        for (i = 0; i < size; i++) {
            if (i == my_rank) continue;
            request = NULL;

            MPIDI_Comm_get_vc_set_active(comm_ptr, i, &vc);

            iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST) revoke_pkt;
            iov[0].MPL_IOV_LEN = sizeof(*revoke_pkt);

            MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
            mpi_errno = MPIDI_CH3_iStartMsgv(vc, iov, 1, &request);
            MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
            if (mpi_errno) comm_ptr->dev.waiting_for_revoke--;
            if (NULL != request)
                /* We don't need to keep a reference to this request. The
                 * progress engine will keep a reference until it completes
                 * later */
                MPIR_Request_free(request);
        }

        /* Check to see if we are done revoking */
        if (comm_ptr->dev.waiting_for_revoke == 0) {
            MPIR_Comm_release(comm_ptr);
        }

        /* Go clean up all of the existing operations involving this
         * communicator. This includes completing existing MPI requests, MPID
         * requests, and cleaning up the unexpected queue to make sure there
         * aren't any unexpected messages hanging around. */

        /* Clean up the receive and unexpected queues */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
        MPIDI_CH3U_Clean_recvq(comm_ptr);
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
    } else if (is_remote)  { /* If this is local, we've already revoked and don't need to do it again. */
        /* Decrement the revoke counter */
        comm_ptr->dev.waiting_for_revoke--;
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, VERBOSE, (MPL_DBG_FDEST, "Comm %08x waiting_for_revoke: %d", comm_ptr->handle, comm_ptr->dev.waiting_for_revoke));

        /* Check to see if we are done revoking */
        if (comm_ptr->dev.waiting_for_revoke == 0) {
            MPIR_Comm_release(comm_ptr);
        }
    }

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_COMM_REVOKE);
    return MPI_SUCCESS;
}
Beispiel #4
0
int MPIR_Comm_free_impl(MPID_Comm * comm_ptr)
{
    return MPIR_Comm_release(comm_ptr, 0);
}
Beispiel #5
0
int MPIR_Comm_delete_internal(MPID_Comm * comm_ptr)
{
    int in_use;
    int mpi_errno = MPI_SUCCESS;
    MPID_MPI_STATE_DECL(MPID_STATE_COMM_DELETE_INTERNAL);

    MPID_MPI_FUNC_ENTER(MPID_STATE_COMM_DELETE_INTERNAL);

    MPIU_Assert(MPIU_Object_get_ref(comm_ptr) == 0);    /* sanity check */

    /* Remove the attributes, executing the attribute delete routine.
     * Do this only if the attribute functions are defined.
     * This must be done first, because if freeing the attributes
     * returns an error, the communicator is not freed */
    if (MPIR_Process.attr_free && comm_ptr->attributes) {
        /* Temporarily add a reference to this communicator because
         * the attr_free code requires a valid communicator */
        MPIU_Object_add_ref(comm_ptr);
        mpi_errno = MPIR_Process.attr_free(comm_ptr->handle, &comm_ptr->attributes);
        /* Release the temporary reference added before the call to
         * attr_free */
        MPIU_Object_release_ref(comm_ptr, &in_use);
    }

    /* If the attribute delete functions return failure, the
     * communicator must not be freed.  That is the reason for the
     * test on mpi_errno here. */
    if (mpi_errno == MPI_SUCCESS) {
        /* If this communicator is our parent, and we're disconnecting
         * from the parent, mark that fact */
        if (MPIR_Process.comm_parent == comm_ptr)
            MPIR_Process.comm_parent = NULL;

        /* Notify the device that the communicator is about to be
         * destroyed */
        mpi_errno = MPID_Dev_comm_destroy_hook(comm_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        /* Free info hints */
        if (comm_ptr->info != NULL) {
            MPIU_Info_free(comm_ptr->info);
        }

        /* release our reference to the collops structure, comes after the
         * destroy_hook to allow the device to manage these vtables in a custom
         * fashion */
        if (comm_ptr->coll_fns && --comm_ptr->coll_fns->ref_count == 0) {
            MPIU_Free(comm_ptr->coll_fns);
            comm_ptr->coll_fns = NULL;
        }

        if (comm_ptr->comm_kind == MPID_INTERCOMM && comm_ptr->local_comm)
            MPIR_Comm_release(comm_ptr->local_comm);

        /* Free the local and remote groups, if they exist */
        if (comm_ptr->local_group)
            MPIR_Group_release(comm_ptr->local_group);
        if (comm_ptr->remote_group)
            MPIR_Group_release(comm_ptr->remote_group);

        /* free the intra/inter-node communicators, if they exist */
        if (comm_ptr->node_comm)
            MPIR_Comm_release(comm_ptr->node_comm);
        if (comm_ptr->node_roots_comm)
            MPIR_Comm_release(comm_ptr->node_roots_comm);
        if (comm_ptr->intranode_table != NULL)
            MPIU_Free(comm_ptr->intranode_table);
        if (comm_ptr->internode_table != NULL)
            MPIU_Free(comm_ptr->internode_table);

        /* Free the context value.  This should come after freeing the
         * intra/inter-node communicators since those free calls won't
         * release this context ID and releasing this before then could lead
         * to races once we make threading finer grained. */
        /* This must be the recvcontext_id (i.e. not the (send)context_id)
         * because in the case of intercommunicators the send context ID is
         * allocated out of the remote group's bit vector, not ours. */
        MPIR_Free_contextid(comm_ptr->recvcontext_id);

        /* We need to release the error handler */
        if (comm_ptr->errhandler &&
            !(HANDLE_GET_KIND(comm_ptr->errhandler->handle) == HANDLE_KIND_BUILTIN)) {
            int errhInuse;
            MPIR_Errhandler_release_ref(comm_ptr->errhandler, &errhInuse);
            if (!errhInuse) {
                MPIU_Handle_obj_free(&MPID_Errhandler_mem, comm_ptr->errhandler);
            }
        }

        /* Remove from the list of active communicators if
         * we are supporting message-queue debugging.  We make this
         * conditional on having debugger support since the
         * operation is not constant-time */
        MPIR_COMML_FORGET(comm_ptr);

        /* Check for predefined communicators - these should not
         * be freed */
        if (!(HANDLE_GET_KIND(comm_ptr->handle) == HANDLE_KIND_BUILTIN))
            MPIU_Handle_obj_free(&MPID_Comm_mem, comm_ptr);
    }
    else {
        /* If the user attribute free function returns an error,
         * then do not free the communicator */
        MPIR_Comm_add_ref(comm_ptr);
    }

  fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_COMM_DELETE_INTERNAL);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Beispiel #6
0
int MPIR_Intercomm_merge_impl(MPID_Comm *comm_ptr, int high, MPID_Comm **new_intracomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int  local_high, remote_high, new_size;
    MPIU_Context_id_t new_context_id;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_INTERCOMM_MERGE_IMPL);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_INTERCOMM_MERGE_IMPL);
    /* Make sure that we have a local intercommunicator */
    if (!comm_ptr->local_comm) {
        /* Manufacture the local communicator */
        mpi_errno = MPIR_Setup_intercomm_localcomm( comm_ptr );
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    /* Find the "high" value of the other group of processes.  This
       will be used to determine which group is ordered first in
       the generated communicator.  high is logical */
    local_high = high;
    if (comm_ptr->rank == 0) {
        /* This routine allows use to use the collective communication
           context rather than the point-to-point context. */
        mpi_errno = MPIC_Sendrecv( &local_high, 1, MPI_INT, 0, 0,
                                      &remote_high, 1, MPI_INT, 0, 0, comm_ptr,
                                      MPI_STATUS_IGNORE, &errflag );
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        
        /* If local_high and remote_high are the same, then order is arbitrary.
           we use the gpids of the rank 0 member of the local and remote
           groups to choose an order in this case. */
        if (local_high == remote_high) {
            MPID_Gpid ingpid, outgpid;
            
            mpi_errno = MPID_GPID_Get( comm_ptr, 0, &ingpid );
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            
            mpi_errno = MPIC_Sendrecv( &ingpid, sizeof(MPID_Gpid), MPI_BYTE, 0, 1,
                                          &outgpid, sizeof(MPID_Gpid), MPI_BYTE, 0, 1, comm_ptr,
                                          MPI_STATUS_IGNORE, &errflag );
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);

            /* Note that the gpids cannot be the same because we are
               starting from a valid intercomm */
            int rc = memcmp(&ingpid,&outgpid,sizeof(MPID_Gpid));
            if(rc < 0)
                local_high = 1;
            else if(rc > 0)
                local_high = 0;
            else
                {
                    /* req#3930: The merge algorithm will deadlock if the gpids are inadvertently the
                       same due to implementation bugs in the MPID_GPID_Get() function */
                    MPIU_Assert(rc != 0);
                }
            }
        }

    /*
       All processes in the local group now need to get the
       value of local_high, which may have changed if both groups
       of processes had the same value for high
    */
    mpi_errno = MPIR_Bcast_impl( &local_high, 1, MPI_INT, 0, comm_ptr->local_comm, &errflag );
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

    mpi_errno = MPIR_Comm_create( new_intracomm_ptr );
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    
    new_size = comm_ptr->local_size + comm_ptr->remote_size;

    /* FIXME: For the intracomm, we need a consistent context id.
       That means that one of the two groups needs to use
       the recvcontext_id and the other must use the context_id */
    if (local_high) {
        (*new_intracomm_ptr)->context_id = comm_ptr->recvcontext_id + 2; /* See below */
    }
    else {
        (*new_intracomm_ptr)->context_id = comm_ptr->context_id + 2; /* See below */
    }
    (*new_intracomm_ptr)->recvcontext_id = (*new_intracomm_ptr)->context_id;
    (*new_intracomm_ptr)->remote_size    = (*new_intracomm_ptr)->local_size   = new_size;
    (*new_intracomm_ptr)->rank           = -1;
    (*new_intracomm_ptr)->comm_kind      = MPID_INTRACOMM;

    /* Now we know which group comes first.  Build the new mapping
       from the existing comm */
    mpi_errno = create_and_map(comm_ptr, local_high, (*new_intracomm_ptr));
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* We've setup a temporary context id, based on the context id
       used by the intercomm.  This allows us to perform the allreduce
       operations within the context id algorithm, since we already
       have a valid (almost - see comm_create_hook) communicator.
    */
    mpi_errno = MPIR_Comm_commit((*new_intracomm_ptr));
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* printf( "About to get context id \n" ); fflush( stdout ); */
    /* In the multi-threaded case, MPIR_Get_contextid_sparse assumes that the
       calling routine already holds the single criticial section */
    new_context_id = 0;
    mpi_errno = MPIR_Get_contextid_sparse( (*new_intracomm_ptr), &new_context_id, FALSE );
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    MPIU_Assert(new_context_id != 0);

    /* We release this communicator that was involved just to
     * get valid context id and create true one
     */
    mpi_errno = MPIR_Comm_release(*new_intracomm_ptr);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Comm_create( new_intracomm_ptr );
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    (*new_intracomm_ptr)->remote_size    = (*new_intracomm_ptr)->local_size   = new_size;
    (*new_intracomm_ptr)->rank           = -1;
    (*new_intracomm_ptr)->comm_kind      = MPID_INTRACOMM;
    (*new_intracomm_ptr)->context_id = new_context_id;
    (*new_intracomm_ptr)->recvcontext_id = new_context_id;

    mpi_errno = create_and_map(comm_ptr, local_high, (*new_intracomm_ptr));
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Comm_commit((*new_intracomm_ptr));
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

 fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_INTERCOMM_MERGE_IMPL);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #7
0
/* comm create impl for intracommunicators, assumes that the standard error
 * checking has already taken place in the calling function */
int MPIR_Comm_create_intra(MPIR_Comm *comm_ptr, MPIR_Group *group_ptr,
                           MPIR_Comm **newcomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Context_id_t new_context_id = 0;
    int *mapping = NULL;
    int n;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_INTRA);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE_INTRA);

    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);

    n = group_ptr->size;
    *newcomm_ptr = NULL;

    /* Create a new communicator from the specified group members */

    /* Creating the context id is collective over the *input* communicator,
       so it must be created before we decide if this process is a
       member of the group */
    /* In the multi-threaded case, MPIR_Get_contextid_sparse assumes that the
       calling routine already holds the single criticial section */
    mpi_errno = MPIR_Get_contextid_sparse( comm_ptr, &new_context_id,
                                           group_ptr->rank == MPI_UNDEFINED );
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    MPIR_Assert(new_context_id != 0);

    if (group_ptr->rank != MPI_UNDEFINED) {
        MPIR_Comm *mapping_comm = NULL;

        mpi_errno = MPII_Comm_create_calculate_mapping(group_ptr, comm_ptr,
                                                       &mapping, &mapping_comm);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        /* Get the new communicator structure and context id */

        mpi_errno = MPIR_Comm_create( newcomm_ptr );
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        (*newcomm_ptr)->recvcontext_id = new_context_id;
        (*newcomm_ptr)->rank           = group_ptr->rank;
        (*newcomm_ptr)->comm_kind      = comm_ptr->comm_kind;
        /* Since the group has been provided, let the new communicator know
           about the group */
        (*newcomm_ptr)->local_comm     = 0;
        (*newcomm_ptr)->local_group    = group_ptr;
        MPIR_Group_add_ref( group_ptr );

        (*newcomm_ptr)->remote_group   = group_ptr;
        MPIR_Group_add_ref( group_ptr );
        (*newcomm_ptr)->context_id     = (*newcomm_ptr)->recvcontext_id;
        (*newcomm_ptr)->remote_size    = (*newcomm_ptr)->local_size = n;
        (*newcomm_ptr)->pof2           = MPL_pof2(n);

        /* Setup the communicator's network address mapping.  This is for the remote group,
           which is the same as the local group for intracommunicators */
        mpi_errno = MPII_Comm_create_map(n, 0,
                                         mapping,
                                         NULL,
                                         mapping_comm,
                                         *newcomm_ptr);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Comm_commit(*newcomm_ptr);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }
    else {
        /* This process is not in the group */
        new_context_id = 0;
    }

fn_exit:
    if (mapping)
        MPL_free(mapping);

    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_CREATE_INTRA);
    return mpi_errno;
fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    if (*newcomm_ptr != NULL) {
        MPIR_Comm_release(*newcomm_ptr);
        new_context_id = 0; /* MPIR_Comm_release frees the new ctx id */
    }
    if (new_context_id != 0 && group_ptr->rank != MPI_UNDEFINED) {
        MPIR_Free_contextid(new_context_id);
    }
    /* --END ERROR HANDLING-- */
    goto fn_exit;
}
Beispiel #8
0
/* comm shrink impl; assumes that standard error checking has already taken
 * place in the calling function */
int MPIR_Comm_shrink(MPIR_Comm * comm_ptr, MPIR_Comm ** newcomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Group *global_failed = NULL, *comm_grp = NULL, *new_group_ptr = NULL;
    int attempts = 0;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;

    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_SHRINK);

    /* TODO - Implement this function for intercommunicators */
    MPIR_Comm_group_impl(comm_ptr, &comm_grp);

    do {
        errflag = MPIR_ERR_NONE;

        MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_SHRINK_TAG);
        /* Ignore the mpi_errno value here as it will definitely communicate
         * with failed procs */

        mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        if (MPIR_Group_empty != global_failed)
            MPIR_Group_release(global_failed);

        mpi_errno = MPIR_Comm_create_group(comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, newcomm_ptr);
        if (*newcomm_ptr == NULL) {
            errflag = MPIR_ERR_PROC_FAILED;
        } else if (mpi_errno) {
            errflag =
                MPIX_ERR_PROC_FAILED ==
                MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
            MPIR_Comm_release(*newcomm_ptr);
        }

        mpi_errno = MPII_Allreduce_group(MPI_IN_PLACE, &errflag, 1, MPI_INT, MPI_MAX, comm_ptr,
                                         new_group_ptr, MPIR_SHRINK_TAG, &errflag);
        MPIR_Group_release(new_group_ptr);

        if (errflag) {
            if (*newcomm_ptr != NULL && MPIR_Object_get_ref(*newcomm_ptr) > 0) {
                MPIR_Object_set_ref(*newcomm_ptr, 1);
                MPIR_Comm_release(*newcomm_ptr);
            }
            if (MPIR_Object_get_ref(new_group_ptr) > 0) {
                MPIR_Object_set_ref(new_group_ptr, 1);
                MPIR_Group_release(new_group_ptr);
            }
        }
    } while (errflag && ++attempts < 5);

    if (errflag && attempts >= 5)
        goto fn_fail;
    else
        mpi_errno = MPI_SUCCESS;

  fn_exit:
    MPIR_Group_release(comm_grp);
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_SHRINK);
    return mpi_errno;
  fn_fail:
    if (*newcomm_ptr)
        MPIR_Object_set_ref(*newcomm_ptr, 0);
    MPIR_Object_set_ref(global_failed, 0);
    MPIR_Object_set_ref(new_group_ptr, 0);
    goto fn_exit;
}
/* comm create group impl; assumes that the standard error checking
 * has already taken place in the calling function */
int MPIR_Comm_create_group(MPIR_Comm * comm_ptr, MPIR_Group * group_ptr, int tag,
                           MPIR_Comm ** newcomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Context_id_t new_context_id = 0;
    int *mapping = NULL;
    int n;

    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_GROUP);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_CREATE_GROUP);

    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);

    n = group_ptr->size;
    *newcomm_ptr = NULL;

    /* Create a new communicator from the specified group members */

    if (group_ptr->rank != MPI_UNDEFINED) {
        MPIR_Comm *mapping_comm = NULL;

        /* For this routine, creation of the id is collective over the input
           *group*, so processes not in the group do not participate. */

        mpi_errno = MPIR_Get_contextid_sparse_group( comm_ptr, group_ptr, tag, &new_context_id, 0 );
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        MPIR_Assert(new_context_id != 0);

        mpi_errno = MPII_Comm_create_calculate_mapping(group_ptr, comm_ptr,
                                                       &mapping, &mapping_comm);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        /* Get the new communicator structure and context id */

        mpi_errno = MPIR_Comm_create( newcomm_ptr );
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        (*newcomm_ptr)->recvcontext_id = new_context_id;
        (*newcomm_ptr)->rank           = group_ptr->rank;
        (*newcomm_ptr)->comm_kind      = comm_ptr->comm_kind;
        /* Since the group has been provided, let the new communicator know
           about the group */
        (*newcomm_ptr)->local_comm     = 0;
        (*newcomm_ptr)->local_group    = group_ptr;
        MPIR_Group_add_ref( group_ptr );

        (*newcomm_ptr)->remote_group   = group_ptr;
        MPIR_Group_add_ref( group_ptr );
        (*newcomm_ptr)->context_id     = (*newcomm_ptr)->recvcontext_id;
        (*newcomm_ptr)->remote_size    = (*newcomm_ptr)->local_size = n;

        /* Setup the communicator's vc table.  This is for the remote group,
           which is the same as the local group for intracommunicators */
        mpi_errno = MPII_Comm_create_map(n, 0,
                                         mapping,
                                         NULL,
                                         mapping_comm,
                                         *newcomm_ptr);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Comm_commit(*newcomm_ptr);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }
    else {
        /* This process is not in the group */
        new_context_id = 0;
    }

fn_exit:
    if (mapping)
        MPL_free(mapping);

    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_CREATE_GROUP);
    return mpi_errno;
fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    if (*newcomm_ptr != NULL) {
        MPIR_Comm_release(*newcomm_ptr);
        new_context_id = 0; /* MPIR_Comm_release frees the new ctx id */
    }
    if (new_context_id != 0)
        MPIR_Free_contextid(new_context_id);
    /* --END ERROR HANDLING-- */

    goto fn_exit;
}
Beispiel #10
0
int MPIR_Intercomm_merge_impl(MPIR_Comm * comm_ptr, int high, MPIR_Comm ** new_intracomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int local_high, remote_high, new_size;
    MPIR_Context_id_t new_context_id;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL);
    /* Make sure that we have a local intercommunicator */
    if (!comm_ptr->local_comm) {
        /* Manufacture the local communicator */
        mpi_errno = MPII_Setup_intercomm_localcomm(comm_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    /* Find the "high" value of the other group of processes.  This
     * will be used to determine which group is ordered first in
     * the generated communicator.  high is logical */
    local_high = high;
    if (comm_ptr->rank == 0) {
        /* This routine allows use to use the collective communication
         * context rather than the point-to-point context. */
        mpi_errno = MPIC_Sendrecv(&local_high, 1, MPI_INT, 0, 0,
                                  &remote_high, 1, MPI_INT, 0, 0, comm_ptr,
                                  MPI_STATUS_IGNORE, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        /* If local_high and remote_high are the same, then order is arbitrary.
         * we use the is_low_group in the intercomm in this case. */
        MPL_DBG_MSG_FMT(MPIR_DBG_COMM, VERBOSE,
                        (MPL_DBG_FDEST, "local_high=%d remote_high=%d is_low_group=%d",
                         local_high, remote_high, comm_ptr->is_low_group));
        if (local_high == remote_high) {
            local_high = !(comm_ptr->is_low_group);
        }
    }

    /*
     * All processes in the local group now need to get the
     * value of local_high, which may have changed if both groups
     * of processes had the same value for high
     */
    mpi_errno = MPIR_Bcast(&local_high, 1, MPI_INT, 0, comm_ptr->local_comm, &errflag);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

    /*
     * For the intracomm, we need a consistent context id.
     * That means that one of the two groups needs to use
     * the recvcontext_id and the other must use the context_id
     * The recvcontext_id is unique on each process, but another
     * communicator may use the context_id. Therefore, we do a small hack.
     * We set both flags indicating a sub-communicator (intra-node and
     * inter-node) to one. This is normally not possible (a communicator
     * is either intra- or inter-node) - which makes this context_id unique.
     *
     */

    new_size = comm_ptr->local_size + comm_ptr->remote_size;

    mpi_errno = MPIR_Comm_create(new_intracomm_ptr);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    if (local_high) {
        (*new_intracomm_ptr)->context_id =
            MPIR_CONTEXT_SET_FIELD(SUBCOMM, comm_ptr->recvcontext_id, 3);
    } else {
        (*new_intracomm_ptr)->context_id = MPIR_CONTEXT_SET_FIELD(SUBCOMM, comm_ptr->context_id, 3);
    }
    (*new_intracomm_ptr)->recvcontext_id = (*new_intracomm_ptr)->context_id;
    (*new_intracomm_ptr)->remote_size = (*new_intracomm_ptr)->local_size = new_size;
    (*new_intracomm_ptr)->pof2 = MPL_pof2(new_size);
    (*new_intracomm_ptr)->rank = -1;
    (*new_intracomm_ptr)->comm_kind = MPIR_COMM_KIND__INTRACOMM;

    /* Now we know which group comes first.  Build the new mapping
     * from the existing comm */
    mpi_errno = create_and_map(comm_ptr, local_high, (*new_intracomm_ptr));
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /* We've setup a temporary context id, based on the context id
     * used by the intercomm.  This allows us to perform the allreduce
     * operations within the context id algorithm, since we already
     * have a valid (almost - see comm_create_hook) communicator.
     */
    mpi_errno = MPIR_Comm_commit((*new_intracomm_ptr));
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /* printf("About to get context id \n"); fflush(stdout); */
    /* In the multi-threaded case, MPIR_Get_contextid_sparse assumes that the
     * calling routine already holds the single criticial section */
    new_context_id = 0;
    mpi_errno = MPIR_Get_contextid_sparse((*new_intracomm_ptr), &new_context_id, FALSE);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIR_Assert(new_context_id != 0);

    /* We release this communicator that was involved just to
     * get valid context id and create true one
     */
    mpi_errno = MPIR_Comm_release(*new_intracomm_ptr);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Comm_create(new_intracomm_ptr);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    (*new_intracomm_ptr)->remote_size = (*new_intracomm_ptr)->local_size = new_size;
    (*new_intracomm_ptr)->pof2 = MPL_pof2(new_size);
    (*new_intracomm_ptr)->rank = -1;
    (*new_intracomm_ptr)->comm_kind = MPIR_COMM_KIND__INTRACOMM;
    (*new_intracomm_ptr)->context_id = new_context_id;
    (*new_intracomm_ptr)->recvcontext_id = new_context_id;

    mpi_errno = create_and_map(comm_ptr, local_high, (*new_intracomm_ptr));
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Comm_commit((*new_intracomm_ptr));
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

  fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_KIND__INTERCOMM_MERGE_IMPL);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* comm create impl for intracommunicators, assumes that the standard error
 * checking has already taken place in the calling function */
PMPI_LOCAL int MPIR_Comm_create_intra(MPID_Comm *comm_ptr, MPID_Group *group_ptr, MPI_Comm *newcomm)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Context_id_t new_context_id = 0;
    MPID_Comm *newcomm_ptr = NULL;
    int *mapping = NULL;
    int n;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_CREATE_INTRA);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_CREATE_INTRA);

    MPIU_Assert(comm_ptr->comm_kind == MPID_INTRACOMM);

    n = group_ptr->size;
    *newcomm = MPI_COMM_NULL;

    /* Create a new communicator from the specified group members */

    /* Creating the context id is collective over the *input* communicator,
       so it must be created before we decide if this process is a
       member of the group */
    /* In the multi-threaded case, MPIR_Get_contextid assumes that the
       calling routine already holds the single criticial section */
    /* TODO should be converted to use MPIR_Get_contextid_sparse instead */
    mpi_errno = MPIR_Get_contextid( comm_ptr, &new_context_id );
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    MPIU_Assert(new_context_id != 0);

    if (group_ptr->rank != MPI_UNDEFINED) {
        MPID_VCR *mapping_vcr = NULL;

        mpi_errno = MPIR_Comm_create_calculate_mapping(group_ptr, comm_ptr, 
						       &mapping_vcr, &mapping);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        /* Get the new communicator structure and context id */

        mpi_errno = MPIR_Comm_create( &newcomm_ptr );
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        newcomm_ptr->recvcontext_id = new_context_id;
        newcomm_ptr->rank           = group_ptr->rank;
        newcomm_ptr->comm_kind      = comm_ptr->comm_kind;
        /* Since the group has been provided, let the new communicator know
           about the group */
        newcomm_ptr->local_comm     = 0;
        newcomm_ptr->local_group    = group_ptr;
        MPIR_Group_add_ref( group_ptr );

        newcomm_ptr->remote_group   = group_ptr;
        MPIR_Group_add_ref( group_ptr );
        newcomm_ptr->context_id     = newcomm_ptr->recvcontext_id;
        newcomm_ptr->remote_size    = newcomm_ptr->local_size = n;

        /* Setup the communicator's vc table.  This is for the remote group,
           which is the same as the local group for intracommunicators */
        mpi_errno = MPIR_Comm_create_create_and_map_vcrt(n,
                                                         mapping,
                                                         mapping_vcr,
                                                         &newcomm_ptr->vcrt,
                                                         &newcomm_ptr->vcr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        /* Notify the device of this new communicator */
        MPID_Dev_comm_create_hook( newcomm_ptr );
        mpi_errno = MPIR_Comm_commit(newcomm_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        MPIU_OBJ_PUBLISH_HANDLE(*newcomm, newcomm_ptr->handle);
    }
    else {
        /* This process is not in the group */
        MPIR_Free_contextid( new_context_id );
        new_context_id = 0;
    }

fn_exit:
    if (mapping)
        MPIU_Free(mapping);

    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_CREATE_INTRA);
    return mpi_errno;
fn_fail:
    if (newcomm_ptr != NULL) {
        MPIR_Comm_release(newcomm_ptr, 0/*isDisconnect*/);
        new_context_id = 0; /* MPIR_Comm_release frees the new ctx id */
    }
    if (new_context_id != 0)
        MPIR_Free_contextid(new_context_id);
    *newcomm = MPI_COMM_NULL;

    goto fn_exit;
}