Esempio n. 1
0
int MPIDI_VCR_Dup(MPIDI_VCR orig_vcr, MPIDI_VCR * new_vcr)
{
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_VCR_DUP);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_VCR_DUP);

    /* We are allowed to create a vc that belongs to no process group 
     as part of the initial connect/accept action, so in that case,
     ignore the pg ref count update */
    /* XXX DJG FIXME-MT should we be checking this? */
    /* we probably need a test-and-incr operation or equivalent to avoid races */
    if (MPIR_Object_get_ref(orig_vcr) == 0 && orig_vcr->pg) {
	MPIDI_VC_add_ref( orig_vcr );
	MPIDI_VC_add_ref( orig_vcr );
	MPIDI_PG_add_ref( orig_vcr->pg );
    }
    else {
	MPIDI_VC_add_ref(orig_vcr);
    }
    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_REFCOUNT,TYPICAL,(MPL_DBG_FDEST,"Incr VCR %p ref count",orig_vcr));
    *new_vcr = orig_vcr;
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_VCR_DUP);
    return MPI_SUCCESS;
}
Esempio n. 2
0
/*@ 
   MPIDI_PG_Finalize - Finalize the process groups, including freeing all
   process group structures
  @*/
int MPIDI_PG_Finalize(void)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_PG_t *pg, *pgNext;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_FINALIZE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_FINALIZE);

    /* Print the state of the process groups */
    if (verbose) {
	MPIU_PG_Printall( stdout );
    }

    /* FIXME - straighten out the use of PMI_Finalize - no use after 
       PG_Finalize */
    if (pg_world->connData) {
#ifdef USE_PMI2_API
        mpi_errno = PMI2_Finalize();
        if (mpi_errno) MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|pmi_finalize");
#else
	int rc;
	rc = PMI_Finalize();
	if (rc) {
	    MPIR_ERR_SET1(mpi_errno,MPI_ERR_OTHER, 
			  "**ch3|pmi_finalize", 
			  "**ch3|pmi_finalize %d", rc);
	}
#endif
    }

    /* Free the storage associated with the process groups */
    pg = MPIDI_PG_list;
    while (pg) {
	pgNext = pg->next;
	
	/* In finalize, we free all process group information, even if
	   the ref count is not zero.  This can happen if the user
	   fails to use MPI_Comm_disconnect on communicators that
	   were created with the dynamic process routines.*/
        /* XXX DJG FIXME-MT should we be checking this? */
	if (MPIR_Object_get_ref(pg) == 0 || 1) {
	    if (pg == MPIDI_Process.my_pg)
		MPIDI_Process.my_pg = NULL;

	    MPIR_Object_set_ref(pg, 0); /* satisfy assertions in PG_Destroy */
	    MPIDI_PG_Destroy( pg );
	}
	pg     = pgNext;
    }

    /* If COMM_WORLD is still around (it normally should be), 
       try to free it here.  The reason that we need to free it at this 
       point is that comm_world (and comm_self) still exist, and 
       hence the usual process to free the related VC structures will
       not be invoked. */
    if (MPIDI_Process.my_pg) {
	MPIDI_PG_Destroy(MPIDI_Process.my_pg);
    } 
    MPIDI_Process.my_pg = NULL;

    /* ifdefing out this check because the list will not be NULL in 
       Ch3_finalize because
       one additional reference is retained in MPIDI_Process.my_pg. 
       That reference is released
       only after ch3_finalize returns. If I release it before ch3_finalize, 
       the ssm channel crashes. */
#if 0
    if (MPIDI_PG_list != NULL)
    { 
	
	/* --BEGIN ERROR HANDLING-- */
	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_INTERN,
        "**dev|pg_finalize|list_not_empty", NULL); 
	/* --END ERROR HANDLING-- */
    }
#endif

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_FINALIZE);
    return mpi_errno;
}
Esempio n. 3
0
int MPIDI_PG_Destroy(MPIDI_PG_t * pg)
{
    MPIDI_PG_t * pg_prev;
    MPIDI_PG_t * pg_cur;
    int i;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_DESTROY);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_DESTROY);

    MPIR_Assert(MPIR_Object_get_ref(pg) == 0);

    pg_prev = NULL;
    pg_cur = MPIDI_PG_list;
    while(pg_cur != NULL)
    {
	if (pg_cur == pg)
	{
	    if (MPIDI_PG_iterator_next == pg)
	    { 
		MPIDI_PG_iterator_next = MPIDI_PG_iterator_next->next;
	    }

            if (pg_prev == NULL)
                MPIDI_PG_list = pg->next; 
            else
                pg_prev->next = pg->next;

            MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_DISCONNECT, VERBOSE, (MPL_DBG_FDEST, "destroying pg=%p pg->id=%s", pg, (char *)pg->id));

            for (i = 0; i < pg->size; ++i) {
                /* FIXME it would be good if we could make this assertion.
                   Unfortunately, either:
                   1) We're not being disciplined and some caller of this
                      function doesn't bother to manage all the refcounts
                      because he thinks he knows better.  Annoying, but not
                      strictly a bug.
		      (wdg - actually, that is a bug - managing the ref
		      counts IS required and missing one is a bug.)
                   2) There is a real bug lurking out there somewhere and we
                      just haven't hit it in the tests yet.  */
                /*MPIR_Assert(MPIR_Object_get_ref(pg->vct[i]) == 0);*/

                MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_DISCONNECT, VERBOSE, (MPL_DBG_FDEST, "about to free pg->vct=%p which contains vc=%p", pg->vct, &pg->vct[i]));

                /* This used to be handled in MPIDI_VCRT_Release, but that was
                   not the right place to do this.  The VC should only be freed
                   when the PG that it belongs to is freed, not just when the
                   VC's refcount drops to zero. [goodell@ 2008-06-13] */
		/* In that case, the fact that the VC is in the PG should
		   increment the ref count - reflecting the fact that the
		   use in the PG constitutes a reference-count-incrementing
		   use.  Alternately, if the PG is able to recreate a VC, 
		   and can thus free unused (or idle) VCs, it should be allowed
		   to do so.  [wdg 2008-08-31] */
                mpi_errno = MPIDI_CH3_VC_Destroy(&(pg->vct[i]));
                if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
            }

	    MPIDI_PG_Destroy_fn(pg);
	    MPL_free(pg->vct);
	    if (pg->connData) {
		if (pg->freeConnInfo) {
		    (*pg->freeConnInfo)( pg );
		}
		else {
		    MPL_free(pg->connData);
		}
	    }
	    mpi_errno = MPIDI_CH3_PG_Destroy(pg);
	    MPL_free(pg);

	    goto fn_exit;
	}

	pg_prev = pg_cur;
	pg_cur = pg_cur->next;
    }

    /* PG not found if we got here */
    MPIR_ERR_SET1(mpi_errno,MPI_ERR_OTHER,
		  "**dev|pg_not_found", "**dev|pg_not_found %p", pg);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_DESTROY);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Esempio n. 4
0
/* comm shrink impl; assumes that standard error checking has already taken
 * place in the calling function */
int MPIR_Comm_shrink(MPIR_Comm * comm_ptr, MPIR_Comm ** newcomm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Group *global_failed = NULL, *comm_grp = NULL, *new_group_ptr = NULL;
    int attempts = 0;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;

    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_COMM_SHRINK);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_COMM_SHRINK);

    /* TODO - Implement this function for intercommunicators */
    MPIR_Comm_group_impl(comm_ptr, &comm_grp);

    do {
        errflag = MPIR_ERR_NONE;

        MPID_Comm_get_all_failed_procs(comm_ptr, &global_failed, MPIR_SHRINK_TAG);
        /* Ignore the mpi_errno value here as it will definitely communicate
         * with failed procs */

        mpi_errno = MPIR_Group_difference_impl(comm_grp, global_failed, &new_group_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        if (MPIR_Group_empty != global_failed)
            MPIR_Group_release(global_failed);

        mpi_errno = MPIR_Comm_create_group(comm_ptr, new_group_ptr, MPIR_SHRINK_TAG, newcomm_ptr);
        if (*newcomm_ptr == NULL) {
            errflag = MPIR_ERR_PROC_FAILED;
        } else if (mpi_errno) {
            errflag =
                MPIX_ERR_PROC_FAILED ==
                MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
            MPIR_Comm_release(*newcomm_ptr);
        }

        mpi_errno = MPII_Allreduce_group(MPI_IN_PLACE, &errflag, 1, MPI_INT, MPI_MAX, comm_ptr,
                                         new_group_ptr, MPIR_SHRINK_TAG, &errflag);
        MPIR_Group_release(new_group_ptr);

        if (errflag) {
            if (*newcomm_ptr != NULL && MPIR_Object_get_ref(*newcomm_ptr) > 0) {
                MPIR_Object_set_ref(*newcomm_ptr, 1);
                MPIR_Comm_release(*newcomm_ptr);
            }
            if (MPIR_Object_get_ref(new_group_ptr) > 0) {
                MPIR_Object_set_ref(new_group_ptr, 1);
                MPIR_Group_release(new_group_ptr);
            }
        }
    } while (errflag && ++attempts < 5);

    if (errflag && attempts >= 5)
        goto fn_fail;
    else
        mpi_errno = MPI_SUCCESS;

  fn_exit:
    MPIR_Group_release(comm_grp);
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_COMM_SHRINK);
    return mpi_errno;
  fn_fail:
    if (*newcomm_ptr)
        MPIR_Object_set_ref(*newcomm_ptr, 0);
    MPIR_Object_set_ref(global_failed, 0);
    MPIR_Object_set_ref(new_group_ptr, 0);
    goto fn_exit;
}
Esempio n. 5
0
void MPID_nem_dbg_print_all_sendq(FILE *stream)
{
    int i;
    MPIDI_PG_t *pg;
    MPIDI_VC_t *vc;
    MPIDI_PG_iterator iter;

    fprintf(stream, "========================================\n");
    fprintf(stream, "MPI_COMM_WORLD  ctx=%#x rank=%d\n", MPIR_Process.comm_world->context_id, MPIR_Process.comm_world->rank);
    fprintf(stream, "MPI_COMM_SELF   ctx=%#x\n", MPIR_Process.comm_self->context_id);
    if (MPIR_Process.comm_parent) {
        fprintf(stream, "MPI_COMM_PARENT ctx=%#x recvctx=%#x\n",
                MPIR_Process.comm_self->context_id,
                MPIR_Process.comm_parent->recvcontext_id);
    }
    else {
        fprintf(stream, "MPI_COMM_PARENT (NULL)\n");
    }

    MPIDI_PG_Get_iterator(&iter);
    while (MPIDI_PG_Has_next(&iter)) {
        MPIDI_PG_Get_next(&iter, &pg);
        fprintf(stream, "PG ptr=%p size=%d id=%s refcount=%d\n", pg, pg->size, (const char*)pg->id, MPIR_Object_get_ref(pg));
        for (i = 0; i < MPIDI_PG_Get_size(pg); ++i) {
            MPIDI_PG_Get_vc(pg, i, &vc);
            MPID_nem_dbg_print_vc_sendq(stream, vc);
        }
    }

    fprintf(stream, "========================================\n");
}
Esempio n. 6
0
/*@
  MPIDI_CH3U_Handle_connection - handle connection event

Input Parameters:
+ vc - virtual connection
. event - connection event

  NOTE:
  This routine is used to transition the VC state.

  The only events currently handled are TERMINATED events.  This
  routine should be called (with TERMINATED) whenever a connection is
  terminated whether normally (in MPIDI_CH3_Connection_terminate() ),
  or abnormally.

  FIXME: Currently state transitions resulting from receiving CLOSE
  packets are performed in MPIDI_CH3_PktHandler_Close().  Perhaps that
  should move here.
@*/
int MPIDI_CH3U_Handle_connection(MPIDI_VC_t * vc, MPIDI_VC_Event_t event)
{
    int inuse;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_CONNECTION);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_CONNECTION);

    switch (event)
    {
	case MPIDI_VC_EVENT_TERMINATED:
	{
	    switch (vc->state)
	    {
		case MPIDI_VC_STATE_CLOSED:
                    /* Normal termination. */
                    MPIDI_CHANGE_VC_STATE(vc, INACTIVE);

		    /* MT: this is not thread safe */
		    MPIDI_Outstanding_close_ops -= 1;
		    MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,
             "outstanding close operations = %d", MPIDI_Outstanding_close_ops);
	    
		    if (MPIDI_Outstanding_close_ops == 0)
		    {
			MPIDI_CH3_Progress_signal_completion();
                        mpi_errno = MPIDI_CH3_Channel_close();
                        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
		    }

		    break;

                case MPIDI_VC_STATE_INACTIVE:
                    /* VC was terminated before it was activated.
                       This can happen if a failed process was
                       detected before the process used the VC. */
                    MPL_DBG_MSG(MPIDI_CH3_DBG_DISCONNECT,TYPICAL, "VC terminated before it was activated.  We probably got a failed"
                                 " process notification.");
                    MPIDI_CH3U_Complete_posted_with_error(vc);
                    ++MPIDI_Failed_vc_count;
                    MPIDI_CHANGE_VC_STATE(vc, MORIBUND);

                    break;

                    
                case MPIDI_VC_STATE_ACTIVE:
                case MPIDI_VC_STATE_REMOTE_CLOSE:
                    /* This is a premature termination.  This process
                       has not started the close protocol.  There may
                       be outstanding sends or receives on the local
                       side, remote side or both. */
                    
                    MPL_DBG_MSG(MPIDI_CH3_DBG_DISCONNECT,TYPICAL, "Connection closed prematurely.");

                    MPIDI_CH3U_Complete_posted_with_error(vc);
                    ++MPIDI_Failed_vc_count;
                    
                    MPIDU_Ftb_publish_vc(MPIDU_FTB_EV_UNREACHABLE, vc);
                    MPIDI_CHANGE_VC_STATE(vc, MORIBUND);

                    break;
                    
                case MPIDI_VC_STATE_LOCAL_CLOSE:
                    /* This is a premature termination.  This process
                       has started the close protocol, but hasn't
                       received a CLOSE packet from the remote side.
                       This process may not have been able to send the
                       CLOSE ack=F packet to the remote side.  There
                       may be outstanding sends or receives on the
                       local or remote sides. */
                case MPIDI_VC_STATE_CLOSE_ACKED:
                    /* This is a premature termination.  Both sides
                       have started the close protocol.  This process
                       has received CLOSE ack=F, but not CLOSE ack=t.
                       This process may not have been able to send
                       CLOSE ack=T.  There should not be any
                       outstanding sends or receives on either
                       side. */

                    MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL, "Connection closed prematurely during close protocol.  "
                                   "Outstanding close operations = %d", MPIDI_Outstanding_close_ops);

                    MPIDI_CH3U_Complete_posted_with_error(vc);
                    ++MPIDI_Failed_vc_count;

                    MPIDU_Ftb_publish_vc(MPIDU_FTB_EV_UNREACHABLE, vc);
                    MPIDI_CHANGE_VC_STATE(vc, MORIBUND);
                    
		    /* MT: this is not thread safe */
		    MPIDI_Outstanding_close_ops -= 1;
	    
		    if (MPIDI_Outstanding_close_ops == 0) {
			MPIDI_CH3_Progress_signal_completion();
                        mpi_errno = MPIDI_CH3_Channel_close();
                        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
		    }
                    
                    break;

		default:
		{
		    MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL, "Unhandled connection state %d when closing connection",vc->state);
		    mpi_errno = MPIR_Err_create_code(
			MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, 
                        MPI_ERR_INTERN, "**ch3|unhandled_connection_state",
			"**ch3|unhandled_connection_state %p %d", vc, vc->state);
                    goto fn_fail;
		    break;
		}
	    }

            /* FIXME: Decrement the reference count?  Who increments? */
            /* FIXME: The reference count is often already 0.  But
               not always */
            /* MPIR_Object_set_ref(vc, 0); ??? */

            /*
             * FIXME: The VC used in connect accept has a NULL 
             * process group
             */
            /* XXX DJG FIXME-MT should we be checking this ref_count? */
            if (vc->pg != NULL && (MPIR_Object_get_ref(vc) == 0))
            {
                /* FIXME: Who increments the reference count that
                   this is decrementing? */
                /* When the reference count for a vc becomes zero, 
                   decrement the reference count
                   of the associated process group.  */
                /* FIXME: This should be done when the reference 
                   count of the vc is first decremented */
                MPIDI_PG_release_ref(vc->pg, &inuse);
                if (inuse == 0) {
                    MPIDI_PG_Destroy(vc->pg);
                }
            }

	    break;
	}
    
	default:
	{
	    break;
	}
    }

fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_CONNECTION);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Esempio n. 7
0
int MPIDI_VCRT_Release(struct MPIDI_VCRT *vcrt, int isDisconnect )
{
    int in_use;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_VCRT_RELEASE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_VCRT_RELEASE);

    MPIR_Object_release_ref(vcrt, &in_use);
    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_REFCOUNT,TYPICAL,(MPL_DBG_FDEST, "Decr VCRT %p ref count",vcrt));
    
    /* If this VC reference table is no longer in use, we can
       decrement the reference count of each of the VCs.  If the
       count on the VCs goes to zero, then we can decrement the
       ref count on the process group and so on. 
    */
    if (!in_use) {
	int i, inuse;

	for (i = 0; i < vcrt->size; i++)
	{
	    MPIDI_VC_t * const vc = vcrt->vcr_table[i];
	    
	    MPIDI_VC_release_ref(vc, &in_use);

            /* Dynamic connections start with a refcount of 2 instead of 1.
             * That way we can distinguish between an MPI_Free and an
             * MPI_Comm_disconnect. */
            /* XXX DJG FIXME-MT should we be checking this? */
            /* probably not, need to do something like the following instead: */
#if 0
            if (isDisconnect) {
                MPIR_Assert(in_use);
                /* FIXME this is still bogus, the VCRT may contain a mix of
                 * dynamic and non-dynamic VCs, so the ref_count isn't
                 * guaranteed to have started at 2.  The best thing to do might
                 * be to avoid overloading the reference counting this way and
                 * use a separate check for dynamic VCs (another flag? compare
                 * PGs?) */
                MPIR_Object_release_ref(vc, &in_use);
            }
#endif
	    if (isDisconnect && MPIR_Object_get_ref(vc) == 1) {
		MPIDI_VC_release_ref(vc, &in_use);
	    }

	    if (!in_use)
	    {
		/* If the VC is myself then skip the close message */
		if (vc->pg == MPIDI_Process.my_pg && 
		    vc->pg_rank == MPIDI_Process.my_pg_rank)
		{
                    MPIDI_PG_release_ref(vc->pg, &inuse);
                    if (inuse == 0)
                    {
                        MPIDI_PG_Destroy(vc->pg);
                    }
		    continue;
		}
		
		/* FIXME: the correct test is ACTIVE or REMOTE_CLOSE */
		/*if (vc->state != MPIDI_VC_STATE_INACTIVE) { */
		if (vc->state == MPIDI_VC_STATE_ACTIVE ||
		    vc->state == MPIDI_VC_STATE_REMOTE_CLOSE)
		{
		    MPIDI_CH3U_VC_SendClose( vc, i );
		}
		else
		{
                    MPIDI_PG_release_ref(vc->pg, &inuse);
                    if (inuse == 0)
                    {
                        MPIDI_PG_Destroy(vc->pg);
                    }

		    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
                            "vc=%p: not sending a close to %d, vc in state %s",
			     vc, i, MPIDI_VC_GetStateString(vc->state)));
		}

                /* NOTE: we used to * MPIDI_CH3_VC_Destroy(&(pg->vct[i])))
                   here but that is incorrect.  According to the standard, it's
                   entirely possible (likely even) that this VC might still be
                   connected.  VCs are now destroyed when the PG that "owns"
                   them is destroyed (see MPIDI_PG_Destroy). [goodell@ 2008-06-13] */
	    }
	}

	MPL_free(vcrt);
    }

 fn_exit:    
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_VCRT_RELEASE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}