Пример #1
0
int MPIDI_nem_ckpt_pkthandler_init(MPIDI_CH3_PktHandler_Fcn *pktArray[], int arraySize)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_CKPT_PKTHANDLER_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_CKPT_PKTHANDLER_INIT);

    /* Check that the array is large enough */
    if (arraySize <= MPIDI_CH3_PKT_END_ALL) {
	MPIU_ERR_SETFATALANDJUMP(mpi_errno,MPI_ERR_INTERN, "**ch3|pktarraytoosmall");
    }

    pktArray[MPIDI_NEM_PKT_CKPT_MARKER] = pkt_ckpt_marker_handler;

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_CKPT_PKTHANDLER_INIT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Пример #2
0
int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t *pg_p, int pg_rank)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_INIT);

    /* Override split_type */
    MPID_Comm_fns = &comm_fns;

    mpi_errno = MPID_nem_init (pg_rank, pg_p, has_parent);
    if (mpi_errno) MPIU_ERR_POP (mpi_errno);

    nemesis_initialized = 1;

    MPIDI_CH3I_my_rank = pg_rank;
    MPIDI_CH3I_my_pg = pg_p;

    /*
     * Initialize Progress Engine
     */
    mpi_errno = MPIDI_CH3I_Progress_init();
    if (mpi_errno) MPIU_ERR_SETFATALANDJUMP (mpi_errno, MPI_ERR_OTHER, "**init_progress");

    for (i = 0; i < pg_p->size; i++)
    {
	mpi_errno = MPIDI_CH3_VC_Init(&pg_p->vct[i]);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_INIT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Пример #3
0
int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t * pg, int pg_rank)
{
    int mpi_errno = MPI_SUCCESS;
    int pg_size, threshold, dpm = 0, p;
    char *dpm_str, *value, *conn_info = NULL;
    MPIDI_VC_t *vc;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_INIT);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_INIT);
 
    if (MPIDI_CH3_Pkt_size_index[MPIDI_CH3_PKT_CLOSE] != sizeof (MPIDI_CH3_Pkt_close_t))
    {
        MPIU_ERR_SETFATALANDJUMP1(
            mpi_errno,
            MPI_ERR_OTHER,
            "**fail",
            "**fail %s", 
            "Failed sanity check! Packet size table mismatch");
    }
    
    pg_size = MPIDI_PG_Get_size(pg);

    /*Determine to use which connection management*/
    threshold = MPIDI_CH3I_CM_DEFAULT_ON_DEMAND_THRESHOLD;

    /*check ON_DEMAND_THRESHOLD*/
    value = getenv("MV2_ON_DEMAND_THRESHOLD");
    if (value)
    {
        threshold = atoi(value);
    }

    dpm_str = getenv("MV2_SUPPORT_DPM");
    if (dpm_str) {
        dpm = !!atoi(dpm_str);
    }
    MPIDI_CH3I_Process.has_dpm = dpm;
    if(MPIDI_CH3I_Process.has_dpm) {
        setenv("MV2_ENABLE_AFFINITY", "0", 1);
    }

#ifdef _ENABLE_XRC_
    value = getenv ("MV2_USE_XRC");
    if (value) {
        USE_XRC = atoi(value);
        if (USE_XRC) {
            /* Enable on-demand */
            threshold = 1;
        }
    }
#endif /* _ENABLE_XRC_ */
#ifdef _ENABLE_UD_
    if ((value = getenv("MV2_HYBRID_ENABLE_THRESHOLD")) != NULL) {
        rdma_hybrid_enable_threshold = atoi(value);
    }
    if ((value = getenv("MV2_USE_UD_HYBRID")) != NULL) {
        rdma_enable_hybrid = atoi(value);
    }
    if (pg_size < rdma_hybrid_enable_threshold) {
        rdma_enable_hybrid = 0;
    }
#endif

    if (pg_size > threshold || dpm 
#ifdef _ENABLE_XRC_
            || USE_XRC
#endif /* _ENABLE_XRC_ */
#ifdef _ENABLE_UD_
            || rdma_enable_hybrid
#endif
            )
    {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_ON_DEMAND;
	    MPIDI_CH3I_Process.num_conn = 0;
    }
    else
    {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_BASIC_ALL2ALL;
    }

#if defined(RDMA_CM)
    if (((value = getenv("MV2_USE_RDMA_CM")) != NULL
        || (value = getenv("MV2_USE_IWARP_MODE")) != NULL)
        && atoi(value) && ! dpm) {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_RDMA_CM;
    } else {
        rdma_cm_get_hca_type(&MPIDI_CH3I_RDMA_Process);
    }
#endif /* defined(RDMA_CM) */

    MPIDI_PG_GetConnKVSname(&pg->ch.kvs_name);

#if defined(CKPT)
#if defined(RDMA_CM)
    if (MPIDI_CH3I_Process.cm_type == MPIDI_CH3I_CM_RDMA_CM)
    {
        MPIU_Error_printf("Error: Checkpointing does not work with RDMA CM.\n"
            "Please configure and compile MVAPICH2 with checkpointing disabled "
            "or without support for RDMA CM.\n");
	MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**fail");
    }
#endif /* defined(RDMA_CM) */

    // Always use CM_ON_DEMAND for Checkpoint/Restart and Migration
    MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_ON_DEMAND;

#endif /* defined(CKPT) */
#ifdef _ENABLE_UD_
    if (rdma_enable_hybrid) {
        MPIU_Assert(MPIDI_CH3I_Process.cm_type == MPIDI_CH3I_CM_ON_DEMAND);
    }
#endif

    /* save my vc_ptr for easy access */
    MPIDI_PG_Get_vc(pg, pg_rank, &MPIDI_CH3I_Process.vc);

    /* Initialize Progress Engine */
    if ((mpi_errno = MPIDI_CH3I_Progress_init()))
    {
        MPIU_ERR_POP(mpi_errno);
    }

    /* Check for SMP only */
    MPIDI_CH3I_set_smp_only();

    if (!SMP_ONLY) 
    {
        switch (MPIDI_CH3I_Process.cm_type)
        {
            /* allocate rmda memory and set up the queues */
            case MPIDI_CH3I_CM_ON_DEMAND:
#if defined(RDMA_CM)
            case MPIDI_CH3I_CM_RDMA_CM:
#endif /* defined(RDMA_CM) */
                mpi_errno = MPIDI_CH3I_CM_Init(pg, pg_rank, &conn_info);
                if (mpi_errno != MPI_SUCCESS)
                {
                    MPIU_ERR_POP(mpi_errno);
                }
                break;
            default:
                /*call old init to setup all connections*/
                if ((mpi_errno = MPIDI_CH3I_RDMA_init(pg, pg_rank)) != MPI_SUCCESS)
                {
                    MPIU_ERR_POP(mpi_errno);
                }

                /* All vc should be connected */
                for (p = 0; p < pg_size; ++p)
                {
                    MPIDI_PG_Get_vc(pg, p, &vc);
                    vc->ch.state = MPIDI_CH3I_VC_STATE_IDLE;
                }
                break;
        }
    }
#if defined(CKPT)
#if defined(DISABLE_PTMALLOC)
    MPIU_Error_printf("Error: Checkpointing does not work without registration "
        "caching enabled.\nPlease configure and compile MVAPICH2 without checkpointing "
        " or enable registration caching.\n");
    MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**fail");
#endif /* defined(DISABLE_PTMALLOC) */

    if ((mpi_errno = MPIDI_CH3I_CR_Init(pg, pg_rank, pg_size)))
    {
        MPIU_ERR_POP(mpi_errno);
    }
#endif /* defined(CKPT) */

    /* set connection info for dynamic process management */
    if (conn_info && dpm) {
        mpi_errno = MPIDI_PG_SetConnInfo(pg_rank, (const char *)conn_info);
        if (mpi_errno != MPI_SUCCESS)
        {
            MPIU_ERR_POP(mpi_errno);
        }
    }
    MPIU_Free(conn_info);

    /* Initialize the smp channel */
    if ((mpi_errno = MPIDI_CH3I_SMP_init(pg)))
    {
        MPIU_ERR_POP(mpi_errno);
    }

    if (SMP_INIT)
    {
        for (p = 0; p < pg_size; ++p)
        {
            MPIDI_PG_Get_vc(pg, p, &vc);
	    /* Mark the SMP VC as Idle */
	    if (vc->smp.local_nodes >= 0)
            {
                vc->ch.state = MPIDI_CH3I_VC_STATE_IDLE;
                if (SMP_ONLY) {
                    MPIDI_CH3I_SMP_Init_VC(vc);
                }
#ifdef _ENABLE_XRC_
                VC_XST_SET (vc, XF_SMP_VC);
#endif
            }
        }
    } else {
        extern int enable_shmem_collectives;
        enable_shmem_collectives = SMP_INIT;
    }

    /* Set the eager max msg size now that we know SMP and RDMA are initialized.
     * The max message size is also set during VC initialization, but the state
     * of SMP is unknown at that time.
     */
    for (p = 0; p < pg_size; ++p)
    {
        MPIDI_PG_Get_vc(pg, p, &vc);
        vc->eager_max_msg_sz = MPIDI_CH3_EAGER_MAX_MSG_SIZE(vc);
    }
    
fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_INIT);
    return mpi_errno;

fn_fail:
    goto fn_exit;
}
Пример #4
0
int MPIR_Datatype_init_names(void)
{
#ifdef HAVE_ERROR_CHECKING
    static const char FCNAME[] = "MPIR_Datatype_init_names";
#endif
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPID_Datatype *datatype_ptr = NULL;
    MPIU_THREADSAFE_INIT_DECL(needsInit);

    if (needsInit) {
	MPIU_THREADSAFE_INIT_BLOCK_BEGIN(needsInit);
	/* Make sure that the basics have datatype structures allocated
	 * and filled in for them.  They are just integers prior to this
	 * call.
	 */
	mpi_errno = MPIR_Datatype_builtin_fillin();
	if (mpi_errno != MPI_SUCCESS) {
	    MPIU_ERR_POPFATAL(mpi_errno);
	}
	
	/* For each predefined type, ensure that there is a corresponding
	   object and that the object's name is set */
	for (i=0; mpi_names[i].name != 0; i++) {
	    /* The size-specific types may be DATATYPE_NULL, as might be those
	       based on 'long long' and 'long double' if those types were
	       disabled at configure time. */
	    if (mpi_names[i].dtype == MPI_DATATYPE_NULL) continue;

	    MPID_Datatype_get_ptr(mpi_names[i].dtype, datatype_ptr);

	    if (datatype_ptr < MPID_Datatype_builtin ||
		datatype_ptr > MPID_Datatype_builtin + MPID_DATATYPE_N_BUILTIN)
		{
		    MPIU_ERR_SETFATALANDJUMP1(mpi_errno,MPI_ERR_INTERN,
			      "**typeinitbadmem","**typeinitbadmem %d", i);
		}
	    if (!datatype_ptr) {
		MPIU_ERR_SETFATALANDJUMP1(mpi_errno,MPI_ERR_INTERN,
			      "**typeinitfail", "**typeinitfail %d", i - 1);
	    }

	    MPIU_DBG_MSG_FMT(DATATYPE,VERBOSE,(MPIU_DBG_FDEST,
		   "mpi_names[%d].name = %p", i, mpi_names[i].name));

	    MPIU_Strncpy(datatype_ptr->name, mpi_names[i].name,
			 MPI_MAX_OBJECT_NAME);
	}
	/* Handle the minloc/maxloc types */
	for (i=0; mpi_maxloc_names[i].name != 0; i++) {
	    /* types based on 'long long' and 'long double' may be disabled at
	       configure time, and their values set to MPI_DATATYPE_NULL.  skip
	       those types. */
	    if (mpi_maxloc_names[i].dtype == MPI_DATATYPE_NULL) continue;

	    MPID_Datatype_get_ptr(mpi_maxloc_names[i].dtype,
				  datatype_ptr);
	    if (!datatype_ptr) {
		MPIU_ERR_SETFATALANDJUMP(mpi_errno,MPI_ERR_INTERN, "**typeinitminmaxloc");
	    }
	    MPIU_Strncpy(datatype_ptr->name, mpi_maxloc_names[i].name,
			 MPI_MAX_OBJECT_NAME);
	}
	MPIU_THREADSAFE_INIT_CLEAR(needsInit);
    fn_fail:;
    MPIU_THREADSAFE_INIT_BLOCK_END(needsInit);
    }

    return mpi_errno;
}
Пример #5
0
int MPIDI_CH3U_Post_data_receive_found(MPID_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;	
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPIDI_msg_sz_t userbuf_sz;
    MPID_Datatype * dt_ptr = NULL;
    MPIDI_msg_sz_t data_sz;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);

    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"posted request found");
	
    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, 
			    dt_contig, userbuf_sz, dt_ptr, dt_true_lb);
		
    if (rreq->dev.recv_data_sz <= userbuf_sz) {
	data_sz = rreq->dev.recv_data_sz;
    }
    else {
	MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
               "receive buffer too small; message truncated, msg_sz=" MPIDI_MSG_SZ_FMT ", userbuf_sz="
					    MPIDI_MSG_SZ_FMT,
				 rreq->dev.recv_data_sz, userbuf_sz));
	rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
                     MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
		     "**truncate", "**truncate %d %d %d %d", 
		     rreq->status.MPI_SOURCE, rreq->status.MPI_TAG, 
		     rreq->dev.recv_data_sz, userbuf_sz );
	rreq->status.count = userbuf_sz;
	data_sz = userbuf_sz;
    }

    if (dt_contig && data_sz == rreq->dev.recv_data_sz)
    {
	/* user buffer is contiguous and large enough to store the
	   entire message.  However, we haven't yet *read* the data 
	   (this code describes how to read the data into the destination) */
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"IOV loaded for contiguous read");
	rreq->dev.iov[0].MPID_IOV_BUF = 
	    (MPID_IOV_BUF_CAST)((char*)(rreq->dev.user_buf) + dt_true_lb);
	rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
	rreq->dev.iov_count = 1;
	/* FIXME: We want to set the OnDataAvail to the appropriate 
	   function, which depends on whether this is an RMA 
	   request or a pt-to-pt request. */
	rreq->dev.OnDataAvail = 0;
    }
    else {
	/* user buffer is not contiguous or is too small to hold
	   the entire message */
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"IOV loaded for non-contiguous read");
	rreq->dev.segment_ptr = MPID_Segment_alloc( );
        MPIU_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
	MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, 
			  rreq->dev.datatype, rreq->dev.segment_ptr, 0);
	rreq->dev.segment_first = 0;
	rreq->dev.segment_size = data_sz;
	mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIU_ERR_SETFATALANDJUMP(mpi_errno,MPI_ERR_OTHER,
				     "**ch3|loadrecviov");
	}
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Пример #6
0
int MPIDI_CH3U_Receive_data_found(MPID_Request *rreq, char *buf, MPIDI_msg_sz_t *buflen, int *complete)
{
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPIDI_msg_sz_t userbuf_sz;
    MPID_Datatype * dt_ptr = NULL;
    MPIDI_msg_sz_t data_sz;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);

    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"posted request found");
	
    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, 
			    dt_contig, userbuf_sz, dt_ptr, dt_true_lb);
		
    if (rreq->dev.recv_data_sz <= userbuf_sz) {
	data_sz = rreq->dev.recv_data_sz;
    }
    else {
	MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
               "receive buffer too small; message truncated, msg_sz=" MPIDI_MSG_SZ_FMT ", userbuf_sz="
					    MPIDI_MSG_SZ_FMT,
				 rreq->dev.recv_data_sz, userbuf_sz));
	rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
                     MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
		     "**truncate", "**truncate %d %d %d %d", 
		     rreq->status.MPI_SOURCE, rreq->status.MPI_TAG, 
		     rreq->dev.recv_data_sz, userbuf_sz );
	rreq->status.count = userbuf_sz;
	data_sz = userbuf_sz;
    }

    if (dt_contig && data_sz == rreq->dev.recv_data_sz)
    {
	/* user buffer is contiguous and large enough to store the
	   entire message.  However, we haven't yet *read* the data 
	   (this code describes how to read the data into the destination) */

        /* if all of the data has already been received, unpack it
           now, otherwise build an iov and let the channel unpack */
        if (*buflen >= data_sz)
        {
            MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"Copying contiguous data to user buffer");
            /* copy data out of the receive buffer */
            MPIU_Memcpy((char*)(rreq->dev.user_buf) + dt_true_lb, buf, data_sz);
            *buflen = data_sz;
            *complete = TRUE;
        }
        else
        {
            MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"IOV loaded for contiguous read");
            
            rreq->dev.iov[0].MPID_IOV_BUF = 
                (MPID_IOV_BUF_CAST)((char*)(rreq->dev.user_buf) + dt_true_lb);
            rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
            rreq->dev.iov_count = 1;
            *buflen = 0;
            *complete = FALSE;
        }
        
	/* FIXME: We want to set the OnDataAvail to the appropriate 
	   function, which depends on whether this is an RMA 
	   request or a pt-to-pt request. */
	rreq->dev.OnDataAvail = 0;
    }
    else {
	/* user buffer is not contiguous or is too small to hold
	   the entire message */
        
	rreq->dev.segment_ptr = MPID_Segment_alloc( );
        MPIU_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");

 	MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, 
			  rreq->dev.datatype, rreq->dev.segment_ptr, 0);
	rreq->dev.segment_first = 0;
	rreq->dev.segment_size  = data_sz;

        /* if all of the data has already been received, and the
           message is not truncated, unpack it now, otherwise build an
           iov and let the channel unpack */
        if (data_sz == rreq->dev.recv_data_sz && *buflen >= data_sz)
        {
            MPIDI_msg_sz_t last;
            MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"Copying noncontiguous data to user buffer");
            last = data_sz;
            MPID_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, 
				&last, buf);
            /* --BEGIN ERROR HANDLING-- */
            if (last != data_sz)
            {
                /* If the data can't be unpacked, the we have a
                   mismatch between the datatype and the amount of
                   data received.  Throw away received data. */
                MPIU_ERR_SET(rreq->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch");
                rreq->status.count = (int)rreq->dev.segment_first;
                *buflen = data_sz;
                *complete = TRUE;
		/* FIXME: Set OnDataAvail to 0?  If not, why not? */
                goto fn_exit;
            }
            /* --END ERROR HANDLING-- */
            *buflen = data_sz;
            rreq->dev.OnDataAvail = 0;
            *complete = TRUE;
        }
        else
        {   
            MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"IOV loaded for non-contiguous read");

            mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIU_ERR_SETFATALANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                         "**ch3|loadrecviov");
            }
            *buflen = 0;
            *complete = FALSE;
        }
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}