int MPID_nem_lmt_dma_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPID_Request *sreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_pkt_lmt_rts_t * const rts_pkt = (MPID_nem_pkt_lmt_rts_t *)pkt;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);
    
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);

    MPIU_CHKPMEM_MALLOC(sreq->ch.s_cookie, knem_cookie_t *, sizeof(knem_cookie_t), mpi_errno, "s_cookie");

    mpi_errno = send_sreq_data(vc, sreq, sreq->ch.s_cookie);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    MPID_nem_lmt_send_RTS(vc, rts_pkt, sreq->ch.s_cookie, sizeof(knem_cookie_t));

fn_exit:
    MPIU_CHKPMEM_COMMIT();
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);
    return mpi_errno;
fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}
Example #2
int MPIDI_CH3U_Comm_register_destroy_hook(int (*hook_fn)(struct MPID_Comm *, void *), void *param)
{
    int mpi_errno = MPI_SUCCESS;
    hook_elt *elt;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);

    MPIU_CHKPMEM_MALLOC(elt, hook_elt *, sizeof(hook_elt), mpi_errno, "hook_elt");

    elt->hook_fn = hook_fn;
    elt->param = param;
    
    MPL_LL_PREPEND(destroy_hooks_head, destroy_hooks_tail, elt);

    MPIU_CHKPMEM_COMMIT();
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_COMM_REGISTER_DESTROY_HOOK);
    return mpi_errno;
 fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}
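/* For illustration only: a minimal sketch of how the destroy-hook API above
   might be used.  The names my_destroy_hook and my_state are hypothetical;
   the signatures come from the registration prototype above. */
static int my_destroy_hook(struct MPID_Comm *comm, void *param)
{
    /* inspect comm or param here; return an MPI error code on failure */
    return MPI_SUCCESS;
}

static int register_my_hook(void *my_state)
{
    /* typically called once during channel init, before communicators are freed */
    return MPIDI_CH3U_Comm_register_destroy_hook(my_destroy_hook, my_state);
}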
Example #3
int MPID_nem_mxm_poll(int in_blocking_progress)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *req = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MXM_POLL);
    MPIDI_FUNC_ENTER(MPID_STATE_MXM_POLL);

    while (!MPID_nem_mxm_queue_empty(mxm_obj->sreq_queue)) {
        MPID_nem_mxm_queue_dequeue(&mxm_obj->sreq_queue, &req);
        _mxm_handle_sreq(req);
    }

    mpi_errno = _mxm_poll();
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MXM_POLL);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Example #4
int MPIDI_CH3_Rendezvous_push(MPIDI_VC_t * vc, MPID_Request * sreq)
{
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_PUSH);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_PUSH);


    if (SMP_INIT
        && vc->smp.local_nodes >= 0
        && vc->smp.local_nodes != g_smpi.my_local_id)
    {
        MPIU_Assert(sreq->mrail.protocol == VAPI_PROTOCOL_R3);
        MPIDI_CH3_SMP_Rendezvous_push(vc, sreq);
        MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RNDV_PUSH);
        return MPI_SUCCESS;
    }

    switch (sreq->mrail.protocol)
    {
    case VAPI_PROTOCOL_RPUT:
        MPIDI_CH3I_MRAILI_Rendezvous_rput_push(vc, sreq);
        break;
    case VAPI_PROTOCOL_RGET:
        MPIDI_CH3I_MRAILI_Rendezvous_rget_push(vc, sreq);
        break;
#ifdef _ENABLE_UD_
    case VAPI_PROTOCOL_UD_ZCOPY:
        MPIDI_CH3I_MRAILI_Rendezvous_zcopy_push(vc, sreq,
                &(MPIDI_CH3I_RDMA_Process.zcopy_info));
        break;
#endif
    default:
        MPIDI_CH3_Rendezvous_r3_push(vc, sreq);
        break;
    }
    
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RNDV_PUSH);
    return MPI_SUCCESS;
}
Example #5
static int append_overflow(int i)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    ptl_me_t me;
    ptl_process_t id_any;
    MPIDI_STATE_DECL(MPID_STATE_APPEND_OVERFLOW);

    MPIDI_FUNC_ENTER(MPID_STATE_APPEND_OVERFLOW);

    MPIU_Assert(i >= 0 && i < NUM_OVERFLOW_ME);
    
    id_any.phys.pid = PTL_PID_ANY;
    id_any.phys.nid = PTL_NID_ANY;

    me.start = overflow_buf[i];
    me.length = OVERFLOW_LENGTH;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_MANAGE_LOCAL | PTL_ME_NO_TRUNCATE | PTL_ME_MAY_ALIGN |
                   PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE );
    me.match_id = id_any;
    me.match_bits = 0;
    me.ignore_bits = ~((ptl_match_bits_t)0);
    me.min_free = PTL_LARGE_THRESHOLD;
    
    /* if there is no space to append the entry, process outstanding events and try again */
    ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_OVERFLOW_LIST, (void *)(size_t)i,
                      &overflow_me_handle[i]);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_APPEND_OVERFLOW);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #6
int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t *pg_p, int pg_rank)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_INIT);

    /* Override split_type */
    MPID_Comm_fns = &comm_fns;

    mpi_errno = MPID_nem_init (pg_rank, pg_p, has_parent);
    if (mpi_errno) MPIR_ERR_POP (mpi_errno);

    nemesis_initialized = 1;

    MPIDI_CH3I_my_rank = pg_rank;
    MPIDI_CH3I_my_pg = pg_p;

    /*
     * Initialize Progress Engine
     */
    mpi_errno = MPIDI_CH3I_Progress_init();
    if (mpi_errno) MPIR_ERR_SETFATALANDJUMP (mpi_errno, MPI_ERR_OTHER, "**init_progress");

    for (i = 0; i < pg_p->size; i++)
    {
        mpi_errno = MPIDI_CH3_VC_Init(&pg_p->vct[i]);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_INIT);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Example #7
int MPID_GPID_GetAllInComm( MPID_Comm *comm_ptr, int local_size, 
			    MPID_Gpid local_gpids[], int *singlePG )
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    int *gpid = (int*)&local_gpids[0];
    int lastPGID = -1, pgid;
    MPIDI_VCR vc;
    MPIDI_STATE_DECL(MPID_STATE_MPID_GPID_GETALLINCOMM);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_GPID_GETALLINCOMM);

    MPIU_Assert(comm_ptr->local_size == local_size);
    
    *singlePG = 1;
    for (i=0; i<comm_ptr->local_size; i++) {
	vc = comm_ptr->dev.vcrt->vcr_table[i];

	/* Get the process group id as an int */
	MPIDI_PG_IdToNum( vc->pg, &pgid );

	*gpid++ = pgid;
	if (lastPGID != pgid) { 
	    if (lastPGID != -1)
		*singlePG = 0;
	    lastPGID = pgid;
	}
	*gpid++ = vc->pg_rank;

        MPIU_DBG_MSG_FMT(COMM,VERBOSE, (MPIU_DBG_FDEST,
                         "pgid=%d vc->pg_rank=%d",
                         pgid, vc->pg_rank));
    }
    
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_GPID_GETALLINCOMM);
    return mpi_errno;
}
Example #8
static int pkt_ckpt_marker_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **req)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_pkt_ckpt_marker_t * const ckpt_pkt = (MPID_nem_pkt_ckpt_marker_t *)pkt;
    MPIDI_STATE_DECL(MPID_STATE_PKT_CKPT_MARKER_HANDLER);

    MPIDI_FUNC_ENTER(MPID_STATE_PKT_CKPT_MARKER_HANDLER);

    if (!checkpointing) {
        mpi_errno = MPIDI_nem_ckpt_start();
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    MPIU_Assert(current_wave == ckpt_pkt->wave);

    --marker_count;

    /* We're checkpointing the shared memory region, so we don't need
       to flush the channels between local processes, only remote
       processes */

    if (marker_count == 0) {
        MPIDI_nem_ckpt_finish_checkpoint = TRUE;
        /* make sure we break out of receive loop into progress loop */
        MPIDI_CH3_Progress_signal_completion();
    }
    
    *buflen = sizeof(MPIDI_CH3_Pkt_t);
    *req = NULL;

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_PKT_CKPT_MARKER_HANDLER);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Example #9
int MPID_nem_lmt_dma_handle_cookie(MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV cookie)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);

    if (cookie.MPID_IOV_LEN == 0 && cookie.MPID_IOV_BUF == NULL) {
        /* req is a send request, we need to initiate another knem request and
           send a COOKIE message back to the receiver indicating the lid
           returned from the ioctl. */
        int complete;
        knem_cookie_t s_cookie;

        /* This function will invoke the OnDataAvail function to load more data. */
        mpi_errno = check_req_complete(vc, req, &complete);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        /* If we were complete we should have received a DONE message instead
           of a COOKIE message. */
        MPIU_Assert(!complete);

        mpi_errno = do_dma_send(vc, req, req->dev.iov_count, &req->dev.iov[0], &s_cookie);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_nem_lmt_send_COOKIE(vc, req, &s_cookie, sizeof(knem_cookie_t));
    }
    else {
        /* req is a receive request and we need to continue receiving using the
           lid provided in the cookie iov. */
        mpi_errno = MPID_nem_lmt_dma_start_recv(vc, req, cookie);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Example #10
/**
 * Get the type of a device.
 *
 * @param dev the device.
 * @param ctx the device context.
 * @param hca_type the type (output).
 *
 * @return MPI_SUCCESS if succeeded, MPI_ERR_OTHER if failed
 *
 * \see HCA_Type
 */
static inline int get_hca_type (struct ibv_device* dev, struct ibv_context* ctx, HCA_Type* hca_type)
{
    int ret;
    int mpi_errno = MPI_SUCCESS;
    char* dev_name;
    struct ibv_device_attr dev_attr;
    MPIDI_STATE_DECL(MPID_STATE_GET_HCA_TYPE);

    MPIDI_FUNC_ENTER(MPID_STATE_GET_HCA_TYPE);

    memset(&dev_attr, 0, sizeof(struct ibv_device_attr));

    dev_name = (char*) ibv_get_device_name(dev);
    if (!dev_name)
    {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ibv_get_device_name");
    }

    ret = ibv_query_device(ctx, &dev_attr);
    if (ret)
    {
        MPIU_ERR_SETANDJUMP1(
            mpi_errno,
            MPI_ERR_OTHER,
            "**ibv_query_device",
            "**ibv_query_device %s",
            dev_name
        );
    }

    if ((mpi_errno = hcaNameToType(dev_name, hca_type)) != MPI_SUCCESS)
    {
        MPIU_ERR_POP(mpi_errno);
    }

fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_GET_HCA_TYPE);
    return mpi_errno;
}
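/* A hedged caller sketch for get_hca_type(): probe the first device returned
   by libibverbs.  probe_first_hca is hypothetical; HCA_Type and get_hca_type
   come from the code above, everything else is the standard verbs API. */
static int probe_first_hca(HCA_Type* type)
{
    int num_devices = 0;
    int mpi_errno = MPI_ERR_OTHER;
    struct ibv_device** dev_list = ibv_get_device_list(&num_devices);

    if (dev_list && num_devices > 0) {
        struct ibv_context* ctx = ibv_open_device(dev_list[0]);
        if (ctx) {
            mpi_errno = get_hca_type(dev_list[0], ctx, type);
            ibv_close_device(ctx);
        }
    }
    if (dev_list)
        ibv_free_device_list(dev_list);
    return mpi_errno;
}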
Example #11
static int MPIDI_Open_port(MPID_Info *info_ptr, char *port_name)
{
    int mpi_errno = MPI_SUCCESS;
    int str_errno = MPIU_STR_SUCCESS;
    int len;
    int port_name_tag = 0; /* this tag is added to the business card,
                              which is then returned as the port name */
    int myRank = MPIR_Process.comm_world->rank;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_OPEN_PORT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_OPEN_PORT);

    mpi_errno = get_port_name_tag(&port_name_tag);
    MPIR_ERR_CHKANDJUMP(mpi_errno,mpi_errno,MPI_ERR_OTHER,"**argstr_port_name_tag");

    len = MPI_MAX_PORT_NAME;
    str_errno = MPIU_Str_add_int_arg(&port_name, &len,
                                     MPIDI_CH3I_PORT_NAME_TAG_KEY, port_name_tag);
    MPIR_ERR_CHKANDJUMP(str_errno, mpi_errno, MPI_ERR_OTHER, "**argstr_port_name_tag");

    /* This works because Get_business_card uses the same MPIU_Str_xxx 
       functions as above to add the business card to the input string */
    /* FIXME: We should instead ask the mpid_pg routines to give us
       a connection string. There may need to be a separate step to 
       restrict us to a connection information that is only valid for
       connections between processes that are started separately (e.g.,
       may not use shared memory).  We may need a channel-specific 
       function to create an exportable connection string.  */
    mpi_errno = MPIDI_CH3_Get_business_card(myRank, port_name, len);
    MPIU_DBG_MSG_FMT(CH3, VERBOSE, (MPIU_DBG_FDEST, "port_name = %s", port_name));

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_OPEN_PORT);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
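/* The tag embedded above is recovered on the other side with the matching
   MPIU_Str getter (as MPIDI_GetTagFromPort does).  A minimal sketch; the
   helper name tag_from_port is hypothetical. */
static int tag_from_port(const char *port_name, int *port_name_tag)
{
    int str_errno = MPIU_Str_get_int_arg(port_name, MPIDI_CH3I_PORT_NAME_TAG_KEY,
                                         port_name_tag);
    return (str_errno == MPIU_STR_SUCCESS) ? MPI_SUCCESS : MPI_ERR_OTHER;
}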
Example #12
int MPIDI_CH3U_Handle_send_req(MPIDI_VC_t * vc, MPID_Request * sreq, 
			       int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);

    /* Use the associated function rather than switching on the old ca field */
    /* Routines can call the attached function directly */
    reqFn = sreq->dev.OnDataAvail;
    if (!reqFn) {
	MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
	MPIDI_CH3U_Request_complete(sreq);
        *complete = 1;
    }
    else {
	mpi_errno = reqFn( vc, sreq, complete );
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);
    return mpi_errno;
}
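/* The reqFn dispatch above assumes the OnDataAvail callback convention: the
   handler either reloads more data (leaving *complete == 0) or finishes the
   request.  A minimal hypothetical handler that just completes: */
static int example_on_data_avail(MPIDI_VC_t *vc, MPID_Request *sreq, int *complete)
{
    /* vc is unused in this trivial handler */
    MPIDI_CH3U_Request_complete(sreq);
    *complete = 1;
    return MPI_SUCCESS;
}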
Example #13
int MPID_nem_tcp_get_vc_from_conninfo (char *pg_id, int pg_rank, struct MPIDI_VC **vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_PG_t *pg;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_GET_VC_FROM_CONNINFO);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_GET_VC_FROM_CONNINFO);

    MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "pg_id=%s pg_rank=%d", pg_id, pg_rank));
    
    mpi_errno = MPIDI_PG_Find (pg_id, &pg);
    if (mpi_errno) MPIU_ERR_POP (mpi_errno);

    MPIU_ERR_CHKINTERNAL(pg == NULL, mpi_errno, "invalid PG");
    MPIU_ERR_CHKINTERNAL(pg_rank < 0 || pg_rank >= MPIDI_PG_Get_size (pg), mpi_errno, "invalid pg_rank");
        
    MPIDI_PG_Get_vc_set_active (pg, pg_rank, vc);
    
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_GET_VC_FROM_CONNINFO);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #14
int MPIDI_VCR_Dup(MPIDI_VCR orig_vcr, MPIDI_VCR * new_vcr)
{
    MPIDI_STATE_DECL(MPID_STATE_MPID_VCR_DUP);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_VCR_DUP);

    /* We are allowed to create a vc that belongs to no process group 
     as part of the initial connect/accept action, so in that case,
     ignore the pg ref count update */
    /* XXX DJG FIXME-MT should we be checking this? */
    /* we probably need a test-and-incr operation or equivalent to avoid races */
    if (MPIU_Object_get_ref(orig_vcr) == 0 && orig_vcr->pg) {
	MPIDI_VC_add_ref( orig_vcr );
	MPIDI_VC_add_ref( orig_vcr );
	MPIDI_PG_add_ref( orig_vcr->pg );
    }
    else {
	MPIDI_VC_add_ref(orig_vcr);
    }
    MPIU_DBG_MSG_FMT(REFCOUNT,TYPICAL,(MPIU_DBG_FDEST,"Incr VCR %p ref count",orig_vcr));
    *new_vcr = orig_vcr;
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_VCR_DUP);
    return MPI_SUCCESS;
}
Example #15
void vbuf_init_rma_put(vbuf *v, void *l_addr, uint32_t lkey,
                       void *r_addr, uint32_t rkey, int len, int rail)
{
    MPIDI_STATE_DECL(MPID_STATE_VBUF_INIT_RMA_PUT);
    MPIDI_FUNC_ENTER(MPID_STATE_VBUF_INIT_RMA_PUT);

    v->desc.u.sr.next = NULL;
    v->desc.u.sr.send_flags = IBV_SEND_SIGNALED;
    v->desc.u.sr.opcode = IBV_WR_RDMA_WRITE;
    v->desc.u.sr.wr_id = (uintptr_t) v;

    v->desc.u.sr.num_sge = 1;
    v->desc.u.sr.wr.rdma.remote_addr = (uintptr_t)(r_addr);
    v->desc.u.sr.wr.rdma.rkey = rkey;

    v->desc.u.sr.sg_list = &(v->desc.sg_entry);
    v->desc.sg_entry.length = len;
    v->desc.sg_entry.lkey = lkey;
    v->desc.sg_entry.addr = (uintptr_t)(l_addr);
    v->padding = RDMA_ONE_SIDED;
    v->rail = rail;

    MPIDI_FUNC_EXIT(MPID_STATE_VBUF_INIT_RMA_PUT);
}
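/* vbuf_init_rma_put() only fills in the send work request; issuing the RDMA
   write is a separate post to a queue pair.  A hedged sketch (queue-pair
   selection and completion handling elided; post_rma_put is hypothetical): */
static int post_rma_put(struct ibv_qp *qp, vbuf *v)
{
    struct ibv_send_wr *bad_wr = NULL;
    return ibv_post_send(qp, &v->desc.u.sr, &bad_wr);  /* 0 on success */
}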
Example #16
int MPID_Comm_failure_ack(MPID_Comm *comm_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_COMM_FAILURE_ACK);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_COMM_FAILURE_ACK);

    /* Update the list of failed processes that we know about locally.
     * This part could technically be turned off and be a correct
     * implementation, but it would be slower about propagating failure
     * information. Also, this is the failure case so speed isn't as
     * important. */
    MPIDI_CH3U_Check_for_failed_procs();

    /* Update the marker for the last known failed process in this
     * communicator. */
    comm_ptr->dev.last_ack_rank = MPIDI_last_known_failed;

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_COMM_FAILURE_ACK);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Example #17
int MPIDI_CH3U_Request_unpack_srbuf(MPID_Request * rreq)
{
    MPI_Aint last;
    int tmpbuf_last;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
    
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);

    tmpbuf_last = (int)(rreq->dev.segment_first + rreq->dev.tmpbuf_sz);
    if (rreq->dev.segment_size < tmpbuf_last)
    {
	tmpbuf_last = (int)rreq->dev.segment_size;
    }
    last = tmpbuf_last;
    MPID_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, 
			&last, rreq->dev.tmpbuf);
    if (last == 0 || last == rreq->dev.segment_first)
    {
	/* --BEGIN ERROR HANDLING-- */
	/* If no data can be unpacked, then we have a datatype processing 
	   problem.  Adjust the segment info so that the remaining
	   data is received and thrown away. */
	MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.segment_first);
	rreq->dev.segment_size = rreq->dev.segment_first;
	rreq->dev.segment_first += tmpbuf_last;
	rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
		       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE,
		       "**dtypemismatch", 0);
	/* --END ERROR HANDLING-- */
    }
    else if (tmpbuf_last == rreq->dev.segment_size)
    {
	/* --BEGIN ERROR HANDLING-- */
	if (last != tmpbuf_last)
	{
	    /* received data was not entirely consumed by unpack() because too
	       few bytes remained to fill the next basic datatype.
	       Note: the segment_first field is set to segment_last so that if
	       this is a truncated message, extra data will be read
	       off the pipe. */
	    MPIR_STATUS_SET_COUNT(rreq->status, last);
	    rreq->dev.segment_size = last;
	    rreq->dev.segment_first = tmpbuf_last;
	    rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
		  MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE,
							  "**dtypemismatch", 0);
	}
	/* --END ERROR HANDLING-- */
    }
    else
    {
	rreq->dev.tmpbuf_off = (int)(tmpbuf_last - last);
	if (rreq->dev.tmpbuf_off > 0)
	{
	    /* move any remaining data to the beginning of the buffer.  
	       Note: memmove() is used since the data regions could
               overlap. */
	    memmove(rreq->dev.tmpbuf, (char *) rreq->dev.tmpbuf + 
		    (last - rreq->dev.segment_first), rreq->dev.tmpbuf_off);
	}
	rreq->dev.segment_first = last;
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
    return mpi_errno;
}
Example #18
int MPID_nem_mxm_issend(MPIDI_VC_t * vc, const void *buf, int count, MPI_Datatype datatype,
                        int rank, int tag, MPID_Comm * comm, int context_offset,
                        MPID_Request ** sreq_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq = NULL;
    MPID_Datatype *dt_ptr;
    int dt_contig;
    MPIDI_msg_sz_t data_sz;
    MPI_Aint dt_true_lb;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISSEND);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISSEND);

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    /* create a request */
    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIU_Assert(sreq != NULL);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);
    if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
        MPID_Datatype_get_ptr(datatype, sreq->dev.datatype_ptr);
        MPID_Datatype_add_ref(sreq->dev.datatype_ptr);
    }
    sreq->partner_request = NULL;
    sreq->dev.OnDataAvail = NULL;
    sreq->dev.tmpbuf = NULL;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = FALSE;

    _dbg_mxm_output(5,
                    "isSend ========> Sending USER msg for req %p (context %d to %d tag %d size %d) \n",
                    sreq, comm->context_id + context_offset, rank, tag, data_sz);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;
    req_area->iov_count = 0;
    req_area->iov_buf[0].ptr = NULL;
    req_area->iov_buf[0].length = 0;

    if (data_sz) {
        if (dt_contig) {
            req_area->iov_count = 1;
            req_area->iov_buf[0].ptr = (char *) (buf) + dt_true_lb;
            req_area->iov_buf[0].length = data_sz;
        }
        else {
            MPIDI_msg_sz_t last;
            MPI_Aint packsize = 0;

            sreq->ch.noncontig = TRUE;
            sreq->dev.segment_ptr = MPID_Segment_alloc();
            MPIU_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
                                 "**nomem", "**nomem %s", "MPID_Segment_alloc");
            MPIR_Pack_size_impl(count, datatype, &packsize);

            last = data_sz;
            if (packsize > 0) {
                sreq->dev.tmpbuf = MPIU_Malloc((size_t) packsize);
                MPIU_Assert(sreq->dev.tmpbuf);
                MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
                MPID_Segment_pack(sreq->dev.segment_ptr, 0, &last, sreq->dev.tmpbuf);

                req_area->iov_count = 1;
                req_area->iov_buf[0].ptr = sreq->dev.tmpbuf;
                req_area->iov_buf[0].length = last;
            }
        }
    }

    vc_area->pending_sends += 1;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_SYNC,
                           (mxm_mq_h) comm->dev.ch.netmod_priv, comm->rank, tag,
                           _mxm_tag_mpi2mxm(tag, comm->context_id + context_offset), 0);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    _dbg_mxm_out_req(sreq);

  fn_exit:
    *sreq_ptr = sreq;
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISSEND);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Example #19
int MPIDI_CH3_iStartMsg(MPIDI_VC_t * vc, void * hdr, MPIDI_msg_sz_t hdr_sz, 
			MPID_Request ** sreq_ptr)
{
    MPID_Request * sreq = NULL;
    MPIDI_CH3I_VC *vcch = &vc->ch;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSG);
    
    MPIU_Assert( hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    /* The SOCK channel uses a fixed length header, the size of which is the 
       maximum of all possible packet headers */
    hdr_sz = sizeof(MPIDI_CH3_Pkt_t);
    MPIU_DBG_STMT(CH3_CHANNEL,VERBOSE,
		  MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t*)hdr));

    if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTED) /* MT */
    {
	/* Connection already formed.  If send queue is empty attempt to send 
	   data, queuing any unsent data. */
	if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
	{
	    MPIU_Size_t nb;
	    int rc;

	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
			 "send queue empty, attempting to write");
	    
	    MPIU_DBG_PKT(vcch->conn,hdr,"istartmsg");
	    /* MT: need some signalling to lock down our right to use the
	       channel, thus ensuring that the progress engine does
	       not also try to write */
	    rc = MPIDU_Sock_write(vcch->sock, hdr, hdr_sz, &nb);
	    if (rc == MPI_SUCCESS)
	    {
		MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
			       "wrote %ld bytes", (unsigned long) nb);
		
		if (nb == hdr_sz)
		{ 
		    MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
				   "entire write complete, " MPIDI_MSG_SZ_FMT " bytes", nb);
		    /* done.  get us out of here as quickly as possible. */
		}
		else
		{
		    MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
                    "partial write of " MPIDI_MSG_SZ_FMT " bytes, request enqueued at head", nb);
		    sreq = create_request(hdr, hdr_sz, nb);
		    if (!sreq) {
			MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
		    }

		    MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
		    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
     (MPIU_DBG_FDEST,"posting write, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
		    vcch->conn->send_active = sreq;
		    mpi_errno = MPIDU_Sock_post_write(vcch->conn->sock, sreq->dev.iov[0].MPID_IOV_BUF,
						      sreq->dev.iov[0].MPID_IOV_LEN, sreq->dev.iov[0].MPID_IOV_LEN, NULL);
		    /* --BEGIN ERROR HANDLING-- */
		    if (mpi_errno != MPI_SUCCESS)
		    {
			mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
							 "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
							 sreq, vcch->conn, vc);
			goto fn_fail;
		    }
		    /* --END ERROR HANDLING-- */
		}
	    }
	    /* --BEGIN ERROR HANDLING-- */
	    else
	    {
		MPIU_DBG_MSG_D(CH3_CHANNEL,TYPICAL,
			       "ERROR - MPIDU_Sock_write failed, rc=%d", rc);
		sreq = MPID_Request_create();
		if (!sreq) {
		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
		}
		sreq->kind = MPID_REQUEST_SEND;
		MPID_cc_set(&(sreq->cc), 0);
		sreq->status.MPI_ERROR = MPIR_Err_create_code( rc,
			       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, 
			       MPI_ERR_INTERN, "**ch3|sock|writefailed",
			       "**ch3|sock|writefailed %d", rc );
		/* Make sure that the caller sees this error */
		mpi_errno = sreq->status.MPI_ERROR;
	    }
	    /* --END ERROR HANDLING-- */
	}
	else
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
			 "send in progress, request enqueued");
	    sreq = create_request(hdr, hdr_sz, 0);
	    if (!sreq) {
		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	    }
	    MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
	}
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTING) /* MT */
    {
	MPIU_DBG_VCUSE(vc,
		       "connecteding. enqueuing request");
	
	/* queue the data so it can be sent after the connection is formed */
	sreq = create_request(hdr, hdr_sz, 0);
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_UNCONNECTED) /* MT */
    {
	MPIU_DBG_VCUSE(vc,
		       "unconnected.  posting connect and enqueuing request");
	
	/* queue the data so it can be sent after the connection is formed */
	sreq = create_request(hdr, hdr_sz, 0);
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);

	/* Form a new connection */
	MPIDI_CH3I_VC_post_connect(vc);
    }
    else if (vcch->state != MPIDI_CH3I_VC_STATE_FAILED)
    {
	/* Unable to send data at the moment, so queue it for later */
	MPIU_DBG_VCUSE(vc,"forming connection, request enqueued");
	sreq = create_request(hdr, hdr_sz, 0);
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    /* --BEGIN ERROR HANDLING-- */
    else
    {
	/* Connection failed, so allocate a request and return an error. */
	MPIU_DBG_VCUSE(vc,"ERROR - connection failed");
	sreq = MPID_Request_create();
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	sreq->kind = MPID_REQUEST_SEND;
	MPID_cc_set(&sreq->cc, 0);
	
	sreq->status.MPI_ERROR = MPIR_Err_create_code( MPI_SUCCESS,
		       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, 
		       MPI_ERR_INTERN, "**ch3|sock|connectionfailed",0 );
	/* Make sure that the caller sees this error */
	mpi_errno = sreq->status.MPI_ERROR;
    }
    /* --END ERROR HANDLING-- */

  fn_fail:
    *sreq_ptr = sreq;
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSG);
    return mpi_errno;
}
Example #20
int MPIDI_Comm_spawn_multiple(int count, char **commands,
                                  char ***argvs, const int *maxprocs,
                                  MPID_Info **info_ptrs, int root,
                                  MPID_Comm *comm_ptr, MPID_Comm
                                  **intercomm, int *errcodes) 
{
    char port_name[MPI_MAX_PORT_NAME];
    int *info_keyval_sizes=0, i, mpi_errno=MPI_SUCCESS;
    PMI_keyval_t **info_keyval_vectors=0, preput_keyval_vector;
    int *pmi_errcodes = 0, pmi_errno;
    int total_num_processes, should_accept = 1;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_COMM_SPAWN_MULTIPLE);


    if (comm_ptr->rank == root) {
	/* create an array for the pmi error codes */
	total_num_processes = 0;
	for (i=0; i<count; i++) {
	    total_num_processes += maxprocs[i];
	}
	pmi_errcodes = (int*)MPIU_Malloc(sizeof(int) * total_num_processes);
	if (pmi_errcodes == NULL) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}

	/* initialize them to 0 */
	for (i=0; i<total_num_processes; i++)
	    pmi_errcodes[i] = 0;

	/* Open a port for the spawned processes to connect to */
	/* FIXME: info may be needed for port name */
        mpi_errno = MPID_Open_port(NULL, port_name);
	/* --BEGIN ERROR HANDLING-- */
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
	/* --END ERROR HANDLING-- */

	/* Spawn the processes */
#ifdef USE_PMI2_API
        MPIU_Assert(count > 0);
        {
            int *argcs = MPIU_Malloc(count*sizeof(int));
            struct MPID_Info preput;
            struct MPID_Info *preput_p[1] = { &preput };

            MPIU_Assert(argcs);
            /*
            info_keyval_sizes = MPIU_Malloc(count * sizeof(int));
            */

            /* FIXME cheating on constness */
            preput.key = (char *)PARENT_PORT_KVSKEY;
            preput.value = port_name;
            preput.next = NULL;

            /* compute argcs array */
            for (i = 0; i < count; ++i) {
                argcs[i] = 0;
                if (argvs != NULL && argvs[i] != NULL) {
                    while (argvs[i][argcs[i]]) {
                        ++argcs[i];
                    }
                }

                /* a fib for now */
                /*
                info_keyval_sizes[i] = 0;
                */
            }
            /* XXX DJG don't need this, PMI API is thread-safe? */
            /*MPIU_THREAD_CS_ENTER(PMI,);*/
            /* release the global CS for spawn PMI calls */
            MPIU_THREAD_CS_EXIT(ALLFUNC,);
            pmi_errno = PMI2_Job_Spawn(count, (const char **)commands,
                                       argcs, (const char ***)argvs,
                                       maxprocs,
                                       info_keyval_sizes, (const MPID_Info **)info_ptrs,
                                       1, (const struct MPID_Info **)preput_p,
                                       NULL, 0,
                                       /*jobId, jobIdSize,*/ /* XXX DJG job stuff? */
                                       pmi_errcodes);
            MPIU_THREAD_CS_ENTER(ALLFUNC,);
            /*MPIU_THREAD_CS_EXIT(PMI,);*/
            MPIU_Free(argcs);
            if (pmi_errno != PMI2_SUCCESS) {
                MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                     "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno);
            }
        }
#else
        /* FIXME: This is *really* awkward.  We should either
           Fix on MPI-style info data structures for PMI (avoid unnecessary
           duplication) or add an MPIU_Info_getall(...) that creates
           the necessary arrays of key/value pairs */

        /* convert the infos into PMI keyvals */
        info_keyval_sizes   = (int *) MPIU_Malloc(count * sizeof(int));
        info_keyval_vectors = 
            (PMI_keyval_t**) MPIU_Malloc(count * sizeof(PMI_keyval_t*));
        if (!info_keyval_sizes || !info_keyval_vectors) { 
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
        }

        if (!info_ptrs) {
            for (i=0; i<count; i++) {
                info_keyval_vectors[i] = 0;
                info_keyval_sizes[i]   = 0;
            }
        }
        else {
            for (i=0; i<count; i++) {
                mpi_errno = mpi_to_pmi_keyvals( info_ptrs[i], 
                                                &info_keyval_vectors[i],
                                                &info_keyval_sizes[i] );
                if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
            }
        }

        preput_keyval_vector.key = PARENT_PORT_KVSKEY;
        preput_keyval_vector.val = port_name;


        MPIU_THREAD_CS_ENTER(PMI,);
        pmi_errno = PMI_Spawn_multiple(count, (const char **)
                                       commands, 
                                       (const char ***) argvs,
                                       maxprocs, info_keyval_sizes,
                                       (const PMI_keyval_t **)
                                       info_keyval_vectors, 1, 
                                       &preput_keyval_vector,
                                       pmi_errcodes);
	MPIU_THREAD_CS_EXIT(PMI,);
        if (pmi_errno != PMI_SUCCESS) {
	    MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER,
		 "**pmi_spawn_multiple", "**pmi_spawn_multiple %d", pmi_errno);
        }
#endif

	if (errcodes != MPI_ERRCODES_IGNORE) {
	    for (i=0; i<total_num_processes; i++) {
		/* FIXME: translate the pmi error codes here */
		errcodes[i] = pmi_errcodes[i];
                /* We want to accept if any of the spawns succeeded.
                   Alternatively, this is the same as we want to NOT accept if
                   all of them failed.  should_accept = NAND(e_0, ..., e_n)
                   Remember, success equals false (0). */
                should_accept = should_accept && errcodes[i];
	    }
            should_accept = !should_accept; /* the `N' in NAND */
	}
    }
Example #21
int MPID_Cancel_send(MPID_Request * sreq)
{
    MPIDI_VC_t * vc;
    int proto;
    int flag;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_CANCEL_SEND);

    MPIU_Assert(sreq->kind == MPID_REQUEST_SEND);

    MPIDI_Request_cancel_pending(sreq, &flag);
    if (flag)
    {
        goto fn_exit;
    }

    /*
     * FIXME: user requests returned by MPI_Ibsend() have a NULL comm pointer
     * and no pointer to the underlying communication
     * request.  For now, we simply fail to cancel the request.  In the future,
     * we should add a new request kind to indicate that
     * the request is a BSEND.  Then we can properly cancel the request, much
     * in the way we do persistent requests.
     */
    if (sreq->comm == NULL)
    {
        goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(sreq->comm, sreq->dev.match.parts.rank, &vc);

    proto = MPIDI_Request_get_msg_type(sreq);

    if (proto == MPIDI_REQUEST_SELF_MSG)
    {
        MPID_Request * rreq;

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,
                     "attempting to cancel message sent to self");

        MPIU_THREAD_CS_ENTER(MSGQUEUE,);
        rreq = MPIDI_CH3U_Recvq_FDU(sreq->handle, &sreq->dev.match);
        MPIU_THREAD_CS_EXIT(MSGQUEUE,);
        if (rreq)
        {
            MPIU_Assert(rreq->partner_request == sreq);

            MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                                                "send-to-self cancellation successful, sreq=0x%08x, rreq=0x%08x",
                                                sreq->handle, rreq->handle));

            MPIU_Object_set_ref(rreq, 0);
            MPIDI_CH3_Request_destroy(rreq);

            sreq->status.cancelled = TRUE;
            /* no other thread should be waiting on sreq, so it is safe to
               reset ref_count and cc */
            MPID_cc_set(&sreq->cc, 0);
            /* FIXME should be a decr and assert, not a set */
            MPIU_Object_set_ref(sreq, 1);
        }
        else
        {
            sreq->status.cancelled = FALSE;
            /* rreq is NULL on this path, so report only the send request */
            MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                                                "send-to-self cancellation failed, sreq=0x%08x",
                                                sreq->handle));
        }

        goto fn_exit;
    }
Example #22
int MPID_nem_ib_init (MPIDI_PG_t *pg_p,
        int pg_rank,
        char **bc_val_p,
        int *val_max_sz_p)
{
    int mpi_errno = MPI_SUCCESS;

    MPIDI_STATE_DECL(MPID_STATE_MPID_IB_INIT);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_IB_INIT);

    /* Make sure that our private fields in vc fit into the area provided. */
    MPIU_Assert(sizeof(MPID_nem_ib_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);


    /* Allocate and initialize conn mgmt related info  */
    mpi_errno = MPID_nem_ib_init_process_info(pg_rank, pg_p);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to init process info");
    }


    mpi_errno = MPID_nem_ib_init_connection(pg_rank, pg_p->size);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to init conn info");
    }

    mpi_errno = MPID_nem_ib_get_control_params();
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to get control params");
    }

    /* Open and init all HCA's for communication */
    mpi_errno = MPID_nem_ib_init_hca();
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to init HCA");
    }

    mpi_errno = MPID_nem_ib_get_control_params_after_hcainit();
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to get control params after hca_init");
    }

    /* Set default parameters. */
    mpi_errno = MPID_nem_ib_set_default_params();
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to set def params");
    }

    /* Get user defined parameters. */
    mpi_errno = MPID_nem_ib_get_user_params();
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to get user params");
    }

    /* init process_init_info for communication info exchange */
    mpi_errno = MPID_nem_ib_alloc_process_init_info();
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to init process_init_info");
    }

    mpi_errno = MPID_nem_ib_setup_startup_ring(pg_p, pg_rank);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to setup startup ring");
    }

    if (process_info.has_srq) {
        mpi_errno = init_vbuf_lock();
        if(mpi_errno) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to init vbuf lock");
        }
    }

    mpi_errno = MPID_nem_ib_open_ports();
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to init HCA");
    }

    /* Setup QP's and other things for communication */
    mpi_errno = MPID_nem_ib_setup_conn(pg_p);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                   "**fail %s", "Failed to setup conn");
    }

    /* Exchange conn info between all processes */
    mpi_errno = MPID_nem_ib_exchange_conn(pg_p, pg_rank);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                   "**fail %s", "Failed to exchange conn info");

    }

#if !defined(DISABLE_PTMALLOC)
    if (!mvapich2_minit()) {
        process_info.has_lazy_mem_unregister = 1;

        /* Initialize the registration cache */
        mpi_errno = dreg_init();
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                   "**fail %s", "Failed to initialize registration cache");
        }
    }
#else /* !defined(DISABLE_PTMALLOC) */
    mallopt(M_TRIM_THRESHOLD, -1);
    mallopt(M_MMAP_MAX, 0);
    process_info.has_lazy_mem_unregister = 0;
#endif /* !defined(DISABLE_PTMALLOC) */


    /* Allocate RDMA Buffers */
    mpi_errno = MPID_nem_ib_allocate_memory(
            pg_rank,
            pg_p->size);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                   "**fail %s", "Failed to allocate memory");
    }

    if(process_info.has_srq) {
        mpi_errno = MPID_nem_ib_allocate_srq();
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                       "**fail %s", "Failed to allocate memory for srq");

        }
    }

    /* establish conn info between all processes */
    mpi_errno = MPID_nem_ib_establish_conn();
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                   "**fail %s", "Failed to establish conn");
    }

    /* init channel manager */
    /* Defined in ib_channel_manager.c, need to declare in ib_channel_manager.h
     *
     */
    mpi_errno = MPIDI_nem_ib_init_cmanager(pg_rank, pg_p->size);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                   "**fail %s", "Failed to init cmanager");
    }

    /* Free conn mgmt related info */
    mpi_errno = MPID_nem_ib_free_conn_info(pg_p->size);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**fail",
                                    "**fail %s", "Failed to init conn info");
    }


fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_IB_INIT);
    return mpi_errno;

fn_fail:
    goto fn_exit;
}
Example #23
/* XXX DJG FIXME at some point this should poll, much like the newtcp module.
   But then we have that whole pollfd array to manage, which we don't really
   need until this proof-of-concept proves itself. */
int MPID_nem_lmt_vmsplice_progress(void)
{
    int mpi_errno = MPI_SUCCESS;
    struct lmt_vmsplice_node *prev = NULL;
    struct lmt_vmsplice_node *free_me = NULL;
    struct lmt_vmsplice_node *cur = outstanding_head;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_PROGRESS);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_PROGRESS);
    
    while (cur) {
        int complete = 0;

        switch (MPIDI_Request_get_type(cur->req)) {
            case MPIDI_REQUEST_TYPE_RECV:
                mpi_errno = do_readv(cur->req, cur->pipe_fd, cur->req->dev.iov,
                                     &cur->req->dev.iov_offset,
                                     &cur->req->dev.iov_count, &complete);
                /* FIXME: set the error status of the req and complete it, rather than POP */
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                break;
            case MPIDI_REQUEST_TYPE_SEND:
                mpi_errno = do_vmsplice(cur->req, cur->pipe_fd, cur->req->dev.iov,
                                        &cur->req->dev.iov_offset,
                                        &cur->req->dev.iov_count, &complete);
                /* FIXME: set the error status of the req and complete it, rather than POP */
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                break;
            default:
                MPIR_ERR_INTERNALANDJUMP(mpi_errno, "unexpected request type");
                break;
        }

        if (complete) {
            MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, ".... complete");

            /* unlink the node from the list; cur advances but prev must
               stay on the last node that is still linked */
            if (cur == outstanding_head)
                outstanding_head = cur->next;
            else
                prev->next = cur->next;
            free_me = cur;
            cur = cur->next;
            MPL_free(free_me);
            --MPID_nem_local_lmt_pending;
            continue; /* don't advance prev past the removed node */
        }

        prev = cur;
        cur = cur->next;
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_PROGRESS);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Example #24
/* MPIDI_CH3I_SendNoncontig - Sends a message by packing
   directly into cells.  The caller must initialize sreq->dev.segment
   as well as segment_first and segment_size. */
int MPIDI_CH3I_SendNoncontig( MPIDI_VC_t *vc, MPID_Request *sreq, void *header, MPIDI_msg_sz_t hdr_sz )
{
    int mpi_errno = MPI_SUCCESS;
    int again = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);

    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)header);

    MPIU_THREAD_CS_ENTER(MPIDCOMM,);

    if (!MPIDI_CH3I_Sendq_empty(MPIDI_CH3I_shm_sendq)) /* MT */
    {
        /* send queue is not empty, enqueue the request then check to
           see if we can send any now */

        MPIDI_DBG_PRINTF((55, FCNAME, "enqueuing"));

        sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *)header;
        sreq->ch.noncontig    = TRUE;
        sreq->ch.header_sz    = hdr_sz;
        sreq->ch.vc           = vc;

        MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
        mpi_errno = MPIDI_CH3I_Shm_send_progress();
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        goto fn_exit;
    }

    /* send as many cells of data as you can */
    MPID_nem_mpich_send_seg_header(sreq->dev.segment_ptr, &sreq->dev.segment_first, sreq->dev.segment_size, header, hdr_sz, vc, &again);
    while(!again && sreq->dev.segment_first < sreq->dev.segment_size)
        MPID_nem_mpich_send_seg(sreq->dev.segment_ptr, &sreq->dev.segment_first, sreq->dev.segment_size, vc, &again);

    if (again)
    {
        /* we didn't finish sending everything */
        sreq->ch.noncontig = TRUE;
        sreq->ch.vc = vc;
        if (sreq->dev.segment_first == 0) /* nothing was sent, save header */
        {
            sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *)header;
            sreq->ch.header_sz    = hdr_sz;
        }
        else
        {
            /* part of message was sent, make this req an active send */
            MPIU_Assert(MPIDI_CH3I_shm_active_send == NULL);
            MPIDI_CH3I_shm_active_send = sreq;
        }
        MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
        goto fn_exit;
    }

    /* finished sending all data, complete the request */
    if (!sreq->dev.OnDataAvail)
    {
        MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
        MPIDI_CH3U_Request_complete(sreq);
        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, ".... complete %d bytes", (int) (sreq->dev.segment_size));
    }
    else
    {
        int complete = 0;
        mpi_errno = sreq->dev.OnDataAvail(vc, sreq, &complete);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPIU_Assert(complete); /* all data has been sent, we should always complete */

        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, ".... complete %d bytes", (int) (sreq->dev.segment_size));
    }

 fn_exit:
    MPIU_THREAD_CS_EXIT(MPIDCOMM,);
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #25
/**
 * Initialize Process Manager Interface and update global_info.
 * Called by MPID_nem_ib_init.
 *
 *   -# Initialize the Process Manager Interface;
 *   -# Set the rank;
 *   -# Set the process group size;
 *
 * \see MPID_nem_ib_init
 *
    \todo Need to add more stuff here
          Look at InitPG in mvapich2/trunk/src/mpid/ch3/src/mpid_init.c
 */
int MPID_nem_ib_pmi_init()
{
    int pmi_errno   = 0;
    int mpi_errno = MPI_SUCCESS;
    int spawned;
    /* Process group id size */
    int pg_id_sz;
    char *pg_id;
    MPIDI_PG_t *pg = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PMI_INIT);

    assert(global_info != NULL);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PMI_INIT);

    /* Initialize the Process Manager Interface */
    pmi_errno = PMI_Init(&spawned);
    if (pmi_errno != PMI_SUCCESS) {
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**pmi_init",
                 "**pmi_init %d", pmi_errno);
    }

    /* Set the rank */
    pmi_errno = PMI_Get_rank(&global_info->pg_rank);
    if (pmi_errno != PMI_SUCCESS) {
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**pmi_get_rank",
                 "**pmi_get_rank %d", pmi_errno);
    }

    /* Set the process group size */
    pmi_errno = PMI_Get_size(&global_info->pg_size);
    if (pmi_errno != 0) {
        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**pmi_get_size",
                 "**pmi_get_size %d", pmi_errno);
    }

    /* -------------------------------------- From InitPG in mvapich2/trunk/src/mpid/ch3/src/mpid_init.c
	pmi_errno = PMI_Get_appnum(&appnum);
	if (pmi_errno != PMI_SUCCESS) {
	    MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**pmi_get_appnum",
				 "**pmi_get_appnum %d", pmi_errno);
	}

    / * Note that if pmi is not available, the value of MPI_APPNUM is
       not set * /
    if (appnum != -1) {
        MPIR_Process.attrs.appnum = appnum;
    }
	*/

    /* Now, initialize the process group information with PMI calls */
    /*
     * Get the process group id
     */
    pmi_errno = PMI_KVS_Get_name_length_max(&pg_id_sz);
    if (pmi_errno != PMI_SUCCESS) {
        /*
         * I don't believe that MPICH2 has updated the error message for this
         * yet.
         */
        MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,
                 "**pmi_get_id_length_max",
                 "**pmi_get_id_length_max %d", pmi_errno);
    }

    /* This memory will be freed by the PG_Destroy if there is an error */
    pg_id = MPIU_Malloc(pg_id_sz + 1);
    if (pg_id == NULL) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**nomem");
    }

    /* Note in the singleton init case, the pg_id is a dummy.
       We'll want to replace this value if we join a
       Process manager */
    pmi_errno = PMI_KVS_Get_my_name(pg_id, pg_id_sz);
    if (pmi_errno != PMI_SUCCESS) {
        /*
         * I don't believe the MPICH2 team has updated the error message for
         * this change yet.
         */
        MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**pmi_get_id",
                 "**pmi_get_id %d", pmi_errno);
    }

    /*
     * Create a new structure to track the process group for our MPI_COMM_WORLD
     */
    mpi_errno = MPIDI_PG_Create(global_info->pg_size, pg_id, &pg);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**dev|pg_create");
    }

    MPIDI_PG_InitConnKVS( pg );


fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PMI_INIT);
    return mpi_errno;

fn_fail:
	if (pg) {
		MPIDI_PG_Destroy( pg );
	}
	goto fn_exit;
}
Example #26
/*@ 
   MPIDI_PG_Finalize - Finalize the process groups, including freeing all
   process group structures
  @*/
int MPIDI_PG_Finalize(void)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_PG_t *pg, *pgNext;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_FINALIZE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_FINALIZE);

    /* Print the state of the process groups */
    if (verbose) {
	MPIU_PG_Printall( stdout );
    }

    /* FIXME - straighten out the use of PMI_Finalize - no use after 
       PG_Finalize */
    if (pg_world->connData) {
#ifdef USE_PMI2_API
        mpi_errno = PMI2_Finalize();
        if (mpi_errno) MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|pmi_finalize");
#else
	int rc;
	rc = PMI_Finalize();
	if (rc) {
	    MPIR_ERR_SET1(mpi_errno,MPI_ERR_OTHER, 
			  "**ch3|pmi_finalize", 
			  "**ch3|pmi_finalize %d", rc);
	}
#endif
    }

    /* Free the storage associated with the process groups */
    pg = MPIDI_PG_list;
    while (pg) {
	pgNext = pg->next;
	
	/* In finalize, we free all process group information, even if
	   the ref count is not zero.  This can happen if the user
	   fails to use MPI_Comm_disconnect on communicators that
	   were created with the dynamic process routines.*/
        /* XXX DJG FIXME-MT should we be checking this? */
	if (MPIU_Object_get_ref(pg) == 0 || 1) {
	    if (pg == MPIDI_Process.my_pg)
		MPIDI_Process.my_pg = NULL;

	    MPIU_Object_set_ref(pg, 0); /* satisfy assertions in PG_Destroy */
	    MPIDI_PG_Destroy( pg );
	}
	pg     = pgNext;
    }

    /* If COMM_WORLD is still around (it normally should be), 
       try to free it here.  The reason that we need to free it at this 
       point is that comm_world (and comm_self) still exist, and 
       hence the usual process to free the related VC structures will
       not be invoked. */
    if (MPIDI_Process.my_pg) {
	MPIDI_PG_Destroy(MPIDI_Process.my_pg);
    } 
    MPIDI_Process.my_pg = NULL;

    /* ifdefing out this check because the list will not be NULL in 
       Ch3_finalize because
       one additional reference is retained in MPIDI_Process.my_pg. 
       That reference is released
       only after ch3_finalize returns. If I release it before ch3_finalize, 
       the ssm channel crashes. */
#if 0
    if (MPIDI_PG_list != NULL)
    { 
	
	/* --BEGIN ERROR HANDLING-- */
	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_INTERN,
        "**dev|pg_finalize|list_not_empty", NULL); 
	/* --END ERROR HANDLING-- */
    }
#endif

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_FINALIZE);
    return mpi_errno;
}
Example #27
int MPIDI_PG_SetConnInfo( int rank, const char *connString )
{
#ifdef USE_PMI2_API
    int mpi_errno = MPI_SUCCESS;
    int len;
    char key[PMI2_MAX_KEYLEN];
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_SetConnInfo);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_SetConnInfo);

    len = MPL_snprintf(key, sizeof(key), "P%d-businesscard", rank);
    MPIR_ERR_CHKANDJUMP1(len < 0 || len > sizeof(key), mpi_errno, MPI_ERR_OTHER, "**snprintf", "**snprintf %d", len);

    mpi_errno = PMI2_KVS_Put(key, connString);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = PMI2_KVS_Fence();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_SetConnInfo);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
#else
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int len;
    char key[128];
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_SetConnInfo);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_SetConnInfo);

    MPIU_Assert(pg_world->connData);
    
    len = MPL_snprintf(key, sizeof(key), "P%d-businesscard", rank);
    if (len < 0 || len > sizeof(key)) {
	MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**snprintf",
			     "**snprintf %d", len);
    }
    pmi_errno = PMI_KVS_Put(pg_world->connData, key, connString );
    if (pmi_errno != PMI_SUCCESS) {
	MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**pmi_kvs_put",
			     "**pmi_kvs_put %d", pmi_errno);
    }
    pmi_errno = PMI_KVS_Commit(pg_world->connData);
    if (pmi_errno != PMI_SUCCESS) {
	MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**pmi_kvs_commit",
			     "**pmi_kvs_commit %d", pmi_errno);
    }
    
    pmi_errno = PMI_Barrier();
    if (pmi_errno != PMI_SUCCESS) {
	MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**pmi_barrier",
			     "**pmi_barrier %d", pmi_errno);
    }
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_SetConnInfo);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
#endif
}
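/* The lookup side of the exchange above reads the same "P<rank>-businesscard"
   key back out of the KVS.  A hedged sketch against the PMI v1 API (the PMI2
   path is analogous); get_conn_info is hypothetical: */
static int get_conn_info(MPIDI_PG_t *pg, int rank, char *val, int vallen)
{
    char key[128];
    MPL_snprintf(key, sizeof(key), "P%d-businesscard", rank);
    return (PMI_KVS_Get(pg->connData, key, val, vallen) == PMI_SUCCESS)
        ? MPI_SUCCESS : MPI_ERR_OTHER;
}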
Example #28
int MPIDI_PG_Destroy(MPIDI_PG_t * pg)
{
    MPIDI_PG_t * pg_prev;
    MPIDI_PG_t * pg_cur;
    int i;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_DESTROY);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_DESTROY);

    MPIU_Assert(MPIU_Object_get_ref(pg) == 0);

    pg_prev = NULL;
    pg_cur = MPIDI_PG_list;
    while(pg_cur != NULL)
    {
	if (pg_cur == pg)
	{
	    if (MPIDI_PG_iterator_next == pg)
	    { 
		MPIDI_PG_iterator_next = MPIDI_PG_iterator_next->next;
	    }

            if (pg_prev == NULL)
                MPIDI_PG_list = pg->next; 
            else
                pg_prev->next = pg->next;

            MPIU_DBG_MSG_FMT(CH3_DISCONNECT, VERBOSE, (MPIU_DBG_FDEST, "destroying pg=%p pg->id=%s", pg, (char *)pg->id));

            for (i = 0; i < pg->size; ++i) {
                /* FIXME it would be good if we could make this assertion.
                   Unfortunately, either:
                   1) We're not being disciplined and some caller of this
                      function doesn't bother to manage all the refcounts
                      because he thinks he knows better.  Annoying, but not
                      strictly a bug.
		      (wdg - actually, that is a bug - managing the ref
		      counts IS required and missing one is a bug.)
                   2) There is a real bug lurking out there somewhere and we
                      just haven't hit it in the tests yet.  */
                /*MPIU_Assert(MPIU_Object_get_ref(pg->vct[i]) == 0);*/

                MPIU_DBG_MSG_FMT(CH3_DISCONNECT, VERBOSE, (MPIU_DBG_FDEST, "about to free pg->vct=%p which contains vc=%p", pg->vct, &pg->vct[i]));

                /* This used to be handled in MPIDI_VCRT_Release, but that was
                   not the right place to do this.  The VC should only be freed
                   when the PG that it belongs to is freed, not just when the
                   VC's refcount drops to zero. [goodell@ 2008-06-13] */
		/* In that case, the fact that the VC is in the PG should
		   increment the ref count - reflecting the fact that the
		   use in the PG constitutes a reference-count-incrementing
		   use.  Alternately, if the PG is able to recreate a VC, 
		   and can thus free unused (or idle) VCs, it should be allowed
		   to do so.  [wdg 2008-08-31] */
                mpi_errno = MPIDI_CH3_VC_Destroy(&(pg->vct[i]));
                if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
            }

	    MPIDI_PG_Destroy_fn(pg);
	    MPIU_Free(pg->vct);
	    if (pg->connData) {
		if (pg->freeConnInfo) {
		    (*pg->freeConnInfo)( pg );
		}
		else {
		    MPIU_Free(pg->connData);
		}
	    }
	    mpi_errno = MPIDI_CH3_PG_Destroy(pg);
	    MPIU_Free(pg);

	    goto fn_exit;
	}

	pg_prev = pg_cur;
	pg_cur = pg_cur->next;
    }

    /* PG not found if we got here */
    MPIR_ERR_SET1(mpi_errno,MPI_ERR_OTHER,
		  "**dev|pg_not_found", "**dev|pg_not_found %p", pg);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_DESTROY);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Example #29
int MPIDI_PG_Create(int vct_sz, void * pg_id, MPIDI_PG_t ** pg_ptr)
{
    MPIDI_PG_t * pg = NULL, *pgnext;
    int p;
    int mpi_errno = MPI_SUCCESS;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_PG_CREATE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_PG_CREATE);
    
    MPIU_CHKPMEM_MALLOC(pg,MPIDI_PG_t*,sizeof(MPIDI_PG_t),mpi_errno,"pg");
    MPIU_CHKPMEM_MALLOC(pg->vct,MPIDI_VC_t *,sizeof(MPIDI_VC_t)*vct_sz,
			mpi_errno,"pg->vct");

    if (verbose) {
	fprintf( stdout, "Creating a process group of size %d\n", vct_sz );
	fflush(stdout);
    }

    pg->handle = 0;
    /* The reference count indicates the number of vc's that are or 
       have been in use and not disconnected. It starts at zero,
       except for MPI_COMM_WORLD. */
    MPIU_Object_set_ref(pg, 0);
    pg->size = vct_sz;
    pg->id   = pg_id;
    pg->finalize = 0;
    /* Initialize the connection information to null.  Use
       the appropriate MPIDI_PG_InitConnXXX routine to set up these 
       fields */
    pg->connData           = 0;
    pg->getConnInfo        = 0;
    pg->connInfoToString   = 0;
    pg->connInfoFromString = 0;
    pg->freeConnInfo       = 0;

    for (p = 0; p < vct_sz; p++)
    {
	/* Initialize device fields in the VC object */
	MPIDI_VC_Init(&pg->vct[p], pg, p);
    }

    /* We may first need to initialize the channel before calling the channel 
       VC init functions.  This routine may be a no-op; look in the 
       ch3_init.c file in each channel */
    MPIDI_CH3_PG_Init(pg);

    /* These are now done in MPIDI_VC_Init */
#if 0
    for (p = 0; p < vct_sz; p++)
    {
	/* Initialize the channel fields in the VC object */
	MPIDI_CH3_VC_Init( &pg->vct[p] );
    }
#endif

    /* The first process group is always the world group */
    if (!pg_world) { pg_world = pg; }

    /* Add pg's at the tail so that comm world is always the first pg */
    pg->next = 0;
    if (!MPIDI_PG_list)
    {
	MPIDI_PG_list = pg;
    }
    else
    {
	pgnext = MPIDI_PG_list; 
	while (pgnext->next)
	{
	    pgnext = pgnext->next;
	}
	pgnext->next = pg;
    }
    *pg_ptr = pg;
    
  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_PG_CREATE);
    return mpi_errno;
    
  fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}
Example #30
int MPIDI_CH3U_Request_unpack_uebuf(MPID_Request * rreq)
{
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPIDI_msg_sz_t userbuf_sz;
    MPID_Datatype * dt_ptr;
    MPIDI_msg_sz_t unpack_sz;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);

    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, 
			    dt_contig, userbuf_sz, dt_ptr, dt_true_lb);
    
    if (rreq->dev.recv_data_sz <= userbuf_sz)
    {
	unpack_sz = rreq->dev.recv_data_sz;
    }
    else
    {
	/* --BEGIN ERROR HANDLING-- */
	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
      "receive buffer overflow; message truncated, msg_sz=" MPIDI_MSG_SZ_FMT 
	      ", buf_sz=" MPIDI_MSG_SZ_FMT, 
                rreq->dev.recv_data_sz, userbuf_sz));
	unpack_sz = userbuf_sz;
	MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz);
	rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
		 MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
		 "**truncate", "**truncate %d %d", 
                 rreq->dev.recv_data_sz, userbuf_sz);
	/* --END ERROR HANDLING-- */
    }

    if (unpack_sz > 0)
    {
	if (dt_contig)
	{
	    /* TODO - check that amount of data is consistent with datatype.  
	       In other words, if we were to use Segment_unpack()
	       would last = unpack?  If not we should return an error 
	       (unless configured with --enable-fast) */
	    MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
	    MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, rreq->dev.tmpbuf,
		   unpack_sz);
	    MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
	}
	else
	{
	    MPID_Segment seg;
	    MPI_Aint last;

	    MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, 
			      rreq->dev.datatype, &seg, 0);
	    last = unpack_sz;
	    MPID_Segment_unpack(&seg, 0, &last, rreq->dev.tmpbuf);
	    if (last != unpack_sz)
	    {
		/* --BEGIN ERROR HANDLING-- */
		/* received data was not entirely consumed by unpack() 
		   because too few bytes remained to fill the next basic
		   datatype */
		MPIR_STATUS_SET_COUNT(rreq->status, last);
		rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
                         MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE,
			 "**dtypemismatch", 0);
		/* --END ERROR HANDLING-- */
	    }
	}
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
    return mpi_errno;
}