Example #1
0
void
MPID_nem_newmad_handle_sreq(MPID_Request *req)
{
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
#ifdef DEBUG
    fprintf(stdout,"========> Completing Send req  %p \n",req);
#endif
    reqFn = req->dev.OnDataAvail;
    if (!reqFn){
	MPIDI_CH3U_Request_complete(req);
	MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
    }
    else{
	MPIDI_VC_t *vc = req->ch.vc;
	int complete   = 0;
	reqFn(vc, req, &complete);
	if (!complete)
	{
	    /* a completion handler invoked here is expected to finish the
	       request; an incomplete request at this point is an error */
	    MPIU_Assert(complete == TRUE);
	}
    }
    if (REQ_FIELD(req,iov) != NULL)
	MPIU_Free((REQ_FIELD(req,iov)));
    mpid_nem_newmad_pending_send_req--;
}
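/* Illustrative sketch (not MPICH code): the completion pattern shared by the
   send-finalization handlers in this document.  If the request carries no
   OnDataAvail continuation it is completed directly; otherwise the
   continuation runs and is expected to finish the transfer.  All type and
   function names below (request_t, handle_send_done, ...) are invented
   stand-ins for the MPID_Request machinery. */
#include <assert.h>
#include <stddef.h>

struct vc;                                    /* opaque stand-in for MPIDI_VC_t */

typedef struct request {
    struct vc *vc;
    int (*on_data_avail)(struct vc *, struct request *, int *);
    int cc;                                   /* completion counter */
} request_t;

static void request_complete(request_t *req) { req->cc = 0; }

void handle_send_done(request_t *req)
{
    if (req->on_data_avail == NULL) {
        request_complete(req);                /* no continuation: finish the request */
    }
    else {
        int complete = 0;
        req->on_data_avail(req->vc, req, &complete);
        assert(complete);                     /* a send-done handler must finish the transfer */
    }
}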
static int do_readv(MPID_Request *rreq, int pipe_fd, MPID_IOV iov[],
                    int *iov_offset, int *iov_count, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    ssize_t nread;

    nread = readv(pipe_fd, &rreq->dev.iov[rreq->dev.iov_offset], rreq->dev.iov_count);
    MPIU_ERR_CHKANDJUMP2(nread < 0 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read",
                         "**readv %d %s", errno, MPIU_Strerror(errno));

    if (nread < 0) {
        if (errno == EAGAIN) goto fn_exit;
        MPIU_ERR_CHKANDJUMP2(errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read",
                             "**readv %d %s", errno, MPIU_Strerror(errno));
    }

    *complete = adjust_partially_xferred_iov(iov, iov_offset, iov_count, nread);
    if (*complete) {
        /* look for additional data to send and reload IOV if there is more */
        mpi_errno = check_req_complete(rreq->ch.vc, rreq, complete);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        if (*complete) {
            nread = close(pipe_fd);
            MPIU_ERR_CHKANDJUMP(nread < 0, mpi_errno, MPI_ERR_OTHER, "**close");
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
        }
    }

fn_fail:
fn_exit:
    return mpi_errno;
}
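/* Illustrative sketch (not MPICH code): handling a partial readv() from a pipe
   by advancing an iovec array, in the spirit of do_readv() above.  adjust_iov()
   is a hypothetical stand-in for adjust_partially_xferred_iov(). */
#include <stdio.h>
#include <unistd.h>
#include <sys/uio.h>

/* Consume 'nbytes' from the front of the iov; return 1 when every entry is drained. */
static int adjust_iov(struct iovec *iov, int *offset, int *count, size_t nbytes)
{
    while (*count > 0 && nbytes >= iov[*offset].iov_len) {
        nbytes -= iov[*offset].iov_len;
        ++*offset;
        --*count;
    }
    if (*count == 0)
        return 1;
    iov[*offset].iov_base = (char *) iov[*offset].iov_base + nbytes;
    iov[*offset].iov_len -= nbytes;
    return 0;
}

int main(void)
{
    char a[8], b[8];
    struct iovec iov[2] = { { a, sizeof a }, { b, sizeof b } };
    int offset = 0, count = 2, fds[2];
    ssize_t nread;

    if (pipe(fds) != 0 || write(fds[1], "0123456789", 10) < 0)
        return 1;                             /* 10 bytes: less than the iov can hold */

    nread = readv(fds[0], &iov[offset], count);
    if (nread > 0 && adjust_iov(iov, &offset, &count, (size_t) nread))
        printf("iov complete\n");
    else
        printf("partial read: %zd bytes, %d iov entries left\n", nread, count);
    return 0;
}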
Example #3
0
int MPIR_Thread_CS_Finalize( void )
{
    MPIU_DBG_MSG(THREAD,TYPICAL,"Freeing global mutex and private storage");
#if MPIU_THREAD_GRANULARITY == MPIU_THREAD_GRANULARITY_GLOBAL
/* There is a single, global lock, held for the duration of an MPI call */
    MPID_Thread_mutex_destroy(&MPIR_ThreadInfo.global_mutex, NULL);

#elif MPIU_THREAD_GRANULARITY == MPIU_THREAD_GRANULARITY_PER_OBJECT
    /* MPIU_THREAD_GRANULARITY_PER_OBJECT: There are multiple locks,
     * one for each logical class (e.g., each type of object) */
    MPID_Thread_mutex_destroy(&MPIR_ThreadInfo.global_mutex, NULL);
    MPID_Thread_mutex_destroy(&MPIR_ThreadInfo.handle_mutex, NULL);
    MPID_Thread_mutex_destroy(&MPIR_ThreadInfo.msgq_mutex, NULL);
    MPID_Thread_mutex_destroy(&MPIR_ThreadInfo.completion_mutex, NULL);
    MPID_Thread_mutex_destroy(&MPIR_ThreadInfo.ctx_mutex, NULL);
    MPID_Thread_mutex_destroy(&MPIR_ThreadInfo.pmi_mutex, NULL);


#elif MPIU_THREAD_GRANULARITY == MPIU_THREAD_GRANULARITY_LOCK_FREE
/* Updates to shared data and access to shared services are handled without
   locks wherever possible. */
#error lock-free not yet implemented

#elif MPIU_THREAD_GRANULARITY == MPIU_THREAD_GRANULARITY_SINGLE
/* No thread support, make all operations a no-op */

#else
#error Unrecognized thread granularity
#endif

    MPIU_THREADPRIV_FINALIZE;

    return MPI_SUCCESS;
}
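/* Illustrative sketch (not MPICH code): the compile-time lock-granularity
   selection used above, reduced to plain pthread mutexes.  The
   LOCK_GRANULARITY_* macros and mutex names are invented for this example;
   only the #if/#elif/#error structure mirrors the MPICH code. */
#include <pthread.h>

#define LOCK_GRANULARITY_GLOBAL     1
#define LOCK_GRANULARITY_PER_OBJECT 2
#define LOCK_GRANULARITY_SINGLE     3

#ifndef LOCK_GRANULARITY
#define LOCK_GRANULARITY LOCK_GRANULARITY_GLOBAL
#endif

static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER;
#if LOCK_GRANULARITY == LOCK_GRANULARITY_PER_OBJECT
static pthread_mutex_t handle_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t msgq_mutex   = PTHREAD_MUTEX_INITIALIZER;
#endif

int locks_finalize(void)
{
#if LOCK_GRANULARITY == LOCK_GRANULARITY_GLOBAL
    /* one big lock held for the duration of a call */
    pthread_mutex_destroy(&global_mutex);
#elif LOCK_GRANULARITY == LOCK_GRANULARITY_PER_OBJECT
    /* one lock per logical class of object */
    pthread_mutex_destroy(&global_mutex);
    pthread_mutex_destroy(&handle_mutex);
    pthread_mutex_destroy(&msgq_mutex);
#elif LOCK_GRANULARITY == LOCK_GRANULARITY_SINGLE
    /* no thread support: finalize is a no-op */
#else
#error Unrecognized lock granularity
#endif
    return 0;
}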
static int do_vmsplice(MPID_Request *sreq, int pipe_fd, MPID_IOV iov[],
                       int *iov_offset, int *iov_count, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    ssize_t err;

#if 1
    err = vmsplice(pipe_fd, &iov[*iov_offset], *iov_count, SPLICE_F_NONBLOCK);
#else
    err = writev(pipe_fd, &iov[*iov_offset], *iov_count);
#endif

    if (err < 0) {
        if (errno == EAGAIN) goto fn_exit;
        MPIU_ERR_CHKANDJUMP2(errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**vmsplice",
                             "**vmsplice %d %s", errno, MPIU_Strerror(errno));
    }

    *complete = adjust_partially_xferred_iov(iov, iov_offset, iov_count, err);
    if (*complete) {
        /* look for additional data to send and reload IOV if there is more */
        mpi_errno = check_req_complete(sreq->ch.vc, sreq, complete);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        if (*complete) {
            err = close(pipe_fd);
            MPIU_ERR_CHKANDJUMP(err < 0, mpi_errno, MPI_ERR_OTHER, "**close");
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
        }
    }

fn_fail:
fn_exit:
    return mpi_errno;
}
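/* Illustrative sketch (not MPICH code): queueing an iovec into a pipe with
   vmsplice() and falling back to writev(), mirroring the #if toggle in
   do_vmsplice() above.  Linux-only; error handling is reduced to a message. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/uio.h>

int main(void)
{
    char payload[] = "hello, pipe";
    struct iovec iov = { payload, sizeof payload };
    int fds[2];
    ssize_t n;

    if (pipe(fds) != 0)
        return 1;

#if 1
    /* gift-free vmsplice: the pages must stay valid until the reader drains them */
    n = vmsplice(fds[1], &iov, 1, SPLICE_F_NONBLOCK);
#else
    n = writev(fds[1], &iov, 1);
#endif

    if (n < 0 && errno == EAGAIN)
        printf("pipe full, try again later\n");
    else if (n < 0)
        perror("vmsplice");
    else
        printf("queued %zd bytes\n", n);

    close(fds[0]);
    close(fds[1]);
    return 0;
}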
Example #5
0
int MPIDI_CH3_RecvRndv( MPIDI_VC_t * vc, MPID_Request *rreq )
{
    int mpi_errno = MPI_SUCCESS;

    /* A rendezvous request-to-send (RTS) message has arrived.  We need
       to send a CTS message to the remote process. */
    MPID_Request * cts_req;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;
    
    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,
		 "rndv RTS in the request, sending rndv CTS");
    
    MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
    cts_pkt->sender_req_id = rreq->dev.sender_req_id;
    cts_pkt->receiver_req_id = rreq->handle;
    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
    mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsg(vc, cts_pkt, 
					      sizeof(*cts_pkt), &cts_req));
    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**ch3|ctspkt");
    }
    if (cts_req != NULL)
    {
	/* FIXME: Ideally we could specify that a req not be returned.  
	   This would avoid our having to decrement the
	   reference count on a req we don't want/need. */
	MPID_Request_release(cts_req);
    }

 fn_fail:    
    return mpi_errno;
}
Example #6
0
int MPIDI_CH3U_Post_data_receive_unexpected(MPID_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_UNEXPECTED);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_UNEXPECTED);

    /* FIXME: to improve performance, allocate temporary buffer from a 
       specialized buffer pool. */
    /* FIXME: to avoid memory exhaustion, integrate buffer pool management
       with flow control */
    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"unexpected request allocated");
    
    rreq->dev.tmpbuf = MPIU_Malloc(rreq->dev.recv_data_sz);
    if (!rreq->dev.tmpbuf) {
	MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**nomem","**nomem %d",
			     rreq->dev.recv_data_sz);
    }
    rreq->dev.tmpbuf_sz = rreq->dev.recv_data_sz;
    
    rreq->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rreq->dev.tmpbuf;
    rreq->dev.iov[0].MPID_IOV_LEN = rreq->dev.recv_data_sz;
    rreq->dev.iov_count = 1;
    rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackUEBufComplete;
    rreq->dev.recv_pending_count = 2;

 fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_UNEXPECTED);
    return mpi_errno;
}
Example #7
0
void MPIDI_CH3U_Buffer_allocate(
    const void * const sbuf, MPI_Aint scount, MPI_Datatype sdt, int * smpi_errno,
    void ** rbuf_handle, MPI_Aint rcount, MPI_Datatype rdt, MPIDI_msg_sz_t * rsz,
    int * rmpi_errno)
{
    int sdt_contig;
    int rdt_contig;
    MPI_Aint sdt_true_lb, rdt_true_lb;
    MPIDI_msg_sz_t sdata_sz;
    MPIDI_msg_sz_t rdata_sz;
    MPID_Datatype * sdt_ptr;
    MPID_Datatype * rdt_ptr;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_BUFFER_ALLOCATE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_BUFFER_ALLOCATE);
    *smpi_errno = MPI_SUCCESS;
    *rmpi_errno = MPI_SUCCESS;

    MPIDI_Datatype_get_info(scount, sdt, sdt_contig, sdata_sz, sdt_ptr, sdt_true_lb);
    MPIDI_Datatype_get_info(rcount, rdt, rdt_contig, rdata_sz, rdt_ptr, rdt_true_lb);

    /* --BEGIN ERROR HANDLING-- */
    if (sdata_sz > rdata_sz)
    {
	MPIU_DBG_MSG_FMT(CH3_OTHER,TYPICAL,(MPIU_DBG_FDEST,
	    "message truncated, sdata_sz=" MPIDI_MSG_SZ_FMT " rdata_sz=" MPIDI_MSG_SZ_FMT,
			  sdata_sz, rdata_sz));
	sdata_sz = rdata_sz;
	*rmpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz );
    }
    /* --END ERROR HANDLING-- */

    if (sdata_sz == 0)
    {
	*rsz = 0;
	goto fn_exit;
    }

    if (sdt_contig && rdt_contig)
    {
	*rbuf_handle = (void *)MPIU_Malloc(sdata_sz);
        MPIU_Assert(*rbuf_handle);
	*rsz = sdata_sz;
    }
    else
    {
	/* --BEGIN ERROR HANDLING-- */

        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"Sender and receiver datatypes are not contiguous");
        *smpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**zcopybufalloc", "**zcopybufalloc %d %d", scount, rcount);
        *rmpi_errno = *smpi_errno;
        *rsz = 0;
        goto fn_exit;

	/* --END ERROR HANDLING-- */
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_BUFFER_ALLOCATE);
}
Example #8
0
int MPID_nem_mxm_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz,
                             void *data, MPIDI_msg_sz_t data_sz)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "mxm_iSendContig");
    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);

    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));

    _dbg_mxm_output(5,
                    "iSendContig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n",
                    vc->pg_rank, sreq->dev.pending_pkt.type, sreq, sizeof(MPIDI_CH3_Pkt_t),
                    data_sz);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;
    req_area->iov_count = 0;

    req_area->iov_buf[req_area->iov_count].ptr = (void *) &(sreq->dev.pending_pkt);
    req_area->iov_buf[req_area->iov_count].length = sizeof(MPIDI_CH3_Pkt_t);
    (req_area->iov_count)++;

    if (sreq->dev.ext_hdr_sz != 0) {
        req_area->iov_buf[req_area->iov_count].ptr = (void *) (sreq->dev.ext_hdr_ptr);
        req_area->iov_buf[req_area->iov_count].length = sreq->dev.ext_hdr_sz;
        (req_area->iov_count)++;
    }

    if (data_sz) {
        req_area->iov_buf[req_area->iov_count].ptr = (void *) data;
        req_area->iov_buf[req_area->iov_count].length = data_sz;
        (req_area->iov_count)++;
    }

    vc_area->pending_sends += 1;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = FALSE;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM,
                           mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Example #9
0
int MPID_nem_mxm_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data,
                                 MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq = NULL;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "mxm_iStartContigMsg");
    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);

    /* create a request */
    sreq = MPID_Request_create();
    MPIU_Assert(sreq != NULL);
    MPIU_Object_set_ref(sreq, 2);
    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));
    sreq->kind = MPID_REQUEST_SEND;
    sreq->dev.OnDataAvail = NULL;
    sreq->dev.tmpbuf = NULL;

    _dbg_mxm_output(5,
                    "iStartContigMsg ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n",
                    vc->pg_rank, sreq->dev.pending_pkt.type, sreq, sizeof(MPIDI_CH3_Pkt_t),
                    data_sz);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;
    req_area->iov_count = 1;
    req_area->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt);
    req_area->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t);
    if (data_sz) {
        req_area->iov_count = 2;
        req_area->iov_buf[1].ptr = (void *) data;
        req_area->iov_buf[1].length = data_sz;
    }

    vc_area->pending_sends += 1;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = FALSE;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM,
                           mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

  fn_exit:
    *sreq_ptr = sreq;
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Example #10
0
int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
                               MPIDI_msg_sz_t hdr_sz)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_msg_sz_t last;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "MPID_nem_mxm_iSendNoncontig");

    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));

    _dbg_mxm_output(5,
                    "SendNoncontig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n",
                    vc->pg_rank, sreq->dev.pending_pkt.type, sreq, sizeof(MPIDI_CH3_Pkt_t),
                    sreq->dev.segment_size);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;
    req_area->iov_count = 1;
    req_area->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt);
    req_area->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t);

    MPIU_Assert(sreq->dev.segment_first == 0);
    last = sreq->dev.segment_size;
    if (last > 0) {
        sreq->dev.tmpbuf = MPIU_Malloc((size_t) sreq->dev.segment_size);
        MPIU_Assert(sreq->dev.tmpbuf);
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.tmpbuf);
        MPIU_Assert(last == sreq->dev.segment_size);

        req_area->iov_count = 2;
        req_area->iov_buf[1].ptr = sreq->dev.tmpbuf;
        req_area->iov_buf[1].length = last;
    }

    vc_area->pending_sends += 1;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = TRUE;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM,
                           mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
int MPID_nem_lmt_dma_done_send(MPIDI_VC_t *vc, MPID_Request *sreq)
{
    int mpi_errno = MPI_SUCCESS;
    int complete = 0;
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);

    /* free cookie from RTS packet */
    MPIU_Free(sreq->ch.s_cookie);

    /* We shouldn't ever need to handle the more IOVs case here.  The DONE
       message should only be sent when all of the data is truly transferred.
       However in the interest of robustness, we'll start to handle it and
       assert if it looks like we were supposed to send more data for some
       reason. */
    reqFn = sreq->dev.OnDataAvail;
    if (!reqFn) {
        MPIDI_CH3U_Request_complete(sreq);
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
        goto fn_exit;
    }

    complete = 0;
    mpi_errno = reqFn(vc, sreq, &complete);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        
    if (complete) {
        /* request was completed by the OnDataAvail fn */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
        goto fn_exit;
    }
    else {
        /* There is more data to send. */
        MPIU_Assert(("should never be incomplete!", 0));
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Example #12
0
int MPID_nem_ptl_pkt_cancel_send_resp_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                              MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq;
    MPIDI_nem_ptl_pkt_cancel_send_resp_t *resp_pkt = (MPIDI_nem_ptl_pkt_cancel_send_resp_t *)pkt;
    int i, ret;

    MPID_Request_get_ptr(resp_pkt->sender_req_id, sreq);

    if (resp_pkt->ack) {
        MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);

        /* remove/free any remaining get MEs and handles */
        for (i = 0; i < REQ_PTL(sreq)->num_gets; i++) {
            ret = PtlMEUnlink(REQ_PTL(sreq)->get_me_p[i]);
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeunlink", "**ptlmeunlink %s", MPID_nem_ptl_strerror(ret));
            mpi_errno = MPID_Request_complete(sreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
        }
        if (REQ_PTL(sreq)->get_me_p)
            MPIU_Free(REQ_PTL(sreq)->get_me_p);

        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"message cancelled");
    } else {
        MPIR_STATUS_SET_CANCEL_BIT(sreq->status, FALSE);
        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"unable to cancel message");
    }

    mpi_errno = MPID_Request_complete(sreq);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

     *rreqp = NULL;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #13
0
static int _mxm_irecv(MPID_nem_mxm_ep_t * ep, MPID_nem_mxm_req_area * req, int id, mxm_mq_h mxm_mq,
                      mxm_tag_t mxm_tag)
{
    int mpi_errno = MPI_SUCCESS;
    mxm_error_t ret = MXM_OK;
    mxm_recv_req_t *mxm_rreq;
    list_head_t *free_queue = NULL;

    MPIU_Assert(req);

    free_queue = (ep ? &ep->free_queue : &mxm_obj->free_queue);
    req->mxm_req = list_dequeue_mxm_req(free_queue);
    if (!req->mxm_req) {
        list_grow_mxm_req(free_queue);
        req->mxm_req = list_dequeue_mxm_req(free_queue);
        if (!req->mxm_req) {
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "empty free queue");
            mpi_errno = MPI_ERR_OTHER;
            goto fn_fail;
        }
    }
    mxm_rreq = &(req->mxm_req->item.recv);

    mxm_rreq->base.state = MXM_REQ_NEW;
    mxm_rreq->base.mq = mxm_mq;
    mxm_rreq->base.conn = (ep ? ep->mxm_conn : 0);
    mxm_rreq->base.completed_cb = _mxm_recv_completion_cb;
    mxm_rreq->base.context = req->ctx;

    mxm_rreq->tag = mxm_tag;
    mxm_rreq->tag_mask = _mxm_tag_mask(id);

    if (likely(req->iov_count == 1)) {
        mxm_rreq->base.data_type = MXM_REQ_DATA_BUFFER;
        mxm_rreq->base.data.buffer.ptr = req->iov_buf[0].ptr;
        mxm_rreq->base.data.buffer.length = req->iov_buf[0].length;
    }
    else {
        mxm_rreq->base.data_type = MXM_REQ_DATA_IOV;
        mxm_rreq->base.data.iov.vector = req->iov_buf;
        mxm_rreq->base.data.iov.count = req->iov_count;
    }

    ret = mxm_req_recv(mxm_rreq);
    if (MXM_OK != ret) {
        list_enqueue(free_queue, &req->mxm_req->queue);
        mpi_errno = MPI_ERR_OTHER;
        goto fn_fail;
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
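/* Illustrative sketch (not MXM/MPICH code): the "dequeue from a free list,
   grow it when empty, put the element back on failure" pattern used by
   _mxm_irecv() above, with a hypothetical singly linked free list. */
#include <stdio.h>
#include <stdlib.h>

typedef struct node { struct node *next; int payload; } node_t;
typedef struct { node_t *head; } list_t;

static void free_list_push(list_t *l, node_t *n) { n->next = l->head; l->head = n; }

static node_t *free_list_pop(list_t *l)
{
    node_t *n = l->head;
    if (n) l->head = n->next;
    return n;
}

static void free_list_grow(list_t *l, int count)
{
    for (int i = 0; i < count; i++) {
        node_t *n = calloc(1, sizeof *n);
        if (n) free_list_push(l, n);
    }
}

int main(void)
{
    list_t free_queue = { NULL };

    node_t *req = free_list_pop(&free_queue);
    if (!req) {                       /* empty: grow the pool, then retry once */
        free_list_grow(&free_queue, 8);
        req = free_list_pop(&free_queue);
    }
    if (!req) {
        fprintf(stderr, "empty free queue\n");
        return 1;
    }
    req->payload = 42;
    /* if the post had failed, the element would go straight back on the list */
    free_list_push(&free_queue, req);
    printf("request recycled, payload was %d\n", req->payload);
    return 0;
}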
Example #14
0
int MPIDI_CH3_PktHandler_RndvSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, 
				   MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPIDI_CH3_Pkt_rndv_send_t * rs_pkt = &pkt->rndv_send;
    int mpi_errno = MPI_SUCCESS;
    int complete;
    char *data_buf;
    MPIDI_msg_sz_t data_len;
    MPID_Request *req;
    
    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received rndv send (data) pkt");

    MPID_Request_get_ptr(rs_pkt->receiver_req_id, req);

    data_len = ((*buflen - sizeof(MPIDI_CH3_Pkt_t) >= req->dev.recv_data_sz)
                ? req->dev.recv_data_sz : *buflen - sizeof(MPIDI_CH3_Pkt_t));
    data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);
    
    if (req->dev.recv_data_sz == 0) {
        *buflen = sizeof(MPIDI_CH3_Pkt_t);
        mpi_errno = MPID_Request_complete(req);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
	*rreqp = NULL;
    }
    else {
        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len,
                                                  &complete);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
			     "**ch3|postrecv %s", "MPIDI_CH3_PKT_RNDV_SEND");
	}

        *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

        if (complete) 
        {
            mpi_errno = MPID_Request_complete(req);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
            *rreqp = NULL;
        }
        else
        {
            *rreqp = req;
        }
   }
	
 fn_fail:
    return mpi_errno;
}
Example #15
0
static int MPIR_Thread_CS_Init( void )
{
    int err;
    MPIU_THREADPRIV_DECL;

    MPIU_Assert(MPICH_MAX_LOCKS >= MPIU_Nest_NUM_MUTEXES);

    /* we create this at all granularities right now */
    MPID_Thread_mutex_create(&MPIR_ThreadInfo.memalloc_mutex, &err);
    MPIU_Assert(err == 0);

    /* must come after memalloc_mutex creation */
    MPIU_THREADPRIV_INITKEY;
    MPIU_THREADPRIV_INIT;

#if MPIU_THREAD_GRANULARITY == MPIU_THREAD_GRANULARITY_GLOBAL
/* There is a single, global lock, held for the duration of an MPI call */
    MPID_Thread_mutex_create(&MPIR_ThreadInfo.global_mutex, &err);
    MPIU_Assert(err == 0);
    MPID_Thread_mutex_create(&MPIR_ThreadInfo.handle_mutex, &err);
    MPIU_Assert(err == 0);

#elif MPIU_THREAD_GRANULARITY == MPIU_THREAD_GRANULARITY_PER_OBJECT
    /* MPIU_THREAD_GRANULARITY_PER_OBJECT: Multiple locks */
    MPID_Thread_mutex_create(&MPIR_ThreadInfo.global_mutex, &err);
    MPIU_Assert(err == 0);
    MPID_Thread_mutex_create(&MPIR_ThreadInfo.handle_mutex, &err);
    MPIU_Assert(err == 0);
    MPID_Thread_mutex_create(&MPIR_ThreadInfo.msgq_mutex, &err);
    MPIU_Assert(err == 0);
    MPID_Thread_mutex_create(&MPIR_ThreadInfo.completion_mutex, &err);
    MPIU_Assert(err == 0);
    MPID_Thread_mutex_create(&MPIR_ThreadInfo.ctx_mutex, &err);
    MPIU_Assert(err == 0);
    MPID_Thread_mutex_create(&MPIR_ThreadInfo.pmi_mutex, &err);
    MPIU_Assert(err == 0);

#elif MPIU_THREAD_GRANULARITY == MPIU_THREAD_GRANULARITY_LOCK_FREE
/* Updates to shared data and access to shared services are handled without
   locks wherever possible. */
#error lock-free not yet implemented

#elif MPIU_THREAD_GRANULARITY == MPIU_THREAD_GRANULARITY_SINGLE
/* No thread support, make all operations a no-op */

#else
#error Unrecognized thread granularity
#endif
    MPIU_DBG_MSG(THREAD,TYPICAL,"Created global mutex and private storage");
    return MPI_SUCCESS;
}
Example #16
0
int MPIDI_CH3I_Shm_send_progress(void)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_IOV *iov;
    int n_iov;
    MPID_Request *sreq;
    int again = 0;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SHM_SEND_PROGRESS);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SHM_SEND_PROGRESS);

    sreq = MPIDI_CH3I_shm_active_send;
    MPIU_DBG_STMT(CH3_CHANNEL, VERBOSE, {if (sreq) MPIU_DBG_MSG (CH3_CHANNEL, VERBOSE, "Send: cont sreq");});
Example #17
0
void MPID_nem_mxm_get_adi_msg(mxm_conn_h conn, mxm_imm_t imm, void *data,
                              size_t length, size_t offset, int last)
{
    MPIDI_VC_t *vc = NULL;

    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "MPID_nem_mxm_get_adi_msg");

    vc = mxm_conn_ctx_get(conn);

    _dbg_mxm_output(5, "========> Getting ADI msg (from=%d data_size %d) \n", vc->pg_rank, length);
    _dbg_mxm_out_buf(data, (length > 16 ? 16 : length));

    MPID_nem_handle_pkt(vc, data, (MPIDI_msg_sz_t) (length));
}
Example #18
0
int MPIDI_CH3U_Receive_data_unexpected(MPID_Request * rreq, char *buf, MPIDI_msg_sz_t *buflen, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);

    /* FIXME: to improve performance, allocate temporary buffer from a 
       specialized buffer pool. */
    /* FIXME: to avoid memory exhaustion, integrate buffer pool management
       with flow control */
    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"unexpected request allocated");
    
    rreq->dev.tmpbuf = MPIU_Malloc(rreq->dev.recv_data_sz);
    if (!rreq->dev.tmpbuf) {
	MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**nomem","**nomem %d",
			     rreq->dev.recv_data_sz);
    }
    rreq->dev.tmpbuf_sz = rreq->dev.recv_data_sz;
    
    /* if all of the data has already been received, copy it
       now, otherwise build an iov and let the channel copy it */
    if (rreq->dev.recv_data_sz <= *buflen)
    {
        MPIU_Memcpy(rreq->dev.tmpbuf, buf, rreq->dev.recv_data_sz);
        *buflen = rreq->dev.recv_data_sz;
        rreq->dev.recv_pending_count = 1;
        *complete = TRUE;
    }
    else
    {
        rreq->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)((char *)rreq->dev.tmpbuf);
        rreq->dev.iov[0].MPID_IOV_LEN = rreq->dev.recv_data_sz;
        rreq->dev.iov_count = 1;
        rreq->dev.recv_pending_count = 2;
        *buflen = 0;
        *complete = FALSE;
    }

    if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG)
        MPIR_T_ADD(RECVQ_STATISTICS, MPIDI_CH3I_unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);

    rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackUEBufComplete;

 fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);
    return mpi_errno;
}
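/* Illustrative sketch (not MPICH code): the copy-now-or-describe-for-later
   decision made by MPIDI_CH3U_Receive_data_unexpected() above, reduced to a
   plain buffer and a struct iovec.  The unexpected_recv_t type and
   receive_unexpected() are invented for this example. */
#include <string.h>
#include <sys/uio.h>

typedef struct unexpected_recv {
    char        *tmpbuf;      /* temporary buffer owned by the request */
    size_t       needed;      /* total payload size expected */
    struct iovec iov;         /* where the channel should place data arriving later */
} unexpected_recv_t;

/* Returns 1 if all data was consumed from 'buf' now, 0 if more must arrive. */
int receive_unexpected(unexpected_recv_t *r, const char *buf, size_t *buflen)
{
    if (r->needed <= *buflen) {
        memcpy(r->tmpbuf, buf, r->needed);   /* everything has already arrived */
        *buflen = r->needed;
        return 1;
    }
    r->iov.iov_base = r->tmpbuf;             /* let the channel fill this in later */
    r->iov.iov_len = r->needed;
    *buflen = 0;
    return 0;
}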
Example #19
0
/* Send a zero-sized message with eager synchronous.  This is a temporary
   routine, as we may want to replace this with a counterpart to the
   Eager Short message */
int MPIDI_CH3_EagerSyncZero(MPID_Request **sreq_p, int rank, int tag, 
			    MPID_Comm * comm, int context_offset )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_eager_sync_send_t * const es_pkt = &upkt.eager_sync_send;
    MPIDI_VC_t * vc;
    MPID_Request *sreq = *sreq_p;
    
    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
    
    /* MT FIXME what are the two operations we are waiting for?  the send and
     * the sync response? */
    MPID_cc_set(&sreq->cc, 2);
    MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
    sreq->dev.OnDataAvail = 0;
    
    MPIDI_Pkt_init(es_pkt, MPIDI_CH3_PKT_EAGER_SYNC_SEND);
    es_pkt->match.parts.rank = comm->rank;
    es_pkt->match.parts.tag = tag;
    es_pkt->match.parts.context_id = comm->context_id + context_offset;
    es_pkt->sender_req_id = sreq->handle;
    es_pkt->data_sz = 0;
    
    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(es_pkt, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);
    
    MPIU_DBG_MSGPKT(vc,tag,es_pkt->match.parts.context_id,rank,(MPIDI_msg_sz_t)0,"EagerSync0");
    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iSend(vc, sreq, es_pkt, sizeof(*es_pkt));
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS)
    {
	MPID_Request_release(sreq);
	*sreq_p = NULL;
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
    }
    /* --END ERROR HANDLING-- */

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #20
0
int MPIDI_CH3_Connection_terminate(MPIDI_VC_t * vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vcch = &vc->ch;

    MPIU_DBG_CONNSTATECHANGE(vc,vcch->conn,CONN_STATE_CLOSING);
    vcch->conn->state = CONN_STATE_CLOSING;
    MPIU_DBG_MSG(CH3_DISCONNECT,TYPICAL,"Closing sock (Post_close)");
    mpi_errno = MPIDU_Sock_post_close(vcch->sock);
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_POP(mpi_errno);
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #21
0
static int _mxm_handle_sreq(MPID_Request * req)
{
    int complete = FALSE;
    int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    vc_area = VC_BASE(req->ch.vc);
    req_area = REQ_BASE(req);

    _dbg_mxm_out_buf(req_area->iov_buf[0].ptr,
                     (req_area->iov_buf[0].length >
                      16 ? 16 : req_area->iov_buf[0].length));

    vc_area->pending_sends -= 1;
    if (((req->dev.datatype_ptr != NULL) && (req->dev.tmpbuf != NULL))) {
        MPIU_Free(req->dev.tmpbuf);
    }

    if (req_area->iov_count > MXM_MPICH_MAX_IOV) {
        MPIU_Free(req_area->iov_buf);
        req_area->iov_buf = req_area->tmp_buf;
        req_area->iov_count = 0;
    }

    reqFn = req->dev.OnDataAvail;
    if (!reqFn) {
        MPIDI_CH3U_Request_complete(req);
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
    }
    else {
        MPIDI_VC_t *vc = req->ch.vc;

        reqFn(vc, req, &complete);
        if (!complete) {
            MPIU_Assert(complete == TRUE);
        }
    }

    return complete;
}
Example #22
0
static int nonempty_intersection(MPID_Comm *comm, MPID_Group *group, int *flag)
{
    int mpi_errno = MPI_SUCCESS;
    int i_g, i_c;
    MPIDI_VC_t *vc_g, *vc_c;
    MPIDI_STATE_DECL(MPID_STATE_NONEMPTY_INTERSECTION);

    MPIDI_FUNC_ENTER(MPID_STATE_NONEMPTY_INTERSECTION);

    /* handle common case fast */
    if (comm == MPIR_Process.comm_world || comm == MPIR_Process.icomm_world) {
        *flag = TRUE;
        MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "comm is comm_world or icomm_world");
        goto fn_exit;
    }
    *flag = FALSE;
    
    /* FIXME: This algorithm assumes that the number of processes in group is
       very small (like 1).  So doing a linear search for them in comm is better
       than sorting the procs in comm and group then doing a binary search */

    for (i_g = 0; i_g < group->size; ++i_g) {
        /* FIXME: This won't work for dynamic procs */
        MPIDI_PG_Get_vc(MPIDI_Process.my_pg, group->lrank_to_lpid[i_g].lpid, &vc_g);
        for (i_c = 0; i_c < comm->remote_size; ++i_c) {
            MPIDI_Comm_get_vc(comm, i_c, &vc_c);
            if (vc_g == vc_c) {
                *flag = TRUE;
                goto fn_exit;
            }
        }
    }
    
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_NONEMPTY_INTERSECTION);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #23
0
/* 
 * These routines are called when a receive matches an eager sync send 
 */
int MPIDI_CH3_EagerSyncAck( MPIDI_VC_t *vc, MPID_Request *rreq )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_eager_sync_ack_t * const esa_pkt = &upkt.eager_sync_ack;
    MPID_Request * esa_req;
    
    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending eager sync ack");
    MPIDI_Pkt_init(esa_pkt, MPIDI_CH3_PKT_EAGER_SYNC_ACK);
    esa_pkt->sender_req_id = rreq->dev.sender_req_id;
    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, esa_pkt, sizeof(*esa_pkt), &esa_req);
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    if (mpi_errno != MPI_SUCCESS) {
	MPIR_ERR_POP(mpi_errno);
    }
    if (esa_req != NULL)
    {
	MPID_Request_release(esa_req);
    }
 fn_fail:
    return mpi_errno;
}
Example #24
0
int MPIDI_CH3_iStartMsg(MPIDI_VC_t * vc, void * hdr, MPIDI_msg_sz_t hdr_sz, 
			MPID_Request ** sreq_ptr)
{
    MPID_Request * sreq = NULL;
    MPIDI_CH3I_VC *vcch = &vc->ch;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSG);
    
    MPIU_Assert( hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    /* The SOCK channel uses a fixed length header, the size of which is the 
       maximum of all possible packet headers */
    hdr_sz = sizeof(MPIDI_CH3_Pkt_t);
    MPIU_DBG_STMT(CH3_CHANNEL,VERBOSE,
		  MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t*)hdr));

    if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTED) /* MT */
    {
	/* Connection already formed.  If send queue is empty attempt to send 
	   data, queuing any unsent data. */
	if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
	{
	    MPIU_Size_t nb;
	    int rc;

	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
			 "send queue empty, attempting to write");
	    
	    MPIU_DBG_PKT(vcch->conn,hdr,"istartmsg");
	    /* MT: need some signalling to lock down our right to use the
	       channel, thus ensuring that the progress engine does
	       not also try to write */
	    rc = MPIDU_Sock_write(vcch->sock, hdr, hdr_sz, &nb);
	    if (rc == MPI_SUCCESS)
	    {
		MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
			       "wrote %ld bytes", (unsigned long) nb);
		
		if (nb == hdr_sz)
		{ 
		    MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
				   "entire write complete, " MPIDI_MSG_SZ_FMT " bytes", nb);
		    /* done.  get us out of here as quickly as possible. */
		}
		else
		{
		    MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
                    "partial write of " MPIDI_MSG_SZ_FMT " bytes, request enqueued at head", nb);
		    sreq = create_request(hdr, hdr_sz, nb);
		    if (!sreq) {
			MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
		    }

		    MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
		    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
     (MPIU_DBG_FDEST,"posting write, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
		    vcch->conn->send_active = sreq;
		    mpi_errno = MPIDU_Sock_post_write(vcch->conn->sock, sreq->dev.iov[0].MPID_IOV_BUF,
						      sreq->dev.iov[0].MPID_IOV_LEN, sreq->dev.iov[0].MPID_IOV_LEN, NULL);
		    /* --BEGIN ERROR HANDLING-- */
		    if (mpi_errno != MPI_SUCCESS)
		    {
			mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
							 "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
							 sreq, vcch->conn, vc);
			goto fn_fail;
		    }
		    /* --END ERROR HANDLING-- */
		}
	    }
	    /* --BEGIN ERROR HANDLING-- */
	    else
	    {
		MPIU_DBG_MSG_D(CH3_CHANNEL,TYPICAL,
			       "ERROR - MPIDU_Sock_write failed, rc=%d", rc);
		sreq = MPID_Request_create();
		if (!sreq) {
		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
		}
		sreq->kind = MPID_REQUEST_SEND;
		MPID_cc_set(&(sreq->cc), 0);
		sreq->status.MPI_ERROR = MPIR_Err_create_code( rc,
			       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, 
			       MPI_ERR_INTERN, "**ch3|sock|writefailed",
			       "**ch3|sock|writefailed %d", rc );
		/* Make sure that the caller sees this error */
		mpi_errno = sreq->status.MPI_ERROR;
	    }
	    /* --END ERROR HANDLING-- */
	}
	else
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
			 "send in progress, request enqueued");
	    sreq = create_request(hdr, hdr_sz, 0);
	    if (!sreq) {
		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	    }
	    MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
	}
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTING) /* MT */
    {
	MPIU_DBG_VCUSE(vc,
		       "connecteding. enqueuing request");
	
	/* queue the data so it can be sent after the connection is formed */
	sreq = create_request(hdr, hdr_sz, 0);
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_UNCONNECTED) /* MT */
    {
	MPIU_DBG_VCUSE(vc,
		       "unconnected.  posting connect and enqueuing request");
	
	/* queue the data so it can be sent after the connection is formed */
	sreq = create_request(hdr, hdr_sz, 0);
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);

	/* Form a new connection */
	MPIDI_CH3I_VC_post_connect(vc);
    }
    else if (vcch->state != MPIDI_CH3I_VC_STATE_FAILED)
    {
	/* Unable to send data at the moment, so queue it for later */
	MPIU_DBG_VCUSE(vc,"forming connection, request enqueued");
	sreq = create_request(hdr, hdr_sz, 0);
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    /* --BEGIN ERROR HANDLING-- */
    else
    {
	/* Connection failed, so allocate a request and return an error. */
	MPIU_DBG_VCUSE(vc,"ERROR - connection failed");
	sreq = MPID_Request_create();
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	sreq->kind = MPID_REQUEST_SEND;
	MPID_cc_set(&sreq->cc, 0);
	
	sreq->status.MPI_ERROR = MPIR_Err_create_code( MPI_SUCCESS,
		       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, 
		       MPI_ERR_INTERN, "**ch3|sock|connectionfailed",0 );
	/* Make sure that the caller sees this error */
	mpi_errno = sreq->status.MPI_ERROR;
    }
    /* --END ERROR HANDLING-- */

  fn_fail:
    *sreq_ptr = sreq;
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSG);
    return mpi_errno;
}
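/* Illustrative sketch (not MPICH code): the "write what you can immediately,
   queue the rest for the progress engine" idea behind MPIDI_CH3_iStartMsg(),
   reduced to a non-blocking file descriptor and a tiny pending-send record.
   pending_send_t and start_msg() are invented names. */
#include <errno.h>
#include <stddef.h>
#include <unistd.h>

typedef struct pending_send {
    const char *buf;     /* unsent portion of the header */
    size_t      len;     /* bytes still to send */
} pending_send_t;

/* Returns 0 if fully sent, 1 if 'pending' was filled in for later, -1 on error. */
int start_msg(int fd, const char *hdr, size_t hdr_sz, pending_send_t *pending)
{
    ssize_t nb = write(fd, hdr, hdr_sz);

    if (nb == (ssize_t) hdr_sz)
        return 0;                              /* entire write complete */

    if (nb < 0) {
        if (errno != EAGAIN && errno != EWOULDBLOCK)
            return -1;                         /* real error: caller reports it */
        nb = 0;                                /* nothing written, queue it all */
    }

    pending->buf = hdr + nb;                   /* partial write: queue the tail */
    pending->len = hdr_sz - (size_t) nb;
    return 1;
}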
Example #25
0
static int _mxm_isend(MPID_nem_mxm_ep_t * ep, MPID_nem_mxm_req_area * req,
                      int type, mxm_mq_h mxm_mq, int mxm_rank, int id, mxm_tag_t mxm_tag, int block)
{
    int mpi_errno = MPI_SUCCESS;
    mxm_error_t ret = MXM_OK;
    mxm_send_req_t *mxm_sreq;
    list_head_t *free_queue = NULL;

    MPIU_Assert(ep);
    MPIU_Assert(req);

    free_queue = &ep->free_queue;
    req->mxm_req = list_dequeue_mxm_req(free_queue);
    if (!req->mxm_req) {
        list_grow_mxm_req(free_queue);
        req->mxm_req = list_dequeue_mxm_req(free_queue);
        if (!req->mxm_req) {
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "empty free queue");
            mpi_errno = MPI_ERR_OTHER;
            goto fn_fail;
        }
    }
    mxm_sreq = &(req->mxm_req->item.send);

    mxm_sreq->base.state = MXM_REQ_NEW;
    mxm_sreq->base.mq = mxm_mq;
    mxm_sreq->base.conn = ep->mxm_conn;
    mxm_sreq->base.completed_cb = _mxm_send_completion_cb;
    mxm_sreq->base.context = req->ctx;

    if (type == MXM_MPICH_ISEND_AM) {
        mxm_sreq->opcode = MXM_REQ_OP_AM;
        mxm_sreq->flags = 0;

        mxm_sreq->op.am.hid = id;
        mxm_sreq->op.am.imm_data = mxm_rank;
    }
    else if (type == MXM_MPICH_ISEND_SYNC) {
        mxm_sreq->opcode = MXM_REQ_OP_SEND_SYNC;
        mxm_sreq->flags = 0;

        mxm_sreq->op.send.tag = mxm_tag;
        mxm_sreq->op.send.imm_data = mxm_rank;
    }
    else {
        mxm_sreq->opcode = MXM_REQ_OP_SEND;
        mxm_sreq->flags = 0;

        mxm_sreq->op.send.tag = mxm_tag;
        mxm_sreq->op.send.imm_data = mxm_rank;
    }

    if (likely(req->iov_count == 1)) {
        mxm_sreq->base.data_type = MXM_REQ_DATA_BUFFER;
        mxm_sreq->base.data.buffer.ptr = req->iov_buf[0].ptr;
        mxm_sreq->base.data.buffer.length = req->iov_buf[0].length;
    }
    else {
        mxm_sreq->base.data_type = MXM_REQ_DATA_IOV;
        mxm_sreq->base.data.iov.vector = req->iov_buf;
        mxm_sreq->base.data.iov.count = req->iov_count;
    }

    ret = mxm_req_send(mxm_sreq);
    if (MXM_OK != ret) {
        list_enqueue(free_queue, &req->mxm_req->queue);
        mpi_errno = MPI_ERR_OTHER;
        goto fn_fail;
    }

    if (block)
        _mxm_req_wait(&mxm_sreq->base);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Example #26
0
int MPID_Cancel_send(MPID_Request * sreq)
{
    MPIDI_VC_t * vc;
    int proto;
    int flag;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_CANCEL_SEND);

    MPIU_Assert(sreq->kind == MPID_REQUEST_SEND);

    MPIDI_Request_cancel_pending(sreq, &flag);
    if (flag)
    {
        goto fn_exit;
    }

    /*
     * FIXME: user requests returned by MPI_Ibsend() have a NULL comm pointer
     * and no pointer to the underlying communication
     * request.  For now, we simply fail to cancel the request.  In the future,
     * we should add a new request kind to indicate that
     * the request is a BSEND.  Then we can properly cancel the request, much
     * in the way we do persistent requests.
     */
    if (sreq->comm == NULL)
    {
        goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(sreq->comm, sreq->dev.match.parts.rank, &vc);

    proto = MPIDI_Request_get_msg_type(sreq);

    if (proto == MPIDI_REQUEST_SELF_MSG)
    {
        MPID_Request * rreq;

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,
                     "attempting to cancel message sent to self");

        MPIU_THREAD_CS_ENTER(MSGQUEUE,);
        rreq = MPIDI_CH3U_Recvq_FDU(sreq->handle, &sreq->dev.match);
        MPIU_THREAD_CS_EXIT(MSGQUEUE,);
        if (rreq)
        {
            MPIU_Assert(rreq->partner_request == sreq);

            MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                                                "send-to-self cancellation successful, sreq=0x%08x, rreq=0x%08x",
                                                sreq->handle, rreq->handle));

            MPIU_Object_set_ref(rreq, 0);
            MPIDI_CH3_Request_destroy(rreq);

            sreq->status.cancelled = TRUE;
            /* no other thread should be waiting on sreq, so it is safe to
               reset ref_count and cc */
            MPID_cc_set(&sreq->cc, 0);
            /* FIXME should be a decr and assert, not a set */
            MPIU_Object_set_ref(sreq, 1);
        }
        else
        {
            sreq->status.cancelled = FALSE;
            MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                                                "send-to-self cancellation failed, sreq=0x%08x",
                                                sreq->handle));
        }

        goto fn_exit;
    }
Example #27
0
MPID_Request * MPID_Request_create(void)
{
    MPID_Request * req;
    MPIDI_STATE_DECL(MPID_STATE_MPID_REQUEST_CREATE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_REQUEST_CREATE);
    
    req = MPIU_Handle_obj_alloc(&MPID_Request_mem);
    if (req != NULL)
    {
	MPIU_DBG_MSG_P(CH3_CHANNEL,VERBOSE,
		       "allocated request, handle=0x%08x", req->handle);
#ifdef MPICH_DBG_OUTPUT
	/*MPIU_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPID_REQUEST);*/
	if (HANDLE_GET_MPI_KIND(req->handle) != MPID_REQUEST)
	{
	    int mpi_errno;
	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
		       FCNAME, __LINE__, MPI_ERR_OTHER, 
		       "**invalid_handle", "**invalid_handle %d", req->handle);
	    MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
	}
#endif
	/* FIXME: This makes request creation expensive.  We need to trim
	   this to the basics, with additional setup for special-purpose 
	   requests (think base class and inheritance).  For example, do we 
	   *really* want to set the kind to UNDEFINED? And should the RMA 
	   values be set only for RMA requests? */
	MPIU_Object_set_ref(req, 1);
	req->kind		   = MPID_REQUEST_UNDEFINED;
        MPID_cc_set(&req->cc, 1);
	req->cc_ptr		   = &req->cc;
	/* FIXME: status fields meaningful only for receive, and even then
	   should not need to be set. */
	req->status.MPI_SOURCE	   = MPI_UNDEFINED;
	req->status.MPI_TAG	   = MPI_UNDEFINED;
	req->status.MPI_ERROR	   = MPI_SUCCESS;
        MPIR_STATUS_SET_COUNT(req->status, 0);
        MPIR_STATUS_SET_CANCEL_BIT(req->status, FALSE);
	req->comm		   = NULL;
        req->greq_fns              = NULL;
        req->errflag               = MPIR_ERR_NONE;
	req->dev.datatype_ptr	   = NULL;
	req->dev.segment_ptr	   = NULL;
	/* Masks and flags for channel device state in an MPID_Request */
	req->dev.state		   = 0;
	req->dev.cancel_pending	   = FALSE;
	/* FIXME: RMA ops shouldn't need to be set except when creating a
	   request for RMA operations */
	req->dev.target_win_handle = MPI_WIN_NULL;
	req->dev.source_win_handle = MPI_WIN_NULL;
        req->dev.lock_queue_entry  = NULL;
	req->dev.dtype_info	   = NULL;
	req->dev.dataloop	   = NULL;
	req->dev.iov_offset        = 0;
        req->dev.flags             = MPIDI_CH3_PKT_FLAG_NONE;
        req->dev.resp_request_handle = MPI_REQUEST_NULL;
        req->dev.user_buf          = NULL;
        req->dev.OnDataAvail       = NULL;
        req->dev.OnFinal           = NULL;
        req->dev.user_buf          = NULL;
        req->dev.drop_data         = FALSE;
        req->dev.stream_offset     = 0;
#ifdef MPIDI_CH3_REQUEST_INIT
	MPIDI_CH3_REQUEST_INIT(req);
#endif
    }
    else
    {
	/* FIXME: This fails to fail if debugging is turned off */
	MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"unable to allocate a request");
    }
    
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_REQUEST_CREATE);
    return req;
}
Example #28
0
int MPIDI_CH3U_Request_load_recv_iov(MPID_Request * const rreq)
{
    MPI_Aint last;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
    if (rreq->dev.segment_first < rreq->dev.segment_size)
    {
	/* still reading data that needs to go into the user buffer */
	
	if (MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_ACCUM_RECV &&
            MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_ACCUM_RECV &&
            MPIDI_Request_get_srbuf_flag(rreq))
	{
	    MPIDI_msg_sz_t data_sz;
	    MPIDI_msg_sz_t tmpbuf_sz;

	    /* Once a SRBuf is in use, we continue to use it since a small 
	       amount of data may already be present at the beginning
	       of the buffer.  This data is left over from the previous unpack,
	       most likely a result of alignment issues.  NOTE: we
	       could force the use of the SRBuf only 
	       when (rreq->dev.tmpbuf_off > 0)... */
	    
	    data_sz = rreq->dev.segment_size - rreq->dev.segment_first - 
		rreq->dev.tmpbuf_off;
	    MPIU_Assert(data_sz > 0);
	    tmpbuf_sz = rreq->dev.tmpbuf_sz - rreq->dev.tmpbuf_off;
	    if (data_sz > tmpbuf_sz)
	    {
		data_sz = tmpbuf_sz;
	    }
	    rreq->dev.iov[0].MPID_IOV_BUF = 
		(MPID_IOV_BUF_CAST)((char *) rreq->dev.tmpbuf + 
				    rreq->dev.tmpbuf_off);
	    rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
            rreq->dev.iov_offset = 0;
	    rreq->dev.iov_count = 1;
	    MPIU_Assert(rreq->dev.segment_first + data_sz + 
			rreq->dev.tmpbuf_off <= rreq->dev.recv_data_sz);
	    if (rreq->dev.segment_first + data_sz + rreq->dev.tmpbuf_off == 
		rreq->dev.recv_data_sz)
	    {
		MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
		  "updating rreq to read the remaining data into the SRBuf");
		rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackSRBufComplete;
	    }
	    else
	    {
		MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
		       "updating rreq to read more data into the SRBuf");
		rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV;
	    }
	    goto fn_exit;
	}
	
	last = rreq->dev.segment_size;
	rreq->dev.iov_count = MPID_IOV_LIMIT;
	rreq->dev.iov_offset = 0;
	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
   "pre-upv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d",
			  rreq->dev.segment_first, last, rreq->dev.iov_count));
	MPIU_Assert(rreq->dev.segment_first < last);
	MPIU_Assert(last > 0);
	MPID_Segment_unpack_vector(rreq->dev.segment_ptr, 
				   rreq->dev.segment_first,
				   &last, &rreq->dev.iov[0], &rreq->dev.iov_count);
	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
   "post-upv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d, iov_offset=%lld",
			  rreq->dev.segment_first, last, rreq->dev.iov_count, (long long)rreq->dev.iov_offset));
	MPIU_Assert(rreq->dev.iov_count >= 0 && rreq->dev.iov_count <= 
		    MPID_IOV_LIMIT);

	/* --BEGIN ERROR HANDLING-- */
	if (rreq->dev.iov_count == 0)
	{
	    /* If the data can't be unpacked, then we have a mismatch between
	       the datatype and the amount of data received.  Adjust
	       the segment info so that the remaining data is received and 
	       thrown away. */
	    rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
		       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE,
		       "**dtypemismatch", 0);
            MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.segment_first);
	    rreq->dev.segment_size = rreq->dev.segment_first;
	    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
	    goto fn_exit;
	}
        else
        {
            MPIU_Assert(rreq->dev.iov_offset < rreq->dev.iov_count);
        }
	/* --END ERROR HANDLING-- */

	if (last == rreq->dev.recv_data_sz)
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
     "updating rreq to read the remaining data directly into the user buffer");
	    /* Eventually, use OnFinal for this instead */
	    rreq->dev.OnDataAvail = rreq->dev.OnFinal;
	}
	else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV ||
                 MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV ||
                 (last == rreq->dev.segment_size ||
                  (last - rreq->dev.segment_first) / rreq->dev.iov_count >= MPIDI_IOV_DENSITY_MIN))
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
	     "updating rreq to read more data directly into the user buffer");
	    rreq->dev.segment_first = last;
	    rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReloadIOV;
	}
	else
	{
	    /* Too little data would have been received using an IOV.  
	       We will start receiving data into a SRBuf and unpacking it
	       later. */
	    MPIU_Assert(MPIDI_Request_get_srbuf_flag(rreq) == FALSE);
	    
	    MPIDI_CH3U_SRBuf_alloc(rreq, 
			    rreq->dev.segment_size - rreq->dev.segment_first);
	    rreq->dev.tmpbuf_off = 0;
	    /* --BEGIN ERROR HANDLING-- */
	    if (rreq->dev.tmpbuf_sz == 0)
	    {
		/* FIXME - we should drain the data off the pipe here, but we 
		   don't have a buffer to drain it into.  should this be
		   a fatal error? */
		MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"SRBuf allocation failure");
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
			      FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 
			 "**nomem %d", 
			 rreq->dev.segment_size - rreq->dev.segment_first);
		rreq->status.MPI_ERROR = mpi_errno;
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */

	    /* fill in the IOV using a recursive call */
	    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
	}
    }
    else
    {
	/* receive and toss any extra data that does not fit in the user's 
	   buffer */
	MPIDI_msg_sz_t data_sz;

	data_sz = rreq->dev.recv_data_sz - rreq->dev.segment_first;
	if (!MPIDI_Request_get_srbuf_flag(rreq))
	{
	    MPIDI_CH3U_SRBuf_alloc(rreq, data_sz);
	    /* --BEGIN ERROR HANDLING-- */
	    if (rreq->dev.tmpbuf_sz == 0)
	    {
		MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"SRBuf allocation failure");
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
			       FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
		rreq->status.MPI_ERROR = mpi_errno;
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */
	}

	if (data_sz <= rreq->dev.tmpbuf_sz)
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
	    "updating rreq to read overflow data into the SRBuf and complete");
	    rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
	    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
	    /* Eventually, use OnFinal for this instead */
	    rreq->dev.OnDataAvail = rreq->dev.OnFinal;
	}
	else
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
	  "updating rreq to read overflow data into the SRBuf and reload IOV");
	    rreq->dev.iov[0].MPID_IOV_LEN = rreq->dev.tmpbuf_sz;
	    rreq->dev.segment_first += rreq->dev.tmpbuf_sz;
	    rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReloadIOV;
	}
	
	rreq->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rreq->dev.tmpbuf;
	rreq->dev.iov_count = 1;
    }
    
  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
    return mpi_errno;
}
Example #29
0
int MPIDI_CH3U_Request_load_send_iov(MPID_Request * const sreq, 
				     MPID_IOV * const iov, int * const iov_n)
{
    MPI_Aint last;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
    MPIU_Assert(sreq->dev.segment_ptr != NULL);
    last = sreq->dev.segment_size;
    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
     "pre-pv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d",
		      sreq->dev.segment_first, last, *iov_n));
    MPIU_Assert(sreq->dev.segment_first < last);
    MPIU_Assert(last > 0);
    MPIU_Assert(*iov_n > 0 && *iov_n <= MPID_IOV_LIMIT);
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, 
			     &last, iov, iov_n);
    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
    "post-pv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d",
		      sreq->dev.segment_first, last, *iov_n));
    MPIU_Assert(*iov_n > 0 && *iov_n <= MPID_IOV_LIMIT);
    
    if (last == sreq->dev.segment_size)
    {
	MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"remaining data loaded into IOV");
	sreq->dev.OnDataAvail = sreq->dev.OnFinal;
    }
    else if ((last - sreq->dev.segment_first) / *iov_n >= MPIDI_IOV_DENSITY_MIN)
    {
	MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"more data loaded into IOV");
	sreq->dev.segment_first = last;
	sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_SendReloadIOV;
    }
    else
    {
	MPIDI_msg_sz_t data_sz;
	int i, iov_data_copied;
	
	MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"low density.  using SRBuf.");
	    
	data_sz = sreq->dev.segment_size - sreq->dev.segment_first;
	if (!MPIDI_Request_get_srbuf_flag(sreq))
	{
	    MPIDI_CH3U_SRBuf_alloc(sreq, data_sz);
	    /* --BEGIN ERROR HANDLING-- */
	    if (sreq->dev.tmpbuf_sz == 0)
	    {
		MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"SRBuf allocation failure");
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
                                FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 
						 "**nomem %d", data_sz);
		sreq->status.MPI_ERROR = mpi_errno;
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */
	}

	iov_data_copied = 0;
	for (i = 0; i < *iov_n; i++) {
	    MPIU_Memcpy((char*) sreq->dev.tmpbuf + iov_data_copied, 
		   iov[i].MPID_IOV_BUF, iov[i].MPID_IOV_LEN);
	    iov_data_copied += iov[i].MPID_IOV_LEN;
	}
	sreq->dev.segment_first = last;

	last = (data_sz <= sreq->dev.tmpbuf_sz - iov_data_copied) ? 
	    sreq->dev.segment_size :
	    sreq->dev.segment_first + sreq->dev.tmpbuf_sz - iov_data_copied;
	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
               "pre-pack: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT,
			  sreq->dev.segment_first, last));
	MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, 
			  &last, (char*) sreq->dev.tmpbuf + iov_data_copied);
	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
              "post-pack: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT,
			   sreq->dev.segment_first, last));
	iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)sreq->dev.tmpbuf;
	iov[0].MPID_IOV_LEN = last - sreq->dev.segment_first + iov_data_copied;
	*iov_n = 1;
	if (last == sreq->dev.segment_size)
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"remaining data packed into SRBuf");
	    sreq->dev.OnDataAvail = sreq->dev.OnFinal;
	}
	else 
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"more data packed into SRBuf");
	    sreq->dev.segment_first = last;
	    sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_SendReloadIOV;
	}
    }
    
  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
    return mpi_errno;
}
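/* Illustrative sketch (not MPICH code): the IOV-density heuristic above in
   isolation -- send straight from the iovec when entries are reasonably
   large, otherwise pack into a single bounce buffer.  DENSITY_MIN is an
   invented stand-in for MPIDI_IOV_DENSITY_MIN. */
#include <string.h>
#include <sys/uio.h>

#define DENSITY_MIN 64   /* minimum useful average bytes per iov entry */

/* Returns the number of iov entries to hand to writev(): either the original
   vector, or a single entry pointing at 'bounce' with the data packed in. */
int choose_send_iov(struct iovec *iov, int iov_n, char *bounce, size_t bounce_sz)
{
    size_t total = 0, off = 0;
    int i;

    for (i = 0; i < iov_n; i++)
        total += iov[i].iov_len;

    if (iov_n > 0 && total / (size_t) iov_n >= DENSITY_MIN)
        return iov_n;                        /* dense enough: send as-is */

    /* low density: copy what fits into the bounce buffer and send that */
    for (i = 0; i < iov_n && off + iov[i].iov_len <= bounce_sz; i++) {
        memcpy(bounce + off, iov[i].iov_base, iov[i].iov_len);
        off += iov[i].iov_len;
    }
    iov[0].iov_base = bounce;
    iov[0].iov_len = off;
    return 1;
}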
Example #30
0
int MPID_Send(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank,
	      int tag, MPID_Comm * comm, int context_offset,
	      MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int eager_threshold = -1;
    int mpi_errno = MPI_SUCCESS;    
#if defined(FINEGRAIN_MPI)
    int destpid=-1, destworldrank=-1;
#endif
    MPIDI_STATE_DECL(MPID_STATE_MPID_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                "rank=%d, tag=%d, context=%d", 
		rank, tag, comm->context_id + context_offset));

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked &&
            MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) &&
            MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }

#if defined(FINEGRAIN_MPI)
    MPIDI_Comm_get_pid_worldrank(comm, rank, &destpid, &destworldrank);

    if (COMPARE_RANKS(rank,comm,destpid) && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(&buf, count, datatype, rank, tag, comm,
				     context_offset, MPIDI_REQUEST_TYPE_SEND,
				     &sreq);
        if (rank == comm->rank)
	{
            printf("my_fgrank=%d: %s, self send DEADLOCK\n", my_fgrank, __FUNCTION__);
	    if (sreq != NULL && sreq->cc != 0) {
		MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
				    "**dev|selfsenddeadlock");
	    }
	}

#else
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, 
				     context_offset, MPIDI_REQUEST_TYPE_SEND, 
				     &sreq);

	/* In the single threaded case, sending to yourself will cause 
	   deadlock.  Note that in the runtime-thread case, this check
	   will not be made (long-term FIXME) */
#       ifndef MPICH_IS_THREADED
	{
	    if (sreq != NULL && MPID_cc_get(sreq->cc) != 0) {
		MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
				    "**dev|selfsenddeadlock");
	    }
	}
#	endif
#endif
	if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); }
	goto fn_exit;
    }

    if (rank == MPI_PROC_NULL)
    {
	goto fn_exit;
    }

#if defined(FINEGRAIN_MPI)
    MPIDI_Comm_get_vc_set_active_direct(comm, destpid, &vc);
#else
    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
#endif
    MPIR_ERR_CHKANDJUMP1(vc->state == MPIDI_VC_STATE_MORIBUND, mpi_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", rank);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->send)
    {
	mpi_errno = vc->comm_ops->send( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
	goto fn_exit;
    }
#endif

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, 
			    dt_true_lb);


    if (data_sz == 0)
    {
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
	MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
#if defined(FINEGRAIN_MPI)
        eager_pkt->match.parts.dest_rank = destworldrank;
#endif
	eager_pkt->match.parts.rank = comm->rank;
	eager_pkt->match.parts.tag = tag;
	eager_pkt->match.parts.context_id = comm->context_id + context_offset;
	eager_pkt->sender_req_id = MPI_REQUEST_NULL;
	eager_pkt->data_sz = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);
	
	MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
	mpi_errno = MPIDI_CH3_iStartMsg(vc, eager_pkt, sizeof(*eager_pkt), &sreq);
	MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg");
	}
	/* --END ERROR HANDLING-- */
	if (sreq != NULL)
	{
	    MPIDI_Request_set_seqnum(sreq, seqnum);
	    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
	    /* sreq->comm = comm;
	      MPIR_Comm_add_ref(comm); -- not necessary for blocking functions */
	}
	
	goto fn_exit;
    }

    MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc);

    /* FIXME: flow control: limit number of outstanding eager messages
       containing data and need to be buffered by the receiver */
#ifdef USE_EAGER_SHORT
    if (dt_contig && data_sz <= MPIDI_EAGER_SHORT_SIZE) {
	mpi_errno = MPIDI_CH3_EagerContigShortSend( &sreq, 
					       MPIDI_CH3_PKT_EAGERSHORT_SEND,
					       (char *)buf + dt_true_lb,
					       data_sz, rank, tag, comm, 
					       context_offset );
    }
    else
#endif

    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold)
    {
	if (dt_contig)
        {
 	    mpi_errno = MPIDI_CH3_EagerContigSend( &sreq, 
						   MPIDI_CH3_PKT_EAGER_SEND,
						   (char *)buf + dt_true_lb,
						   data_sz, rank, tag, comm, 
						   context_offset );
	}
	else
        {
	    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
	    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
	    mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, 
                                                      MPIDI_CH3_PKT_EAGER_SEND,
                                                      buf, count, datatype,
                                                      data_sz, rank, tag, 
                                                      comm, context_offset );
	}
    }
    else
    {