Esempio n. 1
0
/* This dequeues req from the posted recv queue, set req's error code to comm_fail, and updates the req pointer.
   Note that this creates a new error code if one hasn't already been created (i.e., if *error is MPI_SUCCESS). */
static inline void dequeue_and_set_error(MPID_Request **req,  MPID_Request *prev_req, int *error, int rank)
{
    MPID_Request *next = (*req)->dev.next;

    if (*error == MPI_SUCCESS) {
        if (rank == MPI_PROC_NULL)
            MPIU_ERR_SET(*error, MPIX_ERR_PROC_FAIL_STOP, "**comm_fail");
        else
            MPIU_ERR_SET1(*error, MPIX_ERR_PROC_FAIL_STOP, "**comm_fail", "**comm_fail %d", rank);
    }
    
    /* remove from queue */
    if (recvq_posted_head == *req)
        recvq_posted_head = (*req)->dev.next;
    else
        prev_req->dev.next = (*req)->dev.next;
    if (recvq_posted_tail == *req)
        recvq_posted_tail = prev_req;

    MPIR_T_DEC(RECVQ_STATISTICS, posted_qlen);

    /* set error and complete */
    (*req)->status.MPI_ERROR = *error;
    MPIDI_CH3U_Request_complete(*req);
    MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
                     (MPIU_DBG_FDEST, "set error of req %p (%#08x) to %#x and completing.",
                      *req, (*req)->handle, *error));
    *req = next;
}
Esempio n. 2
0
void
MPID_nem_newmad_handle_sreq(MPID_Request *req)
{
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
#ifdef DEBUG
    fprintf(stdout,"========> Completing Send req  %p \n",req);
#endif
    reqFn = req->dev.OnDataAvail;
    if (!reqFn){
	MPIDI_CH3U_Request_complete(req);
	MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
    }
    else{
	MPIDI_VC_t *vc = req->ch.vc;
	int complete   = 0;
	reqFn(vc, req, &complete);
	if(!complete)
        {   
	   MPIU_Assert(complete == TRUE);
	}
    }
    if (REQ_FIELD(req,iov) != NULL)
	MPIU_Free((REQ_FIELD(req,iov)));
    mpid_nem_newmad_pending_send_req--;
}
Esempio n. 3
0
static int handle_probe(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const req = e->user_ptr;
    MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE);

    if (e->ni_fail_type == PTL_NI_NO_MATCH) {
        REQ_PTL(req)->found = FALSE;
        goto fn_exit;
    }

    REQ_PTL(req)->found = TRUE;
    req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
    req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);
    req->status.count = NPTL_HEADER_GET_LENGTH(e->match_bits);

    MPIDI_CH3U_Request_complete(req);
    
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
int MPIDI_CH3U_Handle_recv_req(MPIDI_VC_t * vc, MPID_Request * rreq, 
			       int * complete)
{
    static int in_routine ATTRIBUTE((unused)) = FALSE;
    int mpi_errno = MPI_SUCCESS;
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);

    MPIU_Assert(in_routine == FALSE);
    in_routine = TRUE;

    reqFn = rreq->dev.OnDataAvail;
    if (!reqFn) {
	MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
	MPIDI_CH3U_Request_complete(rreq);
	*complete = TRUE;
    }
    else {
        mpi_errno = reqFn( vc, rreq, complete );
    }

    in_routine = FALSE;
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ);
    return mpi_errno;
}
Esempio n. 5
0
int MPIDI_CH3_PktHandler_RndvSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, 
				   MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPIDI_CH3_Pkt_rndv_send_t * rs_pkt = &pkt->rndv_send;
    int mpi_errno = MPI_SUCCESS;
    int complete;
    char *data_buf;
    MPIDI_msg_sz_t data_len;
    MPID_Request *req;
    
    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received rndv send (data) pkt");

    MPID_Request_get_ptr(rs_pkt->receiver_req_id, req);

    data_len = ((*buflen - sizeof(MPIDI_CH3_Pkt_t) >= req->dev.recv_data_sz)
                ? req->dev.recv_data_sz : *buflen - sizeof(MPIDI_CH3_Pkt_t));
    data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);
    
    if (req->dev.recv_data_sz == 0) {
        *buflen = sizeof(MPIDI_CH3_Pkt_t);
	MPIDI_CH3U_Request_complete(req);
	*rreqp = NULL;
    }
    else {
        mpi_errno = MPIDI_CH3U_Receive_data_found(req, data_buf, &data_len,
                                                  &complete);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
			     "**ch3|postrecv %s", "MPIDI_CH3_PKT_RNDV_SEND");
	}

        *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

        if (complete) 
        {
            MPIDI_CH3U_Request_complete(req);
            *rreqp = NULL;
        }
        else
        {
            *rreqp = req;
        }
   }
	
 fn_fail:
    return mpi_errno;
}
Esempio n. 6
0
int MPID_nem_ib_lmt_done_recv(struct MPIDI_VC *vc, struct MPID_Request *rreq)
{
    int mpi_errno = MPI_SUCCESS;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV);

    dprintf("lmt_done_recv,enter,rreq=%p,head=%p\n", rreq, MPID_nem_ib_lmtq.head);


    int is_contig;
    MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);
    if (!is_contig) {
        dprintf("lmt_done_recv,copying noncontiguous data to user buffer\n");

        /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
        /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
        MPIDI_msg_sz_t unpack_sz = rreq->ch.lmt_data_sz;
        MPID_Segment seg;
        MPI_Aint last;

        MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &seg, 0);
        last = unpack_sz;
        MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(rreq, lmt_pack_buf));
        if (last != unpack_sz) {
            /* --BEGIN ERROR HANDLING-- */
            /* received data was not entirely consumed by unpack()
             * because too few bytes remained to fill the next basic
             * datatype */
            MPIR_STATUS_SET_COUNT(rreq->status, last);
            rreq->status.MPI_ERROR =
                MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
                                     MPI_ERR_TYPE, "**MPID_nem_ib_lmt_done_recv", 0);
            /* --END ERROR HANDLING-- */
        }

        //MPIU_Free(REQ_FIELD(rreq, lmt_pack_buf));
        MPID_nem_ib_stfree(REQ_FIELD(rreq, lmt_pack_buf), (size_t) rreq->ch.lmt_data_sz);
    }

    dprintf("lmt_done_recv,1,req=%p,pcc=%d\n", rreq, MPIDI_CH3I_progress_completion_count.v);
    MPIDI_CH3U_Request_complete(rreq);
    dprintf("lmt_done_recv,complete,req=%p\n", rreq);
    dprintf("lmt_done_recv,2,pcc=%d\n", MPIDI_CH3I_progress_completion_count.v);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV);
    return mpi_errno;
    //fn_fail:
    goto fn_exit;
}
Esempio n. 7
0
static inline int check_req_complete(MPIDI_VC_t *vc, MPID_Request *req, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
    reqFn = req->dev.OnDataAvail;
    if (reqFn) {
        *complete = 0;
        mpi_errno = reqFn(vc, req, complete);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else {
        *complete = 1;
        MPIDI_CH3U_Request_complete(req);
    }

fn_fail:
    return mpi_errno;
}
Esempio n. 8
0
int MPID_nem_lmt_dma_done_send(MPIDI_VC_t *vc, MPID_Request *sreq)
{
    int mpi_errno = MPI_SUCCESS;
    int complete = 0;
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);

    /* free cookie from RTS packet */
    MPIU_Free(sreq->ch.s_cookie);

    /* We shouldn't ever need to handle the more IOVs case here.  The DONE
       message should only be sent when all of the data is truly transferred.
       However in the interest of robustness, we'll start to handle it and
       assert if it looks like we were supposed to send more data for some
       reason. */
    reqFn = sreq->dev.OnDataAvail;
    if (!reqFn) {
        MPIDI_CH3U_Request_complete(sreq);
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
        goto fn_exit;
    }

    complete = 0;
    mpi_errno = reqFn(vc, sreq, &complete);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        
    if (complete) {
        /* request was completed by the OnDataAvail fn */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
        goto fn_exit;
    }
    else {
        /* There is more data to send. */
        MPIU_Assert(("should never be incomplete!", 0));
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);
fn_exit:
    return MPI_SUCCESS;
fn_fail:
    goto fn_exit;
}
Esempio n. 9
0
int MPID_nem_ib_lmt_done_send(struct MPIDI_VC *vc, struct MPID_Request *req)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_DONE_SEND);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_DONE_SEND);

    dprintf("lmt_done_send,enter,%d<-%d,req=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n",
            MPID_nem_ib_myrank, vc->pg_rank, req, REQ_FIELD(req, lmt_pack_buf));


    /* free memory area for cookie */
    if (!req->ch.s_cookie) {
        dprintf("lmt_done_send,enter,req->ch.s_cookie is zero");
    }
    MPIU_Free(req->ch.s_cookie);
    //dprintf("lmt_done_send,free cookie,%p\n", req->ch.s_cookie);

    /* free temporal buffer for eager-send non-contiguous data.
     * MPIDI_CH3U_Recvq_FDU_or_AEP (in mpid_isend.c) sets req->dev.datatype */
    int is_contig;
    MPID_Datatype_is_contig(req->dev.datatype, &is_contig);
    if (!is_contig && REQ_FIELD(req, lmt_pack_buf)) {
        dprintf("lmt_done_send,lmt-get,non-contiguous,free lmt_pack_buf\n");
#if 1   /* debug, enable again later */
        MPIU_Free(REQ_FIELD(req, lmt_pack_buf));
#endif
    }

    /* mark completion on sreq */
    MPIU_ERR_CHKANDJUMP(req->dev.OnDataAvail, mpi_errno, MPI_ERR_OTHER,
                        "**MPID_nem_ib_lmt_done_send");
    dprintf("lmt_done_send,1,req=%p,pcc=%d\n", req, MPIDI_CH3I_progress_completion_count.v);
    MPIDI_CH3U_Request_complete(req);
    dprintf("lmt_done_send,complete,req=%p\n", req);
    dprintf("lmt_done_send,2,req=%p,pcc=%d\n", req, MPIDI_CH3I_progress_completion_count.v);
    //dprintf("lmt_done_send, mark completion on sreq\n");

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_DONE_SEND);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Esempio n. 10
0
/* This dequeues req from the posted recv queue, set req's error code to comm_fail, and updates the req pointer.
   Note that this creates a new error code if one hasn't already been created (i.e., if *error is MPI_SUCCESS). */
static inline void dequeue_and_set_error(MPID_Request **req,  MPID_Request *prev_req, int *error, int rank)
{
    MPID_Request *next = (*req)->dev.next;

    if (*error == MPI_SUCCESS)
        MPIU_ERR_SET1(*error, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", rank);

    /* remove from queue */
    if (recvq_posted_head == *req)
        recvq_posted_head = (*req)->dev.next;
    else
        prev_req->dev.next = (*req)->dev.next;
    if (recvq_posted_tail == *req)
        recvq_posted_tail = prev_req;
    
    /* set error and complete */
    (*req)->status.MPI_ERROR = *error;
    MPIDI_CH3U_Request_complete(*req);
    *req = next;
}
Esempio n. 11
0
static int _mxm_handle_sreq(MPID_Request * req)
{
    int complete = FALSE;
    int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    vc_area = VC_BASE(req->ch.vc);
    req_area = REQ_BASE(req);

    _dbg_mxm_out_buf(req_area->iov_buf[0].ptr,
                     (req_area->iov_buf[0].length >
                      16 ? 16 : req_area->iov_buf[0].length));

    vc_area->pending_sends -= 1;
    if (((req->dev.datatype_ptr != NULL) && (req->dev.tmpbuf != NULL))) {
        MPIU_Free(req->dev.tmpbuf);
    }

    if (req_area->iov_count > MXM_MPICH_MAX_IOV) {
        MPIU_Free(req_area->iov_buf);
        req_area->iov_buf = req_area->tmp_buf;
        req_area->iov_count = 0;
    }

    reqFn = req->dev.OnDataAvail;
    if (!reqFn) {
        MPIDI_CH3U_Request_complete(req);
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
    }
    else {
        MPIDI_VC_t *vc = req->ch.vc;

        reqFn(vc, req, &complete);
        if (!complete) {
            MPIU_Assert(complete == TRUE);
        }
    }

    return complete;
}
int MPIDI_CH3U_Handle_send_req(MPIDI_VC_t * vc, MPID_Request * sreq, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);

    /* Use the associated function rather than switching on the old ca field */
    /* Routines can call the attached function directly */
    reqFn = sreq->dev.OnDataAvail;
    if (!reqFn) {
        MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
        MPIDI_CH3U_Request_complete(sreq);
        *complete = 1;
    }
    else {
        mpi_errno = reqFn(vc, sreq, complete);
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);
    return mpi_errno;
}
Esempio n. 13
0
static inline int check_req_complete(MPIDI_VC_t *vc, MPID_Request *req, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
    reqFn = req->dev.OnDataAvail;
    if (reqFn) {
        *complete = 0;

        /* XXX DJG FIXME this feels like a hack */
        req->dev.iov_count = MPID_IOV_LIMIT;
        req->dev.iov_offset = 0;

        mpi_errno = reqFn(vc, req, complete);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }
    else {
        *complete = 1;
        MPIDI_CH3U_Request_complete(req);
    }

fn_fail:
    return mpi_errno;
}
Esempio n. 14
0
int MPID_nem_scif_error_out_send_queue(struct MPIDI_VC *const vc, int req_errno)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *req;
    MPID_nem_scif_vc_area *const vc_scif = VC_SCIF(vc);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_ERROR_OUT_SEND_QUEUE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_ERROR_OUT_SEND_QUEUE);

    /* we don't call onDataAvail or onFinal handlers because this is
     * an error condition and we just want to mark them as complete */

    /* send queue */
    while (!MPIDI_CH3I_Sendq_empty(vc_scif->send_queue)) {
        MPIDI_CH3I_Sendq_dequeue(&vc_scif->send_queue, &req);
        req->status.MPI_ERROR = req_errno;

        MPIDI_CH3U_Request_complete(req);
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_ERROR_OUT_SEND_QUEUE);
    return mpi_errno;
}
Esempio n. 15
0
int MPID_nem_tcp_module_lmt_start_send (MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV r_cookie)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPIDI_msg_sz_t last;
    int nb;
    int s_len = 0;
    int r_len;
    int r_port;
    char *r_hostname;
    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);

    mpi_errno = read_r_cookie (r_cookie, &r_hostname, &r_port, &r_len);
    if (mpi_errno) MPIU_ERR_POP (mpi_errno);

    free_cookie (vc_ch->net.tcp.lmt_cookie);

    if (!vc_ch->net.tcp.lmt_connected)
    {
        struct sockaddr_in saddr;
        struct hostent *hp;

        vc_ch->net.tcp.lmt_desc = socket (AF_INET, SOCK_STREAM, 0);
        MPIU_ERR_CHKANDJUMP2 (vc_ch->net.tcp.lmt_desc == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", strerror (errno), errno);

        //        ret = fcntl (vc_ch->net.tcp.lmt_desc, F_SETFL, O_NONBLOCK);
        //        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);

        hp = gethostbyname (r_hostname);
        MPIU_ERR_CHKANDJUMP2 (hp == NULL, mpi_errno, MPI_ERR_OTHER, "**gethostbyname", "**gethostbyname %s %d", hstrerror (h_errno), h_errno);

        memset (&saddr, sizeof(saddr), 0);
        saddr.sin_family = AF_INET;
        saddr.sin_port   = htons (r_port);
        MPIU_Memcpy (&saddr.sin_addr, hp->h_addr, hp->h_length);

        set_sockopts (vc_ch->net.tcp.lmt_desc);

        ret = connect (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, sizeof(saddr));
        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);

        vc_ch->net.tcp.lmt_connected = 1;
    }

    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    if (r_len < data_sz)
    {
        /* message will be truncated */
        s_len = data_sz;
        data_sz = r_len;
 	req->status.MPI_ERROR = MPIU_ERR_SET2 (mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", s_len, r_len);
    }

    MPID_Segment_init (req->dev.user_buf, req->dev.user_count, req->dev.datatype, &req->dev.segment, 0);
    req->dev.segment_first = 0;
    req->dev.segment_size = data_sz;
    req->dev.iov_count = MPID_IOV_LIMIT;
    req->dev.iov_offset = 0;
    last = data_sz;

    do
    {
        int iov_offset;
        int left_to_send;
        MPID_Segment_pack_vector (&req->dev.segment, req->dev.segment_first, &last, req->dev.iov, &req->dev.iov_count);

        left_to_send = last - req->dev.segment_first;
        iov_offset = 0;

#ifdef TESTING_CHUNKING
        {
            char *buf = req->dev.iov[0].MPID_IOV_BUF;
            int l;
            while (left_to_send)
            {
                if (left_to_send > CHUNK)
                    l = CHUNK;
                else
                    l = left_to_send;

                do
                    nb = write (vc_ch->net.tcp.lmt_desc, buf, l);
                while (nb == -1 && errno == EINTR);
                MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");

                left_to_send -= nb;
                buf += nb;
            }

            MPIDI_CH3U_Request_complete (req);
            goto fn_exit;
        }
#endif

        do
            nb = writev (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
        while (nb == -1 && errno == EINTR);
        MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");

        left_to_send -= nb;
        while (left_to_send)
        { /* send rest of iov */
            while (nb >= req->dev.iov[iov_offset].MPID_IOV_LEN)
            { /* update iov to reflect sent bytes */
                nb -= req->dev.iov[iov_offset].MPID_IOV_LEN;
                ++iov_offset;
            }
            req->dev.iov[iov_offset].MPID_IOV_BUF = (char *)req->dev.iov[iov_offset].MPID_IOV_BUF + nb;
            req->dev.iov[iov_offset].MPID_IOV_LEN -= nb;

            do
                nb = writev (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
            while (nb == -1 && errno == EINTR);
            MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
            left_to_send -= nb;
        }
    }
    while (last < data_sz);

    MPIDI_CH3U_Request_complete (req);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Esempio n. 16
0
int MPID_nem_tcp_module_lmt_start_recv (MPIDI_VC_t *vc, MPID_Request *req)
{
    int mpi_errno = MPI_SUCCESS;
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPIDI_msg_sz_t last;
    int nb;
    int r_len;
    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);

    free_cookie (vc_ch->net.tcp.lmt_cookie);

    if (!vc_ch->net.tcp.lmt_connected)
    {
        int len;
        struct sockaddr_in saddr;
        int connfd;

        len = sizeof (saddr);
        connfd = accept (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, &len);
        MPIU_ERR_CHKANDJUMP2 (connfd == -1, mpi_errno, MPI_ERR_OTHER, "**sock|poll|accept", "**sock|poll|accept %d %s", errno, strerror (errno));

        /* close listen fd */
        do
            ret = close (vc_ch->net.tcp.lmt_desc);
        while (ret == -1 && errno == EINTR);
        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**closesocket", "**closesocket %s %d", strerror (errno), errno);

        /* set lmt_desc to new connected fd */
        vc_ch->net.tcp.lmt_desc = connfd;
        vc_ch->net.tcp.lmt_connected = 1;

        //        ret = fcntl (vc_ch->net.tcp.lmt_desc, F_SETFL, O_NONBLOCK);
        //        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
    }

    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    if (data_sz > vc_ch->net.tcp.lmt_s_len)
    {
        data_sz = vc_ch->net.tcp.lmt_s_len;
    }
    else if (data_sz < vc_ch->net.tcp.lmt_s_len)
    {
        /* message will be truncated */
        r_len = data_sz;
 	req->status.MPI_ERROR = MPIU_ERR_SET2 (mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", vc_ch->net.tcp.lmt_s_len, r_len);
    }

    MPID_Segment_init (req->dev.user_buf, req->dev.user_count, req->dev.datatype, &req->dev.segment, 0);
    req->dev.segment_first = 0;
    req->dev.segment_size = data_sz;
    req->dev.iov_count = MPID_IOV_LIMIT;
    req->dev.iov_offset = 0;
    last = data_sz;

    do
    {
        int iov_offset;
        int left_to_recv;

        MPID_Segment_unpack_vector (&req->dev.segment, req->dev.segment_first, &last, req->dev.iov, &req->dev.iov_count);

        left_to_recv = last - req->dev.segment_first;
        iov_offset = 0;

#ifdef TESTING_CHUNKING
        {
            char *buf = req->dev.iov[0].MPID_IOV_BUF;
            int l;
            while (left_to_recv)
            {
                if (left_to_recv > CHUNK)
                    l = CHUNK;
                else
                    l = left_to_recv;

                do
                    nb = read (vc_ch->net.tcp.lmt_desc, buf, l);
                while (nb == -1 && errno == EINTR);
                MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");

                left_to_recv -= nb;
                buf += nb;
            }
            MPIDI_CH3U_Request_complete (req);
            goto fn_exit;
        }
#endif

        do
            nb = readv (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
        while (nb == -1 && errno == EINTR);
        MPIU_ERR_CHKANDJUMP2 (nb == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
        MPIU_ERR_CHKANDJUMP (nb == 0, mpi_errno, MPI_ERR_OTHER, "**fail");

        left_to_recv -= nb;
        while (left_to_recv)
        { /* recv rest of iov */
            while (nb >= req->dev.iov[iov_offset].MPID_IOV_LEN)
            { /* update iov to reflect sent bytes */
                nb -= req->dev.iov[iov_offset].MPID_IOV_LEN;
                ++iov_offset;
            }
            req->dev.iov[iov_offset].MPID_IOV_BUF = (char *)req->dev.iov[iov_offset].MPID_IOV_BUF + nb;
            req->dev.iov[iov_offset].MPID_IOV_LEN -= nb;

            do
                nb = readv (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
            while (nb == -1 && errno == EINTR);
            MPIU_ERR_CHKANDJUMP2 (nb == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
            MPIU_ERR_CHKANDJUMP (nb == 0, mpi_errno, MPI_ERR_OTHER, "**fail");
            left_to_recv -= nb;
        }
    }
    while (last < data_sz);

    MPIDI_CH3U_Request_complete (req);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Esempio n. 17
0
MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag, 
                                           int context_id, MPID_Comm *comm, void *user_buf,
                                           int user_count, MPI_Datatype datatype, int * foundp)
{
    MPID_Time_t timer_start;
    int found;
    MPID_Request *rreq, *prev_rreq;
    MPIDI_Message_match match;
    MPIDI_Message_match mask;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);

    MPIU_THREAD_CS_ASSERT_HELD(MSGQUEUE);

    /* Store how much time is spent traversing the queue */
    MPIR_T_START_TIMER(RECVQ_STATISTICS, timer_start);

    /* Optimize this loop for an empty unexpected receive queue */
    rreq = recvq_unexpected_head;
    if (rreq) {
	prev_rreq = NULL;

	match.parts.context_id = context_id;
	match.parts.tag = tag;
	match.parts.rank = source;

	if (tag != MPI_ANY_TAG && source != MPI_ANY_SOURCE) {
	    do {
                MPIR_T_INC(RECVQ_STATISTICS, unexpected_recvq_match_attempts);
		if (MATCH_WITH_NO_MASK(rreq->dev.match, match)) {
		    if (prev_rreq != NULL) {
			prev_rreq->dev.next = rreq->dev.next;
		    }
		    else {
			recvq_unexpected_head = rreq->dev.next;
		    }

		    if (rreq->dev.next == NULL) {
			recvq_unexpected_tail = prev_rreq;
		    }
                    MPIR_T_DEC(RECVQ_STATISTICS, unexpected_qlen);

                    if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG)
                        MPIR_T_SUBTRACT(RECVQ_STATISTICS, MPIDI_CH3I_unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);

		    rreq->comm = comm;
		    MPIR_Comm_add_ref(comm);
		    rreq->dev.user_buf = user_buf;
		    rreq->dev.user_count = user_count;
		    rreq->dev.datatype = datatype;
		    found = TRUE;
		    goto lock_exit;
		}
		prev_rreq = rreq;
		rreq      = rreq->dev.next;
	    } while (rreq);
	}
	else {
	    mask.parts.context_id = mask.parts.rank = mask.parts.tag = ~0;
	    if (tag == MPI_ANY_TAG)
		match.parts.tag = mask.parts.tag = 0;
	    if (source == MPI_ANY_SOURCE)
		match.parts.rank = mask.parts.rank = 0;

	    do {
                MPIR_T_INC(RECVQ_STATISTICS, unexpected_recvq_match_attempts);
		if (MATCH_WITH_LEFT_MASK(rreq->dev.match, match, mask)) {
		    if (prev_rreq != NULL) {
			prev_rreq->dev.next = rreq->dev.next;
		    }
		    else {
			recvq_unexpected_head = rreq->dev.next;
		    }
		    if (rreq->dev.next == NULL) {
			recvq_unexpected_tail = prev_rreq;
		    }
                    MPIR_T_DEC(RECVQ_STATISTICS, unexpected_qlen);

                    if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG)
                        MPIR_T_SUBTRACT(RECVQ_STATISTICS, MPIDI_CH3I_unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);

		    rreq->comm                 = comm;
		    MPIR_Comm_add_ref(comm);
		    rreq->dev.user_buf         = user_buf;
		    rreq->dev.user_count       = user_count;
		    rreq->dev.datatype         = datatype;
		    found = TRUE;
		    goto lock_exit;
		}
		prev_rreq = rreq;
		rreq = rreq->dev.next;
	    } while (rreq);
	}
    }
    MPIR_T_END_TIMER(RECVQ_STATISTICS, timer_start, time_matching_unexpectedq);

    /* A matching request was not found in the unexpected queue, so we 
       need to allocate a new request and add it to the posted queue */
    {
	int mpi_errno = MPI_SUCCESS;

        found = FALSE;

	MPIDI_Request_create_rreq( rreq, mpi_errno, goto lock_exit );
	rreq->dev.match.parts.tag	   = tag;
	rreq->dev.match.parts.rank	   = source;
	rreq->dev.match.parts.context_id   = context_id;

	/* Added a mask for faster search on 64-bit capable
	 * platforms */
	rreq->dev.mask.parts.context_id = ~0;
	if (rreq->dev.match.parts.rank == MPI_ANY_SOURCE)
	    rreq->dev.mask.parts.rank = 0;
	else
	    rreq->dev.mask.parts.rank = ~0;
	if (rreq->dev.match.parts.tag == MPI_ANY_TAG)
	    rreq->dev.mask.parts.tag = 0;
	else
	    rreq->dev.mask.parts.tag = ~0;

        rreq->comm                 = comm;
        MPIR_Comm_add_ref(comm);
        rreq->dev.user_buf         = user_buf;
        rreq->dev.user_count       = user_count;
        rreq->dev.datatype         = datatype;

        /* check whether VC has failed, or this is an ANY_SOURCE in a
           failed communicator */
        if (source != MPI_ANY_SOURCE) {
            MPIDI_VC_t *vc;
            MPIDI_Comm_get_vc(comm, source, &vc);
            if (vc->state == MPIDI_VC_STATE_MORIBUND) {
                MPIU_ERR_SET1(mpi_errno, MPIX_ERR_PROC_FAIL_STOP, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                rreq->status.MPI_ERROR = mpi_errno;
                MPIDI_CH3U_Request_complete(rreq);
                goto lock_exit;
            }
        } else if (!MPIDI_CH3I_Comm_AS_enabled(comm)) {
            MPIU_ERR_SET(mpi_errno, MPIX_ERR_PROC_FAIL_STOP, "**comm_fail");
            rreq->status.MPI_ERROR = mpi_errno;
            MPIDI_CH3U_Request_complete(rreq);
            goto lock_exit;
        }
        
	rreq->dev.next = NULL;
	if (recvq_posted_tail != NULL) {
	    recvq_posted_tail->dev.next = rreq;
	}
	else {
	    recvq_posted_head = rreq;
	}
	recvq_posted_tail = rreq;
        MPIR_T_INC(RECVQ_STATISTICS, posted_qlen);
	MPIDI_POSTED_RECV_ENQUEUE_HOOK(rreq);
    }
    
  lock_exit:
    *foundp = found;

    /* If a match was not found, the timer was stopped after the traversal */
    if (found)
        MPIR_T_END_TIMER(RECVQ_STATISTICS, timer_start, time_matching_unexpectedq);
    
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
    return rreq;
}
Esempio n. 18
0
void MPIDI_CH3_Rendezvous_r3_push(MPIDI_VC_t * vc, MPID_Request * sreq)
{
    vbuf *buf;
    MPID_IOV iov[MPID_IOV_LIMIT + 1];
    int n_iov;
    int msg_buffered = 0;
    int nb;
    int complete = 0;
    int seqnum;
    int finished = 0;
    int mpi_errno;
    int wait_for_rndv_r3_ack = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_R3_PUSH);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_R3_PUSH);

    MPIDI_CH3_Pkt_rndv_r3_data_t pkt_head;

    MPIDI_Pkt_init(&pkt_head, MPIDI_CH3_PKT_RNDV_R3_DATA);
    iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t);
    iov[0].MPID_IOV_BUF = (void*) &pkt_head;
    pkt_head.receiver_req_id = sreq->mrail.partner_id;

    do {
        do {
#ifndef DAPL_DEFAULT_PROVIDER
	    /* stop sending more R3 data to avoid SRQ flooding at receiver */
            if (MPIDI_CH3I_RDMA_Process.has_srq) {
                if (vc->ch.pending_r3_data >= rdma_max_r3_pending_data) {
                    wait_for_rndv_r3_ack = 1;
                    break;
                }
            }		
#endif	    
            MPIDI_VC_FAI_send_seqnum(vc, seqnum);
            MPIDI_Pkt_set_seqnum(&pkt_head, seqnum);
            MPIDI_Request_set_seqnum(sreq, seqnum);

            MPIU_Memcpy((void *) &iov[1],
                   &sreq->dev.iov[sreq->dev.iov_offset],
                   (sreq->dev.iov_count -
                    sreq->dev.iov_offset) * sizeof(MPID_IOV));
            n_iov = sreq->dev.iov_count - sreq->dev.iov_offset + 1;

            DEBUG_PRINT("iov count (sreq): %d, offset %d, len[1] %d\n",
                        sreq->dev.iov_count, sreq->dev.iov_offset,
                        sreq->dev.iov[0].MPID_IOV_LEN);

            {
                int i = 0;
                size_t  total_len = 0;
                for (i = 0; i < n_iov; i++) {
                    total_len += (iov[i].MPID_IOV_LEN);
                }

                mpi_errno =
                    MPIDI_CH3I_MRAILI_Eager_send(vc, iov, n_iov, 
                        total_len, &nb, &buf);
            }

            DEBUG_PRINT("[istartmsgv] mpierr %d, nb %d\n", mpi_errno,
                    nb);

            if (MPI_SUCCESS != mpi_errno
                && MPI_MRAIL_MSG_QUEUED != mpi_errno) {
                vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
                sreq->status.MPI_ERROR = MPI_ERR_INTERN;
                MPIDI_CH3U_Request_complete(sreq);
                return;
            } else if (MPI_MRAIL_MSG_QUEUED == mpi_errno) {
                msg_buffered = 1;
            }

            nb -= sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t);
            finished = MPIDI_CH3I_Request_adjust_iov(sreq, nb);
            DEBUG_PRINT("ajust iov finish: %d\n", finished);
            vc->ch.pending_r3_data += nb;
        } while (!finished/* && !msg_buffered*/);

        if (wait_for_rndv_r3_ack) {
            break;
        }
        if (finished && sreq->dev.OnDataAvail ==
			MPIDI_CH3_ReqHandler_SendReloadIOV) {
            MPIDI_CH3U_Handle_send_req(vc, sreq, &complete);
            nb = 0;
            complete = 0;
        } else if (finished) {
            complete = 1;
        }
    } while (/* 1 != msg_buffered && */0 == complete);

    DEBUG_PRINT("exit loop with complete %d, msg_buffered %d wiat %d pending data:%d \n", complete,
                msg_buffered, wait_for_rndv_r3_ack, vc->ch.pending_r3_data);

    if (wait_for_rndv_r3_ack) { //|| 0 == complete && 1 == msg_buffered) {
        sreq->mrail.nearly_complete = 0;
    } else if (1 == msg_buffered) {
        buf->sreq = (void *) sreq;
        sreq->mrail.nearly_complete = 1;
    } else {
        buf->sreq = NULL;
        MPIDI_CH3U_Handle_send_req(vc, sreq, &complete);
        sreq->mrail.nearly_complete = 1;
    }

    if (sreq->mrail.nearly_complete) {
        DEBUG_PRINT("R3 PUSH completed\n");
    } else {
        DEBUG_PRINT("Send Max R3 Pending Data. waiting for ACK\n");
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RNDV_R3_PUSH);
}
Esempio n. 19
0
static int MPIDI_CH3_SMP_Rendezvous_push(MPIDI_VC_t * vc,
                                                MPID_Request * sreq)
{
    int nb;
    int complete = 0;
    int seqnum;
    int mpi_errno;
    MPIDI_CH3_Pkt_rndv_r3_data_t pkt_head;
    MPID_Request * send_req;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);

    MPIDI_Pkt_init(&pkt_head, MPIDI_CH3_PKT_RNDV_R3_DATA);
    pkt_head.receiver_req_id = sreq->mrail.partner_id;
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(&pkt_head, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);
#if defined(_SMP_LIMIC_)
    /* Use limic2 for contiguous data 
     * Use shared memory for non-contiguous data
     */
    if (!g_smp_use_limic2 ||
        sreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_SendReloadIOV ||
        sreq->dev.iov_count > 1) {
        pkt_head.send_req_id = NULL;
    } else {
        pkt_head.send_req_id = sreq;
    }
#endif

    mpi_errno = MPIDI_CH3_iStartMsg(vc, &pkt_head,
                                    sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t),
                                    &send_req);

    if (mpi_errno != MPI_SUCCESS) {
         MPIU_Object_set_ref(sreq, 0);
         MPIDI_CH3_Request_destroy(sreq);
         sreq = NULL;
         mpi_errno =
             MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME,
                              __LINE__, MPI_ERR_OTHER, "**ch3|rtspkt",
                              0);
        return mpi_errno;
    }
    /* --END ERROR HANDLING-- */
    if (send_req != NULL) {
        DEBUG_PRINT("r3 packet not sent \n");
        MPID_Request_release(send_req);
    }
 
#if defined(_SMP_LIMIC_)
      if (pkt_head.send_req_id) {
        sreq->mrail.nearly_complete = 1;
        return MPI_SUCCESS;
    }
#endif

    vc->smp.send_current_pkt_type = SMP_RNDV_MSG;

    DEBUG_PRINT("r3 sent req is %p\n", sreq);
    if (MPIDI_CH3I_SMP_SendQ_empty(vc)) {
        for (;;) {
            DEBUG_PRINT("iov count (sreq): %d, offset %d, len[1] %d\n",
                        sreq->dev.iov_count, sreq->dev.iov_offset,
                        sreq->dev.iov[0].MPID_IOV_LEN);

            if (vc->smp.send_current_pkt_type == SMP_RNDV_MSG) {
                mpi_errno = MPIDI_CH3I_SMP_writev_rndv_data(vc, 
                                &sreq->dev.iov[sreq->dev.iov_offset], 
                                sreq->dev.iov_count - sreq->dev.iov_offset,
                                &nb);
            } else {
                MPIU_Assert(vc->smp.send_current_pkt_type == SMP_RNDV_MSG_CONT);
                MPIDI_CH3I_SMP_writev_rndv_data_cont(vc,
                                &sreq->dev.iov[sreq->dev.iov_offset],
                                sreq->dev.iov_count - sreq->dev.iov_offset,
                                &nb);
            }

            if (MPI_SUCCESS != mpi_errno) {
                vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
                sreq->status.MPI_ERROR = MPI_ERR_INTERN;
                MPIDI_CH3U_Request_complete(sreq);
                return mpi_errno;
            }

            if (nb >= 0) {
                if (MPIDI_CH3I_Request_adjust_iov(sreq, nb)) {
                    MPIDI_CH3U_Handle_send_req(vc, sreq, &complete);
                    if (complete) {
                        sreq->mrail.nearly_complete = 1;
                        break;
                    } else {
                        vc->smp.send_current_pkt_type = SMP_RNDV_MSG_CONT;
                    }
                } else {
                    sreq->ch.reqtype = REQUEST_RNDV_R3_DATA;
                    MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq);
                    vc->smp.send_active = sreq;
                    sreq->mrail.nearly_complete = 1;
                    vc->smp.send_current_pkt_type = SMP_RNDV_MSG_CONT;
                    break;
                }
            } else {
                MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq);
                vc->smp.send_active = sreq;
                sreq->mrail.nearly_complete = 1;
                break;
            }
        }
    } else {
        sreq->ch.reqtype = REQUEST_RNDV_R3_DATA;
        MPIDI_CH3I_SMP_SendQ_enqueue(vc, sreq);
        sreq->mrail.nearly_complete = 1;
        vc->smp.send_current_pkt_type = SMP_RNDV_MSG;
        DEBUG_PRINT("Enqueue sreq %p", sreq);
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);
    return MPI_SUCCESS;
}
Esempio n. 20
0
static int ReadMoreData( MPIDI_CH3I_Connection_t * conn, MPID_Request *rreq )
{
    int mpi_errno = MPI_SUCCESS;
    
    while (1) {
	MPID_IOV * iovp;
	MPIU_Size_t nb;
	
	iovp = rreq->dev.iov;
			    
	mpi_errno = MPIDU_Sock_readv(conn->sock, iovp, 
				     rreq->dev.iov_count, &nb);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS) {
	    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
					     "**ch3|sock|immedread", "ch3|sock|immedread %p %p %p",
					     rreq, conn, conn->vc);
	    goto fn_fail;
	}
	/* --END ERROR HANDLING-- */

	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
		 (MPIU_DBG_FDEST,"immediate readv, vc=%p nb=" MPIDI_MSG_SZ_FMT ", rreq=0x%08x",
		  conn->vc, nb, rreq->handle));
				
	if (nb > 0 && adjust_iov(&iovp, &rreq->dev.iov_count, nb)) {
	    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
	    int complete;
	    
	    reqFn = rreq->dev.OnDataAvail;
	    if (!reqFn) {
		MPIU_Assert(MPIDI_Request_get_type(rreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
		MPIDI_CH3U_Request_complete(rreq);
		complete = TRUE;
	    }
	    else {
		mpi_errno = reqFn( conn->vc, rreq, &complete );
		if (mpi_errno) MPIU_ERR_POP(mpi_errno);
	    }
	    
	    if (complete) {
		conn->recv_active = NULL; /* -- already set to NULL */
		mpi_errno = connection_post_recv_pkt(conn);
		if (mpi_errno != MPI_SUCCESS) {
		    MPIU_ERR_POP(mpi_errno);
		}
		
		break;
	    }
	}
	else {
	    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
        (MPIU_DBG_FDEST,"posting readv, vc=%p, rreq=0x%08x", 
	 conn->vc, rreq->handle));
	    conn->recv_active = rreq;
	    mpi_errno = MPIDU_Sock_post_readv(conn->sock, iovp, rreq->dev.iov_count, NULL);
	    /* --BEGIN ERROR HANDLING-- */
	    if (mpi_errno != MPI_SUCCESS) {
		mpi_errno = MPIR_Err_create_code(
		 mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**ch3|sock|postread",
		"ch3|sock|postread %p %p %p", rreq, conn, conn->vc);
		goto fn_fail;
	    }
	    /* --END ERROR HANDLING-- */
	    break;
	}
    }

 fn_fail:
    return mpi_errno;
}
Esempio n. 21
0
int MPID_nem_scif_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *header,
                                MPIDI_msg_sz_t hdr_sz)
{
    int mpi_errno = MPI_SUCCESS;
    int iov_n;
    MPID_IOV iov[MPID_IOV_LIMIT];
    MPID_IOV *iov_p;
    MPIDI_msg_sz_t offset = 0;
    int complete;
    uint64_t seqno = 0;
    MPID_nem_scif_vc_area *vc_scif = VC_SCIF(vc);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_SENDNONCONTIG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_SENDNONCONTIG);

    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "scif_SendNoncontig");
    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    iov[0].MPID_IOV_BUF = header;
    iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t);

    iov_n = MPID_IOV_LIMIT - 1;

    mpi_errno = MPIDI_CH3U_Request_load_send_iov(sreq, &iov[1], &iov_n);
    MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**loadsendiov");

    iov_n += 1;
    offset = 0;

    if (MPIDI_CH3I_Sendq_empty(vc_scif->send_queue) &&
        MPID_nem_scif_poll_send(vc_scif->sc->fd, &vc_scif->sc->csend)) {
        offset =
            MPID_nem_scif_writev(vc_scif->sc->fd, &vc_scif->sc->csend, iov, iov_n, &seqno);
        MPIU_ERR_CHKANDJUMP1(offset <= 0, mpi_errno, MPI_ERR_OTHER,
                             "**scif_writev", "**scif_writev %s", MPIU_Strerror(errno));

        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "scif_send noncontig " MPIDI_MSG_SZ_FMT, offset);
    }

    if (offset < iov[0].MPID_IOV_LEN) {
        /* header was not yet sent, save it in req */
        sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) header;
        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) & sreq->dev.pending_pkt;
        iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t);
    }

    /* check if whole iov was sent, and save any unsent portion of
     * iov */
    sreq->dev.iov_count = 0;
    complete = 1;
    for (iov_p = &iov[0]; iov_p < &iov[iov_n]; ++iov_p) {
        if (offset < iov_p->MPID_IOV_LEN) {
            sreq->dev.iov[sreq->dev.iov_count].MPID_IOV_BUF =
                (MPID_IOV_BUF_CAST) ((char *) iov_p->MPID_IOV_BUF + offset);
            sreq->dev.iov[sreq->dev.iov_count].MPID_IOV_LEN = iov_p->MPID_IOV_LEN - offset;
            offset = 0;
            ++sreq->dev.iov_count;
            complete = 0;
            seqno = 0;
        }
        else
            offset -= iov_p->MPID_IOV_LEN;
    }
    if (seqno)
        complete = MPID_nem_scif_chk_seqno(&vc_scif->sc->csend, seqno);

    if (complete) {
        /* sent whole iov */
        int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);

        reqFn = sreq->dev.OnDataAvail;
        if (!reqFn) {
            MPIDI_CH3U_Request_complete(sreq);
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
            goto fn_exit;
        }

        complete = 0;
        mpi_errno = reqFn(vc, sreq, &complete);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);

        if (complete) {
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
            goto fn_exit;
        }
        seqno = 0;
    }

    /* enqueue request */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "enqueuing");
    MPIU_Assert(seqno || (sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0));

    RQ_SCIF(sreq)->seqno = seqno;
    sreq->ch.vc = vc;
    sreq->dev.iov_offset = 0;

    if (MPIDI_CH3I_Sendq_empty(vc_scif->send_queue)) {
        /* this will be the first send on the queue: queue it and set
         * the write flag on the pollfd */
        MPIDI_CH3I_Sendq_enqueue(&vc_scif->send_queue, sreq);
    }
    else {
        /* there are other sends in the queue before this one: try to
         * send from the queue */
        MPIDI_CH3I_Sendq_enqueue(&vc_scif->send_queue, sreq);
        mpi_errno = MPID_nem_scif_send_queued(vc, &vc_scif->send_queue);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_SENDNONCONTIG);
    return mpi_errno;
  fn_fail:
    MPIU_Object_set_ref(sreq, 0);
    MPIDI_CH3_Request_destroy(sreq);
    goto fn_exit;
}
Esempio n. 22
0
int MPIDI_CH3_Packetized_send(MPIDI_VC_t * vc, MPID_Request * sreq)
{
    MPIDI_CH3_Pkt_packetized_send_start_t send_start;
    MPIDI_CH3_Pkt_packetized_send_data_t pkt_head;

    vbuf *buf;
    int mpi_errno = MPI_SUCCESS;
    int n_iov;
    int msg_buffered = 0;
    int nb;
    int complete;
    int pkt_len;
    int seqnum;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_SENDV);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SENDV);
    MPIU_DBG_PRINTF(("ch3_isendv\n"));
    MPIDI_DBG_PRINTF((50, FCNAME, "entering"));

    MPIDI_Pkt_init(&send_start, MPIDI_CH3_PKT_PACKETIZED_SEND_START);
    iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_packetized_send_start_t);
    iov[0].MPID_IOV_BUF = (void*) &send_start;
    MPIU_Memcpy(&iov[1], sreq->dev.iov, sreq->dev.iov_count * sizeof(MPID_IOV));
    n_iov = 1 + sreq->dev.iov_count;

    GET_SEQ_NUM(sreq->dev.iov[0].MPID_IOV_BUF, seqnum);
    
    if (-1 == seqnum) {
        MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    }
    MPIDI_Pkt_set_seqnum(&send_start, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);
    send_start.origin_head_size = sreq->dev.iov[0].MPID_IOV_LEN;

    Calculate_IOV_len(iov, n_iov, pkt_len);

    mpi_errno =
        MPIDI_CH3I_MRAILI_Eager_send(vc, iov, n_iov, pkt_len, &nb, &buf);
    DEBUG_PRINT("[pkt send] mpierr %d, nb %d\n", mpi_errno, nb);

    if (MPI_SUCCESS != mpi_errno && MPI_MRAIL_MSG_QUEUED != mpi_errno) {
        vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
        sreq->status.MPI_ERROR = MPI_ERR_INTERN;
        MPIDI_CH3U_Request_complete(sreq);
        goto fn_exit;
    } else if (MPI_MRAIL_MSG_QUEUED == mpi_errno) {
        msg_buffered = 1;
    }
    nb -= sizeof(MPIDI_CH3_Pkt_packetized_send_start_t);

    MPIDI_Pkt_init(&pkt_head, MPIDI_CH3_PKT_PACKETIZED_SEND_DATA);
    iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_packetized_send_data_t);
    iov[0].MPID_IOV_BUF = (void*) &pkt_head;

    do {
        while (!MPIDI_CH3I_Request_adjust_iov(sreq, nb)) {
            MPIDI_VC_FAI_send_seqnum(vc, seqnum);
            MPIDI_Pkt_set_seqnum(&pkt_head, seqnum);
            MPIDI_Request_set_seqnum(sreq, seqnum);

            MPIU_Memcpy((void *) &iov[1],
                   &sreq->dev.iov[sreq->dev.iov_offset],
                   (sreq->dev.iov_count -
                    sreq->dev.iov_offset) * sizeof(MPID_IOV));
            n_iov = sreq->dev.iov_count - sreq->dev.iov_offset + 1;

            Calculate_IOV_len(iov, n_iov, pkt_len);

            mpi_errno =
                MPIDI_CH3I_MRAILI_Eager_send(vc, iov, n_iov, pkt_len, &nb,
                        &buf);
            DEBUG_PRINT("[istartmsgv] mpierr %d, nb %d\n", mpi_errno,
                    nb);
            MPIU_Assert(NULL == buf->sreq);

            if (MPI_SUCCESS != mpi_errno
                && MPI_MRAIL_MSG_QUEUED != mpi_errno) {
                vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
                sreq->status.MPI_ERROR = MPI_ERR_INTERN;
                MPIDI_CH3U_Request_complete(sreq);
                goto fn_exit;
            } else if (MPI_MRAIL_MSG_QUEUED == mpi_errno) {
                msg_buffered = 1;
            }

            nb -= sizeof(MPIDI_CH3_Pkt_packetized_send_data_t);
        }
        if (sreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_SendReloadIOV) {
            MPIDI_CH3U_Handle_send_req(vc, sreq, &complete);
            nb = 0;
            complete = 0;
        } else {
            complete = 1;
        }
    } while (!complete);

    if (msg_buffered) {
        mpi_errno = MPI_MRAIL_MSG_QUEUED;
        buf->sreq = (void *) sreq;
    } else {
        MPIDI_CH3U_Handle_send_req(vc, sreq, &complete);
    }

  fn_exit:
    MPIDI_DBG_PRINTF((50, FCNAME, "exiting"));
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SENDV);
    return mpi_errno;

}
Esempio n. 23
0
int MPIDI_CH3_iSendv(MPIDI_VC_t * vc, MPID_Request * sreq, 
		     MPID_IOV * iov, int n_iov)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vcch = &vc->ch;
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISENDV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISENDV);

    MPIU_Assert(n_iov <= MPID_IOV_LIMIT);
    MPIU_Assert(iov[0].MPID_IOV_LEN <= sizeof(MPIDI_CH3_Pkt_t));

    /* The sock channel uses a fixed length header, the size of which is the 
       maximum of all possible packet headers */
    iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t);
    MPIU_DBG_STMT(CH3_CHANNEL,VERBOSE,
	 MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)iov[0].MPID_IOV_BUF));

    if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTED) /* MT */
    {
	/* Connection already formed.  If send queue is empty attempt to send 
	   data, queuing any unsent data. */
	if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
	{
	    MPIU_Size_t nb;
	    int rc;

	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
			 "send queue empty, attempting to write");
	    
	    MPIU_DBG_PKT(vcch->conn,(MPIDI_CH3_Pkt_t*)iov[0].MPID_IOV_BUF,
			 "isendv");
	    /* MT - need some signalling to lock down our right to use the 
	       channel, thus insuring that the progress engine does
               also try to write */

	    /* FIXME: the current code only agressively writes the first IOV.  
	       Eventually it should be changed to agressively write
               as much as possible.  Ideally, the code would be shared between 
	       the send routines and the progress engine. */
	    rc = MPIDU_Sock_writev(vcch->sock, iov, n_iov, &nb);
	    if (rc == MPI_SUCCESS)
	    {
		int offset = 0;

		MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
			       "wrote %ld bytes", (unsigned long) nb);
		
		while (offset < n_iov)
		{
		    if ((int)iov[offset].MPID_IOV_LEN <= nb)
		    {
			nb -= iov[offset].MPID_IOV_LEN;
			offset++;
		    }
		    else
		    {
			MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
			     "partial write, request enqueued at head");
			update_request(sreq, iov, n_iov, offset, nb);
			MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
			MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
    (MPIU_DBG_FDEST,"posting writev, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
			vcch->conn->send_active = sreq;
			mpi_errno = MPIDU_Sock_post_writev(vcch->conn->sock, 
					   sreq->dev.iov + offset,
					   sreq->dev.iov_count - offset, NULL);
			/* --BEGIN ERROR HANDLING-- */
			if (mpi_errno != MPI_SUCCESS)
			{
			    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
							     "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
							     sreq, vcch->conn, vc);
			}
			/* --END ERROR HANDLING-- */

			break;
		    }

		}
		if (offset == n_iov)
		{
		    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
				 "write complete, calling OnDataAvail fcn");
		    reqFn = sreq->dev.OnDataAvail;
		    if (!reqFn) {
			MPIU_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
			MPIDI_CH3U_Request_complete(sreq);
		    }
		    else {
			int complete;
			mpi_errno = reqFn( vc, sreq, &complete );
			if (mpi_errno) MPIU_ERR_POP(mpi_errno);
			if (!complete) {
			    MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
			    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
    (MPIU_DBG_FDEST,"posting writev, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
			    vcch->conn->send_active = sreq;
			    mpi_errno = MPIDU_Sock_post_writev(
				vcch->conn->sock, sreq->dev.iov, 
				sreq->dev.iov_count, NULL);
			    /* --BEGIN ERROR HANDLING-- */
			    if (mpi_errno != MPI_SUCCESS)
			    {
				mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
								 "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
								 sreq, vcch->conn, vc);
			    }
			    /* --END ERROR HANDLING-- */
			}
		    }
		}
	    }
	    /* --BEGIN ERROR HANDLING-- */
	    else if (MPIR_ERR_GET_CLASS(rc) == MPIDU_SOCK_ERR_NOMEM)
	    {
		MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,
			     "MPIDU_Sock_writev failed, out of memory");
		sreq->status.MPI_ERROR = MPIR_ERR_MEMALLOCFAILED;
	    }
	    else
	    {
		MPIU_DBG_MSG_D(CH3_CHANNEL,TYPICAL,
			       "MPIDU_Sock_writev failed, rc=%d", rc);
		/* Connection just failed.  Mark the request complete and 
		   return an error. */
		MPIU_DBG_VCCHSTATECHANGE(vc,VC_STATE_FAILED);
		/* FIXME: Shouldn't the vc->state also change? */

		vcch->state = MPIDI_CH3I_VC_STATE_FAILED;
		sreq->status.MPI_ERROR = MPIR_Err_create_code( rc,
			       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, 
			       MPI_ERR_INTERN, "**ch3|sock|writefailed", 
			       "**ch3|sock|writefailed %d", rc );
		 /* MT - CH3U_Request_complete performs write barrier */
		MPIDI_CH3U_Request_complete(sreq);
		/* Return error to calling routine */
		mpi_errno = sreq->status.MPI_ERROR;
	    }
	    /* --END ERROR HANDLING-- */
	}
	else
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"send queue not empty, enqueuing");
	    update_request(sreq, iov, n_iov, 0, 0);
	    MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
	}
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTING)
    {
	/* queuing the data so it can be sent later. */
	MPIU_DBG_VCUSE(vc,"connecting.  Enqueuing request");
	update_request(sreq, iov, n_iov, 0, 0);
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_UNCONNECTED)
    {
	/* Form a new connection, queuing the data so it can be sent later. */
	MPIU_DBG_VCUSE(vc,"unconnected.  Enqueuing request");
	update_request(sreq, iov, n_iov, 0, 0);
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
	mpi_errno = MPIDI_CH3I_VC_post_connect(vc);
	if (mpi_errno) {
	    MPIU_ERR_POP(mpi_errno);
	}
    }
    else if (vcch->state != MPIDI_CH3I_VC_STATE_FAILED)
    {
	/* Unable to send data at the moment, so queue it for later */
	MPIU_DBG_VCUSE(vc,"still connecting.  enqueuing request");
	update_request(sreq, iov, n_iov, 0, 0);
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    /* --BEGIN ERROR HANDLING-- */
    else
    {
	MPIU_DBG_VCUSE(vc,"connection failed");
	/* Connection failed.  Mark the request complete and return an error. */
	/* TODO: Create an appropriate error message */
	sreq->status.MPI_ERROR = MPI_ERR_INTERN;
	/* MT - CH3U_Request_complete performs write barrier */
	MPIDI_CH3U_Request_complete(sreq);
    }
    /* --END ERROR HANDLING-- */

 fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISENDV);
    return mpi_errno;
}
Esempio n. 24
0
int MPID_nem_lmt_dma_progress(void)
{
    int mpi_errno = MPI_SUCCESS;
    struct lmt_dma_node *prev = NULL;
    struct lmt_dma_node *free_me = NULL;
    struct lmt_dma_node *cur = outstanding_head;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);
    
    /* Iterate over a linked-list of (req,status_idx)-tuples looking for
       completed/failed requests.  Currently knem only provides status to the
       receiver, so all of these requests are recv requests. */
    while (cur) {
        switch (*cur->status_p) {
            case KNEM_STATUS_SUCCESS:
                {
                    /* complete the request if all data has been sent, remove it from the list */
                    int complete = 0;
                    mpi_errno = check_req_complete(cur->vc, cur->req, &complete);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

                    free_status_index(cur->status_p - knem_status);

                    if (complete) {
                        /* request was completed by the OnDataAvail fn */
                        MPID_nem_lmt_send_DONE(cur->vc, cur->req); /* tell the other side to complete its request */
                        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");

                    }
                    else {
                        /* There is more data to send.  We must inform the sender that we have
                           completely received the current batch and that the next batch should
                           be sent. */
                        MPID_nem_lmt_send_COOKIE(cur->vc, cur->req, NULL, 0);
                    }

                    /* Right now we always free the cur element, even if the
                       request is incomplete because it simplifies the logic. */
                    if (cur == outstanding_head) {
                        outstanding_head = cur->next;
                        prev = NULL;
                        free_me = cur;
                        cur = cur->next;
                    }
                    else {
                        prev->next = cur->next;
                        free_me = cur;
                        cur = cur->next;
                    }
                    if (free_me) MPIU_Free(free_me);
                    --MPID_nem_local_lmt_pending;
                    continue;
                }
                break;
            case KNEM_STATUS_FAILED:
                /* set the error status for the request, complete it then dequeue the entry */
                cur->req->status.MPI_ERROR = MPI_SUCCESS;
                MPIU_ERR_SET1(cur->req->status.MPI_ERROR, MPI_ERR_OTHER, "**recv_status", "**recv_status %d", *cur->status_p);

                MPIDI_CH3U_Request_complete(cur->req);

                if (cur == outstanding_head) {
                    outstanding_head = cur->next;
                    prev = NULL;
                    free_me = cur;
                    cur = cur->next;
                }
                else {
                    prev->next = cur->next;
                    free_me = cur;
                    cur = cur->next;
                }

                if (free_me) MPIU_Free(free_me);
                --MPID_nem_local_lmt_pending;
                continue;
                
                break;
            case KNEM_STATUS_PENDING:
                /* nothing to do here */
                break;
            default:
                MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**invalid_knem_status",
                                     "**invalid_knem_status %d", *cur->status_p);
                break;
        }

        prev = cur;
        cur = cur->next;
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Esempio n. 25
0
int MPID_nem_scif_send_queued(MPIDI_VC_t * vc, reqq_t * send_queue)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq;
    MPIDI_msg_sz_t offset;
    MPID_IOV *iov;
    int complete;
    uint64_t seqno;
    MPID_nem_scif_vc_area *vc_scif = VC_SCIF(vc);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_SEND_QUEUED);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_SEND_QUEUED);

    MPIU_Assert(vc != NULL);

    if (MPIDI_CH3I_Sendq_empty(*send_queue)) {
        if (vc_scif->terminate)
            MPID_nem_scif_vc_terminated(vc);
        goto fn_exit;
    }
    while (!MPIDI_CH3I_Sendq_empty(*send_queue)) {
        sreq = MPIDI_CH3I_Sendq_head(*send_queue);
        seqno = RQ_SCIF(sreq)->seqno;
        complete = 0;
        if (seqno) {
            /* We were just waiting for DMA to finish */
            complete = MPID_nem_scif_chk_seqno(&vc_scif->sc->csend, seqno);
        }
        else if (MPID_nem_scif_poll_send(vc_scif->sc->fd, &vc_scif->sc->csend)) {
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "Sending %p", sreq);

            iov = &sreq->dev.iov[sreq->dev.iov_offset];

            offset = MPID_nem_scif_writev(vc_scif->sc->fd, &vc_scif->sc->csend,
                                          iov, sreq->dev.iov_count, &seqno);
            MPIU_ERR_CHKANDJUMP1(offset <= 0, mpi_errno, MPI_ERR_OTHER,
                                 "**scif_writev", "**scif_writev %s", MPIU_Strerror(errno));
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "write " MPIDI_MSG_SZ_FMT, offset);
            RQ_SCIF(sreq)->seqno = seqno;
            complete = 1;
            for (iov = &sreq->dev.iov[sreq->dev.iov_offset];
                 iov < &sreq->dev.iov[sreq->dev.iov_offset + sreq->dev.iov_count]; ++iov) {
                if (offset < iov->MPID_IOV_LEN) {
                    iov->MPID_IOV_BUF = (char *) iov->MPID_IOV_BUF + offset;
                    iov->MPID_IOV_LEN -= offset;
                    /* iov_count should be equal to the number of iov's remaining */
                    sreq->dev.iov_count -= ((iov - sreq->dev.iov) - sreq->dev.iov_offset);
                    sreq->dev.iov_offset = iov - sreq->dev.iov;
                    complete = 0;
                    seqno = 0;
                    break;
                }
                offset -= iov->MPID_IOV_LEN;
            }
            if (seqno)
                complete = 0;
        }
        if (!complete) {
            /* writev couldn't write the entire iov, give up for now */
            break;
        }
        else {
            /* sent whole message */
            int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);

            RQ_SCIF(sreq)->seqno = 0;
            reqFn = sreq->dev.OnDataAvail;
            if (!reqFn) {
                MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
                MPIDI_CH3U_Request_complete(sreq);
                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
                MPIDI_CH3I_Sendq_dequeue(send_queue, &sreq);
                continue;
            }

            complete = 0;
            mpi_errno = reqFn(vc, sreq, &complete);
            if (mpi_errno)
                MPIU_ERR_POP(mpi_errno);

            if (complete) {
                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
                MPIDI_CH3I_Sendq_dequeue(send_queue, &sreq);
                continue;
            }
            sreq->dev.iov_offset = 0;
        }
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_SEND_QUEUED);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Esempio n. 26
0
int MPID_nem_scif_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
                              MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_msg_sz_t offset = 0;
    MPID_nem_scif_vc_area *vc_scif = VC_SCIF(vc);
    scifconn_t *sc = vc_scif->sc;
    uint64_t seqno = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_ISENDCONTIGMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_ISENDCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "scif_iSendContig");

    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);

    if (MPIDI_CH3I_Sendq_empty(vc_scif->send_queue) &&
        MPID_nem_scif_poll_send(sc->fd, &sc->csend)) {
        MPID_IOV iov[2];

        iov[0].MPID_IOV_BUF = hdr;
        iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t);
        iov[1].MPID_IOV_BUF = data;
        iov[1].MPID_IOV_LEN = data_sz;

        offset = MPID_nem_scif_writev(sc->fd, &sc->csend, iov, 2, &seqno);
        MPIU_ERR_CHKANDJUMP1(offset <= 0, mpi_errno, MPI_ERR_OTHER,
                             "**scif_writev", "**scif_writev %s", MPIU_Strerror(errno));
        MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE,
                         (MPIU_DBG_FDEST, "scif_send " MPIDI_MSG_SZ_FMT " fd=%d",
                          offset, sc->fd));

        if (offset == sizeof(MPIDI_CH3_Pkt_t) + data_sz) {
            /* sent whole message */
            int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);

            if (seqno)
                goto enqueue_request;
            reqFn = sreq->dev.OnDataAvail;
            if (!reqFn) {
                MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
                MPIDI_CH3U_Request_complete(sreq);
                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
                goto fn_exit;
            }
            else {
                int complete = 0;

                mpi_errno = reqFn(vc, sreq, &complete);
                if (mpi_errno)
                    MPIU_ERR_POP(mpi_errno);

                if (complete) {
                    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
                    goto fn_exit;
                }

                /* not completed: more to send */
                goto enqueue_request;
            }
        }
    }

    /* save iov */
    if (offset < sizeof(MPIDI_CH3_Pkt_t)) {
        sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
        sreq->dev.iov[0].MPID_IOV_BUF = (char *) &sreq->dev.pending_pkt + offset;
        sreq->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t) - offset;
        if (data_sz) {
            sreq->dev.iov[1].MPID_IOV_BUF = data;
            sreq->dev.iov[1].MPID_IOV_LEN = data_sz;
            sreq->dev.iov_count = 2;
        }
        else
            sreq->dev.iov_count = 1;
        seqno = 0;
    }
    else {
        sreq->dev.iov[0].MPID_IOV_BUF = (char *) data + (offset - sizeof(MPIDI_CH3_Pkt_t));
        sreq->dev.iov[0].MPID_IOV_LEN = data_sz - (offset - sizeof(MPIDI_CH3_Pkt_t));
        sreq->dev.iov_count = 1;
        seqno = 0;
    }

  enqueue_request:
    /* enqueue request */
    RQ_SCIF(sreq)->seqno = seqno;
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "enqueuing");
    MPIU_Assert(seqno || (sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0));

    sreq->ch.vc = vc;
    sreq->dev.iov_offset = 0;

    if (MPIDI_CH3I_Sendq_empty(vc_scif->send_queue)) {
        /* this will be the first send on the queue: queue it and set
         * the write flag on the pollfd */
        MPIDI_CH3I_Sendq_enqueue(&vc_scif->send_queue, sreq);
    }
    else {
        /* there are other sends in the queue before this one: try to
         * send from the queue */
        MPIDI_CH3I_Sendq_enqueue(&vc_scif->send_queue, sreq);
        mpi_errno = MPID_nem_scif_send_queued(vc, &vc_scif->send_queue);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_ISENDCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Esempio n. 27
0
/* MPIDI_CH3I_SendNoncontig - Sends a message by packing
   directly into cells.  The caller must initialize sreq->dev.segment
   as well as segment_first and segment_size. */
int MPIDI_CH3I_SendNoncontig( MPIDI_VC_t *vc, MPID_Request *sreq, void *header, MPIDI_msg_sz_t hdr_sz )
{
    int mpi_errno = MPI_SUCCESS;
    int again = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);

    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)header);

    MPIU_THREAD_CS_ENTER(MPIDCOMM,);

    if (!MPIDI_CH3I_Sendq_empty(MPIDI_CH3I_shm_sendq)) /* MT */
    {
        /* send queue is not empty, enqueue the request then check to
           see if we can send any now */

        MPIDI_DBG_PRINTF((55, FCNAME, "enqueuing"));

	sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *)header;
        sreq->ch.noncontig    = TRUE;
        sreq->ch.header_sz    = hdr_sz;
	sreq->ch.vc           = vc;

        MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
        mpi_errno = MPIDI_CH3I_Shm_send_progress();
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        goto fn_exit;
    }

    /* send as many cells of data as you can */
    MPID_nem_mpich_send_seg_header(sreq->dev.segment_ptr, &sreq->dev.segment_first, sreq->dev.segment_size, header, hdr_sz, vc, &again);
    while(!again && sreq->dev.segment_first < sreq->dev.segment_size)
        MPID_nem_mpich_send_seg(sreq->dev.segment_ptr, &sreq->dev.segment_first, sreq->dev.segment_size, vc, &again);

    if (again)
    {
        /* we didn't finish sending everything */
        sreq->ch.noncontig = TRUE;
        sreq->ch.vc = vc;
        if (sreq->dev.segment_first == 0) /* nothing was sent, save header */
        {
            sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *)header;
            sreq->ch.header_sz    = hdr_sz;
        }
        else
        {
            /* part of message was sent, make this req an active send */
            MPIU_Assert(MPIDI_CH3I_shm_active_send == NULL);
            MPIDI_CH3I_shm_active_send = sreq;
        }
        MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
        goto fn_exit;
    }

    /* finished sending all data, complete the request */
    if (!sreq->dev.OnDataAvail)
    {
        MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
        MPIDI_CH3U_Request_complete(sreq);
        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, ".... complete %d bytes", (int) (sreq->dev.segment_size));
    }
    else
    {
        int complete = 0;
        mpi_errno = sreq->dev.OnDataAvail(vc, sreq, &complete);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPIU_Assert(complete); /* all data has been sent, we should always complete */

        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, ".... complete %d bytes", (int) (sreq->dev.segment_size));
    }

 fn_exit:
    MPIU_THREAD_CS_EXIT(MPIDCOMM,);
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Esempio n. 28
0
MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag, 
                                           int context_id, MPID_Comm *comm, void *user_buf,
                                           int user_count, MPI_Datatype datatype, int * foundp)
{
    int found;
    MPID_Request *rreq, *prev_rreq;
    MPIDI_Message_match match;
    MPIDI_Message_match mask;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);

    MPIU_THREAD_CS_ASSERT_HELD(MSGQUEUE);

    /* Optimize this loop for an empty unexpected receive queue */
    rreq = recvq_unexpected_head;
    if (rreq) {
	prev_rreq = NULL;

	match.parts.context_id = context_id;
	match.parts.tag = tag;
	match.parts.rank = source;

	if (tag != MPI_ANY_TAG && source != MPI_ANY_SOURCE) {
	    do {
		if (MATCH_WITH_NO_MASK(rreq->dev.match, match)) {
		    if (prev_rreq != NULL) {
			prev_rreq->dev.next = rreq->dev.next;
		    }
		    else {
			recvq_unexpected_head = rreq->dev.next;
		    }

		    if (rreq->dev.next == NULL) {
			recvq_unexpected_tail = prev_rreq;
		    }

		    rreq->comm = comm;
		    MPIR_Comm_add_ref(comm);
		    rreq->dev.user_buf = user_buf;
		    rreq->dev.user_count = user_count;
		    rreq->dev.datatype = datatype;
		    found = TRUE;
		    goto lock_exit;
		}
		prev_rreq = rreq;
		rreq      = rreq->dev.next;
	    } while (rreq);
	}
	else {
	    mask.parts.context_id = mask.parts.rank = mask.parts.tag = ~0;
	    if (tag == MPI_ANY_TAG)
		match.parts.tag = mask.parts.tag = 0;
	    if (source == MPI_ANY_SOURCE)
		match.parts.rank = mask.parts.rank = 0;

	    do {
		if (MATCH_WITH_LEFT_MASK(rreq->dev.match, match, mask)) {
		    if (prev_rreq != NULL) {
			prev_rreq->dev.next = rreq->dev.next;
		    }
		    else {
			recvq_unexpected_head = rreq->dev.next;
		    }
		    if (rreq->dev.next == NULL) {
			recvq_unexpected_tail = prev_rreq;
		    }
		    rreq->comm                 = comm;
		    MPIR_Comm_add_ref(comm);
		    rreq->dev.user_buf         = user_buf;
		    rreq->dev.user_count       = user_count;
		    rreq->dev.datatype         = datatype;
		    found = TRUE;
		    goto lock_exit;
		}
		prev_rreq = rreq;
		rreq = rreq->dev.next;
	    } while (rreq);
	}
    }
    
    /* A matching request was not found in the unexpected queue, so we 
       need to allocate a new request and add it to the posted queue */
    {
	int mpi_errno = MPI_SUCCESS;

        found = FALSE;

	MPIDI_Request_create_rreq( rreq, mpi_errno, goto lock_exit );
	rreq->dev.match.parts.tag	   = tag;
	rreq->dev.match.parts.rank	   = source;
	rreq->dev.match.parts.context_id   = context_id;

	/* Added a mask for faster search on 64-bit capable
	 * platforms */
	rreq->dev.mask.parts.context_id = ~0;
	if (rreq->dev.match.parts.rank == MPI_ANY_SOURCE)
	    rreq->dev.mask.parts.rank = 0;
	else
	    rreq->dev.mask.parts.rank = ~0;
	if (rreq->dev.match.parts.tag == MPI_ANY_TAG)
	    rreq->dev.mask.parts.tag = 0;
	else
	    rreq->dev.mask.parts.tag = ~0;

        rreq->comm                 = comm;
        MPIR_Comm_add_ref(comm);
        rreq->dev.user_buf         = user_buf;
        rreq->dev.user_count       = user_count;
        rreq->dev.datatype         = datatype;

        /* check whether VC has failed, or this is an ANY_SOURCE in a
           failed communicator */
        if (source != MPI_ANY_SOURCE) {
            MPIDI_VC_t *vc;
            MPIDI_Comm_get_vc(comm, source, &vc);
            if (vc->state == MPIDI_VC_STATE_MORIBUND) {
                MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                rreq->status.MPI_ERROR = mpi_errno;
                MPIDI_CH3U_Request_complete(rreq);
                goto lock_exit;
            }
        } else if (MPID_VCRT_Contains_failed_vc(comm->vcrt)) {
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**comm_fail");
            rreq->status.MPI_ERROR = mpi_errno;
            MPIDI_CH3U_Request_complete(rreq);
            goto lock_exit;
        }
        
	rreq->dev.next = NULL;
	if (recvq_posted_tail != NULL) {
	    recvq_posted_tail->dev.next = rreq;
	}
	else {
	    recvq_posted_head = rreq;
	}
	recvq_posted_tail = rreq;
	MPIDI_POSTED_RECV_ENQUEUE_HOOK(rreq);
    }
    
  lock_exit:

    *foundp = found;
    
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
    return rreq;
}
Esempio n. 29
0
static int MPIDI_CH3I_Progress_handle_sock_event(MPIDU_Sock_event_t * event)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);

    MPIU_DBG_MSG_D(CH3_OTHER,VERBOSE,"Socket event of type %d", event->op_type );

    switch (event->op_type)
    {
	case MPIDU_SOCK_OP_READ:
	{
	    MPIDI_CH3I_Connection_t * conn = 
		(MPIDI_CH3I_Connection_t *) event->user_ptr;
		
	    MPID_Request * rreq = conn->recv_active;

	    /* --BEGIN ERROR HANDLING-- */
	    if (event->error != MPI_SUCCESS)
	    {
		/* FIXME: the following should be handled by the close 
		   protocol */
		if (MPIR_ERR_GET_CLASS(event->error) != MPIDU_SOCK_ERR_CONN_CLOSED) {
		    mpi_errno = event->error;
		    MPIU_ERR_POP(mpi_errno);
		}		    
		break;
	    }
	    /* --END ERROR HANDLING-- */
		
	    if (conn->state == CONN_STATE_CONNECTED)
	    {
		if (conn->recv_active == NULL)
		{
                    MPIDI_msg_sz_t buflen = sizeof (MPIDI_CH3_Pkt_t);
		    MPIU_Assert(conn->pkt.type < MPIDI_CH3_PKT_END_CH3);
                    
		    mpi_errno = pktArray[conn->pkt.type]( conn->vc, &conn->pkt,
							  &buflen, &rreq );
		    if (mpi_errno != MPI_SUCCESS) {
			MPIU_ERR_POP(mpi_errno);
		    }
                    MPIU_Assert(buflen == sizeof (MPIDI_CH3_Pkt_t));

		    if (rreq == NULL)
		    {
			if (conn->state != CONN_STATE_CLOSING)
			{
			    /* conn->recv_active = NULL;  -- 
			       already set to NULL */
			    mpi_errno = connection_post_recv_pkt(conn);
			    if (mpi_errno != MPI_SUCCESS) {
				MPIU_ERR_POP(mpi_errno);
			    }
			}
		    }
		    else
		    {
			mpi_errno = ReadMoreData( conn, rreq );
			if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
		    }
		}
		else /* incoming data */
		{
		    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
		    int complete;

		    reqFn = rreq->dev.OnDataAvail;
		    if (!reqFn) {
			MPIU_Assert(MPIDI_Request_get_type(rreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
			MPIDI_CH3U_Request_complete(rreq);
			complete = TRUE;
		    }
		    else {
			mpi_errno = reqFn( conn->vc, rreq, &complete );
			if (mpi_errno) MPIU_ERR_POP(mpi_errno);
		    }
			
		    if (complete)
		    {
			conn->recv_active = NULL;
			mpi_errno = connection_post_recv_pkt(conn);
			if (mpi_errno != MPI_SUCCESS) {
			    MPIU_ERR_POP(mpi_errno);
			}
		    }
		    else /* more data to be read */
		    {
			mpi_errno = ReadMoreData( conn, rreq );
			if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
		    }
		}
	    }
	    else if (conn->state == CONN_STATE_OPEN_LRECV_DATA)
	    {
		mpi_errno = MPIDI_CH3_Sockconn_handle_connopen_event( conn );
		if (mpi_errno) { MPIU_ERR_POP( mpi_errno ); }
	    }
	    else /* Handling some internal connection establishment or 
		    tear down packet */
	    { 
		mpi_errno = MPIDI_CH3_Sockconn_handle_conn_event( conn );
		if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	    }
	    break;
	}

	/* END OF SOCK_OP_READ */

	case MPIDU_SOCK_OP_WRITE:
	{
	    MPIDI_CH3I_Connection_t * conn = 
		(MPIDI_CH3I_Connection_t *) event->user_ptr;
	    /* --BEGIN ERROR HANDLING-- */
	    if (event->error != MPI_SUCCESS) {
		mpi_errno = event->error;
		MPIU_ERR_POP(mpi_errno);
	    }
	    /* --END ERROR HANDLING-- */
		
	    if (conn->send_active)
	    {
		MPID_Request * sreq = conn->send_active;
		int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
		int complete;

		reqFn = sreq->dev.OnDataAvail;
		if (!reqFn) {
		    MPIU_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
		    MPIDI_CH3U_Request_complete(sreq);
		    complete = TRUE;
		}
		else {
		    mpi_errno = reqFn( conn->vc, sreq, &complete );
		    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
		}
		    
		if (complete)
		{
		    mpi_errno = connection_pop_sendq_req(conn);
		    if (mpi_errno != MPI_SUCCESS) {
			MPIU_ERR_POP(mpi_errno);
		    }
		}
		else /* more data to send */
		{
		    for(;;)
		    {
			MPID_IOV * iovp;
			MPIU_Size_t nb;
				
			iovp = sreq->dev.iov;
			    
			mpi_errno = MPIDU_Sock_writev(conn->sock, iovp, sreq->dev.iov_count, &nb);
			/* --BEGIN ERROR HANDLING-- */
			if (mpi_errno != MPI_SUCCESS)
			{
			    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
							     "**ch3|sock|immedwrite", "ch3|sock|immedwrite %p %p %p",
							     sreq, conn, conn->vc);
			    goto fn_fail;
			}
			/* --END ERROR HANDLING-- */

			MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
       (MPIU_DBG_FDEST,"immediate writev, vc=%p, sreq=0x%08x, nb=" MPIDI_MSG_SZ_FMT,
	conn->vc, sreq->handle, nb));
			    
			if (nb > 0 && adjust_iov(&iovp, &sreq->dev.iov_count, nb))
			{
			    reqFn = sreq->dev.OnDataAvail;
			    if (!reqFn) {
				MPIU_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
				MPIDI_CH3U_Request_complete(sreq);
				complete = TRUE;
			    }
			    else {
				mpi_errno = reqFn( conn->vc, sreq, &complete );
				if (mpi_errno) MPIU_ERR_POP(mpi_errno);
			    }
			    if (complete)
			    {
				mpi_errno = connection_pop_sendq_req(conn);
				if (mpi_errno != MPI_SUCCESS) {
				    MPIU_ERR_POP(mpi_errno);
				}
				break;
			    }
			}
			else
			{
			    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
       (MPIU_DBG_FDEST,"posting writev, vc=%p, conn=%p, sreq=0x%08x", 
	conn->vc, conn, sreq->handle));
			    mpi_errno = MPIDU_Sock_post_writev(conn->sock, iovp, sreq->dev.iov_count, NULL);
			    /* --BEGIN ERROR HANDLING-- */
			    if (mpi_errno != MPI_SUCCESS)
			    {
				mpi_errno = MPIR_Err_create_code(
				    mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**ch3|sock|postwrite",
				    "ch3|sock|postwrite %p %p %p", sreq, conn, conn->vc);
				goto fn_fail;
			    }
			    /* --END ERROR HANDLING-- */

			    break;
			}
		    }
		}
	    }
	    else /* finished writing internal packet header */
	    {
		/* the connection is not active yet */
		mpi_errno = MPIDI_CH3_Sockconn_handle_connwrite( conn );
		if (mpi_errno) { MPIU_ERR_POP( mpi_errno ); }
	    }
	    break;
	}
	/* END OF SOCK_OP_WRITE */

	case MPIDU_SOCK_OP_ACCEPT:
	{
	    mpi_errno = MPIDI_CH3_Sockconn_handle_accept_event();
	    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	    break;
	}
	    
	case MPIDU_SOCK_OP_CONNECT:
	{
	    mpi_errno = MPIDI_CH3_Sockconn_handle_connect_event( 
				(MPIDI_CH3I_Connection_t *) event->user_ptr,
				event->error );
	    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	    break;
	}
	    
	case MPIDU_SOCK_OP_CLOSE:
	{
	    mpi_errno = MPIDI_CH3_Sockconn_handle_close_event( 
			      (MPIDI_CH3I_Connection_t *) event->user_ptr );
	    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	    break;
	}

	case MPIDU_SOCK_OP_WAKEUP:
	{
	    MPIDI_CH3_Progress_signal_completion();
	    /* MPIDI_CH3I_progress_completion_count++; */
	    break;
	}
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}