/*
 * MPIDI_CH3U_Handle_ordered_recv_pkt - dispatch a received packet to the
 * handler registered for its packet type.
 *
 * vc, pkt, data, buflen and rreqp are forwarded unchanged to the handler
 * stored at index pkt->type of a function-local static table.  The table is
 * populated on the first call through MPIDI_CH3_PktHandler_Init().
 *
 * Returns the value returned by the selected handler, or the error code
 * reported by MPIDI_CH3_PktHandler_Init() when the table could not be set up
 * (in which case no handler is called and initialization is retried on the
 * next call).
 */
int MPIDI_CH3U_Handle_ordered_recv_pkt(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_t * pkt, void *data,
				       intptr_t *buflen, MPIR_Request ** rreqp)
{
    int mpi_errno = MPI_SUCCESS;
    static MPIDI_CH3_PktHandler_Fcn *pktArray[MPIDI_CH3_PKT_END_CH3+1];
    static int needsInit = 1;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);

    MPL_DBG_STMT(MPIDI_CH3_DBG_OTHER,VERBOSE,MPIDI_DBG_Print_packet(pkt));

    /* FIXME: We can turn this into something like

       MPIR_Assert(pkt->type <= MAX_PACKET_TYPE);
       mpi_errno = MPIDI_CH3_ProgressFunctions[pkt->type](vc,pkt,rreqp);

       in the progress engine itself.  Then this routine is not necessary.
    */

    if (needsInit) {
	/* Do not dispatch through a table that failed to initialize; report
	 * the failure and leave needsInit set so a later call retries. */
	mpi_errno = MPIDI_CH3_PktHandler_Init( pktArray, MPIDI_CH3_PKT_END_CH3 );
	if (mpi_errno != MPI_SUCCESS) {
	    goto fn_exit;
	}
	needsInit = 0;
    }
    /* Packet type is an enum and hence >= 0 */
    MPIR_Assert(pkt->type <= MPIDI_CH3_PKT_END_CH3);
    /* NOTE(review): a type with no registered handler would leave a NULL
     * entry here -- confirm that PktHandler_Init fills every slot. */
    mpi_errno = pktArray[pkt->type](vc, pkt, data, buflen, rreqp);

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_ORDERED_RECV_PKT);
    return mpi_errno;
}
Beispiel #2
0
/*
 * MPID_nem_mxm_iStartContigMsg - create a send request for a header plus an
 * optional contiguous payload and post it through _mxm_isend().
 *
 * vc       - virtual connection to send on
 * hdr      - packet header; sizeof(MPIDI_CH3_Pkt_t) bytes are copied from it
 *            into the request regardless of hdr_sz
 * hdr_sz   - only checked (asserted) to be <= sizeof(MPIDI_CH3_Pkt_t)
 * data     - payload buffer, used only when data_sz is non-zero
 * data_sz  - payload length in bytes
 * sreq_ptr - receives the created request on every return path
 *
 * Returns MPI_SUCCESS or the error produced by _mxm_isend().
 */
int MPID_nem_mxm_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data,
                                 MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq = NULL;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "mxm_iStartContigMsg");
    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);

    /* create a request */
    sreq = MPID_Request_create();
    MPIU_Assert(sreq != NULL);
    MPIU_Object_set_ref(sreq, 2);
    /* Keep a private copy of the header: the iov below points at this copy,
     * not at the caller's buffer. */
    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));
    sreq->kind = MPID_REQUEST_SEND;
    sreq->dev.OnDataAvail = NULL;
    sreq->dev.tmpbuf = NULL;

    /* The size arguments are not int-sized, so they are converted to long and
     * printed with %ld; the pointer is converted to void * for %p. */
    _dbg_mxm_output(5,
                    "iStartContigMsg ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %ld, %ld) \n",
                    vc->pg_rank, sreq->dev.pending_pkt.type, (void *) sreq,
                    (long) sizeof(MPIDI_CH3_Pkt_t), (long) data_sz);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    /* iov[0] is always the full-size header; iov[1] is added only when there
     * is payload. */
    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;
    req_area->iov_count = 1;
    req_area->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt);
    req_area->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t);
    if (data_sz) {
        req_area->iov_count = 2;
        req_area->iov_buf[1].ptr = (void *) data;
        req_area->iov_buf[1].length = data_sz;
    }

    vc_area->pending_sends += 1;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = FALSE;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM,
                           mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

  fn_exit:
    /* The request is handed back on the error path as well. */
    *sreq_ptr = sreq;
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Beispiel #3
0
/*
 * MPID_nem_mxm_iSendContig - post an existing send request (header, optional
 * extended header, optional contiguous payload) through _mxm_isend().
 *
 * vc      - virtual connection to send on
 * sreq    - caller-owned request; its pending_pkt, ch.vc and ch.noncontig
 *           fields are overwritten here
 * hdr     - packet header; sizeof(MPIDI_CH3_Pkt_t) bytes are copied from it
 * hdr_sz  - only checked (asserted) to be <= sizeof(MPIDI_CH3_Pkt_t)
 * data    - payload buffer, used only when data_sz is non-zero
 * data_sz - payload length in bytes
 *
 * Returns MPI_SUCCESS or the error produced by _mxm_isend().
 */
int MPID_nem_mxm_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz,
                             void *data, MPIDI_msg_sz_t data_sz)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "mxm_iSendContig");
    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);

    /* The iov below references this copy of the header, not the caller's. */
    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));

    /* The size arguments are not int-sized, so they are converted to long and
     * printed with %ld; the pointer is converted to void * for %p. */
    _dbg_mxm_output(5,
                    "iSendContig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %ld, %ld) \n",
                    vc->pg_rank, sreq->dev.pending_pkt.type, (void *) sreq,
                    (long) sizeof(MPIDI_CH3_Pkt_t), (long) data_sz);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;
    req_area->iov_count = 0;

    /* Entry 0: the fixed-size packet header. */
    req_area->iov_buf[req_area->iov_count].ptr = (void *) &(sreq->dev.pending_pkt);
    req_area->iov_buf[req_area->iov_count].length = sizeof(MPIDI_CH3_Pkt_t);
    (req_area->iov_count)++;

    /* Optional entry: extended header attached to the request. */
    if (sreq->dev.ext_hdr_sz != 0) {
        req_area->iov_buf[req_area->iov_count].ptr = (void *) (sreq->dev.ext_hdr_ptr);
        req_area->iov_buf[req_area->iov_count].length = sreq->dev.ext_hdr_sz;
        (req_area->iov_count)++;
    }

    /* Optional entry: contiguous payload. */
    if (data_sz) {
        req_area->iov_buf[req_area->iov_count].ptr = (void *) data;
        req_area->iov_buf[req_area->iov_count].length = data_sz;
        (req_area->iov_count)++;
    }

    vc_area->pending_sends += 1;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = FALSE;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM,
                           mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * MPIDI_CH3_iStartMsg - send a header-only message on a sock-channel VC,
 * creating a request only when the header could not be written at once.
 *
 * vc       - virtual connection to send on
 * hdr      - packet header
 * hdr_sz   - asserted to be <= sizeof(MPIDI_CH3_Pkt_t), then replaced by
 *            sizeof(MPIDI_CH3_Pkt_t) because this channel always sends a
 *            fixed-length header
 * sreq_ptr - receives NULL when the header was written completely, otherwise
 *            the request that was created (queued, or completed with an
 *            error status)
 *
 * Returns MPI_SUCCESS or an MPI error code.  An error from posting the
 * connection in the UNCONNECTED state is returned to the caller.
 */
int MPIDI_CH3_iStartMsg(MPIDI_VC_t * vc, void * hdr, MPIDI_msg_sz_t hdr_sz, 
			MPID_Request ** sreq_ptr)
{
    MPID_Request * sreq = NULL;
    MPIDI_CH3I_VC *vcch = &vc->ch;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSG);
    
    MPIU_Assert( hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    /* The SOCK channel uses a fixed length header, the size of which is the 
       maximum of all possible packet headers */
    hdr_sz = sizeof(MPIDI_CH3_Pkt_t);
    MPIU_DBG_STMT(CH3_CHANNEL,VERBOSE,
		  MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t*)hdr));

    if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTED) /* MT */
    {
	/* Connection already formed.  If send queue is empty attempt to send 
	   data, queuing any unsent data. */
	if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
	{
	    MPIU_Size_t nb;
	    int rc;

	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
			 "send queue empty, attempting to write");
	    
	    MPIU_DBG_PKT(vcch->conn,hdr,"istartmsg");
	    /* MT: need some signalling to lock down our right to use the 
	       channel, thus insuring that the progress engine does
               not also try to write */
	    rc = MPIDU_Sock_write(vcch->sock, hdr, hdr_sz, &nb);
	    if (rc == MPI_SUCCESS)
	    {
		MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
			       "wrote %ld bytes", (unsigned long) nb);
		
		if (nb == hdr_sz)
		{ 
		    MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
				   "entire write complete, " MPIDI_MSG_SZ_FMT " bytes", nb);
		    /* done.  get us out of here as quickly as possible. */
		}
		else
		{
		    /* Partial write: build a request for the remainder, put
		       it at the head of the queue and post the write. */
		    MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
                    "partial write of " MPIDI_MSG_SZ_FMT " bytes, request enqueued at head", nb);
		    sreq = create_request(hdr, hdr_sz, nb);
		    if (!sreq) {
			MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
		    }

		    MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
		    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
     (MPIU_DBG_FDEST,"posting write, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
		    vcch->conn->send_active = sreq;
		    mpi_errno = MPIDU_Sock_post_write(vcch->conn->sock, sreq->dev.iov[0].MPID_IOV_BUF,
						      sreq->dev.iov[0].MPID_IOV_LEN, sreq->dev.iov[0].MPID_IOV_LEN, NULL);
		    /* --BEGIN ERROR HANDLING-- */
		    if (mpi_errno != MPI_SUCCESS)
		    {
			mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
							 "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
							 sreq, vcch->conn, vc);
			goto fn_fail;
		    }
		    /* --END ERROR HANDLING-- */
		}
	    }
	    /* --BEGIN ERROR HANDLING-- */
	    else
	    {
		/* The write failed: hand back an already-completed request
		   that carries the error in its status. */
		MPIU_DBG_MSG_D(CH3_CHANNEL,TYPICAL,
			       "ERROR - MPIDU_Sock_write failed, rc=%d", rc);
		sreq = MPID_Request_create();
		if (!sreq) {
		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
		}
		sreq->kind = MPID_REQUEST_SEND;
		MPID_cc_set(&(sreq->cc), 0);
		sreq->status.MPI_ERROR = MPIR_Err_create_code( rc,
			       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, 
			       MPI_ERR_INTERN, "**ch3|sock|writefailed",
			       "**ch3|sock|writefailed %d", rc );
		/* Make sure that the caller sees this error */
		mpi_errno = sreq->status.MPI_ERROR;
	    }
	    /* --END ERROR HANDLING-- */
	}
	else
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
			 "send in progress, request enqueued");
	    sreq = create_request(hdr, hdr_sz, 0);
	    if (!sreq) {
		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	    }
	    MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
	}
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTING) /* MT */
    {
	MPIU_DBG_VCUSE(vc,
		       "connecteding. enqueuing request");
	
	/* queue the data so it can be sent after the connection is formed */
	sreq = create_request(hdr, hdr_sz, 0);
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_UNCONNECTED) /* MT */
    {
	MPIU_DBG_VCUSE(vc,
		       "unconnected.  posting connect and enqueuing request");
	
	/* queue the data so it can be sent after the connection is formed */
	sreq = create_request(hdr, hdr_sz, 0);
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);

	/* Form a new connection.  A failure to post the connect is reported
	   to the caller; otherwise the queued request would silently never
	   be sent. */
	mpi_errno = MPIDI_CH3I_VC_post_connect(vc);
	if (mpi_errno != MPI_SUCCESS) {
	    goto fn_fail;
	}
    }
    else if (vcch->state != MPIDI_CH3I_VC_STATE_FAILED)
    {
	/* Unable to send data at the moment, so queue it for later */
	MPIU_DBG_VCUSE(vc,"forming connection, request enqueued");
	sreq = create_request(hdr, hdr_sz, 0);
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    /* --BEGIN ERROR HANDLING-- */
    else
    {
	/* Connection failed, so allocate a request and return an error. */
	MPIU_DBG_VCUSE(vc,"ERROR - connection failed");
	sreq = MPID_Request_create();
	if (!sreq) {
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
	}
	sreq->kind = MPID_REQUEST_SEND;
	MPID_cc_set(&sreq->cc, 0);
	
	sreq->status.MPI_ERROR = MPIR_Err_create_code( MPI_SUCCESS,
		       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, 
		       MPI_ERR_INTERN, "**ch3|sock|connectionfailed",0 );
	/* Make sure that the caller sees this error */
	mpi_errno = sreq->status.MPI_ERROR;
    }
    /* --END ERROR HANDLING-- */

    /* Shared exit: success and failure paths both store the request pointer
       (possibly NULL) for the caller. */
  fn_fail:
    *sreq_ptr = sreq;
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSG);
    return mpi_errno;
}
/* MPIDI_CH3I_SendNoncontig - Sends a message by packing
   directly into cells.  The caller must initialize sreq->dev.segment
   as well as segment_first and segment_size.

   vc      - virtual connection to send on
   sreq    - send request describing the segment to pack
   header  - packet header; saved into sreq->dev.pending_pkt when the
             request has to be queued before anything was sent
   hdr_sz  - size of the header in bytes

   Returns MPI_SUCCESS or the error reported by the send-progress routine
   or by the request's OnDataAvail callback. */
int MPIDI_CH3I_SendNoncontig( MPIDI_VC_t *vc, MPID_Request *sreq, void *header, MPIDI_msg_sz_t hdr_sz )
{
    int mpi_errno = MPI_SUCCESS;
    int again = 0;
    /* Remember where the caller started: segment_first need not be 0 on
       entry, so "nothing was sent" must be judged against this value. */
    MPIDI_msg_sz_t orig_segment_first = sreq->dev.segment_first;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);

    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)header);

    MPIU_THREAD_CS_ENTER(MPIDCOMM,);

    if (!MPIDI_CH3I_Sendq_empty(MPIDI_CH3I_shm_sendq)) /* MT */
    {
        /* send queue is not empty, enqueue the request then check to
           see if we can send any now */

        MPIDI_DBG_PRINTF((55, FCNAME, "enqueuing"));

	sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *)header;
        sreq->ch.noncontig    = TRUE;
        sreq->ch.header_sz    = hdr_sz;
	sreq->ch.vc           = vc;

        MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
        mpi_errno = MPIDI_CH3I_Shm_send_progress();
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        goto fn_exit;
    }

    /* send as many cells of data as you can */
    MPID_nem_mpich_send_seg_header(sreq->dev.segment_ptr, &sreq->dev.segment_first, sreq->dev.segment_size, header, hdr_sz, vc, &again);
    while(!again && sreq->dev.segment_first < sreq->dev.segment_size)
        MPID_nem_mpich_send_seg(sreq->dev.segment_ptr, &sreq->dev.segment_first, sreq->dev.segment_size, vc, &again);

    if (again)
    {
        /* we didn't finish sending everything */
        sreq->ch.noncontig = TRUE;
        sreq->ch.vc = vc;
        if (sreq->dev.segment_first == orig_segment_first) /* nothing was sent, save header */
        {
            sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *)header;
            sreq->ch.header_sz    = hdr_sz;
        }
        else
        {
            /* part of message was sent, make this req an active send */
            MPIU_Assert(MPIDI_CH3I_shm_active_send == NULL);
            MPIDI_CH3I_shm_active_send = sreq;
        }
        MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
        goto fn_exit;
    }

    /* finished sending all data, complete the request */
    if (!sreq->dev.OnDataAvail)
    {
        MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
        MPIDI_CH3U_Request_complete(sreq);
        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, ".... complete %d bytes", (int) (sreq->dev.segment_size));
    }
    else
    {
        int complete = 0;
        mpi_errno = sreq->dev.OnDataAvail(vc, sreq, &complete);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPIU_Assert(complete); /* all data has been sent, we should always complete */

        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, ".... complete %d bytes", (int) (sreq->dev.segment_size));
    }

 fn_exit:
    /* Every path, including errors, leaves the critical section here. */
    MPIU_THREAD_CS_EXIT(MPIDCOMM,);
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #6
0
/*
 * MPIDI_CH3_iSend - send a header-only message for an existing request on a
 * sock-channel VC.
 *
 * vc     - virtual connection to send on
 * sreq   - caller-supplied send request; completed, updated or enqueued here
 *          depending on the connection state and how much could be written
 * hdr    - packet header
 * hdr_sz - asserted to be <= sizeof(MPIDI_CH3_Pkt_t), then replaced by
 *          sizeof(MPIDI_CH3_Pkt_t) because this channel always sends a
 *          fixed-length header
 *
 * Returns MPI_SUCCESS or an MPI error code.  When the connection has already
 * failed, the request is completed with MPI_ERR_INTERN in its status and that
 * error is also returned, matching the write-failure branch.
 */
int MPIDI_CH3_iSend(MPIDI_VC_t * vc, MPIR_Request * sreq, void * hdr,
		    intptr_t hdr_sz)
{
    int mpi_errno = MPI_SUCCESS;
    int (*reqFn)(MPIDI_VC_t *, MPIR_Request *, int *);
    MPIDI_CH3I_VC *vcch = &vc->ch;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISEND);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISEND);

    MPIR_Assert( hdr_sz <= sizeof(MPIDI_CH3_Pkt_t) );

    /* The sock channel uses a fixed length header, the size of which is the 
       maximum of all possible packet headers */
    hdr_sz = sizeof(MPIDI_CH3_Pkt_t);
    MPL_DBG_STMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
		  MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t*)hdr));

    if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTED) /* MT */
    {
	/* Connection already formed.  If send queue is empty attempt to send 
	   data, queuing any unsent data. */
	if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
	{
	    size_t nb;
	    int rc;

	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
			 "send queue empty, attempting to write");
	    MPL_DBG_PKT(vcch->conn,hdr,"isend");
	    /* MT: need some signalling to lock down our right to use the 
	       channel, thus insuring that the progress engine does
               not also try to write */
	    rc = MPIDI_CH3I_Sock_write(vcch->sock, hdr, hdr_sz, &nb);
	    if (rc == MPI_SUCCESS)
	    {
		MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
			       "wrote %ld bytes", (unsigned long) nb);
		
		if (nb == hdr_sz)
		{
		    /* Whole header written: either complete the request or
		       let its OnDataAvail callback decide what comes next. */
		    MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
                     "write complete %" PRIdPTR " bytes, calling OnDataAvail fcn", nb);
		    reqFn = sreq->dev.OnDataAvail;
		    if (!reqFn) {
			MPIR_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                        mpi_errno = MPID_Request_complete(sreq);
                        if (mpi_errno != MPI_SUCCESS) {
                            MPIR_ERR_POP(mpi_errno);
                        }
		    }
		    else {
			int complete;
			mpi_errno = reqFn( vc, sreq, &complete );
			if (mpi_errno) MPIR_ERR_POP(mpi_errno);
			if (!complete) {
			    /* The callback left more data in sreq->dev.iov:
			       make the request the active send and post it. */
			    MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
			    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
					     (MPL_DBG_FDEST,
                    "posting writev, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
			    vcch->conn->send_active = sreq;
			    mpi_errno = MPIDI_CH3I_Sock_post_writev(
				vcch->conn->sock, sreq->dev.iov, 
				sreq->dev.iov_count, NULL);
			    /* --BEGIN ERROR HANDLING-- */
			    if (mpi_errno != MPI_SUCCESS)
			    {
				mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
								 "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
								 sreq, vcch->conn, vc);
			    }
			    /* --END ERROR HANDLING-- */
			}
		    }
		}
		else
		{
		    /* Partial write: record the remainder in the request,
		       put it at the head of the queue and post the write. */
		    MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
                     "partial write of %" PRIdPTR " bytes, request enqueued at head", nb);
		    update_request(sreq, hdr, hdr_sz, nb);
		    MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
		    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
     (MPL_DBG_FDEST,"posting write, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
		    vcch->conn->send_active = sreq;
		    mpi_errno = MPIDI_CH3I_Sock_post_write(vcch->conn->sock,
					  sreq->dev.iov[0].MPL_IOV_BUF,
				          sreq->dev.iov[0].MPL_IOV_LEN, 
					  sreq->dev.iov[0].MPL_IOV_LEN, NULL);
		    /* --BEGIN ERROR HANDLING-- */
		    if (mpi_errno != MPI_SUCCESS)
		    {
			mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
							 "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
							 sreq, vcch->conn, vc);
		    }
		    /* --END ERROR HANDLING-- */
		}
	    }
	    /* --BEGIN ERROR HANDLING-- */
	    else if (MPIR_ERR_GET_CLASS(rc) == MPIDI_CH3I_SOCK_ERR_NOMEM)
	    {
		/* NOTE(review): this branch only records the error in the
		   request status; the request is neither completed nor
		   queued and mpi_errno stays MPI_SUCCESS -- confirm that
		   this is intended. */
		MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,TYPICAL,
			     "MPIDI_CH3I_Sock_write failed, out of memory");
		sreq->status.MPI_ERROR = MPIR_ERR_MEMALLOCFAILED;
	    }
	    else
	    {
		MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL,TYPICAL,
			       "MPIDI_CH3I_Sock_write failed, rc=%d", rc);
		/* Connection just failed. Mark the request complete and 
		   return an error. */
		MPL_DBG_VCCHSTATECHANGE(vc,VC_STATE_FAILED);
		/* FIXME: Shouldn't the vc->state also change? */
		vcch->state = MPIDI_CH3I_VC_STATE_FAILED;
		sreq->status.MPI_ERROR = MPIR_Err_create_code( rc,
			       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, 
			       MPI_ERR_INTERN, "**ch3|sock|writefailed", 
			       "**ch3|sock|writefailed %d", rc );
		 /* MT -CH3U_Request_complete() performs write barrier */
		MPID_Request_complete(sreq);
		/* Make sure that the caller sees this error */
		mpi_errno = sreq->status.MPI_ERROR;
	    }
	    /* --END ERROR HANDLING-- */
	}
	else
	{
	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,"send queue not empty, enqueuing");
	    update_request(sreq, hdr, hdr_sz, 0);
	    MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
	}
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTING) /* MT */
    {
	/* queuing the data so it can be sent later. */
	MPL_DBG_VCUSE(vc,"connecting.  enqueuing request");
	update_request(sreq, hdr, hdr_sz, 0);
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_UNCONNECTED) /* MT */
    {
	/* Form a new connection, queuing the data so it can be sent later. */
	MPL_DBG_VCUSE(vc,"unconnected.  enqueuing request");
	update_request(sreq, hdr, hdr_sz, 0);
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
	mpi_errno = MPIDI_CH3I_VC_post_connect(vc);
	if (mpi_errno) {
	    MPIR_ERR_POP(mpi_errno);
	}
    }
    else if (vcch->state != MPIDI_CH3I_VC_STATE_FAILED)
    {
	/* Unable to send data at the moment, so queue it for later */
	MPL_DBG_VCUSE(vc,"still connecting. Enqueuing request");
	update_request(sreq, hdr, hdr_sz, 0);
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    /* --BEGIN ERROR HANDLING-- */
    else
    {
	/* Connection failed.  Mark the request complete and return an error. */
	/* TODO: Create an appropriate error message */
	sreq->status.MPI_ERROR = MPI_ERR_INTERN;
	/* Make sure that the caller sees this error.  Set before completing
	   the request so the request is not read afterwards. */
	mpi_errno = MPI_ERR_INTERN;
	/* MT - CH3U_Request_complete() performs write barrier */
	MPID_Request_complete(sreq);
    }
    /* --END ERROR HANDLING-- */

    /* Shared exit for success and failure. */
 fn_fail:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISEND);
    return mpi_errno;
}
Beispiel #7
0
/*
 * MPID_nem_scif_iSendContig - send a packet header plus contiguous payload
 * for an existing request over the SCIF connection attached to vc.
 *
 * If the VC's send queue is empty and the connection polls as ready, header
 * and payload are written directly.  When the whole message went out and no
 * sequence number was handed back, the request is completed (or its
 * OnDataAvail callback is run).  In every other case the unsent remainder is
 * described in sreq->dev.iov and the request is appended to the send queue.
 *
 * hdr_sz is only asserted to be <= sizeof(MPIDI_CH3_Pkt_t); a full-size
 * header is always put on the wire.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_nem_scif_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
                              MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_msg_sz_t nsent = 0;
    MPID_nem_scif_vc_area *scif_area = VC_SCIF(vc);
    scifconn_t *conn = scif_area->sc;
    uint64_t seqno = 0;
    int had_backlog;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_ISENDCONTIGMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_ISENDCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "scif_iSendContig");

    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);

    /* Fast path: nothing queued ahead of us and the connection can take data */
    if (MPIDI_CH3I_Sendq_empty(scif_area->send_queue) &&
        MPID_nem_scif_poll_send(conn->fd, &conn->csend)) {
        MPID_IOV wire[2];

        wire[0].MPID_IOV_BUF = hdr;
        wire[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t);
        wire[1].MPID_IOV_BUF = data;
        wire[1].MPID_IOV_LEN = data_sz;

        nsent = MPID_nem_scif_writev(conn->fd, &conn->csend, wire, 2, &seqno);
        MPIU_ERR_CHKANDJUMP1(nsent <= 0, mpi_errno, MPI_ERR_OTHER,
                             "**scif_writev", "**scif_writev %s", MPIU_Strerror(errno));
        MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE,
                         (MPIU_DBG_FDEST, "scif_send " MPIDI_MSG_SZ_FMT " fd=%d",
                          nsent, conn->fd));

        if (nsent == sizeof(MPIDI_CH3_Pkt_t) + data_sz) {
            /* the whole message was written */
            int (*on_data_avail) (MPIDI_VC_t *, MPID_Request *, int *);

            /* a non-zero sequence number means the request still has to be
             * queued, carrying that number */
            if (seqno)
                goto enqueue_request;

            on_data_avail = sreq->dev.OnDataAvail;
            if (on_data_avail) {
                int done = 0;

                mpi_errno = on_data_avail(vc, sreq, &done);
                if (mpi_errno)
                    MPIU_ERR_POP(mpi_errno);

                /* callback reports more to send: queue the request */
                if (!done)
                    goto enqueue_request;

                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
                goto fn_exit;
            }

            /* no callback registered: the request is simply complete */
            MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
            MPIDI_CH3U_Request_complete(sreq);
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
            goto fn_exit;
        }
    }

    /* Describe what is still unsent in the request's own iov */
    if (nsent >= sizeof(MPIDI_CH3_Pkt_t)) {
        /* header fully sent: only the tail of the payload remains */
        sreq->dev.iov_count = 1;
        sreq->dev.iov[0].MPID_IOV_BUF = (char *) data + (nsent - sizeof(MPIDI_CH3_Pkt_t));
        sreq->dev.iov[0].MPID_IOV_LEN = data_sz - (nsent - sizeof(MPIDI_CH3_Pkt_t));
    }
    else {
        /* header not (fully) sent: keep a private copy and point at the rest */
        sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
        sreq->dev.iov[0].MPID_IOV_BUF = (char *) &sreq->dev.pending_pkt + nsent;
        sreq->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t) - nsent;
        sreq->dev.iov_count = 1;
        if (data_sz) {
            sreq->dev.iov[1].MPID_IOV_BUF = data;
            sreq->dev.iov[1].MPID_IOV_LEN = data_sz;
            sreq->dev.iov_count = 2;
        }
    }
    seqno = 0;

  enqueue_request:
    RQ_SCIF(sreq)->seqno = seqno;
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "enqueuing");
    MPIU_Assert(seqno || (sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0));

    sreq->dev.iov_offset = 0;
    sreq->ch.vc = vc;

    /* The request is always queued.  If other sends were already waiting,
     * additionally try to drain the queue now. */
    had_backlog = !MPIDI_CH3I_Sendq_empty(scif_area->send_queue);
    MPIDI_CH3I_Sendq_enqueue(&scif_area->send_queue, sreq);
    if (had_backlog) {
        mpi_errno = MPID_nem_scif_send_queued(vc, &scif_area->send_queue);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_ISENDCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Beispiel #8
0
/*
 * MPID_nem_scif_iStartContigMsg - send a packet header plus contiguous
 * payload over the SCIF connection attached to vc, creating a request only
 * when one is needed.
 *
 * vc       - virtual connection to send on
 * hdr      - packet header; a full sizeof(MPIDI_CH3_Pkt_t) is always sent
 * hdr_sz   - only asserted to be <= sizeof(MPIDI_CH3_Pkt_t)
 * data     - contiguous payload
 * data_sz  - payload length in bytes
 * sreq_ptr - set to NULL when the whole message was written and no sequence
 *            number was handed back; otherwise set to the newly created,
 *            queued request.  It is not written on the error paths.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_nem_scif_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz,
                                  void *data, MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq = NULL;
    MPIDI_msg_sz_t offset = 0;
    MPID_nem_scif_vc_area *vc_scif = VC_SCIF(vc);
    scifconn_t *sc = vc_scif->sc;
    uint64_t seqno = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_ISTARTCONTIGMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_ISTARTCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "scif_iStartContigMsg");
    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);

    /* Try a direct write only when nothing is queued ahead of this message
     * and the connection polls as ready. */
    if (MPIDI_CH3I_Sendq_empty(vc_scif->send_queue) &&
        MPID_nem_scif_poll_send(sc->fd, &sc->csend)) {
        MPID_IOV iov[2];

        iov[0].MPID_IOV_BUF = hdr;
        iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t);
        iov[1].MPID_IOV_BUF = data;
        iov[1].MPID_IOV_LEN = data_sz;

        offset = MPID_nem_scif_writev(sc->fd, &sc->csend, iov, 2, &seqno);
        MPIU_ERR_CHKANDJUMP1(offset <= 0, mpi_errno, MPI_ERR_OTHER,
                             "**scif_writev", "**scif_writev %s", MPIU_Strerror(errno));
        MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE,
                         (MPIU_DBG_FDEST, "scif_send " MPIDI_MSG_SZ_FMT " fd=%d",
                          offset, sc->fd));

        if (offset == sizeof(MPIDI_CH3_Pkt_t) + data_sz) {
            if (!seqno) {
                /* sent whole message */
                *sreq_ptr = NULL;
                goto fn_exit;
            }
        }
    }

    /* create and enqueue request */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "enqueuing");

    /* create a request; check it before the first dereference */
    sreq = MPID_Request_create();
    MPIU_Assert(sreq != NULL);
    RQ_SCIF(sreq)->seqno = seqno;
    MPIU_Object_set_ref(sreq, 2);
    sreq->kind = MPID_REQUEST_SEND;

    sreq->dev.OnDataAvail = 0;
    sreq->ch.vc = vc;
    sreq->dev.iov_offset = 0;

    /* With a non-zero seqno no iov is set up for the request; otherwise
     * describe the part of the message that has not been written yet. */
    if (!seqno) {
        if (offset < sizeof(MPIDI_CH3_Pkt_t)) {
            /* header not (fully) sent: keep a private copy of it */
            sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
            sreq->dev.iov[0].MPID_IOV_BUF = (char *) &sreq->dev.pending_pkt + offset;
            sreq->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t) - offset;
            if (data_sz) {
                sreq->dev.iov[1].MPID_IOV_BUF = data;
                sreq->dev.iov[1].MPID_IOV_LEN = data_sz;
                sreq->dev.iov_count = 2;
            }
            else
                sreq->dev.iov_count = 1;
        }
        else {
            /* header fully sent: only the tail of the payload remains */
            sreq->dev.iov[0].MPID_IOV_BUF = (char *) data + (offset - sizeof(MPIDI_CH3_Pkt_t));
            sreq->dev.iov[0].MPID_IOV_LEN = data_sz - (offset - sizeof(MPIDI_CH3_Pkt_t));
            sreq->dev.iov_count = 1;
        }

        MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0);
    }

    if (MPIDI_CH3I_Sendq_empty(vc_scif->send_queue)) {
        /* this will be the first send on the queue: queue it and set
         * the write flag on the pollfd */
        MPIDI_CH3I_Sendq_enqueue(&vc_scif->send_queue, sreq);
    }
    else {
        /* there are other sends in the queue before this one: try to
         * send from the queue */
        MPIDI_CH3I_Sendq_enqueue(&vc_scif->send_queue, sreq);
        mpi_errno = MPID_nem_scif_send_queued(vc, &vc_scif->send_queue);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);
    }

    *sreq_ptr = sreq;

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_ISTARTCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* MPIDI_CH3I_SendNoncontig - Sends a noncontiguous message by packing it
   directly into cells.  The caller must have set up sreq->dev.segment as
   well as segment_first and segment_size before calling.

   When n_hdr_iov is positive the work is delegated to
   MPIDI_CH3_SendNoncontig_iov().  Otherwise the request is either queued
   behind sends that are already pending, or packed into cells until the
   data is exhausted or the send routines ask to retry later.

   Returns MPI_SUCCESS or an MPI error code. */
int MPIDI_CH3I_SendNoncontig( MPIDI_VC_t *vc, MPIR_Request *sreq, void *header, intptr_t hdr_sz,
                              MPL_IOV *hdr_iov, int n_hdr_iov)
{
    int mpi_errno = MPI_SUCCESS;
    int stalled = 0;
    intptr_t first_at_entry = sreq->dev.segment_first;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);

    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)header);

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);

    /* Extended-header case: the segments are translated to iovs and combined
       with the extended header iov by the iov-based routine. */
    if (n_hdr_iov > 0) {
        mpi_errno = MPIDI_CH3_SendNoncontig_iov(vc, sreq, header, hdr_sz,
                                                hdr_iov, n_hdr_iov);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
        goto fn_exit;
    }

    /* Other sends are already waiting: queue behind them, then give the
       queue a chance to make progress. */
    if (!MPIDI_CH3I_Sendq_empty(MPIDI_CH3I_shm_sendq)) { /* MT */
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, TERSE, "enqueuing");

        sreq->ch.vc = vc;
        sreq->ch.noncontig = TRUE;
        sreq->ch.header_sz = hdr_sz;
        sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *)header;

        MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
        mpi_errno = MPIDI_CH3I_Shm_send_progress();
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
        goto fn_exit;
    }

    /* Pack and send the header cell, then as many data cells as possible */
    MPID_nem_mpich_send_seg_header(sreq->dev.segment_ptr, &sreq->dev.segment_first,
                                   sreq->dev.segment_size, header, hdr_sz, vc, &stalled);
    while (!stalled && sreq->dev.segment_first < sreq->dev.segment_size) {
        MPID_nem_mpich_send_seg(sreq->dev.segment_ptr, &sreq->dev.segment_first,
                                sreq->dev.segment_size, vc, &stalled);
    }

    if (stalled) {
        /* Not everything could be sent now: the request goes on the queue */
        sreq->ch.vc = vc;
        sreq->ch.noncontig = TRUE;
        if (sreq->dev.segment_first != first_at_entry) {
            /* some of the message already went out: this becomes the
               active send */
            MPIR_Assert(MPIDI_CH3I_shm_active_send == NULL);
            MPIDI_CH3I_shm_active_send = sreq;
        }
        else {
            /* nothing went out at all: the header must be kept for later */
            sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *)header;
            sreq->ch.header_sz = hdr_sz;
        }
        MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
        goto fn_exit;
    }

    /* All data has been sent: finish the request */
    if (sreq->dev.OnDataAvail) {
        int done = 0;

        mpi_errno = sreq->dev.OnDataAvail(vc, sreq, &done);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
        MPIR_Assert(done); /* nothing is left to send, so this must complete */

        MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, ".... complete %d bytes", (int) (sreq->dev.segment_size));
    }
    else {
        MPIR_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
        mpi_errno = MPID_Request_complete(sreq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
        MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, ".... complete %d bytes", (int) (sreq->dev.segment_size));
    }

 fn_exit:
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #10
0
/*
 * MPIDI_CH3_iStartMsgv - begin sending the data described by iov[0..n_iov-1]
 * on vc, creating a request only when one is needed.
 *
 * iov[0] is treated as the packet header: its length is overwritten with
 * sizeof(MPIDI_CH3_Pkt_t) before anything is sent.
 *
 * On return *sreq_ptr is:
 *   - NULL when the whole message was written immediately, or when a request
 *     could not be allocated (an error code is returned in that case);
 *   - a queued request when data remains to be written or the connection
 *     cannot be used yet;
 *   - an already complete request (cc set to 0) carrying the error in
 *     status.MPI_ERROR when the immediate write failed or the VC is in the
 *     FAILED state.
 *
 * Returns MPI_SUCCESS or an MPI error code.  Note that when posting the
 * remaining write or the connect fails, the request has already been placed
 * on the send queue and is still handed back through *sreq_ptr.
 */
int MPIDI_CH3_iStartMsgv(MPIDI_VC_t * vc, MPL_IOV * iov, int n_iov, MPIR_Request ** sreq_ptr)
{
    MPIR_Request *sreq = NULL;
    MPIDI_CH3I_VC *vcch = &vc->ch;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSGV);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSGV);

    MPIR_Assert(n_iov <= MPL_IOV_LIMIT);

    /* The SOCK channel uses a fixed length header, the size of which is the
     * maximum of all possible packet headers */
    iov[0].MPL_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t);
    MPL_DBG_STMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE,
                 MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) iov[0].MPL_IOV_BUF));

    if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTED) { /* MT */
        /* Connection already formed.  If send queue is empty attempt to send
         * data, queuing any unsent data. */
        if (MPIDI_CH3I_SendQ_empty(vcch)) {     /* MT */
            int rc;
            size_t nb;

            MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "send queue empty, attempting to write");
            MPL_DBG_PKT(vcch->conn, (MPIDI_CH3_Pkt_t *) iov[0].MPL_IOV_BUF, "isend");

            /* MT - need some signalling to lock down our right to use the
             * channel, thus ensuring that the progress engine does
             * not also try to write */
            rc = MPIDI_CH3I_Sock_writev(vcch->sock, iov, n_iov, &nb);
            if (rc == MPI_SUCCESS) {
                int offset = 0;

                /* NOTE(review): the value is cast to unsigned long but the
                 * format is %ld; confirm what argument type the _D debug
                 * macro expects. */
                MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE,
                              "wrote %ld bytes", (unsigned long) nb);

                /* Walk the iov, consuming the nb bytes that were written.
                 * The first entry that was not written completely marks the
                 * point from which the remainder has to be queued. */
                while (offset < n_iov) {
                    /* Compare in the unsigned size domain: casting the entry
                     * length to int would truncate lengths above INT_MAX and
                     * misclassify a complete write as a partial one. */
                    if (nb >= iov[offset].MPL_IOV_LEN) {
                        nb -= iov[offset].MPL_IOV_LEN;
                        offset++;
                    } else {
                        MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE,
                                    "partial write, request enqueued at head");
                        sreq = create_request(iov, n_iov, offset, nb);
                        if (sreq == NULL) {
                            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
                        }
                        MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
                        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE,
                                        (MPL_DBG_FDEST, "posting writev, vc=0x%p, sreq=0x%08x", vc,
                                         sreq->handle));
                        vcch->conn->send_active = sreq;
                        mpi_errno =
                            MPIDI_CH3I_Sock_post_writev(vcch->conn->sock, sreq->dev.iov + offset,
                                                        sreq->dev.iov_count - offset, NULL);
                        /* --BEGIN ERROR HANDLING-- */
                        if (mpi_errno != MPI_SUCCESS) {
                            mpi_errno =
                                MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, __func__, __LINE__,
                                                     MPI_ERR_OTHER, "**ch3|sock|postwrite",
                                                     "ch3|sock|postwrite %p %p %p", sreq,
                                                     vcch->conn, vc);
                        }
                        /* --END ERROR HANDLING-- */
                        break;
                    }
                }

                if (offset == n_iov) {
                    /* Everything went out; sreq stays NULL. */
                    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "entire write complete");
                }
            }
            /* --BEGIN ERROR HANDLING-- */
            else {
                MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, TYPICAL,
                              "ERROR - MPIDI_CH3I_Sock_writev failed, rc=%d", rc);
                /* Hand back a request that is already complete and carries
                 * the write error. */
                sreq = MPIR_Request_create(MPIR_REQUEST_KIND__SEND);
                if (sreq == NULL) {
                    MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
                }
                MPIR_cc_set(&(sreq->cc), 0);
                sreq->status.MPI_ERROR = MPIR_Err_create_code(rc,
                                                              MPIR_ERR_RECOVERABLE, __func__,
                                                              __LINE__, MPI_ERR_INTERN,
                                                              "**ch3|sock|writefailed",
                                                              "**ch3|sock|writefailed %d", rc);
                /* Make sure that the caller sees this error */
                mpi_errno = sreq->status.MPI_ERROR;
            }
            /* --END ERROR HANDLING-- */
        } else {
            MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "send in progress, request enqueued");
            sreq = create_request(iov, n_iov, 0, 0);
            if (sreq == NULL) {
                MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
            }
            MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
        }
    } else if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTING) {
        MPL_DBG_VCUSE(vc, "connecting.  enqueuing request");

        /* queue the data so it can be sent after the connection is formed */
        sreq = create_request(iov, n_iov, 0, 0);
        if (sreq == NULL) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
        }
        MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    } else if (vcch->state == MPIDI_CH3I_VC_STATE_UNCONNECTED) {
        MPL_DBG_VCUSE(vc, "unconnected.  posting connect and enqueuing request");

        /* queue the data so it can be sent after the connection is formed */
        sreq = create_request(iov, n_iov, 0, 0);
        if (sreq == NULL) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
        }
        MPIDI_CH3I_SendQ_enqueue(vcch, sreq);

        /* Form a new connection.  A failure to post the connect must not be
         * dropped silently, otherwise the queued request would never make
         * progress without the caller being told. */
        mpi_errno = MPIDI_CH3I_VC_post_connect(vc);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
    } else if (vcch->state != MPIDI_CH3I_VC_STATE_FAILED) {
        /* Unable to send data at the moment, so queue it for later */
        MPL_DBG_VCUSE(vc, "forming connection, request enqueued");
        sreq = create_request(iov, n_iov, 0, 0);
        if (sreq == NULL) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
        }
        MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    /* --BEGIN ERROR HANDLING-- */
    else {
        /* Connection failed, so allocate a request and return an error. */
        MPL_DBG_VCUSE(vc, "ERROR - connection failed");
        sreq = MPIR_Request_create(MPIR_REQUEST_KIND__SEND);
        if (sreq == NULL) {
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
        }
        MPIR_cc_set(&(sreq->cc), 0);
        sreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
                                                      MPIR_ERR_RECOVERABLE, __func__, __LINE__,
                                                      MPI_ERR_INTERN, "**ch3|sock|connectionfailed",
                                                      0);
        /* Make sure that the caller sees this error */
        mpi_errno = sreq->status.MPI_ERROR;
    }
    /* --END ERROR HANDLING-- */

    /* Shared exit path: reached both by falling through and from the error
     * macros above. */
  fn_fail:
    *sreq_ptr = sreq;
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSGV);
    return mpi_errno;
}
Beispiel #11
0
/*
 * MPIDI_CH3_iSendv - send the data described by iov[0..n_iov-1] on vc using
 * the caller-supplied request sreq.
 *
 * iov[0] is treated as the packet header: its length must not exceed
 * sizeof(MPIDI_CH3_Pkt_t) and is overwritten with exactly that size.
 *
 * Behaviour by channel state:
 *   - CONNECTED with an empty send queue: the data is written immediately.
 *     After a complete write the request's OnDataAvail handler is called (or
 *     the request is completed when there is none); after a partial write
 *     the request is updated, put at the head of the send queue and the
 *     remaining write is posted.
 *   - CONNECTED with a non-empty queue, CONNECTING, or any other non-failed
 *     state: the request is updated and appended to the send queue.
 *   - UNCONNECTED: as above, and a connect is posted.
 *   - FAILED: the request is completed with status.MPI_ERROR set.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_iSendv(MPIDI_VC_t * vc, MPID_Request * sreq, 
                     MPID_IOV * iov, int n_iov)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vcch = &vc->ch;
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISENDV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISENDV);

    MPIU_Assert(n_iov <= MPID_IOV_LIMIT);
    MPIU_Assert(iov[0].MPID_IOV_LEN <= sizeof(MPIDI_CH3_Pkt_t));

    /* The sock channel uses a fixed length header, the size of which is the 
       maximum of all possible packet headers */
    iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t);
    MPIU_DBG_STMT(CH3_CHANNEL,VERBOSE,
         MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)iov[0].MPID_IOV_BUF));

    if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTED) /* MT */
    {
        /* Connection already formed.  If send queue is empty attempt to send 
           data, queuing any unsent data. */
        if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
        {
            MPIU_Size_t nb;
            int rc;

            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
                         "send queue empty, attempting to write");
            
            MPIU_DBG_PKT(vcch->conn,(MPIDI_CH3_Pkt_t*)iov[0].MPID_IOV_BUF,
                         "isendv");
            /* MT - need some signalling to lock down our right to use the 
               channel, thus ensuring that the progress engine does
               not also try to write */

            /* FIXME: the current code only aggressively writes the first IOV.  
               Eventually it should be changed to aggressively write
               as much as possible.  Ideally, the code would be shared between 
               the send routines and the progress engine. */
            rc = MPIDU_Sock_writev(vcch->sock, iov, n_iov, &nb);
            if (rc == MPI_SUCCESS)
            {
                int offset = 0;

                MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE,
                               "wrote %ld bytes", (unsigned long) nb);
                
                /* Walk the iov, consuming the nb bytes that were written.
                   The first entry that was not written completely is where
                   the remaining data starts. */
                while (offset < n_iov)
                {
                    /* Compare without narrowing the entry length to int: the
                       cast would truncate lengths above INT_MAX and make a
                       completely written entry look like a partial write. */
                    if (iov[offset].MPID_IOV_LEN <= nb)
                    {
                        nb -= iov[offset].MPID_IOV_LEN;
                        offset++;
                    }
                    else
                    {
                        MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
                             "partial write, request enqueued at head");
                        update_request(sreq, iov, n_iov, offset, nb);
                        MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
                        MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
    (MPIU_DBG_FDEST,"posting writev, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
                        vcch->conn->send_active = sreq;
                        mpi_errno = MPIDU_Sock_post_writev(vcch->conn->sock, 
                                           sreq->dev.iov + offset,
                                           sreq->dev.iov_count - offset, NULL);
                        /* --BEGIN ERROR HANDLING-- */
                        if (mpi_errno != MPI_SUCCESS)
                        {
                            mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
                                                             "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
                                                             sreq, vcch->conn, vc);
                        }
                        /* --END ERROR HANDLING-- */

                        break;
                    }

                }
                if (offset == n_iov)
                {
                    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
                                 "write complete, calling OnDataAvail fcn");
                    reqFn = sreq->dev.OnDataAvail;
                    if (!reqFn) {
                        MPIU_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                        MPIDI_CH3U_Request_complete(sreq);
                    }
                    else {
                        /* Initialised so the test below is well defined even
                           if the handler leaves it untouched. */
                        int complete = 0;
                        mpi_errno = reqFn( vc, sreq, &complete );
                        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                        if (!complete) {
                            /* The handler reported the request as not
                               complete: queue it and post a write of the
                               iov now stored in the request. */
                            MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
                            MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,
    (MPIU_DBG_FDEST,"posting writev, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
                            vcch->conn->send_active = sreq;
                            mpi_errno = MPIDU_Sock_post_writev(
                                vcch->conn->sock, sreq->dev.iov, 
                                sreq->dev.iov_count, NULL);
                            /* --BEGIN ERROR HANDLING-- */
                            if (mpi_errno != MPI_SUCCESS)
                            {
                                mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
                                                                 "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
                                                                 sreq, vcch->conn, vc);
                            }
                            /* --END ERROR HANDLING-- */
                        }
                    }
                }
            }
            /* --BEGIN ERROR HANDLING-- */
            else if (MPIR_ERR_GET_CLASS(rc) == MPIDU_SOCK_ERR_NOMEM)
            {
                MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,
                             "MPIDU_Sock_writev failed, out of memory");
                /* NOTE(review): this path only records the error in the
                   request; the request is neither completed nor queued and
                   mpi_errno stays MPI_SUCCESS.  Confirm this is intended. */
                sreq->status.MPI_ERROR = MPIR_ERR_MEMALLOCFAILED;
            }
            else
            {
                MPIU_DBG_MSG_D(CH3_CHANNEL,TYPICAL,
                               "MPIDU_Sock_writev failed, rc=%d", rc);
                /* Connection just failed.  Mark the request complete and 
                   return an error. */
                MPIU_DBG_VCCHSTATECHANGE(vc,VC_STATE_FAILED);
                /* FIXME: Shouldn't the vc->state also change? */

                vcch->state = MPIDI_CH3I_VC_STATE_FAILED;
                sreq->status.MPI_ERROR = MPIR_Err_create_code( rc,
                               MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, 
                               MPI_ERR_INTERN, "**ch3|sock|writefailed", 
                               "**ch3|sock|writefailed %d", rc );
                 /* MT - CH3U_Request_complete performs write barrier */
                MPIDI_CH3U_Request_complete(sreq);
                /* Return error to calling routine */
                mpi_errno = sreq->status.MPI_ERROR;
            }
            /* --END ERROR HANDLING-- */
        }
        else
        {
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"send queue not empty, enqueuing");
            update_request(sreq, iov, n_iov, 0, 0);
            MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
        }
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTING)
    {
        /* queuing the data so it can be sent later. */
        MPIU_DBG_VCUSE(vc,"connecting.  Enqueuing request");
        update_request(sreq, iov, n_iov, 0, 0);
        MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_UNCONNECTED)
    {
        /* Form a new connection, queuing the data so it can be sent later. */
        MPIU_DBG_VCUSE(vc,"unconnected.  Enqueuing request");
        update_request(sreq, iov, n_iov, 0, 0);
        MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
        mpi_errno = MPIDI_CH3I_VC_post_connect(vc);
        if (mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }
    }
    else if (vcch->state != MPIDI_CH3I_VC_STATE_FAILED)
    {
        /* Unable to send data at the moment, so queue it for later */
        MPIU_DBG_VCUSE(vc,"still connecting.  enqueuing request");
        update_request(sreq, iov, n_iov, 0, 0);
        MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    /* --BEGIN ERROR HANDLING-- */
    else
    {
        MPIU_DBG_VCUSE(vc,"connection failed");
        /* Connection failed.  Mark the request complete and record the
           error in the request. */
        /* NOTE(review): unlike the write-failure branch above, mpi_errno is
           left at MPI_SUCCESS here, so the error is only visible through
           sreq->status.MPI_ERROR.  Confirm callers rely on that. */
        /* TODO: Create an appropriate error message */
        sreq->status.MPI_ERROR = MPI_ERR_INTERN;
        /* MT - CH3U_Request_complete performs write barrier */
        MPIDI_CH3U_Request_complete(sreq);
    }
    /* --END ERROR HANDLING-- */

    /* Shared exit path for both normal completion and error jumps. */
 fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISENDV);
    return mpi_errno;
}
/*
 * MPIDI_CH3_iStartMsg - start sending a single packet header on vc (SCTP
 * channel), creating a request only when one is needed.
 *
 * hdr points at the packet; the hdr_sz argument is replaced by
 * sizeof(MPIDI_CH3_Pkt_t) because the channel always sends fixed length
 * headers.  The stream to use is derived from the packet and from the
 * request *sreq_ptr refers to on entry.
 *
 * On return *sreq_ptr is:
 *   - NULL when the header was written completely (or when an error occurred
 *     before a request existed);
 *   - a request holding the unsent data when the write was partial, a send
 *     is already active on the stream, or the connection is not ready;
 *   - a request with cc == 0 and status.MPI_ERROR == MPI_ERR_INTERN when the
 *     write failed or the VC is in the FAILED state (the function itself
 *     still returns MPI_SUCCESS in those two cases).
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_iStartMsg(MPIDI_VC_t * vc, void * hdr, MPIDI_msg_sz_t hdr_sz, MPID_Request ** sreq_ptr)
{
    MPID_Request * sreq = NULL;
    int mpi_errno = MPI_SUCCESS;
    int stream_no, ppid;
    MPIDI_CH3_Pkt_t* pkt;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSG);
    
    MPIDI_DBG_PRINTF((50, FCNAME, "entering"));
#ifdef MPICH_DBG_OUTPUT
    /* --BEGIN ERROR HANDLING-- */
    if (hdr_sz > sizeof(MPIDI_CH3_Pkt_t))
    {
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**arg", 0);
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */
#endif

    /* The sctp channel uses a fixed length header, the size of which is the maximum of all possible packet headers */
    hdr_sz = sizeof(MPIDI_CH3_Pkt_t);
    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t*)hdr);

    /* find out stream no. */
    pkt = (MPIDI_CH3_Pkt_t*) hdr;
    stream_no = Req_Stream_from_pkt_and_req(pkt, *sreq_ptr);  /*  don't know pkt type here so pass it in */
    ppid = 0;

    if (SEND_CONNECTED(vc, stream_no) == MPIDI_CH3I_VC_STATE_CONNECTED)
    {
        /* Connection already formed.  If send queue is empty attempt to send data, queuing any unsent data. */
        if (!SEND_ACTIVE(vc, stream_no)) /* MT */
        {
            MPIU_Size_t nb;
            int rc;

            MPIU_Assert(MPIDI_CH3I_SendQ_empty_x(vc, stream_no));

            MPIDI_DBG_PRINTF((55, FCNAME, "send queue empty, attempting to write"));
            
            /* MT - need some signalling to lock down our right to use the channel, thus ensuring that the progress engine does
               not also try to write */
            rc = MPIDU_Sctp_write(vc, hdr, hdr_sz, stream_no, ppid, &nb);
            
            if (rc == MPI_SUCCESS)
            {
                /* nb is a size type; print it through an explicit
                   unsigned long cast with the matching %lu conversion. */
                MPIDI_DBG_PRINTF((55, FCNAME, "wrote %lu bytes", (unsigned long) nb));
                
                if (nb == hdr_sz)
                {
                    MPIDI_DBG_PRINTF((55, FCNAME, "entire write complete, %lu bytes", (unsigned long) nb));
                    /* done.  get us out of here as quickly as possible. */
                }
                else
                {
                    MPIDI_DBG_PRINTF((55, FCNAME, "partial write of %lu bytes, request enqueued at head",
                                      (unsigned long) nb));
                    sreq = create_request(hdr, hdr_sz, nb);

                    /* --BEGIN ERROR HANDLING-- */
                    if (sreq == NULL)
                    {
                        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
                                                         "**nomem", 0);
                        goto fn_fail;
                    }
                    /* --END ERROR HANDLING-- */
                    
                    /* put it in the Global SendQ */
                    MPIDU_Sctp_post_write(vc, sreq, hdr_sz-nb, hdr_sz-nb, NULL, stream_no); 

                    MPIDI_DBG_PRINTF((55, FCNAME, "posting write, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
                    
                    /* NOTE(review): mpi_errno is not assigned by the
                       post_write call above, so this check cannot fire as
                       written.  Confirm whether MPIDU_Sctp_post_write yields
                       a status that should be stored in mpi_errno. */
                    /* --BEGIN ERROR HANDLING-- */
                    if (mpi_errno != MPI_SUCCESS)
                    {
                        /* vc->ch is a struct, so its address is what the %p
                           conversion needs. */
                        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
                                                         "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p", /* FIXME change error code */
                                                         sreq, &vc->ch, vc);
                        goto fn_fail;
                    }
                    /* --END ERROR HANDLING-- */
                }
            }
            /* --BEGIN ERROR HANDLING-- */
            else
            {
                MPIDI_DBG_PRINTF((55, FCNAME, "ERROR - MPIDU_Sctp_write failed, rc=%d", rc));
                /* Hand back a completed request that carries the error;
                   mpi_errno itself is left unchanged. */
                sreq = MPID_Request_create();
                if (sreq == NULL)
                {
                    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
                    goto fn_fail;
                }
                sreq->kind = MPID_REQUEST_SEND;
                sreq->cc = 0;
                sreq->status.MPI_ERROR = MPI_ERR_INTERN;
            }
            /* --END ERROR HANDLING-- */
        }
        else
        {
            MPIDI_DBG_PRINTF((55, FCNAME, "send in progress, request enqueued"));
            sreq = create_request(hdr, hdr_sz, 0);
            /* --BEGIN ERROR HANDLING-- */
            if (sreq == NULL)
            {
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
                goto fn_fail;
            }
            /* --END ERROR HANDLING-- */
            MPIDI_CH3I_SendQ_enqueue_x(vc, sreq, stream_no);
        }
    }
    else if (SEND_CONNECTED(vc, stream_no) == MPIDI_CH3I_VC_STATE_UNCONNECTED) /* MT */
    {
        MPIDI_DBG_PRINTF((55, FCNAME, "unconnected.  posting connect and enqueuing request"));
        
        /* queue the data so it can be sent after the connection is formed */
        sreq = create_request(hdr, hdr_sz, 0);
        /* --BEGIN ERROR HANDLING-- */
        if (sreq == NULL)
        {
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
            goto fn_fail;
        }
        /* --END ERROR HANDLING-- */
        
        /* Form a new connection, called once per association (i.e. not per stream) */
        if(vc->ch.pkt == NULL)
        {
            mpi_errno = MPIDI_CH3I_VC_post_connect(vc);
            /* --BEGIN ERROR HANDLING-- */
            if (mpi_errno != MPI_SUCCESS)
            {
                mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
                goto fn_fail;
            }
            /* --END ERROR HANDLING-- */
        }
        
        MPIDU_Sctp_stream_init(vc, sreq, stream_no);
    }
    else if (vc->ch.state != MPIDI_CH3I_VC_STATE_FAILED)
    {
        /* Unable to send data at the moment, so queue it for later */
        MPIDI_DBG_PRINTF((55, FCNAME, "forming connection, request enqueued"));
        sreq = create_request(hdr, hdr_sz, 0);
        /* --BEGIN ERROR HANDLING-- */
        if (sreq == NULL)
        {
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
            goto fn_fail;
        }
        /* --END ERROR HANDLING-- */
        MPIDU_Sctp_stream_init(vc, sreq, stream_no);
    }
    /* --BEGIN ERROR HANDLING-- */
    else
    {
        /* Connection failed, so allocate a request that carries the error. */
        MPIDI_DBG_PRINTF((55, FCNAME, "ERROR - connection failed"));
        sreq = MPID_Request_create();
        if (sreq == NULL)
        {
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
            goto fn_fail;
        }
        sreq->kind = MPID_REQUEST_SEND;
        sreq->cc = 0;
        /* TODO: Create an appropriate error message */
        sreq->status.MPI_ERROR = MPI_ERR_INTERN;
    }
    /* --END ERROR HANDLING-- */

  fn_exit:
    /* Whatever request exists at this point (possibly NULL) is returned. */
    *sreq_ptr = sreq;
    MPIDI_DBG_PRINTF((50, FCNAME, "exiting"));
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSG);
    return mpi_errno;
 fn_fail:
    /* --BEGIN ERROR HANDLING-- */    
    goto fn_exit;
    /* --END ERROR HANDLING-- */    
}