Beispiel #1
0
/*
 * MPIDI_CH3_PktHandler_RndvSend - handle an incoming rendezvous-send (data)
 * packet.
 *
 * The receive request named by the packet is looked up and any payload that
 * follows the fixed-size packet header is passed to
 * MPIDI_CH3U_Receive_data_found().
 *
 *   vc      - virtual connection the packet arrived on (not used here)
 *   pkt     - the received packet; payload starts sizeof(MPIDI_CH3_Pkt_t)
 *             bytes after it
 *   buflen  - in: bytes available starting at pkt;
 *             out: bytes consumed (header plus payload handed off)
 *   rreqp   - out: NULL if the request was completed here, otherwise the
 *             request that is still waiting for data
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_PktHandler_RndvSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, 
				   MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_rndv_send_t *rndv_pkt = &pkt->rndv_send;
    MPID_Request *rreq;
    MPIDI_msg_sz_t payload_len;
    char *payload;
    int data_done;
    int finish_here;

    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received rndv send (data) pkt");

    MPID_Request_get_ptr(rndv_pkt->receiver_req_id, rreq);

    /* Never hand over more payload than the receive request expects. */
    if (*buflen - sizeof(MPIDI_CH3_Pkt_t) >= rreq->dev.recv_data_sz) {
        payload_len = rreq->dev.recv_data_sz;
    }
    else {
        payload_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
    }
    payload = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);

    if (rreq->dev.recv_data_sz != 0) {
        mpi_errno = MPIDI_CH3U_Receive_data_found(rreq, payload, &payload_len,
                                                  &data_done);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
                                 "**ch3|postrecv %s", "MPIDI_CH3_PKT_RNDV_SEND");
        }

        /* payload_len now holds what the receive path reported back */
        *buflen = sizeof(MPIDI_CH3_Pkt_t) + payload_len;
        finish_here = data_done;
    }
    else {
        /* Zero-byte receive: only the header is consumed. */
        *buflen = sizeof(MPIDI_CH3_Pkt_t);
        finish_here = 1;
    }

    if (finish_here) {
        mpi_errno = MPID_Request_complete(rreq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
        *rreqp = NULL;
    }
    else {
        /* More data is still expected for this request. */
        *rreqp = rreq;
    }

 fn_fail:
    return mpi_errno;
}
Beispiel #2
0
/*
 * MPIDI_CH3_PktHandler_EagerSyncAck - handle the acknowledgement packet of an
 * eager synchronous send.
 *
 * Looks up the send request named in the packet and calls
 * MPID_Request_complete() on it.  On success *buflen is set to the size of
 * the fixed packet header and *rreqp to NULL.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_PktHandler_EagerSyncAck( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
				       intptr_t *buflen, MPIR_Request **rreqp )
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Request *send_req;
    MPIDI_CH3_Pkt_eager_sync_ack_t *ack_pkt = &pkt->eager_sync_ack;

    MPL_DBG_MSG_P(MPIDI_CH3_DBG_OTHER,VERBOSE,
                  "received eager sync ack pkt, sreq=0x%08x", ack_pkt->sender_req_id);

    MPIR_Request_get_ptr(ack_pkt->sender_req_id, send_req);

    /* Decrement the completion counter only; the data transfer itself may
       still be in progress, so it is not marked complete here. */

    /* FIXME: This sometimes segfaults */
    mpi_errno = MPID_Request_complete(send_req);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    /* Only the header was consumed and no request is left pending. */
    *rreqp = NULL;
    *buflen = sizeof(MPIDI_CH3_Pkt_t);

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #3
0
/*
 * check_req_complete - run the request's OnDataAvail handler, or complete the
 * request when it has none.
 *
 *   vc        - connection passed through to the handler
 *   req       - request to process
 *   complete  - out: 1 if no handler was attached (request completed here),
 *               otherwise whatever the handler stored (preset to 0)
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
static inline int check_req_complete(MPIDI_VC_t *vc, MPID_Request *req, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    int (*on_data_avail)(MPIDI_VC_t *, MPID_Request *, int *) = req->dev.OnDataAvail;

    if (!on_data_avail) {
        /* No handler attached: the request is finished. */
        *complete = 1;
        mpi_errno = MPID_Request_complete(req);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
    }
    else {
        *complete = 0;

        /* XXX DJG FIXME this feels like a hack */
        req->dev.iov_offset = 0;
        req->dev.iov_count = MPL_IOV_LIMIT;

        mpi_errno = on_data_avail(vc, req, complete);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
    }

fn_fail:
    return mpi_errno;
}
Beispiel #4
0
/*
 * handle_probe - event handler for a probe.
 *
 * The request is taken from the event's user pointer.  When the event
 * reports PTL_NI_NO_MATCH the request is flagged as "not found"; otherwise it
 * is flagged as found and source, tag and count are filled into its status
 * from the event's match bits and header data.  In both cases the request is
 * then completed.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
static int handle_probe(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const probe_req = e->user_ptr;
    MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE);

    if (e->ni_fail_type != PTL_NI_NO_MATCH) {
        REQ_PTL(probe_req)->found = TRUE;
        probe_req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
        probe_req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);
        MPIR_STATUS_SET_COUNT(probe_req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data));
    }
    else {
        REQ_PTL(probe_req)->found = FALSE;
    }

    /* The probe request is completed whether or not a match was found. */
    mpi_errno = MPID_Request_complete(probe_req);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #5
0
/*
 * MPIDI_CH3U_Handle_send_req - advance a send request after data was sent.
 *
 * If the request carries an OnDataAvail handler, that handler decides what
 * happens next and sets *complete.  Without a handler the request is
 * completed here and *complete is set to 1.
 *
 *   vc        - connection passed through to the handler
 *   sreq      - send request to process
 *   complete  - out: completion flag as described above
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3U_Handle_send_req(MPIDI_VC_t * vc, MPIR_Request * sreq, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    int (*on_data_avail) (MPIDI_VC_t *, MPIR_Request *, int *);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);

    /* The attached function replaces the old switch on the "ca" field;
     * other routines may also invoke it directly. */
    on_data_avail = sreq->dev.OnDataAvail;
    if (on_data_avail) {
        mpi_errno = on_data_avail(vc, sreq, complete);
    }
    else {
        MPIR_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
        mpi_errno = MPID_Request_complete(sreq);
        *complete = 1;
    }

    /* One shared error check for both paths. */
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_SEND_REQ);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Beispiel #6
0
/*
 * MPID_Rget_accumulate - request-based get-accumulate.
 *
 * Creates a user request (completion counter 1, reference count raised to 2:
 * one reference for the user, one for ch3) and either issues the operation
 * through MPIDI_CH3I_Get_accumulate() or, when there is nothing to do,
 * completes the request immediately.
 *
 * The operation is treated as a no-op only when the target is MPI_PROC_NULL
 * or when BOTH the origin data size and the target data size are zero.  The
 * target size must be derived from target_count/target_datatype: an operation
 * whose origin side is empty (for example one using MPI_NO_OP) still has to
 * fetch target data.
 *
 *   origin_addr/origin_count/origin_datatype  - data combined into the target
 *   result_addr/result_count/result_datatype  - buffer receiving the fetched data
 *   target_rank/target_disp/target_count/target_datatype - target location
 *   op        - accumulate operation
 *   win_ptr   - window; must be in a passive-target access state
 *   request   - out: the user request (written only on success)
 *
 * Returns MPI_SUCCESS, MPI_ERR_RMA_SYNC class error when the window is not in
 * a passive epoch, or another MPI error code.
 *
 * NOTE(review): on the failure paths after the request was created, ureq is
 * neither released nor returned -- confirm whether that is intended.
 */
int MPID_Rget_accumulate(const void *origin_addr, int origin_count,
                         MPI_Datatype origin_datatype, void *result_addr, int result_count,
                         MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
                         int target_count, MPI_Datatype target_datatype, MPI_Op op,
                         MPIR_Win * win_ptr, MPIR_Request ** request)
{
    int mpi_errno = MPI_SUCCESS;
    int dt_contig ATTRIBUTE((unused));
    MPIR_Datatype*dtp;
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    intptr_t data_sz, trg_data_sz;
    MPIR_Request *ureq;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_RGET_ACCUMULATE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_RGET_ACCUMULATE);

    /* request-based RMA operations are only valid within a passive epoch */
    MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state != MPIDI_RMA_PER_TARGET &&
                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_CALLED &&
                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_ISSUED &&
                        win_ptr->states.access_state != MPIDI_RMA_LOCK_ALL_GRANTED,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    /* Create user request, initially cc=1, ref=1 */
    ureq = MPIR_Request_create(MPIR_REQUEST_KIND__RMA);
    MPIR_ERR_CHKANDJUMP(ureq == NULL, mpi_errno, MPI_ERR_OTHER, "**nomemreq");

    /* This request is referenced by user and ch3 by default. */
    MPIR_Object_set_ref(ureq, 2);

    /* Note that GACC is only a no-op if no data goes in both directions.
     * The second size is the TARGET size; using the origin arguments here
     * would wrongly skip operations with an empty origin buffer. */
    MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, data_sz, dtp, dt_true_lb);
    MPIDI_Datatype_get_info(target_count, target_datatype, dt_contig, trg_data_sz, dtp, dt_true_lb);

    /* Enqueue or perform the RMA operation */
    if (target_rank != MPI_PROC_NULL && (data_sz != 0 || trg_data_sz != 0)) {
        mpi_errno = MPIDI_CH3I_Get_accumulate(origin_addr, origin_count,
                                              origin_datatype, result_addr,
                                              result_count, result_datatype,
                                              target_rank, target_disp,
                                              target_count, target_datatype, op, win_ptr, ureq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
    }
    else {
        /* Nothing to transfer: the request is complete right away. */
        mpi_errno = MPID_Request_complete(ureq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
    }

    *request = ureq;

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_RGET_ACCUMULATE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Beispiel #7
0
/*
 * handler_recv_complete - final event handler for a receive.
 *
 * Releases the request's memory descriptor (if it holds a valid handle),
 * frees every non-NULL chunk buffer attached to the request and then
 * completes the request taken from the event's user pointer.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
static int handler_recv_complete(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const rreq = e->user_ptr;
    int ptl_rc;
    int buf_idx;
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_COMPLETE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_COMPLETE);

    MPIU_Assert(e->type == PTL_EVENT_REPLY || e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW);

    /* Release the memory descriptor, if one is attached. */
    if (REQ_PTL(rreq)->md != PTL_INVALID_HANDLE) {
        ptl_rc = PtlMDRelease(REQ_PTL(rreq)->md);
        MPIR_ERR_CHKANDJUMP1(ptl_rc, mpi_errno, MPI_ERR_OTHER, "**ptlmdrelease", "**ptlmdrelease %s", MPID_nem_ptl_strerror(ptl_rc));
    }

    /* Free whichever chunk buffers were allocated. */
    for (buf_idx = 0; buf_idx < MPID_NEM_PTL_NUM_CHUNK_BUFFERS; ++buf_idx) {
        if (REQ_PTL(rreq)->chunk_buffer[buf_idx]) {
            MPIU_Free(REQ_PTL(rreq)->chunk_buffer[buf_idx]);
        }
    }

    mpi_errno = MPID_Request_complete(rreq);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_COMPLETE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #8
0
/*
 * MPIDI_CH3_Req_handler_rma_op_complete - bookkeeping run when the send
 * request of an RMA operation finishes.
 *
 * Decrements the target's count of packets waiting for local completion (when
 * the request points at a target), decrements the global active-request
 * counter and, if the send request refers to a user request, completes that
 * user request.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_Req_handler_rma_op_complete(MPIR_Request * sreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Win *win = NULL;
    MPIR_Request *user_req = NULL;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);

    if (sreq->dev.rma_target_ptr != NULL) {
        (sreq->dev.rma_target_ptr)->num_pkts_wait_for_local_completion--;
    }

    /* Look up the window and drop one active request. */
    MPIR_Win_get_ptr(sreq->dev.source_win_handle, win);
    MPIR_Assert(win != NULL);
    MPIDI_CH3I_RMA_Active_req_cnt--;
    MPIR_Assert(MPIDI_CH3I_RMA_Active_req_cnt >= 0);

    /* Without an attached user request there is nothing more to do. */
    if (sreq->dev.request_handle == MPI_REQUEST_NULL) {
        goto fn_exit;
    }

    MPIR_Request_get_ptr(sreq->dev.request_handle, user_req);
    mpi_errno = MPID_Request_complete(user_req);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_REQ_HANDLER_RMA_OP_COMPLETE);
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}
Beispiel #9
0
/*
 * MPID_nem_ptl_pkt_cancel_send_resp_handler - handle the reply to a
 * cancel-send request.
 *
 * The packet names the send request and carries an "ack" flag.  With ack set
 * the request's cancel bit is set to TRUE, every remaining get match entry is
 * unlinked (completing the request once per entry) and the entry array is
 * freed.  Without ack the cancel bit is set to FALSE.  In both cases the
 * request is completed once more at the end and *rreqp is set to NULL.
 *
 * NOTE(review): *buflen is not written by this handler -- confirm that the
 * caller does not rely on it.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_nem_ptl_pkt_cancel_send_resp_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                              MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
{
    int mpi_errno = MPI_SUCCESS;
    int ptl_rc;
    int get_idx;
    MPID_Request *send_req;
    MPIDI_nem_ptl_pkt_cancel_send_resp_t *resp = (MPIDI_nem_ptl_pkt_cancel_send_resp_t *)pkt;

    MPID_Request_get_ptr(resp->sender_req_id, send_req);

    if (!resp->ack) {
        MPIR_STATUS_SET_CANCEL_BIT(send_req->status, FALSE);
        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"unable to cancel message");
    }
    else {
        MPIR_STATUS_SET_CANCEL_BIT(send_req->status, TRUE);

        /* Unlink the get MEs that are still posted; each one accounts for
           one completion of the request. */
        for (get_idx = 0; get_idx < REQ_PTL(send_req)->num_gets; get_idx++) {
            ptl_rc = PtlMEUnlink(REQ_PTL(send_req)->get_me_p[get_idx]);
            MPIR_ERR_CHKANDJUMP1(ptl_rc, mpi_errno, MPI_ERR_OTHER, "**ptlmeunlink", "**ptlmeunlink %s", MPID_nem_ptl_strerror(ptl_rc));
            mpi_errno = MPID_Request_complete(send_req);
            if (mpi_errno) {
                MPIR_ERR_POP(mpi_errno);
            }
        }
        if (REQ_PTL(send_req)->get_me_p) {
            MPIU_Free(REQ_PTL(send_req)->get_me_p);
        }

        MPIU_DBG_MSG(CH3_OTHER,TYPICAL,"message cancelled");
    }

    /* Final completion, done for both outcomes. */
    mpi_errno = MPID_Request_complete(send_req);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    *rreqp = NULL;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #10
0
/*
 * handle_mprobe - event handler for a matched probe.
 *
 * When the event reports PTL_NI_NO_MATCH the request is only flagged as "not
 * found".  Otherwise the request's status is filled from the event, the
 * e->mlength bytes at e->start are copied into a freshly allocated
 * req->dev.tmpbuf, and the request is tagged as an eager or rendezvous
 * message depending on the NPTL_LARGE bit of the header data.  The request is
 * completed in both cases.
 *
 * Returns MPI_SUCCESS or an MPI error code; on failure the checked
 * allocation is reaped.
 */
static int handle_mprobe(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const req = e->user_ptr;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE);

    if (e->ni_fail_type == PTL_NI_NO_MATCH) {
        REQ_PTL(req)->found = FALSE;
    }
    else {
        REQ_PTL(req)->found = TRUE;
        req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
        req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);
        MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data));
        MPIDI_Request_set_sync_send_flag(req, e->hdr_data & NPTL_SSEND);

        /* Keep a private copy of the data delivered with the event. */
        MPIU_CHKPMEM_MALLOC(req->dev.tmpbuf, void *, e->mlength, mpi_errno, "tmpbuf");
        MPIU_Memcpy((char *)req->dev.tmpbuf, e->start, e->mlength);
        req->dev.recv_data_sz = e->mlength;

        if (e->hdr_data & NPTL_LARGE) {
            /* Large message: record the match information on the request. */
            MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD);
            req->dev.match.parts.tag = req->status.MPI_TAG;
            req->dev.match.parts.context_id = NPTL_MATCH_GET_CTX(e->match_bits);
            req->dev.match.parts.rank = req->status.MPI_SOURCE;
            MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_RNDV_MSG);
        }
        else {
            MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_EAGER_MSG);
        }

        /* At this point we know the ME is unlinked. Invalidate the handle to
           prevent further accesses, e.g. an attempted cancel. */
        REQ_PTL(req)->put_me = PTL_INVALID_HANDLE;
        req->dev.recv_pending_count = 1;
    }

    mpi_errno = MPID_Request_complete(req);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIU_CHKPMEM_COMMIT();
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE);
    return mpi_errno;
 fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}
Beispiel #11
0
/*
 * MPID_Cancel_recv - try to cancel a posted receive.
 *
 * When communicator overrides are enabled and the receive names a specific
 * source, the netmod's cancel_recv hook (if any) is consulted first.  The
 * receive counts as cancelled only if the netmod did not object AND
 * MPIDI_CH3U_Recvq_DP() succeeds for the request; then the cancel bit is set,
 * the count is zeroed and the request is completed.  Otherwise the request is
 * left untouched.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_Cancel_recv(MPIR_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;
    int netmod_ok = TRUE;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CANCEL_RECV);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CANCEL_RECV);

    MPIR_Assert(rreq->kind == MPIR_REQUEST_KIND__RECV);

    /* A netmod-specific cancel_recv function has to be called here.
       ANYSOURCE cancels (netmod and otherwise) are handled by
       MPIDI_CH3U_Recvq_DP below. */
#ifdef ENABLE_COMM_OVERRIDES
    if (rreq->dev.match.parts.rank != MPI_ANY_SOURCE)
    {
        MPIDI_VC_t *vc;
        MPIDI_Comm_get_vc_set_active(rreq->comm, rreq->dev.match.parts.rank, &vc);
        if (vc->comm_ops && vc->comm_ops->cancel_recv) {
            /* a zero return from the hook lets the cancel proceed */
            netmod_ok = !vc->comm_ops->cancel_recv(NULL, rreq);
        }
    }
#endif

    if (!(netmod_ok && MPIDI_CH3U_Recvq_DP(rreq)))
    {
        MPL_DBG_MSG_P(MPIDI_CH3_DBG_OTHER,VERBOSE,
                      "request 0x%08x already matched, unable to cancel", rreq->handle);
    }
    else
    {
        MPL_DBG_MSG_P(MPIDI_CH3_DBG_OTHER,VERBOSE,
                      "request 0x%08x cancelled", rreq->handle);
        MPIR_STATUS_SET_CANCEL_BIT(rreq->status, TRUE);
        MPIR_STATUS_SET_COUNT(rreq->status, 0);
        mpi_errno = MPID_Request_complete(rreq);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CANCEL_RECV);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #12
0
/*
 * handler_send - event handler for send-side events (SEND or GET).
 *
 * If the request's completion counter is at 1, i.e. the completion performed
 * below is the last one, the netmod resources attached to the request are
 * released first: the memory descriptor, every non-NULL chunk buffer and the
 * get match-entry array.  The request is then completed once.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
static int handler_send(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const sreq = e->user_ptr;

    int ptl_rc;
    int buf_idx;

    MPIDI_STATE_DECL(MPID_STATE_HANDLER_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_SEND);

    MPIU_Assert(e->type == PTL_EVENT_SEND || e->type == PTL_EVENT_GET);

    /* Last outstanding completion: release all netmod resources. */
    if (MPID_cc_get(sreq->cc) == 1) {
        if (REQ_PTL(sreq)->md != PTL_INVALID_HANDLE) {
            ptl_rc = PtlMDRelease(REQ_PTL(sreq)->md);
            MPIR_ERR_CHKANDJUMP1(ptl_rc, mpi_errno, MPI_ERR_OTHER, "**ptlmdrelease", "**ptlmdrelease %s", MPID_nem_ptl_strerror(ptl_rc));
        }

        for (buf_idx = 0; buf_idx < MPID_NEM_PTL_NUM_CHUNK_BUFFERS; ++buf_idx) {
            if (REQ_PTL(sreq)->chunk_buffer[buf_idx]) {
                MPIU_Free(REQ_PTL(sreq)->chunk_buffer[buf_idx]);
            }
        }

        if (REQ_PTL(sreq)->get_me_p) {
            MPIU_Free(REQ_PTL(sreq)->get_me_p);
        }
    }

    mpi_errno = MPID_Request_complete(sreq);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_SEND);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Beispiel #13
0
/*
 * MPII_Genutil_sched_start - start a generic collective schedule.
 *
 * Creates a collective request, stores it in *req (taking one additional
 * reference on it) and pokes the schedule once.  If that already finishes
 * the schedule the request is completed immediately; otherwise the schedule
 * is attached to the request, the progress hook is activated when the
 * collective queue was empty, and the request is appended to that queue.
 *
 *   sched  - schedule to run
 *   comm   - communicator (not used here)
 *   req    - out: the newly created request
 *
 * Returns MPI_SUCCESS or an MPI error code.  An error reported by the
 * initial poke or by the request completion is propagated to the caller and
 * the schedule is not enqueued in that case.
 *
 * NOTE(review): when the poke fails, *req has already been set and the
 * request is not released here -- confirm how callers clean up.
 */
int MPII_Genutil_sched_start(MPII_Genutil_sched_t * sched, MPIR_Comm * comm, MPIR_Request ** req)
{
    int mpi_errno = MPI_SUCCESS;
    /* Initialised so they are never read indeterminate if the poke fails
     * before writing them. */
    int is_complete = 0;
    int made_progress = 0;
    MPIR_Request *reqp;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPII_GENUTIL_SCHED_START);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPII_GENUTIL_SCHED_START);

    /* Create a request */
    reqp = MPIR_Request_create(MPIR_REQUEST_KIND__COLL);
    if (!reqp) {
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomem");
    }
    *req = reqp;
    MPIR_Request_add_ref(reqp);

    /* Make some progress */
    mpi_errno = MPII_Genutil_sched_poke(sched, &is_complete, &made_progress);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    if (is_complete) {
        /* The schedule finished during the first poke; nothing to enqueue. */
        mpi_errno = MPID_Request_complete(reqp);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
        goto fn_exit;
    }

    /* Enqueue schedule and activate progress hook if not already activated */
    reqp->u.nbc.coll.sched = (void *) sched;
    if (coll_queue.head == NULL)
        MPID_Progress_activate_hook(MPII_Genutil_progress_hook_id);
    DL_APPEND(coll_queue.head, &(reqp->u.nbc.coll));

  fn_exit:
    /* Exit tracing lives here so every return path passes through it. */
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPII_GENUTIL_SCHED_START);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Beispiel #14
0
/*
 * MPIDI_CH3_iSend - send a packet header on the sock channel using an
 * existing request.
 *
 *   vc      - virtual connection to send on
 *   sreq    - send request tracking the operation
 *   hdr     - packet header to write
 *   hdr_sz  - caller's header size; asserted to be at most
 *             sizeof(MPIDI_CH3_Pkt_t) and then replaced by that fixed size
 *
 * Behaviour by channel state of the VC:
 *   CONNECTED, send queue empty - try to write the header immediately.
 *       full write    : run sreq's OnDataAvail handler, or complete the
 *                       request when there is none; if the handler reports
 *                       "not complete" the request is put at the head of the
 *                       send queue and a writev is posted.
 *       partial write : the request is updated with the unsent remainder,
 *                       put at the head of the queue and a write is posted.
 *       write failure : see the error-handling branches below.
 *   CONNECTED, queue not empty / CONNECTING / any other non-failed state -
 *       the request is updated and appended to the send queue.
 *   UNCONNECTED - as above, and a connect is posted.
 *   FAILED - the request is given MPI_ERR_INTERN and completed.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_iSend(MPIDI_VC_t * vc, MPIR_Request * sreq, void * hdr,
		    intptr_t hdr_sz)
{
    int mpi_errno = MPI_SUCCESS;
    int (*reqFn)(MPIDI_VC_t *, MPIR_Request *, int *);
    MPIDI_CH3I_VC *vcch = &vc->ch;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3_ISEND);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3_ISEND);

    MPIR_Assert( hdr_sz <= sizeof(MPIDI_CH3_Pkt_t) );

    /* The sock channel uses a fixed length header, the size of which is the 
       maximum of all possible packet headers */
    hdr_sz = sizeof(MPIDI_CH3_Pkt_t);
    MPL_DBG_STMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
		  MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t*)hdr));

    if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTED) /* MT */
    {
	/* Connection already formed.  If send queue is empty attempt to send 
	   data, queuing any unsent data. */
	if (MPIDI_CH3I_SendQ_empty(vcch)) /* MT */
	{
	    size_t nb;
	    int rc;

	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
			 "send queue empty, attempting to write");
	    MPL_DBG_PKT(vcch->conn,hdr,"isend");
	    /* MT: need some signalling to lock down our right to use the 
	       channel, thus insuring that the progress engine does
               also try to write */
	    rc = MPIDI_CH3I_Sock_write(vcch->sock, hdr, hdr_sz, &nb);
	    if (rc == MPI_SUCCESS)
	    {
		MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
			       "wrote %ld bytes", (unsigned long) nb);
		
		/* nb is the byte count reported by the write call */
		if (nb == hdr_sz)
		{
		    MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
                     "write complete %" PRIdPTR " bytes, calling OnDataAvail fcn", nb);
		    reqFn = sreq->dev.OnDataAvail;
		    if (!reqFn) {
			/* no handler attached: the request is done */
			MPIR_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                        mpi_errno = MPID_Request_complete(sreq);
                        if (mpi_errno != MPI_SUCCESS) {
                            MPIR_ERR_POP(mpi_errno);
                        }
		    }
		    else {
			int complete;
			mpi_errno = reqFn( vc, sreq, &complete );
			if (mpi_errno) MPIR_ERR_POP(mpi_errno);
			if (!complete) {
			    /* the handler left more to send: make this request
			       the active send and post its iov */
			    MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
			    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
					     (MPL_DBG_FDEST,
                    "posting writev, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
			    vcch->conn->send_active = sreq;
			    mpi_errno = MPIDI_CH3I_Sock_post_writev(
				vcch->conn->sock, sreq->dev.iov, 
				sreq->dev.iov_count, NULL);
			    /* --BEGIN ERROR HANDLING-- */
			    if (mpi_errno != MPI_SUCCESS)
			    {
				mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
								 "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
								 sreq, vcch->conn, vc);
			    }
			    /* --END ERROR HANDLING-- */
			}
		    }
		}
		else
		{
		    /* partial write: queue the request at the head and post a
		       write for its first iov entry */
		    MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
                     "partial write of %" PRIdPTR " bytes, request enqueued at head", nb);
		    update_request(sreq, hdr, hdr_sz, nb);
		    MPIDI_CH3I_SendQ_enqueue_head(vcch, sreq);
		    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
     (MPL_DBG_FDEST,"posting write, vc=0x%p, sreq=0x%08x", vc, sreq->handle));
		    vcch->conn->send_active = sreq;
		    mpi_errno = MPIDI_CH3I_Sock_post_write(vcch->conn->sock,
					  sreq->dev.iov[0].MPL_IOV_BUF,
				          sreq->dev.iov[0].MPL_IOV_LEN, 
					  sreq->dev.iov[0].MPL_IOV_LEN, NULL);
		    /* --BEGIN ERROR HANDLING-- */
		    if (mpi_errno != MPI_SUCCESS)
		    {
			mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
							 "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p",
							 sreq, vcch->conn, vc);
		    }
		    /* --END ERROR HANDLING-- */
		}
	    }
	    /* --BEGIN ERROR HANDLING-- */
	    else if (MPIR_ERR_GET_CLASS(rc) == MPIDI_CH3I_SOCK_ERR_NOMEM)
	    {
		/* NOTE(review): only the request status is set here; the
		   request is neither completed nor queued and mpi_errno stays
		   unchanged -- confirm this is intended. */
		MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,TYPICAL,
			     "MPIDI_CH3I_Sock_write failed, out of memory");
		sreq->status.MPI_ERROR = MPIR_ERR_MEMALLOCFAILED;
	    }
	    else
	    {
		MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL,TYPICAL,
			       "MPIDI_CH3I_Sock_write failed, rc=%d", rc);
		/* Connection just failed. Mark the request complete and 
		   return an error. */
		MPL_DBG_VCCHSTATECHANGE(vc,VC_STATE_FAILED);
		/* FIXME: Shouldn't the vc->state also change? */
		vcch->state = MPIDI_CH3I_VC_STATE_FAILED;
		sreq->status.MPI_ERROR = MPIR_Err_create_code( rc,
			       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, 
			       MPI_ERR_INTERN, "**ch3|sock|writefailed", 
			       "**ch3|sock|writefailed %d", rc );
		 /* MT -CH3U_Request_complete() performs write barrier */
		/* the return value of the completion call is not checked here */
		MPID_Request_complete(sreq);
		/* Make sure that the caller sees this error */
		mpi_errno = sreq->status.MPI_ERROR;
	    }
	    /* --END ERROR HANDLING-- */
	}
	else
	{
	    MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,VERBOSE,"send queue not empty, enqueuing");
	    update_request(sreq, hdr, hdr_sz, 0);
	    MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
	}
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_CONNECTING) /* MT */
    {
	/* queuing the data so it can be sent later. */
	MPL_DBG_VCUSE(vc,"connecting.  enqueuing request");
	update_request(sreq, hdr, hdr_sz, 0);
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    else if (vcch->state == MPIDI_CH3I_VC_STATE_UNCONNECTED) /* MT */
    {
	/* Form a new connection, queuing the data so it can be sent later. */
	MPL_DBG_VCUSE(vc,"unconnected.  enqueuing request");
	update_request(sreq, hdr, hdr_sz, 0);
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
	mpi_errno = MPIDI_CH3I_VC_post_connect(vc);
	if (mpi_errno) {
	    MPIR_ERR_POP(mpi_errno);
	}
    }
    else if (vcch->state != MPIDI_CH3I_VC_STATE_FAILED)
    {
	/* Unable to send data at the moment, so queue it for later */
	MPL_DBG_VCUSE(vc,"still connecting. Enqueuing request");
	update_request(sreq, hdr, hdr_sz, 0);
	MPIDI_CH3I_SendQ_enqueue(vcch, sreq);
    }
    /* --BEGIN ERROR HANDLING-- */
    else
    {
	/* Connection failed.  Mark the request complete and return an error. */
	/* TODO: Create an appropriate error message */
	/* NOTE(review): mpi_errno is not modified on this path, so the
	   function returns MPI_SUCCESS here; the error is visible only in
	   the request status. */
	sreq->status.MPI_ERROR = MPI_ERR_INTERN;
	/* MT - CH3U_Request_complete() performs write barrier */
	MPID_Request_complete(sreq);
    }
    /* --END ERROR HANDLING-- */

    /* fn_fail is also the normal exit: every path falls through to here */
 fn_fail:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3_ISEND);
    return mpi_errno;
}
Beispiel #15
0
/*
 * ReadMoreData - keep reading data for a receive request for as long as the
 * socket delivers it immediately.
 *
 * Each pass reads into the request's iov.  When the read filled the iov
 * (adjust_iov() reports true) the request's OnDataAvail handler is run, or
 * the request is completed if it has none.  Once the request is complete a
 * new packet receive is posted on the connection and the loop ends; if the
 * handler asks for more data the loop reads again.  When nothing was read or
 * the iov is only partially filled, the request becomes the connection's
 * active receive, a readv is posted for the remainder and the loop ends.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
static int ReadMoreData( MPIDI_CH3I_Connection_t * conn, MPIR_Request *rreq )
{
    int mpi_errno = MPI_SUCCESS;

    for (;;) {
	MPL_IOV *iov_cursor = rreq->dev.iov;
	size_t nread;
	int (*on_data_avail)(MPIDI_VC_t *, MPIR_Request *, int *);
	int done;

	mpi_errno = MPIDI_CH3I_Sock_readv(conn->sock, iov_cursor,
					  rreq->dev.iov_count, &nread);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS) {
	    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
					     "**ch3|sock|immedread", "ch3|sock|immedread %p %p %p",
					     rreq, conn, conn->vc);
	    goto fn_fail;
	}
	/* --END ERROR HANDLING-- */

	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
		 (MPL_DBG_FDEST,"immediate readv, vc=%p nb=%" PRIdPTR ", rreq=0x%08x",
		  conn->vc, nread, rreq->handle));

	if (!(nread > 0 && adjust_iov(&iov_cursor, &rreq->dev.iov_count, nread))) {
	    /* Nothing more available right now: hand the rest of the iov to
	       a posted read and stop. */
	    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
		     (MPL_DBG_FDEST,"posting readv, vc=%p, rreq=0x%08x",
		      conn->vc, rreq->handle));
	    conn->recv_active = rreq;
	    mpi_errno = MPIDI_CH3I_Sock_post_readv(conn->sock, iov_cursor, rreq->dev.iov_count, NULL);
	    /* --BEGIN ERROR HANDLING-- */
	    if (mpi_errno != MPI_SUCCESS) {
		mpi_errno = MPIR_Err_create_code(
		    mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**ch3|sock|postread",
		    "ch3|sock|postread %p %p %p", rreq, conn, conn->vc);
		goto fn_fail;
	    }
	    /* --END ERROR HANDLING-- */
	    break;
	}

	/* The iov was filled: let the request decide what comes next. */
	on_data_avail = rreq->dev.OnDataAvail;
	if (on_data_avail) {
	    mpi_errno = on_data_avail( conn->vc, rreq, &done );
	    if (mpi_errno) {
		MPIR_ERR_POP(mpi_errno);
	    }
	}
	else {
	    MPIR_Assert(MPIDI_Request_get_type(rreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
	    mpi_errno = MPID_Request_complete(rreq);
	    if (mpi_errno != MPI_SUCCESS) {
		MPIR_ERR_POP(mpi_errno);
	    }
	    done = TRUE;
	}

	if (!done) {
	    /* The handler set up more data to receive; read again. */
	    continue;
	}

	conn->recv_active = NULL; /* -- already set to NULL */
	mpi_errno = connection_post_recv_pkt(conn);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIR_ERR_POP(mpi_errno);
	}
	break;
    }

 fn_fail:
    return mpi_errno;
}
Beispiel #16
0
/*
 * MPIDI_CH3I_Put - issue an RMA put on a window.
 *
 *   origin_addr/origin_count/origin_datatype - data to put
 *   target_rank/target_disp/target_count/target_datatype - destination
 *   win_ptr - window; fails with an MPI_ERR_RMA_SYNC class error when its
 *             access state is MPIDI_RMA_NONE
 *   ureq    - optional user request (may be NULL); remembered on the queued
 *             operation, or completed here when the put is done locally
 *
 * The put is performed directly through MPIDI_CH3I_Shm_put_op() when the
 * target is this process, when the window has the SHARED flavor, or when a
 * shared-memory region is allocated and origin and target VCs report the
 * same node id.  Otherwise an operation is obtained from the window, filled
 * in, enqueued, progress is made on the target, and the call waits in the
 * progress engine while the number of active requests is at or above the
 * configured threshold.
 *
 * NOTE(review): for target_rank == MPI_PROC_NULL and for data_sz == 0 the
 * function returns without touching ureq -- presumably callers handle those
 * cases before calling; verify.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3I_Put(const void *origin_addr, int origin_count, MPI_Datatype
                   origin_datatype, int target_rank, MPI_Aint target_disp,
                   int target_count, MPI_Datatype target_datatype, MPID_Win * win_ptr,
                   MPID_Request * ureq)
{
    int mpi_errno = MPI_SUCCESS;
    int dt_contig ATTRIBUTE((unused)), rank;
    MPID_Datatype *dtp;
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPIDI_msg_sz_t data_sz;
    MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
    int made_progress = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PUT);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PUT);

    MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    /* nothing to do for a null target */
    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, data_sz, dtp, dt_true_lb);

    /* nothing to do for an empty origin buffer */
    if (data_sz == 0) {
        goto fn_exit;
    }

    rank = win_ptr->comm_ptr->rank;

    if (win_ptr->shm_allocated == TRUE && target_rank != rank &&
        win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
        /* check if target is local and shared memory is allocated on window,
         * if so, we directly perform this operation on shared memory region. */

        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
         * the same node. However, in ch3:sock, even if origin and target are on the same node, they do
         * not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
         * which is only set to TRUE when SHM region is allocated in nemesis.
         * In future we need to figure out a way to check if origin and target are in the same "SHM comm".
         */
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
    }

    /* If the put is a local operation, do it here */
    /* The VC pointers are dereferenced only when the first two tests are
     * false and shm_allocated is TRUE, which is exactly the case in which
     * they were looked up above. */
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
        mpi_errno = MPIDI_CH3I_Shm_put_op(origin_addr, origin_count, origin_datatype, target_rank,
                                          target_disp, target_count, target_datatype, win_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        if (ureq) {
            /* Complete user request and release the ch3 ref */
            mpi_errno = MPID_Request_complete(ureq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
        }
    }
    else {
        MPIDI_RMA_Op_t *op_ptr = NULL;
        MPIDI_CH3_Pkt_put_t *put_pkt = NULL;
        int use_immed_pkt = FALSE;
        int is_origin_contig, is_target_contig;

        /* queue it up */
        mpi_errno = MPIDI_CH3I_Win_get_op(win_ptr, &op_ptr);
        if (mpi_errno != MPI_SUCCESS)
            MPIR_ERR_POP(mpi_errno);

        MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);

        /******************** Setting operation struct areas ***********************/

        /* FIXME: For contig and very short operations, use a streamlined op */
        op_ptr->origin_addr = (void *) origin_addr;
        op_ptr->origin_count = origin_count;
        op_ptr->origin_datatype = origin_datatype;
        op_ptr->target_rank = target_rank;

        /* Remember user request */
        op_ptr->ureq = ureq;

        /* if source or target datatypes are derived, increment their
         * reference counts */
        if (!MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) {
            MPID_Datatype_get_ptr(origin_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
        if (!MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
            MPID_Datatype_get_ptr(target_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }

        MPID_Datatype_is_contig(origin_datatype, &is_origin_contig);
        MPID_Datatype_is_contig(target_datatype, &is_target_contig);

        /* Judge if we can use IMMED data packet */
        if (MPIR_DATATYPE_IS_PREDEFINED(origin_datatype) &&
            MPIR_DATATYPE_IS_PREDEFINED(target_datatype) && is_origin_contig && is_target_contig) {
            if (data_sz <= MPIDI_RMA_IMMED_BYTES)
                use_immed_pkt = TRUE;
        }

        /* Judge if this operation is an piggyback candidate */
        if (MPIR_DATATYPE_IS_PREDEFINED(origin_datatype) &&
            MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
            /* FIXME: currently we only piggyback LOCK flag with op using predefined datatypes
             * for both origin and target data. We should extend this optimization to derived
             * datatypes as well. */
            if (data_sz <= MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE)
                op_ptr->piggyback_lock_candidate = 1;
        }

        /************** Setting packet struct areas in operation ****************/

        put_pkt = &(op_ptr->pkt.put);

        if (use_immed_pkt) {
            MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT_IMMED);
        }
        else {
            MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
        }

        /* target address = target's window base + displacement scaled by the
         * target's displacement unit */
        put_pkt->addr = (char *) win_ptr->basic_info_table[target_rank].base_addr +
            win_ptr->basic_info_table[target_rank].disp_unit * target_disp;
        put_pkt->count = target_count;
        put_pkt->datatype = target_datatype;
        put_pkt->info.dataloop_size = 0;
        put_pkt->target_win_handle = win_ptr->basic_info_table[target_rank].win_handle;
        put_pkt->source_win_handle = win_ptr->handle;
        put_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
        if (use_immed_pkt) {
            /* copy the origin data into the packet itself */
            void *src = (void *) origin_addr, *dest = (void *) (put_pkt->info.data);
            mpi_errno = immed_copy(src, dest, data_sz);
            if (mpi_errno != MPI_SUCCESS)
                MPIR_ERR_POP(mpi_errno);
        }

        MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);

        mpi_errno = MPIDI_CH3I_Win_enqueue_op(win_ptr, op_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, target_rank, &made_progress);
        if (mpi_errno != MPI_SUCCESS)
            MPIR_ERR_POP(mpi_errno);

        /* Throttle: with a non-negative threshold configured, wait in the
         * progress engine until the active request count drops below it. */
        if (MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD >= 0 &&
            MPIDI_CH3I_RMA_Active_req_cnt >= MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD) {
            while (MPIDI_CH3I_RMA_Active_req_cnt >= MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD) {
                mpi_errno = wait_progress_engine();
                if (mpi_errno != MPI_SUCCESS)
                    MPIR_ERR_POP(mpi_errno);
            }
        }
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PUT);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
Beispiel #17
0
/* MPIDI_CH3I_SendNoncontig - send a noncontiguous message by packing it
   straight into cells.

   The caller must have initialized sreq->dev.segment as well as
   segment_first and segment_size before calling.

   One of three paths is taken:
     - n_hdr_iov > 0: everything is delegated to MPIDI_CH3_SendNoncontig_iov();
     - the shm send queue is not empty: the request is queued behind the
       pending ones and send progress is run;
     - otherwise: cells are pushed until the data is exhausted or the
       send routines set the "again" flag.

   Returns MPI_SUCCESS, or the error code reported by a callee. */
int MPIDI_CH3I_SendNoncontig( MPIDI_VC_t *vc, MPIR_Request *sreq, void *header, intptr_t hdr_sz,
                              MPL_IOV *hdr_iov, int n_hdr_iov)
{
    int mpi_errno = MPI_SUCCESS;
    int retry_later = 0;
    intptr_t first_at_entry = sreq->dev.segment_first;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);

    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)header);

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);

    /* Extended header present: convert the segment to iovs and let the
       iov-based sender combine them with the extended header iov. */
    if (n_hdr_iov > 0) {
        mpi_errno = MPIDI_CH3_SendNoncontig_iov(vc, sreq, header, hdr_sz,
                                                hdr_iov, n_hdr_iov);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
        goto fn_exit;
    }

    /* Something is already queued: keep ordering by enqueuing this request
       too, then see whether the queue can make progress right now. */
    if (!MPIDI_CH3I_Sendq_empty(MPIDI_CH3I_shm_sendq)) /* MT */
    {
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, TERSE, "enqueuing");

        sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *)header;
        sreq->ch.noncontig    = TRUE;
        sreq->ch.header_sz    = hdr_sz;
        sreq->ch.vc           = vc;

        MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
        mpi_errno = MPIDI_CH3I_Shm_send_progress();
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
        goto fn_exit;
    }

    /* Queue is empty: send the header cell first, then keep sending data
       cells for as long as they are accepted. */
    MPID_nem_mpich_send_seg_header(sreq->dev.segment_ptr, &sreq->dev.segment_first, sreq->dev.segment_size,
                                   header, hdr_sz, vc, &retry_later);
    while (!retry_later && sreq->dev.segment_first < sreq->dev.segment_size) {
        MPID_nem_mpich_send_seg(sreq->dev.segment_ptr, &sreq->dev.segment_first, sreq->dev.segment_size, vc, &retry_later);
    }

    if (retry_later)
    {
        /* Not everything went out; the remainder is sent from the queue. */
        sreq->ch.noncontig = TRUE;
        sreq->ch.vc = vc;
        if (sreq->dev.segment_first != first_at_entry)
        {
            /* part of the message is already out: this request becomes the
               active send */
            MPIR_Assert(MPIDI_CH3I_shm_active_send == NULL);
            MPIDI_CH3I_shm_active_send = sreq;
        }
        else
        {
            /* nothing was sent at all, so the header must be kept too */
            sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *)header;
            sreq->ch.header_sz    = hdr_sz;
        }
        MPIDI_CH3I_Sendq_enqueue(&MPIDI_CH3I_shm_sendq, sreq);
        goto fn_exit;
    }

    /* All data has been sent: complete the request, either directly or
       through its OnDataAvail callback. */
    if (sreq->dev.OnDataAvail)
    {
        int complete = 0;
        mpi_errno = sreq->dev.OnDataAvail(vc, sreq, &complete);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
        MPIR_Assert(complete); /* all data has been sent, we should always complete */
    }
    else
    {
        MPIR_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
        mpi_errno = MPID_Request_complete(sreq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
    }
    MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, ".... complete %d bytes", (int) (sreq->dev.segment_size));

 fn_exit:
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_SENDNONCONTIG);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #18
0
/*
 * MPID_Cancel_send - try to cancel a send request.
 *
 * sreq - the request to cancel; asserted to be of kind
 *        MPIR_REQUEST_KIND__SEND.
 *
 * Returns MPI_SUCCESS, or an error code if completing the request, the
 * netmod cancel hook, or starting the cancel-request packet fails.
 *
 * Steps, in order:
 *   1. If MPIDI_Request_cancel_pending() sets its flag, nothing more is done.
 *   2. A request with a NULL communicator is left untouched (see FIXME).
 *   3. Send-to-self: the matching request is pulled from the unexpected
 *      queue; if one is found the send is marked cancelled and completed,
 *      otherwise the cancel bit is cleared.
 *   4. With ENABLE_COMM_OVERRIDES, a netmod-provided cancel_send hook, when
 *      present, handles the request.
 *   5. Otherwise a MPIDI_CH3_PKT_CANCEL_SEND_REQ packet is sent to the
 *      receiver; the outcome is not known when this function returns.
 */
int MPID_Cancel_send(MPIR_Request * sreq)
{
    MPIDI_VC_t * vc;
    int proto;
    int flag;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CANCEL_SEND);

    MPIR_Assert(sreq->kind == MPIR_REQUEST_KIND__SEND);

    MPIDI_Request_cancel_pending(sreq, &flag);
    if (flag)
    {
        goto fn_exit;
    }

    /*
     * FIXME: user requests returned by MPI_Ibsend() have a NULL comm pointer
     * and no pointer to the underlying communication
     * request.  For now, we simply fail to cancel the request.  In the future,
     * we should add a new request kind to indicate that
     * the request is a BSEND.  Then we can properly cancel the request, much
     * in the way we do persistent requests.
     */
    if (sreq->comm == NULL)
    {
        goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(sreq->comm, sreq->dev.match.parts.rank, &vc);

    proto = MPIDI_Request_get_msg_type(sreq);

    if (proto == MPIDI_REQUEST_SELF_MSG)
    {
        MPIR_Request * rreq;

        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,
                     "attempting to cancel message sent to self");

        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
        rreq = MPIDI_CH3U_Recvq_FDU(sreq->handle, &sreq->dev.match);
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
        if (rreq)
        {
            MPIR_Assert(rreq->dev.partner_request == sreq);

            MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
             "send-to-self cancellation successful, sreq=0x%08x, rreq=0x%08x",
                                                sreq->handle, rreq->handle));

            /* Pull the message out of the unexpected queue since it's
             * being cancelled.  The below request release drops one
             * reference.  We explicitly drop a second reference,
             * because the receive request will never be visible to
             * the user. */
            MPIR_Request_free(rreq);
            MPIR_Request_free(rreq);

            MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);
            mpi_errno = MPID_Request_complete(sreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
        }
        else
        {
            MPIR_STATUS_SET_CANCEL_BIT(sreq->status, FALSE);
            /* rreq is NULL on this path, so only the send request can be
               reported; dereferencing rreq here would crash. */
            MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
               "send-to-self cancellation failed, sreq=0x%08x",
                                                sreq->handle));
        }

        goto fn_exit;
    }

    /* If the message went over a netmod and it provides a cancel_send
       function, call it here. */
#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->cancel_send)
    {
        mpi_errno = vc->comm_ops->cancel_send(vc, sreq);
        goto fn_exit;
    }
#endif

    /* Local cancellation (pulling the send back out of the send queue) is
       not implemented, so no send is ever cancelled at this point.  The only
       local work is for rendezvous sends: detach and release the RTS
       request.  Every request then continues to the cancel-request packet
       below. */
    if (proto == MPIDI_REQUEST_RNDV_MSG)
    {
        MPIR_Request * rts_sreq;
        /* The cancellation of the RTS request needs to be atomic through
           the destruction of the RTS request to avoid
           conflict with release of the RTS request if the CTS is received
           (see handling of a rendezvous CTS packet in
           MPIDI_CH3U_Handle_recv_pkt()).
           MPIDI_Request_fetch_and_clear_rts_sreq() is used to guarantee
           that atomicity. */
        MPIDI_Request_fetch_and_clear_rts_sreq(sreq, &rts_sreq);
        if (rts_sreq != NULL)
        {
            /* since we attempted to cancel a RTS request, then we are
               responsible for releasing that request */
            MPIR_Request_free(rts_sreq);
        }
    }

    /* Part or all of the message has already been sent, so we need to send a
       cancellation request to the receiver in an attempt
       to catch the message before it is matched. */
    {
        int was_incomplete;
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_cancel_send_req_t * const csr_pkt = &upkt.cancel_send_req;
        MPIR_Request * csr_sreq;

        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
              "sending cancel request to %d for 0x%08x",
              sreq->dev.match.parts.rank, sreq->handle));

        /* The completion counter and reference count are incremented to keep
           the request around long enough to receive a
           response regardless of what the user does (free the request before
           waiting, etc.). */
        MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
        if (!was_incomplete)
        {
            /* The reference count is incremented only if the request was
               complete before the increment. */
            MPIR_Request_add_ref( sreq );
        }

        MPIDI_Pkt_init(csr_pkt, MPIDI_CH3_PKT_CANCEL_SEND_REQ);
        csr_pkt->match.parts.rank = sreq->comm->rank;
        csr_pkt->match.parts.tag = sreq->dev.match.parts.tag;
        csr_pkt->match.parts.context_id = sreq->dev.match.parts.context_id;
        csr_pkt->sender_req_id = sreq->handle;

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iStartMsg(vc, csr_pkt, sizeof(*csr_pkt), &csr_sreq);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|cancelreq");
        }
        /* The request object for the control packet itself is not needed. */
        if (csr_sreq != NULL)
        {
            MPIR_Request_free(csr_sreq);
        }
    }

    /* FIXME: if send cancellation packets are allowed to arrive out-of-order
       with respect to send packets, then we need to
       timestamp send and cancel packets to insure that a cancellation request
       does not bypass the send packet to be cancelled
       and erroneously cancel a previously sent message with the same request
       handle. */
    /* FIXME: A timestamp is more than is necessary; a message sequence number
       should be adequate. */
 fn_fail:
 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CANCEL_SEND);
    return mpi_errno;
}
Beispiel #19
0
/*
 * MPIDI_CH3_PktHandler_EagerSyncSend - packet handler for an eager
 * synchronous-send packet.
 *
 * vc     - virtual connection the packet arrived on; the ack is sent on it
 * pkt    - the received packet; any data is read from directly behind the
 *          MPIDI_CH3_Pkt_t header
 * buflen - in: used as the number of bytes available at pkt (header plus
 *          data); out: sizeof(MPIDI_CH3_Pkt_t) plus the data length consumed
 * rreqp  - out: NULL when the receive request was completed (or the message
 *          was dropped), otherwise the request that still needs data
 *
 * Returns MPI_SUCCESS or an MPI error code.
 *
 * MPIDI_CH3U_Recvq_FDP_or_AEU() supplies the request and the "found" flag.
 * When found is set, the data is delivered with
 * MPIDI_CH3U_Receive_data_found() and an eager-sync ack is sent right away.
 * Otherwise the data goes through MPIDI_CH3U_Receive_data_unexpected() and
 * the request is flagged as a sync send.
 */
int MPIDI_CH3_PktHandler_EagerSyncSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
					intptr_t *buflen, MPIR_Request **rreqp )
{
    MPIDI_CH3_Pkt_eager_send_t * es_pkt = &pkt->eager_send;
    MPIR_Request * rreq;
    int found;
    int complete;
    char *data_buf;
    intptr_t data_len;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
     "received eager sync send pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
              es_pkt->sender_req_id, es_pkt->match.parts.rank,
              es_pkt->match.parts.tag,
              es_pkt->match.parts.context_id));
    MPL_DBG_MSGPKT(vc,es_pkt->match.parts.tag,es_pkt->match.parts.context_id,
                    es_pkt->match.parts.rank,es_pkt->data_sz,
                    "ReceivedEagerSync");

    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&es_pkt->match, &found);
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this.  This is not an error: mpi_errno is still
     * MPI_SUCCESS when we leave. */
    if (!found && MPIR_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }

    set_request_info(rreq, es_pkt, MPIDI_REQUEST_EAGER_MSG);

    /* Use whichever is smaller: what the request expects or what actually
       arrived behind the header. */
    data_len = ((*buflen - sizeof(MPIDI_CH3_Pkt_t) >= rreq->dev.recv_data_sz)
                ? rreq->dev.recv_data_sz : *buflen - sizeof(MPIDI_CH3_Pkt_t));
    data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);

    if (found)
    {
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_eager_sync_ack_t * const esa_pkt = &upkt.eager_sync_ack;
        MPIR_Request * esa_req;

        /* Build the ack before the request can be completed.  Completing
           the request releases a reference, so rreq must not be read
           afterwards: if that was the last reference the object is gone. */
        MPIDI_Pkt_init(esa_pkt, MPIDI_CH3_PKT_EAGER_SYNC_ACK);
        esa_pkt->sender_req_id = rreq->dev.sender_req_id;

        if (rreq->dev.recv_data_sz == 0) {
            *buflen = sizeof(MPIDI_CH3_Pkt_t);
            mpi_errno = MPID_Request_complete(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
            *rreqp = NULL;
        }
        else {
            mpi_errno = MPIDI_CH3U_Receive_data_found( rreq, data_buf,
                                                       &data_len, &complete );
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
                    "**ch3|postrecv %s", "MPIDI_CH3_PKT_EAGER_SYNC_SEND");
            }

            *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

            if (complete)
            {
                mpi_errno = MPID_Request_complete(rreq);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIR_ERR_POP(mpi_errno);
                }
                *rreqp = NULL;
            }
            else
            {
                *rreqp = rreq;
            }
        }
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending eager sync ack");

        /* Because this is a packet handler, it is already within a CH3 CS */
        /* MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex); */
        mpi_errno = MPIDI_CH3_iStartMsg(vc, esa_pkt, sizeof(*esa_pkt), &esa_req);
        /* MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex); */
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                "**ch3|syncack");
        }
        /* The request object for the ack packet itself is not needed. */
        if (esa_req != NULL) {
            MPIR_Request_free(esa_req);
        }
    }
    else
    {
        if (rreq->dev.recv_data_sz == 0) {
            *buflen = sizeof(MPIDI_CH3_Pkt_t);
            mpi_errno = MPID_Request_complete(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
            *rreqp = NULL;
        }
        else {
            mpi_errno = MPIDI_CH3U_Receive_data_unexpected( rreq, data_buf,
                                                            &data_len, &complete );
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
                    "**ch3|postrecv %s", "MPIDI_CH3_PKT_EAGER_SYNC_SEND");
            }

            *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

            if (complete)
            {
                mpi_errno = MPID_Request_complete(rreq);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIR_ERR_POP(mpi_errno);
                }
                *rreqp = NULL;
            }
            else
            {
                *rreqp = rreq;
            }
        }
        /* NOTE(review): rreq is touched after MPID_Request_complete() here;
           presumably safe because the unexpected queue still holds a
           reference to it -- confirm. */
        MPIDI_Request_set_sync_send_flag(rreq, TRUE);
    }
 fn_fail:
    return mpi_errno;
}
Beispiel #20
0
/*
 * MPIDI_CH3I_Progress_handle_sock_event - dispatch one socket event.
 *
 * event - the event to handle; op_type selects the case below, user_ptr
 *         carries the connection for READ/WRITE/CONNECT/CLOSE events and
 *         error carries the status of the socket operation.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 *
 * READ:    on a connected connection, either a new packet header is handed
 *          to its handler from pktArray (no receive active) or the active
 *          receive request is advanced; other connection states are passed
 *          to the Sockconn connection-event handlers.
 * WRITE:   advances the active send request, writing further iov data
 *          immediately where possible and posting a writev otherwise.
 * ACCEPT / CONNECT / CLOSE: forwarded to the matching Sockconn handler.
 * WAKEUP:  signals progress completion.
 */
static int MPIDI_CH3I_Progress_handle_sock_event(MPIDI_CH3I_Sock_event_t * event)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);

    MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER,VERBOSE,"Socket event of type %d", event->op_type );

    switch (event->op_type)
    {
	case MPIDI_CH3I_SOCK_OP_READ:
	{
	    MPIDI_CH3I_Connection_t * conn = 
		(MPIDI_CH3I_Connection_t *) event->user_ptr;
            /* If we have a READ event on a discarded connection, we probably have
               an error on this connection, if the remote side is closed due to
               MPI_Finalize. Since the connection is discarded (and therefore not needed)
               it can be closed and the error can be ignored */
            if(conn->state == CONN_STATE_DISCARD){
                MPIDI_CH3_Sockconn_handle_close_event(conn);
                break;
            }
		
	    /* Snapshot of the active receive; NULL means a new packet header
	       has just been read. */
	    MPIR_Request * rreq = conn->recv_active;

	    /* --BEGIN ERROR HANDLING-- */
	    if (event->error != MPI_SUCCESS)
	    {
		/* FIXME: the following should be handled by the close 
		   protocol */
		/* A closed-connection error is dropped silently; any other
		   error is propagated. */
		if (MPIR_ERR_GET_CLASS(event->error) != MPIDI_CH3I_SOCK_ERR_CONN_CLOSED) {
		    mpi_errno = event->error;
		    MPIR_ERR_POP(mpi_errno);
		}		    
		break;
	    }
	    /* --END ERROR HANDLING-- */
		
	    if (conn->state == CONN_STATE_CONNECTED)
	    {
		if (conn->recv_active == NULL)
		{
                    /* Only the header has been read, so the handler is
                       given exactly sizeof(MPIDI_CH3_Pkt_t) bytes and must
                       report the same amount back. */
                    intptr_t buflen = sizeof (MPIDI_CH3_Pkt_t);
		    MPIR_Assert(conn->pkt.type < MPIDI_CH3_PKT_END_CH3);
                    
		    mpi_errno = pktArray[conn->pkt.type]( conn->vc, &conn->pkt,
							  &buflen, &rreq );
		    if (mpi_errno != MPI_SUCCESS) {
			MPIR_ERR_POP(mpi_errno);
		    }
                    MPIR_Assert(buflen == sizeof (MPIDI_CH3_Pkt_t));

		    if (rreq == NULL)
		    {
			/* Handler needs no more data: wait for the next
			   packet unless the connection is closing. */
			if (conn->state != CONN_STATE_CLOSING)
			{
			    /* conn->recv_active = NULL;  -- 
			       already set to NULL */
			    mpi_errno = connection_post_recv_pkt(conn);
			    if (mpi_errno != MPI_SUCCESS) {
				MPIR_ERR_POP(mpi_errno);
			    }
			}
		    }
		    else
		    {
			/* Handler returned a request that still needs data. */
			mpi_errno = ReadMoreData( conn, rreq );
			if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
		    }
		}
		else /* incoming data */
		{
		    int (*reqFn)(MPIDI_VC_t *, MPIR_Request *, int *);
		    int complete;

		    /* No callback means the request is simply completed;
		       otherwise the callback decides whether it is done. */
		    reqFn = rreq->dev.OnDataAvail;
		    if (!reqFn) {
			MPIR_Assert(MPIDI_Request_get_type(rreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                        mpi_errno = MPID_Request_complete(rreq);
                        if (mpi_errno != MPI_SUCCESS) {
                            MPIR_ERR_POP(mpi_errno);
                        }
			complete = TRUE;
		    }
		    else {
			mpi_errno = reqFn( conn->vc, rreq, &complete );
			if (mpi_errno) MPIR_ERR_POP(mpi_errno);
		    }
			
		    if (complete)
		    {
			conn->recv_active = NULL;
			mpi_errno = connection_post_recv_pkt(conn);
			if (mpi_errno != MPI_SUCCESS) {
			    MPIR_ERR_POP(mpi_errno);
			}
		    }
		    else /* more data to be read */
		    {
			mpi_errno = ReadMoreData( conn, rreq );
			if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
		    }
		}
	    }
	    else if (conn->state == CONN_STATE_OPEN_LRECV_DATA)
	    {
		mpi_errno = MPIDI_CH3_Sockconn_handle_connopen_event( conn );
		if (mpi_errno) { MPIR_ERR_POP( mpi_errno ); }
	    }
	    else /* Handling some internal connection establishment or 
		    tear down packet */
	    { 
		mpi_errno = MPIDI_CH3_Sockconn_handle_conn_event( conn );
		if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
	    }
	    break;
	}

	/* END OF SOCK_OP_READ */

	case MPIDI_CH3I_SOCK_OP_WRITE:
	{
	    MPIDI_CH3I_Connection_t * conn = 
		(MPIDI_CH3I_Connection_t *) event->user_ptr;
	    /* --BEGIN ERROR HANDLING-- */
	    if (event->error != MPI_SUCCESS) {
		mpi_errno = event->error;
		MPIR_ERR_POP(mpi_errno);
	    }
	    /* --END ERROR HANDLING-- */
		
	    if (conn->send_active)
	    {
		MPIR_Request * sreq = conn->send_active;
		int (*reqFn)(MPIDI_VC_t *, MPIR_Request *, int *);
		int complete;

		/* Same completion protocol as on the receive side. */
		reqFn = sreq->dev.OnDataAvail;
		if (!reqFn) {
		    MPIR_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                    mpi_errno = MPID_Request_complete(sreq);
                    if (mpi_errno != MPI_SUCCESS) {
                        MPIR_ERR_POP(mpi_errno);
                    }

		    complete = TRUE;
		}
		else {
		    mpi_errno = reqFn( conn->vc, sreq, &complete );
		    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
		}
		    
		if (complete)
		{
		    mpi_errno = connection_pop_sendq_req(conn);
		    if (mpi_errno != MPI_SUCCESS) {
			MPIR_ERR_POP(mpi_errno);
		    }
		}
		else /* more data to send */
		{
		    /* Keep writing immediately while whole iovs go out; the
		       loop ends when the request completes or when the rest
		       has to be posted as an asynchronous writev. */
		    for(;;)
		    {
			MPL_IOV * iovp;
			size_t nb;
				
			/* Reloaded every pass: the callback below may have
			   refilled sreq->dev.iov. */
			iovp = sreq->dev.iov;
			    
			mpi_errno = MPIDI_CH3I_Sock_writev(conn->sock, iovp, sreq->dev.iov_count, &nb);
			/* --BEGIN ERROR HANDLING-- */
			if (mpi_errno != MPI_SUCCESS)
			{
			    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
							     "**ch3|sock|immedwrite", "ch3|sock|immedwrite %p %p %p",
							     sreq, conn, conn->vc);
			    goto fn_fail;
			}
			/* --END ERROR HANDLING-- */

			MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
       (MPL_DBG_FDEST,"immediate writev, vc=%p, sreq=0x%08x, nb=%" PRIdPTR,
	conn->vc, sreq->handle, nb));
			    
			/* NOTE(review): adjust_iov() presumably returns
			   non-zero once all iovs have been consumed --
			   confirm against its definition. */
			if (nb > 0 && adjust_iov(&iovp, &sreq->dev.iov_count, nb))
			{
			    reqFn = sreq->dev.OnDataAvail;
			    if (!reqFn) {
				MPIR_Assert(MPIDI_Request_get_type(sreq)!=MPIDI_REQUEST_TYPE_GET_RESP);
                                mpi_errno = MPID_Request_complete(sreq);
                                if (mpi_errno != MPI_SUCCESS) {
                                    MPIR_ERR_POP(mpi_errno);
                                }
				complete = TRUE;
			    }
			    else {
				mpi_errno = reqFn( conn->vc, sreq, &complete );
				if (mpi_errno) MPIR_ERR_POP(mpi_errno);
			    }
			    if (complete)
			    {
				mpi_errno = connection_pop_sendq_req(conn);
				if (mpi_errno != MPI_SUCCESS) {
				    MPIR_ERR_POP(mpi_errno);
				}
				break;
			    }
			}
			else
			{
			    /* Nothing, or only part, was written: post the
			       remaining iovs and wait for the next WRITE
			       event. */
			    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL,VERBOSE,
       (MPL_DBG_FDEST,"posting writev, vc=%p, conn=%p, sreq=0x%08x",
	conn->vc, conn, sreq->handle));
			    mpi_errno = MPIDI_CH3I_Sock_post_writev(conn->sock, iovp, sreq->dev.iov_count, NULL);
			    /* --BEGIN ERROR HANDLING-- */
			    if (mpi_errno != MPI_SUCCESS)
			    {
				mpi_errno = MPIR_Err_create_code(
				    mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**ch3|sock|postwrite",
				    "ch3|sock|postwrite %p %p %p", sreq, conn, conn->vc);
				goto fn_fail;
			    }
			    /* --END ERROR HANDLING-- */

			    break;
			}
		    }
		}
	    }
	    else /* finished writing internal packet header */
	    {
		/* the connection is not active yet */
		mpi_errno = MPIDI_CH3_Sockconn_handle_connwrite( conn );
		if (mpi_errno) { MPIR_ERR_POP( mpi_errno ); }
	    }
	    break;
	}
	/* END OF SOCK_OP_WRITE */

	case MPIDI_CH3I_SOCK_OP_ACCEPT:
	{
	    mpi_errno = MPIDI_CH3_Sockconn_handle_accept_event();
	    if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
	    break;
	}
	    
	case MPIDI_CH3I_SOCK_OP_CONNECT:
	{
	    mpi_errno = MPIDI_CH3_Sockconn_handle_connect_event( 
				(MPIDI_CH3I_Connection_t *) event->user_ptr,
				event->error );
	    if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
	    break;
	}
	    
	case MPIDI_CH3I_SOCK_OP_CLOSE:
	{
	    mpi_errno = MPIDI_CH3_Sockconn_handle_close_event( 
			      (MPIDI_CH3I_Connection_t *) event->user_ptr );
	    if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
	    break;
	}

	case MPIDI_CH3I_SOCK_OP_WAKEUP:
	{
	    MPIDI_CH3_Progress_signal_completion();
	    /* MPIDI_CH3I_progress_completion_count++; */
	    break;
	}
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_HANDLE_SOCK_EVENT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #21
0
/*
 * MPIDI_CH3I_Get_accumulate - issue a get-accumulate RMA operation.
 *
 * origin_addr / origin_count / origin_datatype - data to combine into the
 *     target; treated as empty when op is MPI_NO_OP
 * result_addr / result_count / result_datatype - buffer for the result data
 * target_rank, target_disp, target_count, target_datatype - target location;
 *     target_disp is scaled by the target's disp_unit
 * op      - reduction operation
 * win_ptr - window the operation is issued on
 * ureq    - optional user request (may be NULL); completed here when the
 *           operation is carried out through shared memory, otherwise stored
 *           in the queued operation
 *
 * Returns MPI_SUCCESS or an MPI error code.  MPI_ERR_RMA_SYNC is raised when
 * the window's access state is MPIDI_RMA_NONE.  A target of MPI_PROC_NULL or
 * an empty target buffer makes the call a no-op.
 *
 * The operation is performed directly through MPIDI_CH3I_Shm_get_acc_op()
 * when the target is this rank, the window flavor is MPI_WIN_FLAVOR_SHARED,
 * or shared memory is allocated and origin and target share a node_id.
 * Otherwise an operation record is filled in, enqueued on the window and
 * progress is attempted for the target.
 */
int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
                              MPI_Datatype origin_datatype, void *result_addr, int result_count,
                              MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
                              int target_count, MPI_Datatype target_datatype, MPI_Op op,
                              MPID_Win * win_ptr, MPID_Request * ureq)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_msg_sz_t orig_data_sz, target_data_sz;
    int rank;
    int dt_contig ATTRIBUTE((unused));
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPID_Datatype *dtp;
    MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
    int made_progress = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);

    MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    MPIDI_Datatype_get_info(target_count, target_datatype, dt_contig, target_data_sz, dtp,
                            dt_true_lb);

    if (target_data_sz == 0) {
        goto fn_exit;
    }

    rank = win_ptr->comm_ptr->rank;

    if (win_ptr->shm_allocated == TRUE && target_rank != rank &&
        win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
        /* check if target is local and shared memory is allocated on window,
         * if so, we directly perform this operation on shared memory region. */

        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
         * the same node. However, in ch3:sock, even if origin and target are on the same node, they are
         * not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
         * which is only set to TRUE when SHM region is allocated in nemesis.
         * In future we need to figure out a way to check if origin and target are in the same "SHM comm".
         */
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
    }

    /* Do != rank first (most likely branch?) */
    /* The VC pointers are dereferenced only when the first two tests fail
     * and shm_allocated is TRUE, which is exactly the case in which they
     * were looked up above. */
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
        mpi_errno = MPIDI_CH3I_Shm_get_acc_op(origin_addr, origin_count, origin_datatype,
                                              result_addr, result_count, result_datatype,
                                              target_rank, target_disp, target_count,
                                              target_datatype, op, win_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        if (ureq) {
            /* Complete user request and release the ch3 ref */
            mpi_errno = MPID_Request_complete(ureq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
        }
    }
    else {
        MPIDI_RMA_Op_t *op_ptr = NULL;
        MPIDI_CH3_Pkt_get_accum_t *get_accum_pkt;
        MPI_Aint origin_type_size;
        MPI_Aint target_type_size;
        int use_immed_pkt = FALSE;
        /* Same type as stream_unit_count, so the loop below cannot
         * overflow its counter. */
        MPI_Aint i;
        int is_origin_contig, is_target_contig, is_result_contig;
        MPI_Aint stream_elem_count, stream_unit_count;
        MPI_Aint predefined_dtp_size, predefined_dtp_count, predefined_dtp_extent;
        MPID_Datatype *origin_dtp = NULL, *target_dtp = NULL, *result_dtp = NULL;
        int is_empty_origin = FALSE;

        /* Judge if origin buffer is empty */
        if (op == MPI_NO_OP)
            is_empty_origin = TRUE;

        /* Append the operation to the window's RMA ops queue */
        mpi_errno = MPIDI_CH3I_Win_get_op(win_ptr, &op_ptr);
        if (mpi_errno != MPI_SUCCESS)
            MPIR_ERR_POP(mpi_errno);

        /* TODO: Can we use the MPIDI_RMA_ACC_CONTIG optimization? */

        MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);

        /******************** Setting operation struct areas ***********************/

        op_ptr->origin_addr = (void *) origin_addr;
        op_ptr->origin_count = origin_count;
        op_ptr->origin_datatype = origin_datatype;
        op_ptr->result_addr = result_addr;
        op_ptr->result_count = result_count;
        op_ptr->result_datatype = result_datatype;
        op_ptr->target_rank = target_rank;

        /* Remember user request */
        op_ptr->ureq = ureq;

        /* if source or target datatypes are derived, increment their
         * reference counts */
        if (is_empty_origin == FALSE && !MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) {
            MPID_Datatype_get_ptr(origin_datatype, origin_dtp);
        }
        if (!MPIR_DATATYPE_IS_PREDEFINED(result_datatype)) {
            MPID_Datatype_get_ptr(result_datatype, result_dtp);
        }
        if (!MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
            MPID_Datatype_get_ptr(target_datatype, target_dtp);
        }

        if (is_empty_origin == FALSE) {
            MPID_Datatype_get_size_macro(origin_datatype, origin_type_size);
            MPIU_Assign_trunc(orig_data_sz, origin_count * origin_type_size, MPIDI_msg_sz_t);
        }
        else {
            /* If origin buffer is empty, set origin data size to 0 */
            orig_data_sz = 0;
        }

        MPID_Datatype_get_size_macro(target_datatype, target_type_size);

        /* Get size and count for predefined datatype elements */
        if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
            predefined_dtp_size = target_type_size;
            predefined_dtp_count = target_count;
            MPID_Datatype_get_extent_macro(target_datatype, predefined_dtp_extent);
        }
        else {
            MPIU_Assert(target_dtp->basic_type != MPI_DATATYPE_NULL);
            MPID_Datatype_get_size_macro(target_dtp->basic_type, predefined_dtp_size);
            predefined_dtp_count = target_data_sz / predefined_dtp_size;
            MPID_Datatype_get_extent_macro(target_dtp->basic_type, predefined_dtp_extent);
        }
        MPIU_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 &&
                    predefined_dtp_extent > 0);

        /* Calculate number of predefined elements in each stream unit, and
         * total number of stream units. */
        stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
        /* Check the divisor before it is used, so that a bad value trips
         * the assertion instead of a division by zero. */
        MPIU_Assert(stream_elem_count > 0);
        stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
        MPIU_Assert(stream_unit_count > 0);

        /* One reference per stream unit on every derived datatype. */
        for (i = 0; i < stream_unit_count; i++) {
            if (origin_dtp != NULL) {
                MPID_Datatype_add_ref(origin_dtp);
            }
            if (target_dtp != NULL) {
                MPID_Datatype_add_ref(target_dtp);
            }
            if (result_dtp != NULL) {
                MPID_Datatype_add_ref(result_dtp);
            }
        }

        if (is_empty_origin == FALSE) {
            MPID_Datatype_is_contig(origin_datatype, &is_origin_contig);
        }
        else {
            /* If origin buffer is empty, mark origin data as contig data */
            is_origin_contig = 1;
        }
        MPID_Datatype_is_contig(target_datatype, &is_target_contig);
        MPID_Datatype_is_contig(result_datatype, &is_result_contig);

        /* Judge if we can use IMMED data packet */
        if ((is_empty_origin == TRUE || MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) &&
            MPIR_DATATYPE_IS_PREDEFINED(result_datatype) &&
            MPIR_DATATYPE_IS_PREDEFINED(target_datatype) &&
            is_origin_contig && is_target_contig && is_result_contig) {
            if (target_data_sz <= MPIDI_RMA_IMMED_BYTES)
                use_immed_pkt = TRUE;
        }

        /* Judge if this operation is a piggyback candidate */
        if ((is_empty_origin == TRUE || MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) &&
            MPIR_DATATYPE_IS_PREDEFINED(result_datatype) &&
            MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
            /* FIXME: currently we only piggyback LOCK flag with op using predefined datatypes
             * for origin, target and result data. We should extend this optimization to derived
             * datatypes as well. */
            if (orig_data_sz <= MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE)
                op_ptr->piggyback_lock_candidate = 1;
        }

        /************** Setting packet struct areas in operation ****************/

        get_accum_pkt = &(op_ptr->pkt.get_accum);

        if (use_immed_pkt) {
            MPIDI_Pkt_init(get_accum_pkt, MPIDI_CH3_PKT_GET_ACCUM_IMMED);
        }
        else {
            MPIDI_Pkt_init(get_accum_pkt, MPIDI_CH3_PKT_GET_ACCUM);
        }

        get_accum_pkt->addr = (char *) win_ptr->basic_info_table[target_rank].base_addr +
            win_ptr->basic_info_table[target_rank].disp_unit * target_disp;
        get_accum_pkt->count = target_count;
        get_accum_pkt->datatype = target_datatype;
        get_accum_pkt->info.dataloop_size = 0;
        get_accum_pkt->op = op;
        get_accum_pkt->target_win_handle = win_ptr->basic_info_table[target_rank].win_handle;
        get_accum_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
        if (use_immed_pkt) {
            /* Small payload: the origin data travels inside the packet. */
            void *src = (void *) origin_addr, *dest = (void *) (get_accum_pkt->info.data);
            mpi_errno = immed_copy(src, dest, orig_data_sz);
            if (mpi_errno != MPI_SUCCESS)
                MPIR_ERR_POP(mpi_errno);
        }

        MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);

        mpi_errno = MPIDI_CH3I_Win_enqueue_op(win_ptr, op_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, target_rank, &made_progress);
        if (mpi_errno != MPI_SUCCESS)
            MPIR_ERR_POP(mpi_errno);

        /* Throttle: with a non-negative threshold configured, wait in the
         * progress engine until the active request count is below it. */
        if (MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD >= 0 &&
            MPIDI_CH3I_RMA_Active_req_cnt >= MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD) {
            while (MPIDI_CH3I_RMA_Active_req_cnt >= MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD) {
                mpi_errno = wait_progress_engine();
                if (mpi_errno != MPI_SUCCESS)
                    MPIR_ERR_POP(mpi_errno);
            }
        }
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
Beispiel #22
0
/*
 * MPIDI_CH3_RecvFromSelf - finish a receive whose matching send request is
 * attached to the receive request as rreq->partner_request.
 *
 * When a partner send request is present, the sender's data is copied into
 * the receive buffer with MPIDI_CH3U_Buffer_copy(), the resulting size is
 * stored in rreq->status via MPIR_STATUS_SET_COUNT(), and the send request is
 * completed.  The receive request is then completed as well.
 *
 * Parameters:
 *   rreq     - receive request; its partner_request field is read here
 *   buf      - destination buffer passed to MPIDI_CH3U_Buffer_copy()
 *   count    - receive count passed to MPIDI_CH3U_Buffer_copy()
 *   datatype - receive datatype passed to MPIDI_CH3U_Buffer_copy()
 *
 * Returns mpi_errno, which starts as MPI_SUCCESS and is only assigned from
 * the MPID_Request_complete() calls below.
 *
 * NOTE(review): the `#endif` right after the signature has no matching `#if`
 * in the visible text.  Presumably a FINEGRAIN_MPI-specific signature (one
 * that supplies the `buf_handle` used further down) precedes this one --
 * confirm against the full file.
 */
int MPIDI_CH3_RecvFromSelf( MPID_Request *rreq, void *buf, MPI_Aint count,
			    MPI_Datatype datatype )
#endif
{
    MPID_Request * const sreq = rreq->partner_request;
    int mpi_errno = MPI_SUCCESS;

    if (sreq != NULL)
    {
	/* Size reported by the copy/allocate helpers (or by
	   MPIDI_Datatype_get_info in the pure zero-copy case); it is recorded
	   in rreq->status below. */
	MPIDI_msg_sz_t data_sz;

#if defined(FINEGRAIN_MPI)
        /* FG: Zerocopy */
        /* NOTE(review): `buf_handle` is not a parameter of the signature
           visible above; presumably it comes from the FINEGRAIN_MPI variant
           of the signature -- confirm. */
        void * buf = (void *) (*buf_handle);
        if ( MPIDI_Request_get_self_zerocopy_flag(sreq) && MPIDI_Request_get_self_zerocopy_flag(rreq) )
        {
            int rdt_contig;
            MPI_Aint rdt_true_lb;
            MPID_Datatype * rdt_ptr;

            /* Unexpected Send-Collocated MPIX_Zsend/Izsend - MPIX_Zrecv/Izrecv pairing */
            /* Both sides are zero-copy: no data is copied, the receiver's
               buffer handle is simply pointed at the sender's buffer. */
            MPIU_Assert(NULL == rreq->dev.user_buf);
            *(rreq->dev.user_buf_handle) = (void*) (*(sreq->dev.user_buf_handle));

            /* Called here so that data_sz is set for the status update
               below; the other outputs are not used in this function. */
            MPIDI_Datatype_get_info(count, datatype, rdt_contig, data_sz, rdt_ptr, rdt_true_lb);

            /* MPIX_Zsend buf_handle can't be set to NULL as we don't have
               a ptr to void **.  */
        }
        else if( MPIDI_Request_get_self_zerocopy_flag(sreq) && !MPIDI_Request_get_self_zerocopy_flag(rreq) ){
            /* Unexpected Send-Collocated MPIX_Zsend/Izsend<=>MPI_Recv/Irecv pairing. Freeing sender buffer */
            MPIDI_CH3U_Buffer_copy(*(sreq->dev.user_buf_handle), sreq->dev.user_count,
                                   sreq->dev.datatype, &sreq->status.MPI_ERROR,
                                   buf, count, datatype, &data_sz,
                                   &rreq->status.MPI_ERROR);

            /* Free the sender's buffer */
            MPIU_Free(*(sreq->dev.user_buf_handle));
        }
        else if( !MPIDI_Request_get_self_zerocopy_flag(sreq) && MPIDI_Request_get_self_zerocopy_flag(rreq) ){
            /* Unexpected Send-Collocated MPI_Send/Isend - MPIX_Zrecv/Izrecv pairing. Allocating receiver's buffer. */
            MPIU_Assert(NULL == rreq->dev.user_buf);
            /* Added checks for buffer count size as is done in MPIDI_CH3U_Buffer_copy() */
            MPIDI_CH3U_Buffer_allocate(sreq->dev.user_buf, sreq->dev.user_count,
                                       sreq->dev.datatype, &sreq->status.MPI_ERROR,
                                       rreq->dev.user_buf_handle, rreq->dev.user_count,
                                       rreq->dev.datatype, &data_sz, &rreq->status.MPI_ERROR);
            /* Copy the sender's data into the buffer now referenced by the
               receiver's handle. */
            MPIDI_CH3U_Buffer_copy(sreq->dev.user_buf, sreq->dev.user_count,
                                   sreq->dev.datatype, &sreq->status.MPI_ERROR,
                                   *(rreq->dev.user_buf_handle), rreq->dev.user_count,
                                   rreq->dev.datatype, &data_sz, &rreq->status.MPI_ERROR);
        } else {
            /* Unexpected Send-Collocated MPI_Send/Isend - MPI_Recv/Irecv pairing */
#endif /* matches #if defined(FINEGRAIN_MPI) */

	/* Plain copy from the sender's user buffer into the receive buffer.
	   This is the only path when FINEGRAIN_MPI is not defined, and the
	   final `else` body when it is. */
	MPIDI_CH3U_Buffer_copy(sreq->dev.user_buf, sreq->dev.user_count,
			       sreq->dev.datatype, &sreq->status.MPI_ERROR,
			       buf, count, datatype, &data_sz, 
			       &rreq->status.MPI_ERROR);
#if defined(FINEGRAIN_MPI)
        }
#endif

	/* Record the received size, then complete the send side. */
	MPIR_STATUS_SET_COUNT(rreq->status, data_sz);
	mpi_errno = MPID_Request_complete(sreq);
        if (mpi_errno != MPI_SUCCESS) {
            /* NOTE(review): presumably MPIR_ERR_POP jumps to fn_fail, in
               which case rreq is not completed on this path -- confirm. */
            MPIR_ERR_POP(mpi_errno);
        }
    }
    else
    {
	/* The sreq is missing which means an error occurred.  
	   rreq->status.MPI_ERROR should have been set when the
	   error was detected. */
    }
    
    /* no other thread can possibly be waiting on rreq, so it is safe to 
       reset ref_count and cc */
    /* NOTE(review): no explicit reset of ref_count or cc is visible in this
       function; only MPID_Request_complete() is called on rreq. */
    mpi_errno = MPID_Request_complete(rreq);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}