Ejemplo n.º 1
0
static void dequeue_req(const ptl_event_t *e)
{
    int found;
    MPID_Request *const rreq = e->user_ptr;
    MPI_Aint s_len, r_len;

    /* At this point we know the ME is unlinked. Invalidate the handle to
       prevent further accesses, e.g. an attempted cancel. */
    REQ_PTL(rreq)->put_me = PTL_INVALID_HANDLE;

    found = MPIDI_CH3U_Recvq_DP(rreq);
    /* an MPI_ANY_SOURCE request may have been previously removed from the
       CH3 queue by an FDP (find and dequeue posted) operation */
    if (rreq->dev.match.parts.rank != MPI_ANY_SOURCE)
        MPIU_Assert(found);

    rreq->status.MPI_ERROR = MPI_SUCCESS;
    rreq->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
    rreq->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);

    MPID_Datatype_get_size_macro(rreq->dev.datatype, r_len);
    r_len *= rreq->dev.user_count;

    s_len = NPTL_HEADER_GET_LENGTH(e->hdr_data);

    if (s_len > r_len) {
        /* truncated data */
        MPIR_STATUS_SET_COUNT(rreq->status, r_len);
        MPIR_ERR_SET2(rreq->status.MPI_ERROR, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", s_len, r_len);
    } else {
        MPIR_STATUS_SET_COUNT(rreq->status, s_len);
    }
}
Ejemplo n.º 2
0
static void _mxm_recv_completion_cb(void *context)
{
    MPID_Request *req = (MPID_Request *) context;
    mxm_recv_req_t *mxm_rreq;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIU_Assert(req);
    _dbg_mxm_out_req(req);

    req_area = REQ_BASE(req);
    _mxm_to_mpi_status(req_area->mxm_req->item.base.error, &req->status);

    mxm_rreq = &req_area->mxm_req->item.recv;
    req->status.MPI_TAG = _mxm_tag_mxm2mpi(mxm_rreq->completion.sender_tag);
    req->status.MPI_SOURCE = mxm_rreq->completion.sender_imm;
    req->dev.recv_data_sz = mxm_rreq->completion.actual_len;
    MPIR_STATUS_SET_COUNT(req->status, req->dev.recv_data_sz);

    if (req->ch.vc) {
        MPID_nem_mxm_vc_area *vc_area = VC_BASE(req->ch.vc);
        list_enqueue(&vc_area->mxm_ep->free_queue, &req_area->mxm_req->queue);
    }
    else {
        list_enqueue(&mxm_obj->free_queue, &req_area->mxm_req->queue);
    }

    _dbg_mxm_output(5, "========> %s RECV req %p status %d\n",
                    (MPIR_STATUS_GET_CANCEL_BIT(req->status) ? "Canceling" : "Completing"),
                    req, req->status.MPI_ERROR);

    if (likely(!MPIR_STATUS_GET_CANCEL_BIT(req->status))) {
        _mxm_handle_rreq(req);
    }
}
Ejemplo n.º 3
0
static int handle_probe(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const req = e->user_ptr;
    MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE);

    if (e->ni_fail_type == PTL_NI_NO_MATCH) {
        REQ_PTL(req)->found = FALSE;
        goto finish_probe;
    }

    REQ_PTL(req)->found = TRUE;
    req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
    req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);
    MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data));

 finish_probe:
    mpi_errno = MPID_Request_complete(req);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Ejemplo n.º 4
0
static int search_complete(uint64_t tag, size_t msglen, MPID_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;
    BEGIN_FUNC(FCNAME);
    rreq->status.MPI_SOURCE = get_source(tag);
    rreq->status.MPI_TAG = get_tag(tag);
    rreq->status.MPI_ERROR = MPI_SUCCESS;
    MPIR_STATUS_SET_COUNT(rreq->status, msglen);
    END_FUNC(FCNAME);
    return mpi_errno;
}
Ejemplo n.º 5
0
static int handle_mprobe(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const req = e->user_ptr;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE);

    if (e->ni_fail_type == PTL_NI_NO_MATCH) {
        REQ_PTL(req)->found = FALSE;
        goto finish_mprobe;
    }

    REQ_PTL(req)->found = TRUE;
    req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
    req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);
    MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data));
    MPIDI_Request_set_sync_send_flag(req, e->hdr_data & NPTL_SSEND);

    MPIU_CHKPMEM_MALLOC(req->dev.tmpbuf, void *, e->mlength, mpi_errno, "tmpbuf");
    MPIU_Memcpy((char *)req->dev.tmpbuf, e->start, e->mlength);
    req->dev.recv_data_sz = e->mlength;

    if (!(e->hdr_data & NPTL_LARGE)) {
        MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_EAGER_MSG);
    }
    else {
        MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD);
        req->dev.match.parts.tag = req->status.MPI_TAG;
        req->dev.match.parts.context_id = NPTL_MATCH_GET_CTX(e->match_bits);
        req->dev.match.parts.rank = req->status.MPI_SOURCE;
        MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_RNDV_MSG);
    }

    /* At this point we know the ME is unlinked. Invalidate the handle to
       prevent further accesses, e.g. an attempted cancel. */
    REQ_PTL(req)->put_me = PTL_INVALID_HANDLE;
    req->dev.recv_pending_count = 1;

  finish_mprobe:
    mpi_errno = MPID_Request_complete(req);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIU_CHKPMEM_COMMIT();
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE);
    return mpi_errno;
 fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}
Ejemplo n.º 6
0
int MPIO_Waitany(int count, MPIO_Request requests[], int *index, 
		 MPI_Status *status)
{
    int i, flag, err; 
    MPID_THREADPRIV_DECL;

    ROMIO_THREAD_CS_ENTER();

    if (count == 1) {
	err = MPIO_Wait( requests, status );
	if (!err) *index = 0;
	goto fn_exit;
    }

    /* Check for no active requests */
    for (i=0; i<count; i++) {
	if (requests[i] != MPIO_REQUEST_NULL) {
	    break;
	}
    }
    if (i == count) {
	*index = MPI_UNDEFINED;
#ifdef MPICH
	/* need to set empty status */
	if (status != MPI_STATUS_IGNORE) {
	    status->MPI_SOURCE = MPI_ANY_SOURCE;
	    status->MPI_TAG    = MPI_ANY_TAG;
            MPIR_STATUS_SET_COUNT(*status, 0);
            MPIR_STATUS_SET_CANCEL_BIT(*status, 0);
	}
#endif
	err = MPI_SUCCESS;
	goto fn_exit;
    }

    err = MPI_SUCCESS;
    do {
	flag = 0;
	for (i=0; i<count; i++) {
	    if (requests[i] != MPIO_REQUEST_NULL) {
		err = MPIO_Test( &requests[i], &flag, status );
		if (flag) {
		    if (!err) *index = i;
		    break;
		}
	    }
	}
    } while (flag == 0);

fn_exit:
    ROMIO_THREAD_CS_EXIT();

    return err;
}
Ejemplo n.º 7
0
int MPID_nem_ib_lmt_done_recv(struct MPIDI_VC *vc, struct MPID_Request *rreq)
{
    int mpi_errno = MPI_SUCCESS;

    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV);

    dprintf("lmt_done_recv,enter,rreq=%p,head=%p\n", rreq, MPID_nem_ib_lmtq.head);


    int is_contig;
    MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);
    if (!is_contig) {
        dprintf("lmt_done_recv,copying noncontiguous data to user buffer\n");

        /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
        /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
        MPIDI_msg_sz_t unpack_sz = rreq->ch.lmt_data_sz;
        MPID_Segment seg;
        MPI_Aint last;

        MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &seg, 0);
        last = unpack_sz;
        MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(rreq, lmt_pack_buf));
        if (last != unpack_sz) {
            /* --BEGIN ERROR HANDLING-- */
            /* received data was not entirely consumed by unpack()
             * because too few bytes remained to fill the next basic
             * datatype */
            MPIR_STATUS_SET_COUNT(rreq->status, last);
            rreq->status.MPI_ERROR =
                MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
                                     MPI_ERR_TYPE, "**MPID_nem_ib_lmt_done_recv", 0);
            /* --END ERROR HANDLING-- */
        }

        //MPIU_Free(REQ_FIELD(rreq, lmt_pack_buf));
        MPID_nem_ib_stfree(REQ_FIELD(rreq, lmt_pack_buf), (size_t) rreq->ch.lmt_data_sz);
    }

    dprintf("lmt_done_recv,1,req=%p,pcc=%d\n", rreq, MPIDI_CH3I_progress_completion_count.v);
    MPIDI_CH3U_Request_complete(rreq);
    dprintf("lmt_done_recv,complete,req=%p\n", rreq);
    dprintf("lmt_done_recv,2,pcc=%d\n", MPIDI_CH3I_progress_completion_count.v);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV);
    return mpi_errno;
    //fn_fail:
    goto fn_exit;
}
Ejemplo n.º 8
0
int MPID_Cancel_recv(MPIR_Request * rreq)
{
    int netmod_cancelled = TRUE;
    int mpi_errno = MPI_SUCCESS;

    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CANCEL_RECV);
    
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CANCEL_RECV);
    
    MPIR_Assert(rreq->kind == MPIR_REQUEST_KIND__RECV);
    
    /* If the netmod has its own cancel_recv function, we need to call
       it here. ANYSOURCE cancels (netmod and otherwise) are handled by
       MPIDI_CH3U_Recvq_DP below. */
#ifdef ENABLE_COMM_OVERRIDES
    if (rreq->dev.match.parts.rank != MPI_ANY_SOURCE)
    {
        MPIDI_VC_t *vc;
        MPIDI_Comm_get_vc_set_active(rreq->comm, rreq->dev.match.parts.rank, &vc);
        if (vc->comm_ops && vc->comm_ops->cancel_recv)
            netmod_cancelled = !vc->comm_ops->cancel_recv(NULL, rreq);
    }
#endif

    if (netmod_cancelled && MPIDI_CH3U_Recvq_DP(rreq))
    {
	MPL_DBG_MSG_P(MPIDI_CH3_DBG_OTHER,VERBOSE,
		       "request 0x%08x cancelled", rreq->handle);
        MPIR_STATUS_SET_CANCEL_BIT(rreq->status, TRUE);
        MPIR_STATUS_SET_COUNT(rreq->status, 0);
        mpi_errno = MPID_Request_complete(rreq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
    }
    else
    {
	MPL_DBG_MSG_P(MPIDI_CH3_DBG_OTHER,VERBOSE,
	    "request 0x%08x already matched, unable to cancel", rreq->handle);
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CANCEL_RECV);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Ejemplo n.º 9
0
static int tsearch_callback(cq_tagged_entry_t * wc, MPID_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;
    BEGIN_FUNC(FCNAME);
    if (wc->data) {
        REQ_OFI(rreq)->match_state = TSEARCH_FOUND;
        rreq->status.MPI_SOURCE = get_source(wc->tag);
        rreq->status.MPI_TAG = get_tag(wc->tag);
        MPIR_STATUS_SET_COUNT(rreq->status, wc->len);
        rreq->status.MPI_ERROR = MPI_SUCCESS;
    }
    else {
        REQ_OFI(rreq)->match_state = TSEARCH_NOT_FOUND;
    }
    END_FUNC(FCNAME);
    return mpi_errno;
}
Ejemplo n.º 10
0
static int
ADD_SUFFIX(peek_callback)(cq_tagged_entry_t * wc, MPIR_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;
    BEGIN_FUNC(FCNAME);
    REQ_OFI(rreq)->match_state = PEEK_FOUND;
#if API_SET == API_SET_1
    rreq->status.MPI_SOURCE    = get_source(wc->tag);
#elif API_SET == API_SET_2
    rreq->status.MPI_SOURCE    = wc->data;
#endif
    rreq->status.MPI_TAG       = get_tag(wc->tag);
    MPIR_STATUS_SET_COUNT(rreq->status, wc->len);
    rreq->status.MPI_ERROR     = MPI_SUCCESS;
    END_FUNC(FCNAME);
    return mpi_errno;
}
Ejemplo n.º 11
0
int MPIO_Testall(int count, MPIO_Request requests[], int *flag,
		 MPI_Status statuses[])
{
    int done, i, err; 

    ROMIO_THREAD_CS_ENTER();
    if (count == 1)  {
	    err = MPIO_Test( requests, flag, statuses );
	    goto fn_exit;
    }

    /* This is actually very difficult to do.  We can't use MPIO_Test, 
       since we must change the requests only if *ALL* requests are complete
    */
    /* FIXME: THIS IS NOT CORRECT (see above).  But most applications won't 
     care */
    done = 1;
    for (i=0; i<count; i++) {
      if (requests[i] != MPIO_REQUEST_NULL) {
	err = MPIO_Test( &requests[i], flag, &statuses[i] );
	if (!*flag) done = 0;
	if (err) goto fn_exit;
      }
      else {
#ifdef MPICH
	  /* need to set empty status */
	  if (statuses != MPI_STATUSES_IGNORE) {
	      statuses[i].MPI_SOURCE = MPI_ANY_SOURCE;
	      statuses[i].MPI_TAG    = MPI_ANY_TAG;
              MPIR_STATUS_SET_COUNT(statuses[i], 0);
              MPIR_STATUS_SET_CANCEL_BIT(statuses[i], 0);
	  }
#else
	  ;
#endif
      }
    }
    
    *flag = done;

    err = MPI_SUCCESS;
fn_exit:
    ROMIO_THREAD_CS_EXIT();
    return err;
}
Ejemplo n.º 12
0
int MPIR_Status_set_elements_x_impl(MPI_Status *status, MPI_Datatype datatype, MPI_Count count)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Count size_x;

    MPID_Datatype_get_size_macro(datatype, size_x);

    /* overflow check, should probably be a real error check? */
    if (count != 0) {
        MPIU_Assert(size_x >= 0 && count > 0);
        MPIU_Assert(count * size_x < MPIR_COUNT_MAX);
    }

    MPIR_STATUS_SET_COUNT(*status, size_x * count);

fn_exit:
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Ejemplo n.º 13
0
int MPIDI_CH3U_Request_unpack_srbuf(MPID_Request * rreq)
{
    MPI_Aint last;
    int tmpbuf_last;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
    
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);

    tmpbuf_last = (int)(rreq->dev.segment_first + rreq->dev.tmpbuf_sz);
    if (rreq->dev.segment_size < tmpbuf_last)
    {
	tmpbuf_last = (int)rreq->dev.segment_size;
    }
    last = tmpbuf_last;
    MPID_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, 
			&last, rreq->dev.tmpbuf);
    if (last == 0 || last == rreq->dev.segment_first)
    {
	/* --BEGIN ERROR HANDLING-- */
	/* If no data can be unpacked, then we have a datatype processing 
	   problem.  Adjust the segment info so that the remaining
	   data is received and thrown away. */
	MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.segment_first);
	rreq->dev.segment_size = rreq->dev.segment_first;
	rreq->dev.segment_first += tmpbuf_last;
	rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
		       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE,
		       "**dtypemismatch", 0);
	/* --END ERROR HANDLING-- */
    }
    else if (tmpbuf_last == rreq->dev.segment_size)
    {
	/* --BEGIN ERROR HANDLING-- */
	if (last != tmpbuf_last)
	{
	    /* received data was not entirely consumed by unpack() because too
	       few bytes remained to fill the next basic datatype.
	       Note: the segment_first field is set to segment_last so that if
	       this is a truncated message, extra data will be read
	       off the pipe. */
	    MPIR_STATUS_SET_COUNT(rreq->status, last);
	    rreq->dev.segment_size = last;
	    rreq->dev.segment_first = tmpbuf_last;
	    rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
		  MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE,
							  "**dtypemismatch", 0);
	}
	/* --END ERROR HANDLING-- */
    }
    else
    {
	rreq->dev.tmpbuf_off = (int)(tmpbuf_last - last);
	if (rreq->dev.tmpbuf_off > 0)
	{
	    /* move any remaining data to the beginning of the buffer.  
	       Note: memmove() is used since the data regions could
               overlap. */
	    memmove(rreq->dev.tmpbuf, (char *) rreq->dev.tmpbuf + 
		    (last - rreq->dev.segment_first), rreq->dev.tmpbuf_off);
	}
	rreq->dev.segment_first = last;
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
    return mpi_errno;
}
Ejemplo n.º 14
0
MPID_Request * MPID_Request_create(void)
{
    MPID_Request * req;
    MPIDI_STATE_DECL(MPID_STATE_MPID_REQUEST_CREATE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_REQUEST_CREATE);
    
    req = MPIU_Handle_obj_alloc(&MPID_Request_mem);
    if (req != NULL)
    {
	MPIU_DBG_MSG_P(CH3_CHANNEL,VERBOSE,
		       "allocated request, handle=0x%08x", req->handle);
#ifdef MPICH_DBG_OUTPUT
	/*MPIU_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPID_REQUEST);*/
	if (HANDLE_GET_MPI_KIND(req->handle) != MPID_REQUEST)
	{
	    int mpi_errno;
	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
		       FCNAME, __LINE__, MPI_ERR_OTHER, 
		       "**invalid_handle", "**invalid_handle %d", req->handle);
	    MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
	}
#endif
	/* FIXME: This makes request creation expensive.  We need to trim
	   this to the basics, with additional setup for special-purpose 
	   requests (think base class and inheritance).  For example, do we 
	   *really* want to set the kind to UNDEFINED? And should the RMA 
	   values be set only for RMA requests? */
	MPIU_Object_set_ref(req, 1);
	req->kind		   = MPID_REQUEST_UNDEFINED;
        MPID_cc_set(&req->cc, 1);
	req->cc_ptr		   = &req->cc;
	/* FIXME: status fields meaningful only for receive, and even then
	   should not need to be set. */
	req->status.MPI_SOURCE	   = MPI_UNDEFINED;
	req->status.MPI_TAG	   = MPI_UNDEFINED;
	req->status.MPI_ERROR	   = MPI_SUCCESS;
        MPIR_STATUS_SET_COUNT(req->status, 0);
        MPIR_STATUS_SET_CANCEL_BIT(req->status, FALSE);
	req->comm		   = NULL;
        req->greq_fns              = NULL;
        req->errflag               = MPIR_ERR_NONE;
	req->dev.datatype_ptr	   = NULL;
	req->dev.segment_ptr	   = NULL;
	/* Masks and flags for channel device state in an MPID_Request */
	req->dev.state		   = 0;
	req->dev.cancel_pending	   = FALSE;
	/* FIXME: RMA ops shouldn't need to be set except when creating a
	   request for RMA operations */
	req->dev.target_win_handle = MPI_WIN_NULL;
	req->dev.source_win_handle = MPI_WIN_NULL;
        req->dev.lock_queue_entry  = NULL;
	req->dev.dtype_info	   = NULL;
	req->dev.dataloop	   = NULL;
	req->dev.iov_offset        = 0;
        req->dev.flags             = MPIDI_CH3_PKT_FLAG_NONE;
        req->dev.resp_request_handle = MPI_REQUEST_NULL;
        req->dev.user_buf          = NULL;
        req->dev.OnDataAvail       = NULL;
        req->dev.OnFinal           = NULL;
        req->dev.user_buf          = NULL;
        req->dev.drop_data         = FALSE;
        req->dev.stream_offset     = 0;
#ifdef MPIDI_CH3_REQUEST_INIT
	MPIDI_CH3_REQUEST_INIT(req);
#endif
    }
    else
    {
	/* FIXME: This fails to fail if debugging is turned off */
	MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"unable to allocate a request");
    }
    
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_REQUEST_CREATE);
    return req;
}
Ejemplo n.º 15
0
int MPIDI_CH3U_Request_load_recv_iov(MPID_Request * const rreq)
{
    MPI_Aint last;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
    if (rreq->dev.segment_first < rreq->dev.segment_size)
    {
	/* still reading data that needs to go into the user buffer */
	
	if (MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_ACCUM_RECV &&
            MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_ACCUM_RECV &&
            MPIDI_Request_get_srbuf_flag(rreq))
	{
	    MPIDI_msg_sz_t data_sz;
	    MPIDI_msg_sz_t tmpbuf_sz;

	    /* Once a SRBuf is in use, we continue to use it since a small 
	       amount of data may already be present at the beginning
	       of the buffer.  This data is left over from the previous unpack,
	       most like a result of alignment issues.  NOTE: we
	       could force the use of the SRBuf only 
	       when (rreq->dev.tmpbuf_off > 0)... */
	    
	    data_sz = rreq->dev.segment_size - rreq->dev.segment_first - 
		rreq->dev.tmpbuf_off;
	    MPIU_Assert(data_sz > 0);
	    tmpbuf_sz = rreq->dev.tmpbuf_sz - rreq->dev.tmpbuf_off;
	    if (data_sz > tmpbuf_sz)
	    {
		data_sz = tmpbuf_sz;
	    }
	    rreq->dev.iov[0].MPID_IOV_BUF = 
		(MPID_IOV_BUF_CAST)((char *) rreq->dev.tmpbuf + 
				    rreq->dev.tmpbuf_off);
	    rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
            rreq->dev.iov_offset = 0;
	    rreq->dev.iov_count = 1;
	    MPIU_Assert(rreq->dev.segment_first + data_sz + 
			rreq->dev.tmpbuf_off <= rreq->dev.recv_data_sz);
	    if (rreq->dev.segment_first + data_sz + rreq->dev.tmpbuf_off == 
		rreq->dev.recv_data_sz)
	    {
		MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
		  "updating rreq to read the remaining data into the SRBuf");
		rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackSRBufComplete;
	    }
	    else
	    {
		MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
		       "updating rreq to read more data into the SRBuf");
		rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV;
	    }
	    goto fn_exit;
	}
	
	last = rreq->dev.segment_size;
	rreq->dev.iov_count = MPID_IOV_LIMIT;
	rreq->dev.iov_offset = 0;
	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
   "pre-upv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d",
			  rreq->dev.segment_first, last, rreq->dev.iov_count));
	MPIU_Assert(rreq->dev.segment_first < last);
	MPIU_Assert(last > 0);
	MPID_Segment_unpack_vector(rreq->dev.segment_ptr, 
				   rreq->dev.segment_first,
				   &last, &rreq->dev.iov[0], &rreq->dev.iov_count);
	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
   "post-upv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d, iov_offset=%lld",
			  rreq->dev.segment_first, last, rreq->dev.iov_count, (long long)rreq->dev.iov_offset));
	MPIU_Assert(rreq->dev.iov_count >= 0 && rreq->dev.iov_count <= 
		    MPID_IOV_LIMIT);

	/* --BEGIN ERROR HANDLING-- */
	if (rreq->dev.iov_count == 0)
	{
	    /* If the data can't be unpacked, the we have a mis-match between
	       the datatype and the amount of data received.  Adjust
	       the segment info so that the remaining data is received and 
	       thrown away. */
	    rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
		       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE,
		       "**dtypemismatch", 0);
            MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.segment_first);
	    rreq->dev.segment_size = rreq->dev.segment_first;
	    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
	    goto fn_exit;
	}
        else
        {
            MPIU_Assert(rreq->dev.iov_offset < rreq->dev.iov_count);
        }
	/* --END ERROR HANDLING-- */

	if (last == rreq->dev.recv_data_sz)
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
     "updating rreq to read the remaining data directly into the user buffer");
	    /* Eventually, use OnFinal for this instead */
	    rreq->dev.OnDataAvail = rreq->dev.OnFinal;
	}
	else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV ||
                 MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV ||
                 (last == rreq->dev.segment_size ||
                  (last - rreq->dev.segment_first) / rreq->dev.iov_count >= MPIDI_IOV_DENSITY_MIN))
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
	     "updating rreq to read more data directly into the user buffer");
	    rreq->dev.segment_first = last;
	    rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReloadIOV;
	}
	else
	{
	    /* Too little data would have been received using an IOV.  
	       We will start receiving data into a SRBuf and unpacking it
	       later. */
	    MPIU_Assert(MPIDI_Request_get_srbuf_flag(rreq) == FALSE);
	    
	    MPIDI_CH3U_SRBuf_alloc(rreq, 
			    rreq->dev.segment_size - rreq->dev.segment_first);
	    rreq->dev.tmpbuf_off = 0;
	    /* --BEGIN ERROR HANDLING-- */
	    if (rreq->dev.tmpbuf_sz == 0)
	    {
		/* FIXME - we should drain the data off the pipe here, but we 
		   don't have a buffer to drain it into.  should this be
		   a fatal error? */
		MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"SRBuf allocation failure");
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
			      FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 
			 "**nomem %d", 
			 rreq->dev.segment_size - rreq->dev.segment_first);
		rreq->status.MPI_ERROR = mpi_errno;
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */

	    /* fill in the IOV using a recursive call */
	    mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
	}
    }
    else
    {
	/* receive and toss any extra data that does not fit in the user's 
	   buffer */
	MPIDI_msg_sz_t data_sz;

	data_sz = rreq->dev.recv_data_sz - rreq->dev.segment_first;
	if (!MPIDI_Request_get_srbuf_flag(rreq))
	{
	    MPIDI_CH3U_SRBuf_alloc(rreq, data_sz);
	    /* --BEGIN ERROR HANDLING-- */
	    if (rreq->dev.tmpbuf_sz == 0)
	    {
		MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"SRBuf allocation failure");
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
			       FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
		rreq->status.MPI_ERROR = mpi_errno;
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */
	}

	if (data_sz <= rreq->dev.tmpbuf_sz)
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
	    "updating rreq to read overflow data into the SRBuf and complete");
	    rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
	    MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
	    /* Eventually, use OnFinal for this instead */
	    rreq->dev.OnDataAvail = rreq->dev.OnFinal;
	}
	else
	{
	    MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
	  "updating rreq to read overflow data into the SRBuf and reload IOV");
	    rreq->dev.iov[0].MPID_IOV_LEN = rreq->dev.tmpbuf_sz;
	    rreq->dev.segment_first += rreq->dev.tmpbuf_sz;
	    rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReloadIOV;
	}
	
	rreq->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rreq->dev.tmpbuf;
	rreq->dev.iov_count = 1;
    }
    
  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
    return mpi_errno;
}
Ejemplo n.º 16
0
static inline
int ADD_SUFFIX(MPID_nem_ofi_recv_callback)(cq_tagged_entry_t * wc, MPID_Request * rreq)
{
    int err0, err1, src, mpi_errno = MPI_SUCCESS;
    uint64_t ssend_bits;
    MPIDI_msg_sz_t sz;
    MPIDI_VC_t *vc;
    MPID_Request *sync_req;
    BEGIN_FUNC(FCNAME);
    /* ---------------------------------------------------- */
    /* Populate the MPI Status and unpack noncontig buffer  */
    /* ---------------------------------------------------- */
    rreq->status.MPI_ERROR = MPI_SUCCESS;
#if API_SET == API_SET_1
    rreq->status.MPI_SOURCE = get_source(wc->tag);
#elif API_SET == API_SET_2
    rreq->status.MPI_SOURCE = wc->data;
#endif
    src = rreq->status.MPI_SOURCE;
    rreq->status.MPI_TAG = get_tag(wc->tag);

    REQ_OFI(rreq)->req_started = 1;
    MPIR_STATUS_SET_COUNT(rreq->status, wc->len);

    if (REQ_OFI(rreq)->pack_buffer) {
        MPIDI_CH3U_Buffer_copy(REQ_OFI(rreq)->pack_buffer,
                               MPIR_STATUS_GET_COUNT(rreq->status),
                               MPI_BYTE, &err0, rreq->dev.user_buf,
                               rreq->dev.user_count, rreq->dev.datatype, &sz, &err1);
        MPIR_STATUS_SET_COUNT(rreq->status, sz);
        MPIU_Free(REQ_OFI(rreq)->pack_buffer);
        if (err0 || err1) {
            rreq->status.MPI_ERROR = MPI_ERR_TYPE;
        }
    }

    if ((wc->tag & MPID_PROTOCOL_MASK) == MPID_SYNC_SEND) {
        /* ---------------------------------------------------- */
        /* Ack the sync send and wait for the send request      */
        /* completion(when callback executed.  A protocol bit   */
        /* MPID_SYNC_SEND_ACK is set in the tag bits to provide */
        /* separation of MPI messages and protocol messages     */
        /* ---------------------------------------------------- */
        vc = REQ_OFI(rreq)->vc;
        if (!vc) {      /* MPI_ANY_SOURCE -- Post message from status, complete the VC */
            vc = rreq->comm->dev.vcrt->vcr_table[src];
            MPIU_Assert(vc);
        }
#if API_SET == API_SET_1
        ssend_bits = init_sendtag(rreq->dev.match.parts.context_id,
                                  rreq->comm->rank, rreq->status.MPI_TAG, MPID_SYNC_SEND_ACK);
#elif API_SET == API_SET_2
        ssend_bits = init_sendtag_2(rreq->dev.match.parts.context_id,
                                    rreq->status.MPI_TAG, MPID_SYNC_SEND_ACK);
#endif
        MPID_nem_ofi_create_req(&sync_req, 1);
        sync_req->dev.OnDataAvail = NULL;
        sync_req->dev.next = NULL;
        REQ_OFI(sync_req)->event_callback = MPID_nem_ofi_sync_recv_callback;
        REQ_OFI(sync_req)->parent = rreq;
#if API_SET == API_SET_1
        FI_RC_RETRY(fi_tsend(gl_data.endpoint,
#elif API_SET == API_SET_2
        FI_RC_RETRY(fi_tsenddata(gl_data.endpoint,
#endif
                         NULL,
                         0,
                         gl_data.mr,
#if API_SET == API_SET_2
                         rreq->comm->rank,
#endif
                         VC_OFI(vc)->direct_addr,
                         ssend_bits, &(REQ_OFI(sync_req)->ofi_context)), tsend);
    }
Ejemplo n.º 17
0
/* MSGQUEUE lock must be held by caller */
void
MPIDI_Callback_process_unexp(MPID_Request *newreq,
			     pami_context_t        context,
                             const MPIDI_MsgInfo * msginfo,
                             size_t                sndlen,
                             pami_endpoint_t       sender,
                             const void          * sndbuf,
                             pami_recv_t         * recv,
                             unsigned              isSync)
{
  MPID_Request *rreq = NULL;

  /* ---------------------------------------------------- */
  /*  Fallback position:                                  */
  /*     + Request was not posted, or                     */
  /*     + Request was long & not contiguous.             */
  /*  We must allocate enough space to hold the message.  */
  /*  The temporary buffer will be unpacked later.        */
  /* ---------------------------------------------------- */
  unsigned rank       = msginfo->MPIrank;
  unsigned tag        = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_AEU(newreq, rank, tag, context_id);
#else
  unsigned msg_seqno  = msginfo->MPIseqno;
  rreq = MPIDI_Recvq_AEU(newreq, rank, PAMIX_Endpoint_query(sender), tag, context_id, msg_seqno);
#endif
  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG    = tag;
  MPIR_STATUS_SET_COUNT(rreq->status, sndlen);
  MPIDI_Request_setCA          (rreq, MPIDI_CA_COMPLETE);
  MPIDI_Request_cpyPeerRequestH(rreq, msginfo);
  MPIDI_Request_setSync        (rreq, isSync);

  /* Set the rank of the sender if a sync msg. */
#ifndef OUT_OF_ORDER_HANDLING
  if (isSync)
    {
#endif
      MPIDI_Request_setPeerRank_comm(rreq, rank);
      MPIDI_Request_setPeerRank_pami(rreq, PAMIX_Endpoint_query(sender));
#ifndef OUT_OF_ORDER_HANDLING
    }
#endif

  MPID_assert(!sndlen || rreq->mpid.uebuf != NULL);
  TRACE_MEMSET_R(PAMIX_Endpoint_query(sender),msg_seqno,recv_status);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),msgid,msginfo->MPIseqno);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rtag,tag);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rctx,msginfo->MPIctxt);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rlen,sndlen);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),fl.f.sync,isSync);
  TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rsource,PAMIX_Endpoint_query(sender));
  TRACE_SET_REQ_VAL(rreq->mpid.idx,(msginfo->MPIseqno & SEQMASK));

  if (recv != NULL)
    {
      recv->local_fn = MPIDI_RecvDoneCB_mutexed;
      recv->cookie   = rreq;
      /* -------------------------------------------------- */
      /*  Let PAMI know where to put the rest of the data.  */
      /* -------------------------------------------------- */
      recv->addr = rreq->mpid.uebuf;
    }
  else
    {
      /* ------------------------------------------------- */
      /*  We have the data; copy it and complete the msg.  */
      /* ------------------------------------------------- */
      memcpy(rreq->mpid.uebuf, sndbuf,   sndlen);
      MPIDI_RecvDoneCB(context, rreq, PAMI_SUCCESS);
      /* caller must release rreq, after unlocking MSGQUEUE */
    }
}
Ejemplo n.º 18
0
int MPIDI_CH3_PktHandler_EagerShortSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, 
					 MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPIDI_CH3_Pkt_eagershort_send_t * eagershort_pkt = &pkt->eagershort_send;
    MPID_Request * rreq;
    int found;
    int mpi_errno = MPI_SUCCESS;

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_MSGQ_MUTEX);

    /* printf( "Receiving short eager!\n" ); fflush(stdout); */
    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
	"received eagershort send pkt, rank=%d, tag=%d, context=%d",
	eagershort_pkt->match.parts.rank, 
	eagershort_pkt->match.parts.tag, 
	eagershort_pkt->match.parts.context_id));
	    
    MPIU_DBG_MSGPKT(vc,eagershort_pkt->match.parts.tag,
		    eagershort_pkt->match.parts.context_id,
		    eagershort_pkt->match.parts.rank,eagershort_pkt->data_sz,
		    "ReceivedEagerShort");
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&eagershort_pkt->match, &found);
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this. */
    if (!found && MPID_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }

    (rreq)->status.MPI_SOURCE = (eagershort_pkt)->match.parts.rank;
    (rreq)->status.MPI_TAG    = (eagershort_pkt)->match.parts.tag;
    MPIR_STATUS_SET_COUNT((rreq)->status, (eagershort_pkt)->data_sz);
    (rreq)->dev.recv_data_sz  = (eagershort_pkt)->data_sz;
    MPIDI_Request_set_seqnum((rreq), (eagershort_pkt)->seqnum);
    /* FIXME: Why do we set the message type? */
    MPIDI_Request_set_msg_type((rreq), MPIDI_REQUEST_EAGER_MSG);

    /* This packed completes the reception of the indicated data.
       The packet handler returns null for a request that requires
       no further communication */
    *rreqp = NULL;
    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    /* Extract the data from the packet */
    /* Note that if the data size if zero, we're already done */
    if (rreq->dev.recv_data_sz > 0) {
	if (found) {
	    int            dt_contig;
	    MPI_Aint       dt_true_lb;
	    MPIDI_msg_sz_t userbuf_sz;
	    MPID_Datatype *dt_ptr;
	    MPIDI_msg_sz_t data_sz;

	    /* Make sure that we handle the general (non-contiguous)
	       datatypes correctly while optimizing for the 
	       special case */
	    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, 
				    dt_contig, userbuf_sz, dt_ptr, dt_true_lb);
		
	    if (rreq->dev.recv_data_sz <= userbuf_sz) {
		data_sz = rreq->dev.recv_data_sz;
	    }
	    else {
		MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
		    "receive buffer too small; message truncated, msg_sz=" 
					  MPIDI_MSG_SZ_FMT ", userbuf_sz="
				          MPIDI_MSG_SZ_FMT,
				 rreq->dev.recv_data_sz, userbuf_sz));
		rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
                     MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
		     "**truncate", "**truncate %d %d %d %d", 
		     rreq->status.MPI_SOURCE, rreq->status.MPI_TAG, 
		     rreq->dev.recv_data_sz, userbuf_sz );
		MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz);
		data_sz = userbuf_sz;
	    }

	    if (dt_contig && data_sz == rreq->dev.recv_data_sz) {
		/* user buffer is contiguous and large enough to store the
		   entire message.  We can just copy the code */

		/* Copy the payload. We could optimize this 
		   if data_sz & 0x3 == 0 
		   (copy (data_sz >> 2) ints, inline that since data size is 
		   currently limited to 4 ints */
		{
		    unsigned char const * restrict p = 
			(unsigned char *)eagershort_pkt->data;
		    unsigned char * restrict bufp = 
			(unsigned char *)(char*)(rreq->dev.user_buf) + 
			dt_true_lb;
		    int i;
		    for (i=0; i<data_sz; i++) {
			*bufp++ = *p++;
		    }
		}
		/* FIXME: We want to set the OnDataAvail to the appropriate 
		   function, which depends on whether this is an RMA 
		   request or a pt-to-pt request. */
		rreq->dev.OnDataAvail = 0;
		/* The recv_pending_count must be one here (!) because of
		   the way the pending count is queried.  We may want 
		   to fix this, but it will require a sweep of the code */
	    }
	    else {
Ejemplo n.º 19
0
/**
 * \brief The callback for a new RZV RTS
 * \note  Because this is a short message, the data is already received
 * \param[in]  context      The context on which the message is being received.
 * \param[in]  sender       The origin endpoint
 * \param[in]  _msginfo     The extended header information
 * \param[in]  msginfo_size The size of the extended header information
 * \param[in]  is_zero_byte The rendezvous message is zero bytes in length.
 */
void
MPIDI_RecvRzvCB_impl(pami_context_t    context,
                     pami_endpoint_t   sender,
                     const void      * _msginfo,
                     size_t            msginfo_size,
                     const unsigned    is_zero_byte)
{
  MPID_assert(_msginfo != NULL);
  MPID_assert(msginfo_size == sizeof(MPIDI_MsgEnvelope));
  const MPIDI_MsgEnvelope * envelope = (const MPIDI_MsgEnvelope *)_msginfo;
  const MPIDI_MsgInfo * msginfo = (const MPIDI_MsgInfo *)&envelope->msginfo;

  MPID_Request * rreq = NULL;
  int found;
  pami_task_t source;
#if TOKEN_FLOW_CONTROL
  int  rettoks=0;
#endif

  /* -------------------- */
  /*  Match the request.  */
  /* -------------------- */
  unsigned rank       = msginfo->MPIrank;
  unsigned tag        = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;

  MPID_Request *newreq = MPIDI_Request_create2();
  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  source = PAMIX_Endpoint_query(sender);
  MPIDI_Receive_tokens(msginfo,source);
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_FDP_or_AEU(newreq, rank, tag, context_id, &found);
#else
  rreq = MPIDI_Recvq_FDP_or_AEU(newreq, rank, source, tag, context_id, msginfo->MPIseqno, &found);
#endif
  TRACE_ERR("RZV CB for req=%p remote-mr=0x%llx bytes=%zu (%sfound)\n",
            rreq,
            *(unsigned long long*)&envelope->envelope.memregion,
            envelope->envelope.length,
            found?"":"not ");

  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG    = tag;
  MPIR_STATUS_SET_COUNT(rreq->status, envelope->length);
  MPIDI_Request_setPeerRank_comm(rreq, rank);
  MPIDI_Request_setPeerRank_pami(rreq, source);
  MPIDI_Request_cpyPeerRequestH (rreq, msginfo);
  MPIDI_Request_setSync         (rreq, msginfo->isSync);
  MPIDI_Request_setRzv          (rreq, 1);

  /* ----------------------------------------------------- */
  /* Save the rendezvous information for when the target   */
  /* node calls a receive function and the data is         */
  /* retreived from the origin node.                       */
  /* ----------------------------------------------------- */
  if (is_zero_byte)
    {
      rreq->mpid.envelope.length = 0;
      rreq->mpid.envelope.data   = NULL;
    }
  else
    {
#ifdef USE_PAMI_RDMA
      memcpy(&rreq->mpid.envelope.memregion,
             &envelope->memregion,
             sizeof(pami_memregion_t));
#else
      rreq->mpid.envelope.memregion_used = envelope->memregion_used;
      if(envelope->memregion_used)
        {
          memcpy(&rreq->mpid.envelope.memregion,
                 &envelope->memregion,
                 sizeof(pami_memregion_t));
        }
      rreq->mpid.envelope.data   = envelope->data;
#endif
      rreq->mpid.envelope.length = envelope->length;
     TRACE_SET_R_VAL(source,(rreq->mpid.idx),req,rreq);
     TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,envelope->length);
     TRACE_SET_R_VAL(source,(rreq->mpid.idx),fl.f.sync,msginfo->isSync);
     TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.rzv);
     if (TOKEN_FLOW_CONTROL_ON)
       {
         #if TOKEN_FLOW_CONTROL
         MPIDI_Must_return_tokens(context,source);
         #else
         MPID_assert_always(0);
         #endif
       }
    }
  /* ----------------------------------------- */
  /* figure out target buffer for request data */
  /* ----------------------------------------- */
  if (found)
    {
#if (MPIDI_STATISTICS)
       MPID_NSTAT(mpid_statp->earlyArrivalsMatched);
#endif
      /* --------------------------- */
      /* if synchronized, post ack.  */
      /* --------------------------- */
      if (unlikely(MPIDI_Request_isSync(rreq)))
        MPIDI_SyncAck_post(context, rreq, MPIDI_Request_getPeerRank_pami(rreq));

      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);

      if (is_zero_byte)
        MPIDI_RecvRzvDoneCB_zerobyte(context, rreq, PAMI_SUCCESS);
      else
        {
          MPIDI_RendezvousTransfer(context, rreq);
          TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.sync_com_in_HH);
          TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.matchedInHH);
          TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
        }
      MPID_Request_discard(newreq);
    }

  /* ------------------------------------------------------------- */
  /* Request was not posted. */
  /* ------------------------------------------------------------- */
  else
    {
#if (MPIDI_STATISTICS)
       MPID_NSTAT(mpid_statp->earlyArrivals);
#endif
      /*
       * This is to test that the fields don't need to be
       * initialized.  Remove after this doesn't fail for a while.
       */
      MPID_assert(rreq->mpid.uebuf    == NULL);
      MPID_assert(rreq->mpid.uebuflen == 0);
      /* rreq->mpid.uebuf = NULL; */
      /* rreq->mpid.uebuflen = 0; */
#ifdef OUT_OF_ORDER_HANDLING
  if (MPIDI_In_cntr[source].n_OutOfOrderMsgs > 0) {
     MPIDI_Recvq_process_out_of_order_msgs(source, context);
  }
#endif
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
    }
  /* ---------------------------------------- */
  /*  Signal that the recv has been started.  */
  /* ---------------------------------------- */
  MPIDI_Progress_signal();
}
Ejemplo n.º 20
0
int MPIDI_CH3U_Receive_data_found(MPIR_Request *rreq, void *buf, intptr_t *buflen, int *complete)
{
    int dt_contig;
    MPI_Aint dt_true_lb;
    intptr_t userbuf_sz;
    MPIR_Datatype * dt_ptr = NULL;
    intptr_t data_sz;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);

    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");
	
    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, 
			    dt_contig, userbuf_sz, dt_ptr, dt_true_lb);
		
    if (rreq->dev.recv_data_sz <= userbuf_sz) {
	data_sz = rreq->dev.recv_data_sz;
    }
    else {
	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
               "receive buffer too small; message truncated, msg_sz=%" PRIdPTR ", userbuf_sz=%"
					    PRIdPTR,
				 rreq->dev.recv_data_sz, userbuf_sz));
	rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
                     MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
		     "**truncate", "**truncate %d %d %d %d", 
		     rreq->status.MPI_SOURCE, rreq->status.MPI_TAG, 
		     rreq->dev.recv_data_sz, userbuf_sz );
	MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz);
	data_sz = userbuf_sz;
    }

    if (dt_contig && data_sz == rreq->dev.recv_data_sz)
    {
	/* user buffer is contiguous and large enough to store the
	   entire message.  However, we haven't yet *read* the data 
	   (this code describes how to read the data into the destination) */

        /* if all of the data has already been received, unpack it
           now, otherwise build an iov and let the channel unpack */
        if (*buflen >= data_sz)
        {
            MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"Copying contiguous data to user buffer");
            /* copy data out of the receive buffer */
            if (rreq->dev.drop_data == FALSE) {
                MPIR_Memcpy((char*)(rreq->dev.user_buf) + dt_true_lb, buf, data_sz);
            }
            *buflen = data_sz;
            *complete = TRUE;
        }
        else
        {
            MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"IOV loaded for contiguous read");
            
            rreq->dev.iov[0].MPL_IOV_BUF = 
                (MPL_IOV_BUF_CAST)((char*)(rreq->dev.user_buf) + dt_true_lb);
            rreq->dev.iov[0].MPL_IOV_LEN = data_sz;
            rreq->dev.iov_count = 1;
            *buflen = 0;
            *complete = FALSE;
        }
        
        /* Trigger OnFinal when receiving the last segment */
        rreq->dev.OnDataAvail = rreq->dev.OnFinal;
    }
    else {
	/* user buffer is not contiguous or is too small to hold
	   the entire message */
        
	rreq->dev.segment_ptr = MPIR_Segment_alloc( );
        MPIR_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPIR_Segment_alloc");

 	MPIR_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, 
			  rreq->dev.datatype, rreq->dev.segment_ptr);
	rreq->dev.segment_first = 0;
	rreq->dev.segment_size  = data_sz;

        /* if all of the data has already been received, and the
           message is not truncated, unpack it now, otherwise build an
           iov and let the channel unpack */
        if (data_sz == rreq->dev.recv_data_sz && *buflen >= data_sz)
        {
            intptr_t last;
            MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"Copying noncontiguous data to user buffer");
            last = data_sz;
            MPIR_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, 
				&last, buf);
            /* --BEGIN ERROR HANDLING-- */
            if (last != data_sz)
            {
                /* If the data can't be unpacked, the we have a
                   mismatch between the datatype and the amount of
                   data received.  Throw away received data. */
                MPIR_ERR_SET(rreq->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch");
                MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.segment_first);
                *buflen = data_sz;
                *complete = TRUE;
		/* FIXME: Set OnDataAvail to 0?  If not, why not? */
                goto fn_exit;
            }
            /* --END ERROR HANDLING-- */
            *buflen = data_sz;
            /* Trigger OnFinal when receiving the last segment */
            rreq->dev.OnDataAvail = rreq->dev.OnFinal;
            *complete = TRUE;
        }
        else
        {   
            MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"IOV loaded for non-contiguous read");

            mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_SETFATALANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                         "**ch3|loadrecviov");
            }
            *buflen = 0;
            *complete = FALSE;
        }
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Ejemplo n.º 21
0
static int _mxm_handle_rreq(MPID_Request * req)
{
    int complete = FALSE, found = FALSE;
    int dt_contig;
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPIDI_msg_sz_t userbuf_sz;
    MPID_Datatype *dt_ptr;
    MPIDI_msg_sz_t data_sz;
    MPID_nem_mxm_vc_area *vc_area ATTRIBUTE((unused)) = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;
    void *tmp_buf = NULL;

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_MSGQ_MUTEX);
    found = MPIDI_CH3U_Recvq_DP(req);
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_MSGQ_MUTEX);
    /* an MPI_ANY_SOURCE request may have been previously removed from the
     * CH3 queue by an FDP (find and dequeue posted) operation */
    if (req->dev.match.parts.rank != MPI_ANY_SOURCE) {
        MPIU_Assert(found);
    }

    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, userbuf_sz, dt_ptr,
                            dt_true_lb);

    vc_area = VC_BASE(req->ch.vc);
    req_area = REQ_BASE(req);

    _dbg_mxm_out_buf(req_area->iov_buf[0].ptr,
                     (req_area->iov_buf[0].length > 16 ? 16 : req_area->iov_buf[0].length));

    if (req->dev.recv_data_sz <= userbuf_sz) {
        data_sz = req->dev.recv_data_sz;
        if (req->status.MPI_ERROR == MPI_ERR_TRUNCATE) {
            req->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
                                                         MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
                                                         MPI_ERR_TRUNCATE, "**truncate",
                                                         "**truncate %d %d %d %d",
                                                         req->status.MPI_SOURCE,
                                                         req->status.MPI_TAG, req->dev.recv_data_sz,
                                                         userbuf_sz);
        }
    }
    else {
        data_sz = userbuf_sz;
        MPIR_STATUS_SET_COUNT(req->status, userbuf_sz);
        MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE, (MPIU_DBG_FDEST,
                                              "receive buffer too small; message truncated, msg_sz="
                                              MPIDI_MSG_SZ_FMT ", userbuf_sz="
                                              MPIDI_MSG_SZ_FMT, req->dev.recv_data_sz, userbuf_sz));
        req->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
                                                     MPIR_ERR_RECOVERABLE, FCNAME, __LINE__,
                                                     MPI_ERR_TRUNCATE, "**truncate",
                                                     "**truncate %d %d %d %d",
                                                     req->status.MPI_SOURCE, req->status.MPI_TAG,
                                                     req->dev.recv_data_sz, userbuf_sz);
    }

    if (!dt_contig) {
        MPIDI_msg_sz_t last = 0;

        if (req->dev.tmpbuf != NULL) {
            last = req->dev.recv_data_sz;
            MPID_Segment_unpack(req->dev.segment_ptr, 0, &last, req->dev.tmpbuf);
            tmp_buf = req->dev.tmpbuf;
        }
        else {
            mxm_req_buffer_t *iov_buf;
            MPL_IOV *iov;
            int n_iov = 0;
            int index;

            last = req->dev.recv_data_sz;
            n_iov = req_area->iov_count;
            iov_buf = req_area->iov_buf;
            if (last && n_iov > 0) {
                iov = MPIU_Malloc(n_iov * sizeof(*iov));
                MPIU_Assert(iov);

                for (index = 0; index < n_iov; index++) {
                    iov[index].MPL_IOV_BUF = iov_buf[index].ptr;
                    iov[index].MPL_IOV_LEN = iov_buf[index].length;
                }

                MPID_Segment_unpack_vector(req->dev.segment_ptr, req->dev.segment_first, &last, iov,
                                           &n_iov);
                MPIU_Free(iov);
            }
            if (req_area->iov_count > MXM_MPICH_MAX_IOV) {
                tmp_buf = req_area->iov_buf;
                req_area->iov_buf = req_area->tmp_buf;
                req_area->iov_count = 0;
            }
        }
        if (last != data_sz) {
            MPIR_STATUS_SET_COUNT(req->status, last);
            if (req->dev.recv_data_sz <= userbuf_sz) {
                /* If the data can't be unpacked, the we have a
                 *  mismatch between the datatype and the amount of
                 *  data received.  Throw away received data.
                 */
                MPIR_ERR_SETSIMPLE(req->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch");
            }
        }
    }

    MPIDI_CH3U_Handle_recv_req(req->ch.vc, req, &complete);
    MPIU_Assert(complete == TRUE);

    if (tmp_buf)
        MPIU_Free(tmp_buf);

    return complete;
}
Ejemplo n.º 22
0
int MPIDI_CH3U_Post_data_receive_found(MPIR_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;	
    int dt_contig;
    MPI_Aint dt_true_lb;
    intptr_t userbuf_sz;
    MPIR_Datatype * dt_ptr = NULL;
    intptr_t data_sz;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);

    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");
	
    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, 
			    dt_contig, userbuf_sz, dt_ptr, dt_true_lb);
		
    if (rreq->dev.recv_data_sz <= userbuf_sz) {
	data_sz = rreq->dev.recv_data_sz;
    }
    else {
	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
               "receive buffer too small; message truncated, msg_sz=%" PRIdPTR ", userbuf_sz=%"
					    PRIdPTR,
				 rreq->dev.recv_data_sz, userbuf_sz));
	rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
                     MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
		     "**truncate", "**truncate %d %d %d %d", 
		     rreq->status.MPI_SOURCE, rreq->status.MPI_TAG, 
		     rreq->dev.recv_data_sz, userbuf_sz );
	MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz);
	data_sz = userbuf_sz;
    }

    if (dt_contig && data_sz == rreq->dev.recv_data_sz)
    {
	/* user buffer is contiguous and large enough to store the
	   entire message.  However, we haven't yet *read* the data 
	   (this code describes how to read the data into the destination) */
	MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"IOV loaded for contiguous read");
	rreq->dev.iov[0].MPL_IOV_BUF = 
	    (MPL_IOV_BUF_CAST)((char*)(rreq->dev.user_buf) + dt_true_lb);
	rreq->dev.iov[0].MPL_IOV_LEN = data_sz;
	rreq->dev.iov_count = 1;
	/* FIXME: We want to set the OnDataAvail to the appropriate 
	   function, which depends on whether this is an RMA 
	   request or a pt-to-pt request. */
	rreq->dev.OnDataAvail = 0;
    }
    else {
	/* user buffer is not contiguous or is too small to hold
	   the entire message */
	MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"IOV loaded for non-contiguous read");
	rreq->dev.segment_ptr = MPIR_Segment_alloc( );
        MPIR_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPIR_Segment_alloc");
	MPIR_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, 
			  rreq->dev.datatype, rreq->dev.segment_ptr);
	rreq->dev.segment_first = 0;
	rreq->dev.segment_size = data_sz;
	mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIR_ERR_SETFATALANDJUMP(mpi_errno,MPI_ERR_OTHER,
				     "**ch3|loadrecviov");
	}
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Ejemplo n.º 23
0
int MPIDI_CH3U_Request_unpack_uebuf(MPID_Request * rreq)
{
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPIDI_msg_sz_t userbuf_sz;
    MPID_Datatype * dt_ptr;
    MPIDI_msg_sz_t unpack_sz;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);

    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, 
			    dt_contig, userbuf_sz, dt_ptr, dt_true_lb);
    
    if (rreq->dev.recv_data_sz <= userbuf_sz)
    {
	unpack_sz = rreq->dev.recv_data_sz;
    }
    else
    {
	/* --BEGIN ERROR HANDLING-- */
	MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
      "receive buffer overflow; message truncated, msg_sz=" MPIDI_MSG_SZ_FMT 
	      ", buf_sz=" MPIDI_MSG_SZ_FMT, 
                rreq->dev.recv_data_sz, userbuf_sz));
	unpack_sz = userbuf_sz;
	MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz);
	rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
		 MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
		 "**truncate", "**truncate %d %d", 
                 rreq->dev.recv_data_sz, userbuf_sz);
	/* --END ERROR HANDLING-- */
    }

    if (unpack_sz > 0)
    {
	if (dt_contig)
	{
	    /* TODO - check that amount of data is consistent with datatype.  
	       In other words, if we were to use Segment_unpack()
	       would last = unpack?  If not we should return an error 
	       (unless configured with --enable-fast) */
	    MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
	    MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, rreq->dev.tmpbuf,
		   unpack_sz);
	    MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
	}
	else
	{
	    MPID_Segment seg;
	    MPI_Aint last;

	    MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, 
			      rreq->dev.datatype, &seg, 0);
	    last = unpack_sz;
	    MPID_Segment_unpack(&seg, 0, &last, rreq->dev.tmpbuf);
	    if (last != unpack_sz)
	    {
		/* --BEGIN ERROR HANDLING-- */
		/* received data was not entirely consumed by unpack() 
		   because too few bytes remained to fill the next basic
		   datatype */
		MPIR_STATUS_SET_COUNT(rreq->status, last);
		rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
                         MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE,
			 "**dtypemismatch", 0);
		/* --END ERROR HANDLING-- */
	    }
	}
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
    return mpi_errno;
}
Ejemplo n.º 24
0
static inline void
MPIDI_RecvShortCB(pami_context_t    context,
                  const void      * _msginfo,
                  const void      * sndbuf,
                  size_t            sndlen,
                  pami_endpoint_t   sender,
                  unsigned          isSync)
{
  MPID_assert(_msginfo != NULL);

  const MPIDI_MsgInfo *msginfo = (const MPIDI_MsgInfo *)_msginfo;
  MPID_Request * rreq = NULL;
  pami_task_t source;
#if TOKEN_FLOW_CONTROL
  int          rettoks=0;
#endif

  /* -------------------- */
  /*  Match the request.  */
  /* -------------------- */
  unsigned rank       = msginfo->MPIrank;
  unsigned tag        = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;

  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  source = PAMIX_Endpoint_query(sender);
  MPIDI_Receive_tokens(msginfo,source);
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_FDP(rank, tag, context_id);
#else
  rreq = MPIDI_Recvq_FDP(rank, source, tag, context_id, msginfo->MPIseqno);
#endif

  /* Match not found */
  if (unlikely(rreq == NULL))
    {
#if (MPIDI_STATISTICS)
         MPID_NSTAT(mpid_statp->earlyArrivals);
#endif
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
      MPID_Request *newreq = MPIDI_Request_create2();
      MPID_assert(newreq != NULL);
      if (sndlen)
      {
        newreq->mpid.uebuflen = sndlen;
        if (!TOKEN_FLOW_CONTROL_ON)
          {
            newreq->mpid.uebuf = MPL_malloc(sndlen);
            newreq->mpid.uebuf_malloc = mpiuMalloc;
          }
        else
          {
            #if TOKEN_FLOW_CONTROL
            MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
            newreq->mpid.uebuf = MPIDI_mm_alloc(sndlen);
            newreq->mpid.uebuf_malloc = mpidiBufMM;
            MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
            #else
            MPID_assert_always(0);
            #endif
          }
        MPID_assert(newreq->mpid.uebuf != NULL);
      }
      MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
#ifndef OUT_OF_ORDER_HANDLING
      rreq = MPIDI_Recvq_FDP(rank, tag, context_id);
#else
      rreq = MPIDI_Recvq_FDP(rank, PAMIX_Endpoint_query(sender), tag, context_id, msginfo->MPIseqno);
#endif
      
      if (unlikely(rreq == NULL))
      {
        MPIDI_Callback_process_unexp(newreq, context, msginfo, sndlen, sender, sndbuf, NULL, isSync);
        /* request is always complete now */
        if (TOKEN_FLOW_CONTROL_ON && sndlen)
          {
            #if TOKEN_FLOW_CONTROL
            MPIDI_Token_cntr[source].unmatched++;
            #else
            MPID_assert_always(0);
            #endif
          }
        MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
        MPID_Request_release(newreq);
        goto fn_exit_short;
      }
      else
      {       
        MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
        MPID_Request_discard(newreq);
      }         
    }
  else
    {
#if (MPIDI_STATISTICS)
     MPID_NSTAT(mpid_statp->earlyArrivalsMatched);
#endif
      if (TOKEN_FLOW_CONTROL_ON && sndlen)
        {
          #if TOKEN_FLOW_CONTROL
          MPIDI_Update_rettoks(source);
          MPIDI_Must_return_tokens(context,source);
          #else
          MPID_assert_always(0);
          #endif
        }
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
    }

  /* the receive queue processing has been completed and we found match*/

  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG    = tag;
  MPIR_STATUS_SET_COUNT(rreq->status, sndlen);
  MPIDI_Request_setCA          (rreq, MPIDI_CA_COMPLETE);
  MPIDI_Request_cpyPeerRequestH(rreq, msginfo);
  MPIDI_Request_setSync        (rreq, isSync);
  MPIDI_Request_setRzv         (rreq, 0);

  /* ----------------------------- */
  /*  Request was already posted.  */
  /* ----------------------------- */
  if (unlikely(isSync))
    MPIDI_SyncAck_post(context, rreq, PAMIX_Endpoint_query(sender));

  if (unlikely(HANDLE_GET_KIND(rreq->mpid.datatype) != HANDLE_KIND_BUILTIN))
    {
      MPIDI_Callback_process_userdefined_dt(context, sndbuf, sndlen, rreq);
      goto fn_exit_short;
    }

  size_t dt_size = rreq->mpid.userbufcount * MPID_Datatype_get_basic_size(rreq->mpid.datatype);

  /* ----------------------------- */
  /*  Test for truncated message.  */
  /* ----------------------------- */
  if (unlikely(sndlen > dt_size))
    {
#if ASSERT_LEVEL > 0
      MPIDI_Callback_process_trunc(context, rreq, NULL, sndbuf);
      goto fn_exit_short;
#else
      sndlen = dt_size;
#endif
    }

  MPID_assert(rreq->mpid.uebuf    == NULL);
  MPID_assert(rreq->mpid.uebuflen == 0);
  void* rcvbuf = rreq->mpid.userbuf;

  if (sndlen > 0)
  {
#if CUDA_AWARE_SUPPORT
    if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rcvbuf))
    {
      cudaError_t cudaerr = CudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
    }
    else
#endif
      memcpy(rcvbuf, sndbuf, sndlen);
  }
  TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,sndlen);
  TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.comp_in_HH);
  TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
  MPIDI_Request_complete(rreq);

 fn_exit_short:
#ifdef OUT_OF_ORDER_HANDLING
  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  if (MPIDI_In_cntr[source].n_OutOfOrderMsgs>0)  {
    MPIDI_Recvq_process_out_of_order_msgs(source, context);
  }
  MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
#endif

  /* ---------------------------------------- */
  /*  Signal that the recv has been started.  */
  /* ---------------------------------------- */
  MPIDI_Progress_signal();
}
Ejemplo n.º 25
0
int MPIDI_CH3_RecvFromSelf( MPID_Request *rreq, void *buf, MPI_Aint count,
			    MPI_Datatype datatype )
#endif
{
    MPID_Request * const sreq = rreq->partner_request;
    int mpi_errno = MPI_SUCCESS;

    if (sreq != NULL)
    {
	MPIDI_msg_sz_t data_sz;

#if defined(FINEGRAIN_MPI)
        /* FG: Zerocopy */
        void * buf = (void *) (*buf_handle);
        if ( MPIDI_Request_get_self_zerocopy_flag(sreq) && MPIDI_Request_get_self_zerocopy_flag(rreq) )
        {
            int rdt_contig;
            MPI_Aint rdt_true_lb;
            MPID_Datatype * rdt_ptr;

            /* Unexpected Send-Collocated MPIX_Zsend/Izsend - MPIX_Zrecv/Izrecv pairing */
            MPIU_Assert(NULL == rreq->dev.user_buf);
            *(rreq->dev.user_buf_handle) = (void*) (*(sreq->dev.user_buf_handle));

            MPIDI_Datatype_get_info(count, datatype, rdt_contig, data_sz, rdt_ptr, rdt_true_lb);

            /* MPIX_Zsend buf_handle can't be set to NULL as we don't have
               a ptr to void **.  */
        }
        else if( MPIDI_Request_get_self_zerocopy_flag(sreq) && !MPIDI_Request_get_self_zerocopy_flag(rreq) ){
            /* Unexpected Send-Collocated MPIX_Zsend/Izsend<=>MPI_Recv/Irecv pairing. Freeing sender buffer */
            MPIDI_CH3U_Buffer_copy(*(sreq->dev.user_buf_handle), sreq->dev.user_count,
                                   sreq->dev.datatype, &sreq->status.MPI_ERROR,
                                   buf, count, datatype, &data_sz,
                                   &rreq->status.MPI_ERROR);

            /* Free the sender's buffer */
            MPIU_Free(*(sreq->dev.user_buf_handle));
        }
        else if( !MPIDI_Request_get_self_zerocopy_flag(sreq) && MPIDI_Request_get_self_zerocopy_flag(rreq) ){
            /* Unexpected Send-Collocated MPI_Send/Isend - MPIX_Zrecv/Izrecv pairing. Allocating receiver's buffer. */
            MPIU_Assert(NULL == rreq->dev.user_buf);
            /* Added checks for buffer count size as is done in MPIDI_CH3U_Buffer_copy() */
            MPIDI_CH3U_Buffer_allocate(sreq->dev.user_buf, sreq->dev.user_count,
                                       sreq->dev.datatype, &sreq->status.MPI_ERROR,
                                       rreq->dev.user_buf_handle, rreq->dev.user_count,
                                       rreq->dev.datatype, &data_sz, &rreq->status.MPI_ERROR);
            MPIDI_CH3U_Buffer_copy(sreq->dev.user_buf, sreq->dev.user_count,
                                   sreq->dev.datatype, &sreq->status.MPI_ERROR,
                                   *(rreq->dev.user_buf_handle), rreq->dev.user_count,
                                   rreq->dev.datatype, &data_sz, &rreq->status.MPI_ERROR);
        } else {
            /* Unexpected Send-Collocated MPI_Send/Isend - MPI_Recv/Irecv pairing */
#endif /* matches #if defined(FINEGRAIN_MPI) */

	MPIDI_CH3U_Buffer_copy(sreq->dev.user_buf, sreq->dev.user_count,
			       sreq->dev.datatype, &sreq->status.MPI_ERROR,
			       buf, count, datatype, &data_sz, 
			       &rreq->status.MPI_ERROR);
#if defined(FINEGRAIN_MPI)
        }
#endif

	MPIR_STATUS_SET_COUNT(rreq->status, data_sz);
	mpi_errno = MPID_Request_complete(sreq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
    }
    else
    {
	/* The sreq is missing which means an error occurred.  
	   rreq->status.MPI_ERROR should have been set when the
	   error was detected. */
    }
    
    /* no other thread can possibly be waiting on rreq, so it is safe to 
       reset ref_count and cc */
    mpi_errno = MPID_Request_complete(rreq);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}