/* Completion handler for a posted Portals receive: dequeue the request from
 * the CH3 posted-receive queue and fill in its MPI_Status from the event. */
static void dequeue_req(const ptl_event_t *e)
{
    int found;
    MPID_Request *const rreq = e->user_ptr;
    MPI_Aint s_len, r_len;

    /* At this point we know the ME is unlinked. Invalidate the handle to
       prevent further accesses, e.g. an attempted cancel. */
    REQ_PTL(rreq)->put_me = PTL_INVALID_HANDLE;

    found = MPIDI_CH3U_Recvq_DP(rreq);
    /* an MPI_ANY_SOURCE request may have been previously removed from the
       CH3 queue by an FDP (find and dequeue posted) operation */
    if (rreq->dev.match.parts.rank != MPI_ANY_SOURCE)
        MPIU_Assert(found);

    rreq->status.MPI_ERROR = MPI_SUCCESS;
    /* sender rank and tag travel in the Portals match bits */
    rreq->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
    rreq->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);

    /* receive-side buffer capacity in bytes (datatype size * count) */
    MPID_Datatype_get_size_macro(rreq->dev.datatype, r_len);
    r_len *= rreq->dev.user_count;

    /* sender-side payload length travels in hdr_data */
    s_len = NPTL_HEADER_GET_LENGTH(e->hdr_data);

    if (s_len > r_len) {
        /* truncated data */
        MPIR_STATUS_SET_COUNT(rreq->status, r_len);
        /* NOTE(review): s_len/r_len are MPI_Aint but "**truncate %d %d" uses
           int conversions -- values may be narrowed on LP64; confirm. */
        MPIR_ERR_SET2(rreq->status.MPI_ERROR, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", s_len, r_len);
    }
    else {
        MPIR_STATUS_SET_COUNT(rreq->status, s_len);
    }
}
/* MXM receive-completion callback: translate the MXM completion into the
 * request's MPI_Status, recycle the mxm request object onto the proper free
 * list, and complete the MPID request unless its cancel bit is set. */
static void _mxm_recv_completion_cb(void *context)
{
    MPID_Request *req = (MPID_Request *) context;
    mxm_recv_req_t *mxm_rreq;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIU_Assert(req);
    _dbg_mxm_out_req(req);

    req_area = REQ_BASE(req);
    /* maps the mxm error code onto status.MPI_ERROR (and the cancel bit) */
    _mxm_to_mpi_status(req_area->mxm_req->item.base.error, &req->status);

    mxm_rreq = &req_area->mxm_req->item.recv;
    req->status.MPI_TAG = _mxm_tag_mxm2mpi(mxm_rreq->completion.sender_tag);
    req->status.MPI_SOURCE = mxm_rreq->completion.sender_imm;
    req->dev.recv_data_sz = mxm_rreq->completion.actual_len;
    MPIR_STATUS_SET_COUNT(req->status, req->dev.recv_data_sz);

    /* return the mxm request to its owner's free list: per-endpoint when the
       receive was bound to a VC, the global list otherwise */
    if (req->ch.vc) {
        MPID_nem_mxm_vc_area *vc_area = VC_BASE(req->ch.vc);
        list_enqueue(&vc_area->mxm_ep->free_queue, &req_area->mxm_req->queue);
    }
    else {
        list_enqueue(&mxm_obj->free_queue, &req_area->mxm_req->queue);
    }

    _dbg_mxm_output(5, "========> %s RECV req %p status %d\n", (MPIR_STATUS_GET_CANCEL_BIT(req->status) ? "Canceling" : "Completing"), req, req->status.MPI_ERROR);

    /* a cancelled request is completed by the cancel path, not here */
    if (likely(!MPIR_STATUS_GET_CANCEL_BIT(req->status))) {
        _mxm_handle_rreq(req);
    }
}
static int handle_probe(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPID_Request *const req = e->user_ptr; MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE); MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE); if (e->ni_fail_type == PTL_NI_NO_MATCH) { REQ_PTL(req)->found = FALSE; goto finish_probe; } REQ_PTL(req)->found = TRUE; req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits); req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits); MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data)); finish_probe: mpi_errno = MPID_Request_complete(req); if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); } fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE); return mpi_errno; fn_fail: goto fn_exit; }
/* Fill in a request's status after a successful queue search: decode the
 * source and tag out of the matched tag bits and record the message length. */
static int search_complete(uint64_t tag, size_t msglen, MPID_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;

    BEGIN_FUNC(FCNAME);

    /* the search itself succeeded, so the status carries no error */
    rreq->status.MPI_ERROR = MPI_SUCCESS;
    rreq->status.MPI_TAG = get_tag(tag);
    rreq->status.MPI_SOURCE = get_source(tag);
    MPIR_STATUS_SET_COUNT(rreq->status, msglen);

    END_FUNC(FCNAME);
    return mpi_errno;
}
/* Completion handler for an mprobe ME event: if a message matched, capture
 * its status, stash the already-delivered payload bytes for a later mrecv,
 * record eager vs. rendezvous disposition, and complete the probe request. */
static int handle_mprobe(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *const req = e->user_ptr;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE);

    if (e->ni_fail_type == PTL_NI_NO_MATCH) {
        /* no matching message: complete the request as "not found" */
        REQ_PTL(req)->found = FALSE;
        goto finish_mprobe;
    }

    REQ_PTL(req)->found = TRUE;
    /* sender rank/tag travel in the match bits; total length in hdr_data */
    req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits);
    req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits);
    MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data));
    MPIDI_Request_set_sync_send_flag(req, e->hdr_data & NPTL_SSEND);
    /* copy the bytes that arrived with the event so the matched message can
       be consumed later by mrecv */
    MPIU_CHKPMEM_MALLOC(req->dev.tmpbuf, void *, e->mlength, mpi_errno, "tmpbuf");
    MPIU_Memcpy((char *)req->dev.tmpbuf, e->start, e->mlength);
    req->dev.recv_data_sz = e->mlength;

    if (!(e->hdr_data & NPTL_LARGE)) {
        MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_EAGER_MSG);
    }
    else {
        /* large (rendezvous) message: only the first PTL_LARGE_THRESHOLD
           bytes arrived; save the match envelope so the transfer can be
           finished when the message is actually received */
        MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD);
        req->dev.match.parts.tag = req->status.MPI_TAG;
        req->dev.match.parts.context_id = NPTL_MATCH_GET_CTX(e->match_bits);
        req->dev.match.parts.rank = req->status.MPI_SOURCE;
        MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_RNDV_MSG);
    }

    /* At this point we know the ME is unlinked. Invalidate the handle to
       prevent further accesses, e.g. an attempted cancel. */
    REQ_PTL(req)->put_me = PTL_INVALID_HANDLE;
    req->dev.recv_pending_count = 1;

  finish_mprobe:
    mpi_errno = MPID_Request_complete(req);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

  fn_exit:
    MPIU_CHKPMEM_COMMIT();
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE);
    return mpi_errno;
  fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}
/* Wait for any one of `count` MPIO requests to complete.
 *
 * On success *index is set to the index of the completed request (or
 * MPI_UNDEFINED if no request was active) and *status is filled in.
 * Returns an MPI error code.
 *
 * BUGFIX: previously, if MPIO_Test failed while *flag stayed 0, the error
 * was discarded and the busy-wait loop retried the same failing request
 * forever.  The error is now propagated to the caller immediately. */
int MPIO_Waitany(int count, MPIO_Request requests[], int *index, MPI_Status *status)
{
    int i, flag, err;
    MPID_THREADPRIV_DECL;

    ROMIO_THREAD_CS_ENTER();

    /* single-request case degenerates to a plain wait */
    if (count == 1) {
        err = MPIO_Wait( requests, status );
        if (!err) *index = 0;
        goto fn_exit;
    }

    /* Check for no active requests */
    for (i=0; i<count; i++) {
        if (requests[i] != MPIO_REQUEST_NULL) {
            break;
        }
    }
    if (i == count) {
        *index = MPI_UNDEFINED;
#ifdef MPICH
        /* need to set empty status */
        if (status != MPI_STATUS_IGNORE) {
            status->MPI_SOURCE = MPI_ANY_SOURCE;
            status->MPI_TAG = MPI_ANY_TAG;
            MPIR_STATUS_SET_COUNT(*status, 0);
            MPIR_STATUS_SET_CANCEL_BIT(*status, 0);
        }
#endif
        err = MPI_SUCCESS;
        goto fn_exit;
    }

    /* Poll the active requests until one completes or a test fails. */
    err = MPI_SUCCESS;
    do {
        flag = 0;
        for (i=0; i<count; i++) {
            if (requests[i] != MPIO_REQUEST_NULL) {
                err = MPIO_Test( &requests[i], &flag, status );
                if (flag) {
                    if (!err) *index = i;
                    break;
                }
                /* a failing test with flag==0 must not be silently retried */
                if (err) goto fn_exit;
            }
        }
    } while (flag == 0);

fn_exit:
    ROMIO_THREAD_CS_EXIT();
    return err;
}
/* Receiver-side completion of an InfiniBand LMT (long-message transfer):
 * for noncontiguous datatypes, unpack the staged pack buffer into the user
 * buffer and free it; then complete the receive request.
 * NOTE(review): the `vc` parameter is unused in this body -- presumably kept
 * for the LMT callback signature; confirm against the function table. */
int MPID_nem_ib_lmt_done_recv(struct MPIDI_VC *vc, struct MPID_Request *rreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV);

    dprintf("lmt_done_recv,enter,rreq=%p,head=%p\n", rreq, MPID_nem_ib_lmtq.head);

    int is_contig;
    MPID_Datatype_is_contig(rreq->dev.datatype, &is_contig);
    if (!is_contig) {
        dprintf("lmt_done_recv,copying noncontiguous data to user buffer\n");

        /* see MPIDI_CH3U_Request_unpack_uebuf (in /src/mpid/ch3/src/ch3u_request.c) */
        /* or MPIDI_CH3U_Receive_data_found (in src/mpid/ch3/src/ch3u_handle_recv_pkt.c) */
        MPIDI_msg_sz_t unpack_sz = rreq->ch.lmt_data_sz;
        MPID_Segment seg;
        MPI_Aint last;

        MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &seg, 0);
        last = unpack_sz;
        MPID_Segment_unpack(&seg, 0, &last, REQ_FIELD(rreq, lmt_pack_buf));
        if (last != unpack_sz) {
            /* --BEGIN ERROR HANDLING-- */
            /* received data was not entirely consumed by unpack()
             * because too few bytes remained to fill the next basic
             * datatype */
            MPIR_STATUS_SET_COUNT(rreq->status, last);
            rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**MPID_nem_ib_lmt_done_recv", 0);
            /* --END ERROR HANDLING-- */
        }

        /* release the staging buffer back to the netmod allocator */
        //MPIU_Free(REQ_FIELD(rreq, lmt_pack_buf));
        MPID_nem_ib_stfree(REQ_FIELD(rreq, lmt_pack_buf), (size_t) rreq->ch.lmt_data_sz);
    }

    dprintf("lmt_done_recv,1,req=%p,pcc=%d\n", rreq, MPIDI_CH3I_progress_completion_count.v);
    MPIDI_CH3U_Request_complete(rreq);
    dprintf("lmt_done_recv,complete,req=%p\n", rreq);
    dprintf("lmt_done_recv,2,pcc=%d\n", MPIDI_CH3I_progress_completion_count.v);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_DONE_RECV);
    return mpi_errno;
    //fn_fail: goto fn_exit;
}
/* Attempt to cancel a posted receive request.  A receive is cancellable only
 * while it still sits in the posted queue; on successful cancel the request
 * gets the cancel bit, an empty count, and is completed.  Returns an MPI
 * error code (MPI_SUCCESS even when the cancel could not be performed). */
int MPID_Cancel_recv(MPIR_Request * rreq)
{
    int netmod_cancelled = TRUE;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CANCEL_RECV);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CANCEL_RECV);

    MPIR_Assert(rreq->kind == MPIR_REQUEST_KIND__RECV);

    /* If the netmod has its own cancel_recv function, we need to call
       it here. ANYSOURCE cancels (netmod and otherwise) are handled by
       MPIDI_CH3U_Recvq_DP below. */
#ifdef ENABLE_COMM_OVERRIDES
    if (rreq->dev.match.parts.rank != MPI_ANY_SOURCE) {
        MPIDI_VC_t *vc;
        MPIDI_Comm_get_vc_set_active(rreq->comm, rreq->dev.match.parts.rank, &vc);
        /* the netmod's return value is inverted: nonzero means "not cancelled" */
        if (vc->comm_ops && vc->comm_ops->cancel_recv)
            netmod_cancelled = !vc->comm_ops->cancel_recv(NULL, rreq);
    }
#endif

    if (netmod_cancelled && MPIDI_CH3U_Recvq_DP(rreq)) {
        MPL_DBG_MSG_P(MPIDI_CH3_DBG_OTHER,VERBOSE, "request 0x%08x cancelled", rreq->handle);
        MPIR_STATUS_SET_CANCEL_BIT(rreq->status, TRUE);
        MPIR_STATUS_SET_COUNT(rreq->status, 0);
        mpi_errno = MPID_Request_complete(rreq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
    }
    else {
        /* already matched (or the netmod refused): cannot cancel */
        MPL_DBG_MSG_P(MPIDI_CH3_DBG_OTHER,VERBOSE, "request 0x%08x already matched, unable to cancel", rreq->handle);
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CANCEL_RECV);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* OFI tagged-search completion callback: record whether the search matched
 * a message and, if it did, populate the request's status from the
 * completion entry. */
static int tsearch_callback(cq_tagged_entry_t * wc, MPID_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;

    BEGIN_FUNC(FCNAME);

    if (!wc->data) {
        /* search came up empty */
        REQ_OFI(rreq)->match_state = TSEARCH_NOT_FOUND;
    }
    else {
        /* matched: decode source/tag from the tag bits, length from wc */
        REQ_OFI(rreq)->match_state = TSEARCH_FOUND;
        rreq->status.MPI_ERROR = MPI_SUCCESS;
        rreq->status.MPI_TAG = get_tag(wc->tag);
        rreq->status.MPI_SOURCE = get_source(wc->tag);
        MPIR_STATUS_SET_COUNT(rreq->status, wc->len);
    }

    END_FUNC(FCNAME);
    return mpi_errno;
}
/* OFI peek completion callback: a peek reaching this point has found a
 * message, so mark the match and fill the status from the completion entry.
 * The source is carried in the tag bits (API set 1) or in the completion's
 * immediate data field (API set 2). */
static int ADD_SUFFIX(peek_callback)(cq_tagged_entry_t * wc, MPIR_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;

    BEGIN_FUNC(FCNAME);

    REQ_OFI(rreq)->match_state = PEEK_FOUND;
    rreq->status.MPI_ERROR = MPI_SUCCESS;
    rreq->status.MPI_TAG = get_tag(wc->tag);
#if API_SET == API_SET_1
    rreq->status.MPI_SOURCE = get_source(wc->tag);
#elif API_SET == API_SET_2
    rreq->status.MPI_SOURCE = wc->data;
#endif
    MPIR_STATUS_SET_COUNT(rreq->status, wc->len);

    END_FUNC(FCNAME);
    return mpi_errno;
}
/* Test whether all `count` MPIO requests have completed.
 *
 * Sets *flag to true only if every active request tests complete, and fills
 * statuses[] (or sets empty statuses for null requests).  Returns an MPI
 * error code.
 *
 * BUGFIX: previously `&statuses[i]` (and the count==1 shortcut) indexed into
 * the MPI_STATUSES_IGNORE sentinel when the caller passed it, which is
 * undefined behavior; MPI_STATUS_IGNORE is now forwarded instead. */
int MPIO_Testall(int count, MPIO_Request requests[], int *flag, MPI_Status statuses[])
{
    int done, i, err;
    MPI_Status *sptr;

    ROMIO_THREAD_CS_ENTER();

    /* single-request case degenerates to a plain test */
    if (count == 1) {
        sptr = (statuses == MPI_STATUSES_IGNORE) ? MPI_STATUS_IGNORE : statuses;
        err = MPIO_Test( requests, flag, sptr );
        goto fn_exit;
    }

    /* This is actually very difficult to do.  We can't use MPIO_Test,
       since we must change the requests only if *ALL* requests are complete */
    /* FIXME: THIS IS NOT CORRECT (see above).  But most applications won't care */
    done = 1;
    for (i=0; i<count; i++) {
        if (requests[i] != MPIO_REQUEST_NULL) {
            sptr = (statuses == MPI_STATUSES_IGNORE) ? MPI_STATUS_IGNORE : &statuses[i];
            err = MPIO_Test( &requests[i], flag, sptr );
            if (!*flag) done = 0;
            if (err) goto fn_exit;
        }
        else {
#ifdef MPICH
            /* need to set empty status */
            if (statuses != MPI_STATUSES_IGNORE) {
                statuses[i].MPI_SOURCE = MPI_ANY_SOURCE;
                statuses[i].MPI_TAG = MPI_ANY_TAG;
                MPIR_STATUS_SET_COUNT(statuses[i], 0);
                MPIR_STATUS_SET_CANCEL_BIT(statuses[i], 0);
            }
#else
            ;
#endif
        }
    }
    *flag = done;

    err = MPI_SUCCESS;
fn_exit:
    ROMIO_THREAD_CS_EXIT();
    return err;
}
/* Set the byte count of *status to `count` elements of `datatype`
 * (i.e. count * datatype-size).  Always returns MPI_SUCCESS. */
int MPIR_Status_set_elements_x_impl(MPI_Status *status, MPI_Datatype datatype, MPI_Count count)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Count type_size;

    MPID_Datatype_get_size_macro(datatype, type_size);

    if (count != 0) {
        /* overflow check, should probably be a real error check? */
        MPIU_Assert(type_size >= 0 && count > 0);
        MPIU_Assert(count * type_size < MPIR_COUNT_MAX);
    }

    MPIR_STATUS_SET_COUNT(*status, type_size * count);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* Unpack data accumulated in the request's temporary receive buffer (SRBuf)
 * into the user buffer via the request's segment.  Handles three outcomes:
 * nothing unpacked (datatype mismatch -> drain and discard), final chunk
 * only partially consumed (mismatch error), or partial consumption mid-
 * stream (carry leftover bytes to the front of the SRBuf for next round). */
int MPIDI_CH3U_Request_unpack_srbuf(MPID_Request * rreq)
{
    MPI_Aint last;
    int tmpbuf_last;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);

    /* stream offset just past the data currently in the SRBuf, clipped to
       the total segment size */
    tmpbuf_last = (int)(rreq->dev.segment_first + rreq->dev.tmpbuf_sz);
    if (rreq->dev.segment_size < tmpbuf_last) {
        tmpbuf_last = (int)rreq->dev.segment_size;
    }
    last = tmpbuf_last;
    MPID_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, rreq->dev.tmpbuf);
    if (last == 0 || last == rreq->dev.segment_first) {
        /* --BEGIN ERROR HANDLING-- */
        /* If no data can be unpacked, then we have a datatype processing
           problem.  Adjust the segment info so that the remaining data is
           received and thrown away. */
        MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.segment_first);
        rreq->dev.segment_size = rreq->dev.segment_first;
        rreq->dev.segment_first += tmpbuf_last;
        rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
        /* --END ERROR HANDLING-- */
    }
    else if (tmpbuf_last == rreq->dev.segment_size) {
        /* this was the final chunk of the segment */
        /* --BEGIN ERROR HANDLING-- */
        if (last != tmpbuf_last) {
            /* received data was not entirely consumed by unpack() because
               too few bytes remained to fill the next basic datatype.
               Note: the segment_first field is set to segment_last so that
               if this is a truncated message, extra data will be read off
               the pipe. */
            MPIR_STATUS_SET_COUNT(rreq->status, last);
            rreq->dev.segment_size = last;
            rreq->dev.segment_first = tmpbuf_last;
            rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
        }
        /* --END ERROR HANDLING-- */
    }
    else {
        /* mid-stream partial consumption: slide the unconsumed tail bytes
           to the start of the SRBuf so the next receive appends after them */
        rreq->dev.tmpbuf_off = (int)(tmpbuf_last - last);
        if (rreq->dev.tmpbuf_off > 0) {
            /* move any remaining data to the beginning of the buffer.
               Note: memmove() is used since the data regions could
               overlap. */
            memmove(rreq->dev.tmpbuf, (char *) rreq->dev.tmpbuf + (last - rreq->dev.segment_first), rreq->dev.tmpbuf_off);
        }
        rreq->dev.segment_first = last;
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_SRBUF);
    return mpi_errno;
}
MPID_Request * MPID_Request_create(void) { MPID_Request * req; MPIDI_STATE_DECL(MPID_STATE_MPID_REQUEST_CREATE); MPIDI_FUNC_ENTER(MPID_STATE_MPID_REQUEST_CREATE); req = MPIU_Handle_obj_alloc(&MPID_Request_mem); if (req != NULL) { MPIU_DBG_MSG_P(CH3_CHANNEL,VERBOSE, "allocated request, handle=0x%08x", req->handle); #ifdef MPICH_DBG_OUTPUT /*MPIU_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPID_REQUEST);*/ if (HANDLE_GET_MPI_KIND(req->handle) != MPID_REQUEST) { int mpi_errno; mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**invalid_handle", "**invalid_handle %d", req->handle); MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL); } #endif /* FIXME: This makes request creation expensive. We need to trim this to the basics, with additional setup for special-purpose requests (think base class and inheritance). For example, do we *really* want to set the kind to UNDEFINED? And should the RMA values be set only for RMA requests? */ MPIU_Object_set_ref(req, 1); req->kind = MPID_REQUEST_UNDEFINED; MPID_cc_set(&req->cc, 1); req->cc_ptr = &req->cc; /* FIXME: status fields meaningful only for receive, and even then should not need to be set. 
*/ req->status.MPI_SOURCE = MPI_UNDEFINED; req->status.MPI_TAG = MPI_UNDEFINED; req->status.MPI_ERROR = MPI_SUCCESS; MPIR_STATUS_SET_COUNT(req->status, 0); MPIR_STATUS_SET_CANCEL_BIT(req->status, FALSE); req->comm = NULL; req->greq_fns = NULL; req->errflag = MPIR_ERR_NONE; req->dev.datatype_ptr = NULL; req->dev.segment_ptr = NULL; /* Masks and flags for channel device state in an MPID_Request */ req->dev.state = 0; req->dev.cancel_pending = FALSE; /* FIXME: RMA ops shouldn't need to be set except when creating a request for RMA operations */ req->dev.target_win_handle = MPI_WIN_NULL; req->dev.source_win_handle = MPI_WIN_NULL; req->dev.lock_queue_entry = NULL; req->dev.dtype_info = NULL; req->dev.dataloop = NULL; req->dev.iov_offset = 0; req->dev.flags = MPIDI_CH3_PKT_FLAG_NONE; req->dev.resp_request_handle = MPI_REQUEST_NULL; req->dev.user_buf = NULL; req->dev.OnDataAvail = NULL; req->dev.OnFinal = NULL; req->dev.user_buf = NULL; req->dev.drop_data = FALSE; req->dev.stream_offset = 0; #ifdef MPIDI_CH3_REQUEST_INIT MPIDI_CH3_REQUEST_INIT(req); #endif } else { /* FIXME: This fails to fail if debugging is turned off */ MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"unable to allocate a request"); } MPIDI_FUNC_EXIT(MPID_STATE_MPID_REQUEST_CREATE); return req; }
/* (Re)load the request's IOV so the channel can receive the next portion of
 * incoming data.  Chooses among: continuing to stage through an SRBuf,
 * building an IOV directly into the user buffer, falling back to an SRBuf
 * when the IOV would be too sparse, or draining excess (truncated) data.
 * Sets dev.OnDataAvail to the handler that should run when the IOV fills. */
int MPIDI_CH3U_Request_load_recv_iov(MPID_Request * const rreq)
{
    MPI_Aint last;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);

    if (rreq->dev.segment_first < rreq->dev.segment_size) {
        /* still reading data that needs to go into the user buffer */
        if (MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_ACCUM_RECV && MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_ACCUM_RECV && MPIDI_Request_get_srbuf_flag(rreq)) {
            MPIDI_msg_sz_t data_sz;
            MPIDI_msg_sz_t tmpbuf_sz;

            /* Once a SRBuf is in use, we continue to use it since a small
               amount of data may already be present at the beginning of the
               buffer.  This data is left over from the previous unpack, most
               like a result of alignment issues.  NOTE: we could force the
               use of the SRBuf only when (rreq->dev.tmpbuf_off > 0)... */
            data_sz = rreq->dev.segment_size - rreq->dev.segment_first - rreq->dev.tmpbuf_off;
            MPIU_Assert(data_sz > 0);
            tmpbuf_sz = rreq->dev.tmpbuf_sz - rreq->dev.tmpbuf_off;
            if (data_sz > tmpbuf_sz) {
                data_sz = tmpbuf_sz;
            }
            /* single IOV entry pointing just past the leftover bytes */
            rreq->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)((char *) rreq->dev.tmpbuf + rreq->dev.tmpbuf_off);
            rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
            rreq->dev.iov_offset = 0;
            rreq->dev.iov_count = 1;
            MPIU_Assert(rreq->dev.segment_first + data_sz + rreq->dev.tmpbuf_off <= rreq->dev.recv_data_sz);
            if (rreq->dev.segment_first + data_sz + rreq->dev.tmpbuf_off == rreq->dev.recv_data_sz) {
                MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE, "updating rreq to read the remaining data into the SRBuf");
                rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackSRBufComplete;
            }
            else {
                MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE, "updating rreq to read more data into the SRBuf");
                rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV;
            }
            goto fn_exit;
        }

        /* build an IOV directly over the user buffer via the segment */
        last = rreq->dev.segment_size;
        rreq->dev.iov_count = MPID_IOV_LIMIT;
        rreq->dev.iov_offset = 0;
        MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST, "pre-upv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d", rreq->dev.segment_first, last, rreq->dev.iov_count));
        MPIU_Assert(rreq->dev.segment_first < last);
        MPIU_Assert(last > 0);
        MPID_Segment_unpack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, &rreq->dev.iov[0], &rreq->dev.iov_count);
        MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST, "post-upv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d, iov_offset=%lld", rreq->dev.segment_first, last, rreq->dev.iov_count, (long long)rreq->dev.iov_offset));
        MPIU_Assert(rreq->dev.iov_count >= 0 && rreq->dev.iov_count <= MPID_IOV_LIMIT);

        /* --BEGIN ERROR HANDLING-- */
        if (rreq->dev.iov_count == 0) {
            /* If the data can't be unpacked, the we have a mis-match between
               the datatype and the amount of data received.  Adjust the
               segment info so that the remaining data is received and
               thrown away. */
            rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0);
            MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.segment_first);
            rreq->dev.segment_size = rreq->dev.segment_first;
            mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
            goto fn_exit;
        }
        else {
            MPIU_Assert(rreq->dev.iov_offset < rreq->dev.iov_count);
        }
        /* --END ERROR HANDLING-- */

        if (last == rreq->dev.recv_data_sz) {
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE, "updating rreq to read the remaining data directly into the user buffer");
            /* Eventually, use OnFinal for this instead */
            rreq->dev.OnDataAvail = rreq->dev.OnFinal;
        }
        else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV || MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV || (last == rreq->dev.segment_size || (last - rreq->dev.segment_first) / rreq->dev.iov_count >= MPIDI_IOV_DENSITY_MIN)) {
            /* the IOV is dense enough to keep reading straight into the
               user buffer */
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE, "updating rreq to read more data directly into the user buffer");
            rreq->dev.segment_first = last;
            rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReloadIOV;
        }
        else {
            /* Too little data would have been received using an IOV.  We
               will start receiving data into a SRBuf and unpacking it
               later. */
            MPIU_Assert(MPIDI_Request_get_srbuf_flag(rreq) == FALSE);

            MPIDI_CH3U_SRBuf_alloc(rreq, rreq->dev.segment_size - rreq->dev.segment_first);
            rreq->dev.tmpbuf_off = 0;
            /* --BEGIN ERROR HANDLING-- */
            if (rreq->dev.tmpbuf_sz == 0) {
                /* FIXME - we should drain the data off the pipe here, but we
                   don't have a buffer to drain it into.  should this be a
                   fatal error? */
                MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"SRBuf allocation failure");
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", "**nomem %d", rreq->dev.segment_size - rreq->dev.segment_first);
                rreq->status.MPI_ERROR = mpi_errno;
                goto fn_exit;
            }
            /* --END ERROR HANDLING-- */

            /* fill in the IOV using a recursive call */
            mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
        }
    }
    else {
        /* receive and toss any extra data that does not fit in the user's
           buffer */
        MPIDI_msg_sz_t data_sz;

        data_sz = rreq->dev.recv_data_sz - rreq->dev.segment_first;
        if (!MPIDI_Request_get_srbuf_flag(rreq)) {
            MPIDI_CH3U_SRBuf_alloc(rreq, data_sz);
            /* --BEGIN ERROR HANDLING-- */
            if (rreq->dev.tmpbuf_sz == 0) {
                MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"SRBuf allocation failure");
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
                rreq->status.MPI_ERROR = mpi_errno;
                goto fn_exit;
            }
            /* --END ERROR HANDLING-- */
        }

        if (data_sz <= rreq->dev.tmpbuf_sz) {
            /* all the excess fits in one SRBuf read */
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE, "updating rreq to read overflow data into the SRBuf and complete");
            rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
            MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
            /* Eventually, use OnFinal for this instead */
            rreq->dev.OnDataAvail = rreq->dev.OnFinal;
        }
        else {
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE, "updating rreq to read overflow data into the SRBuf and reload IOV");
            rreq->dev.iov[0].MPID_IOV_LEN = rreq->dev.tmpbuf_sz;
            rreq->dev.segment_first += rreq->dev.tmpbuf_sz;
            rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReloadIOV;
        }

        rreq->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rreq->dev.tmpbuf;
        rreq->dev.iov_count = 1;
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
    return mpi_errno;
}
/* OFI receive-completion callback: populate the MPI_Status, unpack a
 * noncontiguous receive from its pack buffer, and acknowledge a synchronous
 * send by posting a zero-byte protocol message back to the sender. */
static inline int ADD_SUFFIX(MPID_nem_ofi_recv_callback)(cq_tagged_entry_t * wc, MPID_Request * rreq)
{
    int err0, err1, src, mpi_errno = MPI_SUCCESS;
    uint64_t ssend_bits;
    MPIDI_msg_sz_t sz;
    MPIDI_VC_t *vc;
    MPID_Request *sync_req;
    BEGIN_FUNC(FCNAME);
    /* ---------------------------------------------------- */
    /* Populate the MPI Status and unpack noncontig buffer  */
    /* ---------------------------------------------------- */
    rreq->status.MPI_ERROR = MPI_SUCCESS;
#if API_SET == API_SET_1
    rreq->status.MPI_SOURCE = get_source(wc->tag);
#elif API_SET == API_SET_2
    rreq->status.MPI_SOURCE = wc->data;
#endif
    src = rreq->status.MPI_SOURCE;
    rreq->status.MPI_TAG = get_tag(wc->tag);
    REQ_OFI(rreq)->req_started = 1;
    MPIR_STATUS_SET_COUNT(rreq->status, wc->len);

    if (REQ_OFI(rreq)->pack_buffer) {
        /* noncontiguous receive was staged in a pack buffer: unpack it into
           the user buffer and update the count with what actually fit */
        MPIDI_CH3U_Buffer_copy(REQ_OFI(rreq)->pack_buffer, MPIR_STATUS_GET_COUNT(rreq->status), MPI_BYTE, &err0, rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &sz, &err1);
        MPIR_STATUS_SET_COUNT(rreq->status, sz);
        MPIU_Free(REQ_OFI(rreq)->pack_buffer);
        if (err0 || err1) {
            rreq->status.MPI_ERROR = MPI_ERR_TYPE;
        }
    }

    if ((wc->tag & MPID_PROTOCOL_MASK) == MPID_SYNC_SEND) {
        /* ---------------------------------------------------- */
        /* Ack the sync send and wait for the send request      */
        /* completion(when callback executed.  A protocol bit   */
        /* MPID_SYNC_SEND_ACK is set in the tag bits to provide */
        /* separation of MPI messages and protocol messages     */
        /* ---------------------------------------------------- */
        vc = REQ_OFI(rreq)->vc;
        if (!vc) {
            /* MPI_ANY_SOURCE -- Post message from status, complete the VC */
            vc = rreq->comm->dev.vcrt->vcr_table[src];
            MPIU_Assert(vc);
        }
#if API_SET == API_SET_1
        ssend_bits = init_sendtag(rreq->dev.match.parts.context_id, rreq->comm->rank, rreq->status.MPI_TAG, MPID_SYNC_SEND_ACK);
#elif API_SET == API_SET_2
        ssend_bits = init_sendtag_2(rreq->dev.match.parts.context_id, rreq->status.MPI_TAG, MPID_SYNC_SEND_ACK);
#endif
        /* zero-byte ack message whose completion is handled by
           MPID_nem_ofi_sync_recv_callback on the parent request */
        MPID_nem_ofi_create_req(&sync_req, 1);
        sync_req->dev.OnDataAvail = NULL;
        sync_req->dev.next = NULL;
        REQ_OFI(sync_req)->event_callback = MPID_nem_ofi_sync_recv_callback;
        REQ_OFI(sync_req)->parent = rreq;
#if API_SET == API_SET_1
        FI_RC_RETRY(fi_tsend(gl_data.endpoint,
#elif API_SET == API_SET_2
        FI_RC_RETRY(fi_tsenddata(gl_data.endpoint,
#endif
                             NULL,
                             0,
                             gl_data.mr,
#if API_SET == API_SET_2
                             rreq->comm->rank,
#endif
                             VC_OFI(vc)->direct_addr,
                             ssend_bits, &(REQ_OFI(sync_req)->ofi_context)), tsend);
    }
    /* NOTE(review): the remainder of this function (completion of rreq and
       END_FUNC/return) appears truncated in this chunk of the file. */
/* MSGQUEUE lock must be held by caller */
/* Handle an unexpected (unposted, or long+noncontiguous) incoming message:
 * append it to the unexpected queue, record its envelope in the status, and
 * either hand PAMI a buffer for the remaining bytes or copy the payload and
 * complete immediately. */
void MPIDI_Callback_process_unexp(MPID_Request *newreq, pami_context_t context, const MPIDI_MsgInfo * msginfo, size_t sndlen, pami_endpoint_t sender, const void * sndbuf, pami_recv_t * recv, unsigned isSync)
{
    MPID_Request *rreq = NULL;

    /* ---------------------------------------------------- */
    /*  Fallback position:                                  */
    /*     + Request was not posted, or                     */
    /*     + Request was long & not contiguous.             */
    /*  We must allocate enough space to hold the message.  */
    /*  The temporary buffer will be unpacked later.        */
    /* ---------------------------------------------------- */
    unsigned rank = msginfo->MPIrank;
    unsigned tag = msginfo->MPItag;
    unsigned context_id = msginfo->MPIctxt;
#ifndef OUT_OF_ORDER_HANDLING
    rreq = MPIDI_Recvq_AEU(newreq, rank, tag, context_id);
#else
    unsigned msg_seqno = msginfo->MPIseqno;
    rreq = MPIDI_Recvq_AEU(newreq, rank, PAMIX_Endpoint_query(sender), tag, context_id, msg_seqno);
#endif
    /* ---------------------- */
    /*  Copy in information.  */
    /* ---------------------- */
    rreq->status.MPI_SOURCE = rank;
    rreq->status.MPI_TAG = tag;
    MPIR_STATUS_SET_COUNT(rreq->status, sndlen);
    MPIDI_Request_setCA (rreq, MPIDI_CA_COMPLETE);
    MPIDI_Request_cpyPeerRequestH(rreq, msginfo);
    MPIDI_Request_setSync (rreq, isSync);

    /* Set the rank of the sender if a sync msg. */
#ifndef OUT_OF_ORDER_HANDLING
    if (isSync) {
#endif
        MPIDI_Request_setPeerRank_comm(rreq, rank);
        MPIDI_Request_setPeerRank_pami(rreq, PAMIX_Endpoint_query(sender));
#ifndef OUT_OF_ORDER_HANDLING
    }
#endif

    /* a nonzero-length message must already have an unexpected buffer */
    MPID_assert(!sndlen || rreq->mpid.uebuf != NULL);
    TRACE_MEMSET_R(PAMIX_Endpoint_query(sender),msg_seqno,recv_status);
    TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),msgid,msginfo->MPIseqno);
    TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rtag,tag);
    TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rctx,msginfo->MPIctxt);
    TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rlen,sndlen);
    TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),fl.f.sync,isSync);
    TRACE_SET_R_VAL(PAMIX_Endpoint_query(sender),(msginfo->MPIseqno & SEQMASK),rsource,PAMIX_Endpoint_query(sender));
    TRACE_SET_REQ_VAL(rreq->mpid.idx,(msginfo->MPIseqno & SEQMASK));

    if (recv != NULL) {
        recv->local_fn = MPIDI_RecvDoneCB_mutexed;
        recv->cookie = rreq;
        /* -------------------------------------------------- */
        /*  Let PAMI know where to put the rest of the data.  */
        /* -------------------------------------------------- */
        recv->addr = rreq->mpid.uebuf;
    }
    else {
        /* ------------------------------------------------- */
        /*  We have the data; copy it and complete the msg.  */
        /* ------------------------------------------------- */
        memcpy(rreq->mpid.uebuf, sndbuf, sndlen);
        MPIDI_RecvDoneCB(context, rreq, PAMI_SUCCESS);
        /* caller must release rreq, after unlocking MSGQUEUE */
    }
}
/* Packet handler for an eager-short send: the entire payload travels inside
 * the packet itself, so this matches (or enqueues) the receive, fills in the
 * status, and copies the payload straight into the user buffer. */
int MPIDI_CH3_PktHandler_EagerShortSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPIDI_CH3_Pkt_eagershort_send_t * eagershort_pkt = &pkt->eagershort_send;
    MPID_Request * rreq;
    int found;
    int mpi_errno = MPI_SUCCESS;

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_MSGQ_MUTEX);

    /* printf( "Receiving short eager!\n" ); fflush(stdout); */
    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "received eagershort send pkt, rank=%d, tag=%d, context=%d", eagershort_pkt->match.parts.rank, eagershort_pkt->match.parts.tag, eagershort_pkt->match.parts.context_id));
    MPIU_DBG_MSGPKT(vc,eagershort_pkt->match.parts.tag, eagershort_pkt->match.parts.context_id, eagershort_pkt->match.parts.rank,eagershort_pkt->data_sz, "ReceivedEagerShort");

    /* find a matching posted receive or append to the unexpected queue */
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&eagershort_pkt->match, &found);
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this. */
    if (!found && MPID_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }

    (rreq)->status.MPI_SOURCE = (eagershort_pkt)->match.parts.rank;
    (rreq)->status.MPI_TAG = (eagershort_pkt)->match.parts.tag;
    MPIR_STATUS_SET_COUNT((rreq)->status, (eagershort_pkt)->data_sz);
    (rreq)->dev.recv_data_sz = (eagershort_pkt)->data_sz;
    MPIDI_Request_set_seqnum((rreq), (eagershort_pkt)->seqnum);
    /* FIXME: Why do we set the message type? */
    MPIDI_Request_set_msg_type((rreq), MPIDI_REQUEST_EAGER_MSG);

    /* This packed completes the reception of the indicated data.  The
       packet handler returns null for a request that requires no further
       communication */
    *rreqp = NULL;
    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    /* Extract the data from the packet */
    /* Note that if the data size if zero, we're already done */
    if (rreq->dev.recv_data_sz > 0) {
        if (found) {
            int dt_contig;
            MPI_Aint dt_true_lb;
            MPIDI_msg_sz_t userbuf_sz;
            MPID_Datatype *dt_ptr;
            MPIDI_msg_sz_t data_sz;

            /* Make sure that we handle the general (non-contiguous)
               datatypes correctly while optimizing for the special case */
            MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, userbuf_sz, dt_ptr, dt_true_lb);

            if (rreq->dev.recv_data_sz <= userbuf_sz) {
                data_sz = rreq->dev.recv_data_sz;
            }
            else {
                /* receive buffer too small: truncate and record the error */
                MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "receive buffer too small; message truncated, msg_sz=" MPIDI_MSG_SZ_FMT ", userbuf_sz=" MPIDI_MSG_SZ_FMT, rreq->dev.recv_data_sz, userbuf_sz));
                rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d %d %d", rreq->status.MPI_SOURCE, rreq->status.MPI_TAG, rreq->dev.recv_data_sz, userbuf_sz );
                MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz);
                data_sz = userbuf_sz;
            }

            if (dt_contig && data_sz == rreq->dev.recv_data_sz) {
                /* user buffer is contiguous and large enough to store the
                   entire message.  We can just copy the code */

                /* Copy the payload. We could optimize this if data_sz & 0x3 == 0
                   (copy (data_sz >> 2) ints, inline that since data size is
                   currently limited to 4 ints */
                {
                    unsigned char const * restrict p = (unsigned char *)eagershort_pkt->data;
                    unsigned char * restrict bufp = (unsigned char *)(char*)(rreq->dev.user_buf) + dt_true_lb;
                    int i;
                    for (i=0; i<data_sz; i++) {
                        *bufp++ = *p++;
                    }
                }
                /* FIXME: We want to set the OnDataAvail to the appropriate
                   function, which depends on whether this is an RMA request
                   or a pt-to-pt request. */
                rreq->dev.OnDataAvail = 0;
                /* The recv_pending_count must be one here (!) because of the
                   way the pending count is queried.  We may want to fix
                   this, but it will require a sweep of the code */
            }
            else {
            /* NOTE(review): the remainder of this function (noncontiguous /
               not-found paths and fn_exit/fn_fail) is truncated in this
               chunk of the file. */
/**
 * \brief The callback for a new RZV RTS
 * \note  Because this is a short message, the data is already received
 * \param[in] context      The context on which the message is being received.
 * \param[in] sender       The origin endpoint
 * \param[in] _msginfo     The extended header information
 * \param[in] msginfo_size The size of the extended header information
 * \param[in] is_zero_byte The rendezvous message is zero bytes in length.
 */
void MPIDI_RecvRzvCB_impl(pami_context_t context, pami_endpoint_t sender, const void * _msginfo, size_t msginfo_size, const unsigned is_zero_byte)
{
    MPID_assert(_msginfo != NULL);
    MPID_assert(msginfo_size == sizeof(MPIDI_MsgEnvelope));
    const MPIDI_MsgEnvelope * envelope = (const MPIDI_MsgEnvelope *)_msginfo;
    const MPIDI_MsgInfo * msginfo = (const MPIDI_MsgInfo *)&envelope->msginfo;

    MPID_Request * rreq = NULL;
    int found;
    pami_task_t source;
#if TOKEN_FLOW_CONTROL
    int rettoks=0;
#endif

    /* -------------------- */
    /*  Match the request.  */
    /* -------------------- */
    unsigned rank = msginfo->MPIrank;
    unsigned tag = msginfo->MPItag;
    unsigned context_id = msginfo->MPIctxt;

    MPID_Request *newreq = MPIDI_Request_create2();
    MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
    source = PAMIX_Endpoint_query(sender);
    MPIDI_Receive_tokens(msginfo,source);
#ifndef OUT_OF_ORDER_HANDLING
    rreq = MPIDI_Recvq_FDP_or_AEU(newreq, rank, tag, context_id, &found);
#else
    rreq = MPIDI_Recvq_FDP_or_AEU(newreq, rank, source, tag, context_id, msginfo->MPIseqno, &found);
#endif
    TRACE_ERR("RZV CB for req=%p remote-mr=0x%llx bytes=%zu (%sfound)\n", rreq, *(unsigned long long*)&envelope->envelope.memregion, envelope->envelope.length, found?"":"not ");

    /* ---------------------- */
    /*  Copy in information.  */
    /* ---------------------- */
    rreq->status.MPI_SOURCE = rank;
    rreq->status.MPI_TAG = tag;
    MPIR_STATUS_SET_COUNT(rreq->status, envelope->length);
    MPIDI_Request_setPeerRank_comm(rreq, rank);
    MPIDI_Request_setPeerRank_pami(rreq, source);
    MPIDI_Request_cpyPeerRequestH (rreq, msginfo);
    MPIDI_Request_setSync (rreq, msginfo->isSync);
    MPIDI_Request_setRzv (rreq, 1);

    /* ----------------------------------------------------- */
    /* Save the rendezvous information for when the target   */
    /* node calls a receive function and the data is         */
    /* retreived from the origin node.                       */
    /* ----------------------------------------------------- */
    if (is_zero_byte) {
        rreq->mpid.envelope.length = 0;
        rreq->mpid.envelope.data = NULL;
    }
    else {
#ifdef USE_PAMI_RDMA
        memcpy(&rreq->mpid.envelope.memregion, &envelope->memregion, sizeof(pami_memregion_t));
#else
        rreq->mpid.envelope.memregion_used = envelope->memregion_used;
        if(envelope->memregion_used) {
            memcpy(&rreq->mpid.envelope.memregion, &envelope->memregion, sizeof(pami_memregion_t));
        }
        rreq->mpid.envelope.data = envelope->data;
#endif
        rreq->mpid.envelope.length = envelope->length;
        TRACE_SET_R_VAL(source,(rreq->mpid.idx),req,rreq);
        TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,envelope->length);
        TRACE_SET_R_VAL(source,(rreq->mpid.idx),fl.f.sync,msginfo->isSync);
        TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.rzv);
        if (TOKEN_FLOW_CONTROL_ON) {
#if TOKEN_FLOW_CONTROL
            MPIDI_Must_return_tokens(context,source);
#else
            MPID_assert_always(0);
#endif
        }
    }

    /* ----------------------------------------- */
    /* figure out target buffer for request data */
    /* ----------------------------------------- */
    if (found) {
#if (MPIDI_STATISTICS)
        MPID_NSTAT(mpid_statp->earlyArrivalsMatched);
#endif
        /* --------------------------- */
        /* if synchronized, post ack.  */
        /* --------------------------- */
        if (unlikely(MPIDI_Request_isSync(rreq)))
            MPIDI_SyncAck_post(context, rreq, MPIDI_Request_getPeerRank_pami(rreq));
        MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
        if (is_zero_byte)
            MPIDI_RecvRzvDoneCB_zerobyte(context, rreq, PAMI_SUCCESS);
        else {
            /* pull the payload from the origin node */
            MPIDI_RendezvousTransfer(context, rreq);
            TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.sync_com_in_HH);
            TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.matchedInHH);
            TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
        }
        /* the pre-allocated request was not needed (the posted one matched) */
        MPID_Request_discard(newreq);
    }
    /* ------------------------------------------------------------- */
    /*  Request was not posted.                                      */
    /* ------------------------------------------------------------- */
    else {
#if (MPIDI_STATISTICS)
        MPID_NSTAT(mpid_statp->earlyArrivals);
#endif
        /*
         * This is to test that the fields don't need to be
         * initialized.  Remove after this doesn't fail for a while.
         */
        MPID_assert(rreq->mpid.uebuf == NULL);
        MPID_assert(rreq->mpid.uebuflen == 0);
        /* rreq->mpid.uebuf = NULL; */
        /* rreq->mpid.uebuflen = 0; */
#ifdef OUT_OF_ORDER_HANDLING
        if (MPIDI_In_cntr[source].n_OutOfOrderMsgs > 0) {
            MPIDI_Recvq_process_out_of_order_msgs(source, context);
        }
#endif
        MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
    }

    /* ---------------------------------------- */
    /*  Signal that the recv has been started.  */
    /* ---------------------------------------- */
    MPIDI_Progress_signal();
}
/* MPIDI_CH3U_Receive_data_found -- called when an incoming message has
 * matched a posted receive request.  Decides how the payload reaches the
 * user buffer: copy/unpack immediately from 'buf' when all of the data is
 * already available (*buflen >= data_sz), or load rreq->dev.iov and let
 * the channel read the rest.
 *
 * Parameters:
 *   rreq     - matched posted receive request (dev.user_buf/user_count/
 *              datatype and dev.recv_data_sz must already be set)
 *   buf      - channel buffer holding whatever payload has arrived so far
 *   buflen   - in: bytes available in 'buf'; out: bytes consumed from it
 *              (0 when an iov was loaded instead)
 *   complete - out: TRUE when the receive is fully satisfied here
 *
 * Returns MPI_SUCCESS or an MPI error code (allocation / iov-load failure).
 * Truncation and datatype-mismatch conditions are recorded in
 * rreq->status.MPI_ERROR rather than returned. */
int MPIDI_CH3U_Receive_data_found(MPIR_Request *rreq, void *buf, intptr_t *buflen, int *complete)
{
    int dt_contig;
    MPI_Aint dt_true_lb;
    intptr_t userbuf_sz;
    MPIR_Datatype * dt_ptr = NULL;
    intptr_t data_sz;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);

    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");

    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype,
                            dt_contig, userbuf_sz, dt_ptr, dt_true_lb);

    if (rreq->dev.recv_data_sz <= userbuf_sz) {
        data_sz = rreq->dev.recv_data_sz;
    }
    else {
        /* Message is larger than the posted buffer: record MPI_ERR_TRUNCATE
         * in the status and clamp the transfer to the buffer size. */
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
            "receive buffer too small; message truncated, msg_sz=%" PRIdPTR
            ", userbuf_sz=%" PRIdPTR,
            rreq->dev.recv_data_sz, userbuf_sz));
        rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
            MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
            "**truncate", "**truncate %d %d %d %d",
            rreq->status.MPI_SOURCE, rreq->status.MPI_TAG,
            rreq->dev.recv_data_sz, userbuf_sz );
        MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz);
        data_sz = userbuf_sz;
    }

    if (dt_contig && data_sz == rreq->dev.recv_data_sz) {
        /* user buffer is contiguous and large enough to store the entire
           message.  However, we haven't yet *read* the data (this code
           describes how to read the data into the destination) */

        /* if all of the data has already been received, unpack it
           now, otherwise build an iov and let the channel unpack */
        if (*buflen >= data_sz) {
            MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"Copying contiguous data to user buffer");
            /* copy data out of the receive buffer */
            if (rreq->dev.drop_data == FALSE) {
                MPIR_Memcpy((char*)(rreq->dev.user_buf) + dt_true_lb, buf, data_sz);
            }
            *buflen = data_sz;
            *complete = TRUE;
        }
        else {
            /* Not all data is here yet: point a single iov entry at the
             * user buffer and let the channel fill it. */
            MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"IOV loaded for contiguous read");
            rreq->dev.iov[0].MPL_IOV_BUF =
                (MPL_IOV_BUF_CAST)((char*)(rreq->dev.user_buf) + dt_true_lb);
            rreq->dev.iov[0].MPL_IOV_LEN = data_sz;
            rreq->dev.iov_count = 1;
            *buflen = 0;
            *complete = FALSE;
        }

        /* Trigger OnFinal when receiving the last segment */
        rreq->dev.OnDataAvail = rreq->dev.OnFinal;
    }
    else {
        /* user buffer is not contiguous or is too small to hold
           the entire message */
        rreq->dev.segment_ptr = MPIR_Segment_alloc( );
        MPIR_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno,
                             MPI_ERR_OTHER, "**nomem", "**nomem %s",
                             "MPIR_Segment_alloc");
        MPIR_Segment_init(rreq->dev.user_buf, rreq->dev.user_count,
                          rreq->dev.datatype, rreq->dev.segment_ptr);
        rreq->dev.segment_first = 0;
        rreq->dev.segment_size = data_sz;

        /* if all of the data has already been received, and the
           message is not truncated, unpack it now, otherwise build an
           iov and let the channel unpack */
        if (data_sz == rreq->dev.recv_data_sz && *buflen >= data_sz) {
            intptr_t last;
            MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"Copying noncontiguous data to user buffer");
            last = data_sz;
            MPIR_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first,
                                &last, buf);
            /* --BEGIN ERROR HANDLING-- */
            if (last != data_sz) {
                /* If the data can't be unpacked, the we have a
                   mismatch between the datatype and the amount of
                   data received.  Throw away received data. */
                MPIR_ERR_SET(rreq->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch");
                /* 'last' bytes were consumed; segment_first still holds the
                 * pre-unpack offset (0), which becomes the status count. */
                MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.segment_first);
                *buflen = data_sz;
                *complete = TRUE;
                /* FIXME: Set OnDataAvail to 0?  If not, why not? */
                goto fn_exit;
            }
            /* --END ERROR HANDLING-- */
            *buflen = data_sz;
            /* Trigger OnFinal when receiving the last segment */
            rreq->dev.OnDataAvail = rreq->dev.OnFinal;
            *complete = TRUE;
        }
        else {
            MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"IOV loaded for non-contiguous read");
            mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_SETFATALANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                         "**ch3|loadrecviov");
            }
            *buflen = 0;
            *complete = FALSE;
        }
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_FOUND);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/* _mxm_handle_rreq -- finish a receive request completed by the MXM netmod:
 * dequeue it from the CH3 posted queue, detect truncation, unpack
 * non-contiguous data, and hand the request to the CH3 completion path.
 *
 * Returns 'complete' (asserted TRUE after MPIDI_CH3U_Handle_recv_req).
 * NOTE(review): callers appear to ignore the return value in this view --
 * confirm against call sites. */
static int _mxm_handle_rreq(MPID_Request * req)
{
    int complete = FALSE, found = FALSE;
    int dt_contig;
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPIDI_msg_sz_t userbuf_sz;
    MPID_Datatype *dt_ptr;
    MPIDI_msg_sz_t data_sz;
    MPID_nem_mxm_vc_area *vc_area ATTRIBUTE((unused)) = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;
    void *tmp_buf = NULL;

    /* Dequeue under the message-queue mutex only; everything after runs
     * outside the lock. */
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_MSGQ_MUTEX);
    found = MPIDI_CH3U_Recvq_DP(req);
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_MSGQ_MUTEX);
    /* an MPI_ANY_SOURCE request may have been previously removed from the
     * CH3 queue by an FDP (find and dequeue posted) operation */
    if (req->dev.match.parts.rank != MPI_ANY_SOURCE) {
        MPIU_Assert(found);
    }

    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype,
                            dt_contig, userbuf_sz, dt_ptr, dt_true_lb);

    vc_area = VC_BASE(req->ch.vc);
    req_area = REQ_BASE(req);

    /* debug dump of at most the first 16 bytes of the first iov entry */
    _dbg_mxm_out_buf(req_area->iov_buf[0].ptr,
                     (req_area->iov_buf[0].length > 16 ? 16 : req_area->iov_buf[0].length));

    if (req->dev.recv_data_sz <= userbuf_sz) {
        data_sz = req->dev.recv_data_sz;
        /* Status may already carry a truncation indication (set by the MXM
         * completion callback path); expand it into a full error code with
         * source/tag/size details. */
        if (req->status.MPI_ERROR == MPI_ERR_TRUNCATE) {
            req->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                                         FCNAME, __LINE__,
                                                         MPI_ERR_TRUNCATE, "**truncate",
                                                         "**truncate %d %d %d %d",
                                                         req->status.MPI_SOURCE,
                                                         req->status.MPI_TAG,
                                                         req->dev.recv_data_sz, userbuf_sz);
        }
    }
    else {
        /* Message larger than the posted buffer: clamp and mark truncation. */
        data_sz = userbuf_sz;
        MPIR_STATUS_SET_COUNT(req->status, userbuf_sz);
        MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE, (MPIU_DBG_FDEST,
                                              "receive buffer too small; message truncated, msg_sz="
                                              MPIDI_MSG_SZ_FMT ", userbuf_sz="
                                              MPIDI_MSG_SZ_FMT,
                                              req->dev.recv_data_sz, userbuf_sz));
        req->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                                     FCNAME, __LINE__,
                                                     MPI_ERR_TRUNCATE, "**truncate",
                                                     "**truncate %d %d %d %d",
                                                     req->status.MPI_SOURCE,
                                                     req->status.MPI_TAG,
                                                     req->dev.recv_data_sz, userbuf_sz);
    }

    if (!dt_contig) {
        MPIDI_msg_sz_t last = 0;

        if (req->dev.tmpbuf != NULL) {
            /* Data landed in a single temporary buffer: unpack it through
             * the segment, then remember to free it at the end. */
            last = req->dev.recv_data_sz;
            MPID_Segment_unpack(req->dev.segment_ptr, 0, &last,
                                req->dev.tmpbuf);
            tmp_buf = req->dev.tmpbuf;
        }
        else {
            /* Data landed in the request's iov list: mirror it into an
             * MPL_IOV array and unpack as a vector. */
            mxm_req_buffer_t *iov_buf;
            MPL_IOV *iov;
            int n_iov = 0;
            int index;

            last = req->dev.recv_data_sz;
            n_iov = req_area->iov_count;
            iov_buf = req_area->iov_buf;
            if (last && n_iov > 0) {
                iov = MPIU_Malloc(n_iov * sizeof(*iov));
                MPIU_Assert(iov);
                for (index = 0; index < n_iov; index++) {
                    iov[index].MPL_IOV_BUF = iov_buf[index].ptr;
                    iov[index].MPL_IOV_LEN = iov_buf[index].length;
                }
                MPID_Segment_unpack_vector(req->dev.segment_ptr,
                                           req->dev.segment_first, &last, iov, &n_iov);
                MPIU_Free(iov);
            }
            /* An oversized iov list was heap-allocated (count above
             * MXM_MPICH_MAX_IOV): restore the inline tmp_buf and free the
             * big list below. */
            if (req_area->iov_count > MXM_MPICH_MAX_IOV) {
                tmp_buf = req_area->iov_buf;
                req_area->iov_buf = req_area->tmp_buf;
                req_area->iov_count = 0;
            }
        }
        if (last != data_sz) {
            MPIR_STATUS_SET_COUNT(req->status, last);
            if (req->dev.recv_data_sz <= userbuf_sz) {
                /* If the data can't be unpacked, the we have a
                 * mismatch between the datatype and the amount of
                 * data received.  Throw away received data. */
                MPIR_ERR_SETSIMPLE(req->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch");
            }
        }
    }

    MPIDI_CH3U_Handle_recv_req(req->ch.vc, req, &complete);
    MPIU_Assert(complete == TRUE);

    if (tmp_buf)
        MPIU_Free(tmp_buf);

    return complete;
}
/* MPIDI_CH3U_Post_data_receive_found -- prepare a matched posted receive
 * for the channel to read data into.  Unlike the "receive_data_found"
 * variant, no payload is copied here: the function only loads
 * rreq->dev.iov (contiguous case) or initializes a segment and lets
 * MPIDI_CH3U_Request_load_recv_iov build the iov (non-contiguous or
 * truncated case).
 *
 * Returns MPI_SUCCESS, or an error code on segment-allocation /
 * iov-load failure.  Truncation is recorded in rreq->status.MPI_ERROR,
 * not in the return value. */
int MPIDI_CH3U_Post_data_receive_found(MPIR_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;
    int is_contig;
    MPI_Aint true_lb;
    intptr_t buf_capacity;
    MPIR_Datatype *dtype_ptr = NULL;
    intptr_t recv_sz;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);

    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");

    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype,
                            is_contig, buf_capacity, dtype_ptr, true_lb);

    /* Guard: clamp oversized messages and flag MPI_ERR_TRUNCATE in the
     * status before deciding how to stage the read. */
    recv_sz = rreq->dev.recv_data_sz;
    if (recv_sz > buf_capacity) {
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
            "receive buffer too small; message truncated, msg_sz=%" PRIdPTR
            ", userbuf_sz=%" PRIdPTR,
            rreq->dev.recv_data_sz, buf_capacity));
        rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
            MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
            "**truncate", "**truncate %d %d %d %d",
            rreq->status.MPI_SOURCE, rreq->status.MPI_TAG,
            rreq->dev.recv_data_sz, buf_capacity);
        MPIR_STATUS_SET_COUNT(rreq->status, buf_capacity);
        recv_sz = buf_capacity;
    }

    if (!is_contig || recv_sz != rreq->dev.recv_data_sz) {
        /* Non-contiguous buffer, or buffer too small for the whole
         * message: set up a segment and let the channel drive the iov. */
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"IOV loaded for non-contiguous read");

        rreq->dev.segment_ptr = MPIR_Segment_alloc( );
        MPIR_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno,
                             MPI_ERR_OTHER, "**nomem", "**nomem %s",
                             "MPIR_Segment_alloc");

        MPIR_Segment_init(rreq->dev.user_buf, rreq->dev.user_count,
                          rreq->dev.datatype, rreq->dev.segment_ptr);
        rreq->dev.segment_first = 0;
        rreq->dev.segment_size = recv_sz;

        mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETFATALANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                     "**ch3|loadrecviov");
        }
    }
    else {
        /* Contiguous user buffer that holds the full message: a single
         * iov entry pointing directly at it suffices. */
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"IOV loaded for contiguous read");

        rreq->dev.iov[0].MPL_IOV_BUF =
            (MPL_IOV_BUF_CAST)((char*)(rreq->dev.user_buf) + true_lb);
        rreq->dev.iov[0].MPL_IOV_LEN = recv_sz;
        rreq->dev.iov_count = 1;

        /* FIXME: We want to set the OnDataAvail to the appropriate
         * function, which depends on whether this is an RMA request
         * or a pt-to-pt request. */
        rreq->dev.OnDataAvail = 0;
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_POST_DATA_RECEIVE_FOUND);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/* MPIDI_CH3U_Request_unpack_uebuf -- copy the unexpected-message buffer
 * (rreq->dev.tmpbuf) into the user buffer once the request has been
 * matched.  Contiguous data is memcpy'd; non-contiguous data is unpacked
 * through a stack-allocated segment.  Truncation and datatype-mismatch
 * conditions are recorded in rreq->status, and MPI_SUCCESS is returned
 * regardless (mpi_errno is never set to an error in this function). */
int MPIDI_CH3U_Request_unpack_uebuf(MPID_Request * rreq)
{
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPIDI_msg_sz_t userbuf_sz;
    MPID_Datatype * dt_ptr;
    MPIDI_msg_sz_t unpack_sz;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);

    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype,
                            dt_contig, userbuf_sz, dt_ptr, dt_true_lb);

    if (rreq->dev.recv_data_sz <= userbuf_sz) {
        unpack_sz = rreq->dev.recv_data_sz;
    }
    else {
        /* --BEGIN ERROR HANDLING-- */
        /* Buffer too small: unpack only what fits and flag truncation. */
        MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
            "receive buffer overflow; message truncated, msg_sz="
            MPIDI_MSG_SZ_FMT ", buf_sz=" MPIDI_MSG_SZ_FMT,
            rreq->dev.recv_data_sz, userbuf_sz));
        unpack_sz = userbuf_sz;
        MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz);
        rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
            MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
            "**truncate", "**truncate %d %d",
            rreq->dev.recv_data_sz, userbuf_sz);
        /* --END ERROR HANDLING-- */
    }

    if (unpack_sz > 0) {
        if (dt_contig) {
            /* TODO - check that amount of data is consistent with
               datatype.  In other words, if we were to use
               Segment_unpack() would last = unpack?  If not we should
               return an error (unless configured with --enable-fast) */
            MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY);
            MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb,
                        rreq->dev.tmpbuf, unpack_sz);
            MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY);
        }
        else {
            /* Non-contiguous: build a throwaway segment on the stack and
             * unpack through it. */
            MPID_Segment seg;
            MPI_Aint last;

            MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count,
                              rreq->dev.datatype, &seg, 0);
            last = unpack_sz;
            MPID_Segment_unpack(&seg, 0, &last, rreq->dev.tmpbuf);
            if (last != unpack_sz) {
                /* --BEGIN ERROR HANDLING-- */
                /* received data was not entirely consumed by unpack()
                   because too few bytes remained to fill the next basic
                   datatype */
                MPIR_STATUS_SET_COUNT(rreq->status, last);
                rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
                    MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE,
                    "**dtypemismatch", 0);
                /* --END ERROR HANDLING-- */
            }
        }
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF);
    return mpi_errno;
}
/* MPIDI_RecvShortCB -- PAMI callback for a short (fully-arrived) message.
 * Matches the envelope against the posted-receive queue; on a match the
 * payload is copied straight into the user buffer and the request is
 * completed in the handler.  On no match, an unexpected-message request is
 * allocated to hold a copy of the payload.
 *
 * Locking: the MSGQUEUE critical section is entered before matching and
 * released on every path out of the matching logic; note the unmatched
 * path drops and re-acquires it around request allocation, which is why
 * the queue is probed a second time afterwards. */
static inline void MPIDI_RecvShortCB(pami_context_t context,
                                     const void * _msginfo,
                                     const void * sndbuf,
                                     size_t sndlen,
                                     pami_endpoint_t sender,
                                     unsigned isSync)
{
  MPID_assert(_msginfo != NULL);
  const MPIDI_MsgInfo *msginfo = (const MPIDI_MsgInfo *)_msginfo;
  MPID_Request * rreq = NULL;
  pami_task_t source;
#if TOKEN_FLOW_CONTROL
  int rettoks=0;
#endif

  /* -------------------- */
  /*  Match the request.  */
  /* -------------------- */
  unsigned rank       = msginfo->MPIrank;
  unsigned tag        = msginfo->MPItag;
  unsigned context_id = msginfo->MPIctxt;

  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  source = PAMIX_Endpoint_query(sender);
  MPIDI_Receive_tokens(msginfo,source);
#ifndef OUT_OF_ORDER_HANDLING
  rreq = MPIDI_Recvq_FDP(rank, tag, context_id);
#else
  rreq = MPIDI_Recvq_FDP(rank, source, tag, context_id, msginfo->MPIseqno);
#endif

  /* Match not found */
  if (unlikely(rreq == NULL))
    {
#if (MPIDI_STATISTICS)
      MPID_NSTAT(mpid_statp->earlyArrivals);
#endif
      /* Drop the lock while allocating the unexpected request; a racing
       * receive may get posted meanwhile, hence the re-probe below. */
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
      MPID_Request *newreq = MPIDI_Request_create2();
      MPID_assert(newreq != NULL);
      if (sndlen)
      {
        newreq->mpid.uebuflen = sndlen;
        if (!TOKEN_FLOW_CONTROL_ON)
          {
            newreq->mpid.uebuf = MPL_malloc(sndlen);
            newreq->mpid.uebuf_malloc = mpiuMalloc;
          }
        else
          {
#if TOKEN_FLOW_CONTROL
            /* flow-control allocator requires the MSGQUEUE lock */
            MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
            newreq->mpid.uebuf = MPIDI_mm_alloc(sndlen);
            newreq->mpid.uebuf_malloc = mpidiBufMM;
            MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
#else
            MPID_assert_always(0);
#endif
          }
        MPID_assert(newreq->mpid.uebuf != NULL);
      }
      MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
      /* second probe: a receive may have been posted while unlocked */
#ifndef OUT_OF_ORDER_HANDLING
      rreq = MPIDI_Recvq_FDP(rank, tag, context_id);
#else
      rreq = MPIDI_Recvq_FDP(rank, PAMIX_Endpoint_query(sender), tag, context_id, msginfo->MPIseqno);
#endif

      if (unlikely(rreq == NULL))
        {
          /* Still unmatched: enqueue as an unexpected message. */
          MPIDI_Callback_process_unexp(newreq, context, msginfo, sndlen, sender, sndbuf, NULL, isSync);
          /* request is always complete now */
          if (TOKEN_FLOW_CONTROL_ON && sndlen)
            {
#if TOKEN_FLOW_CONTROL
              MPIDI_Token_cntr[source].unmatched++;
#else
              MPID_assert_always(0);
#endif
            }
          MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
          MPID_Request_release(newreq);
          goto fn_exit_short;
        }
      else
        {
          /* Matched on the second probe: the scratch request is unused. */
          MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
          MPID_Request_discard(newreq);
        }
    }
  else
    {
#if (MPIDI_STATISTICS)
      MPID_NSTAT(mpid_statp->earlyArrivalsMatched);
#endif
      if (TOKEN_FLOW_CONTROL_ON && sndlen)
        {
#if TOKEN_FLOW_CONTROL
          MPIDI_Update_rettoks(source);
          MPIDI_Must_return_tokens(context,source);
#else
          MPID_assert_always(0);
#endif
        }
      MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
    }

  /* the receive queue processing has been completed and we found match */

  /* ---------------------- */
  /*  Copy in information.  */
  /* ---------------------- */
  rreq->status.MPI_SOURCE = rank;
  rreq->status.MPI_TAG    = tag;
  MPIR_STATUS_SET_COUNT(rreq->status, sndlen);
  MPIDI_Request_setCA          (rreq, MPIDI_CA_COMPLETE);
  MPIDI_Request_cpyPeerRequestH(rreq, msginfo);
  MPIDI_Request_setSync        (rreq, isSync);
  MPIDI_Request_setRzv         (rreq, 0);

  /* ----------------------------- */
  /*  Request was already posted.  */
  /* ----------------------------- */
  if (unlikely(isSync))
    MPIDI_SyncAck_post(context, rreq, PAMIX_Endpoint_query(sender));

  /* Non-builtin datatypes take the slower userdefined-dt path. */
  if (unlikely(HANDLE_GET_KIND(rreq->mpid.datatype) != HANDLE_KIND_BUILTIN))
    {
      MPIDI_Callback_process_userdefined_dt(context, sndbuf, sndlen, rreq);
      goto fn_exit_short;
    }
  size_t dt_size = rreq->mpid.userbufcount * MPID_Datatype_get_basic_size(rreq->mpid.datatype);

  /* ----------------------------- */
  /*  Test for truncated message.  */
  /* ----------------------------- */
  if (unlikely(sndlen > dt_size))
    {
#if ASSERT_LEVEL > 0
      MPIDI_Callback_process_trunc(context, rreq, NULL, sndbuf);
      goto fn_exit_short;
#else
      /* fast builds silently clamp instead of reporting truncation */
      sndlen = dt_size;
#endif
    }

  MPID_assert(rreq->mpid.uebuf    == NULL);
  MPID_assert(rreq->mpid.uebuflen == 0);
  void* rcvbuf = rreq->mpid.userbuf;

  if (sndlen > 0)
    {
#if CUDA_AWARE_SUPPORT
      if(MPIDI_Process.cuda_aware_support_on && MPIDI_cuda_is_device_buf(rcvbuf))
        {
          /* NOTE(review): cudaerr is never checked -- a failed device copy
           * is silently ignored here; confirm whether that is intended. */
          cudaError_t cudaerr = CudaMemcpy(rcvbuf, sndbuf, (size_t)sndlen, cudaMemcpyHostToDevice);
        }
      else
#endif
        memcpy(rcvbuf, sndbuf, sndlen);
    }
  TRACE_SET_R_VAL(source,(rreq->mpid.idx),rlen,sndlen);
  TRACE_SET_R_BIT(source,(rreq->mpid.idx),fl.f.comp_in_HH);
  TRACE_SET_R_VAL(source,(rreq->mpid.idx),bufadd,rreq->mpid.userbuf);
  MPIDI_Request_complete(rreq);

 fn_exit_short:
#ifdef OUT_OF_ORDER_HANDLING
  MPIU_THREAD_CS_ENTER(MSGQUEUE,0);
  if (MPIDI_In_cntr[source].n_OutOfOrderMsgs>0)  {
    MPIDI_Recvq_process_out_of_order_msgs(source, context);
  }
  MPIU_THREAD_CS_EXIT(MSGQUEUE,0);
#endif

  /* ---------------------------------------- */
  /*  Signal that the recv has been started.  */
  /* ---------------------------------------- */
  MPIDI_Progress_signal();
}
/* MPIDI_CH3_RecvFromSelf -- complete a receive whose sender is this same
 * process: copy directly from the partner send request's buffer into the
 * receive buffer, then complete both requests.
 *
 * The dangling #endif below closes a #if that precedes this view
 * (presumably guarding an alternate FINEGRAIN_MPI signature -- confirm
 * against the full file).
 *
 * Returns MPI_SUCCESS or the error from MPID_Request_complete. */
int MPIDI_CH3_RecvFromSelf( MPID_Request *rreq, void *buf, MPI_Aint count,
			    MPI_Datatype datatype )
#endif
{
    MPID_Request * const sreq = rreq->partner_request;
    int mpi_errno = MPI_SUCCESS;

    if (sreq != NULL)
    {
	MPIDI_msg_sz_t data_sz;

#if defined(FINEGRAIN_MPI)
        /* FG: Zerocopy */
        void * buf = (void *) (*buf_handle);
        if ( MPIDI_Request_get_self_zerocopy_flag(sreq) &&
             MPIDI_Request_get_self_zerocopy_flag(rreq) )
        {
            int rdt_contig;
            MPI_Aint rdt_true_lb;
            MPID_Datatype * rdt_ptr;

            /* Unexpected Send-Collocated MPIX_Zsend/Izsend - MPIX_Zrecv/Izrecv pairing.
             * Transfer the sender's buffer handle; no data is copied. */
            MPIU_Assert(NULL == rreq->dev.user_buf);
            *(rreq->dev.user_buf_handle) = (void*) (*(sreq->dev.user_buf_handle));

            MPIDI_Datatype_get_info(count, datatype, rdt_contig,
                                    data_sz, rdt_ptr, rdt_true_lb);
            /* MPIX_Zsend buf_handle can't be set to NULL as we don't have
             * a ptr to void **. */
        }
        else if( MPIDI_Request_get_self_zerocopy_flag(sreq) &&
                 !MPIDI_Request_get_self_zerocopy_flag(rreq) )
        {
            /* Unexpected Send-Collocated MPIX_Zsend/Izsend<=>MPI_Recv/Irecv
             * pairing.  Freeing sender buffer */
            MPIDI_CH3U_Buffer_copy(*(sreq->dev.user_buf_handle), sreq->dev.user_count,
                                   sreq->dev.datatype, &sreq->status.MPI_ERROR,
                                   buf, count, datatype, &data_sz,
                                   &rreq->status.MPI_ERROR);
            /* Free the sender's buffer */
            MPIU_Free(*(sreq->dev.user_buf_handle));
        }
        else if( !MPIDI_Request_get_self_zerocopy_flag(sreq) &&
                 MPIDI_Request_get_self_zerocopy_flag(rreq) )
        {
            /* Unexpected Send-Collocated MPI_Send/Isend - MPIX_Zrecv/Izrecv
             * pairing.  Allocating receiver's buffer. */
            MPIU_Assert(NULL == rreq->dev.user_buf);
            /* Added checks for buffer count size as is done in
             * MPIDI_CH3U_Buffer_copy() */
            MPIDI_CH3U_Buffer_allocate(sreq->dev.user_buf, sreq->dev.user_count,
                                       sreq->dev.datatype, &sreq->status.MPI_ERROR,
                                       rreq->dev.user_buf_handle, rreq->dev.user_count,
                                       rreq->dev.datatype, &data_sz,
                                       &rreq->status.MPI_ERROR);
            MPIDI_CH3U_Buffer_copy(sreq->dev.user_buf, sreq->dev.user_count,
                                   sreq->dev.datatype, &sreq->status.MPI_ERROR,
                                   *(rreq->dev.user_buf_handle), rreq->dev.user_count,
                                   rreq->dev.datatype, &data_sz,
                                   &rreq->status.MPI_ERROR);
        } else {
            /* Unexpected Send-Collocated MPI_Send/Isend - MPI_Recv/Irecv pairing */
#endif /* matches #if defined(FINEGRAIN_MPI) */
            /* Plain copy from the sender's buffer into the receiver's;
             * any size mismatch is reported through the status fields. */
            MPIDI_CH3U_Buffer_copy(sreq->dev.user_buf, sreq->dev.user_count,
                                   sreq->dev.datatype, &sreq->status.MPI_ERROR,
                                   buf, count, datatype, &data_sz,
                                   &rreq->status.MPI_ERROR);
#if defined(FINEGRAIN_MPI)
        }
#endif
	MPIR_STATUS_SET_COUNT(rreq->status, data_sz);
        mpi_errno = MPID_Request_complete(sreq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
    }
    else
    {
	/* The sreq is missing which means an error occurred.
	   rreq->status.MPI_ERROR should have been set when the
	   error was detected. */
    }

    /* no other thread can possibly be waiting on rreq, so it is safe to
       reset ref_count and cc */
    mpi_errno = MPID_Request_complete(rreq);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}