/* Stage the payload of an unexpected (unmatched) incoming message into a
 * temporary buffer owned by the request.
 *
 * rreq     - unexpected receive request; rreq->dev.recv_data_sz gives the
 *            total payload size to stage
 * buf      - channel buffer holding the portion of the payload received so far
 * buflen   - in: bytes available in buf; out: bytes consumed here (0 if the
 *            channel must deliver the remainder via the iov)
 * complete - out: TRUE iff the whole payload was copied here
 *
 * On success the request's OnDataAvail handler is set so the staged bytes are
 * unpacked into the user buffer once the receive is matched.
 */
int MPIDI_CH3U_Receive_data_unexpected(MPIR_Request * rreq, void *buf, intptr_t *buflen, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);

    /* FIXME: to improve performance, allocate temporary buffer from a
       specialized buffer pool. */
    /* FIXME: to avoid memory exhaustion, integrate buffer pool management
       with flow control */
    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"unexpected request allocated");

    /* The request owns the staging buffer until the matching receive is
       posted; tmpbuf_sz records the allocation size for later accounting. */
    rreq->dev.tmpbuf = MPL_malloc(rreq->dev.recv_data_sz, MPL_MEM_BUFFER);
    if (!rreq->dev.tmpbuf) {
        MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**nomem","**nomem %d",
                             rreq->dev.recv_data_sz);
    }
    rreq->dev.tmpbuf_sz = rreq->dev.recv_data_sz;

    /* Either copy the data now (all of it has arrived) or describe the
       staging buffer with an iov so the channel can finish the transfer. */
    if (*buflen < rreq->dev.recv_data_sz) {
        /* Partial payload: hand the channel an iov covering the staging
           buffer; two pending-recv events are expected before completion. */
        rreq->dev.iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST)((char *)rreq->dev.tmpbuf);
        rreq->dev.iov[0].MPL_IOV_LEN = rreq->dev.recv_data_sz;
        rreq->dev.iov_count = 1;
        rreq->dev.recv_pending_count = 2;
        *buflen = 0;
        *complete = FALSE;
    }
    else {
        /* Entire payload already present: copy it into the staging buffer. */
        MPIR_Memcpy(rreq->dev.tmpbuf, buf, rreq->dev.recv_data_sz);
        *buflen = rreq->dev.recv_data_sz;
        rreq->dev.recv_pending_count = 1;
        *complete = TRUE;
    }

    /* Account the staged bytes against the unexpected-queue buffer PVAR for
       eager-protocol messages. */
    if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG)
        MPIR_T_PVAR_LEVEL_INC(RECVQ, unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);

    /* Unpack the staged data into the user buffer when it becomes available. */
    rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackUEBufComplete;

  fn_fail:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED);
    return mpi_errno;
}
/* Attempt to cancel an outstanding send request (older MPIU/MPID API
 * generation of this routine; a newer MPIR-API version also exists in this
 * file).
 *
 * NOTE(review): this definition appears TRUNCATED in this chunk — the
 * function scope is still open after the self-message branch below (no
 * fn_exit label or return is visible). Confirm against the full file before
 * editing; code left byte-identical here.
 */
int MPID_Cancel_send(MPID_Request * sreq)
{
    MPIDI_VC_t * vc;
    int proto;
    int flag;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_CANCEL_SEND);

    MPIU_Assert(sreq->kind == MPID_REQUEST_SEND);

    /* If a cancel is already pending on this request, there is nothing to do. */
    MPIDI_Request_cancel_pending(sreq, &flag);
    if (flag)
    {
	goto fn_exit;
    }

    /*
     * FIXME: user requests returned by MPI_Ibsend() have a NULL comm pointer
     * and no pointer to the underlying communication
     * request.  For now, we simply fail to cancel the request.  In the future,
     * we should add a new request kind to indicate that
     * the request is a BSEND.  Then we can properly cancel the request, much
     * in the way we do persistent requests.
     */
    if (sreq->comm == NULL)
    {
	goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(sreq->comm, sreq->dev.match.parts.rank, &vc);

    proto = MPIDI_Request_get_msg_type(sreq);

    if (proto == MPIDI_REQUEST_SELF_MSG)
    {
	MPID_Request * rreq;

	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,
		     "attempting to cancel message sent to self");

	/* A self-send can only be cancelled if its matching receive request
	   is still sitting unmatched in the receive queue; FDU removes it
	   under the message-queue critical section. */
	MPIU_THREAD_CS_ENTER(MSGQUEUE,);
	rreq = MPIDI_CH3U_Recvq_FDU(sreq->handle, &sreq->dev.match);
	MPIU_THREAD_CS_EXIT(MSGQUEUE,);
	if (rreq)
	{
	    MPIU_Assert(rreq->partner_request == sreq);

	    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
             "send-to-self cancellation successful, sreq=0x%08x, rreq=0x%08x",
						sreq->handle, rreq->handle));

	    /* The dequeued receive request is invisible to the user, so drop
	       its references and destroy it outright. */
	    MPIU_Object_set_ref(rreq, 0);
	    MPIDI_CH3_Request_destroy(rreq);

	    sreq->status.cancelled = TRUE;
	    /* no other thread should be waiting on sreq, so it is safe to
	       reset ref_count and cc */
	    MPID_cc_set(&sreq->cc, 0); /* FIXME should be a decr and assert, not a set */
	    MPIU_Object_set_ref(sreq, 1);
	}
	else
	{
	    sreq->status.cancelled = FALSE;
	    /* NOTE(review): rreq is NULL on this path, yet the debug message
	       below reads rreq->handle; harmless only when the DBG macro
	       expands to nothing — confirm and fix upstream. */
	    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
               "send-to-self cancellation failed, sreq=0x%08x, rreq=0x%08x",
						sreq->handle, rreq->handle));
	}

	goto fn_exit;
    }
/* Find-Dequeue-Unexpected or Allocate-Enqueue-Posted:
 * search the unexpected-message queue for a message matching
 * (source, tag, context_id); if found, dequeue and return it with *foundp set
 * to TRUE.  Otherwise allocate a fresh receive request, fill in its match and
 * mask fields, append it to the posted-receive queue, and return it with
 * *foundp = FALSE.
 *
 * source/tag may be MPI_ANY_SOURCE/MPI_ANY_TAG (handled via the masked-match
 * path).  comm gains a reference on the returned request.  Caller must hold
 * the MSGQUEUE critical section (asserted below).
 *
 * Fault tolerance: a named source on a MORIBUND VC, or an ANY_SOURCE receive
 * on a communicator with anysource disabled, completes the new request
 * immediately with MPIX_ERR_PROC_FAIL_STOP instead of posting it.
 */
MPID_Request * MPIDI_CH3U_Recvq_FDU_or_AEP(int source, int tag,
                                           int context_id, MPID_Comm *comm, void *user_buf,
                                           int user_count, MPI_Datatype datatype, int * foundp)
{
    MPID_Time_t timer_start;
    int found;
    MPID_Request *rreq, *prev_rreq;
    MPIDI_Message_match match;
    MPIDI_Message_match mask;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);

    MPIU_THREAD_CS_ASSERT_HELD(MSGQUEUE);

    /* Store how much time is spent traversing the queue */
    MPIR_T_START_TIMER(RECVQ_STATISTICS, timer_start);

    /* Optimize this loop for an empty unexpected receive queue */
    rreq = recvq_unexpected_head;
    if (rreq) {
        prev_rreq = NULL;

        match.parts.context_id = context_id;
        match.parts.tag = tag;
        match.parts.rank = source;

        if (tag != MPI_ANY_TAG && source != MPI_ANY_SOURCE) {
            /* Fully-specified match: compare without a mask. */
            do {
                MPIR_T_INC(RECVQ_STATISTICS, unexpected_recvq_match_attempts);
                if (MATCH_WITH_NO_MASK(rreq->dev.match, match)) {
                    /* Unlink rreq from the singly-linked unexpected queue,
                       fixing up head and tail as needed. */
                    if (prev_rreq != NULL) {
                        prev_rreq->dev.next = rreq->dev.next;
                    }
                    else {
                        recvq_unexpected_head = rreq->dev.next;
                    }

                    if (rreq->dev.next == NULL) {
                        recvq_unexpected_tail = prev_rreq;
                    }
                    MPIR_T_DEC(RECVQ_STATISTICS, unexpected_qlen);

                    /* Eager messages carry staged payload; release its bytes
                       from the unexpected-queue buffer statistic. */
                    if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG)
                        MPIR_T_SUBTRACT(RECVQ_STATISTICS, MPIDI_CH3I_unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);

                    /* Bind the matched request to the caller's receive. */
                    rreq->comm = comm;
                    MPIR_Comm_add_ref(comm);
                    rreq->dev.user_buf = user_buf;
                    rreq->dev.user_count = user_count;
                    rreq->dev.datatype = datatype;
                    found = TRUE;
                    goto lock_exit;
                }
                prev_rreq = rreq;
                rreq = rreq->dev.next;
            } while (rreq);
        }
        else {
            /* Wildcard receive: build a mask that zeroes out the wildcarded
               field(s) so those positions always compare equal. */
            mask.parts.context_id = mask.parts.rank = mask.parts.tag = ~0;
            if (tag == MPI_ANY_TAG)
                match.parts.tag = mask.parts.tag = 0;
            if (source == MPI_ANY_SOURCE)
                match.parts.rank = mask.parts.rank = 0;

            do {
                MPIR_T_INC(RECVQ_STATISTICS, unexpected_recvq_match_attempts);
                if (MATCH_WITH_LEFT_MASK(rreq->dev.match, match, mask)) {
                    /* Same dequeue + bind sequence as the no-mask path. */
                    if (prev_rreq != NULL) {
                        prev_rreq->dev.next = rreq->dev.next;
                    }
                    else {
                        recvq_unexpected_head = rreq->dev.next;
                    }
                    if (rreq->dev.next == NULL) {
                        recvq_unexpected_tail = prev_rreq;
                    }
                    MPIR_T_DEC(RECVQ_STATISTICS, unexpected_qlen);

                    if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG)
                        MPIR_T_SUBTRACT(RECVQ_STATISTICS, MPIDI_CH3I_unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz);

                    rreq->comm = comm;
                    MPIR_Comm_add_ref(comm);
                    rreq->dev.user_buf = user_buf;
                    rreq->dev.user_count = user_count;
                    rreq->dev.datatype = datatype;
                    found = TRUE;
                    goto lock_exit;
                }
                prev_rreq = rreq;
                rreq = rreq->dev.next;
            } while (rreq);
        }
    }
    MPIR_T_END_TIMER(RECVQ_STATISTICS, timer_start, time_matching_unexpectedq);

    /* A matching request was not found in the unexpected queue, so we need to
       allocate a new request and add it to the posted queue */
    {
        int mpi_errno = MPI_SUCCESS;

        found = FALSE;

        /* NOTE: on allocation failure this macro jumps to lock_exit with rreq
           presumably NULL/invalid — local mpi_errno is not propagated to the
           caller; verify callers detect this. */
        MPIDI_Request_create_rreq( rreq, mpi_errno, goto lock_exit );
        rreq->dev.match.parts.tag = tag;
        rreq->dev.match.parts.rank = source;
        rreq->dev.match.parts.context_id = context_id;

        /* Added a mask for faster search on 64-bit capable
         * platforms */
        rreq->dev.mask.parts.context_id = ~0;
        if (rreq->dev.match.parts.rank == MPI_ANY_SOURCE)
            rreq->dev.mask.parts.rank = 0;
        else
            rreq->dev.mask.parts.rank = ~0;
        if (rreq->dev.match.parts.tag == MPI_ANY_TAG)
            rreq->dev.mask.parts.tag = 0;
        else
            rreq->dev.mask.parts.tag = ~0;

        rreq->comm = comm;
        MPIR_Comm_add_ref(comm);
        rreq->dev.user_buf = user_buf;
        rreq->dev.user_count = user_count;
        rreq->dev.datatype = datatype;

        /* check whether VC has failed, or this is an ANY_SOURCE in a
           failed communicator */
        if (source != MPI_ANY_SOURCE) {
            MPIDI_VC_t *vc;
            MPIDI_Comm_get_vc(comm, source, &vc);
            if (vc->state == MPIDI_VC_STATE_MORIBUND) {
                /* Named peer has failed: complete the request immediately
                   with a proc-fail error rather than posting it. */
                MPIU_ERR_SET1(mpi_errno, MPIX_ERR_PROC_FAIL_STOP, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                rreq->status.MPI_ERROR = mpi_errno;
                MPIDI_CH3U_Request_complete(rreq);
                goto lock_exit;
            }
        }
        else if (!MPIDI_CH3I_Comm_AS_enabled(comm)) {
            /* ANY_SOURCE receives are disallowed on this communicator
               (a process has failed), so fail the request up front. */
            MPIU_ERR_SET(mpi_errno, MPIX_ERR_PROC_FAIL_STOP, "**comm_fail");
            rreq->status.MPI_ERROR = mpi_errno;
            MPIDI_CH3U_Request_complete(rreq);
            goto lock_exit;
        }

        /* Append the new request to the tail of the posted-receive queue. */
        rreq->dev.next = NULL;
        if (recvq_posted_tail != NULL) {
            recvq_posted_tail->dev.next = rreq;
        }
        else {
            recvq_posted_head = rreq;
        }
        recvq_posted_tail = rreq;
        MPIR_T_INC(RECVQ_STATISTICS, posted_qlen);
        MPIDI_POSTED_RECV_ENQUEUE_HOOK(rreq);
    }

  lock_exit:
    *foundp = found;

    /* If a match was not found, the timer was stopped after the traversal */
    if (found)
        MPIR_T_END_TIMER(RECVQ_STATISTICS, timer_start, time_matching_unexpectedq);

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDU_OR_AEP);
    return rreq;
}
/* Attempt to cancel an outstanding send request (newer MPIR-API generation of
 * this routine).
 *
 * Strategy, in order:
 *   1. If a cancel is already pending, or the request has no comm (Ibsend
 *      case — see FIXME), give up immediately.
 *   2. Self-send: try to pull the matching receive out of the unexpected
 *      queue; success means the send is cancelled locally.
 *   3. Netmod override: delegate to vc->comm_ops->cancel_send if provided.
 *   4. Try to remove the message from the local send queue (rendezvous RTS
 *      handling below; note the dead branches flagged inline).
 *   5. Otherwise send a cancel-request packet to the receiver and let the
 *      remote side respond.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_Cancel_send(MPIR_Request * sreq)
{
    MPIDI_VC_t * vc;
    int proto;
    int flag;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CANCEL_SEND);

    MPIR_Assert(sreq->kind == MPIR_REQUEST_KIND__SEND);

    /* Nothing to do if a cancel is already pending on this request. */
    MPIDI_Request_cancel_pending(sreq, &flag);
    if (flag)
    {
	goto fn_exit;
    }

    /*
     * FIXME: user requests returned by MPI_Ibsend() have a NULL comm pointer
     * and no pointer to the underlying communication
     * request.  For now, we simply fail to cancel the request.  In the future,
     * we should add a new request kind to indicate that
     * the request is a BSEND.  Then we can properly cancel the request, much
     * in the way we do persistent requests.
     */
    if (sreq->comm == NULL)
    {
	goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(sreq->comm, sreq->dev.match.parts.rank, &vc);

    proto = MPIDI_Request_get_msg_type(sreq);

    if (proto == MPIDI_REQUEST_SELF_MSG)
    {
	MPIR_Request * rreq;

	MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,
		     "attempting to cancel message sent to self");

	/* The self-send can be cancelled only while its partner receive
	   request is still unmatched; FDU dequeues it under the msgq mutex. */
	MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
	rreq = MPIDI_CH3U_Recvq_FDU(sreq->handle, &sreq->dev.match);
	MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
	if (rreq)
	{
	    MPIR_Assert(rreq->dev.partner_request == sreq);

	    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
             "send-to-self cancellation successful, sreq=0x%08x, rreq=0x%08x",
						sreq->handle, rreq->handle));

            /* Pull the message out of the unexpected queue since it's
             * being cancelled.  The below request release drops one
             * reference.  We explicitly drop a second reference,
             * because the receive request will never be visible to
             * the user. */
            MPIR_Request_free(rreq);
            MPIR_Request_free(rreq);

	    MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);
            mpi_errno = MPID_Request_complete(sreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
	}
	else
	{
	    MPIR_STATUS_SET_CANCEL_BIT(sreq->status, FALSE);
	    /* NOTE(review): rreq is NULL on this path but the message below
	       reads rreq->handle; benign only if the DBG macro compiles out —
	       confirm and fix upstream. */
	    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
               "send-to-self cancellation failed, sreq=0x%08x, rreq=0x%08x",
						sreq->handle, rreq->handle));
	}

	goto fn_exit;
    }

    /* If the message went over a netmod and it provides a cancel_send
       function, call it here. */
#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->cancel_send)
    {
        mpi_errno = vc->comm_ops->cancel_send(vc, sreq);
        goto fn_exit;
    }
#endif

    /* Check to see if the send is still in the send queue.  If so, remove it,
       mark the request and cancelled and complete, and release the device's
       reference to the request object.  */
    {
	int cancelled;

	if (proto == MPIDI_REQUEST_RNDV_MSG)
	{
	    MPIR_Request * rts_sreq;
	    /* The cancellation of the RTS request needs to be atomic through
	       the destruction of the RTS request to avoid conflict with
	       release of the RTS request if the CTS is received (see handling
	       of a rendezvous CTS packet in MPIDI_CH3U_Handle_recv_pkt()).
	       MPID_Request_fetch_and_clear_rts_sreq() is used to gurantee
	       that atomicity. */
	    MPIDI_Request_fetch_and_clear_rts_sreq(sreq, &rts_sreq);
	    if (rts_sreq != NULL)
	    {
		/* NOTE(review): cancelled is unconditionally FALSE here and
		   mpi_errno is still MPI_SUCCESS, so both the error-handling
		   block and the if(cancelled) block below are dead code —
		   presumably left behind when channel-level RNDV cancellation
		   was removed; confirm before deleting. */
		cancelled = FALSE;

		/* since we attempted to cancel a RTS request, then we are
		   responsible for releasing that request */
		MPIR_Request_free(rts_sreq);

		/* --BEGIN ERROR HANDLING-- */
		if (mpi_errno != MPI_SUCCESS)
		{
		    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_OTHER,
						     "**ch3|cancelrndv", 0);
		    goto fn_exit;
		}
		/* --END ERROR HANDLING-- */

		if (cancelled)
		{
		    MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);
		    /* no other thread should be waiting on sreq, so it is
		       safe to reset ref_count and cc */
                    MPIR_cc_set(&sreq->cc, 0); /* FIXME should be a decr and assert, not a set */
		    MPIR_Object_set_ref(sreq, 1);
		    goto fn_exit;
		}
	    }
	}
	else
	{
	    /* NOTE(review): cancelled is unconditionally FALSE, making the
	       branch below dead code as well — same presumed history. */
	    cancelled = FALSE;
	    if (cancelled)
	    {
		MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);
		/* no other thread should be waiting on sreq, so it is safe to
		   reset ref_count and cc */
                MPIR_cc_set(&sreq->cc, 0); /* FIXME should be a decr and assert, not a set */
		MPIR_Object_set_ref(sreq, 1);
		goto fn_exit;
	    }
	}
    }

    /* Part or all of the message has already been sent, so we need to send a
       cancellation request to the receiver in an attempt to catch the message
       before it is matched. */
    {
	int was_incomplete;
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_cancel_send_req_t * const csr_pkt = &upkt.cancel_send_req;
	MPIR_Request * csr_sreq;

	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
              "sending cancel request to %d for 0x%08x",
	      sreq->dev.match.parts.rank, sreq->handle));

	/* The completion counter and reference count are incremented to keep
	   the request around long enough to receive a
	   response regardless of what the user does (free the request before
	   waiting, etc.). */
	MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
	if (!was_incomplete)
	{
	    /* The reference count is incremented only if the request was
	       complete before the increment. */
	    MPIR_Request_add_ref( sreq );
	}

	/* Fill in the cancel-request packet from the send's match info and
	   hand it to the channel under the VC's per-object mutex. */
	MPIDI_Pkt_init(csr_pkt, MPIDI_CH3_PKT_CANCEL_SEND_REQ);
	csr_pkt->match.parts.rank = sreq->comm->rank;
	csr_pkt->match.parts.tag = sreq->dev.match.parts.tag;
	csr_pkt->match.parts.context_id = sreq->dev.match.parts.context_id;
	csr_pkt->sender_req_id = sreq->handle;

	MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
	mpi_errno = MPIDI_CH3_iStartMsg(vc, csr_pkt, sizeof(*csr_pkt), &csr_sreq);
	MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|cancelreq");
	}

	/* Drop our reference to the packet-send request; the channel holds
	   its own while the message is in flight. */
	if (csr_sreq != NULL)
	{
	    MPIR_Request_free(csr_sreq);
	}
    }

    /* FIXME: if send cancellation packets are allowed to arrive out-of-order
       with respect to send packets, then we need to timestamp send and cancel
       packets to insure that a cancellation request does not bypass the send
       packet to be cancelled and erroneously cancel a previously sent message
       with the same request handle. */
    /* FIXME: A timestamp is more than is necessary; a message sequence number
       should be adequate. */
  fn_fail:
  fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CANCEL_SEND);
    return mpi_errno;
}