int MPID_nem_ptl_recv_posted(MPIDI_VC_t *vc, MPIR_Request *rreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    ptl_me_t me;
    int dt_contig;
    intptr_t data_sz;
    MPIDU_Datatype *dt_ptr;
    MPI_Aint dt_true_lb;
    MPI_Aint last;
    ptl_process_t id_any;
    int ret;
    MPIR_CHKPMEM_DECL(1);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_PTL_RECV_POSTED);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_PTL_RECV_POSTED);

    id_any.phys.nid = PTL_NID_ANY;
    id_any.phys.pid = PTL_PID_ANY;

    MPID_nem_ptl_init_req(rreq);

    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = (PTL_ME_OP_PUT | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                  PTL_ME_EVENT_UNLINK_DISABLE | PTL_ME_USE_ONCE);
    if (vc == NULL) {
        /* MPI_ANY_SOURCE receive */
        me.match_id = id_any;
    } else {
        if (!vc_ptl->id_initialized) {
            mpi_errno = MPID_nem_ptl_init_id(vc);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
        me.match_id = vc_ptl->id;
    }

    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE,
                    (MPL_DBG_FDEST, "tag=%#x ctx=%#x rank=%#x",
                     rreq->dev.match.parts.tag,
                     rreq->dev.match.parts.context_id,
                     rreq->dev.match.parts.rank));
    me.match_bits = NPTL_MATCH(rreq->dev.match.parts.tag, rreq->dev.match.parts.context_id,
                               rreq->dev.match.parts.rank);
    if (rreq->dev.match.parts.tag == MPI_ANY_TAG)
        me.ignore_bits = NPTL_MATCH_IGNORE_ANY_TAG;
    else
        me.ignore_bits = NPTL_MATCH_IGNORE;

    me.min_free = 0;

    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz,
                            dt_ptr, dt_true_lb);
    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE,
                    (MPL_DBG_FDEST, "count="MPI_AINT_FMT_DEC_SPEC" datatype=%#x contig=%d data_sz=%lu",
                     rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz));

    if (data_sz <= PTL_LARGE_THRESHOLD) {
        if (dt_contig) {
            /* small contig message */
            void *start = (char *)rreq->dev.user_buf + dt_true_lb;
            MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "Small contig message");
            if (start == NULL)
                me.start = &dummy;
            else
                me.start = start;
            me.length = data_sz;
            REQ_PTL(rreq)->event_handler = handler_recv_dequeue_complete;
        } else {
            /* small noncontig */
            MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "Small noncontig message");
            rreq->dev.segment_ptr = MPIDU_Segment_alloc();
            MPIR_ERR_CHKANDJUMP1(rreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER,
                                 "**nomem", "**nomem %s", "MPIDU_Segment_alloc");
            MPIDU_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype,
                               rreq->dev.segment_ptr, 0);
            rreq->dev.segment_first = 0;
            rreq->dev.segment_size = data_sz;

            last = rreq->dev.segment_size;
            rreq->dev.iov_count = MPL_IOV_LIMIT;
            MPIDU_Segment_pack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last,
                                      rreq->dev.iov, &rreq->dev.iov_count);

            if (last == rreq->dev.segment_size) {
                /* entire message fits in IOV */
                MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, " entire message fits in IOV");
                me.start = rreq->dev.iov;
                me.length = rreq->dev.iov_count;
                me.options |= PTL_IOVEC;
                REQ_PTL(rreq)->event_handler = handler_recv_dequeue_complete;
            } else {
                /* IOV is not long enough to describe entire message:
                   recv into buffer and unpack later */
                MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, " IOV too long: using bounce buffer");
                MPIR_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz,
                                    mpi_errno, "chunk_buffer");
                me.start = REQ_PTL(rreq)->chunk_buffer[0];
                me.length = data_sz;
                REQ_PTL(rreq)->event_handler = handler_recv_dequeue_unpack_complete;
            }
        }
    } else {
        /* Large message: Create an ME for the first chunk of data, then do a GET for the rest */
        if (dt_contig) {
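/* The listing above breaks off inside the large-message branch, but every
 * branch ends the same way: the fully populated ptl_me_t is appended to the
 * portal table's priority list so an incoming PUT can be matched against it.
 * Below is a minimal sketch of that final step using the standard Portals4
 * PtlMEAppend() call; the ni/pt handles and the helper name are illustrative
 * placeholders, not the netmod's actual globals. */
#include <portals4.h>

static int post_recv_me(ptl_handle_ni_t ni, ptl_pt_index_t pt,
                        ptl_me_t *me, void *rreq,
                        ptl_handle_me_t *me_handle)
{
    /* PTL_PRIORITY_LIST entries are matched before the overflow list,
     * i.e. this is a "posted receive" in MPI terms.  The request passed
     * as user_ptr is returned in the full event, which is how an event
     * handler like the ones installed above can recover the matching
     * MPI request. */
    return PtlMEAppend(ni, pt, me, PTL_PRIORITY_LIST, rreq, me_handle);
}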
/* MPIDI_CH3_EagerSyncNoncontigSend - Eagerly send noncontiguous data in
   synchronous mode.

   Some implementations may choose to use Rendezvous sends (see ch3u_rndv.c)
   for all Synchronous sends (MPI_Issend and MPI_Ssend).  An eager synchronous
   send eliminates one of the handshake messages, but most application codes
   should not be using synchronous sends in performance-critical operations. */
int MPIDI_CH3_EagerSyncNoncontigSend( MPIR_Request **sreq_p,
                                      const void * buf, int count,
                                      MPI_Datatype datatype, intptr_t data_sz,
                                      int dt_contig, MPI_Aint dt_true_lb,
                                      int rank, int tag, MPIR_Comm * comm,
                                      int context_offset )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_eager_sync_send_t * const es_pkt = &upkt.eager_sync_send;
    MPIDI_VC_t * vc;
    MPIR_Request *sreq = *sreq_p;

    /* MT FIXME what are the two operations we are waiting for?  the send and
     * the sync response? */
    MPIR_cc_set(&sreq->cc, 2);
    sreq->dev.OnDataAvail = 0;
    sreq->dev.OnFinal = 0;

    MPIDI_Pkt_init(es_pkt, MPIDI_CH3_PKT_EAGER_SYNC_SEND);
    es_pkt->match.parts.rank = comm->rank;
    es_pkt->match.parts.tag = tag;
    es_pkt->match.parts.context_id = comm->context_id + context_offset;
    es_pkt->sender_req_id = sreq->handle;
    es_pkt->data_sz = data_sz;

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(es_pkt, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);

    MPL_DBG_MSGPKT(vc, tag, es_pkt->match.parts.context_id, rank, data_sz, "EagerSync");

    if (dt_contig) {
        MPL_IOV iov[2];
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, VERBOSE, (MPL_DBG_FDEST,
                        "sending contiguous sync eager message, data_sz=%" PRIdPTR,
                        data_sz));

        iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST)es_pkt;
        iov[0].MPL_IOV_LEN = sizeof(*es_pkt);
        iov[1].MPL_IOV_BUF = (MPL_IOV_BUF_CAST)((char *)buf + dt_true_lb);
        iov[1].MPL_IOV_LEN = data_sz;

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iSendv(vc, sreq, iov, 2);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            /* Make sure to destroy the request before setting the pointer to
             * NULL, otherwise we lose the handle on the request */
            MPIR_Request_free(sreq);
            *sreq_p = NULL;
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
        }
        /* --END ERROR HANDLING-- */
    } else {
        MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER, VERBOSE,
                      "sending non-contiguous sync eager message, data_sz=%" PRIdPTR,
                      data_sz);

        sreq->dev.segment_ptr = MPIDU_Segment_alloc( );
        MPIR_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
                             "**nomem", "**nomem %s", "MPIDU_Segment_alloc");
        MPIDU_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        sreq->dev.segment_size = data_sz;

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = vc->sendNoncontig_fn(vc, sreq, es_pkt,
                                         sizeof(MPIDI_CH3_Pkt_eager_sync_send_t));
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    *sreq_p = NULL;
    goto fn_exit;
}
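/* For reference, a minimal (hypothetical) user program that exercises this
 * path: a synchronous-mode send of a noncontiguous datatype.  MPI_Ssend()
 * may not return until the matching receive has begun, which is why the
 * function above initializes the completion counter to 2 -- the request
 * stays pending until both the outgoing data and the receiver's
 * synchronization ack are accounted for.  Whether the eager-sync path
 * (rather than rendezvous) is actually taken depends on data_sz vs. the
 * implementation's eager threshold. */
#include <mpi.h>

int main(int argc, char **argv)
{
    int rank;
    double grid[8][8] = { { 0.0 } };
    MPI_Datatype column;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* One column of an 8x8 array: 8 blocks of 1 double with stride 8,
     * i.e. a noncontiguous datatype (the dt_contig == 0 case above). */
    MPI_Type_vector(8, 1, 8, MPI_DOUBLE, &column);
    MPI_Type_commit(&column);

    if (rank == 0)
        MPI_Ssend(&grid[0][0], 1, column, 1, 0, MPI_COMM_WORLD);
    else if (rank == 1)
        MPI_Recv(&grid[0][0], 1, column, 0, 0, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);

    MPI_Type_free(&column);
    MPI_Finalize();
    return 0;
}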
int MPIDI_CH3_PktHandler_RndvClrToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        intptr_t *buflen, MPIR_Request **rreqp )
{
    MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &pkt->rndv_clr_to_send;
    MPIR_Request * sreq;
    MPIR_Request * rts_sreq;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_send_t * rs_pkt = &upkt.rndv_send;
    int dt_contig;
    MPI_Aint dt_true_lb;
    intptr_t data_sz;
    MPIDU_Datatype * dt_ptr;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER, VERBOSE, "received rndv CTS pkt");

    MPIR_Request_get_ptr(cts_pkt->sender_req_id, sreq);
    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, TERSE, (MPL_DBG_FDEST,
                    "received cts, count=" MPI_AINT_FMT_DEC_SPEC "\n",
                    sreq->dev.user_count));

    sreq->dev.OnDataAvail = 0;
    sreq->dev.OnFinal = 0;

    /* Release the RTS request if one exists.
       MPID_Request_fetch_and_clear_rts_sreq() needs to be atomic to
       prevent cancel send from cancelling the wrong (future) request.
       If MPID_Request_fetch_and_clear_rts_sreq() returns a NULL
       rts_sreq, then MPID_Cancel_send() is responsible for releasing
       the RTS request object. */
    MPIDI_Request_fetch_and_clear_rts_sreq(sreq, &rts_sreq);
    if (rts_sreq != NULL) {
        MPIR_Request_free(rts_sreq);
    }

    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    MPIDI_Pkt_init(rs_pkt, MPIDI_CH3_PKT_RNDV_SEND);
    rs_pkt->receiver_req_id = cts_pkt->receiver_req_id;

    MPIDI_Datatype_get_info(sreq->dev.user_count, sreq->dev.datatype, dt_contig,
                            data_sz, dt_ptr, dt_true_lb);

    if (dt_contig) {
        MPL_IOV iov[MPL_IOV_LIMIT];

        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER, VERBOSE, (MPL_DBG_FDEST,
                        "sending contiguous rndv data, data_sz=%" PRIdPTR,
                        data_sz));

        iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST)rs_pkt;
        iov[0].MPL_IOV_LEN = sizeof(*rs_pkt);
        iov[1].MPL_IOV_BUF = (MPL_IOV_BUF_CAST)((char *)sreq->dev.user_buf + dt_true_lb);
        iov[1].MPL_IOV_LEN = data_sz;

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iSendv(vc, sreq, iov, 2);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        MPIR_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|senddata");
    } else {
        sreq->dev.segment_ptr = MPIDU_Segment_alloc( );
        MPIR_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
                             "**nomem", "**nomem %s", "MPIDU_Segment_alloc");
        MPIDU_Segment_init(sreq->dev.user_buf, sreq->dev.user_count, sreq->dev.datatype,
                           sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        sreq->dev.segment_size = data_sz;

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = vc->sendNoncontig_fn(vc, sreq, rs_pkt, sizeof(*rs_pkt));
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        MPIR_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|senddata");
    }

    *rreqp = NULL;

 fn_fail:
    return mpi_errno;
}
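/* The CTS handler above is the middle step of the ch3 rendezvous exchange:
 * the sender's RTS announces the message, the receiver replies with a CTS
 * once a matching receive exists, and this handler answers the CTS by
 * streaming the RNDV_SEND header plus payload.  Below is a minimal
 * (hypothetical) user program whose transfer is typically large enough to
 * take this path -- the 1 MiB size is an illustrative guess; the actual
 * eager/rendezvous crossover is implementation-dependent. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank;
    const int n = 1 << 20;            /* 1 MiB payload */
    char *buf = calloc(n, 1);

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0)
        MPI_Send(buf, n, MPI_CHAR, 1, 0, MPI_COMM_WORLD);  /* sender: RTS, then wait for CTS */
    else if (rank == 1)
        MPI_Recv(buf, n, MPI_CHAR, 0, 0, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);                        /* receiver: CTS once matched */

    free(buf);
    MPI_Finalize();
    return 0;
}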