/* Send a zero-sized message with eager synchronous.  This is a temporary
   routine, as we may want to replace this with a counterpart to the Eager
   Short message. */
int MPIDI_CH3_EagerSyncZero(MPIR_Request **sreq_p, int rank, int tag,
                            MPIR_Comm * comm, int context_offset )
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_eager_sync_send_t * const sync_pkt = &upkt.eager_sync_send;
    MPIR_Request * const req = *sreq_p;
    MPIDI_VC_t * vc;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending zero length message");

    /* MT FIXME what are the two operations we are waiting for?  the send and
     * the sync response? */
    MPIR_cc_set(&req->cc, 2);
    MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_EAGER_MSG);
    req->dev.OnDataAvail = 0;

    /* Fill in the eager-sync-send header: matching envelope, the sender's
       request handle (echoed back in the sync ACK), and a zero payload. */
    MPIDI_Pkt_init(sync_pkt, MPIDI_CH3_PKT_EAGER_SYNC_SEND);
    sync_pkt->match.parts.tag        = tag;
    sync_pkt->match.parts.rank       = comm->rank;
    sync_pkt->match.parts.context_id = comm->context_id + context_offset;
    sync_pkt->sender_req_id          = req->handle;
    sync_pkt->data_sz                = 0;

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

    /* Stamp the next sequence number on both the packet and the request. */
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(sync_pkt, seqnum);
    MPIDI_Request_set_seqnum(req, seqnum);

    MPL_DBG_MSGPKT(vc,tag,sync_pkt->match.parts.context_id,rank,(intptr_t)0,"EagerSync0");

    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iSend(vc, req, sync_pkt, sizeof(*sync_pkt));
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);

    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        /* Drop our reference before clearing the caller's pointer so the
           request is not leaked. */
        MPIR_Request_free(req);
        *sreq_p = NULL;
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
    }
    /* --END ERROR HANDLING-- */

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/* MPIDI_CH3_RndvSend - Send a request to perform a rendezvous send.

   Builds and sends a rendezvous request-to-send (RTS) packet for *sreq_p;
   the actual data transfer happens later, driven by the matching
   clear-to-send (CTS) from the receiver.  buf/count/datatype/dt_contig/
   dt_true_lb are carried on the request for that later transfer and are not
   consumed here.  On failure, *sreq_p is freed and set to NULL and an
   MPI error code is returned. */
int MPIDI_CH3_RndvSend( MPIR_Request **sreq_p, const void * buf, MPI_Aint count,
                        MPI_Datatype datatype, int dt_contig, intptr_t data_sz,
                        MPI_Aint dt_true_lb, int rank, int tag,
                        MPIR_Comm * comm, int context_offset )
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_req_to_send_t * const rts_pkt = &upkt.rndv_req_to_send;
    MPIDI_VC_t * vc;
    MPIR_Request * rts_sreq;   /* transient request for sending the RTS pkt */
    MPIR_Request *sreq =*sreq_p;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER,VERBOSE,
                  "sending rndv RTS, data_sz=%" PRIdPTR, data_sz);

    sreq->dev.OnDataAvail = 0;
    sreq->dev.partner_request = NULL;

    /* RTS header: matching envelope, our request handle (echoed in the CTS),
       and the payload size so the receiver can check its buffer. */
    MPIDI_Pkt_init(rts_pkt, MPIDI_CH3_PKT_RNDV_REQ_TO_SEND);
    rts_pkt->match.parts.rank = comm->rank;
    rts_pkt->match.parts.tag = tag;
    rts_pkt->match.parts.context_id = comm->context_id + context_offset;
    rts_pkt->sender_req_id = sreq->handle;
    rts_pkt->data_sz = data_sz;

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(rts_pkt, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);

    MPL_DBG_MSGPKT(vc,tag,rts_pkt->match.parts.context_id,rank,data_sz,"Rndv");

    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, rts_pkt, sizeof(*rts_pkt), &rts_sreq);
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        /* Free the user's send request before clearing the caller's pointer */
        MPIR_Request_free(sreq);
        *sreq_p = NULL;
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rtspkt");
    }
    /* --END ERROR HANDLING-- */

    /* iStartMsg may return a request tracking the in-flight RTS packet;
       we only need to check it for a send error and then release it. */
    if (rts_sreq != NULL) {
        if (rts_sreq->status.MPI_ERROR != MPI_SUCCESS) {
            MPIR_Request_free(sreq);
            *sreq_p = NULL;
            mpi_errno = rts_sreq->status.MPI_ERROR;
            MPIR_Request_free(rts_sreq);
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rtspkt");
        }
        MPIR_Request_free(rts_sreq);
    }

    /* FIXME: fill temporary IOV or pack temporary buffer after send to hide
       some latency.  This requires synchronization
       because the CTS packet could arrive and be processed before the above
       iStartmsg completes (depending on the progress engine, threads, etc.). */

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/* MPIDI_CH3_EagerSyncNoncontigSend - Eagerly send noncontiguous data in synchronous mode. Some implementations may choose to use Rendezvous sends (see ch3u_rndv.c) for all Synchronous sends (MPI_Issend and MPI_Ssend). An eager synchronous send eliminates one of the handshake messages, but most application codes should not be using synchronous sends in performance-critical operations. */ int MPIDI_CH3_EagerSyncNoncontigSend( MPIR_Request **sreq_p, const void * buf, int count, MPI_Datatype datatype, intptr_t data_sz, int dt_contig, MPI_Aint dt_true_lb, int rank, int tag, MPIR_Comm * comm, int context_offset ) { int mpi_errno = MPI_SUCCESS; MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_eager_sync_send_t * const es_pkt = &upkt.eager_sync_send; MPIDI_VC_t * vc; MPIR_Request *sreq = *sreq_p; /* MT FIXME what are the two operations we are waiting for? the send and * the sync response? */ MPIR_cc_set(&sreq->cc, 2); sreq->dev.OnDataAvail = 0; sreq->dev.OnFinal = 0; MPIDI_Pkt_init(es_pkt, MPIDI_CH3_PKT_EAGER_SYNC_SEND); es_pkt->match.parts.rank = comm->rank; es_pkt->match.parts.tag = tag; es_pkt->match.parts.context_id = comm->context_id + context_offset; es_pkt->sender_req_id = sreq->handle; es_pkt->data_sz = data_sz; MPIDI_Comm_get_vc_set_active(comm, rank, &vc); MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Pkt_set_seqnum(es_pkt, seqnum); MPIDI_Request_set_seqnum(sreq, seqnum); MPL_DBG_MSGPKT(vc,tag,es_pkt->match.parts.context_id,rank,data_sz,"EagerSync"); if (dt_contig) { MPL_IOV iov[2]; MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST, "sending contiguous sync eager message, data_sz=%" PRIdPTR, data_sz)); iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST)es_pkt; iov[0].MPL_IOV_LEN = sizeof(*es_pkt); iov[1].MPL_IOV_BUF = (MPL_IOV_BUF_CAST) ((char *)buf + dt_true_lb); iov[1].MPL_IOV_LEN = data_sz; MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex); mpi_errno = MPIDI_CH3_iSendv(vc, sreq, iov, 2); MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != 
MPI_SUCCESS) { /* Make sure to destroy the request before setting the pointer to * NULL, otherwise we lose the handle on the request */ MPIR_Request_free(sreq); *sreq_p = NULL; MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg"); } /* --END ERROR HANDLING-- */ } else { MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER,VERBOSE, "sending non-contiguous sync eager message, data_sz=%" PRIdPTR, data_sz); sreq->dev.segment_ptr = MPIDU_Segment_alloc( ); MPIR_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPIDU_Segment_alloc"); MPIDU_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; sreq->dev.segment_size = data_sz; MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex); mpi_errno = vc->sendNoncontig_fn(vc, sreq, es_pkt, sizeof(MPIDI_CH3_Pkt_eager_sync_send_t)); MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } fn_exit: return mpi_errno; fn_fail: *sreq_p = NULL; goto fn_exit; }
/* Packet handler for an incoming eager synchronous send.

   Matches the packet's envelope against the posted-receive queue, copies
   whatever payload arrived in this buffer, and — when the receive was
   already posted — immediately sends an eager-sync ACK back so the sender's
   synchronous send can complete.  For an unexpected message the sync-send
   flag is recorded on the request instead, so the ACK can be sent when the
   matching receive is eventually posted.

   On return, *buflen holds the number of bytes of *pkt consumed and *rreqp
   is the request still expecting payload data (NULL if fully handled). */
int MPIDI_CH3_PktHandler_EagerSyncSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        intptr_t *buflen, MPIR_Request **rreqp )
{
    /* NOTE(review): the packet is accessed through the generic eager_send
       view; presumably eager_sync_send shares this layout prefix — confirm
       against the MPIDI_CH3_Pkt_t union declaration. */
    MPIDI_CH3_Pkt_eager_send_t * es_pkt = &pkt->eager_send;
    MPIR_Request * rreq;
    int found;           /* nonzero if a matching receive was already posted */
    int complete;        /* set by the data-receive helpers */
    char *data_buf;
    intptr_t data_len;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
 "received eager sync send pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
              es_pkt->sender_req_id, es_pkt->match.parts.rank,
              es_pkt->match.parts.tag, es_pkt->match.parts.context_id));
    MPL_DBG_MSGPKT(vc,es_pkt->match.parts.tag,es_pkt->match.parts.context_id,
                   es_pkt->match.parts.rank,es_pkt->data_sz,
                   "ReceivedEagerSync");

    /* Find a posted receive, or allocate+enqueue an unexpected request. */
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&es_pkt->match, &found);
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq",
                         "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this. */
    if (!found && MPIR_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }

    set_request_info(rreq, es_pkt, MPIDI_REQUEST_EAGER_MSG);

    /* Payload available in this buffer: at most what follows the header,
       clamped to the message size. */
    data_len = ((*buflen - sizeof(MPIDI_CH3_Pkt_t) >= rreq->dev.recv_data_sz)
                ? rreq->dev.recv_data_sz : *buflen - sizeof(MPIDI_CH3_Pkt_t));
    data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);

    if (found)
    {
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_eager_sync_ack_t * const esa_pkt = &upkt.eager_sync_ack;
        MPIR_Request * esa_req;

        if (rreq->dev.recv_data_sz == 0) {
            /* Zero-byte message: only the header was consumed; complete now */
            *buflen = sizeof(MPIDI_CH3_Pkt_t);
            mpi_errno = MPID_Request_complete(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
            *rreqp = NULL;
        }
        else {
            mpi_errno = MPIDI_CH3U_Receive_data_found( rreq, data_buf,
                                                       &data_len, &complete );
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
                                     "**ch3|postrecv %s",
                                     "MPIDI_CH3_PKT_EAGER_SYNC_SEND");
            }

            /* data_len may have been reduced to what was actually consumed */
            *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

            if (complete) {
                mpi_errno = MPID_Request_complete(rreq);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIR_ERR_POP(mpi_errno);
                }
                *rreqp = NULL;
            }
            else {
                /* more payload to come; hand the request back to the caller */
                *rreqp = rreq;
            }
        }

        /* Receive is posted, so the ACK can go back to the sender right away;
           it echoes the sender's request handle from the incoming packet. */
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending eager sync ack");

        MPIDI_Pkt_init(esa_pkt, MPIDI_CH3_PKT_EAGER_SYNC_ACK);
        esa_pkt->sender_req_id = rreq->dev.sender_req_id;
        /* Because this is a packet handler, it is already within a CH3 CS */
        /* MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex); */
        mpi_errno = MPIDI_CH3_iStartMsg(vc, esa_pkt, sizeof(*esa_pkt), &esa_req);
        /* MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex); */
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**ch3|syncack");
        }
        if (esa_req != NULL) {
            MPIR_Request_free(esa_req);
        }
    }
    else
    {
        if (rreq->dev.recv_data_sz == 0) {
            *buflen = sizeof(MPIDI_CH3_Pkt_t);
            mpi_errno = MPID_Request_complete(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
            *rreqp = NULL;
        }
        else {
            /* Buffer the payload on the unexpected request. */
            mpi_errno = MPIDI_CH3U_Receive_data_unexpected( rreq, data_buf,
                                                            &data_len, &complete );
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
                                     "**ch3|postrecv %s",
                                     "MPIDI_CH3_PKT_EAGER_SYNC_SEND");
            }

            *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

            if (complete) {
                mpi_errno = MPID_Request_complete(rreq);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIR_ERR_POP(mpi_errno);
                }
                *rreqp = NULL;
            }
            else {
                *rreqp = rreq;
            }
        }
        /* Remember that an ACK is owed; it is sent when the receive is
           posted and matches this unexpected message. */
        MPIDI_Request_set_sync_send_flag(rreq, TRUE);
    }

 fn_fail:
    return mpi_errno;
}
/* Packet handler for a rendezvous request-to-send (RTS).

   Matches the RTS envelope against the posted-receive queue.  If a receive
   is posted, immediately replies with a clear-to-send (CTS) packet carrying
   both request handles; otherwise the request is enqueued as unexpected and
   the progress engine is signalled so a blocked MPID_Probe() can observe it.
   No payload accompanies an RTS, so *buflen is always the header size.

   Fix over the previous version: the POBJ message-queue critical section was
   held across both error exits (allocation failure and a revoked
   communicator), leaking MPIR_THREAD_POBJ_MSGQ_MUTEX in multithreaded
   builds.  The CS is now released on every path before jumping out. */
int MPIDI_CH3_PktHandler_RndvReqToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        intptr_t *buflen, MPIR_Request **rreqp )
{
    MPIR_Request * rreq;
    int found;          /* nonzero if a matching receive was already posted */
    MPIDI_CH3_Pkt_rndv_req_to_send_t * rts_pkt = &pkt->rndv_req_to_send;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
 "received rndv RTS pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d, data_sz=%" PRIdPTR,
              rts_pkt->sender_req_id, rts_pkt->match.parts.rank,
              rts_pkt->match.parts.tag, rts_pkt->match.parts.context_id,
              rts_pkt->data_sz));
    MPL_DBG_MSGPKT(vc,rts_pkt->match.parts.tag,rts_pkt->match.parts.context_id,
                   rts_pkt->match.parts.rank,rts_pkt->data_sz,
                   "ReceivedRndv");

    /* The queue lookup/insert and the request-info update must be atomic
       with respect to other message-queue users. */
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&rts_pkt->match, &found);
    if (rreq == NULL) {
        /* FIX: release the CS before jumping out (was leaked here) */
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
        MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**nomemreq",
                             "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());
    }

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this. */
    if (!found && MPIR_cc_get(rreq->cc) == 0) {
        /* FIX: release the CS before jumping out (was leaked here) */
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
        *rreqp = NULL;
        goto fn_fail;
    }

    set_request_info(rreq, rts_pkt, MPIDI_REQUEST_RNDV_MSG);
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);

    /* An RTS carries no payload; only the header is consumed. */
    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    if (found)
    {
        MPIR_Request * cts_req;
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;

        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");

        /* FIXME: What if the receive user buffer is not big enough to
           hold the data about to be cleared for sending? */

        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending rndv CTS packet");
        MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
        cts_pkt->sender_req_id = rts_pkt->sender_req_id;
        cts_pkt->receiver_req_id = rreq->handle;
        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iStartMsg(vc, cts_pkt, sizeof(*cts_pkt), &cts_req);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**ch3|ctspkt");
        }
        if (cts_req != NULL) {
            /* We don't need to wait for the CTS send to finish */
            MPIR_Request_free(cts_req);
        }
    }
    else
    {
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"unexpected request allocated");

        /*
         * A MPID_Probe() may be waiting for the request we just
         * inserted, so we need to tell the progress engine to exit.
         *
         * FIXME: This will cause MPID_Progress_wait() to return to the
         * MPI layer each time an unexpected RTS packet is
         * received.  MPID_Probe() should atomically increment a
         * counter and MPIDI_CH3_Progress_signal_completion()
         * should only be called if that counter is greater than zero.
         */
        MPIDI_CH3_Progress_signal_completion();
    }

    *rreqp = NULL;

 fn_fail:
    return mpi_errno;
}