/*
 * MPIDI_CH3_PktHandler_EagerSyncSend - packet handler for an incoming eager
 * synchronous-send packet.
 *
 * The packet's match information is looked up with
 * MPIDI_CH3U_Recvq_FDP_or_AEU(), which reports through its second argument
 * whether a request was found.  Any payload that follows the packet header in
 * the same buffer is passed to the "found" or "unexpected" receive routine.
 * When a request was found, an eager-sync acknowledgement is sent back on
 * 'vc'; otherwise the request is flagged as a synchronous send.
 *
 * Parameters:
 *   vc     - virtual connection the packet arrived on; used to send the ack
 *   pkt    - the received packet, interpreted as an eager send packet
 *   buflen - in: number of bytes available starting at 'pkt';
 *            out: header size plus the payload length reported back by the
 *            receive routine (not written on the early-exit paths)
 *   rreqp  - out: the receive request when more data is still expected,
 *            NULL when the request was completed or abandoned
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_PktHandler_EagerSyncSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
					intptr_t *buflen, MPIR_Request **rreqp )
{
    MPIDI_CH3_Pkt_eager_send_t *sync_pkt = &pkt->eager_send;
    MPIR_Request *rreq;
    int matched;
    int recv_done;
    char *payload;
    intptr_t payload_len;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
     "received eager sync send pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
	      sync_pkt->sender_req_id, sync_pkt->match.parts.rank,
	      sync_pkt->match.parts.tag,
	      sync_pkt->match.parts.context_id));
    MPL_DBG_MSGPKT(vc,sync_pkt->match.parts.tag,sync_pkt->match.parts.context_id,
		   sync_pkt->match.parts.rank,sync_pkt->data_sz,
		   "ReceivedEagerSync");

    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&sync_pkt->match, &matched);
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* A completion counter of 0 on a request that was not found means the
     * communicator this message targets has been revoked, so there is no
     * point in finishing the receive. */
    if (!matched && MPIR_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }

    set_request_info(rreq, sync_pkt, MPIDI_REQUEST_EAGER_MSG);

    /* Payload handed to the receive routine: whatever follows the packet
     * header in this buffer, capped at the expected message size. */
    if (*buflen - sizeof(MPIDI_CH3_Pkt_t) >= rreq->dev.recv_data_sz) {
        payload_len = rreq->dev.recv_data_sz;
    }
    else {
        payload_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
    }
    payload = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);

    if (rreq->dev.recv_data_sz == 0) {
        /* Nothing to receive: only the header is consumed. */
        *buflen = sizeof(MPIDI_CH3_Pkt_t);
        mpi_errno = MPID_Request_complete(rreq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
        *rreqp = NULL;
    }
    else {
        if (matched) {
            mpi_errno = MPIDI_CH3U_Receive_data_found( rreq, payload,
                                                       &payload_len, &recv_done );
        }
        else {
            mpi_errno = MPIDI_CH3U_Receive_data_unexpected( rreq, payload,
                                                            &payload_len, &recv_done );
        }
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
                                 "**ch3|postrecv %s",
                                 "MPIDI_CH3_PKT_EAGER_SYNC_SEND");
        }

        *buflen = sizeof(MPIDI_CH3_Pkt_t) + payload_len;

        if (!recv_done) {
            /* More data is still expected; hand the request back. */
            *rreqp = rreq;
        }
        else {
            mpi_errno = MPID_Request_complete(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
            *rreqp = NULL;
        }
    }

    if (matched) {
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_eager_sync_ack_t * const ack_pkt = &upkt.eager_sync_ack;
        MPIR_Request *ack_req;

        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending eager sync ack");
        MPIDI_Pkt_init(ack_pkt, MPIDI_CH3_PKT_EAGER_SYNC_ACK);
        ack_pkt->sender_req_id = rreq->dev.sender_req_id;
        /* Because this is a packet handler, it is already within a CH3 CS,
         * so the per-VC critical section is not entered around the send. */
        mpi_errno = MPIDI_CH3_iStartMsg(vc, ack_pkt, sizeof(*ack_pkt), &ack_req);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                "**ch3|syncack");
        }
        /* The ack request is not tracked any further here. */
        if (ack_req != NULL) {
            MPIR_Request_free(ack_req);
        }
    }
    else {
        /* No request was found: flag this one as a synchronous send
         * (presumably so the ack is issued once a receive matches it;
         * confirm against the receive path). */
        MPIDI_Request_set_sync_send_flag(rreq, TRUE);
    }

 fn_fail:
    return mpi_errno;
}
int MPID_Send(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int tag, MPIR_Comm * comm, int context_offset, MPIR_Request ** request) { intptr_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPIR_Datatype* dt_ptr; MPIR_Request * sreq = NULL; MPIDI_VC_t * vc; #if defined(MPID_USE_SEQUENCE_NUMBERS) MPID_Seqnum_t seqnum; #endif int eager_threshold = -1; int mpi_errno = MPI_SUCCESS; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_SEND); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_SEND); MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->context_id + context_offset)); /* Check to make sure the communicator hasn't already been revoked */ if (comm->revoked && MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) && MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) { MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked"); } if (rank == comm->rank && comm->comm_kind != MPIR_COMM_KIND__INTERCOMM) { mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq); /* In the single threaded case, sending to yourself will cause deadlock. 
Note that in the runtime-thread case, this check will not be made (long-term FIXME) */ # ifndef MPICH_IS_THREADED { if (sreq != NULL && MPIR_cc_get(sreq->cc) != 0) { MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**dev|selfsenddeadlock"); } } # endif if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); } goto fn_exit; } if (rank == MPI_PROC_NULL) { goto fn_exit; } MPIDI_Comm_get_vc_set_active(comm, rank, &vc); MPIR_ERR_CHKANDJUMP1(vc->state == MPIDI_VC_STATE_MORIBUND, mpi_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", rank); #ifdef ENABLE_COMM_OVERRIDES if (vc->comm_ops && vc->comm_ops->send) { mpi_errno = vc->comm_ops->send( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq); goto fn_exit; } #endif MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); if (data_sz == 0) { MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send; MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending zero length message"); MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND); eager_pkt->match.parts.rank = comm->rank; eager_pkt->match.parts.tag = tag; eager_pkt->match.parts.context_id = comm->context_id + context_offset; eager_pkt->sender_req_id = MPI_REQUEST_NULL; eager_pkt->data_sz = 0; MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Pkt_set_seqnum(eager_pkt, seqnum); MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex); mpi_errno = MPIDI_CH3_iStartMsg(vc, eager_pkt, sizeof(*eager_pkt), &sreq); MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg"); } /* --END ERROR HANDLING-- */ if (sreq != NULL) { MPIDI_Request_set_seqnum(sreq, seqnum); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND); /* sreq->comm = comm; MPIR_Comm_add_ref(comm); -- not necessary for blocking functions */ } goto fn_exit; } MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc); /* FIXME: flow control: limit number of 
outstanding eager messages containing data and need to be buffered by the receiver */ #ifdef USE_EAGER_SHORT if (dt_contig && data_sz <= MPIDI_EAGER_SHORT_SIZE) { mpi_errno = MPIDI_CH3_EagerContigShortSend( &sreq, MPIDI_CH3_PKT_EAGERSHORT_SEND, (char *)buf + dt_true_lb, data_sz, rank, tag, comm, context_offset ); } else #endif if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold) { if (dt_contig) { mpi_errno = MPIDI_CH3_EagerContigSend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, (char *)buf + dt_true_lb, data_sz, rank, tag, comm, context_offset ); } else { MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND); mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, buf, count, datatype, data_sz, rank, tag, comm, context_offset ); } } else {
/*
 * MPIDI_CH3_PktHandler_RndvReqToSend - packet handler for a rendezvous
 * request-to-send (RTS) packet.
 *
 * The packet's match information is looked up with
 * MPIDI_CH3U_Recvq_FDP_or_AEU() inside the message-queue critical section.
 * If a request was found, a clear-to-send (CTS) packet carrying the sender's
 * request id and the receive request's handle is sent back on 'vc'.
 * Otherwise the progress engine is signalled, because a probe may be waiting
 * on the newly inserted request.
 *
 * Parameters:
 *   vc     - virtual connection the packet arrived on; used to send the CTS
 *   pkt    - the received packet, interpreted as a rendezvous RTS packet
 *   buflen - out: set to sizeof(MPIDI_CH3_Pkt_t) once the lookup succeeded
 *            (not written on the early-exit paths)
 *   rreqp  - out: set to NULL on the revoked-communicator path and on normal
 *            completion
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_PktHandler_RndvReqToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
					intptr_t *buflen, MPIR_Request **rreqp )
{
    MPIR_Request * rreq;
    int found;
    MPIDI_CH3_Pkt_rndv_req_to_send_t * rts_pkt = &pkt->rndv_req_to_send;
    /* Non-zero while the message-queue critical section is held, so that the
     * early exits below cannot return with it still entered. */
    int msgq_cs_held = 0;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
 "received rndv RTS pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d, data_sz=%" PRIdPTR,
	      rts_pkt->sender_req_id, rts_pkt->match.parts.rank,
					rts_pkt->match.parts.tag,
              rts_pkt->match.parts.context_id, rts_pkt->data_sz));
    MPL_DBG_MSGPKT(vc,rts_pkt->match.parts.tag,rts_pkt->match.parts.context_id,
		   rts_pkt->match.parts.rank,rts_pkt->data_sz,
		   "ReceivedRndv");

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
    msgq_cs_held = 1;

    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&rts_pkt->match, &found);
    /* On failure this jumps to fn_fail, which releases the critical
     * section; the unexpected-queue count is still read while it is held. */
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this. */
    if (!found && MPIR_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }

    set_request_info(rreq, rts_pkt, MPIDI_REQUEST_RNDV_MSG);

    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
    msgq_cs_held = 0;

    /* Only the packet header is consumed from the buffer. */
    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    if (found)
    {
	MPIR_Request * cts_req;
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;

	MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");

	/* FIXME: What if the receive user buffer is not big enough to
	   hold the data about to be cleared for sending? */

	MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending rndv CTS packet");
	MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
	cts_pkt->sender_req_id = rts_pkt->sender_req_id;
	cts_pkt->receiver_req_id = rreq->handle;
        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
	mpi_errno = MPIDI_CH3_iStartMsg(vc, cts_pkt, sizeof(*cts_pkt), &cts_req);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
				"**ch3|ctspkt");
	}
	/* The CTS request is not tracked any further here. */
	if (cts_req != NULL) {
	    MPIR_Request_free(cts_req);
	}
    }
    else
    {
	MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"unexpected request allocated");

	/*
	 * A MPID_Probe() may be waiting for the request we just
	 * inserted, so we need to tell the progress engine to exit.
	 *
	 * FIXME: This will cause MPID_Progress_wait() to return to the
	 * MPI layer each time an unexpected RTS packet is
	 * received.  MPID_Probe() should atomically increment a
	 * counter and MPIDI_CH3_Progress_signal_completion()
	 * should only be called if that counter is greater than zero.
	 */
	MPIDI_CH3_Progress_signal_completion();
    }

    *rreqp = NULL;

 fn_fail:
    /* Reached on both success and failure.  Release the message-queue
     * critical section if an early exit jumped here while it was held. */
    if (msgq_cs_held) {
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
    }
    return mpi_errno;
}