int found; MPIDI_CH3_Pkt_rndv_req_to_send_t * rts_pkt = &pkt->rndv_req_to_send; int mpi_errno = MPI_SUCCESS; MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST, "received rndv RTS pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d, data_sz=%" PRIdPTR, rts_pkt->sender_req_id, rts_pkt->match.parts.rank, rts_pkt->match.parts.tag, rts_pkt->match.parts.context_id, rts_pkt->data_sz)); MPL_DBG_MSGPKT(vc,rts_pkt->match.parts.tag,rts_pkt->match.parts.context_id, rts_pkt->match.parts.rank,rts_pkt->data_sz, "ReceivedRndv"); MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX); rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&rts_pkt->match, &found); MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp()); /* If the completion counter is 0, that means that the communicator to * which this message is being sent has been revoked and we shouldn't * bother finishing this. */ if (!found && MPIR_cc_get(rreq->cc) == 0) { *rreqp = NULL; goto fn_fail; } set_request_info(rreq, rts_pkt, MPIDI_REQUEST_RNDV_MSG); MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX); *buflen = 0;
int MPIDI_Isend_self(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, int type, MPID_Request ** request) { MPIDI_Message_match match; MPID_Request * sreq; MPID_Request * rreq; MPIDI_VC_t * vc; #if defined(MPID_USE_SEQUENCE_NUMBERS) MPID_Seqnum_t seqnum; #endif int found; int mpi_errno = MPI_SUCCESS; MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending message to self"); MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit); MPIDI_Request_set_type(sreq, type); MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_SELF_MSG); match.parts.rank = rank; match.parts.tag = tag; match.parts.context_id = comm->context_id + context_offset; MPIU_THREAD_CS_ENTER(MSGQUEUE,); rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&match, &found); /* --BEGIN ERROR HANDLING-- */ if (rreq == NULL) { MPIU_Object_set_ref(sreq, 0); MPIDI_CH3_Request_destroy(sreq); sreq = NULL; MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp()); goto fn_exit; } /* --END ERROR HANDLING-- */ MPIDI_Comm_get_vc_set_active(comm, rank, &vc); MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Request_set_seqnum(sreq, seqnum); MPIDI_Request_set_seqnum(rreq, seqnum); rreq->status.MPI_SOURCE = rank; rreq->status.MPI_TAG = tag; if (found) { MPIDI_msg_sz_t data_sz; /* we found a posted req, which we now own, so we can release the CS */ MPIU_THREAD_CS_EXIT(MSGQUEUE,); MPIU_DBG_MSG(CH3_OTHER,VERBOSE, "found posted receive request; copying data"); MPIDI_CH3U_Buffer_copy(buf, count, datatype, &sreq->status.MPI_ERROR, rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &data_sz, &rreq->status.MPI_ERROR); rreq->status.count = (int)data_sz; MPID_REQUEST_SET_COMPLETED(rreq); MPID_Request_release(rreq); /* sreq has never been seen by the user or outside this thread, so it is safe to reset ref_count and cc */ MPIU_Object_set_ref(sreq, 1); MPID_cc_set(&sreq->cc, 0); } else { if (type != MPIDI_REQUEST_TYPE_RSEND)
/*
 * MPIDI_CH3_PktHandler_RndvReqToSend - handle a received rendezvous
 * request-to-send (RTS) packet.
 *
 * The packet's match information is looked up with
 * MPIDI_CH3U_Recvq_FDP_or_AEU() while the MSGQUEUE critical section is held.
 * If a posted receive was found, a clear-to-send (CTS) packet carrying the
 * sender's request id and the receive request's handle is started on vc.
 * Otherwise the progress engine is signalled, because a probe may be waiting
 * for the request that was just queued.
 *
 * Parameters:
 *   vc     - virtual connection on which the CTS reply is sent
 *   pkt    - received packet, read through its rndv_req_to_send member
 *   buflen - set to sizeof(MPIDI_CH3_Pkt_t) once the request has been set
 *            up; not written when the request lookup fails
 *   rreqp  - set to NULL when the handler finishes without error; left
 *            untouched when an error is raised
 *
 * Returns MPI_SUCCESS, or an MPI error code if no request could be obtained
 * or the CTS packet could not be started.
 */
int MPIDI_CH3_PktHandler_RndvReqToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPID_Request * rreq;
    int found;
    /* Nonzero while the MSGQUEUE critical section is held, so that the
       common exit at fn_fail can release it after an early jump. */
    int msgq_cs_held = 0;
    MPIDI_CH3_Pkt_rndv_req_to_send_t * rts_pkt = &pkt->rndv_req_to_send;
    int mpi_errno = MPI_SUCCESS;

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
        "received rndv RTS pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d, data_sz=" MPIDI_MSG_SZ_FMT,
        rts_pkt->sender_req_id, rts_pkt->match.parts.rank,
        rts_pkt->match.parts.tag, rts_pkt->match.parts.context_id,
        rts_pkt->data_sz));
    MPIU_DBG_MSGPKT(vc,rts_pkt->match.parts.tag,rts_pkt->match.parts.context_id,
                    rts_pkt->match.parts.rank,rts_pkt->data_sz,
                    "ReceivedRndv");

    MPIU_THREAD_CS_ENTER(MSGQUEUE,);
    msgq_cs_held = 1;
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&rts_pkt->match, &found);
    /* This jumps to fn_fail with the critical section still held; the
       flag above makes sure it is released there. */
    MPIU_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    set_request_info(rreq, rts_pkt, MPIDI_REQUEST_RNDV_MSG);

    MPIU_THREAD_CS_EXIT(MSGQUEUE,);
    msgq_cs_held = 0;

    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    if (found)
    {
        MPID_Request * cts_req;
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"posted request found");

        /* FIXME: What if the receive user buffer is not big enough to
           hold the data about to be cleared for sending? */

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending rndv CTS packet");
        MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
        cts_pkt->sender_req_id = rts_pkt->sender_req_id;
        cts_pkt->receiver_req_id = rreq->handle;
        MPIU_THREAD_CS_ENTER(CH3COMM,vc);
        mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsg(vc, cts_pkt,
                                                  sizeof(*cts_pkt), &cts_req));
        MPIU_THREAD_CS_EXIT(CH3COMM,vc);
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                "**ch3|ctspkt");
        }
        /* The handler keeps no interest in the CTS request itself. */
        if (cts_req != NULL) {
            MPID_Request_release(cts_req);
        }
    }
    else
    {
        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"unexpected request allocated");

        /*
         * A MPID_Probe() may be waiting for the request we just
         * inserted, so we need to tell the progress engine to exit.
         *
         * FIXME: This will cause MPID_Progress_wait() to return to the
         * MPI layer each time an unexpected RTS packet is
         * received.  MPID_Probe() should atomically increment a
         * counter and MPIDI_CH3_Progress_signal_completion()
         * should only be called if that counter is greater than zero.
         */
        MPIDI_CH3_Progress_signal_completion();
    }

    *rreqp = NULL;

 fn_fail:
    /* Reached both on success and on error; drop the message-queue
       critical section if an early jump left it held. */
    if (msgq_cs_held) {
        MPIU_THREAD_CS_EXIT(MSGQUEUE,);
    }
    return mpi_errno;
}
/*
 * MPIDI_CH3_PktHandler_EagerSyncSend - handle a received eager
 * synchronous-send packet.
 *
 * The packet's match information is looked up with
 * MPIDI_CH3U_Recvq_FDP_or_AEU().  The payload is taken to start right after
 * the packet header, and at most rreq->dev.recv_data_sz bytes of it are
 * handed to the receive routine.  When a posted receive was matched, an
 * eager-sync acknowledgement is sent back on vc; otherwise the request is
 * flagged as a synchronous send.
 *
 * Parameters:
 *   vc     - virtual connection on which the acknowledgement is sent
 *   pkt    - received packet, read through its eager_send member
 *   buflen - in: bytes available starting at pkt; out: header size plus the
 *            payload length reported back by the receive routine (header
 *            size only for a zero-length message).  Not written on the
 *            early exits.
 *   rreqp  - set to the receive request when more data is still expected,
 *            NULL when the request was completed or the message is dropped
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_PktHandler_EagerSyncSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        intptr_t *buflen, MPIR_Request **rreqp )
{
    MPIDI_CH3_Pkt_eager_send_t * sync_pkt = &pkt->eager_send;
    MPIR_Request * rreq;
    int matched_posted;
    int recv_done;
    char *payload;
    intptr_t payload_len;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
        "received eager sync send pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
        sync_pkt->sender_req_id, sync_pkt->match.parts.rank,
        sync_pkt->match.parts.tag, sync_pkt->match.parts.context_id));
    MPL_DBG_MSGPKT(vc,sync_pkt->match.parts.tag,sync_pkt->match.parts.context_id,
                   sync_pkt->match.parts.rank,sync_pkt->data_sz,
                   "ReceivedEagerSync");

    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&sync_pkt->match, &matched_posted);
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* A completion counter of zero on a request that was not matched means
     * the target communicator has been revoked, so the message is dropped. */
    if (!matched_posted && MPIR_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }

    set_request_info(rreq, sync_pkt, MPIDI_REQUEST_EAGER_MSG);

    /* Use whichever is smaller: the bytes that follow the header in this
     * buffer, or the size recorded in the request. */
    if (*buflen - sizeof(MPIDI_CH3_Pkt_t) >= rreq->dev.recv_data_sz) {
        payload_len = rreq->dev.recv_data_sz;
    } else {
        payload_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
    }
    payload = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);

    /* Data handling is the same for both cases except for which receive
     * routine gets the payload. */
    if (rreq->dev.recv_data_sz == 0) {
        /* Nothing to copy: only the header is consumed. */
        *buflen = sizeof(MPIDI_CH3_Pkt_t);
        mpi_errno = MPID_Request_complete(rreq);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
        *rreqp = NULL;
    } else {
        if (matched_posted) {
            mpi_errno = MPIDI_CH3U_Receive_data_found( rreq, payload,
                                                       &payload_len, &recv_done );
        } else {
            mpi_errno = MPIDI_CH3U_Receive_data_unexpected( rreq, payload,
                                                            &payload_len, &recv_done );
        }
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
                                 "**ch3|postrecv %s",
                                 "MPIDI_CH3_PKT_EAGER_SYNC_SEND");
        }

        *buflen = sizeof(MPIDI_CH3_Pkt_t) + payload_len;

        if (!recv_done) {
            /* More data is still to come for this request. */
            *rreqp = rreq;
        } else {
            mpi_errno = MPID_Request_complete(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
            *rreqp = NULL;
        }
    }

    if (matched_posted) {
        MPIDI_CH3_Pkt_t ack_storage;
        MPIDI_CH3_Pkt_eager_sync_ack_t * const ack_pkt = &ack_storage.eager_sync_ack;
        MPIR_Request * ack_req;

        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending eager sync ack");
        MPIDI_Pkt_init(ack_pkt, MPIDI_CH3_PKT_EAGER_SYNC_ACK);
        ack_pkt->sender_req_id = rreq->dev.sender_req_id;
        /* No per-VC critical section is taken here: a packet handler is
         * already running inside a CH3 critical section. */
        mpi_errno = MPIDI_CH3_iStartMsg(vc, ack_pkt, sizeof(*ack_pkt), &ack_req);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                "**ch3|syncack");
        }
        if (ack_req != NULL) {
            MPIR_Request_free(ack_req);
        }
    } else {
        MPIDI_Request_set_sync_send_flag(rreq, TRUE);
    }

 fn_fail:
    return mpi_errno;
}
int MPIDI_CH3_PktHandler_EagerShortSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp ) { MPIDI_CH3_Pkt_eagershort_send_t * eagershort_pkt = &pkt->eagershort_send; MPID_Request * rreq; int found; int mpi_errno = MPI_SUCCESS; MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_MSGQ_MUTEX); /* printf( "Receiving short eager!\n" ); fflush(stdout); */ MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "received eagershort send pkt, rank=%d, tag=%d, context=%d", eagershort_pkt->match.parts.rank, eagershort_pkt->match.parts.tag, eagershort_pkt->match.parts.context_id)); MPIU_DBG_MSGPKT(vc,eagershort_pkt->match.parts.tag, eagershort_pkt->match.parts.context_id, eagershort_pkt->match.parts.rank,eagershort_pkt->data_sz, "ReceivedEagerShort"); rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&eagershort_pkt->match, &found); MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp()); /* If the completion counter is 0, that means that the communicator to * which this message is being sent has been revoked and we shouldn't * bother finishing this. */ if (!found && MPID_cc_get(rreq->cc) == 0) { *rreqp = NULL; goto fn_fail; } (rreq)->status.MPI_SOURCE = (eagershort_pkt)->match.parts.rank; (rreq)->status.MPI_TAG = (eagershort_pkt)->match.parts.tag; MPIR_STATUS_SET_COUNT((rreq)->status, (eagershort_pkt)->data_sz); (rreq)->dev.recv_data_sz = (eagershort_pkt)->data_sz; MPIDI_Request_set_seqnum((rreq), (eagershort_pkt)->seqnum); /* FIXME: Why do we set the message type? */ MPIDI_Request_set_msg_type((rreq), MPIDI_REQUEST_EAGER_MSG); /* This packed completes the reception of the indicated data. 
The packet handler returns null for a request that requires no further communication */ *rreqp = NULL; *buflen = sizeof(MPIDI_CH3_Pkt_t); /* Extract the data from the packet */ /* Note that if the data size if zero, we're already done */ if (rreq->dev.recv_data_sz > 0) { if (found) { int dt_contig; MPI_Aint dt_true_lb; MPIDI_msg_sz_t userbuf_sz; MPID_Datatype *dt_ptr; MPIDI_msg_sz_t data_sz; /* Make sure that we handle the general (non-contiguous) datatypes correctly while optimizing for the special case */ MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, userbuf_sz, dt_ptr, dt_true_lb); if (rreq->dev.recv_data_sz <= userbuf_sz) { data_sz = rreq->dev.recv_data_sz; } else { MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "receive buffer too small; message truncated, msg_sz=" MPIDI_MSG_SZ_FMT ", userbuf_sz=" MPIDI_MSG_SZ_FMT, rreq->dev.recv_data_sz, userbuf_sz)); rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d %d %d", rreq->status.MPI_SOURCE, rreq->status.MPI_TAG, rreq->dev.recv_data_sz, userbuf_sz ); MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz); data_sz = userbuf_sz; } if (dt_contig && data_sz == rreq->dev.recv_data_sz) { /* user buffer is contiguous and large enough to store the entire message. We can just copy the code */ /* Copy the payload. We could optimize this if data_sz & 0x3 == 0 (copy (data_sz >> 2) ints, inline that since data size is currently limited to 4 ints */ { unsigned char const * restrict p = (unsigned char *)eagershort_pkt->data; unsigned char * restrict bufp = (unsigned char *)(char*)(rreq->dev.user_buf) + dt_true_lb; int i; for (i=0; i<data_sz; i++) { *bufp++ = *p++; } } /* FIXME: We want to set the OnDataAvail to the appropriate function, which depends on whether this is an RMA request or a pt-to-pt request. */ rreq->dev.OnDataAvail = 0; /* The recv_pending_count must be one here (!) 
because of the way the pending count is queried. We may want to fix this, but it will require a sweep of the code */ } else {
/*
 * MPIDI_CH3_PktHandler_RndvReqToSend - handle a received rendezvous
 * request-to-send (RTS) packet.
 *
 * The packet's match information is looked up with
 * MPIDI_CH3U_Recvq_FDP_or_AEU() while MPIR_THREAD_POBJ_MSGQ_MUTEX is held.
 * If a posted receive was found, a clear-to-send (CTS) packet carrying the
 * sender's request id and the receive request's handle is started on vc.
 * Otherwise the progress engine is signalled, because a probe may be waiting
 * for the request that was just queued.
 *
 * Parameters:
 *   vc     - virtual connection on which the CTS reply is sent
 *   pkt    - received packet, read through its rndv_req_to_send member
 *   buflen - set to sizeof(MPIDI_CH3_Pkt_t) once the request has been set
 *            up; not written on the two early exits that precede that
 *   rreqp  - set to NULL when the handler finishes normally and on the
 *            revoked-communicator early exit; left untouched when an error
 *            is raised
 *
 * Returns MPI_SUCCESS, or an MPI error code if no request could be obtained
 * or the CTS packet could not be started.
 */
int MPIDI_CH3_PktHandler_RndvReqToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        intptr_t *buflen, MPIR_Request **rreqp )
{
    MPIR_Request * rreq;
    int found;
    /* Nonzero while the message-queue mutex is held, so that the common
       exit at fn_fail can release it after an early jump. */
    int msgq_cs_held = 0;
    MPIDI_CH3_Pkt_rndv_req_to_send_t * rts_pkt = &pkt->rndv_req_to_send;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
        "received rndv RTS pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d, data_sz=%" PRIdPTR,
        rts_pkt->sender_req_id, rts_pkt->match.parts.rank,
        rts_pkt->match.parts.tag, rts_pkt->match.parts.context_id,
        rts_pkt->data_sz));
    MPL_DBG_MSGPKT(vc,rts_pkt->match.parts.tag,rts_pkt->match.parts.context_id,
                   rts_pkt->match.parts.rank,rts_pkt->data_sz,
                   "ReceivedRndv");

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
    msgq_cs_held = 1;
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&rts_pkt->match, &found);
    /* This jumps to fn_fail with the mutex still held; the flag above makes
       sure it is released there. */
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this.  The mutex is released at fn_fail. */
    if (!found && MPIR_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }

    set_request_info(rreq, rts_pkt, MPIDI_REQUEST_RNDV_MSG);

    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
    msgq_cs_held = 0;

    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    if (found)
    {
        MPIR_Request * cts_req;
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;

        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"posted request found");

        /* FIXME: What if the receive user buffer is not big enough to
           hold the data about to be cleared for sending? */

        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending rndv CTS packet");
        MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
        cts_pkt->sender_req_id = rts_pkt->sender_req_id;
        cts_pkt->receiver_req_id = rreq->handle;
        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iStartMsg(vc, cts_pkt, sizeof(*cts_pkt), &cts_req);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                "**ch3|ctspkt");
        }
        /* The handler keeps no interest in the CTS request itself. */
        if (cts_req != NULL) {
            MPIR_Request_free(cts_req);
        }
    }
    else
    {
        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"unexpected request allocated");

        /*
         * A MPID_Probe() may be waiting for the request we just
         * inserted, so we need to tell the progress engine to exit.
         *
         * FIXME: This will cause MPID_Progress_wait() to return to the
         * MPI layer each time an unexpected RTS packet is
         * received.  MPID_Probe() should atomically increment a
         * counter and MPIDI_CH3_Progress_signal_completion()
         * should only be called if that counter is greater than zero.
         */
        MPIDI_CH3_Progress_signal_completion();
    }

    *rreqp = NULL;

 fn_fail:
    /* Reached both on success and on early exit; drop the message-queue
       mutex if an early jump left it held. */
    if (msgq_cs_held) {
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
    }
    return mpi_errno;
}