int MPIDI_CH3_RecvRndv( MPIDI_VC_t * vc, MPID_Request *rreq )
{
    int mpi_errno = MPI_SUCCESS;

    /* A rendezvous request-to-send (RTS) message has arrived.  We need
       to send a CTS message to the remote process. */
    MPID_Request * cts_req;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;

    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,
                 "rndv RTS in the request, sending rndv CTS");

    MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
    cts_pkt->sender_req_id = rreq->dev.sender_req_id;
    cts_pkt->receiver_req_id = rreq->handle;

    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, cts_pkt, sizeof(*cts_pkt), &cts_req);
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**ch3|ctspkt");
    }
    if (cts_req != NULL) {
        /* FIXME: Ideally we could specify that a req not be returned.
           This would avoid our having to decrement the reference count
           on a req we don't want/need. */
        MPID_Request_release(cts_req);
    }

 fn_fail:
    return mpi_errno;
}
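/*
 * Illustrative, standalone sketch (not MPICH API): the CTS packet built
 * above pairs the two request handles so that each side can locate its
 * own request when the rendezvous data starts to flow.  The type and
 * helper below are assumptions for illustration only.
 */
#include <stdio.h>

typedef struct {
    int sender_req_id;    /* copied from the arriving RTS */
    int receiver_req_id;  /* the local receive request's handle */
} toy_cts_pkt_t;

/* Fill a CTS the same way MPIDI_CH3_RecvRndv fills cts_pkt. */
static toy_cts_pkt_t toy_make_cts(int rts_sender_req_id, int local_rreq_handle)
{
    toy_cts_pkt_t cts = { rts_sender_req_id, local_rreq_handle };
    return cts;
}

int main(void)
{
    toy_cts_pkt_t cts = toy_make_cts(0xa1, 0xb2);
    printf("CTS pairs sender req 0x%x with receiver req 0x%x\n",
           cts.sender_req_id, cts.receiver_req_id);
    return 0;
}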
int MPIDI_nem_ckpt_start(void)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_START);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_START);

    if (checkpointing)
        goto fn_exit;

    checkpointing = TRUE;

    marker_count = MPIDI_Process.my_pg->size - 1; /* We won't receive a marker from ourselves. */
    ++current_wave;

    /* send markers to all other processes */
    /* FIXME: we're only handling processes in our pg, so no dynamic connections */
    for (i = 0; i < MPIDI_Process.my_pg->size; ++i) {
        MPID_Request *req;
        MPIDI_VC_t *vc;
        MPIDI_CH3I_VC *vc_ch;
        MPID_PKT_DECL_CAST(upkt, MPID_nem_pkt_ckpt_marker_t, ckpt_pkt);

        /* Don't send a marker to ourselves. */
        if (i == MPIDI_Process.my_pg_rank)
            continue;

        MPIDI_PG_Get_vc_set_active(MPIDI_Process.my_pg, i, &vc);
        vc_ch = &vc->ch;

        MPIDI_Pkt_init(ckpt_pkt, MPIDI_NEM_PKT_CKPT_MARKER);
        ckpt_pkt->wave = current_wave;

        /* ckpt_pkt is a pointer, so the message size must be
           sizeof(*ckpt_pkt); sizeof(ckpt_pkt) would only send the size
           of the pointer. */
        mpi_errno = MPIDI_CH3_iStartMsg(vc, ckpt_pkt, sizeof(*ckpt_pkt), &req);
        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ckptpkt");
        if (req != NULL) {
            MPIU_ERR_CHKANDJUMP(req->status.MPI_ERROR, mpi_errno,
                                MPI_ERR_OTHER, "**ckptpkt");
            MPID_Request_release(req);
        }

        if (!vc_ch->is_local) {
            mpi_errno = vc_ch->ckpt_pause_send_vc(vc);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        }
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_NEM_CKPT_START);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
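/*
 * Toy model (an assumption-laden sketch, not the nemesis implementation)
 * of the marker accounting above: a checkpoint wave is complete once a
 * marker has arrived from every other process in the group, and markers
 * from a different wave are ignored.
 */
#include <stdio.h>
#include <stdbool.h>

typedef struct {
    int  marker_count;   /* markers still expected this wave */
    int  current_wave;
    bool checkpointing;
} toy_ckpt_state_t;

static void toy_ckpt_start(toy_ckpt_state_t *st, int pg_size)
{
    if (st->checkpointing)
        return;                      /* a wave is already in progress */
    st->checkpointing = true;
    st->marker_count = pg_size - 1;  /* no marker from ourselves */
    ++st->current_wave;
}

/* Returns true when the last expected marker of the wave arrives. */
static bool toy_ckpt_marker_arrived(toy_ckpt_state_t *st, int wave)
{
    if (wave != st->current_wave)
        return false;                /* stale marker from an old wave */
    if (--st->marker_count == 0) {
        st->checkpointing = false;
        return true;
    }
    return false;
}

int main(void)
{
    toy_ckpt_state_t st = { 0, 0, false };
    toy_ckpt_start(&st, 4);          /* expect 3 markers */
    for (int i = 0; i < 3; i++) {
        if (toy_ckpt_marker_arrived(&st, st.current_wave))
            printf("wave %d complete\n", st.current_wave);
    }
    return 0;
}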
/*@
  MPIDI_CH3U_VC_SendClose - Initiate a close on a virtual connection

  Input Parameters:
+ vc - Virtual connection to close
- rank - rank of the virtual connection within a process group (used for
  debugging)

  Notes:
  The current state of this connection must be either
  'MPIDI_VC_STATE_ACTIVE' or 'MPIDI_VC_STATE_REMOTE_CLOSE'.
  @*/
int MPIDI_CH3U_VC_SendClose( MPIDI_VC_t *vc, int rank )
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_close_t * close_pkt = &upkt.close;
    MPIR_Request * sreq;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);
    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);

    MPIR_Assert( vc->state == MPIDI_VC_STATE_ACTIVE ||
                 vc->state == MPIDI_VC_STATE_REMOTE_CLOSE );

    MPIDI_Pkt_init(close_pkt, MPIDI_CH3_PKT_CLOSE);
    close_pkt->ack = (vc->state == MPIDI_VC_STATE_ACTIVE) ? FALSE : TRUE;

    /* MT: this is not thread safe, the POBJ CS is scoped to the vc and
     * doesn't protect this global correctly */
    MPIDI_Outstanding_close_ops += 1;
    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,(MPL_DBG_FDEST,
                    "sending close(%s) on vc (pg=%p) %p to rank %d, ops = %d",
                    close_pkt->ack ? "TRUE" : "FALSE", vc->pg, vc,
                    rank, MPIDI_Outstanding_close_ops));

    /*
     * A close packet acknowledging this close request could be received
     * during iStartMsg, therefore the state must be changed before the
     * close packet is sent.
     */
    if (vc->state == MPIDI_VC_STATE_ACTIVE) {
        MPIDI_CHANGE_VC_STATE(vc, LOCAL_CLOSE);
    }
    else {
        MPIR_Assert( vc->state == MPIDI_VC_STATE_REMOTE_CLOSE );
        MPIDI_CHANGE_VC_STATE(vc, CLOSE_ACKED);
    }

    mpi_errno = MPIDI_CH3_iStartMsg(vc, close_pkt, sizeof(*close_pkt), &sreq);
    MPIR_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER,
                        "**ch3|send_close_ack");

    if (sreq != NULL) {
        /* There is still another reference being held by the channel.  It
           will not be released until the pkt is actually sent. */
        MPIR_Request_free(sreq);
    }

 fn_exit:
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);
    return mpi_errno;

 fn_fail:
    goto fn_exit;
}
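/*
 * Minimal sketch of the sender-side close handshake implemented above
 * (assumed semantics, not the CH3 state machine itself): a close sent
 * from ACTIVE initiates the handshake (ack = FALSE), while one sent from
 * REMOTE_CLOSE answers the peer's close (ack = TRUE), and the state is
 * changed before the packet goes out because the peer's reply can race
 * with the send.
 */
#include <stdio.h>
#include <assert.h>

typedef enum {
    TOY_VC_ACTIVE, TOY_VC_REMOTE_CLOSE, TOY_VC_LOCAL_CLOSE, TOY_VC_CLOSE_ACKED
} toy_vc_state_t;

/* Returns the ack flag to put in the close packet. */
static int toy_send_close(toy_vc_state_t *state)
{
    assert(*state == TOY_VC_ACTIVE || *state == TOY_VC_REMOTE_CLOSE);
    int ack = (*state == TOY_VC_ACTIVE) ? 0 : 1;
    /* transition first: the peer's ack may arrive during the send */
    *state = (*state == TOY_VC_ACTIVE) ? TOY_VC_LOCAL_CLOSE
                                       : TOY_VC_CLOSE_ACKED;
    return ack;
}

int main(void)
{
    toy_vc_state_t a = TOY_VC_ACTIVE, b = TOY_VC_REMOTE_CLOSE;
    int ack_a = toy_send_close(&a);  /* expect ack=0, a -> LOCAL_CLOSE */
    int ack_b = toy_send_close(&b);  /* expect ack=1, b -> CLOSE_ACKED */
    printf("from ACTIVE:       ack=%d, local_close=%d\n",
           ack_a, a == TOY_VC_LOCAL_CLOSE);
    printf("from REMOTE_CLOSE: ack=%d, close_acked=%d\n",
           ack_b, b == TOY_VC_CLOSE_ACKED);
    return 0;
}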
int MPIDI_CH3_iStartRndvTransfer(MPIDI_VC_t * vc, MPID_Request * rreq)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_clr_to_send_t *cts_pkt = &upkt.rndv_clr_to_send;
    MPID_Request *cts_req;
    MPID_Seqnum_t seqnum;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_ISTARTRNDVTRANSFER);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_ISTARTRNDVTRANSFER);

#ifdef CKPT
    MPIDI_CH3I_CR_lock();
#endif

    MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);

    /* A single-IOV receive with no completion handler can take the data
       in one shot, so advertise that IOV's length; otherwise advertise
       the full segment size. */
    if (rreq->dev.iov_count == 1 && rreq->dev.OnDataAvail == NULL)
        cts_pkt->recv_sz = rreq->dev.iov[0].MPID_IOV_LEN;
    else
        cts_pkt->recv_sz = rreq->dev.segment_size;

    cts_pkt->sender_req_id = rreq->dev.sender_req_id;
    cts_pkt->receiver_req_id = rreq->handle;

    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(cts_pkt, seqnum);

    mpi_errno = MPIDI_CH3_Prepare_rndv_cts(vc, cts_pkt, rreq);
    if (mpi_errno != MPI_SUCCESS) {
        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL,
                                         FCNAME, __LINE__, MPI_ERR_OTHER,
                                         "**ch3|ctspkt", 0);
        goto fn_exit;
    }

    mpi_errno = MPIDI_CH3_iStartMsg(vc, cts_pkt, sizeof(*cts_pkt), &cts_req);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL,
                                         FCNAME, __LINE__, MPI_ERR_OTHER,
                                         "**ch3|ctspkt", 0);
        goto fn_exit;
    }
    /* --END ERROR HANDLING-- */

    if (cts_req != NULL) {
        MPID_Request_release(cts_req);
    }

 fn_exit:
#ifdef CKPT
    MPIDI_CH3I_CR_unlock();
#endif
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_ISTARTRNDVTRANSFER);
    return mpi_errno;
}
/*
 * These routines are called when a receive matches an eager sync send
 */
int MPIDI_CH3_EagerSyncAck( MPIDI_VC_t *vc, MPIR_Request *rreq )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_eager_sync_ack_t * const esa_pkt = &upkt.eager_sync_ack;
    MPIR_Request * esa_req;

    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending eager sync ack");

    MPIDI_Pkt_init(esa_pkt, MPIDI_CH3_PKT_EAGER_SYNC_ACK);
    esa_pkt->sender_req_id = rreq->dev.sender_req_id;

    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, esa_pkt, sizeof(*esa_pkt), &esa_req);
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

    if (esa_req != NULL) {
        MPIR_Request_free(esa_req);
    }

 fn_fail:
    return mpi_errno;
}
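/*
 * Minimal model (assumed semantics, not CH3 code) of what the eager sync
 * ack above unblocks: a synchronous send stays incomplete at the sender
 * until the receiver has matched the message and sent this ack; the ack
 * handler then drops the request's completion counter to zero.
 */
#include <stdio.h>

typedef struct { int cc; } toy_sreq_t;   /* completion counter; 0 == done */

static void toy_on_sync_ack(toy_sreq_t *sreq)
{
    if (sreq->cc > 0)
        --sreq->cc;                      /* the ack completes the ssend */
}

int main(void)
{
    toy_sreq_t sreq = { 1 };             /* pending until the ack arrives */
    toy_on_sync_ack(&sreq);
    printf("ssend complete: %s\n", sreq.cc == 0 ? "yes" : "no");
    return 0;
}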
int MPID_Send(const void * buf, MPI_Aint count, MPI_Datatype datatype,
              int rank, int tag, MPID_Comm * comm, int context_offset,
              MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif
    int eager_threshold = -1;
    int mpi_errno = MPI_SUCCESS;
#if defined(FINEGRAIN_MPI)
    int destpid = -1, destworldrank = -1;
#endif
    MPIDI_STATE_DECL(MPID_STATE_MPID_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                     "rank=%d, tag=%d, context=%d",
                     rank, tag, comm->context_id + context_offset));

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked &&
        MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) &&
        MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }

#if defined(FINEGRAIN_MPI)
    MPIDI_Comm_get_pid_worldrank(comm, rank, &destpid, &destworldrank);

    if (COMPARE_RANKS(rank,comm,destpid) && comm->comm_kind != MPID_INTERCOMM) {
        mpi_errno = MPIDI_Isend_self(&buf, count, datatype, rank, tag, comm,
                                     context_offset, MPIDI_REQUEST_TYPE_SEND,
                                     &sreq);
        if (rank == comm->rank) {
            printf("my_fgrank=%d: %s, self send DEADLOCK\n",
                   my_fgrank, __FUNCTION__);
            if (sreq != NULL && sreq->cc != 0) {
                MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                    "**dev|selfsenddeadlock");
            }
        }
#else
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) {
        mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm,
                                     context_offset, MPIDI_REQUEST_TYPE_SEND,
                                     &sreq);

        /* In the single threaded case, sending to yourself will cause
           deadlock.  Note that in the runtime-thread case, this check
           will not be made (long-term FIXME) */
#       ifndef MPICH_IS_THREADED
        {
            if (sreq != NULL && MPID_cc_get(sreq->cc) != 0) {
                MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                    "**dev|selfsenddeadlock");
            }
        }
#       endif
#endif
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
        goto fn_exit;
    }

    if (rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

#if defined(FINEGRAIN_MPI)
    MPIDI_Comm_get_vc_set_active_direct(comm, destpid, &vc);
#else
    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
#endif
    MPIR_ERR_CHKANDJUMP1(vc->state == MPIDI_VC_STATE_MORIBUND, mpi_errno,
                         MPIX_ERR_PROC_FAILED, "**comm_fail",
                         "**comm_fail %d", rank);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->send) {
        mpi_errno = vc->comm_ops->send(vc, buf, count, datatype, rank, tag,
                                       comm, context_offset, &sreq);
        goto fn_exit;
    }
#endif

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr,
                            dt_true_lb);

    if (data_sz == 0) {
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
        MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
#if defined(FINEGRAIN_MPI)
        eager_pkt->match.parts.dest_rank = destworldrank;
#endif
        eager_pkt->match.parts.rank = comm->rank;
        eager_pkt->match.parts.tag = tag;
        eager_pkt->match.parts.context_id = comm->context_id + context_offset;
        eager_pkt->sender_req_id = MPI_REQUEST_NULL;
        eager_pkt->data_sz = 0;

        MPIDI_VC_FAI_send_seqnum(vc, seqnum);
        MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iStartMsg(vc, eager_pkt, sizeof(*eager_pkt),
                                        &sreq);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg");
        }
        /* --END ERROR HANDLING-- */
        if (sreq != NULL) {
            MPIDI_Request_set_seqnum(sreq, seqnum);
            MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
            /* sreq->comm = comm;
               MPIR_Comm_add_ref(comm); -- not necessary for blocking
               functions */
        }
        goto fn_exit;
    }

    MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc);

    /* FIXME: flow control: limit number of outstanding eager messages
       containing data that must be buffered by the receiver */

#ifdef USE_EAGER_SHORT
    if (dt_contig && data_sz <= MPIDI_EAGER_SHORT_SIZE) {
        mpi_errno = MPIDI_CH3_EagerContigShortSend(&sreq,
                                                   MPIDI_CH3_PKT_EAGERSHORT_SEND,
                                                   (char *)buf + dt_true_lb,
                                                   data_sz, rank, tag, comm,
                                                   context_offset);
    }
    else
#endif
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold) {
        if (dt_contig) {
            mpi_errno = MPIDI_CH3_EagerContigSend(&sreq,
                                                  MPIDI_CH3_PKT_EAGER_SEND,
                                                  (char *)buf + dt_true_lb,
                                                  data_sz, rank, tag, comm,
                                                  context_offset);
        }
        else {
            MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
            MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
            mpi_errno = MPIDI_CH3_EagerNoncontigSend(&sreq,
                                                     MPIDI_CH3_PKT_EAGER_SEND,
                                                     buf, count, datatype,
                                                     data_sz, rank, tag, comm,
                                                     context_offset);
        }
    }
    else {
static int MPIDI_CH3_SMP_Rendezvous_push(MPIDI_VC_t * vc, MPID_Request * sreq)
{
    int nb;
    int complete = 0;
    int seqnum;
    int mpi_errno;
    MPIDI_CH3_Pkt_rndv_r3_data_t pkt_head;
    MPID_Request * send_req;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);

    MPIDI_Pkt_init(&pkt_head, MPIDI_CH3_PKT_RNDV_R3_DATA);
    pkt_head.receiver_req_id = sreq->mrail.partner_id;
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(&pkt_head, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);

#if defined(_SMP_LIMIC_)
    /* Use limic2 for contiguous data;
       use shared memory for non-contiguous data. */
    if (!g_smp_use_limic2 ||
        sreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_SendReloadIOV ||
        sreq->dev.iov_count > 1) {
        pkt_head.send_req_id = NULL;
    } else {
        pkt_head.send_req_id = sreq;
    }
#endif

    mpi_errno = MPIDI_CH3_iStartMsg(vc, &pkt_head,
                                    sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t),
                                    &send_req);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_Object_set_ref(sreq, 0);
        MPIDI_CH3_Request_destroy(sreq);
        sreq = NULL;
        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL,
                                         FCNAME, __LINE__, MPI_ERR_OTHER,
                                         "**ch3|rtspkt", 0);
        return mpi_errno;
    }
    /* --END ERROR HANDLING-- */

    if (send_req != NULL) {
        DEBUG_PRINT("r3 packet not sent \n");
        MPID_Request_release(send_req);
    }

#if defined(_SMP_LIMIC_)
    if (pkt_head.send_req_id) {
        sreq->mrail.nearly_complete = 1;
        return MPI_SUCCESS;
    }
#endif

    vc->smp.send_current_pkt_type = SMP_RNDV_MSG;

    DEBUG_PRINT("r3 sent req is %p\n", sreq);
    if (MPIDI_CH3I_SMP_SendQ_empty(vc)) {
        for (;;) {
            DEBUG_PRINT("iov count (sreq): %d, offset %d, len[1] %d\n",
                        sreq->dev.iov_count, sreq->dev.iov_offset,
                        sreq->dev.iov[0].MPID_IOV_LEN);

            if (vc->smp.send_current_pkt_type == SMP_RNDV_MSG) {
                mpi_errno = MPIDI_CH3I_SMP_writev_rndv_data(vc,
                        &sreq->dev.iov[sreq->dev.iov_offset],
                        sreq->dev.iov_count - sreq->dev.iov_offset, &nb);
            } else {
                MPIU_Assert(vc->smp.send_current_pkt_type == SMP_RNDV_MSG_CONT);
                MPIDI_CH3I_SMP_writev_rndv_data_cont(vc,
                        &sreq->dev.iov[sreq->dev.iov_offset],
                        sreq->dev.iov_count - sreq->dev.iov_offset, &nb);
            }

            if (MPI_SUCCESS != mpi_errno) {
                vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
                sreq->status.MPI_ERROR = MPI_ERR_INTERN;
                MPIDI_CH3U_Request_complete(sreq);
                return mpi_errno;
            }

            if (nb >= 0) {
                if (MPIDI_CH3I_Request_adjust_iov(sreq, nb)) {
                    MPIDI_CH3U_Handle_send_req(vc, sreq, &complete);
                    if (complete) {
                        sreq->mrail.nearly_complete = 1;
                        break;
                    } else {
                        vc->smp.send_current_pkt_type = SMP_RNDV_MSG_CONT;
                    }
                } else {
                    sreq->ch.reqtype = REQUEST_RNDV_R3_DATA;
                    MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq);
                    vc->smp.send_active = sreq;
                    sreq->mrail.nearly_complete = 1;
                    vc->smp.send_current_pkt_type = SMP_RNDV_MSG_CONT;
                    break;
                }
            } else {
                MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq);
                vc->smp.send_active = sreq;
                sreq->mrail.nearly_complete = 1;
                break;
            }
        }
    } else {
        sreq->ch.reqtype = REQUEST_RNDV_R3_DATA;
        MPIDI_CH3I_SMP_SendQ_enqueue(vc, sreq);
        sreq->mrail.nearly_complete = 1;
        vc->smp.send_current_pkt_type = SMP_RNDV_MSG;
        DEBUG_PRINT("Enqueue sreq %p", sreq);
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);
    return MPI_SUCCESS;
}
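/*
 * Standalone sketch of the "adjust the IOV by nb bytes" step used in the
 * loop above, modeled on the observable behavior of
 * MPIDI_CH3I_Request_adjust_iov but written here against the POSIX
 * struct iovec; names and the offset convention are illustrative.
 * Returns 1 once every entry has been fully consumed.
 */
#include <stdio.h>
#include <sys/uio.h>

static int toy_adjust_iov(struct iovec *iov, int count, int *offset, size_t nb)
{
    while (*offset < count) {
        if (iov[*offset].iov_len > nb) {
            /* partial write into this entry: advance base, shrink len */
            iov[*offset].iov_base = (char *)iov[*offset].iov_base + nb;
            iov[*offset].iov_len -= nb;
            return 0;
        }
        nb -= iov[*offset].iov_len;      /* entry fully written; move on */
        ++*offset;
    }
    return 1;                            /* all data written */
}

int main(void)
{
    char a[8], b[8];
    struct iovec iov[2] = { { a, sizeof a }, { b, sizeof b } };
    int off = 0;
    int done = toy_adjust_iov(iov, 2, &off, 10);
    printf("done=%d off=%d\n", done, off);   /* done=0 off=1 */
    done = toy_adjust_iov(iov, 2, &off, 6);
    printf("done=%d off=%d\n", done, off);   /* done=1 off=2 */
    return 0;
}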
/* MPIDI_CH3_RndvSend - Send a request to perform a rendezvous send */
int MPIDI_CH3_RndvSend( MPIR_Request **sreq_p, const void * buf,
                        MPI_Aint count, MPI_Datatype datatype, int dt_contig,
                        intptr_t data_sz, MPI_Aint dt_true_lb, int rank,
                        int tag, MPIR_Comm * comm, int context_offset )
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_req_to_send_t * const rts_pkt = &upkt.rndv_req_to_send;
    MPIDI_VC_t * vc;
    MPIR_Request * rts_sreq;
    MPIR_Request * sreq = *sreq_p;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER,VERBOSE,
                  "sending rndv RTS, data_sz=%" PRIdPTR, data_sz);

    sreq->dev.OnDataAvail = 0;
    sreq->dev.partner_request = NULL;

    MPIDI_Pkt_init(rts_pkt, MPIDI_CH3_PKT_RNDV_REQ_TO_SEND);
    rts_pkt->match.parts.rank = comm->rank;
    rts_pkt->match.parts.tag = tag;
    rts_pkt->match.parts.context_id = comm->context_id + context_offset;
    rts_pkt->sender_req_id = sreq->handle;
    rts_pkt->data_sz = data_sz;

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(rts_pkt, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);

    MPL_DBG_MSGPKT(vc,tag,rts_pkt->match.parts.context_id,rank,data_sz,"Rndv");

    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, rts_pkt, sizeof(*rts_pkt), &rts_sreq);
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_Request_free(sreq);
        *sreq_p = NULL;
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rtspkt");
    }
    /* --END ERROR HANDLING-- */

    if (rts_sreq != NULL) {
        if (rts_sreq->status.MPI_ERROR != MPI_SUCCESS) {
            MPIR_Request_free(sreq);
            *sreq_p = NULL;
            mpi_errno = rts_sreq->status.MPI_ERROR;
            MPIR_Request_free(rts_sreq);
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rtspkt");
        }
        MPIR_Request_free(rts_sreq);
    }

    /* FIXME: fill temporary IOV or pack temporary buffer after send to
       hide some latency.  This requires synchronization because the CTS
       packet could arrive and be processed before the above iStartMsg
       completes (depending on the progress engine, threads, etc.). */

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
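/*
 * Hedged sketch of the protocol decision that leads into
 * MPIDI_CH3_RndvSend in the MPID_Send variants shown in this section:
 * if envelope plus payload fit under the eager threshold, the data
 * travels with the envelope; otherwise the RTS/CTS rendezvous above is
 * used.  The threshold value and all names here are illustrative
 * assumptions.
 */
#include <stdio.h>
#include <stddef.h>

enum toy_proto { TOY_PROTO_EAGER, TOY_PROTO_RNDV };

static enum toy_proto toy_choose_protocol(size_t data_sz, size_t pkt_hdr_sz,
                                          size_t eager_threshold)
{
    return (data_sz + pkt_hdr_sz <= eager_threshold) ? TOY_PROTO_EAGER
                                                     : TOY_PROTO_RNDV;
}

int main(void)
{
    /* 128 KiB threshold is only an example value */
    printf("1 KiB : %s\n",
           toy_choose_protocol(1024, 64, 128 * 1024) == TOY_PROTO_EAGER
               ? "eager" : "rndv");
    printf("1 MiB : %s\n",
           toy_choose_protocol(1 << 20, 64, 128 * 1024) == TOY_PROTO_EAGER
               ? "eager" : "rndv");
    return 0;
}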
int MPID_Send(const void * buf, int count, MPI_Datatype datatype, int rank,
              int tag, MPID_Comm * comm, int context_offset,
              MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                     "rank=%d, tag=%d, context=%d",
                     rank, tag, comm->context_id + context_offset));

    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) {
        mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm,
                                     context_offset, MPIDI_REQUEST_TYPE_SEND,
                                     &sreq);

        /* In the single threaded case, sending to yourself will cause
           deadlock.  Note that in the runtime-thread case, this check
           will not be made (long-term FIXME) */
#       ifndef MPICH_IS_THREADED
        {
            if (sreq != NULL && sreq->cc != 0) {
                MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                    "**dev|selfsenddeadlock");
            }
        }
#       endif
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_POP(mpi_errno);
        }
        goto fn_exit;
    }

    if (rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->send) {
        mpi_errno = vc->comm_ops->send(vc, buf, count, datatype, rank, tag,
                                       comm, context_offset, &sreq);
        goto fn_exit;
    }
#endif

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr,
                            dt_true_lb);

    if (data_sz == 0) {
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
        MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
        eager_pkt->match.parts.rank = comm->rank;
        eager_pkt->match.parts.tag = tag;
        eager_pkt->match.parts.context_id = comm->context_id + context_offset;
        eager_pkt->sender_req_id = MPI_REQUEST_NULL;
        eager_pkt->data_sz = 0;

        MPIDI_VC_FAI_send_seqnum(vc, seqnum);
        MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);

        MPIU_THREAD_CS_ENTER(CH3COMM,vc);
        mpi_errno = MPIDI_CH3_iStartMsg(vc, eager_pkt, sizeof(*eager_pkt),
                                        &sreq);
        MPIU_THREAD_CS_EXIT(CH3COMM,vc);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg");
        }
        /* --END ERROR HANDLING-- */
        if (sreq != NULL) {
            MPIDI_Request_set_seqnum(sreq, seqnum);
            MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
            /* sreq->comm = comm;
               MPIR_Comm_add_ref(comm); -- not necessary for blocking
               functions */
        }
        goto fn_exit;
    }

    /* FIXME: flow control: limit number of outstanding eager messages
       containing data that must be buffered by the receiver */

#ifdef USE_EAGER_SHORT
    if (dt_contig && data_sz <= MPIDI_EAGER_SHORT_SIZE) {
        mpi_errno = MPIDI_CH3_EagerContigShortSend(&sreq,
                                                   MPIDI_CH3_PKT_EAGERSHORT_SEND,
                                                   (char *)buf + dt_true_lb,
                                                   data_sz, rank, tag, comm,
                                                   context_offset);
    }
    else
#endif
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= vc->eager_max_msg_sz) {
        if (dt_contig) {
            mpi_errno = MPIDI_CH3_EagerContigSend(&sreq,
                                                  MPIDI_CH3_PKT_EAGER_SEND,
                                                  (char *)buf + dt_true_lb,
                                                  data_sz, rank, tag, comm,
                                                  context_offset);
        }
        else {
            MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
            MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
            mpi_errno = MPIDI_CH3_EagerNoncontigSend(&sreq,
                                                     MPIDI_CH3_PKT_EAGER_SEND,
                                                     buf, count, datatype,
                                                     data_sz, rank, tag, comm,
                                                     context_offset);
        }
    }
    else {
int MPIDI_CH3_Connect_to_root(const char* port_name, MPIDI_VC_t** new_vc)
{
    int mpi_errno = MPI_SUCCESS;
    int str_errno;
    char ifname[MAX_HOST_DESCRIPTION_LEN];
    MPIDI_VC_t *vc;
    MPIDI_CH3_Pkt_cm_establish_t pkt;
    MPID_Request * sreq;
    int seqnum;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);

    *new_vc = NULL;
    if (!MPIDI_CH3I_Process.has_dpm)
        return MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME,
                                    __LINE__, MPI_ERR_OTHER, "**notimpl", 0);

    str_errno = MPIU_Str_get_string_arg(port_name,
                                        MPIDI_CH3I_HOST_DESCRIPTION_KEY,
                                        ifname, MAX_HOST_DESCRIPTION_LEN);
    if (str_errno != MPIU_STR_SUCCESS) {
        /* --BEGIN ERROR HANDLING-- */
        if (str_errno == MPIU_STR_FAIL) {
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                                "**argstr_missinghost");
        }
        else {
            /* MPIU_STR_TRUNCATED or MPIU_STR_NOMEM */
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**argstr_hostd");
        }
        /* --END ERROR HANDLING-- */
    }

    vc = MPIU_Malloc(sizeof(MPIDI_VC_t));
    if (!vc) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
    }
    MPIDI_VC_Init(vc, NULL, 0);

    mpi_errno = MPIDI_CH3I_CM_Connect_raw_vc(vc, ifname);
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }

    while (vc->ch.state != MPIDI_CH3I_VC_STATE_IDLE) {
        mpi_errno = MPID_Progress_test();
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_POP(mpi_errno);
        }
        /* --END ERROR HANDLING-- */
    }

    /* fprintf(stderr, "[###] vc state now idle, send cm_establish msg\n"); */
    /* Now a connection is created, send a cm_establish message */
    /* FIXME: vc->mrail.remote_vc_addr is used to find the remote vc.
     * A more elegant way is needed. */
    MPIDI_Pkt_init(&pkt, MPIDI_CH3_PKT_CM_ESTABLISH);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(&pkt, seqnum);
    pkt.vc_addr = vc->mrail.remote_vc_addr;
    mpi_errno = MPIDI_GetTagFromPort(port_name, &pkt.port_name_tag);
    if (mpi_errno != MPIU_STR_SUCCESS) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
                            "**argstr_port_name_tag");
    }

    mpi_errno = MPIDI_CH3_iStartMsg(vc, &pkt, sizeof(pkt), &sreq);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**fail",
                             "**fail %s",
                             "Failed to send cm establish message");
    }

    if (sreq != NULL) {
        if (sreq->status.MPI_ERROR != MPI_SUCCESS) {
            mpi_errno = MPIR_Err_create_code(sreq->status.MPI_ERROR,
                                             MPIR_ERR_FATAL, FCNAME, __LINE__,
                                             MPI_ERR_OTHER, "**fail", 0);
            MPID_Request_release(sreq);
            goto fn_fail;
        }
        MPID_Request_release(sreq);
    }

    *new_vc = vc;

 fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);
    return mpi_errno;
}
int MPID_Rsend(const void * buf, int count, MPI_Datatype datatype, int rank,
               int tag, MPID_Comm * comm, int context_offset,
               MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_RSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                     "rank=%d, tag=%d, context=%d",
                     rank, tag, comm->context_id + context_offset));

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked &&
        MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) &&
        MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }

    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) {
        mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm,
                                     context_offset, MPIDI_REQUEST_TYPE_RSEND,
                                     &sreq);
        goto fn_exit;
    }

    if (rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->rsend) {
        mpi_errno = vc->comm_ops->rsend(vc, buf, count, datatype, rank, tag,
                                        comm, context_offset, &sreq);
        goto fn_exit;
    }
#endif

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr,
                            dt_true_lb);

    if (data_sz == 0) {
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_ready_send_t * const ready_pkt = &upkt.ready_send;

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
        MPIDI_Pkt_init(ready_pkt, MPIDI_CH3_PKT_READY_SEND);
        ready_pkt->match.parts.rank = comm->rank;
        ready_pkt->match.parts.tag = tag;
        ready_pkt->match.parts.context_id = comm->context_id + context_offset;
        ready_pkt->sender_req_id = MPI_REQUEST_NULL;
        ready_pkt->data_sz = data_sz;

        MPIDI_VC_FAI_send_seqnum(vc, seqnum);
        MPIDI_Pkt_set_seqnum(ready_pkt, seqnum);

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iStartMsg(vc, ready_pkt, sizeof(*ready_pkt),
                                        &sreq);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE,
                                             FCNAME, __LINE__, MPI_ERR_OTHER,
                                             "**ch3|eagermsg", 0);
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */
        if (sreq != NULL) {
            MPIDI_Request_set_seqnum(sreq, seqnum);
            MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_RSEND);
            /* sreq->comm = comm;
               MPIR_Comm_add_ref(comm); -- not needed for blocking
               operations */
        }
        goto fn_exit;
    }

    if (vc->ready_eager_max_msg_sz < 0 ||
        data_sz + sizeof(MPIDI_CH3_Pkt_ready_send_t) <= vc->ready_eager_max_msg_sz) {
        if (dt_contig) {
            mpi_errno = MPIDI_CH3_EagerContigSend(&sreq,
                                                  MPIDI_CH3_PKT_READY_SEND,
                                                  (char *)buf + dt_true_lb,
                                                  data_sz, rank, tag, comm,
                                                  context_offset);
        }
        else {
            MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
            MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
            mpi_errno = MPIDI_CH3_EagerNoncontigSend(&sreq,
                                                     MPIDI_CH3_PKT_READY_SEND,
                                                     buf, count, datatype,
                                                     data_sz, rank, tag, comm,
                                                     context_offset);
        }
    }
    else {
int MPIDI_CH3_PktHandler_Close( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                intptr_t *buflen, MPIR_Request **rreqp )
{
    MPIDI_CH3_Pkt_close_t * close_pkt = &pkt->close;
    int mpi_errno = MPI_SUCCESS;

    if (vc->state == MPIDI_VC_STATE_LOCAL_CLOSE) {
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_close_t * resp_pkt = &upkt.close;
        MPIR_Request * resp_sreq;

        MPIDI_Pkt_init(resp_pkt, MPIDI_CH3_PKT_CLOSE);
        resp_pkt->ack = TRUE;

        MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,
                      "sending close(TRUE) to %d", vc->pg_rank);
        mpi_errno = MPIDI_CH3_iStartMsg(vc, resp_pkt, sizeof(*resp_pkt),
                                        &resp_sreq);
        MPIR_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER,
                            "**ch3|send_close_ack");

        if (resp_sreq != NULL) {
            /* There is still another reference being held by the channel.
               It will not be released until the pkt is actually sent. */
            MPIR_Request_free(resp_sreq);
        }
    }

    if (close_pkt->ack == FALSE) {
        if (vc->state == MPIDI_VC_STATE_LOCAL_CLOSE) {
            MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,
                          "received close(FALSE) from %d, moving to CLOSE_ACKED.",
                          vc->pg_rank);
            MPIDI_CHANGE_VC_STATE(vc, CLOSE_ACKED);
        }
        else { /* (vc->state == MPIDI_VC_STATE_ACTIVE) */
            if (vc->state != MPIDI_VC_STATE_ACTIVE) {
                MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_DISCONNECT, TYPICAL,
                    (MPL_DBG_FDEST,
                     "Unexpected state %s in vc %p (rank=%d) "
                     "(expecting MPIDI_VC_STATE_ACTIVE)\n",
                     MPIDI_VC_GetStateString(vc->state), vc, vc->pg_rank));
            }
            MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,
                          "received close(FALSE) from %d, moving to REMOTE_CLOSE.",
                          vc->pg_rank);
            MPIR_Assert(vc->state == MPIDI_VC_STATE_ACTIVE);
            MPIDI_CHANGE_VC_STATE(vc, REMOTE_CLOSE);
        }
    }
    else { /* (close_pkt->ack == TRUE) */
        MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,
                      "received close(TRUE) from %d, moving to CLOSED.",
                      vc->pg_rank);
        MPIR_Assert(vc->state == MPIDI_VC_STATE_LOCAL_CLOSE ||
                    vc->state == MPIDI_VC_STATE_CLOSE_ACKED);
        MPIDI_CHANGE_VC_STATE(vc, CLOSED);
        /* For example, with sockets, Connection_terminate will close
           the socket. */
        mpi_errno = MPIDI_CH3_Connection_terminate(vc);
    }

    *buflen = sizeof(MPIDI_CH3_Pkt_t);
    *rreqp = NULL;

 fn_fail:
    return mpi_errno;
}
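/*
 * Companion sketch to the sender-side model after MPIDI_CH3U_VC_SendClose:
 * the receiver-side transitions implemented by the handler above, reduced
 * to a small decision table (illustrative, not the CH3 code).
 */
#include <stdio.h>

typedef enum {
    TOY2_VC_ACTIVE, TOY2_VC_LOCAL_CLOSE, TOY2_VC_REMOTE_CLOSE,
    TOY2_VC_CLOSE_ACKED, TOY2_VC_CLOSED
} toy2_vc_state_t;

static toy2_vc_state_t toy2_on_close_pkt(toy2_vc_state_t state, int ack)
{
    if (!ack)   /* peer initiated a close */
        return (state == TOY2_VC_LOCAL_CLOSE) ? TOY2_VC_CLOSE_ACKED
                                              : TOY2_VC_REMOTE_CLOSE;
    /* ack == TRUE: peer acknowledged our close; connection terminates */
    return TOY2_VC_CLOSED;
}

int main(void)
{
    printf("ACTIVE      + close(FALSE) -> %d (REMOTE_CLOSE=%d)\n",
           toy2_on_close_pkt(TOY2_VC_ACTIVE, 0), TOY2_VC_REMOTE_CLOSE);
    printf("LOCAL_CLOSE + close(FALSE) -> %d (CLOSE_ACKED=%d)\n",
           toy2_on_close_pkt(TOY2_VC_LOCAL_CLOSE, 0), TOY2_VC_CLOSE_ACKED);
    printf("LOCAL_CLOSE + close(TRUE)  -> %d (CLOSED=%d)\n",
           toy2_on_close_pkt(TOY2_VC_LOCAL_CLOSE, 1), TOY2_VC_CLOSED);
    return 0;
}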
int MPIDI_CH3_PktHandler_RndvReqToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        MPIDI_msg_sz_t *buflen,
                                        MPID_Request **rreqp )
{
    MPID_Request * rreq;
    int found;
    MPIDI_CH3_Pkt_rndv_req_to_send_t * rts_pkt = &pkt->rndv_req_to_send;
    int mpi_errno = MPI_SUCCESS;

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
        "received rndv RTS pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d, data_sz=" MPIDI_MSG_SZ_FMT,
        rts_pkt->sender_req_id, rts_pkt->match.parts.rank,
        rts_pkt->match.parts.tag, rts_pkt->match.parts.context_id,
        rts_pkt->data_sz));
    MPIU_DBG_MSGPKT(vc,rts_pkt->match.parts.tag,rts_pkt->match.parts.context_id,
                    rts_pkt->match.parts.rank,rts_pkt->data_sz,
                    "ReceivedRndv");

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_MSGQ_MUTEX);
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&rts_pkt->match, &found);
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq",
                         "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this. */
    if (!found && MPID_cc_get(rreq->cc) == 0) {
        /* release the message queue mutex before bailing out */
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_MSGQ_MUTEX);
        *rreqp = NULL;
        goto fn_fail;
    }

    set_request_info(rreq, rts_pkt, MPIDI_REQUEST_RNDV_MSG);
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_MSGQ_MUTEX);

    *buflen = sizeof(MPIDI_CH3_Pkt_t);

    if (found) {
        MPID_Request * cts_req;
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"posted request found");

        /* FIXME: What if the receive user buffer is not big enough to
           hold the data about to be cleared for sending? */

        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending rndv CTS packet");
        MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
        cts_pkt->sender_req_id = rts_pkt->sender_req_id;
        cts_pkt->receiver_req_id = rreq->handle;

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iStartMsg(vc, cts_pkt, sizeof(*cts_pkt),
                                        &cts_req);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|ctspkt");
        }
        if (cts_req != NULL) {
            MPID_Request_release(cts_req);
        }
    }
    else {
        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"unexpected request allocated");

        /*
         * An MPID_Probe() may be waiting for the request we just
         * inserted, so we need to tell the progress engine to exit.
         *
         * FIXME: This will cause MPID_Progress_wait() to return to the
         * MPI layer each time an unexpected RTS packet is received.
         * MPID_Probe() should atomically increment a counter and
         * MPIDI_CH3_Progress_signal_completion() should only be called
         * if that counter is greater than zero.
         */
        MPIDI_CH3_Progress_signal_completion();
    }

    *rreqp = NULL;

 fn_fail:
    return mpi_errno;
}
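/*
 * Toy model (illustrative, not the CH3 receive queue) of the
 * find-posted-or-allocate-unexpected step above: an arriving envelope
 * either matches a posted receive, or is parked on the unexpected queue
 * for a later receive or probe to claim.  Fixed-size arrays stand in for
 * the real queues; wildcard matching is omitted.
 */
#include <stdio.h>
#include <stdbool.h>
#include <string.h>

#define TOY_QLEN 8

typedef struct { int rank, tag, context_id; } toy_match_t;
typedef struct { toy_match_t m; bool in_use; } toy_slot_t;

static toy_slot_t toy_posted[TOY_QLEN], toy_unexpected[TOY_QLEN];

/* Returns false only when the unexpected queue is exhausted
   (the analogue of the "**nomemreq" failure above). */
static bool toy_fdp_or_aeu(toy_match_t m, bool *found)
{
    for (int i = 0; i < TOY_QLEN; i++) {
        if (toy_posted[i].in_use &&
            memcmp(&toy_posted[i].m, &m, sizeof m) == 0) {
            toy_posted[i].in_use = false;   /* dequeue the posted recv */
            *found = true;
            return true;
        }
    }
    for (int i = 0; i < TOY_QLEN; i++) {
        if (!toy_unexpected[i].in_use) {    /* allocate unexpected entry */
            toy_unexpected[i].m = m;
            toy_unexpected[i].in_use = true;
            *found = false;
            return true;
        }
    }
    return false;
}

int main(void)
{
    bool found;
    toy_match_t m = { 3, 7, 0 };
    toy_posted[0].m = m;
    toy_posted[0].in_use = true;
    toy_fdp_or_aeu(m, &found);
    printf("first arrival matched posted: %d\n", found);
    toy_fdp_or_aeu(m, &found);
    printf("second arrival matched posted: %d\n", found);
    return 0;
}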
int MPID_Cancel_send(MPIR_Request * sreq)
{
    MPIDI_VC_t * vc;
    int proto;
    int flag;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CANCEL_SEND);

    MPIR_Assert(sreq->kind == MPIR_REQUEST_KIND__SEND);

    MPIDI_Request_cancel_pending(sreq, &flag);
    if (flag) {
        goto fn_exit;
    }

    /*
     * FIXME: user requests returned by MPI_Ibsend() have a NULL comm pointer
     * and no pointer to the underlying communication request.  For now, we
     * simply fail to cancel the request.  In the future, we should add a new
     * request kind to indicate that the request is a BSEND.  Then we can
     * properly cancel the request, much in the way we do persistent requests.
     */
    if (sreq->comm == NULL) {
        goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(sreq->comm, sreq->dev.match.parts.rank, &vc);

    proto = MPIDI_Request_get_msg_type(sreq);

    if (proto == MPIDI_REQUEST_SELF_MSG) {
        MPIR_Request * rreq;

        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,
                    "attempting to cancel message sent to self");

        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
        rreq = MPIDI_CH3U_Recvq_FDU(sreq->handle, &sreq->dev.match);
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);

        if (rreq) {
            MPIR_Assert(rreq->dev.partner_request == sreq);

            MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
                "send-to-self cancellation successful, sreq=0x%08x, rreq=0x%08x",
                sreq->handle, rreq->handle));

            /* Pull the message out of the unexpected queue since it's
             * being cancelled.  The below request release drops one
             * reference.  We explicitly drop a second reference,
             * because the receive request will never be visible to
             * the user. */
            MPIR_Request_free(rreq);
            MPIR_Request_free(rreq);

            MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);
            mpi_errno = MPID_Request_complete(sreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
        }
        else {
            MPIR_STATUS_SET_CANCEL_BIT(sreq->status, FALSE);
            /* rreq is NULL here, so only the send request can be reported */
            MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
                "send-to-self cancellation failed, sreq=0x%08x",
                sreq->handle));
        }

        goto fn_exit;
    }

    /* If the message went over a netmod and it provides a cancel_send
       function, call it here. */
#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->cancel_send) {
        mpi_errno = vc->comm_ops->cancel_send(vc, sreq);
        goto fn_exit;
    }
#endif

    /* Check to see if the send is still in the send queue.  If so, remove
       it, mark the request as cancelled and complete, and release the
       device's reference to the request object. */
    {
        int cancelled;

        if (proto == MPIDI_REQUEST_RNDV_MSG) {
            MPIR_Request * rts_sreq;
            /* The cancellation of the RTS request needs to be atomic
               through the destruction of the RTS request to avoid
               conflict with release of the RTS request if the CTS is
               received (see handling of a rendezvous CTS packet in
               MPIDI_CH3U_Handle_recv_pkt()).
               MPID_Request_fetch_and_clear_rts_sreq() is used to
               guarantee that atomicity. */
            MPIDI_Request_fetch_and_clear_rts_sreq(sreq, &rts_sreq);
            if (rts_sreq != NULL) {
                cancelled = FALSE;

                /* since we attempted to cancel a RTS request, then we are
                   responsible for releasing that request */
                MPIR_Request_free(rts_sreq);

                /* --BEGIN ERROR HANDLING-- */
                if (mpi_errno != MPI_SUCCESS) {
                    mpi_errno = MPIR_Err_create_code(mpi_errno,
                                    MPIR_ERR_RECOVERABLE, __func__, __LINE__,
                                    MPI_ERR_OTHER, "**ch3|cancelrndv", 0);
                    goto fn_exit;
                }
                /* --END ERROR HANDLING-- */

                if (cancelled) {
                    MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);
                    /* no other thread should be waiting on sreq, so it is
                       safe to reset ref_count and cc */
                    MPIR_cc_set(&sreq->cc, 0);
                    /* FIXME should be a decr and assert, not a set */
                    MPIR_Object_set_ref(sreq, 1);
                    goto fn_exit;
                }
            }
        }
        else {
            cancelled = FALSE;
            if (cancelled) {
                MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);
                /* no other thread should be waiting on sreq, so it is
                   safe to reset ref_count and cc */
                MPIR_cc_set(&sreq->cc, 0);
                /* FIXME should be a decr and assert, not a set */
                MPIR_Object_set_ref(sreq, 1);
                goto fn_exit;
            }
        }
    }

    /* Part or all of the message has already been sent, so we need to send
       a cancellation request to the receiver in an attempt to catch the
       message before it is matched. */
    {
        int was_incomplete;
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_cancel_send_req_t * const csr_pkt =
            &upkt.cancel_send_req;
        MPIR_Request * csr_sreq;

        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
            "sending cancel request to %d for 0x%08x",
            sreq->dev.match.parts.rank, sreq->handle));

        /* The completion counter and reference count are incremented to
           keep the request around long enough to receive a response
           regardless of what the user does (free the request before
           waiting, etc.). */
        MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
        if (!was_incomplete) {
            /* The reference count is incremented only if the request was
               complete before the increment. */
            MPIR_Request_add_ref(sreq);
        }

        MPIDI_Pkt_init(csr_pkt, MPIDI_CH3_PKT_CANCEL_SEND_REQ);
        csr_pkt->match.parts.rank = sreq->comm->rank;
        csr_pkt->match.parts.tag = sreq->dev.match.parts.tag;
        csr_pkt->match.parts.context_id = sreq->dev.match.parts.context_id;
        csr_pkt->sender_req_id = sreq->handle;

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iStartMsg(vc, csr_pkt, sizeof(*csr_pkt),
                                        &csr_sreq);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|cancelreq");
        }

        if (csr_sreq != NULL) {
            MPIR_Request_free(csr_sreq);
        }
    }

    /* FIXME: if send cancellation packets are allowed to arrive
       out-of-order with respect to send packets, then we need to
       timestamp send and cancel packets to ensure that a cancellation
       request does not bypass the send packet to be cancelled and
       erroneously cancel a previously sent message with the same request
       handle. */
    /* FIXME: A timestamp is more than is necessary; a message sequence
       number should be adequate. */

 fn_fail:
 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CANCEL_SEND);
    return mpi_errno;
}
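/*
 * Illustrative model (assumed semantics, not MPIR internals) of the hold
 * taken above before sending the cancel request: raising the completion
 * counter keeps the request incomplete until the cancel response arrives,
 * and the reference count is raised only when the request had already
 * completed, mirroring the MPIDI_CH3U_Request_increment_cc /
 * MPIR_Request_add_ref pairing.
 */
#include <stdio.h>

typedef struct { int cc; int ref; } toy_req_t;

static void toy_hold_for_cancel_response(toy_req_t *req)
{
    int was_incomplete = (req->cc > 0);
    ++req->cc;              /* request stays "busy" until the response */
    if (!was_incomplete)
        ++req->ref;         /* completion had already dropped this ref */
}

int main(void)
{
    toy_req_t live = { 1, 2 };   /* still in flight */
    toy_req_t done = { 0, 1 };   /* already completed */
    toy_hold_for_cancel_response(&live);
    toy_hold_for_cancel_response(&done);
    printf("live: cc=%d ref=%d   done: cc=%d ref=%d\n",
           live.cc, live.ref, done.cc, done.ref);
    return 0;
}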
int MPIDI_CH3_PktHandler_EagerSyncSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                        intptr_t *buflen,
                                        MPIR_Request **rreqp )
{
    MPIDI_CH3_Pkt_eager_send_t * es_pkt = &pkt->eager_send;
    MPIR_Request * rreq;
    int found;
    int complete;
    char *data_buf;
    intptr_t data_len;
    int mpi_errno = MPI_SUCCESS;

    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
        "received eager sync send pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
        es_pkt->sender_req_id, es_pkt->match.parts.rank,
        es_pkt->match.parts.tag, es_pkt->match.parts.context_id));
    MPL_DBG_MSGPKT(vc,es_pkt->match.parts.tag,es_pkt->match.parts.context_id,
                   es_pkt->match.parts.rank,es_pkt->data_sz,
                   "ReceivedEagerSync");

    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&es_pkt->match, &found);
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq",
                         "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this. */
    if (!found && MPIR_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }

    set_request_info(rreq, es_pkt, MPIDI_REQUEST_EAGER_MSG);

    data_len = ((*buflen - sizeof(MPIDI_CH3_Pkt_t) >= rreq->dev.recv_data_sz)
                ? rreq->dev.recv_data_sz : *buflen - sizeof(MPIDI_CH3_Pkt_t));
    data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);

    if (found) {
        MPIDI_CH3_Pkt_t upkt;
        MPIDI_CH3_Pkt_eager_sync_ack_t * const esa_pkt = &upkt.eager_sync_ack;
        MPIR_Request * esa_req;

        if (rreq->dev.recv_data_sz == 0) {
            *buflen = sizeof(MPIDI_CH3_Pkt_t);
            mpi_errno = MPID_Request_complete(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
            *rreqp = NULL;
        }
        else {
            mpi_errno = MPIDI_CH3U_Receive_data_found(rreq, data_buf,
                                                      &data_len, &complete);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
                                     "**ch3|postrecv %s",
                                     "MPIDI_CH3_PKT_EAGER_SYNC_SEND");
            }

            *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

            if (complete) {
                mpi_errno = MPID_Request_complete(rreq);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIR_ERR_POP(mpi_errno);
                }
                *rreqp = NULL;
            }
            else {
                *rreqp = rreq;
            }
        }

        MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending eager sync ack");

        MPIDI_Pkt_init(esa_pkt, MPIDI_CH3_PKT_EAGER_SYNC_ACK);
        esa_pkt->sender_req_id = rreq->dev.sender_req_id;
        /* Because this is a packet handler, it is already within a CH3 CS */
        /* MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex); */
        mpi_errno = MPIDI_CH3_iStartMsg(vc, esa_pkt, sizeof(*esa_pkt),
                                        &esa_req);
        /* MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex); */
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|syncack");
        }
        if (esa_req != NULL) {
            MPIR_Request_free(esa_req);
        }
    }
    else {
        if (rreq->dev.recv_data_sz == 0) {
            *buflen = sizeof(MPIDI_CH3_Pkt_t);
            mpi_errno = MPID_Request_complete(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
            *rreqp = NULL;
        }
        else {
            mpi_errno = MPIDI_CH3U_Receive_data_unexpected(rreq, data_buf,
                                                           &data_len,
                                                           &complete);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
                                     "**ch3|postrecv %s",
                                     "MPIDI_CH3_PKT_EAGER_SYNC_SEND");
            }

            *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

            if (complete) {
                mpi_errno = MPID_Request_complete(rreq);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIR_ERR_POP(mpi_errno);
                }
                *rreqp = NULL;
            }
            else {
                *rreqp = rreq;
            }
        }
        MPIDI_Request_set_sync_send_flag(rreq, TRUE);
    }

 fn_fail:
    return mpi_errno;
}
int MPIDI_CH3_EagerContigShortSend( MPID_Request **sreq_p,
                                    MPIDI_CH3_Pkt_type_t reqtype,
                                    const void * buf, MPIDI_msg_sz_t data_sz,
                                    int rank, int tag, MPID_Comm * comm,
                                    int context_offset )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_VC_t * vc;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_eagershort_send_t * const eagershort_pkt =
        &upkt.eagershort_send;
    MPID_Request *sreq = *sreq_p;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif

    /* printf("Sending short eager\n"); fflush(stdout); */
    MPIDI_Pkt_init(eagershort_pkt, reqtype);
    eagershort_pkt->match.parts.rank = comm->rank;
    eagershort_pkt->match.parts.tag = tag;
    eagershort_pkt->match.parts.context_id = comm->context_id + context_offset;
    eagershort_pkt->data_sz = data_sz;

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
        "sending contiguous short eager message, data_sz=" MPIDI_MSG_SZ_FMT,
        data_sz));

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(eagershort_pkt, seqnum);

    /* Copy the payload.  We could optimize this if data_sz & 0x3 == 0
       (copy (data_sz >> 2) ints, inline that since data size is
       currently limited to 4 ints) */
    {
        unsigned char * restrict p = (unsigned char *)eagershort_pkt->data;
        unsigned char const * restrict bufp = (unsigned char *)buf;
        int i;
        for (i = 0; i < data_sz; i++) {
            *p++ = *bufp++;
        }
    }

    MPIU_DBG_MSGPKT(vc,tag,eagershort_pkt->match.parts.context_id,rank,
                    data_sz,"EagerShort");
    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, eagershort_pkt,
                                    sizeof(*eagershort_pkt), sreq_p);
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg");
    }

    sreq = *sreq_p;
    if (sreq != NULL) {
        /* printf("Surprise, did not complete send of eagershort "
           "(starting connection?)\n"); fflush(stdout); */
        /* MT FIXME setting fields in the request after it has been given
         * to the progress engine is racy.  The start call above is
         * protected by the vc CS, but the progress engine is protected by
         * MPIDCOMM.  So we can't just extend the CS type below this
         * point... what's the fix? */
        MPIDI_Request_set_seqnum(sreq, seqnum);
        MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
    }

 fn_fail:
    return mpi_errno;
}
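/*
 * The copy loop above notes that it "could optimize this if
 * data_sz & 0x3 == 0 (copy (data_sz >> 2) ints)".  A minimal standalone
 * version of that idea follows; the alignment checks and the memcpy
 * fallback are assumptions added here, since the packet payload layout
 * is not restated in this excerpt.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void toy_copy_payload(void *dst, const void *src, size_t data_sz)
{
    if ((data_sz & 0x3) == 0 &&
        ((uintptr_t)dst & 0x3) == 0 && ((uintptr_t)src & 0x3) == 0) {
        /* word-at-a-time copy: data_sz >> 2 aligned 32-bit words */
        uint32_t *d = dst;
        const uint32_t *s = src;
        for (size_t i = 0; i < (data_sz >> 2); i++)
            d[i] = s[i];
    } else {
        memcpy(dst, src, data_sz);   /* fallback: byte copy */
    }
}

int main(void)
{
    uint32_t src[4] = { 1, 2, 3, 4 }, dst[4] = { 0 };
    toy_copy_payload(dst, src, sizeof src);
    printf("%u %u %u %u\n", dst[0], dst[1], dst[2], dst[3]);
    return 0;
}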