/* MPIDI_CH3_EagerNoncontigSend - Eagerly send noncontiguous data */ int MPIDI_CH3_EagerNoncontigSend( MPID_Request **sreq_p, MPIDI_CH3_Pkt_type_t reqtype, const void * buf, MPI_Aint count, MPI_Datatype datatype, MPIDI_msg_sz_t data_sz, int rank, int tag, MPID_Comm * comm, int context_offset ) { int mpi_errno = MPI_SUCCESS; MPIDI_VC_t * vc; MPID_Request *sreq = *sreq_p; MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send; MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "sending non-contiguous eager message, data_sz=" MPIDI_MSG_SZ_FMT, data_sz)); sreq->dev.OnDataAvail = 0; sreq->dev.OnFinal = 0; MPIDI_Pkt_init(eager_pkt, reqtype); eager_pkt->match.parts.rank = comm->rank; eager_pkt->match.parts.tag = tag; eager_pkt->match.parts.context_id = comm->context_id + context_offset; eager_pkt->sender_req_id = MPI_REQUEST_NULL; eager_pkt->data_sz = data_sz; MPIDI_Comm_get_vc_set_active(comm, rank, &vc); MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Pkt_set_seqnum(eager_pkt, seqnum); MPIDI_Request_set_seqnum(sreq, seqnum); MPIU_DBG_MSGPKT(vc,tag,eager_pkt->match.parts.context_id,rank,data_sz, "Eager"); sreq->dev.segment_ptr = MPID_Segment_alloc( ); MPIR_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc"); MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0); sreq->dev.segment_first = 0; sreq->dev.segment_size = data_sz; MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex); mpi_errno = vc->sendNoncontig_fn(vc, sreq, eager_pkt, sizeof(MPIDI_CH3_Pkt_eager_send_t)); MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex); if (mpi_errno) MPIR_ERR_POP(mpi_errno); fn_exit: return mpi_errno; fn_fail: *sreq_p = NULL; goto fn_exit; }
/*
 * MPID_Irsend - device-level nonblocking ready-mode send.
 *
 * Paths visible in this block:
 *   - communicator revoked and the tag (with error bits and the tagged
 *     collective mask removed) is neither MPIR_AGREE_TAG nor
 *     MPIR_SHRINK_TAG: fails with MPIX_ERR_REVOKED;
 *   - send to self on a non-intercommunicator: delegated to MPIDI_Isend_self;
 *   - ENABLE_COMM_OVERRIDES and the VC provides comm_ops->irsend: delegated
 *     to that override, which creates its own request;
 *   - rank == MPI_PROC_NULL: a request is created, given reference count 1
 *     and completion counter 0, and returned without sending anything;
 *   - zero-byte message: only the ready-send header is sent via
 *     MPIDI_CH3_iSend;
 *   - vc->ready_eager_max_msg_sz is negative, or payload plus header fits in
 *     it: eager send (contiguous or noncontiguous variant);
 *   - otherwise: rendezvous through vc->rndvSend_fn.
 *
 * Parameters:
 *   buf, count, datatype - user data to send
 *   rank                 - destination rank (may be MPI_PROC_NULL)
 *   tag                  - message tag
 *   comm                 - communicator
 *   context_offset       - added to comm->context_id for matching
 *   request              - out: receives the send request (sreq) at fn_exit;
 *                          NULL if no request exists on that path
 *
 * NOTE(review): the tail of this function (everything after the debug
 * statement that follows fn_exit, presumably including the fn_fail label used
 * by MPIR_ERR_SETANDJUMP) is not visible in this block.
 */
int MPID_Irsend(const void * buf, int count, MPI_Datatype datatype, int rank,
                int tag, MPID_Comm * comm, int context_offset,
                MPID_Request ** request)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_ready_send_t * const ready_pkt = &upkt.ready_send;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    /* Initialized so that an early error jump (e.g. the revoked-communicator
     * check) never stores an indeterminate pointer into *request. */
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_IRSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_IRSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                "rank=%d, tag=%d, context=%d",
                rank, tag, comm->context_id + context_offset));

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked &&
            MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) &&
            MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }

    /* Sending to ourselves within an intracommunicator. */
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) {
        mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm,
                                     context_offset, MPIDI_REQUEST_TYPE_RSEND,
                                     &sreq);
        goto fn_exit;
    }

    if (rank != MPI_PROC_NULL) {
        MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
#ifdef ENABLE_COMM_OVERRIDES
        /* this needs to come before the sreq is created, since the override
         * function is responsible for creating its own request */
        if (vc->comm_ops && vc->comm_ops->irsend) {
            mpi_errno = vc->comm_ops->irsend( vc, buf, count, datatype, rank,
                                              tag, comm, context_offset, &sreq);
            goto fn_exit;
        }
#endif
    }

    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_RSEND);
    MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);

    /* Nothing is transmitted to MPI_PROC_NULL: hand back a request whose
     * completion counter is already zero. */
    if (rank == MPI_PROC_NULL) {
        MPIU_Object_set_ref(sreq, 1);
        MPID_cc_set(&sreq->cc, 0);
        goto fn_exit;
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr,
                            dt_true_lb);

    /* Ready-send header; only used directly by the zero-length path below. */
    MPIDI_Pkt_init(ready_pkt, MPIDI_CH3_PKT_READY_SEND);
    ready_pkt->match.parts.rank = comm->rank;
    ready_pkt->match.parts.tag = tag;
    ready_pkt->match.parts.context_id = comm->context_id + context_offset;
    ready_pkt->sender_req_id = MPI_REQUEST_NULL;
    ready_pkt->data_sz = data_sz;

    if (data_sz == 0) {
        MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");

        sreq->dev.OnDataAvail = 0;

        MPIDI_VC_FAI_send_seqnum(vc, seqnum);
        MPIDI_Pkt_set_seqnum(ready_pkt, seqnum);
        MPIDI_Request_set_seqnum(sreq, seqnum);

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iSend(vc, sreq, ready_pkt, sizeof(*ready_pkt));
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            MPID_Request_release(sreq);
            sreq = NULL;
            MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */
        goto fn_exit;
    }

    /* A negative ready_eager_max_msg_sz selects the eager path regardless of
     * message size. */
    if (vc->ready_eager_max_msg_sz < 0 ||
        data_sz + sizeof(MPIDI_CH3_Pkt_ready_send_t) <= vc->ready_eager_max_msg_sz) {
        if (dt_contig) {
            mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq,
                                                    MPIDI_CH3_PKT_READY_SEND,
                                                    (char*)buf + dt_true_lb,
                                                    data_sz, rank, tag,
                                                    comm, context_offset );
        } else {
            mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq,
                                                      MPIDI_CH3_PKT_READY_SEND,
                                                      buf, count, datatype,
                                                      data_sz, rank, tag,
                                                      comm, context_offset );
            /* If we're not complete, then add a reference to the datatype */
            if (sreq && sreq->dev.OnDataAvail) {
                sreq->dev.datatype_ptr = dt_ptr;
                MPID_Datatype_add_ref(dt_ptr);
            }
        }
    } else {
        /* Do rendezvous.  This will be sent as a regular send not as
           a ready send, so the receiver won't know to send an error
           if the receive has not been posted */
        MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG );
        mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm,
                                     context_offset );
        /* Record the datatype on the request and take a reference to it. */
        if (sreq && dt_ptr != NULL) {
            sreq->dev.datatype_ptr = dt_ptr;
            MPID_Datatype_add_ref(dt_ptr);
        }
    }

  fn_exit:
    *request = sreq;

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,{
        if (sreq != NULL) {
            MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle);
        }
    }
                  );
/*
 * MPID_Ssend - device-level synchronous-mode send.
 *
 * Paths visible in this block:
 *   - communicator revoked and the tag (with error bits and the tagged
 *     collective mask removed) is neither MPIR_AGREE_TAG nor
 *     MPIR_SHRINK_TAG: fails with MPIX_ERR_REVOKED;
 *   - send to self on a non-intercommunicator: delegated to
 *     MPIDI_Isend_self; when MPICH_IS_THREADED is not defined and the
 *     returned request is still incomplete, "**dev|selfsenddeadlock" is
 *     reported;
 *   - rank == MPI_PROC_NULL: nothing is sent and *request ends up NULL;
 *   - ENABLE_COMM_OVERRIDES and the VC provides comm_ops->ssend: delegated
 *     to that override;
 *   - zero-byte message: MPIDI_CH3_EagerSyncZero;
 *   - payload plus eager-sync header within the eager threshold:
 *     MPIDI_CH3_EagerSyncNoncontigSend, otherwise vc->rndvSend_fn.
 *
 * Parameters:
 *   buf, count, datatype - user data to send
 *   rank                 - destination rank (may be MPI_PROC_NULL)
 *   tag                  - message tag
 *   comm                 - communicator
 *   context_offset       - added to comm->context_id for matching
 *   request              - out: receives sreq at fn_exit (NULL if no request
 *                          was created on the path taken)
 *
 * NOTE(review): the tail of this function (after the debug statement that
 * follows fn_exit) is not visible in this block.
 */
int MPID_Ssend(const void * buf, MPI_Aint count, MPI_Datatype datatype,
               int rank, int tag, MPID_Comm * comm, int context_offset,
               MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    /* NULL until a request is created, so early exits return NULL. */
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif
    int eager_threshold = -1;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_SSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
              "rank=%d, tag=%d, context=%d",
              rank, tag, comm->context_id + context_offset));

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked &&
            MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) &&
            MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }

    /* Sending to ourselves within an intracommunicator. */
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) {
        mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm,
                                     context_offset, MPIDI_REQUEST_TYPE_SSEND,
                                     &sreq);
        /* In the single threaded case, sending to yourself will cause
           deadlock.  Note that in the runtime-thread case, this check
           will not be made (long-term FIXME) */
# ifndef MPICH_IS_THREADED
        {
            /* --BEGIN ERROR HANDLING-- */
            /* A request that is still incomplete here is reported as a
             * self-send deadlock. */
            if (sreq != NULL && MPID_cc_get(sreq->cc) != 0) {
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                                 MPIR_ERR_RECOVERABLE, FCNAME,
                                                 __LINE__, MPI_ERR_OTHER,
                                                 "**dev|selfsenddeadlock", 0);
                goto fn_exit;
            }
            /* --END ERROR HANDLING-- */
        }
# endif
        goto fn_exit;
    }

    /* Nothing to send; sreq is still NULL at this point. */
    if (rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    /* Let the virtual connection's own ssend implementation take over when
     * it provides one. */
    if (vc->comm_ops && vc->comm_ops->ssend) {
        mpi_errno = vc->comm_ops->ssend( vc, buf, count, datatype, rank, tag,
                                         comm, context_offset, &sreq);
        goto fn_exit;
    }
#endif

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr,
                            dt_true_lb);

    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND);

    if (data_sz == 0) {
        mpi_errno = MPIDI_CH3_EagerSyncZero( &sreq, rank, tag, comm,
                                             context_offset );
        goto fn_exit;
    }

    MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc);

    /* NOTE(review): eager_threshold is an int compared against a sum that
     * includes a sizeof (size_t) -- confirm the macro above never leaves it
     * negative. */
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= eager_threshold) {
        mpi_errno = MPIDI_CH3_EagerSyncNoncontigSend( &sreq, buf, count,
                                                      datatype, data_sz,
                                                      dt_contig, dt_true_lb,
                                                      rank, tag, comm,
                                                      context_offset );
    } else {
        /* Note that the sreq was created above */
        mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm,
                                     context_offset );
        /* Note that we don't increase the ref count on the datatype
           because this is a blocking call, and the calling routine
           must wait until sreq completes */
    }

 fn_fail:
 fn_exit:
    /* Every path ends here: publish the request (possibly NULL). */
    *request = sreq;

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,{if (sreq!=NULL) {
            MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
                           "request allocated, handle=0x%08x", sreq->handle);}});
/*
 * MPID_Ssend - device-level synchronous-mode send (variant with _OSU_PSM_ /
 * _OSU_MVAPICH_ conditional code).
 *
 * Paths visible in this block:
 *   - send to self on a non-intercommunicator: with _OSU_PSM_ the self-send
 *     branch is skipped via skip_self_send; otherwise it is delegated to
 *     MPIDI_Isend_self, and when MPICH_IS_THREADED is not defined a request
 *     whose cc is still non-zero is reported as "**dev|selfsenddeadlock";
 *   - rank == MPI_PROC_NULL: nothing is sent and *request ends up NULL;
 *   - ENABLE_COMM_OVERRIDES and the VC provides comm_ops->ssend: delegated
 *     to that override;
 *   - with _OSU_PSM_: every message (including zero-byte ones, via
 *     psm_ssend) sets PSM_SYNC_SEND in sreq->psm_flags and goes through
 *     MPIDI_CH3_EagerContigSend or MPIDI_CH3_EagerNoncontigSend, then exits;
 *   - without _OSU_PSM_: zero bytes use MPIDI_CH3_EagerSyncZero; payload plus
 *     eager-sync header within vc->eager_max_msg_sz (and, with
 *     _OSU_MVAPICH_, vc->force_rndv not set) uses
 *     MPIDI_CH3_EagerSyncNoncontigSend; otherwise vc->rndvSend_fn.
 *
 * Parameters:
 *   buf, count, datatype - user data to send
 *   rank                 - destination rank (may be MPI_PROC_NULL)
 *   tag                  - message tag
 *   comm                 - communicator
 *   context_offset       - added to comm->context_id in the debug output and
 *                          passed on to the send routines
 *   request              - out: receives sreq at fn_exit (NULL if no request
 *                          was created on the path taken)
 *
 * NOTE(review): the tail of this function (after the debug statement that
 * follows fn_exit) is not visible in this block.
 */
int MPID_Ssend(const void * buf, int count, MPI_Datatype datatype, int rank,
               int tag, MPID_Comm * comm, int context_offset,
               MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    /* NULL until a request is created, so early exits return NULL. */
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_SSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
              "rank=%d, tag=%d, context=%d",
              rank, tag, comm->context_id + context_offset));

    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) {
#if defined(_OSU_PSM_)
        goto skip_self_send; /* PSM internally, will optimize self-send */
#endif
        mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm,
                                     context_offset, MPIDI_REQUEST_TYPE_SSEND,
                                     &sreq);
        /* In the single threaded case, sending to yourself will cause
           deadlock.  Note that in the runtime-thread case, this check
           will not be made (long-term FIXME) */
# ifndef MPICH_IS_THREADED
        {
            /* --BEGIN ERROR HANDLING-- */
            /* A request that is still incomplete here is reported as a
             * self-send deadlock. */
            if (sreq != NULL && sreq->cc != 0) {
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                                 MPIR_ERR_RECOVERABLE, FCNAME,
                                                 __LINE__, MPI_ERR_OTHER,
                                                 "**dev|selfsenddeadlock", 0);
                goto fn_exit;
            }
            /* --END ERROR HANDLING-- */
        }
# endif
        goto fn_exit;
    }
#if defined (_OSU_PSM_)
skip_self_send:
#endif

    /* Nothing to send; sreq is still NULL at this point. */
    if (rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    /* Let the virtual connection's own ssend implementation take over when
     * it provides one. */
    if (vc->comm_ops && vc->comm_ops->ssend) {
        mpi_errno = vc->comm_ops->ssend( vc, buf, count, datatype, rank, tag,
                                         comm, context_offset, &sreq);
        goto fn_exit;
    }
#endif

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr,
                            dt_true_lb);

    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND);

    if (data_sz == 0) {
#if defined (_OSU_PSM_)
        /* With PSM, zero-byte messages take the same path as all others. */
        goto psm_ssend;
#endif
        mpi_errno = MPIDI_CH3_EagerSyncZero( &sreq, rank, tag, comm,
                                             context_offset );
        goto fn_exit;
    }

#if defined (_OSU_PSM_)
psm_ssend:
    /* Mark the request as a synchronous send, then use the regular eager
     * send routines. */
    sreq->psm_flags |= PSM_SYNC_SEND;
    if(dt_contig) {
        PSMSG(fprintf(stderr, "psm Sync send\n"));
        mpi_errno = MPIDI_CH3_EagerContigSend(&sreq, MPIDI_CH3_PKT_EAGER_SEND,
                                              (char *)buf + dt_true_lb,
                                              data_sz, rank, tag, comm,
                                              context_offset);
    } else {
        PSMSG(fprintf(stderr, "psm NC-Sync send\n"));
        mpi_errno = MPIDI_CH3_EagerNoncontigSend(&sreq,
                                                 MPIDI_CH3_PKT_EAGER_SEND,
                                                 buf, count, datatype,
                                                 data_sz, rank, tag, comm,
                                                 context_offset);
    }
    /* The eager/rendezvous selection below is never reached with PSM. */
    goto fn_exit;
#endif /* _OSU_PSM_ */

    /* Eager if payload plus header fits the VC's eager limit (and, with
     * _OSU_MVAPICH_, rendezvous is not forced on this VC). */
#if defined(_OSU_MVAPICH_)
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= vc->eager_max_msg_sz
        && ! vc->force_rndv)
#else /* defined(_OSU_MVAPICH_) */
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= vc->eager_max_msg_sz)
#endif /* defined(_OSU_MVAPICH_) */
    {
        mpi_errno = MPIDI_CH3_EagerSyncNoncontigSend( &sreq, buf, count,
                                                      datatype, data_sz,
                                                      dt_contig, dt_true_lb,
                                                      rank, tag, comm,
                                                      context_offset );
    } else {
        /* Note that the sreq was created above */
        mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm,
                                     context_offset );
        /* Note that we don't increase the ref count on the datatype
           because this is a blocking call, and the calling routine
           must wait until sreq completes */
    }

 fn_exit:
    /* Every path ends here: publish the request (possibly NULL). */
    *request = sreq;

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,{if (sreq!=NULL) {
            MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
                           "request allocated, handle=0x%08x", sreq->handle);}});
/*
 * MPIDI_CH3_EagerSyncNoncontigSend - Eagerly send noncontiguous data in
 * synchronous mode.
 *
 * Some implementations may choose to use Rendezvous sends (see ch3u_rndv.c)
 * for all Synchronous sends (MPI_Issend and MPI_Ssend).  An eager
 * synchronous send eliminates one of the handshake messages, but
 * most application codes should not be using synchronous sends in
 * performance-critical operations.
 *
 * Despite the name, contiguous data is handled here as well: when dt_contig
 * is non-zero the header and the user buffer are sent as a two-element IOV
 * through MPIDI_CH3_iSendv; otherwise a segment describing
 * (buf, count, datatype) is attached to the request and the virtual
 * connection's sendNoncontig_fn is used.
 *
 * Parameters:
 *   sreq_p         - in/out: address of the send request; *sreq_p is set to
 *                    NULL when this routine fails
 *   buf, count,
 *   datatype       - user data to send
 *   data_sz        - number of bytes to send
 *   dt_contig      - non-zero if the data is contiguous
 *   dt_true_lb     - byte offset added to buf in the contiguous case
 *   rank           - destination rank used to look up the virtual connection
 *   tag            - message tag stored in the header
 *   comm           - communicator; supplies the sender rank and context id
 *   context_offset - added to comm->context_id to form the match context
 *
 * Returns MPI_SUCCESS, or an MPI error code on failure.
 *
 * NOTE(review): when the segment allocation fails, the request is not
 * released here before *sreq_p is cleared -- confirm who owns the request on
 * that path.
 */
int MPIDI_CH3_EagerSyncNoncontigSend( MPIR_Request **sreq_p,
                                      const void * buf, int count,
                                      MPI_Datatype datatype, intptr_t data_sz,
                                      int dt_contig, MPI_Aint dt_true_lb,
                                      int rank,
                                      int tag, MPIR_Comm * comm,
                                      int context_offset )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_eager_sync_send_t * const es_pkt = &upkt.eager_sync_send;
    MPIDI_VC_t * vc;
    MPIR_Request *sreq = *sreq_p;
    /* The sequence-number macros below name this variable, so it has to exist
     * whenever sequence numbers are compiled in. */
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif

    /* MT FIXME what are the two operations we are waiting for?  the send and
     * the sync response? */
    MPIR_cc_set(&sreq->cc, 2);
    sreq->dev.OnDataAvail = 0;
    sreq->dev.OnFinal = 0;

    /* Build the packet header; the request handle is carried in the header
     * as sender_req_id. */
    MPIDI_Pkt_init(es_pkt, MPIDI_CH3_PKT_EAGER_SYNC_SEND);
    es_pkt->match.parts.rank = comm->rank;
    es_pkt->match.parts.tag = tag;
    es_pkt->match.parts.context_id = comm->context_id + context_offset;
    es_pkt->sender_req_id = sreq->handle;
    es_pkt->data_sz = data_sz;

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(es_pkt, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);

    MPL_DBG_MSGPKT(vc,tag,es_pkt->match.parts.context_id,rank,data_sz,"EagerSync");

    if (dt_contig) {
        MPL_IOV iov[2];

        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
                        "sending contiguous sync eager message, data_sz=%" PRIdPTR,
                        data_sz));

        /* Element 0 is the header, element 1 the user data. */
        iov[0].MPL_IOV_BUF = (MPL_IOV_BUF_CAST)es_pkt;
        iov[0].MPL_IOV_LEN = sizeof(*es_pkt);
        iov[1].MPL_IOV_BUF = (MPL_IOV_BUF_CAST) ((char *)buf + dt_true_lb);
        iov[1].MPL_IOV_LEN = data_sz;

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = MPIDI_CH3_iSendv(vc, sreq, iov, 2);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            /* Make sure to destroy the request before setting the pointer to
             * NULL, otherwise we lose the handle on the request */
            MPIR_Request_free(sreq);
            *sreq_p = NULL;
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
        }
        /* --END ERROR HANDLING-- */
    } else {
        MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER,VERBOSE,
                      "sending non-contiguous sync eager message, data_sz=%" PRIdPTR,
                      data_sz);

        /* Describe the user buffer with a segment covering bytes
         * [0, data_sz). */
        sreq->dev.segment_ptr = MPIDU_Segment_alloc( );
        MPIR_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno,
                             MPI_ERR_OTHER, "**nomem", "**nomem %s",
                             "MPIDU_Segment_alloc");

        MPIDU_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        sreq->dev.segment_size = data_sz;

        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
        mpi_errno = vc->sendNoncontig_fn(vc, sreq, es_pkt,
                                         sizeof(MPIDI_CH3_Pkt_eager_sync_send_t));
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    /* The caller must not use the request after a failure. */
    *sreq_p = NULL;
    goto fn_exit;
}