/*
 * MPID_Send - device-level send of (buf, count, datatype) to `rank` on `comm`.
 *
 * NOTE(review): this excerpt is cut off inside the final `else` branch; the
 * fn_exit/fn_fail labels targeted below, the use of `request`, and the return
 * statement are not visible here.
 *
 * Visible flow, in order:
 *  - Logs rank, tag and (comm->context_id + context_offset).
 *  - If comm->revoked is set and the masked tag is neither MPIR_AGREE_TAG nor
 *    MPIR_SHRINK_TAG, raises MPIX_ERR_REVOKED ("**revoked") and jumps.
 *  - Self-send: when rank == comm->rank on a non-intercommunicator (under
 *    FINEGRAIN_MPI: when COMPARE_RANKS(rank, comm, destpid) holds), calls
 *    MPIDI_Isend_self().  If that hands back a request whose completion
 *    counter is still non-zero, "**dev|selfsenddeadlock" is raised; in the
 *    non-FINEGRAIN branch this check is compiled only when MPICH_IS_THREADED
 *    is not defined.  Any error is popped, then control goes to fn_exit.
 *  - rank == MPI_PROC_NULL: jumps straight to fn_exit.
 *  - Looks up the VC for the destination; a VC in MPIDI_VC_STATE_MORIBUND
 *    raises MPIX_ERR_PROC_FAILED ("**comm_fail").
 *  - With ENABLE_COMM_OVERRIDES, a non-NULL vc->comm_ops->send takes over
 *    the whole operation.
 *  - data_sz == 0: fills an MPIDI_CH3_PKT_EAGER_SEND packet (data_sz 0,
 *    sender_req_id MPI_REQUEST_NULL) and passes it to MPIDI_CH3_iStartMsg()
 *    between MPID_THREAD_CS_ENTER/EXIT(POBJ, vc->pobj_mutex); a failure is
 *    reported as "**ch3|eagermsg".  If a request came back, its seqnum and
 *    type (MPIDI_REQUEST_TYPE_SEND) are recorded.
 *  - Otherwise fetches eager_threshold via MPIDI_CH3_GET_EAGER_THRESHOLD and:
 *      * with USE_EAGER_SHORT, contiguous data of at most
 *        MPIDI_EAGER_SHORT_SIZE bytes goes to MPIDI_CH3_EagerContigShortSend();
 *      * data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold goes
 *        to MPIDI_CH3_EagerContigSend() (contiguous) or, after creating sreq,
 *        to MPIDI_CH3_EagerNoncontigSend();
 *      * the remaining (larger-message) branch is not visible in this excerpt.
 */
int MPID_Send(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request) { MPIDI_msg_sz_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPID_Datatype * dt_ptr; MPID_Request * sreq = NULL; MPIDI_VC_t * vc; #if defined(MPID_USE_SEQUENCE_NUMBERS) MPID_Seqnum_t seqnum; #endif int eager_threshold = -1; int mpi_errno = MPI_SUCCESS; #if defined(FINEGRAIN_MPI) int destpid=-1, destworldrank=-1; #endif MPIDI_STATE_DECL(MPID_STATE_MPID_SEND); MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEND); MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->context_id + context_offset)); /* Check to make sure the communicator hasn't already been revoked */ if (comm->revoked && MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) && MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) { MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked"); } #if defined(FINEGRAIN_MPI) MPIDI_Comm_get_pid_worldrank(comm, rank, &destpid, &destworldrank); if (COMPARE_RANKS(rank,comm,destpid) && comm->comm_kind != MPID_INTERCOMM) { mpi_errno = MPIDI_Isend_self(&buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq); if (rank == comm->rank) { printf("my_fgrank=%d: %s, self send DEADLOCK\n", my_fgrank, __FUNCTION__); if (sreq != NULL && sreq->cc != 0) { MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**dev|selfsenddeadlock"); } } #else if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) { mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq); /* In the single threaded case, sending to yourself will cause deadlock. 
Note that in the runtime-thread case, this check will not be made (long-term FIXME) */ # ifndef MPICH_IS_THREADED { if (sreq != NULL && MPID_cc_get(sreq->cc) != 0) { MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**dev|selfsenddeadlock"); } } # endif #endif if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); } goto fn_exit; } if (rank == MPI_PROC_NULL) { goto fn_exit; } #if defined(FINEGRAIN_MPI) MPIDI_Comm_get_vc_set_active_direct(comm, destpid, &vc); #else MPIDI_Comm_get_vc_set_active(comm, rank, &vc); #endif MPIR_ERR_CHKANDJUMP1(vc->state == MPIDI_VC_STATE_MORIBUND, mpi_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", rank); #ifdef ENABLE_COMM_OVERRIDES if (vc->comm_ops && vc->comm_ops->send) { mpi_errno = vc->comm_ops->send( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq); goto fn_exit; } #endif MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); if (data_sz == 0) { MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send; MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message"); MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND); #if defined(FINEGRAIN_MPI) eager_pkt->match.parts.dest_rank = destworldrank; #endif eager_pkt->match.parts.rank = comm->rank; eager_pkt->match.parts.tag = tag; eager_pkt->match.parts.context_id = comm->context_id + context_offset; eager_pkt->sender_req_id = MPI_REQUEST_NULL; eager_pkt->data_sz = 0; MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Pkt_set_seqnum(eager_pkt, seqnum); MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex); mpi_errno = MPIDI_CH3_iStartMsg(vc, eager_pkt, sizeof(*eager_pkt), &sreq); MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg"); } /* --END ERROR HANDLING-- */ if (sreq != NULL) { MPIDI_Request_set_seqnum(sreq, seqnum); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND); /* sreq->comm = comm; 
MPIR_Comm_add_ref(comm); -- not necessary for blocking functions */ } goto fn_exit; } MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc); /* FIXME: flow control: limit number of outstanding eager messages containing data and need to be buffered by the receiver */ #ifdef USE_EAGER_SHORT if (dt_contig && data_sz <= MPIDI_EAGER_SHORT_SIZE) { mpi_errno = MPIDI_CH3_EagerContigShortSend( &sreq, MPIDI_CH3_PKT_EAGERSHORT_SEND, (char *)buf + dt_true_lb, data_sz, rank, tag, comm, context_offset ); } else #endif if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold) { if (dt_contig) { mpi_errno = MPIDI_CH3_EagerContigSend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, (char *)buf + dt_true_lb, data_sz, rank, tag, comm, context_offset ); } else { MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND); mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, buf, count, datatype, data_sz, rank, tag, comm, context_offset ); } } else {
/*
 * MPID_Isend - device-level nonblocking send of (buf, count, datatype) to
 * `rank` on `comm`; whatever request pointer is held in sreq at fn_exit is
 * stored into *request.
 *
 * NOTE(review): the excerpt ends after the fn_exit debug statement; the
 * function-exit macro, return statement and closing brace are not visible.
 *
 * Visible flow, in order:
 *  - Logs rank, tag and (comm->context_id + context_offset).
 *  - Self-send (rank == comm->rank on a non-intercommunicator) is delegated
 *    to MPIDI_Isend_self(), then control goes to fn_exit.
 *  - For rank != MPI_PROC_NULL the VC is looked up; with
 *    ENABLE_COMM_OVERRIDES a non-NULL vc->comm_ops->isend takes over before
 *    any request is created here.
 *  - A send request is created and typed MPIDI_REQUEST_TYPE_SEND.
 *  - rank == MPI_PROC_NULL: the request's ref count is set to 1 and its
 *    completion counter to 0, then control goes to fn_exit.
 *  - data_sz == 0: fills an MPIDI_CH3_PKT_EAGER_SEND packet (data_sz 0,
 *    sender_req_id = sreq->handle) and passes it to MPIDI_CH3_iSend() between
 *    MPIU_THREAD_CS_ENTER/EXIT(CH3COMM, vc).  On failure the request's ref
 *    count is forced to 0, the request is destroyed, sreq becomes NULL and
 *    the error is set to "**ch3|eagermsg".
 *  - data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold:
 *    MPIDI_CH3_EagerContigIsend() for contiguous data, otherwise
 *    MPIDI_CH3_EagerNoncontigSend(); in the latter case, if sreq is non-NULL
 *    and sreq->dev.OnDataAvail is set, dt_ptr is stored in the request and a
 *    datatype reference is added.
 *  - Otherwise the request is marked MPIDI_REQUEST_RNDV_MSG and
 *    vc->rndvSend_fn() is called; if sreq and dt_ptr are both non-NULL,
 *    dt_ptr is stored in the request and a datatype reference is added.
 */
int MPID_Isend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request) { MPIDI_msg_sz_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPID_Datatype * dt_ptr; MPID_Request * sreq; MPIDI_VC_t * vc=0; #if defined(MPID_USE_SEQUENCE_NUMBERS) MPID_Seqnum_t seqnum; #endif int eager_threshold = -1; int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPID_ISEND); MPIDI_FUNC_ENTER(MPID_STATE_MPID_ISEND); MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->context_id + context_offset)); if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) { mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq); goto fn_exit; } if (rank != MPI_PROC_NULL) { MPIDI_Comm_get_vc_set_active(comm, rank, &vc); #ifdef ENABLE_COMM_OVERRIDES /* this needs to come before the sreq is created, since the override * function is responsible for creating its own request */ if (vc->comm_ops && vc->comm_ops->isend) { mpi_errno = vc->comm_ops->isend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq); goto fn_exit; } #endif } MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND); if (rank == MPI_PROC_NULL) { MPIU_Object_set_ref(sreq, 1); MPID_cc_set(&sreq->cc, 0); goto fn_exit; } MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); if (data_sz == 0) { MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send; MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG); sreq->dev.OnDataAvail = 0; MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message"); MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND); eager_pkt->match.parts.rank = comm->rank; eager_pkt->match.parts.tag = tag; eager_pkt->match.parts.context_id = comm->context_id + context_offset; eager_pkt->sender_req_id = 
sreq->handle; eager_pkt->data_sz = 0; MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Pkt_set_seqnum(eager_pkt, seqnum); MPIDI_Request_set_seqnum(sreq, seqnum); MPIU_THREAD_CS_ENTER(CH3COMM,vc); mpi_errno = MPIDI_CH3_iSend(vc, sreq, eager_pkt, sizeof(*eager_pkt)); MPIU_THREAD_CS_EXIT(CH3COMM,vc); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != MPI_SUCCESS) { MPIU_Object_set_ref(sreq, 0); MPIDI_CH3_Request_destroy(sreq); sreq = NULL; MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg"); goto fn_exit; } /* --END ERROR HANDLING-- */ goto fn_exit; } MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc); /* FIXME: flow control: limit number of outstanding eager messages containing data and need to be buffered by the receiver */ if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold) { if (dt_contig) { mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, (char*)buf + dt_true_lb, data_sz, rank, tag, comm, context_offset ); } else { mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, MPIDI_CH3_PKT_EAGER_SEND, buf, count, datatype, data_sz, rank, tag, comm, context_offset ); /* If we're not complete, then add a reference to the datatype */ if (sreq && sreq->dev.OnDataAvail) { sreq->dev.datatype_ptr = dt_ptr; MPID_Datatype_add_ref(dt_ptr); } } } else { /* Note that the sreq was created above */ MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG ); mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig, data_sz, dt_true_lb, rank, tag, comm, context_offset ); /* FIXME: fill temporary IOV or pack temporary buffer after send to hide some latency. This requires synchronization because the CTS packet could arrive and be processed before the above iStartmsg completes (depending on the progress engine, threads, etc.). 
*/ if (sreq && dt_ptr != NULL) { sreq->dev.datatype_ptr = dt_ptr; MPID_Datatype_add_ref(dt_ptr); } } fn_exit: *request = sreq; MPIU_DBG_STMT(CH3_OTHER,VERBOSE, { if (sreq != NULL) { MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle); } } );
/*
 * MPID_Ssend - device-level send using request type MPIDI_REQUEST_TYPE_SSEND
 * for (buf, count, datatype) to `rank` on `comm`; whatever request pointer is
 * held in sreq at fn_exit is stored into *request.
 *
 * NOTE(review): the excerpt ends after the fn_exit debug statement; the
 * function-exit macro, return statement and closing brace are not visible.
 *
 * Visible flow, in order:
 *  - Logs rank, tag and (comm->context_id + context_offset).
 *  - If comm->revoked is set and the masked tag is neither MPIR_AGREE_TAG nor
 *    MPIR_SHRINK_TAG, raises MPIX_ERR_REVOKED ("**revoked") and jumps.
 *  - Self-send (rank == comm->rank on a non-intercommunicator) is delegated
 *    to MPIDI_Isend_self().  When MPICH_IS_THREADED is not defined, a
 *    returned request whose completion counter is non-zero produces a
 *    "**dev|selfsenddeadlock" error code.  Control then goes to fn_exit.
 *  - rank == MPI_PROC_NULL: jumps straight to fn_exit (sreq stays NULL).
 *  - Looks up the VC; with ENABLE_COMM_OVERRIDES a non-NULL
 *    vc->comm_ops->ssend takes over the whole operation.
 *  - Queries datatype info, creates the send request and types it
 *    MPIDI_REQUEST_TYPE_SSEND.
 *  - data_sz == 0: MPIDI_CH3_EagerSyncZero().
 *  - data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= eager_threshold:
 *    MPIDI_CH3_EagerSyncNoncontigSend(); otherwise vc->rndvSend_fn().
 *    No datatype reference is taken on either path.
 */
int MPID_Ssend(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request) { MPIDI_msg_sz_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPID_Datatype * dt_ptr; MPID_Request * sreq = NULL; MPIDI_VC_t * vc; #if defined(MPID_USE_SEQUENCE_NUMBERS) MPID_Seqnum_t seqnum; #endif int eager_threshold = -1; int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPID_SSEND); MPIDI_FUNC_ENTER(MPID_STATE_MPID_SSEND); MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->context_id + context_offset)); /* Check to make sure the communicator hasn't already been revoked */ if (comm->revoked && MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) && MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) { MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked"); } if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM) { mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SSEND, &sreq); /* In the single threaded case, sending to yourself will cause deadlock. 
Note that in the runtime-thread case, this check will not be made (long-term FIXME) */ # ifndef MPICH_IS_THREADED { /* --BEGIN ERROR HANDLING-- */ if (sreq != NULL && MPID_cc_get(sreq->cc) != 0) { mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**dev|selfsenddeadlock", 0); goto fn_exit; } /* --END ERROR HANDLING-- */ } # endif goto fn_exit; } if (rank == MPI_PROC_NULL) { goto fn_exit; } MPIDI_Comm_get_vc_set_active(comm, rank, &vc); #ifdef ENABLE_COMM_OVERRIDES if (vc->comm_ops && vc->comm_ops->ssend) { mpi_errno = vc->comm_ops->ssend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq); goto fn_exit; } #endif MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND); if (data_sz == 0) { mpi_errno = MPIDI_CH3_EagerSyncZero( &sreq, rank, tag, comm, context_offset ); goto fn_exit; } MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc); if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= eager_threshold) { mpi_errno = MPIDI_CH3_EagerSyncNoncontigSend( &sreq, buf, count, datatype, data_sz, dt_contig, dt_true_lb, rank, tag, comm, context_offset ); } else { /* Note that the sreq was created above */ mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig, data_sz, dt_true_lb, rank, tag, comm, context_offset ); /* Note that we don't increase the ref count on the datatype because this is a blocking call, and the calling routine must wait until sreq completes */ } fn_fail: fn_exit: *request = sreq; MPIU_DBG_STMT(CH3_OTHER,VERBOSE,{if (sreq!=NULL) { MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE, "request allocated, handle=0x%08x", sreq->handle);}});
/*
 * MPID_Issend - device-level nonblocking send using request type
 * MPIDI_REQUEST_TYPE_SSEND for (buf, count, datatype) to `rank` on `comm`;
 * whatever request pointer is held in sreq at fn_exit is stored into *request.
 *
 * NOTE(review): the excerpt ends inside the fn_exit debug statement; the
 * fn_fail label targeted by MPIR_ERR_SETANDJUMP, the return statement and the
 * closing brace are not visible here.
 *
 * Visible flow, in order:
 *  - Logs rank, tag and (comm->context_id + context_offset).
 *  - If comm->revoked is set and the tag (with MPIR_TAG_COLL_BIT cleared and
 *    error bits masked) is neither MPIR_AGREE_TAG nor MPIR_SHRINK_TAG, raises
 *    MPIX_ERR_REVOKED ("**revoked") and jumps.
 *  - Self-send (rank == comm->rank on a non-intercommunicator) is delegated
 *    to MPIDI_Isend_self(), then control goes to fn_exit.
 *  - For rank != MPI_PROC_NULL the VC is looked up; with
 *    ENABLE_COMM_OVERRIDES a non-NULL vc->comm_ops->issend takes over before
 *    any request is created here.
 *  - A send request is created and typed MPIDI_REQUEST_TYPE_SSEND.
 *  - rank == MPI_PROC_NULL: the request's ref count is set to 1 and its
 *    completion counter to 0, then control goes to fn_exit.
 *  - data_sz == 0: MPIDI_CH3_EagerSyncZero().
 *  - data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= eager_threshold:
 *    MPIDI_CH3_EagerSyncNoncontigSend(); afterwards, if sreq is non-NULL and
 *    the datatype handle is not HANDLE_KIND_BUILTIN, dt_ptr is stored in the
 *    request and a datatype reference is added.
 *  - Otherwise the request is marked MPIDI_REQUEST_RNDV_MSG and
 *    vc->rndvSend_fn() is called; if sreq and dt_ptr are both non-NULL,
 *    dt_ptr is stored in the request and a datatype reference is added.
 */
int MPID_Issend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPIR_Comm * comm, int context_offset, MPIR_Request ** request) { intptr_t data_sz; int dt_contig; MPI_Aint dt_true_lb; MPIR_Datatype* dt_ptr; MPIR_Request * sreq; MPIDI_VC_t * vc=0; #if defined(MPID_USE_SEQUENCE_NUMBERS) MPID_Seqnum_t seqnum; #endif int eager_threshold = -1; int mpi_errno = MPI_SUCCESS; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_ISSEND); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_ISSEND); MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->context_id + context_offset)); /* Check to make sure the communicator hasn't already been revoked */ if (comm->revoked && MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_TAG_COLL_BIT) && MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_TAG_COLL_BIT)) { MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked"); } if (rank == comm->rank && comm->comm_kind != MPIR_COMM_KIND__INTERCOMM) { mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SSEND, &sreq); goto fn_exit; } if (rank != MPI_PROC_NULL) { MPIDI_Comm_get_vc_set_active(comm, rank, &vc); /* this needs to come before the sreq is created, since the override */ /* function is responsible for creating its own request */ #ifdef ENABLE_COMM_OVERRIDES if (vc->comm_ops && vc->comm_ops->issend) { mpi_errno = vc->comm_ops->issend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq); goto fn_exit; } #endif } MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit); MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND); if (rank == MPI_PROC_NULL) { MPIR_Object_set_ref(sreq, 1); MPIR_cc_set(&sreq->cc, 0); goto fn_exit; } MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); if (data_sz == 0) { mpi_errno = MPIDI_CH3_EagerSyncZero( &sreq, rank, tag, comm, context_offset ); goto fn_exit; } 
MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc); if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= eager_threshold) { mpi_errno = MPIDI_CH3_EagerSyncNoncontigSend( &sreq, buf, count, datatype, data_sz, dt_contig, dt_true_lb, rank, tag, comm, context_offset ); /* If we're not complete and this is a derived datatype * communication, then add a reference to the datatype */ if (sreq && (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN)) { sreq->dev.datatype_ptr = dt_ptr; MPIR_Datatype_ptr_add_ref(dt_ptr); } } else { /* Note that the sreq was created above */ MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_RNDV_MSG); mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig, data_sz, dt_true_lb, rank, tag, comm, context_offset ); /* FIXME: fill temporary IOV or pack temporary buffer after send to hide some latency. This requires synchronization because the CTS packet could arrive and be processed before the above iStartmsg completes (depending on the progress engine, threads, etc.). */ if (sreq && dt_ptr != NULL) { sreq->dev.datatype_ptr = dt_ptr; MPIR_Datatype_ptr_add_ref(dt_ptr); } } fn_exit: *request = sreq; MPL_DBG_STMT(MPIDI_CH3_DBG_OTHER,VERBOSE, { if (sreq != NULL) { MPL_DBG_MSG_P(MPIDI_CH3_DBG_OTHER,VERBOSE, "request allocated, handle=0x%08x", sreq->handle); } } )