/*
 * Obtain the two context ids used by an intercommunicator.
 *
 * A context id is allocated out of the local communicator of comm_ptr (the
 * local communicator is created first if it does not exist yet).  Rank 0
 * then swaps that id with process 0 on the other side of comm_ptr through
 * MPIC_Sendrecv, and the received value is broadcast over the local
 * communicator so that every local process holds it.
 *
 * On success:
 *   *context_id     - the id received from the other group
 *   *recvcontext_id - the id allocated from the local group
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIR_Get_intercomm_contextid(MPID_Comm * comm_ptr, MPIU_Context_id_t * context_id,
                                 MPIU_Context_id_t * recvcontext_id)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    /* FIXME - we need an internal tag or communication channel.  Can we use
     * a different context instead?  Or can we use the tag provided in the
     * intercomm routine? (not on a dup, but in that case it can use the
     * collective context) */
    int tag = 31567;
    MPIU_Context_id_t local_ctx;
    MPIU_Context_id_t peer_ctx = -1;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);

    /* Manufacture the local communicator on first use */
    if (comm_ptr->local_comm == NULL) {
        mpi_errno = MPIR_Setup_intercomm_localcomm(comm_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    mpi_errno = MPIR_Get_contextid_sparse(comm_ptr->local_comm, &local_ctx, FALSE);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIU_Assert(local_ctx != 0);

    /* The MPIC routine uses an internal context id.  Only the local leads
     * (process 0 of each side) take part in the exchange. */
    if (comm_ptr->rank == 0) {
        mpi_errno = MPIC_Sendrecv(&local_ctx, 1, MPIU_CONTEXT_ID_T_DATATYPE, 0, tag,
                                  &peer_ctx, 1, MPIU_CONTEXT_ID_T_DATATYPE, 0, tag,
                                  comm_ptr, MPI_STATUS_IGNORE, &errflag);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    /* Hand the received id to every process of the local group */
    mpi_errno = MPIR_Bcast_impl(&peer_ctx, 1, MPIU_CONTEXT_ID_T_DATATYPE, 0,
                                comm_ptr->local_comm, &errflag);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

    /* The recvcontext_id must be the one that was allocated out of the local
     * group, not the remote group.  Otherwise we could end up posting two
     * MPI_ANY_SOURCE,MPI_ANY_TAG recvs on the same context IDs even though we
     * are attempting to post them for two separate communicators. */
    *recvcontext_id = local_ctx;
    *context_id = peer_ctx;

  fn_fail:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_INTERCOMM_CONTEXTID);
    return mpi_errno;
}
/*
 * Initialise the SCIF netmod.
 *
 * Records the process-group size and this process's rank, opens/binds/listens
 * on a SCIF endpoint (every rank except the last one), builds the business
 * card, and allocates the connection table and the temporary receive buffer.
 *
 * Returns MPI_SUCCESS or an MPI error code; on failure the memory allocated
 * through the CHKPMEM macros is reaped.
 */
int MPID_nem_scif_init(MPIDI_PG_t * pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
{
    int mpi_errno = MPI_SUCCESS;
    int rc;
    int peer;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_INIT);

    /* our private per-VC fields must fit into the area the VC provides */
    MPIU_Assert(sizeof(MPID_nem_scif_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);

    MPID_nem_scif_myrank = pg_rank;
    MPID_nem_scif_nranks = pg_p->size;

    /* listener endpoint: skipped on the highest rank */
    if (MPID_nem_scif_myrank < MPID_nem_scif_nranks - 1) {
        listen_fd = scif_open();
        MPIU_ERR_CHKANDJUMP1(listen_fd == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_open", "**scif_open %s", MPIU_Strerror(errno));

        listen_port = scif_bind(listen_fd, 0);
        MPIU_ERR_CHKANDJUMP1(listen_port == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_bind", "**scif_bind %s", MPIU_Strerror(errno));

        rc = scif_listen(listen_fd, MPID_nem_scif_nranks);
        MPIU_ERR_CHKANDJUMP1(rc == -1, mpi_errno, MPI_ERR_OTHER,
                             "**scif_listen", "**scif_listen %s", MPIU_Strerror(errno));
    }

    /* business card */
    mpi_errno = MPID_nem_scif_get_business_card(pg_rank, bc_val_p, val_max_sz_p);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    /* connection table: zeroed, with every descriptor marked "not open" */
    MPIU_CHKPMEM_MALLOC(MPID_nem_scif_conns, scifconn_t *,
                        MPID_nem_scif_nranks * sizeof(scifconn_t), mpi_errno, "connection table");
    memset(MPID_nem_scif_conns, 0, MPID_nem_scif_nranks * sizeof(scifconn_t));
    for (peer = 0; peer < MPID_nem_scif_nranks; ++peer) {
        MPID_nem_scif_conns[peer].fd = -1;
    }

    MPIU_CHKPMEM_MALLOC(MPID_nem_scif_recv_buf, char *,
                        MPID_NEM_SCIF_RECV_MAX_PKT_LEN, mpi_errno, "SCIF temporary buffer");

    MPIU_CHKPMEM_COMMIT();

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_INIT);
    return mpi_errno;
  fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
}
/*
 * Node-aware barrier in three phases:
 *   1. barrier on the intranode communicator (if this process has one),
 *   2. barrier across the node roots (if this process is in that communicator),
 *   3. 1-byte broadcast on the intranode communicator to release local ranks.
 *
 * An error in any phase is recorded in *errflag and accumulated, and the
 * remaining phases still run.  Returns the accumulated error, or a
 * "**coll_fail" error if *errflag was set without a local error code.
 */
static int barrier_smp_intra(MPID_Comm *comm_ptr, mpir_errflag_t *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    int errs_seen = MPI_SUCCESS;

    MPIU_Assert(MPIR_CVAR_ENABLE_SMP_COLLECTIVES && MPIR_CVAR_ENABLE_SMP_BARRIER &&
                MPIR_Comm_is_node_aware(comm_ptr));

    /* phase 1: intranode barrier on all nodes */
    if (comm_ptr->node_comm) {
        mpi_errno = MPIR_Barrier_impl(comm_ptr->node_comm, errflag);
        if (mpi_errno) {
            /* for communication errors, just record the error but continue */
            *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
            MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
            MPIU_ERR_ADD(errs_seen, mpi_errno);
        }
    }

    /* phase 2: barrier across the roots of all nodes */
    if (comm_ptr->node_roots_comm) {
        mpi_errno = MPIR_Barrier_impl(comm_ptr->node_roots_comm, errflag);
        if (mpi_errno) {
            /* for communication errors, just record the error but continue */
            *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
            MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
            MPIU_ERR_ADD(errs_seen, mpi_errno);
        }
    }

    /* phase 3: release the local processes on each node with a 1-byte
     * broadcast (a 0-byte broadcast just returns without doing anything) */
    if (comm_ptr->node_comm) {
        int release_token = 0;

        mpi_errno = MPIR_Bcast_impl(&release_token, 1, MPI_BYTE, 0, comm_ptr->node_comm, errflag);
        if (mpi_errno) {
            /* for communication errors, just record the error but continue */
            *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
            MPIU_ERR_SET(mpi_errno, *errflag, "**fail");
            MPIU_ERR_ADD(errs_seen, mpi_errno);
        }
    }

  fn_exit:
    if (errs_seen) {
        mpi_errno = errs_seen;
    } else if (*errflag != MPIR_ERR_NONE) {
        MPIU_ERR_SET(mpi_errno, *errflag, "**coll_fail");
    }
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * Post a non-blocking MXM receive for the request area `req`.
 *
 * An MXM request descriptor is taken from the endpoint's free queue (or from
 * the global mxm_obj free queue when ep is NULL); the queue is grown once if
 * it is empty.  The descriptor is filled from `req` (single buffer when
 * iov_count == 1, IOV otherwise) and handed to mxm_req_recv().
 *
 * Returns MPI_SUCCESS, or MPI_ERR_OTHER when no descriptor could be obtained
 * or mxm_req_recv() fails (in the latter case the descriptor is put back on
 * the free queue).
 */
static int _mxm_irecv(MPID_nem_mxm_ep_t * ep, MPID_nem_mxm_req_area * req, int id,
                      mxm_mq_h mxm_mq, mxm_tag_t mxm_tag)
{
    int mpi_errno = MPI_SUCCESS;
    mxm_error_t post_rc = MXM_OK;
    mxm_recv_req_t *rreq;
    list_head_t *pool = NULL;

    MPIU_Assert(req);

    /* choose the descriptor pool */
    if (ep) {
        pool = &ep->free_queue;
    } else {
        pool = &mxm_obj->free_queue;
    }

    /* grab a descriptor, growing the pool once if needed */
    req->mxm_req = list_dequeue_mxm_req(pool);
    if (req->mxm_req == NULL) {
        list_grow_mxm_req(pool);
        req->mxm_req = list_dequeue_mxm_req(pool);
        if (req->mxm_req == NULL) {
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "empty free queue");
            mpi_errno = MPI_ERR_OTHER;
            goto fn_fail;
        }
    }

    rreq = &(req->mxm_req->item.recv);

    /* common request header */
    rreq->base.state = MXM_REQ_NEW;
    rreq->base.mq = mxm_mq;
    if (ep) {
        rreq->base.conn = ep->mxm_conn;
    } else {
        rreq->base.conn = 0;
    }
    rreq->base.completed_cb = _mxm_recv_completion_cb;
    rreq->base.context = req->ctx;

    /* matching information */
    rreq->tag = mxm_tag;
    rreq->tag_mask = _mxm_tag_mask(id);

    /* payload description */
    if (likely(req->iov_count == 1)) {
        rreq->base.data_type = MXM_REQ_DATA_BUFFER;
        rreq->base.data.buffer.ptr = req->iov_buf[0].ptr;
        rreq->base.data.buffer.length = req->iov_buf[0].length;
    } else {
        rreq->base.data_type = MXM_REQ_DATA_IOV;
        rreq->base.data.iov.vector = req->iov_buf;
        rreq->base.data.iov.count = req->iov_count;
    }

    post_rc = mxm_req_recv(rreq);
    if (post_rc != MXM_OK) {
        /* give the descriptor back before reporting the failure */
        list_enqueue(pool, &req->mxm_req->queue);
        mpi_errno = MPI_ERR_OTHER;
        goto fn_fail;
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * Initialise the MX-specific part of a virtual connection.
 *
 * With ONDEMAND defined, only the connection flags are cleared.  Otherwise the
 * peer's business card is fetched, the remote endpoint/NIC ids are parsed out
 * of it, and mx_connect() is called before the VC is marked ACTIVE.  In both
 * cases the eager threshold is read from the local endpoint and the send
 * function pointers of the VC are filled in.
 *
 * Fixes relative to the previous version:
 *   - the business-card buffer is released on every path (it used to leak
 *     when getConnInfo or MPID_nem_mx_get_from_bc failed);
 *   - a failed MPIU_Malloc is reported instead of being passed on as NULL;
 *   - the return code of PMI_KVS_Get_value_length_max is checked rather than
 *     being stored in mpi_errno and silently overwritten.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_nem_mx_vc_init (MPIDI_VC_t *vc)
{
    uint32_t threshold;
    MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
    int mpi_errno = MPI_SUCCESS;

    /* first make sure that our private fields in the vc fit into the area provided */
    MPIU_Assert(sizeof(MPID_nem_mx_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);

#ifdef ONDEMAND
    VC_FIELD(vc, local_connected)  = 0;
    VC_FIELD(vc, remote_connected) = 0;
#else
    {
        char *business_card;
        int   val_max_sz;
        int   ret;

#ifdef USE_PMI2_API
        val_max_sz = PMI2_MAX_VALLEN;
#else
        ret = PMI_KVS_Get_value_length_max(&val_max_sz);
        MPIU_ERR_CHKANDJUMP1(ret != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                             "**pmi_kvs_get_value_length_max",
                             "**pmi_kvs_get_value_length_max %d", ret);
#endif

        business_card = (char *)MPIU_Malloc(val_max_sz);
        MPIU_ERR_CHKANDJUMP1(business_card == NULL, mpi_errno, MPI_ERR_OTHER,
                             "**nomem", "**nomem %s", "business card");

        mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card, val_max_sz, vc->pg);
        if (mpi_errno) {
            /* do not leak the buffer on the error path */
            MPIU_Free(business_card);
            MPIU_ERR_POP(mpi_errno);
        }

        mpi_errno = MPID_nem_mx_get_from_bc (business_card,
                                             &VC_FIELD(vc, remote_endpoint_id),
                                             &VC_FIELD(vc, remote_nic_id));
        /* the card is no longer needed whether or not parsing succeeded */
        MPIU_Free(business_card);
        if (mpi_errno) MPIU_ERR_POP (mpi_errno);

        ret = mx_connect(MPID_nem_mx_local_endpoint,
                         VC_FIELD(vc, remote_nic_id),
                         VC_FIELD(vc, remote_endpoint_id),
                         MPID_NEM_MX_FILTER, MX_INFINITE,
                         &(VC_FIELD(vc, remote_endpoint_addr)));
        MPIU_ERR_CHKANDJUMP1 (ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                              "**mx_connect", "**mx_connect %s", mx_strerror (ret));

        mx_set_endpoint_addr_context(VC_FIELD(vc, remote_endpoint_addr), (void *)vc);
        MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
    }
#endif

    /* eager limit is taken from the endpoint's MX_COPY_SEND_MAX setting */
    mx_get_info(MPID_nem_mx_local_endpoint, MX_COPY_SEND_MAX, NULL, 0,
                &threshold, sizeof(uint32_t));

    vc->eager_max_msg_sz = threshold;
    vc->rndvSend_fn      = NULL;
    vc->sendNoncontig_fn = MPID_nem_mx_SendNoncontig;
    vc->comm_ops         = &comm_ops;

    vc_ch->iStartContigMsg = MPID_nem_mx_iStartContigMsg;
    vc_ch->iSendContig     = MPID_nem_mx_iSendContig;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
int MPIR_Datatype_init(void) { int i; int mpi_errno = MPI_SUCCESS; MPID_Datatype *ptr; MPIU_Assert(MPID_Datatype_mem.initialized == 0); MPIU_Assert(MPID_DATATYPE_PREALLOC >= 5); for (i=0; mpi_pairtypes[i] != (MPI_Datatype) -1; ++i) { /* types based on 'long long' and 'long double', may be disabled at configure time, and their values set to MPI_DATATYPE_NULL. skip any such types. */ if (mpi_pairtypes[i] == MPI_DATATYPE_NULL) continue; /* XXX: this allocation strategy isn't right if one or more of the pairtypes is MPI_DATATYPE_NULL. in fact, the assert below will fail if any type other than the las in the list is equal to MPI_DATATYPE_NULL. obviously, this should be fixed, but I need to talk to Rob R. first. -- BRT */ /* XXX DJG it does work, but only because MPI_LONG_DOUBLE_INT is the * only one that is ever optional and it comes last */ /* we use the _unsafe version because we are still in MPI_Init, before * multiple threads are permitted and possibly before support for * critical sections is entirely setup */ ptr = (MPID_Datatype *)MPIU_Handle_obj_alloc_unsafe( &MPID_Datatype_mem ); MPIU_Assert(ptr); MPIU_Assert(ptr->handle == mpi_pairtypes[i]); /* this is a redundant alternative to the previous statement */ MPIU_Assert((void *) ptr == (void *) (MPID_Datatype_direct + HANDLE_INDEX(mpi_pairtypes[i]))); mpi_errno = MPID_Type_create_pairtype(mpi_pairtypes[i], (MPID_Datatype *) ptr); if (mpi_errno) MPIU_ERR_POP(mpi_errno); } MPIR_Add_finalize(MPIR_Datatype_finalize, 0, MPIR_FINALIZE_CALLBACK_PRIO-1); fn_fail: return mpi_errno; }
/*
 * Start a non-blocking barrier on comm_ptr.
 *
 * If the communicator supplies an Ibarrier_req hook and that hook produces a
 * request, its handle is returned directly.  Otherwise, unless the
 * communicator is a single-process intracommunicator, a schedule is created,
 * filled by the Ibarrier_sched hook and started.
 *
 * *request is MPI_REQUEST_NULL when no request object was produced.
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIR_Ibarrier_impl(MPID_Comm *comm_ptr, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
    int sched_tag = -1;
    MPID_Request *new_req = NULL;
    MPID_Sched_t sched = MPID_SCHED_NULL;

    *request = MPI_REQUEST_NULL;

    MPIU_Assert(comm_ptr->coll_fns != NULL);

    if (comm_ptr->coll_fns->Ibarrier_req != NULL) {
        /* FG:NBC Double-check */
        /* --BEGIN USEREXTENSION-- */
        mpi_errno = comm_ptr->coll_fns->Ibarrier_req(comm_ptr, &new_req);
        if (new_req) {
            *request = new_req->handle;
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            goto fn_exit;
        }
        /* --END USEREXTENSION-- */
    }

    /* nothing to schedule for an intracommunicator with a single process */
    if (comm_ptr->local_size == 1 && comm_ptr->comm_kind != MPID_INTERCOMM)
        goto fn_exit;

    mpi_errno = MPID_Sched_next_tag(comm_ptr, &sched_tag);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPID_Sched_create(&sched);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    MPIU_Assert(comm_ptr->coll_fns->Ibarrier_sched != NULL);
    mpi_errno = comm_ptr->coll_fns->Ibarrier_sched(comm_ptr, sched);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPID_Sched_start(&sched, comm_ptr, sched_tag, &new_req);
    if (new_req)
        *request = new_req->handle;
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
static int handle_mprobe(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPID_Request *const req = e->user_ptr; MPIU_CHKPMEM_DECL(1); MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE); MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE); if (e->ni_fail_type == PTL_NI_NO_MATCH) { REQ_PTL(req)->found = FALSE; goto finish_mprobe; } REQ_PTL(req)->found = TRUE; req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits); req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits); MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data)); MPIDI_Request_set_sync_send_flag(req, e->hdr_data & NPTL_SSEND); MPIU_CHKPMEM_MALLOC(req->dev.tmpbuf, void *, e->mlength, mpi_errno, "tmpbuf"); MPIU_Memcpy((char *)req->dev.tmpbuf, e->start, e->mlength); req->dev.recv_data_sz = e->mlength; if (!(e->hdr_data & NPTL_LARGE)) { MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_EAGER_MSG); } else { MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD); req->dev.match.parts.tag = req->status.MPI_TAG; req->dev.match.parts.context_id = NPTL_MATCH_GET_CTX(e->match_bits); req->dev.match.parts.rank = req->status.MPI_SOURCE; MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_RNDV_MSG); } /* At this point we know the ME is unlinked. Invalidate the handle to prevent further accesses, e.g. an attempted cancel. */ REQ_PTL(req)->put_me = PTL_INVALID_HANDLE; req->dev.recv_pending_count = 1; finish_mprobe: mpi_errno = MPID_Request_complete(req); if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); } fn_exit: MPIU_CHKPMEM_COMMIT(); MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE); return mpi_errno; fn_fail: MPIU_CHKPMEM_REAP(); goto fn_exit; }
/*
 * Store `count` elements of `datatype` in *status as a byte count
 * (size of the datatype multiplied by count).
 *
 * The overflow assertion is expressed as a division so that the check itself
 * cannot overflow: the previous form evaluated count * size_x, which is
 * undefined behaviour for exactly the inputs the assertion is meant to catch.
 * The accepted range is unchanged (count * size_x < MPIR_COUNT_MAX).
 *
 * Always returns MPI_SUCCESS.
 */
int MPIR_Status_set_elements_x_impl(MPI_Status *status, MPI_Datatype datatype, MPI_Count count)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Count size_x;

    MPID_Datatype_get_size_macro(datatype, size_x);

    /* overflow check, should probably be a real error check? */
    if (count != 0) {
        MPIU_Assert(size_x >= 0 && count > 0);
        /* count * size_x <= MPIR_COUNT_MAX - 1, without doing the multiply */
        MPIU_Assert(size_x == 0 || count <= (MPIR_COUNT_MAX - 1) / size_x);
    }

    MPIR_STATUS_SET_COUNT(*status, size_x * count);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * Start a non-blocking all-to-all on comm_ptr.
 *
 * If the communicator supplies an Ialltoall_req hook and that hook produces a
 * request, its handle is returned directly.  Otherwise a schedule is created,
 * filled by the Ialltoall_sched hook and started.
 *
 * *request is MPI_REQUEST_NULL when no request object was produced.
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIR_Ialltoall_impl(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
                        MPID_Comm *comm_ptr, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
    int sched_tag = -1;
    MPID_Request *new_req = NULL;
    MPID_Sched_t sched = MPID_SCHED_NULL;

    *request = MPI_REQUEST_NULL;

    MPIU_Assert(comm_ptr->coll_fns != NULL);

    if (comm_ptr->coll_fns->Ialltoall_req != NULL) {
        /* --BEGIN USEREXTENSION-- */
        mpi_errno = comm_ptr->coll_fns->Ialltoall_req(sendbuf, sendcount, sendtype,
                                                      recvbuf, recvcount, recvtype,
                                                      comm_ptr, &new_req);
        if (new_req) {
            *request = new_req->handle;
            if (mpi_errno)
                MPIU_ERR_POP(mpi_errno);
            goto fn_exit;
        }
        /* --END USEREXTENSION-- */
    }

    /* schedule-based path */
    mpi_errno = MPID_Sched_next_tag(comm_ptr, &sched_tag);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPID_Sched_create(&sched);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    MPIU_Assert(comm_ptr->coll_fns != NULL);
    MPIU_Assert(comm_ptr->coll_fns->Ialltoall_sched != NULL);
    mpi_errno = comm_ptr->coll_fns->Ialltoall_sched(sendbuf, sendcount, sendtype,
                                                    recvbuf, recvcount, recvtype,
                                                    comm_ptr, sched);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPID_Sched_start(&sched, comm_ptr, sched_tag, &new_req);
    if (new_req)
        *request = new_req->handle;
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * Blocking receive used by the collective code.
 *
 * The receive is posted on the collective context of comm_ptr (intra or inter
 * variant depending on the communicator kind).  If MPID_Recv hands back a
 * request, the call waits on it, copies its status out and returns the
 * status' MPI_ERROR; otherwise the status is post-processed in place.
 * A caller-supplied MPI_STATUS_IGNORE is replaced by a local scratch status.
 *
 * Returns MPI_SUCCESS or an MPI error code; a negative count yields
 * MPI_ERR_COUNT.
 */
int MPIC_Recv(void *buf, MPI_Aint count, MPI_Datatype datatype, int source, int tag,
              MPID_Comm *comm_ptr, MPI_Status *status, mpir_errflag_t *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    int context_id;
    MPI_Status scratch_status;
    MPID_Request *rreq = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_RECV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_RECV);

    MPIU_DBG_MSG_D(PT2PT, TYPICAL, "IN: errflag = %d", *errflag);

    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

    /* collective traffic travels on its own context */
    if (comm_ptr->comm_kind == MPID_INTRACOMM) {
        context_id = MPID_CONTEXT_INTRA_COLL;
    } else {
        context_id = MPID_CONTEXT_INTER_COLL;
    }

    /* always have somewhere to store the status */
    if (status == MPI_STATUS_IGNORE) {
        status = &scratch_status;
    }

    mpi_errno = MPID_Recv(buf, count, datatype, source, tag, comm_ptr,
                          context_id, status, &rreq);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    if (rreq == NULL) {
        /* no request was returned: post-process the status in place */
        MPIR_Process_status(status, errflag);
        MPIR_TAG_CLEAR_ERROR_BITS(status->MPI_TAG);
    } else {
        mpi_errno = MPIC_Wait(rreq, errflag);
        if (mpi_errno != MPI_SUCCESS)
            MPIU_ERR_POP(mpi_errno);

        *status = rreq->status;
        mpi_errno = status->MPI_ERROR;
        MPID_Request_release(rreq);
    }

    if (MPI_SUCCESS == MPIR_ERR_GET_CLASS(status->MPI_ERROR)) {
        MPIU_Assert(status->MPI_TAG == tag);
    }

  fn_exit:
    MPIU_DBG_MSG_D(PT2PT, TYPICAL, "OUT: errflag = %d", *errflag);
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_RECV);
    return mpi_errno;
  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    if (rreq)
        MPID_Request_release(rreq);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
/*
 * Initialise the TCP netmod: set up the listener socket, create the business
 * card, initialise the state machine and the send path and, when HAVE_SIGNAL
 * is defined, arrange for SIGPIPE to be ignored unless the application has
 * installed its own handler.
 *
 * The previous SIGPIPE disposition is now held in a variable of the proper
 * handler type, void (*)(int).  It used to be stored in a void *, which
 * relies on a function-pointer/object-pointer conversion that ISO C does not
 * define.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_nem_tcp_init (MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_INIT);

    MPID_nem_net_module_vc_dbg_print_sendq = MPID_nem_tcp_vc_dbg_print_sendq;

    /* first make sure that our private fields in the vc fit into the area provided */
    MPIU_Assert(sizeof(MPID_nem_tcp_vc_area) <= MPIDI_NEM_VC_NETMOD_AREA_LEN);

    /* set up listener socket */
    mpi_errno = set_up_listener();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* create business card */
    mpi_errno = MPID_nem_tcp_get_business_card(pg_rank, bc_val_p, val_max_sz_p);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPID_nem_tcp_sm_init();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPID_nem_tcp_send_init();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

#ifdef HAVE_SIGNAL
    {
        /* In order to be able to handle socket errors on our own, we need
           to ignore SIGPIPE.  This may cause problems for programs that
           intend to handle SIGPIPE or count on being killed, but I expect
           such programs are very rare, and I'm not sure what the best
           solution would be anyway. */
        void (*prev_handler)(int);

        prev_handler = signal(SIGPIPE, SIG_IGN);
        MPIR_ERR_CHKANDJUMP1(prev_handler == SIG_ERR, mpi_errno, MPI_ERR_OTHER,
                             "**signal", "**signal %s", MPIU_Strerror(errno));

        if (prev_handler != SIG_DFL && prev_handler != SIG_IGN) {
            /* The app has set its own signal handler.  Put the previous
               handler back. */
            prev_handler = signal(SIGPIPE, prev_handler);
            MPIR_ERR_CHKANDJUMP1(prev_handler == SIG_ERR, mpi_errno, MPI_ERR_OTHER,
                                 "**signal", "**signal %s", MPIU_Strerror(errno));
        }
    }
#endif

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_INIT);
    /*     fprintf(stdout, FCNAME " Exit\n"); fflush(stdout); */
    return mpi_errno;
 fn_fail:
    /*     fprintf(stdout, "failure. mpi_errno = %d\n", mpi_errno); */
    goto fn_exit;
}
/*
 * Handle an incoming R3 acknowledgement packet: subtract the acknowledged
 * amount from the VC's pending R3 data counter (which is asserted to reach
 * zero) and push the VC onto the flow list.
 */
void MPIDI_CH3_Rendezvouz_r3_ack_recv(MPIDI_VC_t * vc, MPIDI_CH3_Pkt_rndv_r3_ack_t *r3ack_pkt)
{
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_R3_ACK_RECV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_R3_ACK_RECV);

    DEBUG_PRINT("Received R3 Ack %d\n", r3ack_pkt->ack_data);

    /* account for the data the peer has acknowledged */
    vc->ch.pending_r3_data = vc->ch.pending_r3_data - r3ack_pkt->ack_data;
    MPIU_Assert(vc->ch.pending_r3_data == 0);

    PUSH_FLOWLIST(vc);

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RNDV_R3_ACK_RECV);
}
static int handler_recv_dequeue_complete(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPID_Request *const rreq = e->user_ptr; int is_contig; MPI_Aint last; MPI_Aint dt_true_lb; MPIDI_msg_sz_t data_sz; MPID_Datatype *dt_ptr ATTRIBUTE((unused)); MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE); MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE); MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW); MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, is_contig, data_sz, dt_ptr, dt_true_lb); dequeue_req(e); if (e->type == PTL_EVENT_PUT_OVERFLOW) { /* unpack the data from unexpected buffer */ MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "is_contig = %d", is_contig); if (is_contig) { MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength); } else { last = e->mlength; MPID_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, e->start); if (last != e->mlength) MPIR_ERR_SET(rreq->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch"); } } else { /* Data was placed directly into the user buffer, so datatype mismatch is harder to detect. We use a simple check ensuring the received bytes are a multiple of a single basic element. Currently, we do not detect mismatches with datatypes constructed of more than one basic type */ MPI_Datatype dt_basic_type; MPID_Datatype_get_basic_type(rreq->dev.datatype, dt_basic_type); if (dt_basic_type != MPI_DATATYPE_NULL && (e->mlength % MPID_Datatype_get_basic_size(dt_basic_type)) != 0) MPIR_ERR_SET(rreq->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch"); } mpi_errno = handler_recv_complete(e); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE); return mpi_errno; fn_fail: goto fn_exit; }
/*
 * Queue the first step of a non-blocking context-id allocation on schedule s.
 *
 * A gcn_state record is allocated and initialised from the arguments, *ctx0
 * is cleared, and sched_cb_gcn_copy_mask is registered as a schedule callback
 * followed by a schedule barrier.  For an intercommunicator request the
 * state works on comm_ptr->local_comm and remembers comm_ptr as the
 * intercommunicator.
 *
 * Returns MPI_SUCCESS or an MPI error code; on failure the state record is
 * released.
 */
static int sched_get_cid_nonblock(MPID_Comm * comm_ptr, MPID_Comm * newcomm,
                                  MPIU_Context_id_t * ctx0, MPIU_Context_id_t * ctx1,
                                  MPID_Sched_t s, MPID_Comm_kind_t gcn_cid_kind)
{
    int mpi_errno = MPI_SUCCESS;
    struct gcn_state *state = NULL;
    MPIU_CHKPMEM_DECL(1);

    /* lazily set up the context mask */
    if (initialize_context_mask) {
        context_id_init();
    }

    MPIU_CHKPMEM_MALLOC(state, struct gcn_state *, sizeof(struct gcn_state), mpi_errno,
                        "gcn_state");

    /* which communicator(s) the allocation runs on */
    if (gcn_cid_kind != MPID_INTRACOMM) {
        state->comm_ptr = comm_ptr->local_comm;
        state->comm_ptr_inter = comm_ptr;
    } else {
        state->comm_ptr = comm_ptr;
        state->comm_ptr_inter = NULL;
    }

    /* output locations and bookkeeping */
    state->ctx0 = ctx0;
    state->ctx1 = ctx1;
    state->new_comm = newcomm;
    state->s = s;
    state->gcn_cid_kind = gcn_cid_kind;
    state->first_iter = 1;
    state->own_mask = 0;
    state->own_eager_mask = 0;

    *ctx0 = 0;

    if (eager_nelem < 0) {
        /* Ensure that at least one word of deadlock-free context IDs is
         * always set aside for the base protocol */
        MPIU_Assert(MPIR_CVAR_CTXID_EAGER_SIZE >= 0 &&
                    MPIR_CVAR_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK - 1);
        eager_nelem = MPIR_CVAR_CTXID_EAGER_SIZE;
    }

    mpi_errno = MPID_Sched_cb(&sched_cb_gcn_copy_mask, state, s);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPID_SCHED_BARRIER(s);

    MPIU_CHKPMEM_COMMIT();

  fn_exit:
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    MPIU_CHKPMEM_REAP();
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
/*
 * Add a pairwise-exchange all-to-all to schedule s.
 *
 * The local block is copied first.  Then, for i = 1 .. comm_size-1, one send
 * and one receive are scheduled per step, each step followed by a schedule
 * barrier.  With a power-of-two communicator size the partner is rank ^ i;
 * otherwise data is sent to (rank + i) and received from (rank - i), modulo
 * comm_size.  MPI_IN_PLACE is not supported.
 *
 * Buffer offsets are computed in MPI_Aint.  The previous expressions
 * multiplied rank/dst/src by the count in plain int before widening, which
 * overflows for large counts.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIR_Ialltoall_pairwise(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                            void *recvbuf, int recvcount, MPI_Datatype recvtype,
                            MPID_Comm *comm_ptr, MPID_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    int src, dst, is_pof2;
    int rank, comm_size;
    MPI_Aint sendtype_extent, recvtype_extent;
    MPI_Aint send_stride, recv_stride;   /* bytes between consecutive per-rank blocks */

    MPIU_Assert(sendbuf != MPI_IN_PLACE); /* we do not handle in-place */

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
    MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);

    /* widen before multiplying so the offsets cannot overflow int */
    send_stride = (MPI_Aint) sendcount * sendtype_extent;
    recv_stride = (MPI_Aint) recvcount * recvtype_extent;

    /* Make local copy first */
    mpi_errno = MPID_Sched_copy(((char *)sendbuf + rank * send_stride),
                                sendcount, sendtype,
                                ((char *)recvbuf + rank * recv_stride),
                                recvcount, recvtype, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    is_pof2 = MPIU_is_pof2(comm_size, NULL);

    /* Do the pairwise exchanges */
    for (i = 1; i < comm_size; i++) {
        if (is_pof2 == 1) {
            /* use exclusive-or algorithm */
            src = dst = rank ^ i;
        }
        else {
            src = (rank - i + comm_size) % comm_size;
            dst = (rank + i) % comm_size;
        }

        mpi_errno = MPID_Sched_send(((char *)sendbuf + dst * send_stride),
                                    sendcount, sendtype, dst, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        mpi_errno = MPID_Sched_recv(((char *)recvbuf + src * recv_stride),
                                    recvcount, recvtype, src, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_SCHED_BARRIER(s);
    }

fn_exit:
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
/*
 * Send a contiguous message (header plus optional data) over MXM.
 *
 * The header is copied into sreq->dev.pending_pkt, an IOV of one entry
 * (header only) or two entries (header and data, when data_sz is non-zero)
 * is built in the request area, the VC's pending-send counter is incremented
 * and the message is handed to _mxm_isend().
 *
 * Returns MPI_SUCCESS or the error from _mxm_isend().
 */
int MPID_nem_mxm_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr,
                             MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_mxm_vc_area *vc_priv = NULL;
    MPID_nem_mxm_req_area *req_priv = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "mxm_iSendContig");
    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);

    /* keep a private copy of the packet header inside the request */
    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));

    _dbg_mxm_output(5,
                    "iSendContig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n",
                    vc->pg_rank, sreq->dev.pending_pkt.type, sreq, sizeof(MPIDI_CH3_Pkt_t),
                    data_sz);

    vc_priv = VC_BASE(vc);
    req_priv = REQ_BASE(sreq);

    req_priv->ctx = sreq;
    req_priv->iov_buf = req_priv->tmp_buf;

    /* entry 0: the packet header */
    req_priv->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt);
    req_priv->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t);

    /* entry 1: the payload, only when there is one */
    if (data_sz) {
        req_priv->iov_buf[1].ptr = (void *) data;
        req_priv->iov_buf[1].length = data_sz;
        req_priv->iov_count = 2;
    } else {
        req_priv->iov_count = 1;
    }

    vc_priv->pending_sends += 1;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = FALSE;

    mpi_errno = _mxm_isend(vc_priv->mxm_ep, req_priv, MXM_MPICH_ISEND_AM,
                           mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * Commit a (non-builtin) datatype.
 *
 * If the datatype is not yet marked committed, it is marked so and its
 * dataloops are created: the regular one (all-bytes or homogeneous flavour,
 * selected by MPID_NEEDS_DLOOP_ALL_BYTES) and the heterogeneous one.  The
 * device commit hook is invoked when MPID_Dev_datatype_commit_hook is
 * defined.  Committing an already committed type does nothing.
 *
 * Always returns MPI_SUCCESS.
 */
int MPID_Type_commit(MPI_Datatype *datatype_p)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Datatype *dtp;

    MPIU_Assert(HANDLE_GET_KIND(*datatype_p) != HANDLE_KIND_BUILTIN);

    MPID_Datatype_get_ptr(*datatype_p, dtp);

    /* nothing to do on a repeated commit */
    if (dtp->is_committed != 0)
        return mpi_errno;

    dtp->is_committed = 1;

#ifdef MPID_NEEDS_DLOOP_ALL_BYTES
    /* If MPID implementation needs use to reduce everything to
       a byte stream, do that. */
    MPID_Dataloop_create(*datatype_p,
                         &dtp->dataloop,
                         &dtp->dataloop_size,
                         &dtp->dataloop_depth,
                         MPID_DATALOOP_ALL_BYTES);
#else
    MPID_Dataloop_create(*datatype_p,
                         &dtp->dataloop,
                         &dtp->dataloop_size,
                         &dtp->dataloop_depth,
                         MPID_DATALOOP_HOMOGENEOUS);
#endif

    /* create heterogeneous dataloop */
    MPID_Dataloop_create(*datatype_p,
                         &dtp->hetero_dloop,
                         &dtp->hetero_dloop_size,
                         &dtp->hetero_dloop_depth,
                         MPID_DATALOOP_HETEROGENEOUS);

    MPL_DBG_MSG_D(MPIR_DBG_DATATYPE,TERSE,"# contig blocks = %d\n",
                  (int) dtp->max_contig_blocks);

#if 0
    MPIDI_Dataloop_dot_printf(dtp->dataloop, 0, 1);
#endif

#ifdef MPID_Dev_datatype_commit_hook
    MPID_Dev_datatype_commit_hook(datatype_p);
#endif /* MPID_Dev_datatype_commit_hook */

    return mpi_errno;
}
/*
 * Post the overlapped structure embedded in pOverlapped to the executive set
 * `Set` under `key`, reporting zero bytes transferred.
 *
 * The argument annotations are block comments: as line comments they hid the
 * rest of the statement whenever the call was not split across lines.
 */
void MPIU_ExPostOverlapped(
    MPIU_ExSetHandle_t Set,
    ULONG_PTR key,
    MPIU_EXOVERLAPPED* pOverlapped
    )
{
    MPIU_Assert(IsValidSet(Set));

    MPIU_ExPostCompletion(
        Set,
        key,                /* Key */
        &pOverlapped->ov,
        0                   /* BytesTransfered */
        );
}
/*
 * Register MPID_nem_newmad_get_adi_msg as the monitor for
 * NM_SR_EVENT_RECV_UNEXPECTED events on the newmad session.
 *
 * The registration result is only checked by assertion.
 * Always returns MPI_SUCCESS.
 */
int MPID_nem_newmad_init_completed(void)
{
    int mpi_errno = MPI_SUCCESS;
    int nm_rc;

    nm_rc = nm_sr_monitor(mpid_nem_newmad_session,
                          NM_SR_EVENT_RECV_UNEXPECTED,
                          &MPID_nem_newmad_get_adi_msg);
    MPIU_Assert(nm_rc == NM_ESUCCESS);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * Hook invoked for a posted any-source receive: hands the request to
 * MPID_nem_mxm_recv() with a NULL VC.  The result is only checked by
 * assertion since this function has no way to return an error.
 */
void MPID_nem_mxm_anysource_posted(MPID_Request * req)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_POSTED);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_POSTED);

    _dbg_mxm_output(5, "Any Source ========> Posting req %p \n", req);

    /* NULL VC: the source is not known yet */
    mpi_errno = MPID_nem_mxm_recv(NULL, req);
    MPIU_Assert(mpi_errno == MPI_SUCCESS);

    _dbg_mxm_out_req(req);

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ANYSOURCE_POSTED);
}
/*
 * Report the number of contiguous blocks of a datatype in *nr_blocks_p.
 *
 * Builtin types report 1.  For any other type the value is read from the
 * datatype object's max_contig_blocks field; the type is asserted to be
 * committed.
 *
 * Always returns 0.
 */
int MPIR_Type_get_contig_blocks(MPI_Datatype type, int *nr_blocks_p)
{
    if (HANDLE_GET_KIND(type) != HANDLE_KIND_BUILTIN) {
        MPID_Datatype *dtp;

        MPID_Datatype_get_ptr(type, dtp);
        MPIU_Assert(dtp->is_committed);

        *nr_blocks_p = dtp->max_contig_blocks;
    } else {
        *nr_blocks_p = 1;
    }

    return 0;
}
/*
 * Fill in req->dev.iov{,_offset,_count} from the datatype information stored
 * in the request, creating a segment when the datatype is not contiguous.
 *
 * Contiguous data is described by a single IOV entry built here.  For
 * non-contiguous data a segment is allocated and initialised, and
 * MPIDI_CH3U_Request_load_send_iov() loads up to MPL_IOV_LIMIT entries.
 *
 * Returns MPI_SUCCESS or an MPI error code ("**nomem" if the segment cannot
 * be allocated).
 */
static int populate_iov_from_req(MPID_Request *req)
{
    int mpi_errno = MPI_SUCCESS;
    int contiguous;
    MPI_Aint true_lb;
    MPIDI_msg_sz_t nbytes;
    MPID_Datatype *dt_ptr;

    /* find out contig/noncontig, size, and lb for the datatype */
    MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype,
                            contiguous, nbytes, dt_ptr, true_lb);

    if (!contiguous) {
        /* use the segment routines to handle the iovec creation */
        MPIU_Assert(req->dev.segment_ptr == NULL);

        req->dev.iov_count = MPL_IOV_LIMIT;
        req->dev.iov_offset = 0;

        /* XXX DJG FIXME where is this segment freed? */
        req->dev.segment_ptr = MPID_Segment_alloc();
        MPIR_ERR_CHKANDJUMP1((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
                             "**nomem", "**nomem %s", "MPID_Segment_alloc");
        MPID_Segment_init(req->dev.user_buf, req->dev.user_count,
                          req->dev.datatype, req->dev.segment_ptr, 0);
        req->dev.segment_first = 0;
        req->dev.segment_size = nbytes;

        /* FIXME we should write our own function that isn't dependent on
         * the in-request iov array.  This will let us use IOVs that are
         * larger than MPL_IOV_LIMIT. */
        mpi_errno = MPIDI_CH3U_Request_load_send_iov(req, &req->dev.iov[0],
                                                     &req->dev.iov_count);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else {
        /* handle the iov creation ourselves: one entry covers everything */
        req->dev.iov[0].MPL_IOV_BUF = (char *)req->dev.user_buf + true_lb;
        req->dev.iov[0].MPL_IOV_LEN = nbytes;
        req->dev.iov_count = 1;
    }

  fn_fail:
    return mpi_errno;
}
/*
 * Start the receive side of a vmsplice-based large message transfer.
 *
 * The path carried in s_cookie is remembered on the VC the first time
 * through, the pipe is opened non-blocking for reading, a CTS is sent, the
 * request's IOV is populated and a first do_readv() is attempted.  If the
 * transfer is not complete after that, the request and pipe descriptor are
 * pushed on the list of outstanding transfers for later progress checks.
 *
 * The list node allocation is now checked: a failed MPL_malloc used to be
 * dereferenced immediately.  An unused local was removed as well.
 *
 * Returns MPI_SUCCESS or an MPI error code.  Note that an error code from
 * do_readv() is returned but, as before, does not by itself prevent the
 * request from being queued.
 */
int MPID_nem_lmt_vmsplice_start_recv(MPIDI_VC_t *vc, MPID_Request *rreq, MPL_IOV s_cookie)
{
    int mpi_errno = MPI_SUCCESS;
    int complete = 0;
    struct lmt_vmsplice_node *node = NULL;
    MPIDI_CH3I_VC *vc_ch = &vc->ch;
    int pipe_fd;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_RECV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_RECV);

    if (vc_ch->lmt_recv_copy_buf_handle == NULL) {
        MPIU_Assert(s_cookie.MPL_IOV_BUF != NULL);
        vc_ch->lmt_recv_copy_buf_handle = MPL_strdup(s_cookie.MPL_IOV_BUF);
    }

    /* XXX DJG FIXME in a real version we would want to cache the fd on the vc
       so that we don't have two open's on the critical path every time. */
    pipe_fd = open(vc_ch->lmt_recv_copy_buf_handle, O_NONBLOCK|O_RDONLY);
    MPIR_ERR_CHKANDJUMP1(pipe_fd < 0, mpi_errno, MPI_ERR_OTHER, "**open",
                         "**open %s", MPIU_Strerror(errno));

    MPID_nem_lmt_send_CTS(vc, rreq, NULL, 0);

    mpi_errno = populate_iov_from_req(rreq);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = do_readv(rreq, pipe_fd, rreq->dev.iov, &rreq->dev.iov_offset,
                         &rreq->dev.iov_count, &complete);

    /* push request if not complete for progress checks later */
    if (!complete) {
        node = MPL_malloc(sizeof(struct lmt_vmsplice_node));
        /* never touch the node if the allocation failed */
        MPIR_ERR_CHKANDJUMP1(node == NULL, mpi_errno, MPI_ERR_OTHER,
                             "**nomem", "**nomem %s", "lmt_vmsplice_node");
        node->pipe_fd = pipe_fd;
        node->req = rreq;
        node->next = outstanding_head;
        outstanding_head = node;
        ++MPID_nem_local_lmt_pending;
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_START_RECV);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
/*
 * Copy the current value of the control variable behind `handle` into buf.
 *
 * The handle supplies the element count, the storage address and the
 * datatype.  Numeric types are copied element by element; MPI_CHAR is copied
 * with MPIU_Strncpy using the count as the limit.  Any other datatype yields
 * an MPI_ERR_INTERN error.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIR_T_cvar_read_impl(MPI_T_cvar_handle handle, void *buf)
{
    int mpi_errno = MPI_SUCCESS;
    int k, nelems;
    void *src;
    MPIR_T_cvar_handle_t *hnd = handle;

    nelems = hnd->count;
    src = hnd->addr;
    MPIU_Assert(src != NULL);

    if (hnd->datatype == MPI_INT) {
        for (k = 0; k < nelems; k++)
            ((int *)buf)[k] = ((int *)src)[k];
    } else if (hnd->datatype == MPI_UNSIGNED) {
        for (k = 0; k < nelems; k++)
            ((unsigned *)buf)[k] = ((unsigned *)src)[k];
    } else if (hnd->datatype == MPI_UNSIGNED_LONG) {
        for (k = 0; k < nelems; k++)
            ((unsigned long *)buf)[k] = ((unsigned long *)src)[k];
    } else if (hnd->datatype == MPI_UNSIGNED_LONG_LONG) {
        for (k = 0; k < nelems; k++)
            ((unsigned long long *)buf)[k] = ((unsigned long long *)src)[k];
    } else if (hnd->datatype == MPI_DOUBLE) {
        for (k = 0; k < nelems; k++)
            ((double *)buf)[k] = ((double *)src)[k];
    } else if (hnd->datatype == MPI_CHAR) {
        MPIU_Strncpy(buf, src, nelems);
    } else {
        /* FIXME the error handling code may not have been setup yet */
        MPIR_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**intern", "**intern %s",
                             "unexpected parameter type");
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * Add a throttled send/receive all-to-all to schedule s.
 *
 * Communication is issued in batches of at most MPIR_CVAR_ALLTOALL_THROTTLE
 * partners (all partners at once when that value is 0).  Each batch schedules
 * its receives, then its sends, then a schedule barrier.
 * MPI_IN_PLACE is not supported.
 *
 * Buffer offsets are computed in MPI_Aint.  The previous expressions
 * multiplied dst by the count in plain int before widening, which overflows
 * for large counts.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIR_Ialltoall_perm_sr(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                           void *recvbuf, int recvcount, MPI_Datatype recvtype,
                           MPID_Comm *comm_ptr, MPID_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    int rank, comm_size;
    int ii, ss, bblock, dst;
    MPI_Aint sendtype_extent, recvtype_extent;
    MPI_Aint send_stride, recv_stride;   /* bytes between consecutive per-rank blocks */

    MPIU_Assert(sendbuf != MPI_IN_PLACE); /* we do not handle in-place */

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
    MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);

    /* widen before multiplying so the offsets cannot overflow int */
    send_stride = (MPI_Aint) sendcount * sendtype_extent;
    recv_stride = (MPI_Aint) recvcount * recvtype_extent;

    bblock = MPIR_CVAR_ALLTOALL_THROTTLE;
    if (bblock == 0) bblock = comm_size;

    for (ii = 0; ii < comm_size; ii += bblock) {
        /* size of this batch (the last one may be short) */
        ss = comm_size-ii < bblock ? comm_size-ii : bblock;

        /* do the communication -- post ss sends and receives: */
        for (i = 0; i < ss; i++) {
            dst = (rank+i+ii) % comm_size;
            mpi_errno = MPID_Sched_recv(((char *)recvbuf + dst * recv_stride),
                                        recvcount, recvtype, dst, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        }

        for (i = 0; i < ss; i++) {
            dst = (rank-i-ii+comm_size) % comm_size;
            mpi_errno = MPID_Sched_send(((char *)sendbuf + dst * send_stride),
                                        sendcount, sendtype, dst, comm_ptr, s);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        }

        /* force the (2*ss) sends/recvs above to complete before posting additional ops */
        MPID_SCHED_BARRIER(s);
    }

fn_exit:
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
/*
 * Free an RMA window.
 *
 * Fails with MPI_ERR_RMA_SYNC if the window is inside an epoch.  Otherwise it
 * waits for passive-target operations to finish, frees the window's
 * communicator and per-target arrays, frees the base buffer of
 * ALLOCATE/SHARED-flavour windows that were not allocated in shared memory,
 * and finally releases the window object itself.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_Win_free(MPID_Win **win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int in_use;
    MPID_Win *win;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FREE);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FREE);

    win = *win_ptr;

    /* a window may not be freed in the middle of an access/exposure epoch */
    MPIU_ERR_CHKANDJUMP(win->epoch_state != MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    mpi_errno = MPIDI_CH3I_Wait_for_pt_ops_finish(win);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIR_Comm_free_impl(win->comm_ptr);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    /* per-target bookkeeping arrays */
    MPIU_Free(win->targets);
    MPIU_Free(win->base_addrs);
    MPIU_Free(win->sizes);
    MPIU_Free(win->disp_units);
    MPIU_Free(win->all_win_handles);
    MPIU_Free(win->pt_rma_puts_accs);

    /* Free the attached buffer for windows created with MPI_Win_allocate() */
    if (win->create_flavor == MPI_WIN_FLAVOR_ALLOCATE ||
        win->create_flavor == MPI_WIN_FLAVOR_SHARED) {
        if (win->shm_allocated == FALSE && win->size > 0) {
            MPIU_Free(win->base);
        }
    }

    MPIU_Object_release_ref(win, &in_use);
    /* MPI windows don't have reference count semantics, so this should always be true */
    MPIU_Assert(!in_use);
    MPIU_Handle_obj_free(&MPID_Win_mem, win);

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_FREE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/* Allocates a context ID from the given mask by clearing the bit
 * corresponding to the given id.  The bit is asserted to be set (i.e. the id
 * is asserted to be free) on entry.  Returns id.
 *
 * The bit is built from an unsigned 32-bit one: the mask words are uint32_t
 * and bitpos can be as large as MPIR_CONTEXT_INT_BITS - 1, for which the
 * former signed expression (1 << bitpos) is undefined when that is bit 31. */
static int allocate_context_bit(uint32_t mask[], MPIU_Context_id_t id)
{
    int raw_prefix, idx, bitpos;
    uint32_t bit;

    /* locate the mask word and the bit inside it from the id's prefix field */
    raw_prefix = MPID_CONTEXT_READ_FIELD(PREFIX, id);
    idx = raw_prefix / MPIR_CONTEXT_INT_BITS;
    bitpos = raw_prefix % MPIR_CONTEXT_INT_BITS;
    bit = (uint32_t) 1 << bitpos;

    /* the bit should not already be cleared (allocated) */
    MPIU_Assert(mask[idx] & bit);

    /* clear the bit */
    mask[idx] &= ~bit;

    MPIU_DBG_MSG_FMT(COMM, VERBOSE, (MPIU_DBG_FDEST,
                                     "allocating contextid = %d, (mask=%p, mask[%d], bit %d)",
                                     id, mask, idx, bitpos));
    return id;
}
/*
 * Portals event handler: dequeue the request associated with the event and
 * then let handler_recv_unpack_complete() finish it.  Only PUT and
 * PUT_OVERFLOW events are expected.
 *
 * Returns the result of handler_recv_unpack_complete().
 */
static int handler_recv_dequeue_unpack_complete(const ptl_event_t *e)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_COMPLETE);

    MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_COMPLETE);

    MPIU_Assert(e->type == PTL_EVENT_PUT ||
                e->type == PTL_EVENT_PUT_OVERFLOW);

    /* take the request off its queue first, then unpack and complete */
    dequeue_req(e);
    mpi_errno = handler_recv_unpack_complete(e);

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_UNPACK_COMPLETE);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * Sender-side handling of the DONE message of a DMA large message transfer.
 *
 * The cookie saved from the RTS packet is freed.  If the request has no
 * OnDataAvail callback it is completed directly; otherwise the callback is
 * run and is expected to report the request as complete.
 *
 * Fixes relative to the previous version:
 *   - the function returns mpi_errno; it used to return MPI_SUCCESS
 *     unconditionally, hiding a failure of the OnDataAvail callback;
 *   - MPIDI_FUNC_EXIT sits after the fn_exit label, so it runs on every
 *     path instead of being skipped by each `goto fn_exit`.
 *
 * Returns MPI_SUCCESS or the error reported by the OnDataAvail callback.
 */
int MPID_nem_lmt_dma_done_send(MPIDI_VC_t *vc, MPID_Request *sreq)
{
    int mpi_errno = MPI_SUCCESS;
    int complete = 0;
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);

    /* free cookie from RTS packet */
    MPIU_Free(sreq->ch.s_cookie);

    /* We shouldn't ever need to handle the more IOVs case here.  The DONE
       message should only be sent when all of the data is truly transferred.
       However in the interest of robustness, we'll start to handle it and
       assert if it looks like we were supposed to send more data for some
       reason. */
    reqFn = sreq->dev.OnDataAvail;
    if (!reqFn) {
        MPIDI_CH3U_Request_complete(sreq);
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
        goto fn_exit;
    }

    complete = 0;
    mpi_errno = reqFn(vc, sreq, &complete);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    if (complete) {
        /* request was completed by the OnDataAvail fn */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
        goto fn_exit;
    }
    else {
        /* There is more data to send. */
        MPIU_Assert(("should never be incomplete!", 0));
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_DONE_SEND);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}