/*
 * create_request - build a send request from an IOV array.
 *
 * iov, iov_count : IOV array and its entry count; every entry is copied into
 *                  the request's own iov array.
 * iov_offset     : index of the entry that is advanced by nb.
 * nb             : byte count by which entry iov_offset is advanced (its
 *                  buffer pointer moves forward and its length shrinks by
 *                  nb) - presumably the bytes already written; confirm
 *                  against the callers.
 *
 * Returns the new request with its reference count set to 2, or NULL when
 * MPID_Request_create() fails.
 */
static MPID_Request * create_request(MPID_IOV * iov, int iov_count, int iov_offset, MPIU_Size_t nb)
{
    MPID_Request * sreq;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_CREATE_REQUEST);

    MPIDI_FUNC_ENTER(MPID_STATE_CREATE_REQUEST);

    sreq = MPID_Request_create();
    /* --BEGIN ERROR HANDLING-- */
    if (sreq == NULL) {
        /* Leave through the common exit so MPIDI_FUNC_ENTER stays paired
         * with MPIDI_FUNC_EXIT on the failure path as well. */
        goto fn_exit;
    }
    /* --END ERROR HANDLING-- */

    MPIU_Object_set_ref(sreq, 2);
    sreq->kind = MPID_REQUEST_SEND;

    for (i = 0; i < iov_count; i++) {
        sreq->dev.iov[i] = iov[i];
    }

    if (iov_offset == 0) {
        /* Entry 0 is the packet header: copy it into the request and point
         * entry 0 at that copy instead of the caller's buffer. */
        MPIU_Assert(iov[0].MPID_IOV_LEN == sizeof(MPIDI_CH3_Pkt_t));
        sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) iov[0].MPID_IOV_BUF;
        sreq->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) &sreq->dev.pending_pkt;
    }

    /* Skip the first nb bytes of entry iov_offset. */
    sreq->dev.iov[iov_offset].MPID_IOV_BUF =
        (MPID_IOV_BUF_CAST)((char *) sreq->dev.iov[iov_offset].MPID_IOV_BUF + nb);
    sreq->dev.iov[iov_offset].MPID_IOV_LEN -= nb;
    sreq->dev.iov_count = iov_count;
    sreq->dev.OnDataAvail = 0;

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_CREATE_REQUEST);
    return sreq;
}
/*
 * create_request - build a send request for a packet header.
 *
 * hdr    : points to the packet header; it is copied into the request.
 * hdr_sz : header size; asserted to equal sizeof(MPIDI_CH3_Pkt_t).
 * nb     : byte offset into the copied header at which the request's single
 *          IOV entry starts; the entry length is hdr_sz - nb.
 *
 * Returns the new request with its reference count set to 2, or NULL when
 * MPID_Request_create() fails.
 */
static MPID_Request * create_request(void * hdr, MPIDI_msg_sz_t hdr_sz, MPIU_Size_t nb)
{
    MPID_Request * sreq;
    MPIDI_STATE_DECL(MPID_STATE_CREATE_REQUEST);

    MPIDI_FUNC_ENTER(MPID_STATE_CREATE_REQUEST);

    sreq = MPID_Request_create();
    /* --BEGIN ERROR HANDLING-- */
    if (sreq == NULL) {
        /* Leave through the common exit so MPIDI_FUNC_ENTER stays paired
         * with MPIDI_FUNC_EXIT on the failure path as well. */
        goto fn_exit;
    }
    /* --END ERROR HANDLING-- */

    MPIU_Object_set_ref(sreq, 2);
    sreq->kind = MPID_REQUEST_SEND;

    /* Keep a private copy of the header inside the request and describe the
     * part starting at offset nb with a single IOV entry. */
    MPIU_Assert(hdr_sz == sizeof(MPIDI_CH3_Pkt_t));
    sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
    sreq->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)((char *) &sreq->dev.pending_pkt + nb);
    sreq->dev.iov[0].MPID_IOV_LEN = hdr_sz - nb;
    sreq->dev.iov_count = 1;
    sreq->dev.OnDataAvail = 0;

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_CREATE_REQUEST);
    return sreq;
}
/*
 * MPID_nem_mxm_iStartContigMsg - start sending a header plus optional
 * contiguous data through _mxm_isend().
 *
 * vc       : virtual connection to send on.
 * hdr      : packet header; sizeof(MPIDI_CH3_Pkt_t) bytes are copied from it
 *            into the request.
 * hdr_sz   : header size; asserted to be <= sizeof(MPIDI_CH3_Pkt_t).
 * data     : payload buffer; used only when data_sz is non-zero.
 * data_sz  : payload size in bytes.
 * sreq_ptr : receives the send request (NULL if the request could not be
 *            allocated).
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_nem_mxm_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data,
                                 MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq = NULL;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);

    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "mxm_iStartContigMsg");
    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);

    /* create a request */
    sreq = MPID_Request_create();
    /* --BEGIN ERROR HANDLING-- */
    if (sreq == NULL) {
        /* Report the failure instead of relying on an assertion, so the
         * request is never dereferenced when it could not be allocated. */
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**nomemreq");
    }
    /* --END ERROR HANDLING-- */
    MPIU_Object_set_ref(sreq, 2);
    MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t));
    sreq->kind = MPID_REQUEST_SEND;
    sreq->dev.OnDataAvail = NULL;
    sreq->dev.tmpbuf = NULL;

    /* The two sizes are not of type int, so they are cast to match the
     * conversion specifiers. */
    _dbg_mxm_output(5,
                    "iStartContigMsg ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %lu, %lu) \n",
                    vc->pg_rank, sreq->dev.pending_pkt.type, sreq,
                    (unsigned long) sizeof(MPIDI_CH3_Pkt_t), (unsigned long) data_sz);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    /* Describe the message: entry 0 is the header copy held by the request,
     * entry 1 (only when there is payload) is the caller's data buffer. */
    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;
    req_area->iov_count = 1;
    req_area->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt);
    req_area->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t);
    if (data_sz) {
        req_area->iov_count = 2;
        req_area->iov_buf[1].ptr = (void *) data;
        req_area->iov_buf[1].length = data_sz;
    }

    vc_area->pending_sends += 1;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = FALSE;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM,
                           mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

  fn_exit:
    *sreq_ptr = sreq;
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * MPID_Recv_init - create a persistent receive request.
 *
 * The request records the match information (rank, tag, and the
 * communicator's recvcontext_id plus context_offset) together with the user
 * buffer, count and datatype.  It takes a reference on the communicator and,
 * for non-builtin datatypes, on the datatype object.
 *
 * On success the request is stored in *request and MPI_SUCCESS is returned.
 * If no request can be allocated, an error code is returned and *request is
 * left untouched.
 */
int MPID_Recv_init(void * buf, int count, MPI_Datatype datatype, int rank, int tag,
                   MPID_Comm * comm, int context_offset, MPID_Request ** request)
{
    MPID_Request *preq;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_RECV_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RECV_INIT);

    preq = MPID_Request_create();
    if (preq != NULL) {
        MPIU_Object_set_ref(preq, 1);
        preq->kind = MPID_PREQUEST_RECV;
        preq->comm = comm;
        MPID_cc_set(&preq->cc, 0);
        MPIR_Comm_add_ref(comm);

        /* matching information */
        preq->dev.match.parts.rank = rank;
        preq->dev.match.parts.tag = tag;
        preq->dev.match.parts.context_id = comm->recvcontext_id + context_offset;

        /* user buffer description */
        preq->dev.user_buf = (void *) buf;
        preq->dev.user_count = count;
        preq->dev.datatype = datatype;

        preq->partner_request = NULL;
        MPIDI_Request_set_type(preq, MPIDI_REQUEST_TYPE_RECV);

        /* only non-builtin datatypes get a datatype pointer and a reference */
        if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
            MPID_Datatype_get_ptr(datatype, preq->dev.datatype_ptr);
            MPID_Datatype_add_ref(preq->dev.datatype_ptr);
        }

        *request = preq;
    }
    else {
        /* --BEGIN ERROR HANDLING-- */
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__,
                                         MPI_ERR_OTHER, "**nomemreq", 0);
        /* --END ERROR HANDLING-- */
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_RECV_INIT);
    return mpi_errno;
}
int MPID_Irecv(void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset, MPID_Request ** request) { MPID_Request * rreq; int found; int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPID_IRECV); MPIDI_FUNC_ENTER(MPID_STATE_MPID_IRECV); MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "rank=%d, tag=%d, context=%d", rank, tag, comm->recvcontext_id + context_offset)); if (rank == MPI_PROC_NULL) { rreq = MPID_Request_create(); if (rreq != NULL) { /* MT FIXME should these be handled by MPID_Request_create? */ MPIU_Object_set_ref(rreq, 1); MPID_cc_set(&rreq->cc, 0); rreq->kind = MPID_REQUEST_RECV; MPIR_Status_set_procnull(&rreq->status); } else { MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomemreq"); } goto fn_exit; } MPIU_THREAD_CS_ENTER(MSGQUEUE,); rreq = MPIDI_CH3U_Recvq_FDU_or_AEP(rank, tag, comm->recvcontext_id + context_offset, comm, buf, count, datatype, &found); if (rreq == NULL) { MPIU_THREAD_CS_EXIT(MSGQUEUE,); MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomemreq"); }
/*
 * MPIDI_CH3_iStartMsg - start sending a packet header on a virtual
 * connection (sock channel).
 *
 * Depending on the connection state the header is written immediately,
 * queued behind pending sends, or queued until a connection exists.
 *
 * vc       : virtual connection to send on.
 * hdr      : packet header.
 * hdr_sz   : header size; asserted to be <= sizeof(MPIDI_CH3_Pkt_t) and then
 *            replaced by sizeof(MPIDI_CH3_Pkt_t).
 * sreq_ptr : receives NULL when the whole header was written immediately,
 *            otherwise the request that was created (which carries an error
 *            status on the failure paths).
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_iStartMsg(MPIDI_VC_t * vc, void * hdr, MPIDI_msg_sz_t hdr_sz,
                        MPID_Request ** sreq_ptr)
{
    MPID_Request *req = NULL;
    MPIDI_CH3I_VC *vc_ch = &vc->ch;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSG);

    MPIU_Assert( hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    /* Headers on the SOCK channel have a fixed length: the maximum over all
       possible packet headers. */
    hdr_sz = sizeof(MPIDI_CH3_Pkt_t);
    MPIU_DBG_STMT(CH3_CHANNEL,VERBOSE, MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t*)hdr));

    if (vc_ch->state == MPIDI_CH3I_VC_STATE_CONNECTED) /* MT */
    {
        /* A connection exists.  Write right away when nothing is queued;
           whatever cannot be written is queued. */
        if (!MPIDI_CH3I_SendQ_empty(vc_ch)) /* MT */
        {
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE, "send in progress, request enqueued");
            req = create_request(hdr, hdr_sz, 0);
            if (!req) {
                MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
            }
            MPIDI_CH3I_SendQ_enqueue(vc_ch, req);
        }
        else
        {
            MPIU_Size_t nwritten;
            int write_rc;

            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE, "send queue empty, attempting to write");
            MPIU_DBG_PKT(vc_ch->conn,hdr,"istartmsg");

            /* MT: some signalling is needed to lock down our right to use
               the channel, so that the progress engine does not also try to
               write */
            write_rc = MPIDU_Sock_write(vc_ch->sock, hdr, hdr_sz, &nwritten);

            /* --BEGIN ERROR HANDLING-- */
            if (write_rc != MPI_SUCCESS)
            {
                MPIU_DBG_MSG_D(CH3_CHANNEL,TYPICAL, "ERROR - MPIDU_Sock_write failed, rc=%d", write_rc);
                req = MPID_Request_create();
                if (!req) {
                    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
                }
                req->kind = MPID_REQUEST_SEND;
                MPID_cc_set(&(req->cc), 0);
                req->status.MPI_ERROR = MPIR_Err_create_code( write_rc, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_INTERN, "**ch3|sock|writefailed", "**ch3|sock|writefailed %d", write_rc );
                /* Propagate the error to the caller as well */
                mpi_errno = req->status.MPI_ERROR;
            }
            /* --END ERROR HANDLING-- */
            else
            {
                MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE, "wrote %ld bytes", (unsigned long) nwritten);

                if (nwritten != hdr_sz)
                {
                    /* Only part of the header went out: queue the rest at
                       the head of the send queue and post a write for it. */
                    MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE, "partial write of " MPIDI_MSG_SZ_FMT " bytes, request enqueued at head", nwritten);
                    req = create_request(hdr, hdr_sz, nwritten);
                    if (!req) {
                        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
                    }
                    MPIDI_CH3I_SendQ_enqueue_head(vc_ch, req);
                    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE, (MPIU_DBG_FDEST,"posting write, vc=0x%p, sreq=0x%08x", vc, req->handle));
                    vc_ch->conn->send_active = req;
                    mpi_errno = MPIDU_Sock_post_write(vc_ch->conn->sock,
                                                      req->dev.iov[0].MPID_IOV_BUF,
                                                      req->dev.iov[0].MPID_IOV_LEN,
                                                      req->dev.iov[0].MPID_IOV_LEN,
                                                      NULL);
                    /* --BEGIN ERROR HANDLING-- */
                    if (mpi_errno != MPI_SUCCESS)
                    {
                        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p", req, vc_ch->conn, vc);
                        goto fn_fail;
                    }
                    /* --END ERROR HANDLING-- */
                }
                else
                {
                    MPIU_DBG_MSG_D(CH3_CHANNEL,VERBOSE, "entire write complete, " MPIDI_MSG_SZ_FMT " bytes", nwritten);
                    /* Nothing left to do; no request is created. */
                }
            }
        }
    }
    else if (vc_ch->state == MPIDI_CH3I_VC_STATE_CONNECTING) /* MT */
    {
        MPIU_DBG_VCUSE(vc, "connecteding. enqueuing request");

        /* hold the data until the connection has been formed */
        req = create_request(hdr, hdr_sz, 0);
        if (!req) {
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
        }
        MPIDI_CH3I_SendQ_enqueue(vc_ch, req);
    }
    else if (vc_ch->state == MPIDI_CH3I_VC_STATE_UNCONNECTED) /* MT */
    {
        MPIU_DBG_VCUSE(vc, "unconnected. posting connect and enqueuing request");

        /* hold the data until the connection has been formed */
        req = create_request(hdr, hdr_sz, 0);
        if (!req) {
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
        }
        MPIDI_CH3I_SendQ_enqueue(vc_ch, req);

        /* start forming a new connection */
        MPIDI_CH3I_VC_post_connect(vc);
    }
    else if (vc_ch->state != MPIDI_CH3I_VC_STATE_FAILED)
    {
        /* Sending is not possible right now; queue the data for later */
        MPIU_DBG_VCUSE(vc,"forming connection, request enqueued");
        req = create_request(hdr, hdr_sz, 0);
        if (!req) {
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
        }
        MPIDI_CH3I_SendQ_enqueue(vc_ch, req);
    }
    /* --BEGIN ERROR HANDLING-- */
    else
    {
        /* The connection has failed: hand back a completed request that
           carries the error. */
        MPIU_DBG_VCUSE(vc,"ERROR - connection failed");
        req = MPID_Request_create();
        if (!req) {
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**nomem");
        }
        req->kind = MPID_REQUEST_SEND;
        MPID_cc_set(&req->cc, 0);
        req->status.MPI_ERROR = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_INTERN, "**ch3|sock|connectionfailed",0 );
        /* Propagate the error to the caller as well */
        mpi_errno = req->status.MPI_ERROR;
    }
    /* --END ERROR HANDLING-- */

    /* fn_fail doubles as the common exit point */
  fn_fail:
    *sreq_ptr = req;
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSG);
    return mpi_errno;
}
/*
 * MPID_nem_ptl_pkt_cancel_send_req_handler - handle a "cancel send" request
 * packet.
 *
 * Searches (with PTL_SEARCH_DELETE) for a message matching the tag, context
 * id and rank carried in the packet, polls until the search request
 * completes, and sends a response packet whose ack field is the search
 * request's "found" flag.
 *
 * vc     : virtual connection the packet arrived on.
 * pkt    : the received packet, interpreted as a cancel-send request.
 * buflen : not accessed by this function.
 * rreqp  : not accessed by this function.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_nem_ptl_pkt_cancel_send_req_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                             MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
{
    int ret, mpi_errno = MPI_SUCCESS;
    MPIDI_nem_ptl_pkt_cancel_send_req_t *req_pkt = (MPIDI_nem_ptl_pkt_cancel_send_req_t *)pkt;
    MPID_PKT_DECL_CAST(upkt, MPIDI_nem_ptl_pkt_cancel_send_resp_t, resp_pkt);
    MPID_Request *search_req;
    /* Initialised because it is read below even though the return value of
     * the send call that fills it in is not checked. */
    MPID_Request *resp_req = NULL;
    ptl_me_t me;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
      "received cancel send req pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
                      req_pkt->sender_req_id, req_pkt->match.parts.rank,
                      req_pkt->match.parts.tag, req_pkt->match.parts.context_id));

    /* create a dummy request and search for the message */
    /* create a request */
    search_req = MPID_Request_create();
    /* Check the allocation BEFORE the request is touched: the netmod
     * initialisation below operates on the request. */
    MPIR_ERR_CHKANDJUMP1(!search_req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Request_create");
    MPID_nem_ptl_init_req(search_req);
    MPIU_Object_set_ref(search_req, 2); /* 1 ref for progress engine and 1 ref for us */
    search_req->kind = MPID_REQUEST_MPROBE;

    /* create a dummy ME to use for searching the list */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE );
    me.min_free = 0;
    me.match_bits = NPTL_MATCH(req_pkt->match.parts.tag, req_pkt->match.parts.context_id, req_pkt->match.parts.rank);
    me.match_id = vc_ptl->id;
    me.ignore_bits = NPTL_MATCH_IGNORE;

    /* FIXME: this should use a custom handler that throws the data away inline */
    REQ_PTL(search_req)->event_handler = handle_mprobe;

    /* submit a search request */
    ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, search_req);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, search_req);

    /* wait for search request to complete */
    do {
        mpi_errno = MPID_nem_ptl_poll(FALSE);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    } while (!MPID_Request_is_complete(search_req));

    /* send response */
    resp_pkt->type = MPIDI_NEM_PKT_NETMOD;
    resp_pkt->subtype = MPIDI_NEM_PTL_PKT_CANCEL_SEND_RESP;
    resp_pkt->ack = REQ_PTL(search_req)->found;
    resp_pkt->sender_req_id = req_pkt->sender_req_id;

    /* NOTE(review): the return value of this send is not checked - confirm
     * whether a failure here should be propagated. */
    MPID_nem_ptl_iStartContigMsg(vc, resp_pkt, sizeof(*resp_pkt), NULL, 0, &resp_req);

    /* if the message was found, free the temporary buffer used to copy the data */
    if (REQ_PTL(search_req)->found)
        MPIU_Free(search_req->dev.tmpbuf);

    MPID_Request_release(search_req);
    if (resp_req != NULL)
        MPID_Request_release(resp_req);

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * MPID_nem_ptl_improbe - matched probe implemented with a Portals ME search.
 *
 * Builds a match entry from (tag, comm->context_id + context_offset, source),
 * submits it with PTL_SEARCH_DELETE and polls until the search request
 * completes.
 *
 * vc             : virtual connection of the source; its Portals id is used
 *                  unless source is MPI_ANY_SOURCE.
 * source, tag    : matching criteria; MPI_ANY_SOURCE and MPI_ANY_TAG are
 *                  handled.
 * comm           : communicator; a reference is added when a message is
 *                  found.
 * context_offset : added to comm->context_id for matching.
 * flag           : set to the search request's "found" flag.
 * message        : set to the request only when a message was found.
 * status         : filled from the request only when a message was found.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_nem_ptl_improbe(MPIDI_VC_t *vc, int source, int tag, MPID_Comm *comm, int context_offset,
                         int *flag, MPID_Request **message, MPI_Status *status)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    ptl_process_t id_any;
    ptl_me_t me;
    MPID_Request *req;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_IMPROBE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_IMPROBE);

    id_any.phys.nid = PTL_NID_ANY;
    id_any.phys.pid = PTL_PID_ANY;

    /* create a request */
    req = MPID_Request_create();
    /* Check the allocation BEFORE the request is touched: the netmod
     * initialisation below operates on the request. */
    MPIR_ERR_CHKANDJUMP1(!req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Request_create");
    MPID_nem_ptl_init_req(req);
    MPIU_Object_set_ref(req, 2); /* 1 ref for progress engine and 1 ref for us */
    REQ_PTL(req)->event_handler = handle_mprobe;
    req->kind = MPID_REQUEST_MPROBE;

    /* create a dummy ME to use for searching the list */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE );
    me.min_free = 0;
    me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, source);

    if (source == MPI_ANY_SOURCE)
        me.match_id = id_any;
    else {
        /* make sure the peer's Portals id has been set up before using it */
        if (!vc_ptl->id_initialized) {
            mpi_errno = MPID_nem_ptl_init_id(vc);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
        me.match_id = vc_ptl->id;
    }

    if (tag == MPI_ANY_TAG)
        me.ignore_bits = NPTL_MATCH_IGNORE_ANY_TAG;
    else
        me.ignore_bits = NPTL_MATCH_IGNORE;

    /* submit a search request */
    ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, req);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, req);

    /* wait for search request to complete */
    do {
        mpi_errno = MPID_nem_ptl_poll(FALSE);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    } while (!MPID_Request_is_complete(req));

    *flag = REQ_PTL(req)->found;
    if (*flag) {
        /* hand the request to the caller as the matched message */
        req->comm = comm;
        MPIR_Comm_add_ref(comm);
        MPIR_Request_extract_status(req, status);
        *message = req;
    }
    else {
        MPID_Request_release(req);
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_IMPROBE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * MPID_nem_scif_iStartContigMsg - start sending a header plus contiguous
 * data over a SCIF connection.
 *
 * When the send queue is empty and MPID_nem_scif_poll_send() allows it, the
 * message is written immediately.  If the whole message was written and the
 * returned sequence number is zero, no request is created.  Otherwise a
 * request is created and queued.
 *
 * vc       : virtual connection to send on.
 * hdr      : packet header; sizeof(MPIDI_CH3_Pkt_t) bytes are sent.
 * hdr_sz   : header size; asserted to be <= sizeof(MPIDI_CH3_Pkt_t).
 * data     : payload buffer.
 * data_sz  : payload size in bytes.
 * sreq_ptr : set to NULL when the message was sent completely, or to the
 *            queued request; not written on the error paths.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPID_nem_scif_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data,
                                  MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq = NULL;
    MPIDI_msg_sz_t offset = 0;
    MPID_nem_scif_vc_area *vc_scif = VC_SCIF(vc);
    scifconn_t *sc = vc_scif->sc;
    uint64_t seqno = 0;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_ISTARTCONTIGMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_ISTARTCONTIGMSG);
    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));

    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "scif_iStartContigMsg");
    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr);

    if (MPIDI_CH3I_Sendq_empty(vc_scif->send_queue) &&
        MPID_nem_scif_poll_send(sc->fd, &sc->csend)) {
        MPID_IOV iov[2];

        iov[0].MPID_IOV_BUF = hdr;
        iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t);
        iov[1].MPID_IOV_BUF = data;
        iov[1].MPID_IOV_LEN = data_sz;

        offset = MPID_nem_scif_writev(sc->fd, &sc->csend, iov, 2, &seqno);
        MPIU_ERR_CHKANDJUMP1(offset <= 0, mpi_errno, MPI_ERR_OTHER,
                             "**scif_writev", "**scif_writev %s", MPIU_Strerror(errno));
        MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE,
                         (MPIU_DBG_FDEST, "scif_send " MPIDI_MSG_SZ_FMT " fd=%d", offset, sc->fd));

        if (offset == sizeof(MPIDI_CH3_Pkt_t) + data_sz) {
            if (!seqno) {
                /* sent whole message */
                *sreq_ptr = NULL;
                goto fn_exit;
            }
        }
    }

    /* create and enqueue request */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "enqueuing");

    /* create a request */
    sreq = MPID_Request_create();
    /* Validate the allocation before the first access to the request. */
    MPIU_Assert(sreq != NULL);
    RQ_SCIF(sreq)->seqno = seqno;
    MPIU_Object_set_ref(sreq, 2);
    sreq->kind = MPID_REQUEST_SEND;

    sreq->dev.OnDataAvail = 0;
    sreq->ch.vc = vc;
    sreq->dev.iov_offset = 0;

    /* The IOV is only filled in when the sequence number is zero. */
    if (!seqno) {
        if (offset < sizeof(MPIDI_CH3_Pkt_t)) {
            /* The header was not written completely: keep a copy of it in
             * the request and describe its unsent tail, followed by the
             * data (if any). */
            sreq->dev.pending_pkt = *(MPIDI_CH3_Pkt_t *) hdr;
            sreq->dev.iov[0].MPID_IOV_BUF = (char *) &sreq->dev.pending_pkt + offset;
            sreq->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_t) - offset;
            if (data_sz) {
                sreq->dev.iov[1].MPID_IOV_BUF = data;
                sreq->dev.iov[1].MPID_IOV_LEN = data_sz;
                sreq->dev.iov_count = 2;
            }
            else
                sreq->dev.iov_count = 1;
        }
        else {
            /* The header went out completely: only the unsent tail of the
             * data remains. */
            sreq->dev.iov[0].MPID_IOV_BUF = (char *) data + (offset - sizeof(MPIDI_CH3_Pkt_t));
            sreq->dev.iov[0].MPID_IOV_LEN = data_sz - (offset - sizeof(MPIDI_CH3_Pkt_t));
            sreq->dev.iov_count = 1;
        }
        MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0);
    }

    if (MPIDI_CH3I_Sendq_empty(vc_scif->send_queue)) {
        /* this will be the first send on the queue: queue it and set
         * the write flag on the pollfd */
        MPIDI_CH3I_Sendq_enqueue(&vc_scif->send_queue, sreq);
    }
    else {
        /* there are other sends in the queue before this one: try to
         * send from the queue */
        MPIDI_CH3I_Sendq_enqueue(&vc_scif->send_queue, sreq);
        mpi_errno = MPID_nem_scif_send_queued(vc, &vc_scif->send_queue);
        if (mpi_errno)
            MPIU_ERR_POP(mpi_errno);
    }

    *sreq_ptr = sreq;

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_ISTARTCONTIGMSG);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
/*
 * MPIDI_CH3_iStartMsg - start sending a packet header on a virtual
 * connection (sctp channel).
 *
 * The stream is chosen by Req_Stream_from_pkt_and_req() from the packet and
 * the value of *sreq_ptr on entry.  Depending on the state of that stream
 * the header is written immediately, queued behind an active send, or queued
 * until a connection exists.
 *
 * vc       : virtual connection to send on.
 * hdr      : packet header.
 * hdr_sz   : header size; replaced by sizeof(MPIDI_CH3_Pkt_t) (it is only
 *            range-checked when MPICH_DBG_OUTPUT is defined).
 * sreq_ptr : read on entry for stream selection; on return holds NULL when
 *            the whole header was written immediately, otherwise the request
 *            that was created.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_CH3_iStartMsg(MPIDI_VC_t * vc, void * hdr, MPIDI_msg_sz_t hdr_sz,
                        MPID_Request ** sreq_ptr)
{
    MPID_Request * sreq = NULL;
    int mpi_errno = MPI_SUCCESS;
    int stream_no, ppid;
    MPIDI_CH3_Pkt_t* pkt;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_ISTARTMSG);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_ISTARTMSG);

    MPIDI_DBG_PRINTF((50, FCNAME, "entering"));
#ifdef MPICH_DBG_OUTPUT
    /* --BEGIN ERROR HANDLING-- */
    if (hdr_sz > sizeof(MPIDI_CH3_Pkt_t))
    {
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**arg", 0);
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */
#endif

    /* The sctp channel uses a fixed length header, the size of which is the
       maximum of all possible packet headers */
    hdr_sz = sizeof(MPIDI_CH3_Pkt_t);
    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t*)hdr);

    /* find out stream no. */
    pkt = (MPIDI_CH3_Pkt_t*) hdr;
    stream_no = Req_Stream_from_pkt_and_req(pkt, *sreq_ptr);

    /* don't know pkt type here so pass it in */
    ppid = 0;

    if (SEND_CONNECTED(vc, stream_no) == MPIDI_CH3I_VC_STATE_CONNECTED)
    {
        /* Connection already formed.  If send queue is empty attempt to send
           data, queuing any unsent data. */
        if (!SEND_ACTIVE(vc, stream_no)) /* MT */
        {
            /* Declarations come first so the block does not depend on
               mixed declarations and statements. */
            MPIU_Size_t nb;
            int rc;

            MPIU_Assert(MPIDI_CH3I_SendQ_empty_x(vc, stream_no));

            MPIDI_DBG_PRINTF((55, FCNAME, "send queue empty, attempting to write"));

            /* MT - need some signalling to lock down our right to use the
               channel, thus insuring that the progress engine does not also
               try to write */
            rc = MPIDU_Sctp_write(vc, hdr, hdr_sz, stream_no, ppid, &nb);
            if (rc == MPI_SUCCESS)
            {
                MPIDI_DBG_PRINTF((55, FCNAME, "wrote %ld bytes", (unsigned long) nb));

                if (nb == hdr_sz)
                {
                    /* nb is cast to match the %d conversion */
                    MPIDI_DBG_PRINTF((55, FCNAME, "entire write complete, %d bytes", (int) nb));
                    /* done.  get us out of here as quickly as possible. */
                }
                else
                {
                    MPIDI_DBG_PRINTF((55, FCNAME, "partial write of %d bytes, request enqueued at head", (int) nb));
                    sreq = create_request(hdr, hdr_sz, nb);
                    /* --BEGIN ERROR HANDLING-- */
                    if (sreq == NULL)
                    {
                        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
                        goto fn_fail;
                    }
                    /* --END ERROR HANDLING-- */

                    /* put in in Global SendQ */
                    MPIDU_Sctp_post_write(vc, sreq, hdr_sz-nb, hdr_sz-nb, NULL, stream_no);

                    MPIDI_DBG_PRINTF((55, FCNAME, "posting write, vc=0x%p, sreq=0x%08x", vc, sreq->handle));

                    /* NOTE(review): mpi_errno is not assigned by the
                       post-write call above, so this check cannot currently
                       fire; kept for when the call reports errors. */
                    /* --BEGIN ERROR HANDLING-- */
                    if (mpi_errno != MPI_SUCCESS)
                    {
                        /* vc->ch is a structure, so its address is passed
                           for the %p conversion. */
                        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
                                                         "**ch3|sock|postwrite", "ch3|sock|postwrite %p %p %p", /* FIXME change error code */
                                                         sreq, &vc->ch, vc);
                        goto fn_fail;
                    }
                    /* --END ERROR HANDLING-- */
                }
            }
            /* --BEGIN ERROR HANDLING-- */
            else
            {
                MPIDI_DBG_PRINTF((55, FCNAME, "ERROR - MPIDU_Sctp_write failed, rc=%d", rc));
                sreq = MPID_Request_create();
                if (sreq == NULL)
                {
                    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
                    goto fn_fail;
                }
                sreq->kind = MPID_REQUEST_SEND;
                sreq->cc = 0;
                sreq->status.MPI_ERROR = MPI_ERR_INTERN;
            }
            /* --END ERROR HANDLING-- */
        }
        else
        {
            MPIDI_DBG_PRINTF((55, FCNAME, "send in progress, request enqueued"));
            sreq = create_request(hdr, hdr_sz, 0);
            /* --BEGIN ERROR HANDLING-- */
            if (sreq == NULL)
            {
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
                goto fn_fail;
            }
            /* --END ERROR HANDLING-- */
            MPIDI_CH3I_SendQ_enqueue_x(vc, sreq, stream_no);
        }
    }
    else if (SEND_CONNECTED(vc, stream_no) == MPIDI_CH3I_VC_STATE_UNCONNECTED) /* MT */
    {
        MPIDI_DBG_PRINTF((55, FCNAME, "unconnected. posting connect and enqueuing request"));

        /* queue the data so it can be sent after the connection is formed */
        sreq = create_request(hdr, hdr_sz, 0);
        /* --BEGIN ERROR HANDLING-- */
        if (sreq == NULL)
        {
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
            goto fn_fail;
        }
        /* --END ERROR HANDLING-- */

        /* Form a new connection, called once per association (i.e. not per
           stream) */
        if(vc->ch.pkt == NULL)
        {
            mpi_errno = MPIDI_CH3I_VC_post_connect(vc);
            /* --BEGIN ERROR HANDLING-- */
            if (mpi_errno != MPI_SUCCESS)
            {
                mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
                goto fn_fail;
            }
            /* --END ERROR HANDLING-- */
        }

        MPIDU_Sctp_stream_init(vc, sreq, stream_no);
    }
    else if (vc->ch.state != MPIDI_CH3I_VC_STATE_FAILED)
    {
        /* Unable to send data at the moment, so queue it for later */
        MPIDI_DBG_PRINTF((55, FCNAME, "forming connection, request enqueued"));
        sreq = create_request(hdr, hdr_sz, 0);
        /* --BEGIN ERROR HANDLING-- */
        if (sreq == NULL)
        {
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
            goto fn_fail;
        }
        /* --END ERROR HANDLING-- */

        MPIDU_Sctp_stream_init(vc, sreq, stream_no);
    }
    /* --BEGIN ERROR HANDLING-- */
    else
    {
        /* Connection failed, so allocate a request and return an error. */
        MPIDI_DBG_PRINTF((55, FCNAME, "ERROR - connection failed"));
        sreq = MPID_Request_create();
        if (sreq == NULL)
        {
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0);
            goto fn_fail;
        }
        sreq->kind = MPID_REQUEST_SEND;
        sreq->cc = 0;
        /* TODO: Create an appropriate error message */
        sreq->status.MPI_ERROR = MPI_ERR_INTERN;
    }
    /* --END ERROR HANDLING-- */

  fn_exit:
    *sreq_ptr = sreq;
    MPIDI_DBG_PRINTF((50, FCNAME, "exiting"));
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTMSG);
    return mpi_errno;
  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}