/*
 * Copy one received fragment into the request buffer at byte 'offset'.
 *
 * The copy length is clipped so that nothing is written at or past
 * req->recv_msglen; a fragment that begins at or beyond recv_msglen copies
 * zero bytes.  req->recv_msgoff is raised to the (unclipped) end of this
 * fragment when it was lower, and req->send_msgoff always advances by the
 * full 'nbytes' whether or not the copy was clipped.
 *
 * 'epaddr' is accepted but not referenced in this function.
 */
static void __recvpath
psmi_mq_req_copy(psm_mq_req_t req, psm_epaddr_t epaddr, uint32_t offset,
		 const void *buf, uint32_t nbytes)
{
	/* recv_msglen may be changed by unexpected receive buf. */
	uint8_t *dst = (uint8_t *)req->buf + offset;
	const uint32_t frag_end = offset + nbytes;
	uint32_t copy_len;

	if (frag_end <= req->recv_msglen)
		copy_len = nbytes;			/* fragment fits entirely */
	else if (offset < req->recv_msglen)
		copy_len = req->recv_msglen - offset;	/* fragment is cut short */
	else
		copy_len = 0;				/* fragment is fully out of range */

	VALGRIND_MAKE_MEM_DEFINED(dst, copy_len);
	psmi_mq_mtucpy(dst, buf, copy_len);

	if (frag_end > req->recv_msgoff)
		req->recv_msgoff = frag_end;

	req->send_msgoff += nbytes;
}
/*
 * Handle one eager data fragment.
 *
 * With a request in hand, the fragment is copied into it via
 * psmi_mq_req_copy().  Once send_msgoff reaches send_msglen the request is
 * taken off the master's egrlong list (if it is flagged MQE_TYPE_EGRLONG),
 * marked MQ_STATE_COMPLETE and, if it had been matched, appended to the
 * completed queue.
 *
 * With req == NULL, a new receive request is allocated to hold the fragment
 * in a system buffer and is queued on the master's egrdata list.
 *
 * Returns MQ_RET_MATCH_OK when the request was in MQ_STATE_MATCHED on entry,
 * MQ_RET_UNEXP_OK otherwise (including the req == NULL case).
 */
int __recvpath
psmi_mq_handle_data(psm_mq_req_t req, psm_epaddr_t epaddr, uint32_t egrid,
		    uint32_t offset, const void *buf, uint32_t nbytes)
{
	psm_mq_t mq;
	int rc;
	int was_matched;

	if (req == NULL) {
		/* No request to deliver into: park the data until one shows up. */
		mq = epaddr->ep->mq;
		req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
		psmi_assert(req != NULL);

		req->egrid.egr_data = egrid;
		req->recv_msgoff = offset;
		req->recv_msglen = nbytes;
		req->buf = psmi_mq_sysbuf_alloc(mq, nbytes);
		psmi_mq_mtucpy(req->buf, buf, nbytes);

		STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrdata, req, nextq);
		return MQ_RET_UNEXP_OK;
	}

	mq = req->mq;

	if (req->state == MQ_STATE_MATCHED) {
		rc = MQ_RET_MATCH_OK;
	} else {
		psmi_assert(req->state == MQ_STATE_UNEXP);
		rc = MQ_RET_UNEXP_OK;
	}

	psmi_assert(req->egrid.egr_data == egrid);
	psmi_mq_req_copy(req, epaddr, offset, buf, nbytes);

	/* More fragments still outstanding: nothing else to do yet. */
	if (req->send_msgoff != req->send_msglen)
		return rc;

	if (req->type & MQE_TYPE_EGRLONG) {
		STAILQ_REMOVE(&epaddr->mctxt_master->egrlong, req,
			      psm_mq_req, nextq);
	}

	/* Only matched requests are reported on the completed queue. */
	was_matched = (req->state == MQ_STATE_MATCHED);
	req->state = MQ_STATE_COMPLETE;
	if (was_matched)
		mq_qq_append(&mq->completed_q, req);

	_IPATH_VDBG("epaddr=%s completed %d byte send, state=%d\n",
		    psmi_epaddr_get_name(epaddr->epid),
		    (int)req->send_msglen, req->state);

	return rc;
}
/*
 * Append one received fragment to the request buffer at the current
 * receive offset (req->recv_msgoff).
 *
 * At most (recv_msglen - recv_msgoff) bytes are copied, so the write never
 * runs past recv_msglen.  recv_msgoff advances by the number of bytes
 * actually copied; send_msgoff advances by the full 'nbytes'.
 *
 * 'epaddr' is accepted but not referenced in this function.
 */
static void __recvpath
psmi_mq_req_copy(psm_mq_req_t req, psm_epaddr_t epaddr, const void *buf,
		 uint32_t nbytes)
{
	/* recv_msglen may be changed by unexpected receive buf. */
	const uint32_t room = req->recv_msglen - req->recv_msgoff;
	const uint32_t copy_len = (room < nbytes) ? room : nbytes;
	uint8_t *dst = (uint8_t *)req->buf + req->recv_msgoff;

	VALGRIND_MAKE_MEM_DEFINED(dst, copy_len);
	psmi_mq_mtucpy(dst, buf, copy_len);

	req->recv_msgoff += copy_len;
	req->send_msgoff += nbytes;
}
/*
 * Handle an envelope that arrived with a sequence number that cannot be
 * processed yet.
 *
 * A receive request is allocated and filled from the envelope; the payload
 * is staged in a system buffer according to 'mode':
 *   - MQ_MSG_TINY / MQ_MSG_SHORT: the whole message is copied and the
 *     request is marked MQ_STATE_COMPLETE (a zero-length TINY message gets
 *     no buffer).
 *   - MQ_MSG_LONG: the request is queued on the master's egrlong list in
 *     MQ_STATE_UNEXP, any payload carried by the envelope is fed through
 *     psmi_mq_handle_data(), and psmi_mq_handle_egrdata() is then called.
 * Any other mode is reported through psmi_handle_error().
 *
 * The request is then tagged with msg_seqnum and appended to the master's
 * out-of-order queue, and the rx_sys statistics are updated.
 *
 * Always returns MQ_RET_UNEXP_OK.
 */
int __recvpath
psmi_mq_handle_envelope_outoforder(psm_mq_t mq, uint16_t mode,
				   psm_epaddr_t epaddr, uint16_t msg_seqnum,
				   uint64_t tag, psmi_egrid_t egrid,
				   uint32_t send_msglen, const void *payload,
				   uint32_t paylen)
{
	psm_mq_req_t req;
	uint32_t msglen = send_msglen;

	req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
	psmi_assert(req != NULL);

	req->tag = tag;
	req->recv_msgoff = 0;
	req->recv_msglen = req->send_msglen = req->buf_len = msglen;

	_IPATH_VDBG("from=%s match=NO (req=%p) mode=%x mqtag=%" PRIx64
		    " send_msglen=%d\n",
		    psmi_epaddr_get_name(epaddr->epid), req, mode, tag,
		    send_msglen);

	switch (mode) {
	case MQ_MSG_TINY:
		/* A zero-length tiny message carries no buffer at all. */
		if (msglen == 0) {
			req->buf = NULL;
		} else {
			req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
			mq_copy_tiny((uint32_t *)req->buf,
				     (uint32_t *)payload, msglen);
		}
		req->state = MQ_STATE_COMPLETE;
		break;

	case MQ_MSG_SHORT:
		req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
		psmi_mq_mtucpy(req->buf, payload, msglen);
		req->state = MQ_STATE_COMPLETE;
		break;

	case MQ_MSG_LONG:
		req->egrid = egrid;
		req->epaddr = epaddr;
		req->send_msgoff = 0;
		req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
		req->state = MQ_STATE_UNEXP;
		req->type |= MQE_TYPE_EGRLONG;
		/*
		 * Queue before delivering data: psmi_mq_handle_data() removes
		 * the request from egrlong once the message is complete.
		 */
		STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrlong, req, nextq);
		_IPATH_VDBG("unexp MSG_LONG %d of length %d bytes pay=%d\n",
			    egrid.egr_msgno, msglen, paylen);
		if (paylen > 0) {
			psmi_mq_handle_data(req, epaddr, egrid.egr_data, 0,
					    payload, paylen);
		}
		psmi_mq_handle_egrdata(mq, req, epaddr);
		break;

	default:
		psmi_handle_error(PSMI_EP_NORETURN, PSM_INTERNAL_ERR,
				  "Internal error, unknown packet 0x%x", mode);
	}

	req->msg_seqnum = msg_seqnum;
	mq_sq_append(&epaddr->mctxt_master->outoforder_q, req);
	epaddr->mctxt_master->outoforder_c++;

	mq->stats.rx_sys_bytes += msglen;
	mq->stats.rx_sys_num++;

	return MQ_RET_UNEXP_OK;
}
/*
 * Note, epaddr is the master.
 *
 * Drain the master's out-of-order queue for as long as it holds the request
 * carrying the next sequence number (epaddr->mctxt_recv_seqnum).
 *
 * Each request taken off the queue bumps mctxt_recv_seqnum and decrements
 * outoforder_c.  It is then matched by tag against the expected queue:
 *   - no match: the request moves to the unexpected queue as-is;
 *   - match: the staged state/data is transferred to the posted receive
 *     according to the staged request's state, and the staged request is
 *     freed.
 *
 * A staged request in any state other than COMPLETE, UNEXP or UNEXP_RV is
 * reported on stderr and the process is aborted.
 *
 * Always returns 0.
 */
int __recvpath
psmi_mq_handle_outoforder_queue(psm_epaddr_t epaddr)
{
	psm_mq_t mq = epaddr->ep->mq;

	do {
		psm_mq_req_t ureq, ereq;
		uint32_t msglen;

		ureq = mq_ooo_match(&epaddr->outoforder_q,
				    epaddr->mctxt_recv_seqnum);
		if (ureq == NULL)
			return 0;

		epaddr->mctxt_recv_seqnum++;
		epaddr->outoforder_c--;

		ereq = mq_req_match(&(mq->expected_q), ureq->tag, 1);
		if (ereq == NULL) {
			/* Nothing posted for this tag yet. */
			mq_sq_append(&mq->unexpected_q, ureq);
			continue;
		}

		psmi_assert(MQE_TYPE_IS_RECV(ereq->type));
		ereq->tag = ureq->tag;
		msglen = mq_set_msglen(ereq, ereq->buf_len, ureq->send_msglen);

		switch (ureq->state) {
		case MQ_STATE_COMPLETE:
			if (ureq->buf != NULL) {
				/* 0-byte don't alloc a sysbuf */
				psmi_mq_mtucpy(ereq->buf,
					       (const void *)ureq->buf, msglen);
				psmi_mq_sysbuf_free(mq, ureq->buf);
			}
			ereq->state = MQ_STATE_COMPLETE;
			mq_qq_append(&mq->completed_q, ereq);
			break;

		case MQ_STATE_UNEXP:	/* not done yet */
			ereq->type = ureq->type;
			ereq->egrid = ureq->egrid;
			ereq->epaddr = ureq->epaddr;
			ereq->send_msgoff = ureq->send_msgoff;
			/* Carry over only what fits in the posted receive. */
			ereq->recv_msgoff = min(ureq->recv_msgoff, msglen);
			psmi_mq_mtucpy(ereq->buf, (const void *)ureq->buf,
				       ereq->recv_msgoff);
			psmi_mq_sysbuf_free(mq, ureq->buf);
			ereq->state = MQ_STATE_MATCHED;
			/*
			 * Put ereq in ureq's place on the egrlong list:
			 * insert right after it, then unlink ureq.
			 */
			STAILQ_INSERT_AFTER(&ureq->epaddr->mctxt_master->egrlong,
					    ureq, ereq, nextq);
			STAILQ_REMOVE(&ureq->epaddr->mctxt_master->egrlong,
				      ureq, psm_mq_req, nextq);
			break;

		case MQ_STATE_UNEXP_RV:	/* rendez-vous ... */
			ereq->state = MQ_STATE_MATCHED;
			ereq->rts_peer = ureq->rts_peer;
			ereq->rts_sbuf = ureq->rts_sbuf;
			ereq->send_msgoff = 0;
			ereq->rts_callback = ureq->rts_callback;
			ereq->rts_reqidx_peer = ureq->rts_reqidx_peer;
			ereq->type = ureq->type;
			ereq->rts_callback(ereq, 0);
			break;

		default:
			/*
			 * %p requires a void * argument, and the tag is a
			 * 64-bit value: print it as an integer instead of
			 * squeezing it through a pointer-sized cast.
			 */
			fprintf(stderr, "Unexpected state %d in req %p\n",
				ureq->state, (void *)ureq);
			fprintf(stderr, "type=%d, mq=%p, tag=%" PRIx64 "\n",
				ureq->type, (void *)ureq->mq,
				(uint64_t)ureq->tag);
			abort();
		}

		psmi_mq_req_free(ureq);
	} while (epaddr->outoforder_c);

	return 0;
}
/*
 * This handles the regular (i.e. non-rendezvous MPI envelopes)
 *
 * The tag is looked up in the expected queue.  Without a match the envelope
 * is handed to psmi_mq_handle_envelope_unexpected() and its result is
 * returned.  With a match the payload is delivered straight into the posted
 * receive:
 *   - MQ_MSG_TINY / MQ_MSG_SHORT: copied in one go, request completed and
 *     appended to the completed queue;
 *   - MQ_MSG_LONG: request queued on the master's egrlong list in
 *     MQ_STATE_MATCHED, any payload carried by the envelope is fed through
 *     psmi_mq_handle_data(), then psmi_mq_handle_egrdata() is called.
 * Any other mode is reported through psmi_handle_error().
 *
 * Returns MQ_RET_MATCH_OK on a match.
 */
int __recvpath
psmi_mq_handle_envelope(psm_mq_t mq, uint16_t mode, psm_epaddr_t epaddr,
			uint64_t tag, psmi_egrid_t egrid, uint32_t send_msglen,
			const void *payload, uint32_t paylen)
{
	psm_mq_req_t req;
	uint32_t msglen;

	psmi_assert(epaddr != NULL);

	req = mq_req_match(&(mq->expected_q), tag, 1);
	if (req == NULL) {
		return psmi_mq_handle_envelope_unexpected(mq, mode, epaddr,
							  tag, egrid,
							  send_msglen, payload,
							  paylen);
	}

	/* we have a match */
	psmi_assert(MQE_TYPE_IS_RECV(req->type));
	req->tag = tag;
	msglen = mq_set_msglen(req, req->buf_len, send_msglen);

	_IPATH_VDBG("from=%s match=YES (req=%p) mode=%x mqtag=%" PRIx64
		    " msglen=%d paylen=%d\n",
		    psmi_epaddr_get_name(epaddr->epid), req, mode, tag,
		    msglen, paylen);

	switch (mode) {
	case MQ_MSG_TINY:
		PSM_VALGRIND_DEFINE_MQ_RECV(req->buf, req->buf_len, msglen);
		mq_copy_tiny((uint32_t *)req->buf, (uint32_t *)payload,
			     msglen);
		req->state = MQ_STATE_COMPLETE;
		mq_qq_append(&mq->completed_q, req);
		break;

	case MQ_MSG_SHORT:
		/* message fits in 1 payload */
		PSM_VALGRIND_DEFINE_MQ_RECV(req->buf, req->buf_len, msglen);
		psmi_mq_mtucpy(req->buf, payload, msglen);
		req->state = MQ_STATE_COMPLETE;
		mq_qq_append(&mq->completed_q, req);
		break;

	case MQ_MSG_LONG:
		req->egrid = egrid;
		req->state = MQ_STATE_MATCHED;
		req->type |= MQE_TYPE_EGRLONG;
		req->send_msgoff = req->recv_msgoff = 0;
		/*
		 * Queue before delivering data: psmi_mq_handle_data() removes
		 * the request from egrlong once the message is complete.
		 */
		STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrlong, req, nextq);
		_IPATH_VDBG("exp MSG_LONG %d of length %d bytes pay=%d\n",
			    egrid.egr_msgno, msglen, paylen);
		if (paylen > 0) {
			psmi_mq_handle_data(req, epaddr, egrid.egr_data, 0,
					    payload, paylen);
		}
		psmi_mq_handle_egrdata(mq, req, epaddr);
		break;

	default:
		psmi_handle_error(PSMI_EP_NORETURN, PSM_INTERNAL_ERR,
				  "Internal error, unknown packet 0x%x", mode);
	}

	mq->stats.rx_user_bytes += msglen;
	mq->stats.rx_user_num++;

	return MQ_RET_MATCH_OK;
}
/*
 * Handle an envelope for which no receive has been posted.
 *
 * Modes at or above MQ_MSG_USER_FIRST are handed to mq->unexpected_callback
 * when one is installed.  Otherwise a receive request is allocated and the
 * payload is staged in a system buffer according to 'mode':
 *   - MQ_MSG_TINY / MQ_MSG_SHORT: the whole message is copied and the
 *     request is marked MQ_STATE_COMPLETE (a zero-length TINY message gets
 *     no buffer);
 *   - MQ_MSG_LONG: the request is queued on the master's egrlong list in
 *     MQ_STATE_UNEXP, any payload carried by the envelope is fed through
 *     psmi_mq_handle_data(), then psmi_mq_handle_egrdata() is called.
 * Any other mode is reported through psmi_handle_error().
 *
 * The request is finally appended to the unexpected queue and the rx_sys
 * statistics are updated.
 *
 * Always returns MQ_RET_UNEXP_OK.
 */
int __recvpath
psmi_mq_handle_envelope_unexpected(psm_mq_t mq, uint16_t mode,
				   psm_epaddr_t epaddr, uint64_t tag,
				   psmi_egrid_t egrid, uint32_t send_msglen,
				   const void *payload, uint32_t paylen)
{
	psm_mq_req_t req;
	uint32_t msglen = send_msglen;

	/*
	 * Keep a callback here in case we want to fit some other high-level
	 * protocols over MQ (i.e. shmem).  These protocols would bypass the
	 * normal message handling and go to higher-level message handlers.
	 */
	if (mode >= MQ_MSG_USER_FIRST && mq->unexpected_callback) {
		mq->unexpected_callback(mq, mode, epaddr, tag, send_msglen,
					payload, paylen);
		return MQ_RET_UNEXP_OK;
	}

	req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
	psmi_assert(req != NULL);

	req->tag = tag;
	req->recv_msgoff = 0;
	req->recv_msglen = req->send_msglen = req->buf_len = msglen;

	_IPATH_VDBG("from=%s match=NO (req=%p) mode=%x mqtag=%" PRIx64
		    " send_msglen=%d\n",
		    psmi_epaddr_get_name(epaddr->epid), req, mode, tag,
		    send_msglen);

	/*
	 * Disabled sysbuf limit check.  As written it returns after req has
	 * been allocated but before it is queued or freed, so it must not be
	 * re-enabled without also releasing req.
	 */
#if 0
	if (mq->cur_sysbuf_bytes+msglen > mq->max_sysbuf_bytes) {
		_IPATH_VDBG("req=%p with len=%d exceeds limit of %llu sysbuf_bytes\n",
			    req, msglen,
			    (unsigned long long) mq->max_sysbuf_bytes);
		return MQ_RET_UNEXP_NO_RESOURCES;
	}
#endif

	switch (mode) {
	case MQ_MSG_TINY:
		/* A zero-length tiny message carries no buffer at all. */
		if (msglen == 0) {
			req->buf = NULL;
		} else {
			req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
			mq_copy_tiny((uint32_t *)req->buf,
				     (uint32_t *)payload, msglen);
		}
		req->state = MQ_STATE_COMPLETE;
		break;

	case MQ_MSG_SHORT:
		req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
		psmi_mq_mtucpy(req->buf, payload, msglen);
		req->state = MQ_STATE_COMPLETE;
		break;

	case MQ_MSG_LONG:
		req->egrid = egrid;
		req->send_msgoff = 0;
		req->buf = psmi_mq_sysbuf_alloc(mq, msglen);
		req->state = MQ_STATE_UNEXP;
		req->type |= MQE_TYPE_EGRLONG;
		/*
		 * Queue before delivering data: psmi_mq_handle_data() removes
		 * the request from egrlong once the message is complete.
		 */
		STAILQ_INSERT_TAIL(&epaddr->mctxt_master->egrlong, req, nextq);
		_IPATH_VDBG("unexp MSG_LONG %d of length %d bytes pay=%d\n",
			    egrid.egr_msgno, msglen, paylen);
		if (paylen > 0) {
			psmi_mq_handle_data(req, epaddr, egrid.egr_data, 0,
					    payload, paylen);
		}
		psmi_mq_handle_egrdata(mq, req, epaddr);
		break;

	default:
		psmi_handle_error(PSMI_EP_NORETURN, PSM_INTERNAL_ERR,
				  "Internal error, unknown packet 0x%x", mode);
	}

	mq_sq_append(&mq->unexpected_q, req);

	mq->stats.rx_sys_bytes += msglen;
	mq->stats.rx_sys_num++;

	return MQ_RET_UNEXP_OK;
}
/*
 * Fill in and send one short active-message request or reply.
 *
 * Arguments go into the message header first (up to IPS_AM_HDR_NARGS of
 * them); any further arguments are copied to the front of the scb payload
 * buffer, followed by the user data.  When all arguments fit in the header,
 * the user data is either
 *   - absent (len == 0),
 *   - packed into the header qwords left over after the arguments
 *     (IPS_SEND_FLAG_AMISTINY is set and amhdr_len carries the length), or
 *   - sent as a separate payload (amhdr_len carries pad_bytes).
 *
 * The scb is then stamped with the opcode and the next AM send sequence
 * number, enqueued on the message flow, and the flow is flushed.
 *
 * scb       - send control block to fill in; amhdr_nargs must already
 *             equal nargs (asserted).
 * ipsaddr   - destination; selects the flow and the sequence counter.
 * args      - array of at least nargs arguments.
 * nargs     - number of valid entries in args.
 * opcode    - AM opcode stored in the scb.
 * src, len  - user payload and its length in bytes.
 * flags     - not referenced in this function.
 * pad_bytes - padding added to payload_size and reported in amhdr_len on
 *             the separate-payload paths.
 *
 * Always returns PSM2_OK.
 */
static psm2_error_t
am_short_reqrep(ips_scb_t *scb, struct ips_epaddr *ipsaddr,
		psm2_amarg_t *args, int nargs, uint8_t opcode,
		void *src, size_t len, int flags, int pad_bytes)
{
	int i, hdr_qwords = IPS_AM_HDR_NARGS;
	struct ips_proto *proto = ((psm2_epaddr_t)ipsaddr)->proto;
	struct ips_flow *flow = &ipsaddr->flows[proto->msgflowid];

	/* There are a limited number of bits for nargs in the header, making
	   overflow very easy.  Make sure the values match. */
	psmi_assert(nargs == scb->ips_lrh.amhdr_nargs);

	_HFI_VDBG("%s src=%p len=%d, nargs=%d\n",
		  ((opcode == OPCODE_AM_REQUEST) ||
		   (opcode == OPCODE_AM_REQUEST_NOREPLY)) ? "req" : "rep",
		  src, (int)len, nargs);

	if (nargs == 1) {	/* fastpath */
		scb->ips_lrh.data[0].u64w0 = args[0].u64w0;
		hdr_qwords--;
	} else if (nargs > 1) {
		/*
		 * Copy only the arguments the caller actually supplied.  The
		 * loop used to run to IPS_AM_HDR_NARGS unconditionally, which
		 * reads past the end of args[] whenever that constant exceeds
		 * nargs (e.g. if the header ever grows more qwords).
		 */
		const int hdr_args =
		    (nargs < IPS_AM_HDR_NARGS) ? nargs : IPS_AM_HDR_NARGS;

		for (i = 0; i < hdr_args; i++)
			scb->ips_lrh.data[i].u64w0 = args[i].u64w0;

		/*
		 * As before, a multi-argument message treats every header
		 * qword as consumed, so payload is never inlined for it.
		 */
		hdr_qwords = 0;

		if (nargs > IPS_AM_HDR_NARGS) {
			/* Slow case -- we don't have iovec and not enough
			 * space in the message header, so we have to copy the
			 * user's arguments even if the payload is marked ASYNC */
			uintptr_t bufp = (uintptr_t) ips_scb_buffer(scb);
			size_t arg_payload_len =
			    sizeof(psm2_amarg_t) * (nargs - IPS_AM_HDR_NARGS);

			psmi_mq_mtucpy((void *)bufp, &args[IPS_AM_HDR_NARGS],
				       arg_payload_len);
			bufp += arg_payload_len;
			scb->payload_size = arg_payload_len;

			if (src != NULL && len > 0) {
				psmi_mq_mtucpy((void *)bufp, src, len);
				scb->payload_size += len;
			}

			psmi_assert(pad_bytes < (1 << IPS_AM_HDR_LEN_BITS));
			scb->payload_size += pad_bytes;
			scb->ips_lrh.amhdr_len = pad_bytes;
			goto send_scb;
		}
	}

	if (len == 0) {
		scb->payload_size = 0;
		scb->ips_lrh.amhdr_len = 0;
	} else if (len <= (hdr_qwords << 3)) {
		/* Inline the payload into the header. */
		/* This path CANNOT handle length = 0 due to limited space
		   in the header.  If IPS_SEND_FLAG_AMISTINY is set, an
		   amhdr_len value of 0 means a full payload, i.e.
		   1 << IPS_AM_HDR_LEN_BITS bytes of packed payload. */
		psmi_assert(len > 0);

		psmi_mq_mtucpy(&scb->ips_lrh.
			       data[IPS_AM_HDR_NARGS - hdr_qwords], src, len);
		scb->payload_size = 0;

		psmi_assert(len <= (1 << IPS_AM_HDR_LEN_BITS));
		scb->ips_lrh.amhdr_len = len & ((1 << IPS_AM_HDR_LEN_BITS) - 1);
		scb->flags |= IPS_SEND_FLAG_AMISTINY;
	} else {
		/* Whatever's left requires a separate payload */
		if (ips_scb_buffer(scb) == NULL)
			/* Just attach the buffer */
			ips_scb_buffer(scb) = src;
		else
			/* May need to re-xmit user data, keep it around */
			psmi_mq_mtucpy(ips_scb_buffer(scb), src, len);

		psmi_assert(pad_bytes < (1 << IPS_AM_HDR_LEN_BITS));
		scb->payload_size = len + pad_bytes;
		scb->ips_lrh.amhdr_len = pad_bytes;
	}

send_scb:
	ips_scb_opcode(scb) = opcode;
	scb->ips_lrh.khdr.kdeth0 = ipsaddr->msgctl->am_send_seqnum++;
	ips_proto_flow_enqueue(flow, scb);
	flow->flush(flow, NULL);

	return PSM2_OK;
}