ssize_t
usdf_msg_sendv(struct fid_ep *fep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t dest_addr, void *context)
{
	size_t i;
	struct usdf_ep *ep;
	struct usdf_tx *tx;
	struct usdf_msg_qe *wqe;
	struct usdf_domain *udp;
	size_t tot_len;
	uint64_t op_flags;

	ep = ep_ftou(fep);
	tx = ep->ep_tx;
	udp = ep->ep_domain;

	if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
		return -FI_EAGAIN;
	}

	pthread_spin_lock(&udp->dom_progress_lock);

	wqe = usdf_msg_get_tx_wqe(tx);
	wqe->ms_context = context;

	tot_len = 0;
	for (i = 0; i < count; ++i) {
		wqe->ms_iov[i].iov_base = (void *)iov[i].iov_base;
		wqe->ms_iov[i].iov_len = iov[i].iov_len;
		tot_len += iov[i].iov_len;
	}
	wqe->ms_last_iov = count - 1;

	wqe->ms_cur_iov = 0;
	wqe->ms_cur_ptr = iov[0].iov_base;
	wqe->ms_iov_resid = iov[0].iov_len;
	wqe->ms_resid = tot_len;
	wqe->ms_length = tot_len;

	op_flags = ep->ep_tx->tx_attr.op_flags;
	wqe->ms_signal_comp = (ep->ep_tx_dflt_signal_comp ||
		(op_flags & FI_COMPLETION)) ? 1 : 0;

	/* add send to EP, and add EP to TX list if not present */
	TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
	usdf_msg_ep_ready(ep);

	pthread_spin_unlock(&udp->dom_progress_lock);

	usdf_domain_progress(udp);

	return 0;
}
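/*
 * fi_inject() handler: the payload is copied into the WQE's bounce buffer
 * (ms_inject_buf) so the caller may reuse its buffer as soon as this call
 * returns, and no completion is ever generated for the operation.
 */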
ssize_t
usdf_msg_inject(struct fid_ep *fep, const void *buf, size_t len,
		fi_addr_t dest_addr)
{
	struct usdf_ep *ep;
	struct usdf_tx *tx;
	struct usdf_msg_qe *wqe;
	struct usdf_domain *udp;

	if (len > USDF_MSG_MAX_INJECT_SIZE) {
		USDF_WARN_SYS(EP_DATA,
			"cannot inject more than inject_size bytes\n");
		return -FI_EINVAL;
	}

	ep = ep_ftou(fep);
	tx = ep->ep_tx;
	udp = ep->ep_domain;

	if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
		return -FI_EAGAIN;
	}

	pthread_spin_lock(&udp->dom_progress_lock);

	wqe = usdf_msg_get_tx_wqe(tx);
	wqe->ms_context = NULL;
	memcpy(wqe->ms_inject_buf, buf, len);
	wqe->ms_iov[0].iov_base = wqe->ms_inject_buf;
	wqe->ms_iov[0].iov_len = len;
	wqe->ms_last_iov = 0;

	wqe->ms_cur_iov = 0;
	/* send from the staged copy, not the caller's buffer, which may
	 * be reused as soon as this call returns */
	wqe->ms_cur_ptr = wqe->ms_inject_buf;
	wqe->ms_iov_resid = len;
	wqe->ms_resid = len;
	wqe->ms_length = len;

	/* fi_inject() never signals a completion */
	wqe->ms_signal_comp = 0;

	/* add send to EP, and add EP to TX list if not present */
	TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
	usdf_msg_ep_ready(ep);

	pthread_spin_unlock(&udp->dom_progress_lock);

	usdf_domain_progress(udp);

	return 0;
}
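/*
 * Handle an incoming cumulative ACK for sequence number "seq": retire
 * every sent WQE whose last packet is covered by the ACK, post a send
 * completion for those that requested one, replenish the send-window
 * credits, and cancel or re-arm the retransmit timer.
 */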
static inline void
usdf_msg_process_ack(struct usdf_ep *ep, uint16_t seq)
{
	struct usdf_cq_hard *hcq;
	struct usdf_msg_qe *wqe;
	struct usdf_tx *tx;
	uint16_t max_ack;
	unsigned credits;

	tx = ep->ep_tx;

	/* don't try to ACK what we don't think we've sent */
	max_ack = ep->e.msg.ep_next_tx_seq - 1;
	if (RUDP_SEQ_GT(seq, max_ack)) {
		seq = max_ack;
	}

	hcq = tx->t.msg.tx_hcq;
	while (!TAILQ_EMPTY(&ep->e.msg.ep_sent_wqe)) {
		wqe = TAILQ_FIRST(&ep->e.msg.ep_sent_wqe);
		if (RUDP_SEQ_LE(wqe->ms_last_seq, seq)) {
			TAILQ_REMOVE(&ep->e.msg.ep_sent_wqe, wqe, ms_link);
			USDF_DBG_SYS(EP_DATA,
				"send complete, signal_comp=%u\n",
				wqe->ms_signal_comp);
			if (wqe->ms_signal_comp)
				hcq->cqh_post(hcq, wqe->ms_context,
					wqe->ms_length, FI_SUCCESS,
					FI_MSG | FI_SEND);

			usdf_msg_put_tx_wqe(tx, wqe);
		} else {
			break;
		}
	}

	credits = RUDP_SEQ_DIFF(seq, ep->e.msg.ep_last_rx_ack);
	if (ep->e.msg.ep_seq_credits == 0 && credits > 0 &&
			!TAILQ_EMPTY(&ep->e.msg.ep_posted_wqe)) {
		usdf_msg_ep_ready(ep);
	}
	ep->e.msg.ep_seq_credits += credits;
	ep->e.msg.ep_last_rx_ack = seq;

	/* If all ACKed, cancel timer, else reset it */
	if (seq == max_ack) {
		usdf_timer_cancel(ep->ep_domain->dom_fabric,
				ep->e.msg.ep_ack_timer);
	} else {
		usdf_timer_reset(ep->ep_domain->dom_fabric,
				ep->e.msg.ep_ack_timer,
				USDF_RUDP_ACK_TIMEOUT);
	}
}
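/*
 * fi_send() handler: the single-buffer case of usdf_msg_sendv() above,
 * staged onto a free WQE and queued on the EP's posted list.
 */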
ssize_t
usdf_msg_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
		fi_addr_t dest_addr, void *context)
{
	struct usdf_ep *ep;
	struct usdf_tx *tx;
	struct usdf_msg_qe *wqe;
	struct usdf_domain *udp;
	uint64_t op_flags;

	ep = ep_ftou(fep);
	tx = ep->ep_tx;
	udp = ep->ep_domain;

	if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
		return -FI_EAGAIN;
	}

	pthread_spin_lock(&udp->dom_progress_lock);

	wqe = usdf_msg_get_tx_wqe(tx);
	wqe->ms_context = context;
	wqe->ms_iov[0].iov_base = (void *)buf;
	wqe->ms_iov[0].iov_len = len;
	wqe->ms_last_iov = 0;

	wqe->ms_cur_iov = 0;
	wqe->ms_cur_ptr = buf;
	wqe->ms_iov_resid = len;
	wqe->ms_resid = len;
	wqe->ms_length = len;

	op_flags = ep->ep_tx->tx_attr.op_flags;
	wqe->ms_signal_comp = (ep->ep_tx_dflt_signal_comp ||
		(op_flags & FI_COMPLETION)) ? 1 : 0;

	/* add send to EP, and add EP to TX list if not present */
	TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
	usdf_msg_ep_ready(ep);

	pthread_spin_unlock(&udp->dom_progress_lock);

	usdf_domain_progress(udp);

	return 0;
}
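/*
 * Handle an incoming NAK for sequence number "seq": requeue every sent
 * WQE onto the posted list, rewind the transmit state to the NAKed
 * sequence, and mark the EP ready so the progress engine retransmits
 * from there.
 */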
static inline void
usdf_process_nak(struct usdf_ep *ep, uint16_t seq)
{
	struct usdf_msg_qe *wqe;
	size_t rewind;

	/* Ignore NAKs of future packets */
	if (RUDP_SEQ_GE(seq, ep->e.msg.ep_next_tx_seq)) {
		return;
	}

	/*
	 * Move any WQEs that contain NAKed sequences back to the
	 * posted list.  We set ms_resid == 0 here because final set to
	 * zero is optimized out of the fastpath.
	 */
	while (!TAILQ_EMPTY(&ep->e.msg.ep_sent_wqe)) {
		wqe = TAILQ_LAST(&ep->e.msg.ep_sent_wqe, usdf_msg_qe_head);
		TAILQ_REMOVE(&ep->e.msg.ep_sent_wqe, wqe, ms_link);
		wqe->ms_resid = 0;
		TAILQ_INSERT_HEAD(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
	}
	wqe = TAILQ_FIRST(&ep->e.msg.ep_posted_wqe);

	/* reset WQE to old sequence # */
	if (wqe->ms_resid == 0) {
		rewind = RUDP_SEQ_DIFF(wqe->ms_last_seq, seq) + 1;
	} else {
		rewind = RUDP_SEQ_DIFF(ep->e.msg.ep_next_tx_seq, seq);
	}
	if (rewind > 0) {
		ep->e.msg.ep_seq_credits = USDF_RUDP_SEQ_CREDITS;
		ep->e.msg.ep_next_tx_seq = seq;

		usdf_msg_rewind_qe(wqe, rewind,
			ep->ep_domain->dom_fabric->fab_dev_attrs->uda_mtu -
			sizeof(struct rudp_pkt));

		usdf_msg_ep_ready(ep);
	}
}
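/*
 * fi_sendmsg() handler: validates the requested flags, then either
 * coalesces the iovec into the WQE's bounce buffer (FI_INJECT) or records
 * the caller's iovec directly; completion signaling honors FI_COMPLETION
 * and the endpoint's default.
 */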
ssize_t
usdf_msg_sendmsg(struct fid_ep *fep, const struct fi_msg *msg,
		uint64_t flags)
{
	size_t i;
	struct usdf_ep *ep;
	struct usdf_tx *tx;
	struct usdf_msg_qe *wqe;
	struct usdf_domain *udp;
	size_t tot_len;
	const struct iovec *iov;

	ep = ep_ftou(fep);
	tx = ep->ep_tx;
	udp = ep->ep_domain;
	iov = msg->msg_iov;

	if (flags & ~USDF_MSG_SUPP_SENDMSG_FLAGS) {
		USDF_DBG_SYS(EP_DATA,
			"one or more flags in %#" PRIx64 " not supported\n",
			flags);
		return -FI_EOPNOTSUPP;
	}

	/* check for inject overrun before acquiring lock and allocating
	 * wqe, easier to unwind this way */
	if (flags & FI_INJECT) {
		tot_len = 0;
		for (i = 0; i < msg->iov_count; ++i) {
			tot_len += iov[i].iov_len;
			if (tot_len > USDF_MSG_MAX_INJECT_SIZE) {
				USDF_DBG_SYS(EP_DATA,
					"max inject len exceeded (%zu)\n",
					tot_len);
				return -FI_EINVAL;
			}
		}
	}

	if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
		return -FI_EAGAIN;
	}

	pthread_spin_lock(&udp->dom_progress_lock);

	wqe = usdf_msg_get_tx_wqe(tx);
	wqe->ms_context = msg->context;

	if (flags & FI_INJECT) {
		/* coalesce the caller's iovecs into the bounce buffer */
		tot_len = 0;
		for (i = 0; i < msg->iov_count; ++i) {
			assert(tot_len + iov[i].iov_len <=
					USDF_MSG_MAX_INJECT_SIZE);
			memcpy(&wqe->ms_inject_buf[tot_len],
				iov[i].iov_base, iov[i].iov_len);
			tot_len += iov[i].iov_len;
		}
		wqe->ms_iov[0].iov_base = wqe->ms_inject_buf;
		wqe->ms_iov[0].iov_len = tot_len;
		wqe->ms_last_iov = 0;
	} else {
		tot_len = 0;
		for (i = 0; i < msg->iov_count; ++i) {
			wqe->ms_iov[i].iov_base = (void *)iov[i].iov_base;
			wqe->ms_iov[i].iov_len = iov[i].iov_len;
			tot_len += iov[i].iov_len;
		}
		wqe->ms_last_iov = msg->iov_count - 1;
	}

	wqe->ms_cur_iov = 0;
	wqe->ms_resid = tot_len;
	wqe->ms_length = tot_len;
	/* start from ms_iov[0] so the inject path sends from the staged
	 * copy rather than the caller's (reusable) buffers */
	wqe->ms_cur_ptr = wqe->ms_iov[0].iov_base;
	wqe->ms_iov_resid = wqe->ms_iov[0].iov_len;

	wqe->ms_signal_comp = (ep->ep_tx_dflt_signal_comp ||
		(flags & FI_COMPLETION)) ? 1 : 0;

	/* add send to EP, and add EP to TX list if not present */
	TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
	usdf_msg_ep_ready(ep);

	pthread_spin_unlock(&udp->dom_progress_lock);

	usdf_domain_progress(udp);

	return 0;
}
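/*
 * Usage sketch (illustrative, not part of the provider): these handlers
 * implement the generic fi_send()/fi_sendv()/fi_inject()/fi_sendmsg()
 * calls for a connected FI_EP_MSG endpoint, so dest_addr is ignored.
 * All of them return -FI_EAGAIN when no free WQE is available, so a
 * caller retries, normally after reaping completions.  The endpoint and
 * any CQ draining are assumed to be set up elsewhere.
 */
#include <rdma/fi_endpoint.h>
#include <rdma/fi_errno.h>

static ssize_t example_blocking_send(struct fid_ep *ep, const void *buf,
		size_t len, void *context)
{
	ssize_t ret;

	/* spin until the provider accepts the send; a real application
	 * would bound this and drain its CQ between attempts */
	do {
		ret = fi_send(ep, buf, len, NULL, 0, context);
	} while (ret == -FI_EAGAIN);

	return ret;
}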