ssize_t
usdf_dgram_sendv(struct fid_ep *fep, const struct iovec *iov, void **desc,
                size_t count, fi_addr_t dest_addr, void *context)
{
        struct usd_dest *dest;
        struct usdf_ep *ep;
        size_t len;

        ep = ep_ftou(fep);
        len = sizeof(struct usd_udp_hdr);
        dest = (struct usd_dest *)(uintptr_t) dest_addr;

        len += _usdf_iov_len(iov, count);

        if (len <= USD_SEND_MAX_COPY) {
                return _usdf_dgram_send_iov_copy(ep, dest, iov, count,
                                context, ep->ep_tx_completion);
        } else if (ep->e.dg.tx_op_flags & FI_INJECT) {
                USDF_DBG_SYS(EP_DATA,
                        "given inject length (%zu) exceeds max inject length (%d)\n",
                        len, USD_SEND_MAX_COPY);
                return -FI_ENOSPC;
        }

        if (count > ep->e.dg.tx_iov_limit) {
                USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n", count);
                return -FI_ENOSPC;
        }

        return _usdf_dgram_send_iov(ep, dest, iov, count, context,
                        ep->ep_tx_completion);
}

ssize_t
usdf_dgram_recv(struct fid_ep *fep, void *buf, size_t len, void *desc,
                fi_addr_t src_addr, void *context)
{
        struct usdf_ep *ep;
        struct usd_qp_impl *qp;
        struct usd_recv_desc rxd;
        uint32_t index;

        ep = ep_ftou(fep);
        qp = to_qpi(ep->e.dg.ep_qp);

        index = qp->uq_rq.urq_post_index;
        rxd.urd_context = context;
        rxd.urd_iov[0].iov_base = (uint8_t *)ep->e.dg.ep_hdr_buf +
                (index * USDF_HDR_BUF_ENTRY) +
                (USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr));
        rxd.urd_iov[0].iov_len = sizeof(struct usd_udp_hdr);
        rxd.urd_iov[1].iov_base = buf;
        rxd.urd_iov[1].iov_len = len;
        rxd.urd_iov_cnt = 2;
        rxd.urd_next = NULL;

        /* record the header pointer for both ring slots consumed by this
         * receive (the header descriptor and the data descriptor) */
        ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
        index = (index + 1) & qp->uq_rq.urq_post_index_mask;
        ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;

        return usd_post_recv(ep->e.dg.ep_qp, &rxd);
}

ssize_t
usdf_dgram_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
                fi_addr_t dest_addr, void *context)
{
        struct usdf_dest *dest;
        struct usdf_ep *ep;
        uint32_t flags;

        ep = ep_ftou(fep);
        dest = (struct usdf_dest *)(uintptr_t) dest_addr;
        flags = (ep->ep_tx_completion) ? USD_SF_SIGNAL : 0;

        if (len + sizeof(struct usd_udp_hdr) <= USD_SEND_MAX_COPY) {
                return usd_post_send_one_copy(ep->e.dg.ep_qp, &dest->ds_dest,
                                buf, len, flags, context);
        } else if (ep->e.dg.tx_op_flags & FI_INJECT) {
                USDF_DBG_SYS(EP_DATA,
                        "given inject length (%zu) exceeds max inject length (%d)\n",
                        len + sizeof(struct usd_udp_hdr), USD_SEND_MAX_COPY);
                return -FI_ENOSPC;
        }

        return usd_post_send_one(ep->e.dg.ep_qp, &dest->ds_dest, buf, len,
                        flags, context);
}

ssize_t
usdf_dgram_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc,
                size_t count, fi_addr_t src_addr, void *context)
{
        struct usdf_ep *ep;
        struct usd_recv_desc rxd;
        struct usd_qp_impl *qp;
        uint32_t index;
        size_t i;

        ep = ep_ftou(fep);
        qp = to_qpi(ep->e.dg.ep_qp);

        rxd.urd_context = context;
        rxd.urd_iov[0].iov_base = ep->e.dg.ep_hdr_buf +
                qp->uq_rq.urq_post_index * USDF_HDR_BUF_ENTRY;
        rxd.urd_iov[0].iov_len = sizeof(struct usd_udp_hdr);
        memcpy(&rxd.urd_iov[1], iov, sizeof(*iov) * count);
        rxd.urd_iov_cnt = count + 1;
        rxd.urd_next = NULL;

        index = qp->uq_rq.urq_post_index;
        for (i = 0; i < count; ++i) {
                ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
                index = (index + 1) & qp->uq_rq.urq_post_index_mask;
        }

        return usd_post_recv(ep->e.dg.ep_qp, &rxd);
}

static int
usdf_msg_ep_enable(struct fid_ep *fep)
{
        struct usdf_ep *ep;
        struct usd_filter filt;
        struct usd_qp_impl *uqp;
        int ret;

        ep = ep_ftou(fep);

        filt.uf_type = USD_FTY_UDP_SOCK;
        filt.uf_filter.uf_udp_sock.u_sock = ep->ep_sock;

        ret = usd_create_qp(ep->ep_domain->dom_dev,
                        USD_QTR_UDP,
                        USD_QTY_NORMAL,
                        ep->ep_wcq->cq_cq,
                        ep->ep_rcq->cq_cq,
                        ep->ep_wqe,
                        ep->ep_rqe,
                        &filt,
                        &ep->ep_qp);
        if (ret != 0) {
                goto fail;
        }
        ep->ep_qp->uq_context = ep;

        /*
         * Allocate a memory region big enough to hold a header for each
         * RQ entry
         */
        uqp = to_qpi(ep->ep_qp);
        ep->ep_hdr_ptr = calloc(uqp->uq_rq.urq_num_entries,
                        sizeof(ep->ep_hdr_ptr[0]));
        if (ep->ep_hdr_ptr == NULL) {
                ret = -FI_ENOMEM;
                goto fail;
        }
        ret = usd_alloc_mr(ep->ep_domain->dom_dev,
                        usd_get_recv_credits(ep->ep_qp) * USDF_HDR_BUF_ENTRY,
                        &ep->ep_hdr_buf);
        if (ret != 0) {
                goto fail;
        }

        return 0;

fail:
        if (ep->ep_hdr_ptr != NULL) {
                free(ep->ep_hdr_ptr);
        }
        if (ep->ep_qp != NULL) {
                usd_destroy_qp(ep->ep_qp);
        }
        return ret;
}

ssize_t
usdf_dgram_prefix_recvmsg(struct fid_ep *fep, const struct fi_msg *msg,
                uint64_t flags)
{
        struct usdf_ep *ep;
        struct usd_qp_impl *qp;
        struct usd_rq *rq;
        struct vnic_rq *vrq;
        struct rq_enet_desc *desc;
        uint8_t *hdr_ptr;
        const struct iovec *iovp;
        uint32_t index;
        unsigned i;

        ep = ep_ftou(fep);
        qp = to_qpi(ep->e.dg.ep_qp);
        rq = &qp->uq_rq;
        vrq = &rq->urq_vnic_rq;
        desc = rq->urq_next_desc;
        index = rq->urq_post_index;

        iovp = msg->msg_iov;
        rq->urq_context[index] = msg->context;
        hdr_ptr = iovp[0].iov_base +
                (USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr));
        rq_enet_desc_enc(desc, (dma_addr_t) hdr_ptr,
                        RQ_ENET_TYPE_ONLY_SOP,
                        iovp[0].iov_len -
                        (USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr)));
        ep->e.dg.ep_hdr_ptr[index] = (struct usd_udp_hdr *) hdr_ptr;

        index = (index + 1) & rq->urq_post_index_mask;
        desc = (struct rq_enet_desc *)
                ((uintptr_t)rq->urq_desc_ring + (index << 4));

        for (i = 1; i < msg->iov_count; ++i) {
                rq->urq_context[index] = msg->context;
                rq_enet_desc_enc(desc, (dma_addr_t) iovp[i].iov_base,
                                RQ_ENET_TYPE_NOT_SOP, iovp[i].iov_len);
                ep->e.dg.ep_hdr_ptr[index] = (struct usd_udp_hdr *) hdr_ptr;

                index = (index + 1) & rq->urq_post_index_mask;
                desc = (struct rq_enet_desc *)
                        ((uintptr_t)rq->urq_desc_ring + (index << 4));
        }

        if ((flags & FI_MORE) == 0) {
                wmb();
                iowrite32(index, &vrq->ctrl->posted_index);
        }

        rq->urq_next_desc = desc;
        rq->urq_post_index = index;
        rq->urq_recv_credits -= msg->iov_count;

        return 0;
}

ssize_t
usdf_msg_recvmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags)
{
        size_t i;
        struct usdf_ep *ep;
        struct usdf_rx *rx;
        struct usdf_msg_qe *rqe;
        struct usdf_domain *udp;
        size_t tot_len;
        const struct iovec *iov;

        ep = ep_ftou(fep);
        rx = ep->ep_rx;
        udp = ep->ep_domain;
        iov = msg->msg_iov;

        if (TAILQ_EMPTY(&rx->r.msg.rx_free_rqe)) {
                return -FI_EAGAIN;
        }

        if (flags & ~USDF_MSG_SUPP_RECVMSG_FLAGS) {
                USDF_DBG_SYS(EP_DATA,
                        "one or more flags in %#" PRIx64 " not supported\n",
                        flags);
                return -FI_EOPNOTSUPP;
        }

        pthread_spin_lock(&udp->dom_progress_lock);

        rqe = usdf_msg_get_rx_rqe(rx);

        rqe->ms_context = msg->context;
        tot_len = 0;
        for (i = 0; i < msg->iov_count; ++i) {
                rqe->ms_iov[i].iov_base = (void *)iov[i].iov_base;
                rqe->ms_iov[i].iov_len = iov[i].iov_len;
                tot_len += iov[i].iov_len;
        }
        rqe->ms_last_iov = msg->iov_count - 1;

        rqe->ms_cur_iov = 0;
        rqe->ms_resid = tot_len;
        rqe->ms_length = 0;
        rqe->ms_cur_ptr = iov[0].iov_base;
        rqe->ms_iov_resid = iov[0].iov_len;

        rqe->ms_signal_comp = ep->ep_rx_dflt_signal_comp ||
                (flags & FI_COMPLETION) ? 1 : 0;

        TAILQ_INSERT_TAIL(&rx->r.msg.rx_posted_rqe, rqe, ms_link);

        pthread_spin_unlock(&udp->dom_progress_lock);

        return 0;
}

ssize_t
usdf_dgram_prefix_sendv(struct fid_ep *fep, const struct iovec *iov,
                void **desc, size_t count, fi_addr_t dest_addr, void *context)
{
        struct usdf_ep *ep;
        struct usd_dest *dest;
        struct usd_wq *wq;
        struct usd_qp_impl *qp;
        struct usd_udp_hdr *hdr;
        uint32_t last_post;
        struct usd_wq_post_info *info;
        struct iovec send_iov[USDF_DGRAM_MAX_SGE];
        size_t len;
        unsigned i;
        size_t padding;

        ep = ep_ftou(fep);
        dest = (struct usd_dest *)(uintptr_t) dest_addr;
        padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);

        len = 0;
        for (i = 0; i < count; i++) {
                len += iov[i].iov_len;
        }

        if (len + sizeof(struct usd_udp_hdr) > USD_SEND_MAX_COPY) {
                qp = to_qpi(ep->e.dg.ep_qp);
                wq = &qp->uq_wq;

                hdr = (struct usd_udp_hdr *)
                        ((char *) iov[0].iov_base + padding);
                memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr));

                /* adjust lengths and insert source port */
                hdr->uh_ip.tot_len = htons(len - padding -
                                sizeof(struct ether_header));
                hdr->uh_udp.len = htons(len - padding -
                                sizeof(struct ether_header) -
                                sizeof(struct iphdr));
                hdr->uh_udp.source =
                        qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;

                memcpy(send_iov, iov, sizeof(struct iovec) * count);
                send_iov[0].iov_base = hdr;
                send_iov[0].iov_len -= padding;

                last_post = _usd_post_send_iov(wq, send_iov, count, 1);
                info = &wq->uwq_post_info[last_post];
                info->wp_context = context;
                info->wp_len = len;
        } else {
                _usdf_dgram_send_iov_copy(ep, dest, iov, count, context);
        }

        return 0;
}

ssize_t
usdf_msg_sendv(struct fid_ep *fep, const struct iovec *iov, void **desc,
                size_t count, fi_addr_t dest_addr, void *context)
{
        size_t i;
        struct usdf_ep *ep;
        struct usdf_tx *tx;
        struct usdf_msg_qe *wqe;
        struct usdf_domain *udp;
        size_t tot_len;
        uint64_t op_flags;

        ep = ep_ftou(fep);
        tx = ep->ep_tx;
        udp = ep->ep_domain;

        if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
                return -FI_EAGAIN;
        }

        pthread_spin_lock(&udp->dom_progress_lock);

        wqe = TAILQ_FIRST(&tx->t.msg.tx_free_wqe);
        TAILQ_REMOVE(&tx->t.msg.tx_free_wqe, wqe, ms_link);

        wqe->ms_context = context;
        tot_len = 0;
        for (i = 0; i < count; ++i) {
                wqe->ms_iov[i].iov_base = (void *)iov[i].iov_base;
                wqe->ms_iov[i].iov_len = iov[i].iov_len;
                tot_len += iov[i].iov_len;
        }
        wqe->ms_last_iov = count - 1;

        wqe->ms_cur_iov = 0;
        wqe->ms_cur_ptr = iov[0].iov_base;
        wqe->ms_iov_resid = iov[0].iov_len;
        wqe->ms_resid = tot_len;
        wqe->ms_length = tot_len;

        op_flags = ep->ep_tx->tx_attr.op_flags;
        wqe->ms_signal_comp = ep->ep_tx_dflt_signal_comp ||
                (op_flags & FI_COMPLETION) ? 1 : 0;

        /* add send to EP, and add EP to TX list if not present */
        TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
        usdf_msg_ep_ready(ep);

        pthread_spin_unlock(&udp->dom_progress_lock);

        usdf_domain_progress(udp);

        return 0;
}

ssize_t
usdf_dgram_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
                fi_addr_t dest_addr, void *context)
{
        struct usdf_ep *ep;
        struct usdf_dest *dest;

        ep = ep_ftou(fep);
        dest = (struct usdf_dest *)(uintptr_t) dest_addr;

        return _usdf_dgram_send(ep, dest, buf, len, context);
}

ssize_t
usdf_msg_inject(struct fid_ep *fep, const void *buf, size_t len,
                fi_addr_t dest_addr)
{
        struct usdf_ep *ep;
        struct usdf_tx *tx;
        struct usdf_msg_qe *wqe;
        struct usdf_domain *udp;

        if (len > USDF_MSG_MAX_INJECT_SIZE) {
                USDF_WARN_SYS(EP_DATA,
                        "cannot inject more than inject_size bytes\n");
                return -FI_EINVAL;
        }

        ep = ep_ftou(fep);
        tx = ep->ep_tx;
        udp = ep->ep_domain;

        if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
                return -FI_EAGAIN;
        }

        pthread_spin_lock(&udp->dom_progress_lock);

        wqe = usdf_msg_get_tx_wqe(tx);

        wqe->ms_context = NULL;
        memcpy(wqe->ms_inject_buf, buf, len);
        wqe->ms_iov[0].iov_base = wqe->ms_inject_buf;
        wqe->ms_iov[0].iov_len = len;
        wqe->ms_last_iov = 0;

        wqe->ms_cur_iov = 0;
        /* point at the private copy, not the caller's buffer, so the caller
         * may reuse its buffer as soon as this call returns */
        wqe->ms_cur_ptr = wqe->ms_inject_buf;
        wqe->ms_iov_resid = len;
        wqe->ms_resid = len;
        wqe->ms_length = len;

        /* fi_inject() never signals a completion */
        wqe->ms_signal_comp = 0;

        /* add send to EP, and add EP to TX list if not present */
        TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
        usdf_msg_ep_ready(ep);

        pthread_spin_unlock(&udp->dom_progress_lock);

        usdf_domain_progress(udp);

        return 0;
}

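/*
 * Illustrative caller-side sketch (not part of the provider): it shows the
 * fi_inject() contract that the copy into ms_inject_buf above exists to
 * honor -- the payload buffer may be reused as soon as the call returns and
 * no completion entry is ever generated for it.  The endpoint `ep` and the
 * address `dest_addr` are assumed to have been created elsewhere; on a
 * connected MSG endpoint the address argument is ignored.
 */
#include <stdio.h>
#include <rdma/fabric.h>
#include <rdma/fi_endpoint.h>
#include <rdma/fi_errno.h>

static int example_inject_greeting(struct fid_ep *ep, fi_addr_t dest_addr)
{
        char greeting[] = "hello";      /* reusable immediately after return */
        ssize_t ret;

        ret = fi_inject(ep, greeting, sizeof(greeting), dest_addr);
        if (ret == -FI_EAGAIN)
                return -1;      /* no send resources; progress CQs and retry */
        if (ret != 0)
                fprintf(stderr, "fi_inject: %s\n", fi_strerror((int) -ret));

        return (int) ret;
}
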
ssize_t
usdf_msg_tx_size_left(struct fid_ep *fep)
{
        struct usdf_ep *ep;
        struct usdf_tx *tx;

        USDF_DBG_SYS(EP_DATA, "\n");

        ep = ep_ftou(fep);
        tx = ep->ep_tx;

        if (tx == NULL)
                return -FI_EOPBADSTATE; /* EP not enabled */

        return tx->t.msg.tx_num_free_wqe;
}

ssize_t
usdf_msg_rx_size_left(struct fid_ep *fep)
{
        struct usdf_ep *ep;
        struct usdf_rx *rx;

        USDF_DBG_SYS(EP_DATA, "\n");

        ep = ep_ftou(fep);
        rx = ep->ep_rx;

        if (rx == NULL)
                return -FI_EOPBADSTATE; /* EP not enabled */

        return rx->r.msg.rx_num_free_rqe;
}

ssize_t
usdf_msg_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc,
                size_t count, fi_addr_t src_addr, void *context)
{
        struct usdf_ep *ep;
        struct usdf_rx *rx;
        struct usdf_msg_qe *rqe;
        struct usdf_domain *udp;
        size_t tot_len;
        uint64_t op_flags;
        uint32_t i;

        ep = ep_ftou(fep);
        rx = ep->ep_rx;
        udp = ep->ep_domain;

        if (TAILQ_EMPTY(&rx->r.msg.rx_free_rqe)) {
                return -FI_EAGAIN;
        }

        pthread_spin_lock(&udp->dom_progress_lock);

        rqe = TAILQ_FIRST(&rx->r.msg.rx_free_rqe);
        TAILQ_REMOVE(&rx->r.msg.rx_free_rqe, rqe, ms_link);
        --rx->r.msg.rx_num_free_rqe;

        rqe->ms_context = context;
        tot_len = 0;
        for (i = 0; i < count; ++i) {
                rqe->ms_iov[i].iov_base = (void *)iov[i].iov_base;
                rqe->ms_iov[i].iov_len = iov[i].iov_len;
                tot_len += iov[i].iov_len;
        }
        rqe->ms_last_iov = count - 1;

        rqe->ms_cur_iov = 0;
        rqe->ms_cur_ptr = iov[0].iov_base;
        rqe->ms_iov_resid = iov[0].iov_len;
        rqe->ms_resid = tot_len;
        rqe->ms_length = tot_len;

        op_flags = ep->ep_rx->rx_attr.op_flags;
        rqe->ms_signal_comp = ep->ep_rx_dflt_signal_comp ||
                (op_flags & FI_COMPLETION) ? 1 : 0;

        TAILQ_INSERT_TAIL(&rx->r.msg.rx_posted_rqe, rqe, ms_link);

        pthread_spin_unlock(&udp->dom_progress_lock);

        return 0;
}

ssize_t
usdf_msg_tx_size_left(struct fid_ep *fep)
{
        struct usdf_ep *ep;
        struct usdf_tx *tx;

        USDF_DBG_SYS(EP_DATA, "\n");

        ep = ep_ftou(fep);
        tx = ep->ep_tx;

        if (!(ep->flags & USDF_EP_ENABLED))
                return -FI_EOPBADSTATE;

        return tx->t.msg.tx_num_free_wqe;
}

ssize_t
usdf_msg_rx_size_left(struct fid_ep *fep)
{
        struct usdf_ep *ep;
        struct usdf_rx *rx;

        USDF_DBG_SYS(EP_DATA, "\n");

        ep = ep_ftou(fep);
        rx = ep->ep_rx;

        if (!(ep->flags & USDF_EP_ENABLED))
                return -FI_EOPBADSTATE;

        return rx->r.msg.rx_num_free_rqe;
}

ssize_t
usdf_msg_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
                fi_addr_t dest_addr, void *context)
{
        struct usdf_ep *ep;
        struct usdf_tx *tx;
        struct usdf_msg_qe *wqe;
        struct usdf_domain *udp;
        uint64_t op_flags;

        ep = ep_ftou(fep);
        tx = ep->ep_tx;
        udp = ep->ep_domain;

        if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
                return -FI_EAGAIN;
        }

        pthread_spin_lock(&udp->dom_progress_lock);

        wqe = usdf_msg_get_tx_wqe(tx);

        wqe->ms_context = context;
        wqe->ms_iov[0].iov_base = (void *)buf;
        wqe->ms_iov[0].iov_len = len;
        wqe->ms_last_iov = 0;

        wqe->ms_cur_iov = 0;
        wqe->ms_cur_ptr = buf;
        wqe->ms_iov_resid = len;
        wqe->ms_resid = len;
        wqe->ms_length = len;

        op_flags = ep->ep_tx->tx_attr.op_flags;
        wqe->ms_signal_comp = ep->ep_tx_dflt_signal_comp ||
                (op_flags & FI_COMPLETION) ? 1 : 0;

        /* add send to EP, and add EP to TX list if not present */
        TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
        usdf_msg_ep_ready(ep);

        pthread_spin_unlock(&udp->dom_progress_lock);

        usdf_domain_progress(udp);

        return 0;
}

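/*
 * Illustrative caller-side sketch (not part of the provider): because
 * usdf_msg_send() returns -FI_EAGAIN when the free WQE list is empty, and
 * WQEs are only recycled as the domain makes progress, a typical caller
 * retries fi_send() while reaping the transmit completion queue.  The
 * endpoint `ep` and the CQ `txcq` are assumed to have been created and bound
 * elsewhere; the helper name is hypothetical.
 */
#include <rdma/fabric.h>
#include <rdma/fi_endpoint.h>
#include <rdma/fi_eq.h>
#include <rdma/fi_errno.h>

static ssize_t example_send_retry(struct fid_ep *ep, struct fid_cq *txcq,
                const void *buf, size_t len, fi_addr_t dest, void *context)
{
        struct fi_cq_entry comp;
        ssize_t ret;

        do {
                ret = fi_send(ep, buf, len, NULL, dest, context);
                if (ret == -FI_EAGAIN)
                        (void) fi_cq_read(txcq, &comp, 1); /* drive progress */
        } while (ret == -FI_EAGAIN);

        return ret;
}
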
ssize_t
usdf_dgram_prefix_send(struct fid_ep *fep, const void *buf, size_t len,
                void *desc, fi_addr_t dest_addr, void *context)
{
        struct usd_udp_hdr *hdr;
        struct usd_qp_impl *qp;
        struct usdf_dest *dest;
        struct usdf_ep *ep;
        struct usd_wq *wq;
        uint32_t last_post;
        uint32_t flags;
        size_t padding;

        ep = ep_ftou(fep);
        dest = (struct usdf_dest *)(uintptr_t) dest_addr;
        padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);
        flags = (ep->ep_tx_completion) ? USD_SF_SIGNAL : 0;

        if (ep->e.dg.tx_op_flags & FI_INJECT) {
                if ((len - padding) > USD_SEND_MAX_COPY) {
                        USDF_DBG_SYS(EP_DATA,
                                "given inject length (%zu) exceeds max inject length (%d)\n",
                                len, USD_SEND_MAX_COPY);
                        return -FI_ENOSPC;
                }

                return usd_post_send_one_copy(ep->e.dg.ep_qp, &dest->ds_dest,
                                buf + USDF_HDR_BUF_ENTRY,
                                len - USDF_HDR_BUF_ENTRY, flags, context);
        }

        qp = to_qpi(ep->e.dg.ep_qp);
        wq = &qp->uq_wq;

        hdr = (struct usd_udp_hdr *) ((char *) buf + padding);
        memcpy(hdr, &dest->ds_dest.ds_dest.ds_udp.u_hdr, sizeof(*hdr));

        _usdf_adjust_prefix_hdr(hdr, qp, len, padding);

        last_post = _usd_post_send_one(wq, hdr, len - padding,
                        ep->ep_tx_completion);

        _usdf_adjust_post_info(wq, last_post, context,
                        len - USDF_HDR_BUF_ENTRY);

        return FI_SUCCESS;
}

ssize_t
usdf_dgram_prefix_sendmsg(struct fid_ep *fep, const struct fi_msg *msg,
                uint64_t flags)
{
        struct iovec send_iov[USDF_DGRAM_MAX_SGE];
        struct usd_dest *dest;
        struct usdf_ep *ep;
        uint8_t completion;
        size_t len;
        size_t padding;

        ep = ep_ftou(fep);
        dest = (struct usd_dest *)(uintptr_t) msg->addr;
        len = _usdf_iov_len(msg->msg_iov, msg->iov_count);
        completion = ep->ep_tx_dflt_signal_comp || (flags & FI_COMPLETION);
        padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);

        if (msg->iov_count > ep->e.dg.tx_iov_limit) {
                USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n",
                        msg->iov_count);
                return -FI_ENOSPC;
        }

        if ((len - padding) <= USD_SEND_MAX_COPY) {
                /* _usdf_dgram_send_iov_copy isn't prefix aware and allocates
                 * its own prefix.  reorganize iov[0] base to point to data and
                 * len to reflect data length.
                 */
                memcpy(send_iov, msg->msg_iov,
                        sizeof(struct iovec) * msg->iov_count);
                send_iov[0].iov_base = ((char *) send_iov[0].iov_base +
                        USDF_HDR_BUF_ENTRY);
                send_iov[0].iov_len -= USDF_HDR_BUF_ENTRY;

                return _usdf_dgram_send_iov_copy(ep, dest, send_iov,
                                msg->iov_count, msg->context, completion);
        } else if (flags & FI_INJECT) {
                USDF_DBG_SYS(EP_DATA,
                        "given inject length (%zu) exceeds max inject length (%d)\n",
                        len, USD_SEND_MAX_COPY);
                return -FI_ENOSPC;
        }

        return _usdf_dgram_send_iov_prefix(ep, dest, msg->msg_iov,
                        msg->iov_count, msg->context, completion);
}

ssize_t
usdf_dgram_tx_size_left(struct fid_ep *fep)
{
        struct usdf_ep *ep;

        USDF_DBG_SYS(EP_DATA, "\n");

        if (fep == NULL)
                return -FI_EINVAL;

        ep = ep_ftou(fep);

        if (ep->e.dg.ep_qp == NULL)
                return -FI_EOPBADSTATE; /* EP not enabled */

        return usd_get_send_credits(ep->e.dg.ep_qp) /
                (ep->e.dg.tx_iov_limit + 1);
}

ssize_t
usdf_dgram_tx_size_left(struct fid_ep *fep)
{
        struct usdf_ep *ep;

        USDF_DBG_SYS(EP_DATA, "\n");

        if (fep == NULL)
                return -FI_EINVAL;

        ep = ep_ftou(fep);

        if (ep->e.dg.ep_qp == NULL)
                return -FI_EOPBADSTATE; /* EP not enabled */

        /* see NOTE-SIZE-LEFT */
        return usd_get_send_credits(ep->e.dg.ep_qp) /
                (USDF_DGRAM_DFLT_SGE + 1);
}

ssize_t
usdf_dgram_inject(struct fid_ep *fep, const void *buf, size_t len,
                fi_addr_t dest_addr)
{
        struct usdf_ep *ep;
        struct usdf_dest *dest;
        struct usd_wq *wq;
        struct usd_qp_impl *qp;
        struct usd_udp_hdr *hdr;
        uint32_t last_post;
        struct usd_wq_post_info *info;
        uint8_t *copybuf;

        if (len + sizeof(struct usd_udp_hdr) > USD_SEND_MAX_COPY) {
                return -FI_ENOSPC;
        }

        ep = ep_ftou(fep);
        dest = (struct usdf_dest *)(uintptr_t)dest_addr;

        qp = to_qpi(ep->e.dg.ep_qp);
        wq = &qp->uq_wq;
        copybuf = wq->uwq_copybuf + wq->uwq_post_index * USD_SEND_MAX_COPY;

        hdr = (struct usd_udp_hdr *)copybuf;
        memcpy(hdr, &dest->ds_dest.ds_dest.ds_udp.u_hdr, sizeof(*hdr));
        hdr->uh_udp.source =
                qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
        hdr->uh_ip.tot_len = htons(len + sizeof(*hdr) -
                        sizeof(struct ether_header));
        hdr->uh_udp.len = htons(len + sizeof(*hdr) -
                        sizeof(struct ether_header) - sizeof(struct iphdr));

        memcpy(hdr + 1, buf, len);

        last_post = _usd_post_send_one(wq, hdr, len + sizeof(*hdr), 1);

        info = &wq->uwq_post_info[last_post];
        info->wp_context = NULL;
        info->wp_len = len;

        return 0;
}

ssize_t
usdf_dgram_prefix_tx_size_left(struct fid_ep *fep)
{
        struct usdf_ep *ep;

        USDF_DBG_SYS(EP_DATA, "\n");

        if (fep == NULL)
                return -FI_EINVAL;

        ep = ep_ftou(fep);

        if (ep->e.dg.ep_qp == NULL)
                return -FI_EOPBADSTATE; /* EP not enabled */

        /* prefix_sendv can post up to iov_limit descriptors */
        return (usd_get_send_credits(ep->e.dg.ep_qp) /
                        ep->e.dg.tx_iov_limit);
}

ssize_t
usdf_msg_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
                fi_addr_t dest_addr, void *context)
{
        struct usdf_ep *ep;
        struct usdf_tx *tx;
        struct usdf_msg_qe *wqe;
        struct usdf_domain *udp;

        ep = ep_ftou(fep);
        tx = ep->ep_tx;
        udp = ep->ep_domain;

        if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
                return -FI_EAGAIN;
        }

        pthread_spin_lock(&udp->dom_progress_lock);

        wqe = TAILQ_FIRST(&tx->t.msg.tx_free_wqe);
        TAILQ_REMOVE(&tx->t.msg.tx_free_wqe, wqe, ms_link);

        wqe->ms_context = context;
        wqe->ms_iov[0].iov_base = (void *)buf;
        wqe->ms_iov[0].iov_len = len;
        wqe->ms_last_iov = 0;

        wqe->ms_cur_iov = 0;
        wqe->ms_cur_ptr = buf;
        wqe->ms_iov_resid = len;
        wqe->ms_resid = len;
        wqe->ms_length = len;

        /* add send to EP, and add EP to TX list if not present */
        TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
        usdf_msg_ep_ready(ep);

        pthread_spin_unlock(&udp->dom_progress_lock);

        usdf_domain_progress(udp);

        return 0;
}

ssize_t
usdf_dgram_prefix_rx_size_left(struct fid_ep *fep)
{
        struct usdf_ep *ep;

        USDF_DBG_SYS(EP_DATA, "\n");

        if (fep == NULL)
                return -FI_EINVAL;

        ep = ep_ftou(fep);

        if (ep->e.dg.ep_qp == NULL)
                return -FI_EOPBADSTATE; /* EP not enabled */

        /* prefix_recvv can post up to iov_limit descriptors
         *
         * also see NOTE-SIZE-LEFT */
        return (usd_get_recv_credits(ep->e.dg.ep_qp) / USDF_DGRAM_DFLT_SGE);
}

ssize_t
usdf_dgram_prefix_sendv(struct fid_ep *fep, const struct iovec *iov,
                void **desc, size_t count, fi_addr_t dest_addr, void *context)
{
        struct iovec send_iov[USDF_DGRAM_MAX_SGE];
        struct usd_dest *dest;
        struct usdf_ep *ep;
        size_t len;
        size_t padding;

        ep = ep_ftou(fep);
        dest = (struct usd_dest *)(uintptr_t) dest_addr;
        len = _usdf_iov_len(iov, count);
        padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);

        if (count > ep->e.dg.tx_iov_limit) {
                USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n", count);
                return -FI_ENOSPC;
        }

        if ((len - padding) <= USD_SEND_MAX_COPY) {
                /* _usdf_dgram_send_iov_copy isn't prefix aware and allocates
                 * its own prefix.  reorganize iov[0] base to point to data and
                 * len to reflect data length.
                 */
                memcpy(send_iov, iov, sizeof(struct iovec) * count);
                send_iov[0].iov_base = ((char *) send_iov[0].iov_base +
                        USDF_HDR_BUF_ENTRY);
                send_iov[0].iov_len -= USDF_HDR_BUF_ENTRY;

                return _usdf_dgram_send_iov_copy(ep, dest, send_iov, count,
                                context, ep->ep_tx_completion);
        } else if (ep->e.dg.tx_op_flags & FI_INJECT) {
                USDF_DBG_SYS(EP_DATA,
                        "given inject length (%zu) exceeds max inject length (%d)\n",
                        len, USD_SEND_MAX_COPY);
                return -FI_ENOSPC;
        }

        return _usdf_dgram_send_iov_prefix(ep, dest, iov, count, context,
                        ep->ep_tx_completion);
}

ssize_t
usdf_msg_recv(struct fid_ep *fep, void *buf, size_t len, void *desc,
                fi_addr_t src_addr, void *context)
{
        struct usdf_ep *ep;
        struct usdf_rx *rx;
        struct usdf_msg_qe *rqe;
        struct usdf_domain *udp;

        ep = ep_ftou(fep);
        rx = ep->ep_rx;
        udp = ep->ep_domain;

        if (TAILQ_EMPTY(&rx->r.msg.rx_free_rqe)) {
                return -FI_EAGAIN;
        }

        pthread_spin_lock(&udp->dom_progress_lock);

        rqe = TAILQ_FIRST(&rx->r.msg.rx_free_rqe);
        TAILQ_REMOVE(&rx->r.msg.rx_free_rqe, rqe, ms_link);
        --rx->r.msg.rx_num_free_rqe;

        rqe->ms_context = context;
        rqe->ms_iov[0].iov_base = buf;
        rqe->ms_iov[0].iov_len = len;
        rqe->ms_last_iov = 0;

        rqe->ms_cur_iov = 0;
        rqe->ms_cur_ptr = buf;
        rqe->ms_iov_resid = len;
        rqe->ms_length = 0;
        rqe->ms_resid = len;

        TAILQ_INSERT_TAIL(&rx->r.msg.rx_posted_rqe, rqe, ms_link);

        pthread_spin_unlock(&udp->dom_progress_lock);

        return 0;
}

ssize_t
usdf_dgram_prefix_send(struct fid_ep *fep, const void *buf, size_t len,
                void *desc, fi_addr_t dest_addr, void *context)
{
        struct usdf_ep *ep;
        struct usd_dest *dest;
        struct usd_qp_impl *qp;
        struct usd_udp_hdr *hdr;
        struct usd_wq *wq;
        uint32_t last_post;
        struct usd_wq_post_info *info;

        ep = ep_ftou(fep);
        dest = (struct usd_dest *)(uintptr_t)dest_addr;

        qp = to_qpi(ep->e.dg.ep_qp);
        wq = &qp->uq_wq;
        hdr = (struct usd_udp_hdr *) buf - 1;
        memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr));

        /* adjust lengths and insert source port */
        hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) -
                        sizeof(struct ether_header));
        hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) -
                        sizeof(struct ether_header) -
                        sizeof(struct iphdr)) + len);
        hdr->uh_udp.source =
                qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;

        last_post = _usd_post_send_one(wq, hdr,
                        len + sizeof(struct usd_udp_hdr), 1);

        info = &wq->uwq_post_info[last_post];
        info->wp_context = context;
        info->wp_len = len;

        return 0;
}

ssize_t
usdf_dgram_rx_size_left(struct fid_ep *fep)
{
        struct usdf_ep *ep;

        USDF_DBG_SYS(EP_DATA, "\n");

        if (fep == NULL)
                return -FI_EINVAL;

        ep = ep_ftou(fep);

        if (ep->e.dg.ep_qp == NULL)
                return -FI_EOPBADSTATE; /* EP not enabled */

        /* NOTE-SIZE-LEFT: divide by constant right now, rather than keeping
         * track of the rx_attr->iov_limit value we gave to the user.  This
         * sometimes under-reports the number of RX ops that could be posted,
         * but it avoids touching a cache line that we don't otherwise need.
         *
         * sendv/recvv could potentially post iov_limit+1 descriptors
         */
        return usd_get_recv_credits(ep->e.dg.ep_qp) /
                (USDF_DGRAM_DFLT_SGE + 1);
}

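/*
 * Worked example of the NOTE-SIZE-LEFT under-reporting (numbers illustrative
 * only): if the QP currently has 512 receive credits and the default SGE
 * count is 8, the function reports 512 / (8 + 1) = 56 postable receives, even
 * though a caller posting only single-iov receives could post more.  In
 * libfabric versions that still expose fi_rx_size_left(), an application can
 * therefore treat the value only as a conservative back-pressure hint, e.g.:
 */
#include <rdma/fabric.h>
#include <rdma/fi_endpoint.h>

static int example_can_post_recv(struct fid_ep *ep)
{
        ssize_t left = fi_rx_size_left(ep);

        return left > 0;        /* may report 0 while some room remains */
}
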
ssize_t
usdf_dgram_inject(struct fid_ep *fep, const void *buf, size_t len,
                fi_addr_t dest_addr)
{
        struct usdf_dest *dest;
        struct usdf_ep *ep;

        ep = ep_ftou(fep);
        dest = (struct usdf_dest *)(uintptr_t) dest_addr;

        if (len + sizeof(struct usd_udp_hdr) > USD_SEND_MAX_COPY) {
                USDF_DBG_SYS(EP_DATA,
                        "given inject length (%zu) exceeds max inject length (%d)\n",
                        len + sizeof(struct usd_udp_hdr), USD_SEND_MAX_COPY);
                return -FI_ENOSPC;
        }

        /*
         * fi_inject never generates a completion
         */
        return usd_post_send_one_copy(ep->e.dg.ep_qp, &dest->ds_dest, buf,
                        len, 0, NULL);
}

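/*
 * Illustrative sketch (the table name below is hypothetical; the provider
 * defines its own ops tables): the functions above have exactly the
 * signatures that libfabric's struct fi_ops_msg requires, and a provider
 * exposes them by installing such a table on the endpoint (ep->msg) when the
 * endpoint is enabled.  Members not listed (sendmsg, senddata, injectdata,
 * ...) are left zero by the designated initializer here and would be filled
 * with the provider's stubs in the real code.
 */
#include <rdma/fi_endpoint.h>

static struct fi_ops_msg example_dgram_msg_ops = {
        .size = sizeof(struct fi_ops_msg),
        .recv = usdf_dgram_recv,
        .recvv = usdf_dgram_recvv,
        .send = usdf_dgram_send,
        .sendv = usdf_dgram_sendv,
        .inject = usdf_dgram_inject,
};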