static ssize_t _usdf_dgram_send_iov(struct usdf_ep *ep, struct usd_dest *dest, const struct iovec *iov, size_t count, void *context, uint8_t cq_entry) { struct iovec send_iov[USDF_DGRAM_MAX_SGE]; struct usd_udp_hdr *hdr; struct usd_qp_impl *qp; struct usd_wq *wq; uint32_t last_post; size_t len; qp = to_qpi(ep->e.dg.ep_qp); wq = &qp->uq_wq; len = _usdf_iov_len(iov, count); hdr = _usdf_find_hdr(wq); memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); _usdf_adjust_hdr(hdr, qp, len); send_iov[0].iov_base = hdr; send_iov[0].iov_len = sizeof(*hdr); memcpy(&send_iov[1], iov, sizeof(struct iovec) * count); last_post = _usd_post_send_iov(wq, send_iov, count + 1, cq_entry); _usdf_adjust_post_info(wq, last_post, context, len); return FI_SUCCESS; }
/*
 * Post a multi-iov receive to the dgram QP.
 *
 * One extra iov is prepended for the protocol header, so count + 1 RQ
 * descriptors are consumed.  FIXES vs. previous version: (1) the header
 * is placed at the *tail* of its ep_hdr_buf slot, matching
 * usdf_dgram_recv, so header and payload are virtually contiguous on
 * completion; (2) ep_hdr_ptr is recorded for every one of the
 * count + 1 ring slots consumed (the old loop covered only count).
 */
ssize_t usdf_dgram_recvv(struct fid_ep *fep, const struct iovec *iov,
		void **desc, size_t count, fi_addr_t src_addr, void *context)
{
	struct usdf_ep *ep;
	struct usd_recv_desc rxd;
	struct usd_qp_impl *qp;
	uint32_t index;
	size_t i;

	ep = ep_ftou(fep);
	qp = to_qpi(ep->e.dg.ep_qp);

	rxd.urd_context = context;
	/* header lands in the tail of its hdr-buf slot (see usdf_dgram_recv) */
	rxd.urd_iov[0].iov_base = (uint8_t *)ep->e.dg.ep_hdr_buf +
		(qp->uq_rq.urq_post_index * USDF_HDR_BUF_ENTRY) +
		(USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr));
	rxd.urd_iov[0].iov_len = sizeof(struct usd_udp_hdr);
	memcpy(&rxd.urd_iov[1], iov, sizeof(*iov) * count);
	rxd.urd_iov_cnt = count + 1;
	rxd.urd_next = NULL;

	/* record the header pointer for every ring slot this post consumes */
	index = qp->uq_rq.urq_post_index;
	for (i = 0; i < count + 1; ++i) {
		ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
		index = (index + 1) & qp->uq_rq.urq_post_index_mask;
	}

	return usd_post_recv(ep->e.dg.ep_qp, &rxd);
}
/*
 * Return local address of an EP
 *
 * Copies as much of the sockaddr_in as fits in the caller's buffer,
 * always reports the required size through *addrlen, and returns
 * -FI_ETOOSMALL when the buffer was too short.
 */
int usdf_cm_rdm_getname(fid_t fid, void *addr, size_t *addrlen)
{
	struct usdf_ep *ep;
	struct usdf_rx *rx;
	struct sockaddr_in sin;
	size_t avail;

	USDF_TRACE_SYS(EP_CTRL, "\n");

	ep = ep_fidtou(fid);
	rx = ep->ep_rx;

	/* copy at most what the caller's buffer can hold */
	avail = *addrlen;
	if (avail > sizeof(sin)) {
		avail = sizeof(sin);
	}
	/* always report the full size needed */
	*addrlen = sizeof(sin);

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr =
		ep->ep_domain->dom_fabric->fab_dev_attrs->uda_ipaddr_be;
	if (rx != NULL && rx->rx_qp != NULL) {
		sin.sin_port = to_qpi(rx->rx_qp)->
			uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
	} else {
		/* not enabled yet: no local port bound */
		sin.sin_port = 0;
	}

	memcpy(addr, &sin, avail);
	return (avail < sizeof(sin)) ? -FI_ETOOSMALL : 0;
}
static ssize_t _usdf_dgram_send_iov_copy(struct usdf_ep *ep, struct usd_dest *dest, const struct iovec *iov, size_t count, void *context, uint8_t cq_entry) { struct usd_wq *wq; struct usd_qp_impl *qp; struct usd_udp_hdr *hdr; uint32_t last_post; size_t len; unsigned i; qp = to_qpi(ep->e.dg.ep_qp); wq = &qp->uq_wq; hdr = _usdf_find_hdr(wq); memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); len = 0; for (i = 0; i < count; i++) { memcpy((char *) hdr + sizeof(*hdr) + len, iov[i].iov_base, iov[i].iov_len); len += iov[i].iov_len; } _usdf_adjust_hdr(hdr, qp, len); last_post = _usd_post_send_one(wq, hdr, len + sizeof(*hdr), cq_entry); _usdf_adjust_post_info(wq, last_post, context, len); return 0; }
/*
 * Post a single-buffer receive to the dgram QP.
 *
 * Two RQ descriptors are consumed per message: one for the protocol
 * header and one for the caller's payload buffer.  The header is
 * written into a per-entry slot of ep_hdr_buf, placed at the *end* of
 * the slot so header and payload read as virtually contiguous when the
 * completion is processed.
 */
ssize_t usdf_dgram_recv(struct fid_ep *fep, void *buf, size_t len,
		void *desc, fi_addr_t src_addr, void *context)
{
	struct usdf_ep *ep;
	struct usd_qp_impl *qp;
	struct usd_recv_desc rxd;
	uint32_t index;

	ep = ep_ftou(fep);
	qp = to_qpi(ep->e.dg.ep_qp);
	index = qp->uq_rq.urq_post_index;

	rxd.urd_context = context;
	/* header lands in the tail of its hdr-buf slot */
	rxd.urd_iov[0].iov_base = (uint8_t *)ep->e.dg.ep_hdr_buf +
		(index * USDF_HDR_BUF_ENTRY) +
		(USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr));
	rxd.urd_iov[0].iov_len = sizeof(struct usd_udp_hdr);
	rxd.urd_iov[1].iov_base = buf;
	rxd.urd_iov[1].iov_len = len;
	rxd.urd_iov_cnt = 2;
	rxd.urd_next = NULL;

	/*
	 * Record the header pointer for BOTH ring slots this post consumes;
	 * the completion may arrive on either descriptor index.
	 */
	ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
	index = (index + 1) & qp->uq_rq.urq_post_index_mask;
	ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;

	return usd_post_recv(ep->e.dg.ep_qp, &rxd);
}
/*
 * semi-native rx buffer post, i want to eventually avoid using the
 * vnic_*() calls
 *
 * Posts a single-fragment receive buffer directly onto the VNIC RQ
 * ring and rings the doorbell.  Caller is responsible for ensuring a
 * recv credit is available.
 */
static inline int _usdf_msg_post_recv(struct usdf_rx *rx, void *buf,
		size_t len)
{
	struct usd_rq *rq;
	struct vnic_rq *vrq;
	struct rq_enet_desc *desc;
	struct usd_qp_impl *qp;

	qp = to_qpi(rx->rx_qp);
	rq = &qp->uq_rq;
	vrq = &rq->urq_vnic_rq;

	/* remember the buffer for completion handling, then advance index */
	rq->urq_context[rq->urq_post_index] = buf;
	rq->urq_post_index = (rq->urq_post_index + 1)
		& rq->urq_post_index_mask;

	/* encode one SOP-only (single fragment) descriptor */
	desc = rq->urq_next_desc;
	rq_enet_desc_enc(desc, (dma_addr_t) buf,
			RQ_ENET_TYPE_ONLY_SOP, len);
	/* descriptor must be globally visible before the doorbell write */
	wmb();
	iowrite32(rq->urq_post_index, &vrq->ctrl->posted_index);

	/* each descriptor is 16 bytes, hence the << 4 */
	rq->urq_next_desc = (struct rq_enet_desc *)
		((uintptr_t)rq->urq_desc_ring + ((rq->urq_post_index)<<4));
	rq->urq_recv_credits -= 1;

	return 0;
}
/*
 * Send an iov whose first element carries FI_MSG_PREFIX header space:
 * the application reserved USDF_HDR_BUF_ENTRY bytes at the front of
 * iov[0], and the UDP header is built in the last
 * sizeof(struct usd_udp_hdr) bytes of that reserved region so the
 * header abuts the payload on the wire.
 */
static ssize_t _usdf_dgram_send_iov_prefix(struct usdf_ep *ep,
		struct usd_dest *dest, const struct iovec *iov, size_t count,
		void *context, uint8_t cq_entry)
{
	struct iovec send_iov[USDF_DGRAM_MAX_SGE];
	struct usd_udp_hdr *hdr;
	struct usd_qp_impl *qp;
	uint32_t last_post;
	struct usd_wq *wq;
	size_t padding;
	size_t len;

	qp = to_qpi(ep->e.dg.ep_qp);
	wq = &qp->uq_wq;

	/* len still includes the whole prefix region of iov[0] here */
	len = _usdf_iov_len(iov, count);
	padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);

	/* header sits at the tail of the reserved prefix region */
	hdr = (struct usd_udp_hdr *) ((char *) iov[0].iov_base + padding);
	memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr));
	_usdf_adjust_prefix_hdr(hdr, qp, len, padding);

	/* shift iov[0] forward so the wire data starts at the header */
	memcpy(send_iov, iov, sizeof(struct iovec) * count);
	send_iov[0].iov_base = hdr;
	send_iov[0].iov_len -= padding;

	last_post = _usd_post_send_iov(wq, send_iov, count, cq_entry);
	/* completion length reported to the app excludes the prefix bytes */
	_usdf_adjust_post_info(wq, last_post, context,
			len - USDF_HDR_BUF_ENTRY);

	return FI_SUCCESS;
}
static int usdf_msg_ep_enable(struct fid_ep *fep) { struct usdf_ep *ep; struct usd_filter filt; struct usd_qp_impl *uqp; int ret; ep = ep_ftou(fep); filt.uf_type = USD_FTY_UDP_SOCK; filt.uf_filter.uf_udp_sock.u_sock = ep->ep_sock; ret = usd_create_qp(ep->ep_domain->dom_dev, USD_QTR_UDP, USD_QTY_NORMAL, ep->ep_wcq->cq_cq, ep->ep_rcq->cq_cq, ep->ep_wqe, ep->ep_rqe, &filt, &ep->ep_qp); if (ret != 0) { goto fail; } ep->ep_qp->uq_context = ep; /* * Allocate a memory region big enough to hold a header for each * RQ entry */ uqp = to_qpi(ep->ep_qp); ep->ep_hdr_ptr = calloc(uqp->uq_rq.urq_num_entries, sizeof(ep->ep_hdr_ptr[0])); if (ep->ep_hdr_ptr == NULL) { ret = -FI_ENOMEM; goto fail; } ret = usd_alloc_mr(ep->ep_domain->dom_dev, usd_get_recv_credits(ep->ep_qp) * USDF_HDR_BUF_ENTRY, &ep->ep_hdr_buf); if (ret != 0) { goto fail; } return 0; fail: if (ep->ep_hdr_ptr != NULL) { free(ep->ep_hdr_ptr); } if (ep->ep_qp != NULL) { usd_destroy_qp(ep->ep_qp); } return ret; }
/*
 * Post a multi-iov receive for FI_MSG_PREFIX buffers directly onto the
 * VNIC RQ ring.  The first iov carries the application-reserved prefix;
 * the hardware header is directed to the tail of that prefix region.
 * With FI_MORE set, the doorbell write is deferred so several posts can
 * be batched; the caller must eventually post without FI_MORE.
 */
ssize_t usdf_dgram_prefix_recvmsg(struct fid_ep *fep,
		const struct fi_msg *msg, uint64_t flags)
{
	struct usdf_ep *ep;
	struct usd_qp_impl *qp;
	struct usd_rq *rq;
	struct vnic_rq *vrq;
	struct rq_enet_desc *desc;
	uint8_t *hdr_ptr;
	const struct iovec *iovp;
	uint32_t index;
	unsigned i;

	ep = ep_ftou(fep);
	qp = to_qpi(ep->e.dg.ep_qp);
	rq = &qp->uq_rq;
	vrq = &rq->urq_vnic_rq;
	desc = rq->urq_next_desc;
	index = rq->urq_post_index;

	iovp = msg->msg_iov;
	rq->urq_context[index] = msg->context;
	/* header DMA target: tail of iov[0]'s reserved prefix region */
	hdr_ptr = iovp[0].iov_base +
		(USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr));
	rq_enet_desc_enc(desc, (dma_addr_t) hdr_ptr,
			RQ_ENET_TYPE_ONLY_SOP,
			iovp[0].iov_len -
			(USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr)));
	ep->e.dg.ep_hdr_ptr[index] = (struct usd_udp_hdr *) hdr_ptr;
	index = (index+1) & rq->urq_post_index_mask;
	/* descriptors are 16 bytes, hence << 4 */
	desc = (struct rq_enet_desc *)
		((uintptr_t)rq->urq_desc_ring + (index<<4));

	/* remaining iovs are plain payload fragments (not SOP) */
	for (i = 1; i < msg->iov_count; ++i) {
		rq->urq_context[index] = msg->context;
		rq_enet_desc_enc(desc, (dma_addr_t) iovp[i].iov_base,
				RQ_ENET_TYPE_NOT_SOP, iovp[i].iov_len);
		/* every slot of this message maps back to the same header */
		ep->e.dg.ep_hdr_ptr[index] = (struct usd_udp_hdr *) hdr_ptr;
		index = (index+1) & rq->urq_post_index_mask;
		desc = (struct rq_enet_desc *)
			((uintptr_t)rq->urq_desc_ring + (index<<4));
	}

	/* FI_MORE defers the doorbell; descriptors stay staged until then */
	if ((flags & FI_MORE) == 0) {
		wmb();
		iowrite32(index, &vrq->ctrl->posted_index);
	}

	rq->urq_next_desc = desc;
	rq->urq_post_index = index;
	rq->urq_recv_credits -= msg->iov_count;

	return 0;
}
unsigned usd_get_recv_credits( struct usd_qp *uqp) { struct usd_qp_impl *qp; qp = to_qpi(uqp); return qp->uq_rq.urq_recv_credits; }
unsigned usd_get_send_credits( struct usd_qp *uqp) { struct usd_qp_impl *qp; qp = to_qpi(uqp); return qp->uq_wq.uwq_send_credits; }
int usd_post_recv( struct usd_qp *uqp, struct usd_recv_desc *recv_list) { struct usd_qp_impl *qp; struct usd_rq *rq; struct vnic_rq *vrq; struct rq_enet_desc *desc; struct iovec *iovp; uint32_t index; uint32_t count; unsigned i; qp = to_qpi(uqp); rq = &qp->uq_rq; vrq = &rq->urq_vnic_rq; desc = rq->urq_next_desc; index = rq->urq_post_index; iovp = recv_list->urd_iov; count = 0; while (recv_list != NULL) { rq->urq_context[index] = recv_list->urd_context; rq_enet_desc_enc(desc, (dma_addr_t) iovp[0].iov_base, RQ_ENET_TYPE_ONLY_SOP, iovp[0].iov_len); count++; index = (index+1) & rq->urq_post_index_mask; desc = (struct rq_enet_desc *) ((uintptr_t)rq->urq_desc_ring + (index<<4)); for (i = 1; i < recv_list->urd_iov_cnt; ++i) { rq->urq_context[index] = recv_list->urd_context; rq_enet_desc_enc(desc, (dma_addr_t) iovp[i].iov_base, RQ_ENET_TYPE_NOT_SOP, iovp[i].iov_len); count++; index = (index+1) & rq->urq_post_index_mask; desc = (struct rq_enet_desc *) ((uintptr_t)rq->urq_desc_ring + (index<<4)); } recv_list = recv_list->urd_next; } wmb(); iowrite32(index, &vrq->ctrl->posted_index); rq->urq_next_desc = desc; rq->urq_post_index = index; rq->urq_recv_credits -= count; return 0; }
/*
 * Gather send for FI_MSG_PREFIX buffers.  For large messages the
 * header is built in place inside iov[0]'s reserved prefix and the
 * iovs are posted zero-copy; small messages fall back to the bounce-
 * buffer copy path.
 *
 * NOTE(review): the copy fallback passes the original iovs (including
 * the prefix region of iov[0]) to _usdf_dgram_send_iov_copy, and the
 * size threshold compares len (which includes the prefix) against
 * USD_SEND_MAX_COPY — presumably intentional for this older code path,
 * but worth confirming against the non-prefix sendv implementation.
 */
ssize_t usdf_dgram_prefix_sendv(struct fid_ep *fep, const struct iovec *iov,
		void **desc, size_t count, fi_addr_t dest_addr, void *context)
{
	struct usdf_ep *ep;
	struct usd_dest *dest;
	struct usd_wq *wq;
	struct usd_qp_impl *qp;
	struct usd_udp_hdr *hdr;
	uint32_t last_post;
	struct usd_wq_post_info *info;
	struct iovec send_iov[USDF_DGRAM_MAX_SGE];
	size_t len;
	unsigned i;
	size_t padding;

	ep = ep_ftou(fep);
	dest = (struct usd_dest *)(uintptr_t) dest_addr;
	padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);

	/* total bytes across all iovs (prefix region included) */
	len = 0;
	for (i = 0; i < count; i++) {
		len += iov[i].iov_len;
	}

	if (len + sizeof(struct usd_udp_hdr) > USD_SEND_MAX_COPY) {
		qp = to_qpi(ep->e.dg.ep_qp);
		wq = &qp->uq_wq;

		/* header goes at the tail of iov[0]'s prefix region */
		hdr = (struct usd_udp_hdr *) ((char *) iov[0].iov_base +
				padding);
		memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr));

		/* adjust lengths and insert source port */
		hdr->uh_ip.tot_len = htons(len - padding -
				sizeof(struct ether_header));
		hdr->uh_udp.len = htons(len - padding -
				sizeof(struct ether_header) -
				sizeof(struct iphdr));
		hdr->uh_udp.source =
			qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;

		/* shift iov[0] forward so the wire data starts at the header */
		memcpy(send_iov, iov, sizeof(struct iovec) * count);
		send_iov[0].iov_base = hdr;
		send_iov[0].iov_len -= padding;

		last_post = _usd_post_send_iov(wq, send_iov, count, 1);
		info = &wq->uwq_post_info[last_post];
		info->wp_context = context;
		info->wp_len = len;
	} else {
		/* small message: bounce-buffer copy path */
		_usdf_dgram_send_iov_copy(ep, dest, iov, count, context);
	}

	return 0;
}
/*
 * Keep progressing sends on this queue until:
 * a) no more send credits on the queue (it's full)
 * or
 * b) all endpoints are complete or blocked awaiting ACKs
 *
 * Pending ACK/NAK packets are drained first, then data segments are
 * sent round-robin across ready EPs using per-EP sequence and fairness
 * credits.  The "> 1" credit checks keep at least one WQ slot free.
 */
void usdf_msg_tx_progress(struct usdf_tx *tx)
{
	struct usdf_ep *ep;
	struct usd_qp_impl *qp;

	qp = to_qpi(tx->tx_qp);

	/* first flush any EPs with an ACK or NAK to send */
	while (qp->uq_wq.uwq_send_credits > 1 &&
			!TAILQ_EMPTY(&tx->t.msg.tx_ep_have_acks)) {
		ep = TAILQ_FIRST(&tx->t.msg.tx_ep_have_acks);
		TAILQ_REMOVE_MARK(&tx->t.msg.tx_ep_have_acks,
				ep, e.msg.ep_ack_link);
		usdf_msg_send_ack(tx, ep);
	}

	while (qp->uq_wq.uwq_send_credits > 1 &&
			!TAILQ_EMPTY(&tx->t.msg.tx_ep_ready)) {
		ep = TAILQ_FIRST(&tx->t.msg.tx_ep_ready);

		/*
		 * Send next segment on this EP. This will also remove the
		 * current send from the EP send list if it completes
		 */
		usdf_msg_send_segment(tx, ep);

		--ep->e.msg.ep_seq_credits;
		if (TAILQ_EMPTY(&ep->e.msg.ep_posted_wqe)) {
			/* nothing left to send on this EP */
			TAILQ_REMOVE_MARK(&tx->t.msg.tx_ep_ready,
					ep, e.msg.ep_link);
		} else {
			--ep->e.msg.ep_fairness_credits;
			if (ep->e.msg.ep_seq_credits == 0) {
				/* out of sequence window: wait for ACKs */
				TAILQ_REMOVE_MARK(&tx->t.msg.tx_ep_ready,
						ep, e.msg.ep_link);
				ep->e.msg.ep_fairness_credits =
					USDF_MSG_FAIRNESS_CREDITS;

			/* fairness credits exhausted, go to back of the line */
			} else if (ep->e.msg.ep_fairness_credits == 0) {
				TAILQ_REMOVE(&tx->t.msg.tx_ep_ready,
						ep, e.msg.ep_link);
				TAILQ_INSERT_TAIL(&tx->t.msg.tx_ep_ready,
						ep, e.msg.ep_link);
				ep->e.msg.ep_fairness_credits =
					USDF_MSG_FAIRNESS_CREDITS;
			}
		}
	}
}
/*
 * Prefix-style single-buffer send: buf carries USDF_HDR_BUF_ENTRY
 * reserved bytes at its front, and the UDP header is built in the last
 * sizeof(struct usd_udp_hdr) bytes of that region.
 *
 * NOTE(review): the FI_INJECT branch checks (len - padding) against
 * USD_SEND_MAX_COPY but then posts len - USDF_HDR_BUF_ENTRY bytes from
 * buf + USDF_HDR_BUF_ENTRY (padding != USDF_HDR_BUF_ENTRY) — confirm
 * the intended inject payload accounting.  Arithmetic on `const void *`
 * is also a GNU extension.
 */
ssize_t usdf_dgram_prefix_send(struct fid_ep *fep, const void *buf,
		size_t len, void *desc, fi_addr_t dest_addr, void *context)
{
	struct usd_udp_hdr *hdr;
	struct usd_qp_impl *qp;
	struct usdf_dest *dest;
	struct usdf_ep *ep;
	struct usd_wq *wq;
	uint32_t last_post;
	uint32_t flags;
	size_t padding;

	ep = ep_ftou(fep);
	dest = (struct usdf_dest *)(uintptr_t) dest_addr;
	padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);
	flags = (ep->ep_tx_completion) ? USD_SF_SIGNAL : 0;

	if (ep->e.dg.tx_op_flags & FI_INJECT) {
		if ((len - padding) > USD_SEND_MAX_COPY) {
			USDF_DBG_SYS(EP_DATA,
					"given inject length (%zu) exceeds max inject length (%d)\n",
					len, USD_SEND_MAX_COPY);
			return -FI_ENOSPC;
		}
		/* copy path: skip the app's prefix region entirely */
		return usd_post_send_one_copy(ep->e.dg.ep_qp, &dest->ds_dest,
				buf + USDF_HDR_BUF_ENTRY,
				len - USDF_HDR_BUF_ENTRY, flags, context);
	}

	qp = to_qpi(ep->e.dg.ep_qp);
	wq = &qp->uq_wq;
	/* header goes at the tail of the reserved prefix */
	hdr = (struct usd_udp_hdr *) ((char *) buf + padding);
	memcpy(hdr, &dest->ds_dest.ds_dest.ds_udp.u_hdr, sizeof(*hdr));
	_usdf_adjust_prefix_hdr(hdr, qp, len, padding);

	last_post = _usd_post_send_one(wq, hdr, len - padding,
			ep->ep_tx_completion);
	/* completion length reported to the app excludes the prefix */
	_usdf_adjust_post_info(wq, last_post, context,
			len - USDF_HDR_BUF_ENTRY);

	return FI_SUCCESS;
}
/*
 * fi_inject for dgram EPs: copy the entire packet (header + payload)
 * into the WQ bounce buffer so the caller's buffer is reusable on
 * return.  No completion context is associated with the post.
 */
ssize_t usdf_dgram_inject(struct fid_ep *fep, const void *buf, size_t len,
		fi_addr_t dest_addr)
{
	struct usdf_ep *ep;
	struct usdf_dest *dest;
	struct usd_qp_impl *qp;
	struct usd_wq *wq;
	struct usd_udp_hdr *hdr;
	struct usd_wq_post_info *pinfo;
	uint8_t *slot;
	uint32_t post;

	/* whole packet must fit in a single copy-buffer slot */
	if (len + sizeof(struct usd_udp_hdr) > USD_SEND_MAX_COPY) {
		return -FI_ENOSPC;
	}

	ep = ep_ftou(fep);
	dest = (struct usdf_dest *)(uintptr_t)dest_addr;
	qp = to_qpi(ep->e.dg.ep_qp);
	wq = &qp->uq_wq;

	/* this post's private slot in the copy buffer */
	slot = wq->uwq_copybuf + wq->uwq_post_index * USD_SEND_MAX_COPY;
	hdr = (struct usd_udp_hdr *)slot;

	memcpy(hdr, &dest->ds_dest.ds_dest.ds_udp.u_hdr, sizeof(*hdr));
	hdr->uh_udp.source =
		qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
	hdr->uh_ip.tot_len = htons(len + sizeof(*hdr) -
			sizeof(struct ether_header));
	hdr->uh_udp.len = htons(len + sizeof(*hdr) -
			sizeof(struct ether_header) - sizeof(struct iphdr));

	/* payload immediately follows the header */
	memcpy(hdr + 1, buf, len);

	post = _usd_post_send_one(wq, hdr, len + sizeof(*hdr), 1);

	/* inject never reports a completion context back to the app */
	pinfo = &wq->uwq_post_info[post];
	pinfo->wp_context = NULL;
	pinfo->wp_len = len;

	return 0;
}
/*
 * Issue HANG_NOTIFY to the VNIC
 *
 * FIX: a0 is both an input and output argument to vnic_dev_cmd();
 * it was previously passed uninitialized, handing indeterminate stack
 * contents to the firmware command.  Zero-initialize it.
 */
int usd_vnic_hang_notify(struct usd_qp *uqp)
{
	struct usd_qp_impl *qp;
	u64 a0 = 0;
	int ret;

	qp = to_qpi(uqp);
	ret = vnic_dev_cmd(qp->uq_vf->vf_vdev, CMD_HANG_NOTIFY,
			&a0, &a0, 1000);
	if (ret != 0) {
		fprintf(stderr, "hang_notify ret = %d\n", ret);
		return ret;
	}

	return 0;
}
static ssize_t _usdf_dgram_send_iov_copy(struct usdf_ep *ep, struct usd_dest *dest, const struct iovec *iov, size_t count, void *context) { struct usd_wq *wq; struct usd_qp_impl *qp; struct usd_udp_hdr *hdr; uint32_t last_post; struct usd_wq_post_info *info; uint8_t *copybuf; size_t len; unsigned i; qp = to_qpi(ep->e.dg.ep_qp); wq = &qp->uq_wq; copybuf = wq->uwq_copybuf + wq->uwq_post_index * USD_SEND_MAX_COPY; hdr = (struct usd_udp_hdr *)copybuf; memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); hdr->uh_udp.source = qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; len = sizeof(*hdr); for (i = 0; i < count; i++) { memcpy(copybuf + len, iov[i].iov_base, iov[i].iov_len); len += iov[i].iov_len; } /* adjust lengths */ hdr->uh_ip.tot_len = htons(len - sizeof(struct ether_header)); hdr->uh_udp.len = htons(len - sizeof(struct ether_header) - sizeof(struct iphdr)); last_post = _usd_post_send_one(wq, hdr, len, 1); info = &wq->uwq_post_info[last_post]; info->wp_context = context; info->wp_len = len; return 0; }
/*
 * Emit a standalone ACK or NAK packet for this EP from a WQ copy-buffer
 * slot.  A NAK asks the peer to retransmit starting at ep_next_rx_seq;
 * an ACK confirms everything through ep_next_rx_seq - 1.
 */
static inline void usdf_msg_send_ack(struct usdf_tx *tx, struct usdf_ep *ep)
{
	struct rudp_pkt *hdr;
	struct usd_wq *wq;
	uint32_t last_post;
	struct usd_wq_post_info *info;
	uint16_t seq;

	wq = &(to_qpi(tx->tx_qp)->uq_wq);
	hdr = (struct rudp_pkt *) (wq->uwq_copybuf +
			wq->uwq_post_index * USD_SEND_MAX_COPY);

	memcpy(hdr, &ep->e.msg.ep_dest->ds_dest.ds_udp.u_hdr,
			sizeof(struct usd_udp_hdr));
	hdr->msg.src_peer_id = htons(ep->e.msg.ep_lcl_peer_id);

	if (ep->e.msg.ep_send_nak) {
		/* NAK: request resend from the next expected sequence */
		hdr->msg.opcode = htons(RUDP_OP_NAK);
		seq = ep->e.msg.ep_next_rx_seq;
		hdr->msg.m.nak.nak_seq = htons(seq);
		ep->e.msg.ep_send_nak = 0;
	} else {
		/* ACK: acknowledge the last in-order sequence received */
		hdr->msg.opcode = htons(RUDP_OP_ACK);
		seq = ep->e.msg.ep_next_rx_seq - 1;
		hdr->msg.m.ack.ack_seq = htons(seq);
	}

	/* add packet lengths */
	hdr->hdr.uh_ip.tot_len = htons(
			sizeof(struct rudp_pkt) -
			sizeof(struct ether_header));
	hdr->hdr.uh_udp.len = htons(sizeof(struct rudp_pkt) -
			sizeof(struct ether_header) - sizeof(struct iphdr));

	last_post = _usd_post_send_one(wq, hdr, sizeof(*hdr), 1);

	/* completion carries the tx, not an app context; zero app length */
	info = &wq->uwq_post_info[last_post];
	info->wp_context = tx;
	info->wp_len = 0;
}
/*
 * Prefix-style dgram send (older variant): the caller guarantees
 * sizeof(struct usd_udp_hdr) writable bytes immediately BEFORE `buf`,
 * and the header is built in place there.
 *
 * NOTE(review): `(struct usd_udp_hdr *) buf - 1` silently drops the
 * const qualifier and writes in front of the supplied pointer — only
 * valid for FI_MSG_PREFIX-style buffers; confirm callers honor that
 * contract.
 */
ssize_t usdf_dgram_prefix_send(struct fid_ep *fep, const void *buf,
		size_t len, void *desc, fi_addr_t dest_addr, void *context)
{
	struct usdf_ep *ep;
	struct usd_dest *dest;
	struct usd_qp_impl *qp;
	struct usd_udp_hdr *hdr;
	struct usd_wq *wq;
	uint32_t last_post;
	struct usd_wq_post_info *info;

	ep = ep_ftou(fep);
	dest = (struct usd_dest *)(uintptr_t)dest_addr;

	qp = to_qpi(ep->e.dg.ep_qp);
	wq = &qp->uq_wq;

	/* header occupies the space directly in front of buf */
	hdr = (struct usd_udp_hdr *) buf - 1;
	memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr));

	/* adjust lengths and insert source port */
	hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) -
			sizeof(struct ether_header));
	hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) -
			sizeof(struct ether_header) -
			sizeof(struct iphdr)) + len);
	hdr->uh_udp.source =
		qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;

	last_post = _usd_post_send_one(wq, hdr,
			len + sizeof(struct usd_udp_hdr), 1);

	info = &wq->uwq_post_info[last_post];
	info->wp_context = context;
	info->wp_len = len;

	return 0;
}
/*
 * Active-side connection setup (older variant): allocate a connection
 * request, open a non-blocking TCP socket for the CM exchange, start
 * connect(2), and register with the fabric epoll loop to finish the
 * handshake (usdf_cm_msg_connect_cb_wr) when the socket is writable.
 * The request message is staged in crp->cr_data for the callback to
 * transmit.
 *
 * NOTE(review): the fail path always calls usdf_ep_msg_release_queues()
 * even when usdf_ep_msg_get_queues() was never reached — presumably a
 * no-op in that case; confirm.
 */
int usdf_cm_msg_connect(struct fid_ep *fep, const void *addr,
		const void *param, size_t paramlen)
{
	struct usdf_connreq *crp;
	struct usdf_ep *ep;
	struct usdf_rx *rx;
	struct usdf_domain *udp;
	const struct sockaddr_in *sin;
	struct epoll_event ev;
	struct usdf_fabric *fp;
	struct usdf_connreq_msg *reqp;
	struct usd_qp_impl *qp;
	int ret;

	USDF_TRACE_SYS(EP_CTRL, "\n");

	if (paramlen > USDF_MAX_CONN_DATA)
		return -FI_EINVAL;

	ep = ep_ftou(fep);
	udp = ep->ep_domain;
	fp = udp->dom_fabric;
	sin = addr;

	crp = NULL;
	/* room for the request bookkeeping + wire message + app data */
	crp = calloc(1, sizeof(*crp) +
			sizeof(struct usdf_connreq_msg) + paramlen);
	if (crp == NULL) {
		ret = -errno;
		goto fail;
	}
	crp->handle.fclass = FI_CLASS_CONNREQ;

	crp->cr_sockfd = socket(AF_INET, SOCK_STREAM, 0);
	if (crp->cr_sockfd == -1) {
		ret = -errno;
		goto fail;
	}

	/* make the CM socket non-blocking before connect(2) */
	ret = fcntl(crp->cr_sockfd, F_GETFL, 0);
	if (ret == -1) {
		ret = -errno;
		goto fail;
	}
	ret = fcntl(crp->cr_sockfd, F_SETFL, ret | O_NONBLOCK);
	if (ret == -1) {
		ret = -errno;
		goto fail;
	}

	ret = usdf_ep_msg_get_queues(ep);
	if (ret != 0) {
		goto fail;
	}
	rx = ep->ep_rx;
	qp = to_qpi(rx->rx_qp);

	/* EINPROGRESS is expected for a non-blocking connect */
	ret = connect(crp->cr_sockfd, (struct sockaddr *)sin, sizeof(*sin));
	if (ret != 0 && errno != EINPROGRESS) {
		ret = -errno;
		goto fail;
	}

	/* register for notification when connect completes */
	crp->cr_pollitem.pi_rtn = usdf_cm_msg_connect_cb_wr;
	crp->cr_pollitem.pi_context = crp;
	ev.events = EPOLLOUT;
	ev.data.ptr = &crp->cr_pollitem;
	ret = epoll_ctl(fp->fab_epollfd, EPOLL_CTL_ADD, crp->cr_sockfd, &ev);
	if (ret != 0) {
		crp->cr_pollitem.pi_rtn = NULL;
		ret = -errno;
		goto fail;
	}

	/* allocate remote peer ID */
	ep->e.msg.ep_rem_peer_id = udp->dom_next_peer;
	udp->dom_peer_tab[udp->dom_next_peer] = ep;
	++udp->dom_next_peer;

	/* stage the connection request message for the epoll callback */
	crp->cr_ep = ep;
	reqp = (struct usdf_connreq_msg *)crp->cr_data;
	crp->cr_ptr = crp->cr_data;
	crp->cr_resid = sizeof(*reqp) + paramlen;

	reqp->creq_peer_id = htons(ep->e.msg.ep_rem_peer_id);
	reqp->creq_ipaddr = fp->fab_dev_attrs->uda_ipaddr_be;
	reqp->creq_port =
		qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
	memcpy(reqp->creq_mac, fp->fab_dev_attrs->uda_mac_addr, ETH_ALEN);
	reqp->creq_datalen = htonl(paramlen);
	memcpy(reqp->creq_data, param, paramlen);

	return 0;

fail:
	if (crp != NULL) {
		if (crp->cr_sockfd != -1) {
			close(crp->cr_sockfd);
		}
		free(crp);
	}
	usdf_ep_msg_release_queues(ep);
	return ret;
}
/*
 * Passive-side accept: take the pending connreq stashed on the EP,
 * build the destination from the peer's advertised address, allocate a
 * local peer ID, and write the accept reply (with app data) back over
 * the CM socket.
 *
 * NOTE(review): the fail path frees ep->e.msg.ep_dest with plain
 * free() (rather than a usd destroy routine) and leaves the pointer
 * dangling; the partial-write case is also unimplemented (XXX below).
 */
int usdf_cm_msg_accept(struct fid_ep *fep, const void *param,
		size_t paramlen)
{
	struct usdf_ep *ep;
	struct usdf_rx *rx;
	struct usdf_domain *udp;
	struct usdf_fabric *fp;
	struct usdf_connreq *crp;
	struct usdf_connreq_msg *reqp;
	struct usd_qp_impl *qp;
	int ret;
	int n;

	USDF_TRACE_SYS(EP_CTRL, "\n");

	if (paramlen > USDF_MAX_CONN_DATA)
		return -FI_EINVAL;

	ep = ep_ftou(fep);
	udp = ep->ep_domain;
	fp = udp->dom_fabric;
	crp = ep->e.msg.ep_connreq;
	if (crp == NULL) {
		return -FI_ENOTCONN;
	}
	if (ep->ep_eq == NULL) {
		return -FI_ENOEQ;
	}
	crp->cr_ep = ep;
	reqp = (struct usdf_connreq_msg *)crp->cr_data;
	ep->e.msg.ep_lcl_peer_id = ntohs(reqp->creq_peer_id);

	/* start creating the dest early */
	ret = usd_create_dest_with_mac(udp->dom_dev, reqp->creq_ipaddr,
			reqp->creq_port, reqp->creq_mac,
			&ep->e.msg.ep_dest);
	if (ret != 0) {
		goto fail;
	}

	ret = usdf_ep_msg_get_queues(ep);
	if (ret != 0) {
		goto fail;
	}
	rx = ep->ep_rx;
	qp = to_qpi(rx->rx_qp);

	/* allocate a peer ID */
	ep->e.msg.ep_rem_peer_id = udp->dom_next_peer;
	udp->dom_peer_tab[udp->dom_next_peer] = ep;
	++udp->dom_next_peer;

	/* reuse the connreq buffer for the accept reply */
	crp->cr_ptr = crp->cr_data;
	crp->cr_resid = sizeof(*reqp) + paramlen;

	reqp->creq_peer_id = htons(ep->e.msg.ep_rem_peer_id);
	reqp->creq_ipaddr = fp->fab_dev_attrs->uda_ipaddr_be;
	reqp->creq_port =
		qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
	memcpy(reqp->creq_mac, fp->fab_dev_attrs->uda_mac_addr, ETH_ALEN);
	reqp->creq_result = htonl(0);
	reqp->creq_datalen = htonl(paramlen);
	memcpy(reqp->creq_data, param, paramlen);

	n = write(crp->cr_sockfd, crp->cr_ptr, crp->cr_resid);
	if (n == -1) {
		usdf_cm_msg_connreq_cleanup(crp);
		ret = -errno;
		goto fail;
	}

	crp->cr_resid -= n;
	if (crp->cr_resid == 0) {
		usdf_cm_msg_accept_complete(crp);
	} else {
		// XXX set up epoll junk to send rest
	}

	return 0;

fail:
	free(ep->e.msg.ep_dest);
	/* XXX release queues */
	return ret;
}
static int usd_post_send_one_prefixed_raw_normal( struct usd_qp *uqp, struct usd_dest __attribute__ ((unused)) * dest, const void *buf, size_t len, uint32_t flags, void *context) { struct usd_qp_impl *qp; struct usd_wq *wq; uint32_t last_post; struct usd_wq_post_info *info; qp = to_qpi(uqp); wq = &qp->uq_wq; last_post = _usd_post_send_one(wq, buf, len, USD_SF_ISSET(flags, SIGNAL)); info = &wq->uwq_post_info[last_post]; info->wp_context = context; info->wp_len = len; return 0; } struct usd_qp_ops usd_qp_ops_raw_normal = { .qo_post_send_one_prefixed = usd_post_send_one_prefixed_raw_normal, };
/*
 * Transmit the next RUDP segment of the message at the head of this
 * EP's posted-WQE list.
 *
 * Two paths:
 *  - small remainder: copy everything after the header into the WQ
 *    bounce buffer and post one contiguous SGE (RUDP_OP_LAST);
 *  - otherwise: build a multi-SGE descriptor chain directly from the
 *    message iovs, up to one MTU or USDF_MSG_MAX_SGE fragments, and
 *    ring the doorbell by hand (RUDP_OP_FIRST unless it drains resid).
 *
 * Partial-progress cursors (resid/cur_iov/cur_ptr/cur_resid) are saved
 * back into the msg QE when the message is not yet complete, and the
 * ACK timer is (re)armed in all cases.
 */
static inline void usdf_msg_send_segment(struct usdf_tx *tx,
		struct usdf_ep *ep)
{
	struct usdf_msg_qe *msg;
	struct rudp_pkt *hdr;
	struct usd_wq *wq;
	uint32_t index;
	size_t cur_iov;
	size_t cur_resid;
	size_t resid;
	const uint8_t *cur_ptr;
	const uint8_t *send_ptr;
	size_t sge_len;
	uint8_t *ptr;
	struct usd_wq_post_info *info;

	msg = TAILQ_FIRST(&ep->e.msg.ep_posted_wqe);
	wq = &(to_qpi(tx->tx_qp)->uq_wq);
	index = wq->uwq_post_index;

	hdr = (struct rudp_pkt *)(wq->uwq_copybuf + index * USD_SEND_MAX_COPY);

	memcpy(hdr, &ep->e.msg.ep_dest->ds_dest.ds_udp.u_hdr,
			sizeof(struct usd_udp_hdr));
	hdr->msg.src_peer_id = htons(ep->e.msg.ep_lcl_peer_id);

	/* load this message's progress cursors */
	resid = msg->ms_resid;
	cur_iov = msg->ms_cur_iov;
	cur_ptr = msg->ms_cur_ptr;
	cur_resid = msg->ms_iov_resid;

	/* save first seq for message */
	if (cur_iov == 0 && cur_resid == msg->ms_iov[0].iov_len) {
		msg->ms_first_seq = ep->e.msg.ep_next_tx_seq;
	}

	if (resid < USD_SEND_MAX_COPY - sizeof(*hdr)) {
		/* small remainder: copy into bounce buffer, single SGE */
		hdr->msg.opcode = htons(RUDP_OP_LAST);
		hdr->msg.m.rc_data.length = htons(resid);
		hdr->msg.m.rc_data.seqno = htons(ep->e.msg.ep_next_tx_seq);
		++ep->e.msg.ep_next_tx_seq;

		sge_len = resid;
		ptr = (uint8_t *)(hdr + 1);
		while (resid > 0) {
			memcpy(ptr, cur_ptr, cur_resid);
			ptr += cur_resid;
			resid -= cur_resid;
			++cur_iov;
			cur_ptr = msg->ms_iov[cur_iov].iov_base;
			cur_resid = msg->ms_iov[cur_iov].iov_len;
		}

		/* add packet lengths */
		hdr->hdr.uh_ip.tot_len = htons(
				sge_len + sizeof(struct rudp_pkt) -
				sizeof(struct ether_header));
		hdr->hdr.uh_udp.len = htons(
				(sizeof(struct rudp_pkt) -
				 sizeof(struct ether_header) -
				 sizeof(struct iphdr)) + sge_len);

		index = _usd_post_send_one(wq, hdr, sge_len + sizeof(*hdr), 1);
	} else {
		/* large remainder: hand-build a multi-SGE descriptor chain */
		struct vnic_wq *vwq;
		u_int8_t offload_mode = 0, eop;
		u_int16_t mss = 7, header_length = 0, vlan_tag = 0;
		u_int8_t vlan_tag_insert = 0, loopback = 0, fcoe_encap = 0;
		struct wq_enet_desc *desc;
		size_t space;
		size_t num_sge;
		size_t sent;

		vwq = &wq->uwq_vnic_wq;
		desc = wq->uwq_next_desc;

		/* payload budget for this packet: one MTU minus header */
		space = ep->ep_domain->dom_fabric->fab_dev_attrs->uda_mtu -
			sizeof(*hdr);
		num_sge = 1;

		/* encode header desc */
		eop = 0;
		wq_enet_desc_enc(desc, (uintptr_t)hdr, sizeof(*hdr),
			mss, header_length, offload_mode,
			eop, 0, fcoe_encap, vlan_tag_insert,
			vlan_tag, loopback);

		do {
			desc = (struct wq_enet_desc *)
				((uintptr_t)wq->uwq_desc_ring + (index << 4));
			index = (index + 1) & wq->uwq_post_index_mask;

			send_ptr = cur_ptr;
			if (cur_resid >= space) {
				/* fragment fills the remaining MTU budget */
				sge_len = space;
				eop = 1;
				cur_resid -= sge_len;
				cur_ptr += sge_len;
			} else {
				/* fragment exhausts the current iov */
				sge_len = cur_resid;
				if (num_sge == USDF_MSG_MAX_SGE ||
						cur_resid == resid) {
					eop = 1;
				}
				++cur_iov;
				cur_ptr = msg->ms_iov[cur_iov].iov_base;
				cur_resid = msg->ms_iov[cur_iov].iov_len;
			}

			wq_enet_desc_enc(desc, (uintptr_t)send_ptr, sge_len,
				mss, header_length, offload_mode,
				eop, eop, fcoe_encap, vlan_tag_insert,
				vlan_tag, loopback);

			++num_sge;
			space -= sge_len;
			resid -= sge_len;
		} while (space > 0 && num_sge <= USDF_MSG_MAX_SGE &&
				resid > 0);

		/* add packet lengths */
		sent = ep->ep_domain->dom_fabric->fab_dev_attrs->uda_mtu -
			sizeof(*hdr) - space;
		hdr->hdr.uh_ip.tot_len = htons(
				sent + sizeof(struct rudp_pkt) -
				sizeof(struct ether_header));
		hdr->hdr.uh_udp.len = htons(
				(sizeof(struct rudp_pkt) -
				 sizeof(struct ether_header) -
				 sizeof(struct iphdr)) + sent);
#if 0
		/* fault injection: corrupt ~1/177 packets for testing */
		if ((random() % 177) == 0 && resid == 0) {
			hdr->hdr.uh_eth.ether_type = 0;
			//printf("BORK seq %u\n", ep->e.msg.ep_next_tx_seq);
		}
#endif

		if (resid == 0) {
			hdr->msg.opcode = htons(RUDP_OP_LAST);
		} else {
			hdr->msg.opcode = htons(RUDP_OP_FIRST);
		}
		hdr->msg.m.rc_data.length = htons(sent);
		hdr->msg.m.rc_data.seqno = htons(ep->e.msg.ep_next_tx_seq);
		++ep->e.msg.ep_next_tx_seq;

		/* descriptors visible before the doorbell */
		wmb();
		iowrite64(index, &vwq->ctrl->posted_index);

		wq->uwq_next_desc = (struct wq_enet_desc *)
			((uintptr_t)wq->uwq_desc_ring + (index << 4));
		wq->uwq_post_index = (index + 1) & wq->uwq_post_index_mask;
		wq->uwq_send_credits -= num_sge;
	}

	info = &wq->uwq_post_info[index];
	info->wp_context = tx;
	info->wp_len = sge_len;

	/* If send complete, remove from send list */
	if (resid == 0) {
		usdf_msg_send_complete(ep, msg);
	} else {
		/* stash progress cursors for the next segment */
		msg->ms_resid = resid;
		msg->ms_iov_resid = cur_resid;
		msg->ms_cur_iov = cur_iov;
		msg->ms_cur_ptr = cur_ptr;
	}

	/* set ACK timer */
	usdf_timer_set(ep->ep_domain->dom_fabric, ep->e.msg.ep_ack_timer,
			USDF_RUDP_ACK_TIMEOUT);
}
/*
 * Active-side connection setup (newer variant): allocate a max-size
 * connreq (reused later for the accept/reject payload), obtain or
 * create the non-blocking CM socket, start connect(2), refresh the
 * locally bound address, stage the request message, and register with
 * the fabric epoll loop to transmit it when the socket is writable.
 */
int usdf_cm_msg_connect(struct fid_ep *fep, const void *addr,
		const void *param, size_t paramlen)
{
	struct usdf_connreq *crp;
	struct usdf_ep *ep;
	struct usdf_rx *rx;
	struct usdf_domain *udp;
	const struct sockaddr_in *sin;
	struct epoll_event ev;
	struct usdf_fabric *fp;
	struct usdf_connreq_msg *reqp;
	struct usd_qp_impl *qp;
	size_t request_size;
	int ret;

	USDF_TRACE_SYS(EP_CTRL, "\n");

	if (paramlen > USDF_MAX_CONN_DATA)
		return -FI_EINVAL;

	ep = ep_ftou(fep);
	udp = ep->ep_domain;
	fp = udp->dom_fabric;
	sin = addr;

	/* Although paramlen may be less than USDF_MAX_CONN_DATA, the same crp
	 * struct is used for receiving the accept and reject payload. The
	 * structure has to be prepared to receive the maximum allowable amount
	 * of data per transfer. The maximum size includes the connection
	 * request structure, the connection request message, and the maximum
	 * amount of data per connection request message.
	 */
	request_size = sizeof(*crp) + sizeof(*reqp) + USDF_MAX_CONN_DATA;
	crp = calloc(1, request_size);
	if (crp == NULL) {
		ret = -errno;
		goto fail;
	}
	ep->e.msg.ep_connreq = crp;

	crp->handle.fclass = FI_CLASS_CONNREQ;

	/* reuse a previously created CM socket if the EP has one */
	if (ep->e.msg.ep_cm_sock == -1) {
		crp->cr_sockfd = socket(AF_INET, SOCK_STREAM, 0);
		if (crp->cr_sockfd == -1) {
			ret = -errno;
			goto fail;
		}
	} else {
		crp->cr_sockfd = ep->e.msg.ep_cm_sock;
		ep->e.msg.ep_cm_sock = -1;
	}

	ret = fi_fd_nonblock(crp->cr_sockfd);
	if (ret) {
		ret = -errno;
		goto fail;
	}

	ret = usdf_ep_msg_get_queues(ep);
	if (ret != 0) {
		goto fail;
	}
	rx = ep->ep_rx;
	qp = to_qpi(rx->rx_qp);

	/* EINPROGRESS is expected for a non-blocking connect */
	ret = connect(crp->cr_sockfd, (struct sockaddr *)sin, sizeof(*sin));
	if (ret != 0 && errno != EINPROGRESS) {
		ret = -errno;
		goto fail;
	}

	/* If cr_sockfd was previously unbound, connect(2) will do a bind(2)
	 * for us.  Update our snapshot of the locally bound address.
	 */
	ret = usdf_msg_upd_lcl_addr(ep);
	if (ret)
		goto fail;

	/* allocate remote peer ID */
	ep->e.msg.ep_rem_peer_id = udp->dom_next_peer;
	udp->dom_peer_tab[udp->dom_next_peer] = ep;
	++udp->dom_next_peer;

	/* stage the connection request message for the epoll callback */
	crp->cr_ep = ep;
	reqp = (struct usdf_connreq_msg *)crp->cr_data;
	crp->cr_ptr = crp->cr_data;
	crp->cr_resid = sizeof(*reqp) + paramlen;

	reqp->creq_peer_id = htons(ep->e.msg.ep_rem_peer_id);
	reqp->creq_ipaddr = fp->fab_dev_attrs->uda_ipaddr_be;
	reqp->creq_port =
		qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
	reqp->creq_datalen = htonl(paramlen);
	memcpy(reqp->creq_data, param, paramlen);

	/* register for notification when connect completes */
	crp->cr_pollitem.pi_rtn = usdf_cm_msg_connect_cb_wr;
	crp->cr_pollitem.pi_context = crp;
	ev.events = EPOLLOUT;
	ev.data.ptr = &crp->cr_pollitem;
	ret = epoll_ctl(fp->fab_epollfd, EPOLL_CTL_ADD, crp->cr_sockfd, &ev);
	if (ret != 0) {
		crp->cr_pollitem.pi_rtn = NULL;
		ret = -errno;
		goto fail;
	}

	return 0;

fail:
	if (crp != NULL) {
		if (crp->cr_sockfd != -1) {
			close(crp->cr_sockfd);
		}
		free(crp);
		ep->e.msg.ep_connreq = NULL;
	}
	usdf_ep_msg_release_queues(ep);
	return ret;
}