int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
{
	struct bnxt_rx_ring_info *rxr;
	struct bnxt_ring *ring;
	uint32_t prod, type;
	unsigned int i;

	type = RX_PROD_PKT_BD_TYPE_RX_PROD_PKT | RX_PROD_PKT_BD_FLAGS_EOP_PAD;

	rxr = rxq->rx_ring;
	ring = rxr->rx_ring_struct;
	bnxt_init_rxbds(ring, type, rxq->rx_buf_use_size);

	prod = rxr->rx_prod;
	for (i = 0; i < ring->ring_size; i++) {
		if (bnxt_alloc_rx_data(rxq, rxr, prod) != 0) {
			RTE_LOG(WARNING, PMD,
				"init'ed rx ring %d with %d/%d mbufs only\n",
				rxq->queue_id, i, ring->ring_size);
			break;
		}
		rxr->rx_prod = prod;
		prod = RING_NEXT(rxr->rx_ring_struct, prod);
	}

	return 0;
}
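The ring-fill loop above leans on bnxt_alloc_rx_data() to attach an mbuf to each producer slot, but that helper is not part of this excerpt. Below is a minimal sketch of what it plausibly does; the rx_desc_ring and mb_pool field names are assumptions, and it reuses the RTE_MBUF_DATA_DMA_ADDR macro seen in the TX path rather than the driver's actual code.

/*
 * Sketch only, not the driver's implementation: allocate an mbuf from the
 * queue's mempool (assumed field name mb_pool), remember it in the software
 * ring, and point the hardware RX BD (assumed field name rx_desc_ring) at
 * its DMA address.
 */
static inline int bnxt_alloc_rx_data(struct bnxt_rx_queue *rxq,
				     struct bnxt_rx_ring_info *rxr,
				     uint16_t prod)
{
	struct rx_prod_pkt_bd *rxbd = &rxr->rx_desc_ring[prod];
	struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[prod];
	struct rte_mbuf *mbuf;

	mbuf = rte_pktmbuf_alloc(rxq->mb_pool);
	if (mbuf == NULL)
		return -ENOMEM;

	rx_buf->mbuf = mbuf;
	/* Program the hardware BD with the start of the mbuf data area */
	rxbd->addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR(mbuf));

	return 0;
}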
static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, int nr_pkts)
{
	struct bnxt_tx_ring_info *txr = txq->tx_ring;
	uint16_t cons = txr->tx_cons;
	int i, j;

	for (i = 0; i < nr_pkts; i++) {
		struct bnxt_sw_tx_bd *tx_buf;
		struct rte_mbuf *mbuf;

		tx_buf = &txr->tx_buf_ring[cons];
		cons = RING_NEXT(txr->tx_ring_struct, cons);
		mbuf = tx_buf->mbuf;
		tx_buf->mbuf = NULL;

		/* EW - no need to unmap DMA memory? */

		for (j = 1; j < tx_buf->nr_bds; j++)
			cons = RING_NEXT(txr->tx_ring_struct, cons);
		rte_pktmbuf_free(mbuf);
	}
	txr->tx_cons = cons;
}
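bnxt_tx_cmp() only frees the mbufs; something else has to walk the TX completion ring to learn how many packets the NIC finished. The sketch below shows one plausible walker, reusing the RING_CMP/CMP_VALID/NEXT_RAW_CMP idioms from bnxt_rx_pkt() further down. The txq->cp_ring and cp_raw_cons fields, struct tx_cmpl, CMP_TYPE, TX_CMPL_TYPE_TX_L2, and the omitted doorbell write are all assumptions, not taken from this excerpt.

/* Hedged sketch of a TX completion walker that drives bnxt_tx_cmp() */
static int bnxt_handle_tx_cp(struct bnxt_tx_queue *txq)
{
	struct bnxt_cp_ring_info *cpr = txq->cp_ring;	/* assumed field */
	uint32_t raw_cons = cpr->cp_raw_cons;		/* assumed field */
	uint32_t cons;
	int nb_tx_pkts = 0;
	struct tx_cmpl *txcmp;

	for (;;) {
		cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
		txcmp = (struct tx_cmpl *)&cpr->cp_desc_ring[cons];

		/* Stop once the next entry is still owned by hardware */
		if (!CMP_VALID(txcmp, raw_cons, cpr->cp_ring_struct))
			break;

		/* Count only L2 TX completions (assumed type macros) */
		if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2)
			nb_tx_pkts++;

		raw_cons = NEXT_RAW_CMP(raw_cons);
	}

	if (nb_tx_pkts)
		bnxt_tx_cmp(txq, nb_tx_pkts);
	if (raw_cons != cpr->cp_raw_cons) {
		cpr->cp_raw_cons = raw_cons;
		/* Completion-ring doorbell write omitted in this sketch */
	}
	return nb_tx_pkts;
}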
static int
bnxt_isc_txd_encap(void *sc, if_pkt_info_t pi)
{
	struct bnxt_softc *softc = (struct bnxt_softc *)sc;
	struct bnxt_ring *txr = &softc->tx_rings[pi->ipi_qsidx];
	struct tx_bd_long *tbd;
	struct tx_bd_long_hi *tbdh;
	bool need_hi = false;
	uint16_t flags_type;
	uint16_t lflags;
	uint32_t cfa_meta;
	int seg = 0;

	/* If we have offloads enabled, we need to use two BDs. */
	if ((pi->ipi_csum_flags & (CSUM_OFFLOAD | CSUM_TSO | CSUM_IP)) ||
	    pi->ipi_mflags & M_VLANTAG)
		need_hi = true;

	/* TODO: Devices before Cu+B1 need to not mix long and short BDs */
	need_hi = true;

	pi->ipi_new_pidx = pi->ipi_pidx;
	tbd = &((struct tx_bd_long *)txr->vaddr)[pi->ipi_new_pidx];
	pi->ipi_ndescs = 0;
	/* No need to byte-swap the opaque value */
	tbd->opaque = ((pi->ipi_nsegs + need_hi) << 24) | pi->ipi_new_pidx;
	tbd->len = htole16(pi->ipi_segs[seg].ds_len);
	tbd->addr = htole64(pi->ipi_segs[seg++].ds_addr);
	flags_type = ((pi->ipi_nsegs + need_hi) <<
	    TX_BD_SHORT_FLAGS_BD_CNT_SFT) & TX_BD_SHORT_FLAGS_BD_CNT_MASK;
	if (pi->ipi_len >= 2048)
		flags_type |= TX_BD_SHORT_FLAGS_LHINT_GTE2K;
	else
		flags_type |= bnxt_tx_lhint[pi->ipi_len >> 9];

	if (need_hi) {
		flags_type |= TX_BD_LONG_TYPE_TX_BD_LONG;

		pi->ipi_new_pidx = RING_NEXT(txr, pi->ipi_new_pidx);
		tbdh = &((struct tx_bd_long_hi *)txr->vaddr)[pi->ipi_new_pidx];
		tbdh->mss = htole16(pi->ipi_tso_segsz);
		tbdh->hdr_size = htole16((pi->ipi_ehdrlen + pi->ipi_ip_hlen +
		    pi->ipi_tcp_hlen) >> 1);
		tbdh->cfa_action = 0;
		lflags = 0;
		cfa_meta = 0;
		if (pi->ipi_mflags & M_VLANTAG) {
			/* TODO: Do we need to byte-swap the vtag here? */
			cfa_meta = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
			    pi->ipi_vtag;
			cfa_meta |= TX_BD_LONG_CFA_META_VLAN_TPID_TPID8100;
		}
		tbdh->cfa_meta = htole32(cfa_meta);
		if (pi->ipi_csum_flags & CSUM_TSO) {
			lflags |= TX_BD_LONG_LFLAGS_LSO |
			    TX_BD_LONG_LFLAGS_T_IPID;
		} else if (pi->ipi_csum_flags & CSUM_OFFLOAD) {
			lflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM |
			    TX_BD_LONG_LFLAGS_IP_CHKSUM;
		} else if (pi->ipi_csum_flags & CSUM_IP) {
			lflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM;
		}
		tbdh->lflags = htole16(lflags);
	} else {
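The bnxt_tx_lhint[] table indexed by ipi_len >> 9 above is not shown in this excerpt. A plausible definition, mirroring the lhint_arr[] used in the DPDK bnxt_start_xmit() below (one length hint per 512-byte step up to 2 KB), would be:

/* Assumed length-hint lookup for frames shorter than 2 KB */
static const uint16_t bnxt_tx_lhint[] = {
	TX_BD_SHORT_FLAGS_LHINT_LT512,
	TX_BD_SHORT_FLAGS_LHINT_LT1K,
	TX_BD_SHORT_FLAGS_LHINT_LT2K,
	TX_BD_SHORT_FLAGS_LHINT_LT2K,
};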
static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
				struct bnxt_tx_queue *txq)
{
	struct bnxt_tx_ring_info *txr = txq->tx_ring;
	struct tx_bd_long *txbd;
	struct tx_bd_long_hi *txbd1;
	uint32_t vlan_tag_flags, cfa_action;
	bool long_bd = false;
	uint16_t last_prod = 0;
	struct rte_mbuf *m_seg;
	struct bnxt_sw_tx_bd *tx_buf;
	static const uint32_t lhint_arr[4] = {
		TX_BD_LONG_FLAGS_LHINT_LT512,
		TX_BD_LONG_FLAGS_LHINT_LT1K,
		TX_BD_LONG_FLAGS_LHINT_LT2K,
		TX_BD_LONG_FLAGS_LHINT_LT2K
	};

	if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM))
		long_bd = true;

	tx_buf = &txr->tx_buf_ring[txr->tx_prod];
	tx_buf->mbuf = tx_pkt;
	tx_buf->nr_bds = long_bd + tx_pkt->nb_segs;
	last_prod = (txr->tx_prod + tx_buf->nr_bds - 1) &
		    txr->tx_ring_struct->ring_mask;

	if (unlikely(bnxt_tx_avail(txr) < tx_buf->nr_bds))
		return -ENOMEM;

	txbd = &txr->tx_desc_ring[txr->tx_prod];
	txbd->opaque = txr->tx_prod;
	txbd->flags_type = tx_buf->nr_bds << TX_BD_LONG_FLAGS_BD_CNT_SFT;
	txbd->len = tx_pkt->data_len;
	if (txbd->len >= 2048)
		txbd->flags_type |= TX_BD_LONG_FLAGS_LHINT_GTE2K;
	else
		txbd->flags_type |= lhint_arr[txbd->len >> 9];
	/* The BD address field is 64 bits wide */
	txbd->addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR(tx_buf->mbuf));

	if (long_bd) {
		txbd->flags_type |= TX_BD_LONG_TYPE_TX_BD_LONG;
		vlan_tag_flags = 0;
		cfa_action = 0;
		if (tx_buf->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
			/* shurd: Should this mask at
			 * TX_BD_LONG_CFA_META_VLAN_VID_MASK?
			 */
			vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
				tx_buf->mbuf->vlan_tci;
			/* Currently supports 8021Q, 8021AD vlan offloads
			 * QINQ1, QINQ2, QINQ3 vlan headers are deprecated
			 */
			/* DPDK only supports 802.1Q VLAN packets */
			vlan_tag_flags |=
				TX_BD_LONG_CFA_META_VLAN_TPID_TPID8100;
		}

		txr->tx_prod = RING_NEXT(txr->tx_ring_struct, txr->tx_prod);

		txbd1 = (struct tx_bd_long_hi *)
					&txr->tx_desc_ring[txr->tx_prod];
		txbd1->lflags = 0;
		txbd1->cfa_meta = vlan_tag_flags;
		txbd1->cfa_action = cfa_action;

		if (tx_pkt->ol_flags & PKT_TX_TCP_SEG) {
			/* TSO */
			txbd1->lflags |= TX_BD_LONG_LFLAGS_LSO;
			txbd1->hdr_size = tx_pkt->l2_len + tx_pkt->l3_len +
					tx_pkt->l4_len + tx_pkt->outer_l2_len +
					tx_pkt->outer_l3_len;
			txbd1->mss = tx_pkt->tso_segsz;
		} else if (tx_pkt->ol_flags & PKT_TX_OIP_IIP_TCP_UDP_CKSUM) {
			/* Outer IP, Inner IP, Inner TCP/UDP CSO */
			txbd1->lflags |= TX_BD_FLG_TIP_IP_TCP_UDP_CHKSUM;
			txbd1->mss = 0;
		} else if (tx_pkt->ol_flags & PKT_TX_IIP_TCP_UDP_CKSUM) {
			/* (Inner) IP, (Inner) TCP/UDP CSO */
			txbd1->lflags |= TX_BD_FLG_IP_TCP_UDP_CHKSUM;
			txbd1->mss = 0;
		} else if (tx_pkt->ol_flags & PKT_TX_OIP_TCP_UDP_CKSUM) {
			/* Outer IP, (Inner) TCP/UDP CSO */
			txbd1->lflags |= TX_BD_FLG_TIP_TCP_UDP_CHKSUM;
			txbd1->mss = 0;
		} else if (tx_pkt->ol_flags & PKT_TX_OIP_IIP_CKSUM) {
			/* Outer IP, Inner IP CSO */
			txbd1->lflags |= TX_BD_FLG_TIP_IP_CHKSUM;
			txbd1->mss = 0;
		} else if (tx_pkt->ol_flags & PKT_TX_TCP_UDP_CKSUM) {
			/* TCP/UDP CSO */
			txbd1->lflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM;
			txbd1->mss = 0;
		} else if (tx_pkt->ol_flags & PKT_TX_IP_CKSUM) {
			/* IP CSO */
			txbd1->lflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM;
			txbd1->mss = 0;
		} else if (tx_pkt->ol_flags & PKT_TX_OUTER_IP_CKSUM) {
			/* Outer IP CSO */
			txbd1->lflags |= TX_BD_LONG_LFLAGS_T_IP_CHKSUM;
			txbd1->mss = 0;
		}
	} else {
		txbd->flags_type |= TX_BD_SHORT_TYPE_TX_BD_SHORT;
	}

	m_seg = tx_pkt->next;
	/* i is set at the end of the if(long_bd) block */
	while (txr->tx_prod != last_prod) {
		txr->tx_prod = RING_NEXT(txr->tx_ring_struct, txr->tx_prod);
		tx_buf = &txr->tx_buf_ring[txr->tx_prod];

		txbd = &txr->tx_desc_ring[txr->tx_prod];
		txbd->addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR(m_seg));
		txbd->flags_type = TX_BD_SHORT_TYPE_TX_BD_SHORT;
		txbd->len = m_seg->data_len;

		m_seg = m_seg->next;
	}

	txbd->flags_type |= TX_BD_LONG_FLAGS_PACKET_END;

	txr->tx_prod = RING_NEXT(txr->tx_ring_struct, txr->tx_prod);

	return 0;
}
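bnxt_start_xmit() queues a single packet but never notifies the NIC. A hedged sketch of a burst wrapper follows: it first reclaims finished descriptors using the completion walker sketched after bnxt_tx_cmp(), queues as many packets as fit, then issues a single doorbell write. The B_TX_DB macro and tx_doorbell field are assumed names; rte_wmb() is the standard DPDK write barrier.

/* Sketch of a burst TX entry point built around bnxt_start_xmit() */
uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
			uint16_t nb_pkts)
{
	struct bnxt_tx_queue *txq = tx_queue;
	uint16_t nb_tx_pkts;

	/* Reclaim descriptors for packets the NIC already sent so that
	 * bnxt_tx_avail() reflects the current free space.
	 */
	bnxt_handle_tx_cp(txq);

	for (nb_tx_pkts = 0; nb_tx_pkts < nb_pkts; nb_tx_pkts++) {
		if (bnxt_start_xmit(tx_pkts[nb_tx_pkts], txq)) {
			/* Ring full: stop and report what was queued */
			break;
		}
	}
	if (nb_tx_pkts) {
		/* One doorbell write publishes all newly written TX BDs;
		 * B_TX_DB and tx_doorbell are assumed names.
		 */
		rte_wmb();
		B_TX_DB(txq->tx_ring->tx_doorbell, txq->tx_ring->tx_prod);
	}
	return nb_tx_pkts;
}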
static uint16_t bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
			    struct bnxt_rx_queue *rxq, uint32_t *raw_cons)
{
	struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
	struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
	struct rx_pkt_cmpl *rxcmp;
	struct rx_pkt_cmpl_hi *rxcmp1;
	uint32_t tmp_raw_cons = *raw_cons;
	uint16_t cons, prod, cp_cons =
			RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
	struct bnxt_sw_rx_bd *rx_buf;
	struct rte_mbuf *mbuf;
	int rc = 0;

	rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cp_cons];

	tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
	cp_cons = RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
	rxcmp1 = (struct rx_pkt_cmpl_hi *)&cpr->cp_desc_ring[cp_cons];

	if (!CMP_VALID(rxcmp1, tmp_raw_cons, cpr->cp_ring_struct))
		return -EBUSY;

	prod = rxr->rx_prod;

	/* EW - GRO deferred to phase 3 */
	cons = rxcmp->opaque;
	rx_buf = &rxr->rx_buf_ring[cons];
	mbuf = rx_buf->mbuf;
	rte_prefetch0(mbuf);

	mbuf->nb_segs = 1;
	mbuf->next = NULL;
	mbuf->pkt_len = rxcmp->len;
	mbuf->data_len = mbuf->pkt_len;
	mbuf->port = rxq->port_id;
	mbuf->ol_flags = 0;
	if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) {
		mbuf->hash.rss = rxcmp->rss_hash;
		mbuf->ol_flags |= PKT_RX_RSS_HASH;
	} else {
		mbuf->hash.fdir.id = rxcmp1->cfa_code;
		mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
	}
	if (rxcmp1->flags2 & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
		mbuf->vlan_tci = rxcmp1->metadata &
			(RX_PKT_CMPL_METADATA_VID_MASK |
			 RX_PKT_CMPL_METADATA_DE |
			 RX_PKT_CMPL_METADATA_PRI_MASK);
		mbuf->ol_flags |= PKT_RX_VLAN_PKT;
	}

	rx_buf->mbuf = NULL;
	if (rxcmp1->errors_v2 & RX_CMP_L2_ERRORS) {
		/* Re-install the mbuf back to the rx ring */
		bnxt_reuse_rx_mbuf(rxr, cons, mbuf);

		rc = -EIO;
		goto next_rx;
	}

	/*
	 * TODO: Redesign this....
	 * If the allocation fails, the packet does not get received.
	 * Simply returning this will result in slowly falling behind
	 * on the producer ring buffers.
	 * Instead, "filling up" the producer just before ringing the
	 * doorbell could be a better solution since it will let the
	 * producer ring starve until memory is available again pushing
	 * the drops into hardware and getting them out of the driver
	 * allowing recovery to a full producer ring.
	 *
	 * This could also help with cache usage by preventing per-packet
	 * calls in favour of a tight loop with the same function being called
	 * in it.
	 */
	if (bnxt_alloc_rx_data(rxq, rxr, prod)) {
		RTE_LOG(ERR, PMD, "mbuf alloc failed with prod=0x%x\n", prod);
		rc = -ENOMEM;
		goto next_rx;
	}

	/*
	 * All MBUFs are allocated with the same size under DPDK,
	 * no optimization for rx_copy_thresh
	 */

	/* AGG buf operation is deferred */
	/* EW - VLAN reception. Must compare against the ol_flags */

	*rx_pkt = mbuf;
next_rx:
	rxr->rx_prod = RING_NEXT(rxr->rx_ring_struct, prod);

	*raw_cons = tmp_raw_cons;

	return rc;
}
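bnxt_rx_pkt() pulls one packet per call and leaves the doorbell writes to its caller. The sketch below shows how a burst receive wrapper might drive it: loop until the completion ring runs dry (-EBUSY), count only successful packets, then update the completion consumer and ring the doorbells once per burst. The B_CP_DIS_DB and B_RX_DB macros and the rx_doorbell field are assumed names, not taken from this excerpt.

/* Sketch of a burst RX entry point built around bnxt_rx_pkt() */
uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
			uint16_t nb_pkts)
{
	struct bnxt_rx_queue *rxq = rx_queue;
	struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
	uint32_t raw_cons = cpr->cp_raw_cons;
	uint16_t nb_rx_pkts = 0;

	while (nb_rx_pkts < nb_pkts) {
		/* bnxt_rx_pkt() is declared uint16_t above, so sign-extend
		 * its status to recover the negative error codes it returns.
		 */
		int rc = (int16_t)bnxt_rx_pkt(&rx_pkts[nb_rx_pkts], rxq,
					      &raw_cons);

		if (rc == -EBUSY)
			break;		/* no more valid completions */
		if (rc == 0)
			nb_rx_pkts++;
		/* -EIO / -ENOMEM: completion consumed, packet dropped */
	}
	if (raw_cons != cpr->cp_raw_cons) {
		cpr->cp_raw_cons = raw_cons;
		/* Acknowledge consumed completions and publish the new RX
		 * producer; doorbell macro and field names are assumptions.
		 */
		B_CP_DIS_DB(cpr, cpr->cp_raw_cons);
		B_RX_DB(rxq->rx_ring->rx_doorbell, rxq->rx_ring->rx_prod);
	}
	return nb_rx_pkts;
}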