int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; struct skb_frag_struct *skb_frags; struct mlx4_en_rx_desc *rx_desc; struct sk_buff *skb; int index; int nr; unsigned int length; int polled = 0; int ip_summed; if (!priv->port_up) return 0; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ index = cq->mcq.cons_index & ring->size_mask; cqe = &cq->buf[index]; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cq->mcq.cons_index & cq->size)) { skb_frags = ring->rx_info + (index << priv->log_rx_info); rx_desc = ring->buf + (index << ring->log_stride); /* * make sure we read the CQE after we read the ownership bit */ rmb(); /* Drop packet on bad receive or bad checksum */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { en_err(priv, "CQE completed in error - vendor " "syndrom:%d syndrom:%d\n", ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome, ((struct mlx4_err_cqe *) cqe)->syndrome); goto next; } if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); goto next; } /* * Packet is OK - process it. */ length = be32_to_cpu(cqe->byte_cnt); ring->bytes += length; ring->packets++; if (likely(dev->features & NETIF_F_RXCSUM)) { if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && (cqe->checksum == cpu_to_be16(0xffff))) { priv->port_stats.rx_chksum_good++; /* This packet is eligible for LRO if it is: * - DIX Ethernet (type interpretation) * - TCP/IP (v4) * - without IP options * - not an IP fragment */ if (dev->features & NETIF_F_GRO) { struct sk_buff *gro_skb = napi_get_frags(&cq->napi); if (!gro_skb) goto next; nr = mlx4_en_complete_rx_desc( priv, rx_desc, skb_frags, skb_shinfo(gro_skb)->frags, ring->page_alloc, length); if (!nr) goto next; skb_shinfo(gro_skb)->nr_frags = nr; gro_skb->len = length; gro_skb->data_len = length; gro_skb->truesize += length; gro_skb->ip_summed = CHECKSUM_UNNECESSARY; if (priv->vlgrp && (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK))) vlan_gro_frags(&cq->napi, priv->vlgrp, be16_to_cpu(cqe->sl_vid)); else napi_gro_frags(&cq->napi); goto next; } /* LRO not possible, complete processing here */ ip_summed = CHECKSUM_UNNECESSARY; } else { ip_summed = CHECKSUM_NONE; priv->port_stats.rx_chksum_none++; } } else { ip_summed = CHECKSUM_NONE; priv->port_stats.rx_chksum_none++; } skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags, ring->page_alloc, length); if (!skb) { priv->stats.rx_dropped++; goto next; } if (unlikely(priv->validate_loopback)) { validate_loopback(priv, skb); goto next; } skb->ip_summed = ip_summed; skb->protocol = eth_type_trans(skb, dev); skb_record_rx_queue(skb, cq->ring); /* Push it up the stack */ if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK)) { vlan_hwaccel_receive_skb(skb, priv->vlgrp, be16_to_cpu(cqe->sl_vid)); } else netif_receive_skb(skb); next: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = &cq->buf[index]; if (++polled == budget) { /* We are here because we reached the NAPI budget - * flush only pending LRO sessions */ goto out; } } out: AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ ring->cons = cq->mcq.cons_index; ring->prod += polled; /* Polled descriptors were realocated in place */ mlx4_en_update_rx_prod_db(ring); return polled; }
static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; struct mlx4_cqe *cqe = cq->buf; u16 index; u16 new_index; u32 txbbs_skipped = 0; u32 cq_last_sav; /* index always points to the first TXBB of the last polled descriptor */ index = ring->cons & ring->size_mask; new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask; if (index == new_index) return; if (!priv->port_up) return; /* * We use a two-stage loop: * - the first samples the HW-updated CQE * - the second frees TXBBs until the last sample * This lets us amortize CQE cache misses, while still polling the CQ * until is quiescent. */ cq_last_sav = mcq->cons_index; do { do { /* Skip over last polled CQE */ index = (index + ring->last_nr_txbb) & ring->size_mask; txbbs_skipped += ring->last_nr_txbb; /* Poll next CQE */ ring->last_nr_txbb = mlx4_en_free_tx_desc( priv, ring, index, !!((ring->cons + txbbs_skipped) & ring->size)); ++mcq->cons_index; } while (index != new_index); new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask; } while (index != new_index); AVG_PERF_COUNTER(priv->pstats.tx_coal_avg, (u32) (mcq->cons_index - cq_last_sav)); /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. */ mlx4_cq_set_ci(mcq); wmb(); ring->cons += txbbs_skipped; /* Wakeup Tx queue if this ring stopped it */ if (unlikely(ring->blocked)) { if ((u32) (ring->prod - ring->cons) <= ring->size - HEADROOM - MAX_DESC_TXBBS) { ring->blocked = 0; netif_tx_wake_queue(netdev_get_tx_queue(dev, cq->ring)); priv->port_stats.wake_queue++; } } }
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; struct mbuf **mb_list; struct mlx4_en_rx_desc *rx_desc; struct mbuf *mb; #ifdef INET struct lro_entry *queued; #endif int index; unsigned int length; int polled = 0; if (!priv->port_up) return 0; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ index = cq->mcq.cons_index & ring->size_mask; cqe = &cq->buf[index]; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cq->mcq.cons_index & cq->size)) { mb_list = ring->rx_info + (index << priv->log_rx_info); rx_desc = ring->buf + (index << ring->log_stride); /* * make sure we read the CQE after we read the ownership bit */ rmb(); if (invalid_cqe(priv, cqe)) goto next; /* * Packet is OK - process it. */ length = be32_to_cpu(cqe->byte_cnt); mb = mlx4_en_rx_mb(priv, rx_desc, mb_list, length); if (!mb) { ring->errors++; goto next; } ring->bytes += length; ring->packets++; if (unlikely(priv->validate_loopback)) { validate_loopback(priv, mb); goto next; } mb->m_pkthdr.flowid = cq->ring; mb->m_flags |= M_FLOWID; mb->m_pkthdr.rcvif = dev; if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->sl_vid); mb->m_flags |= M_VLANTAG; } if (likely(priv->rx_csum) && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && (cqe->checksum == cpu_to_be16(0xffff))) { priv->port_stats.rx_chksum_good++; mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mb->m_pkthdr.csum_data = htons(0xffff); /* This packet is eligible for LRO if it is: * - DIX Ethernet (type interpretation) * - TCP/IP (v4) * - without IP options * - not an IP fragment */ #ifdef INET if (mlx4_en_can_lro(cqe->status) && (dev->if_capenable & IFCAP_LRO)) { if (ring->lro.lro_cnt != 0 && tcp_lro_rx(&ring->lro, mb, 0) == 0) goto next; } #endif /* LRO not possible, complete processing here */ INC_PERF_COUNTER(priv->pstats.lro_misses); } else { mb->m_pkthdr.csum_flags = 0; priv->port_stats.rx_chksum_none++; #ifdef INET if (priv->ip_reasm && cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4) && !mlx4_en_rx_frags(priv, ring, mb, cqe)) goto next; #endif } /* Push it up the stack */ dev->if_input(dev, mb); next: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = &cq->buf[index]; if (++polled == budget) goto out; } /* Flush all pending IP reassembly sessions */ out: #ifdef INET mlx4_en_flush_frags(priv, ring); while ((queued = SLIST_FIRST(&ring->lro.lro_active)) != NULL) { SLIST_REMOVE_HEAD(&ring->lro.lro_active, next); tcp_lro_flush(&ring->lro, queued); } #endif AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ ring->cons = cq->mcq.cons_index; ring->prod += polled; /* Polled descriptors were realocated in place */ mlx4_en_update_rx_prod_db(ring); return polled; }
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct mlx4_en_priv *priv = netdev_priv(dev); union mlx4_wqe_qpn_vlan qpn_vlan = {}; struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; struct mlx4_wqe_data_seg *data; struct mlx4_en_tx_info *tx_info; int tx_ind; int nr_txbb; int desc_size; int real_size; u32 index, bf_index; __be32 op_own; int lso_header_size; void *fragptr = NULL; bool bounce = false; bool send_doorbell; bool stop_queue; bool inline_ok; u8 data_offset; u32 ring_cons; bool bf_ok; tx_ind = skb_get_queue_mapping(skb); ring = priv->tx_ring[TX][tx_ind]; if (unlikely(!priv->port_up)) goto tx_drop; /* fetch ring->cons far ahead before needing it to avoid stall */ ring_cons = READ_ONCE(ring->cons); real_size = get_real_size(skb, shinfo, dev, &lso_header_size, &inline_ok, &fragptr); if (unlikely(!real_size)) goto tx_drop_count; /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); nr_txbb = desc_size >> LOG_TXBB_SIZE; if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { if (netif_msg_tx_err(priv)) en_warn(priv, "Oversized header or SG list\n"); goto tx_drop_count; } bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { u16 vlan_proto; qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb)); vlan_proto = be16_to_cpu(skb->vlan_proto); if (vlan_proto == ETH_P_8021AD) qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; else if (vlan_proto == ETH_P_8021Q) qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; else qpn_vlan.ins_vlan = 0; bf_ok = false; } netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); /* Track current inflight packets for performance analysis */ AVG_PERF_COUNTER(priv->pstats.inflight_avg, (u32)(ring->prod - ring_cons - 1)); /* Packet is good - grab an index and transmit it */ index = ring->prod & ring->size_mask; bf_index = ring->prod; /* See if we have enough space for whole descriptor TXBB for setting * SW ownership on next descriptor; if not, use a bounce buffer. */ if (likely(index + nr_txbb <= ring->size)) tx_desc = ring->buf + (index << LOG_TXBB_SIZE); else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; bf_ok = false; } /* Save skb in tx_info ring */ tx_info = &ring->tx_info[index]; tx_info->skb = skb; tx_info->nr_txbb = nr_txbb; if (!lso_header_size) { data = &tx_desc->data; data_offset = offsetof(struct mlx4_en_tx_desc, data); } else {
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; struct skb_frag_struct *skb_frags; struct mlx4_en_rx_desc *rx_desc; struct sk_buff *skb; int index; unsigned int length; int polled = 0; int ip_summed; if (!priv->port_up) return 0; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ index = cq->mcq.cons_index & ring->size_mask; cqe = &cq->buf[index]; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cq->mcq.cons_index & cq->size)) { skb_frags = ring->rx_info + (index << priv->log_rx_info); rx_desc = ring->buf + (index << ring->log_stride); /* * make sure we read the CQE after we read the ownership bit */ rmb(); /* Drop packet on bad receive or bad checksum */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { mlx4_err(mdev, "CQE completed in error - vendor " "syndrom:%d syndrom:%d\n", ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome, ((struct mlx4_err_cqe *) cqe)->syndrome); goto next; } if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { mlx4_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); goto next; } /* * Packet is OK - process it. */ length = be32_to_cpu(cqe->byte_cnt); ring->bytes += length; ring->packets++; if (likely(priv->rx_csum)) { if ((cqe->status & MLX4_CQE_STATUS_IPOK) && (cqe->checksum == 0xffff)) { priv->port_stats.rx_chksum_good++; if (mdev->profile.num_lro && !mlx4_en_lro_rx(priv, ring, rx_desc, skb_frags, length, cqe)) goto next; /* LRO not possible, complete processing here */ ip_summed = CHECKSUM_UNNECESSARY; INC_PERF_COUNTER(priv->pstats.lro_misses); } else { ip_summed = CHECKSUM_NONE; priv->port_stats.rx_chksum_none++; } } else { ip_summed = CHECKSUM_NONE; priv->port_stats.rx_chksum_none++; } skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags, ring->page_alloc, length); if (!skb) { priv->stats.rx_dropped++; goto next; } skb->ip_summed = ip_summed; skb->protocol = eth_type_trans(skb, dev); /* Push it up the stack */ if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK)) { vlan_hwaccel_receive_skb(skb, priv->vlgrp, be16_to_cpu(cqe->sl_vid)); } else netif_receive_skb(skb); dev->last_rx = jiffies; next: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = &cq->buf[index]; if (++polled == budget) { /* We are here because we reached the NAPI budget - * flush only pending LRO sessions */ if (mdev->profile.num_lro) mlx4_en_lro_flush(priv, ring, 0); goto out; } } /* If CQ is empty flush all LRO sessions unconditionally */ if (mdev->profile.num_lro) mlx4_en_lro_flush(priv, ring, 1); out: AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ ring->cons = cq->mcq.cons_index; ring->prod += polled; /* Polled descriptors were realocated in place */ if (unlikely(!ring->full)) { mlx4_en_copy_desc(priv, ring, ring->cons - polled, ring->prod - polled, polled); mlx4_en_fill_rx_buf(dev, ring); } mlx4_en_update_rx_prod_db(ring); return polled; }