int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct mlx4_ib_cq *cq = to_mcq(ibcq); struct mlx4_ib_qp *cur_qp = NULL; unsigned long flags; int npolled; int err = 0; spin_lock_irqsave(&cq->lock, flags); for (npolled = 0; npolled < num_entries; ++npolled) { err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled); if (err) break; } if (npolled) mlx4_cq_set_ci(&cq->mcq); spin_unlock_irqrestore(&cq->lock, flags); if (err == 0 || err == -EAGAIN) return npolled; else return err; }
/*
 * Remove every CQE that belongs to QP number @qpn from @cq.
 *
 * Matching entries are dropped by sliding the older, non-matching
 * entries over them; the consumer index is then advanced by the number
 * of dropped entries.  When @srq is non-NULL, the SRQ WQE referenced by
 * each dropped non-send CQE is released through mlx4_ib_free_srq_wqe().
 */
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
{
	struct mlx4_cqe *src;
	struct mlx4_cqe *dst;
	u32 pos;
	u8 saved_owner;
	int removed = 0;

	/*
	 * Locate the current producer position so we know where the
	 * sweep starts.  Entries the HW adds after this scan do not
	 * matter: the QP of interest is already in RESET, so nothing
	 * new can come from it.
	 */
	pos = cq->mcq.cons_index;
	while (get_sw_cqe(cq, pos)) {
		if (pos == cq->mcq.cons_index + cq->ibcq.cqe)
			break;
		++pos;
	}

	/*
	 * Walk backwards from the producer position to the consumer
	 * index.  Matching CQEs are counted; non-matching ones are moved
	 * forward by the number of matches seen so far.
	 */
	for (;;) {
		--pos;
		if ((int) pos - (int) cq->mcq.cons_index < 0)
			break;

		src = get_cqe(cq, pos & cq->ibcq.cqe);

		if ((be32_to_cpu(src->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != qpn) {
			if (removed) {
				dst = get_cqe(cq, (pos + removed) & cq->ibcq.cqe);
				/* Keep the destination slot's ownership bit. */
				saved_owner = dst->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
				memcpy(dst, src, sizeof *src);
				dst->owner_sr_opcode = saved_owner |
					(dst->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
			}
			continue;
		}

		if (srq && !(src->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
			mlx4_ib_free_srq_wqe(srq, be16_to_cpu(src->wqe_index));
		++removed;
	}

	if (!removed)
		return;

	cq->mcq.cons_index += removed;
	/*
	 * The buffer contents must be updated before the consumer
	 * index is.
	 */
	wmb();
	mlx4_cq_set_ci(&cq->mcq);
}
static int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; struct mlx4_cqe *cqe; u16 index; u16 new_index, ring_index, stamp_index; u32 txbbs_skipped = 0; #ifndef CONFIG_WQE_FORMAT_1 u32 txbbs_stamp = 0; #endif u32 cons_index = mcq->cons_index; int size = cq->size; u32 size_mask = ring->size_mask; struct mlx4_cqe *buf = cq->buf; u32 packets = 0; u32 bytes = 0; int factor = priv->cqe_factor; u64 timestamp = 0; int done = 0; if (!priv->port_up) return 0; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; ring_index = ring->cons & size_mask; stamp_index = ring_index; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) { /* * make sure we read the CQE after we read the * ownership bit */ rmb(); if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { en_err(priv, "CQE completed in error - vendor syndrom: 0x%x syndrom: 0x%x\n", ((struct mlx4_err_cqe *)cqe)-> vendor_err_syndrome, ((struct mlx4_err_cqe *)cqe)->syndrome); } /* Skip over last polled CQE */ new_index = be16_to_cpu(cqe->wqe_index) & size_mask; do { txbbs_skipped += ring->last_nr_txbb; ring_index = (ring_index + ring->last_nr_txbb) & size_mask; /* free next descriptor */ ring->last_nr_txbb = mlx4_en_free_tx_desc( priv, ring, ring_index, !!((ring->cons + txbbs_skipped) & ring->size), timestamp); #ifndef CONFIG_WQE_FORMAT_1 mlx4_en_stamp_wqe(priv, ring, stamp_index, !!((ring->cons + txbbs_stamp) & ring->size)); stamp_index = ring_index; txbbs_stamp = txbbs_skipped; #endif packets++; bytes += ring->tx_info[ring_index].nr_bytes; } while (ring_index != new_index); ++cons_index; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; } /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. 
*/ mcq->cons_index = cons_index; mlx4_cq_set_ci(mcq); wmb(); ring->cons += txbbs_skipped; /* Wakeup Tx queue if it was stopped and ring is not full */ if (unlikely(ring->blocked) && (ring->prod - ring->cons) <= ring->full_size) { ring->blocked = 0; #ifdef CONFIG_RATELIMIT if (cq->ring < priv->native_tx_ring_num) { if (atomic_fetchadd_int(&priv->blocked, -1) == 1) atomic_clear_int(&dev->if_drv_flags ,IFF_DRV_OACTIVE); priv->port_stats.wake_queue++; } #else if (atomic_fetchadd_int(&priv->blocked, -1) == 1) atomic_clear_int(&dev->if_drv_flags ,IFF_DRV_OACTIVE); priv->port_stats.wake_queue++; #endif ring->wake_queue++; } return done; }
/*
 * Process completed receive CQEs of @cq, handling at most @budget
 * entries in one call.
 *
 * Each usable completion is either handed to the GRO path
 * (napi_get_frags() / napi_gro_frags() / vlan_gro_frags()) or turned
 * into an skb by mlx4_en_rx_skb() and passed up with
 * netif_receive_skb() / vlan_hwaccel_receive_skb().  Error CQEs and
 * frames flagged with a bad FCS are skipped.
 *
 * Returns the number of CQEs consumed (0 when the port is down).
 */
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
	struct skb_frag_struct *skb_frags;
	struct mlx4_en_rx_desc *rx_desc;
	struct sk_buff *skb;
	int index;
	int nr;
	unsigned int length;
	int polled = 0;
	int ip_summed;

	if (!priv->port_up)
		return 0;

	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index' */
	index = cq->mcq.cons_index & ring->size_mask;
	cqe = &cq->buf[index];

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {

		/* Per-descriptor bookkeeping and HW descriptor for this index */
		skb_frags = ring->rx_info + (index << priv->log_rx_info);
		rx_desc = ring->buf + (index << ring->log_stride);

		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
		rmb();

		/* Drop packet on bad receive or bad checksum */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
						MLX4_CQE_OPCODE_ERROR)) {
			en_err(priv, "CQE completed in error - vendor "
				  "syndrom:%d syndrom:%d\n",
				  ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
				  ((struct mlx4_err_cqe *) cqe)->syndrome);
			goto next;
		}
		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
			en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
			goto next;
		}

		/*
		 * Packet is OK - process it.
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		ring->bytes += length;
		ring->packets++;

		if (likely(dev->features & NETIF_F_RXCSUM)) {
			if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
			    (cqe->checksum == cpu_to_be16(0xffff))) {
				priv->port_stats.rx_chksum_good++;
				/* Checksum-verified packets take the GRO frags
				 * path when NETIF_F_GRO is enabled on the
				 * device. */
				if (dev->features & NETIF_F_GRO) {
					struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
					/* No frags skb available: skip this completion */
					if (!gro_skb)
						goto next;

					nr = mlx4_en_complete_rx_desc(
						priv, rx_desc,
						skb_frags, skb_shinfo(gro_skb)->frags,
						ring->page_alloc, length);
					/* Zero fragments returned: skip this completion */
					if (!nr)
						goto next;

					skb_shinfo(gro_skb)->nr_frags = nr;
					gro_skb->len = length;
					gro_skb->data_len = length;
					gro_skb->truesize += length;
					gro_skb->ip_summed = CHECKSUM_UNNECESSARY;

					if (priv->vlgrp && (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)))
						vlan_gro_frags(&cq->napi, priv->vlgrp, be16_to_cpu(cqe->sl_vid));
					else
						napi_gro_frags(&cq->napi);

					goto next;
				}

				/* GRO not enabled, complete processing here */
				ip_summed = CHECKSUM_UNNECESSARY;
			} else {
				ip_summed = CHECKSUM_NONE;
				priv->port_stats.rx_chksum_none++;
			}
		} else {
			ip_summed = CHECKSUM_NONE;
			priv->port_stats.rx_chksum_none++;
		}

		skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags,
				     ring->page_alloc, length);
		if (!skb) {
			priv->stats.rx_dropped++;
			goto next;
		}

		/* In loopback-validation mode the skb goes to the self-test
		 * hook instead of the network stack. */
		if (unlikely(priv->validate_loopback)) {
			validate_loopback(priv, skb);
			goto next;
		}

		skb->ip_summed = ip_summed;
		skb->protocol = eth_type_trans(skb, dev);
		skb_record_rx_queue(skb, cq->ring);

		/* Push it up the stack */
		if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) &
				    MLX4_CQE_VLAN_PRESENT_MASK)) {
			vlan_hwaccel_receive_skb(skb, priv->vlgrp,
						be16_to_cpu(cqe->sl_vid));
		} else
			netif_receive_skb(skb);

next:
		/* Advance to the next CQE; this CQE counts against the budget
		 * whether or not a packet was delivered. */
		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = &cq->buf[index];
		if (++polled == budget) {
			/* We are here because we reached the NAPI budget;
			 * nothing is flushed in this variant, just stop
			 * polling. */
			goto out;
		}
	}

out:
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mlx4_cq_set_ci(&cq->mcq);
	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = cq->mcq.cons_index;
	ring->prod += polled; /* Polled descriptors were reallocated in place */
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}
static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; struct mlx4_cqe *cqe = cq->buf; u16 index; u16 new_index; u32 txbbs_skipped = 0; u32 cq_last_sav; /* index always points to the first TXBB of the last polled descriptor */ index = ring->cons & ring->size_mask; new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask; if (index == new_index) return; if (!priv->port_up) return; /* * We use a two-stage loop: * - the first samples the HW-updated CQE * - the second frees TXBBs until the last sample * This lets us amortize CQE cache misses, while still polling the CQ * until is quiescent. */ cq_last_sav = mcq->cons_index; do { do { /* Skip over last polled CQE */ index = (index + ring->last_nr_txbb) & ring->size_mask; txbbs_skipped += ring->last_nr_txbb; /* Poll next CQE */ ring->last_nr_txbb = mlx4_en_free_tx_desc( priv, ring, index, !!((ring->cons + txbbs_skipped) & ring->size)); ++mcq->cons_index; } while (index != new_index); new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask; } while (index != new_index); AVG_PERF_COUNTER(priv->pstats.tx_coal_avg, (u32) (mcq->cons_index - cq_last_sav)); /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. */ mlx4_cq_set_ci(mcq); wmb(); ring->cons += txbbs_skipped; /* Wakeup Tx queue if this ring stopped it */ if (unlikely(ring->blocked)) { if ((u32) (ring->prod - ring->cons) <= ring->size - HEADROOM - MAX_DESC_TXBBS) { ring->blocked = 0; netif_tx_wake_queue(netdev_get_tx_queue(dev, cq->ring)); priv->port_stats.wake_queue++; } } }
/*
 * Reap completed TX CQEs of @cq and free the descriptors they cover,
 * freeing at most priv->tx_work_limit descriptors per call.
 *
 * ring->last_nr_txbb and ring->cons are read once into locals at entry
 * and written back once at the end.  The queue-accounting and wake-up
 * code is compiled out (#if 0, AKAROS_PORT), so 'packets' and 'bytes'
 * are accumulated but not consumed in this build.
 *
 * Returns true when the port is down or when fewer descriptors than
 * the work limit were freed; false when the limit was reached.
 */
static bool mlx4_en_process_tx_cq(struct ether *dev, struct mlx4_en_cq *cq)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
	struct mlx4_cqe *cqe;
	uint16_t index;
	uint16_t new_index, ring_index, stamp_index;
	uint32_t txbbs_skipped = 0;
	uint32_t txbbs_stamp = 0;
	uint32_t cons_index = mcq->cons_index;
	int size = cq->size;
	uint32_t size_mask = ring->size_mask;
	struct mlx4_cqe *buf = cq->buf;
	uint32_t packets = 0;
	uint32_t bytes = 0;
	int factor = priv->cqe_factor;
	uint64_t timestamp = 0;
	int done = 0;
	int budget = priv->tx_work_limit;
	uint32_t last_nr_txbb;
	uint32_t ring_cons;

	if (!priv->port_up)
		return true;

#if 0 // AKAROS_PORT
	netdev_txq_bql_complete_prefetchw(ring->tx_queue);
#endif

	index = cons_index & size_mask;
	cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
	/* Snapshot the ring state; it is written back once after the loop */
	last_nr_txbb = ACCESS_ONCE(ring->last_nr_txbb);
	ring_cons = ACCESS_ONCE(ring->cons);
	ring_index = ring_cons & size_mask;
	stamp_index = ring_index;

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
			cons_index & size) && (done < budget)) {
		/*
		 * make sure we read the CQE after we read the
		 * ownership bit
		 */
		bus_rmb();

		/* Error CQEs are logged, then handled like any other CQE */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
			     MLX4_CQE_OPCODE_ERROR)) {
			struct mlx4_err_cqe *cqe_err = (struct mlx4_err_cqe *)cqe;

			en_err(priv, "CQE error - vendor syndrome: 0x%x syndrome: 0x%x\n",
			       cqe_err->vendor_err_syndrome,
			       cqe_err->syndrome);
		}

		/* Skip over last polled CQE */
		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;

		do {
			txbbs_skipped += last_nr_txbb;
			ring_index = (ring_index + last_nr_txbb) & size_mask;
			/* 'timestamp' keeps its previous value when this
			 * descriptor did not request one */
			if (ring->tx_info[ring_index].ts_requested)
				timestamp = mlx4_en_get_cqe_ts(cqe);

			/* free next descriptor */
			last_nr_txbb = mlx4_en_free_tx_desc(
					priv, ring, ring_index,
					!!((ring_cons + txbbs_skipped) &
					ring->size), timestamp);

			/* Stamp the descriptor freed on the previous pass */
			mlx4_en_stamp_wqe(priv, ring, stamp_index,
					  !!((ring_cons + txbbs_stamp) &
						ring->size));
			stamp_index = ring_index;
			txbbs_stamp = txbbs_skipped;
			packets++;
			bytes += ring->tx_info[ring_index].nr_bytes;
		} while ((++done < budget) && (ring_index != new_index));

		/* The CQE is consumed even if the work limit ended the
		 * inner loop before ring_index reached new_index */
		++cons_index;
		index = cons_index & size_mask;
		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
	}

	/*
	 * To prevent CQ overflow we first update CQ consumer and only then
	 * the ring consumer.
	 */
	mcq->cons_index = cons_index;
	mlx4_cq_set_ci(mcq);
	wmb();

	/* we want to dirty this cache line once */
	ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
	ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;

#if 0 // AKAROS_PORT
	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);

	/*
	 * Wakeup Tx queue if this stopped, and at least 1 packet
	 * was completed
	 */
	if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) {
		netif_tx_wake_queue(ring->tx_queue);
		ring->wake_queue++;
	}
#endif

	return done < budget;
}
int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; struct mlx4_cqe *cqe; u16 index; u16 new_index, ring_index; u32 txbbs_skipped = 0; u32 cons_index = mcq->cons_index; int size = cq->size; u32 size_mask = ring->size_mask; struct mlx4_cqe *buf = cq->buf; int factor = priv->cqe_factor; int done = 0; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; ring_index = ring->cons & size_mask; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size) && done < budget) { /* * make sure we read the CQE after we read the * ownership bit */ rmb(); /* Skip over last polled CQE */ new_index = be16_to_cpu(cqe->wqe_index) & size_mask; do { txbbs_skipped += ring->last_nr_txbb; ring_index = (ring_index + ring->last_nr_txbb) & size_mask; /* free next descriptor */ ring->last_nr_txbb = mlx4_en_free_tx_desc( priv, ring, ring_index, !!((ring->cons + txbbs_skipped) & ring->size)); } while ((++done < budget) && ring_index != new_index); ++cons_index; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; } /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. */ mcq->cons_index = cons_index; mlx4_cq_set_ci(mcq); wmb(); ring->cons += txbbs_skipped; atomic_sub(txbbs_skipped, &ring->inflight); /* Wakeup Tx queue if this ring stopped it */ if (unlikely(ring->blocked && txbbs_skipped > 0)) { ring->blocked = 0; #ifndef __VMKERNEL_MLX4_EN_TX_HASH__ netif_tx_wake_queue(netdev_get_tx_queue(dev, cq->ring)); #else netif_tx_wake_queue(netdev_get_tx_queue(dev, ring->reported_index)); #endif /* NOT __VMKERNEL_MLX4_EN_TX_HASH__ */ priv->port_stats.wake_queue++; } return done; }
/*
 * Release the page behind TX descriptor @index by offering it to the
 * ring's recycle ring.
 *
 * If mlx4_en_rx_recycle() does not take the frame, the page is
 * DMA-unmapped and its reference dropped here.  @timestamp and
 * @napi_mode are not used by this function.
 *
 * Returns the descriptor's TXBB count (tx_info->nr_txbb).
 */
u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
			    struct mlx4_en_tx_ring *ring,
			    int index, u64 timestamp,
			    int napi_mode)
{
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	struct mlx4_en_rx_alloc frame = {
		.page = tx_info->page,
		.dma = tx_info->map0_dma,
	};

	if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
		dma_unmap_page(priv->ddev, tx_info->map0_dma,
			       PAGE_SIZE, priv->dma_dir);
		put_page(tx_info->page);
	}

	return tx_info->nr_txbb;
}

/*
 * Free every descriptor still outstanding on @ring (from the consumer
 * up to the producer) through the ring's free_tx_desc callback.
 *
 * Returns the number of descriptors freed, or 0 when the consumer and
 * producer indices are inconsistent (distance larger than the ring
 * size), in which case nothing is freed.
 */
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int cnt = 0;

	/* Skip last polled descriptor */
	ring->cons += ring->last_nr_txbb;
	en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
		 ring->cons, ring->prod);

	/* Unsigned distance check, valid across index wrap-around */
	if ((u32) (ring->prod - ring->cons) > ring->size) {
		if (netif_msg_tx_err(priv))
			en_warn(priv, "Tx consumer passed producer!\n");
		return 0;
	}

	while (ring->cons != ring->prod) {
		ring->last_nr_txbb = ring->free_tx_desc(priv, ring,
						ring->cons & ring->size_mask,
						0, 0 /* Non-NAPI caller */);
		ring->cons += ring->last_nr_txbb;
		cnt++;
	}

	/* Rings without a tx_queue skip the queue reset */
	if (ring->tx_queue)
		netdev_tx_reset_queue(ring->tx_queue);

	if (cnt)
		en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);

	return cnt;
}

/*
 * Reap completed TX CQEs of @cq and free the descriptors they cover.
 *
 * The amount of work per call is bounded by priv->tx_work_limit;
 * @napi_budget is only forwarded to the ring's free_tx_desc callback.
 * ring->last_nr_txbb and ring->cons are read once at entry and written
 * back once after the sweep.  For TX_XDP queues the function returns
 * before the netdev queue accounting and wake-up.
 *
 * Returns true when the port is down or fewer descriptors than the
 * work limit were freed; false when the limit was reached.
 */
bool mlx4_en_process_tx_cq(struct net_device *dev,
			   struct mlx4_en_cq *cq, int napi_budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
	struct mlx4_cqe *cqe;
	u16 index, ring_index, stamp_index;
	u32 txbbs_skipped = 0;
	u32 txbbs_stamp = 0;
	u32 cons_index = mcq->cons_index;
	int size = cq->size;
	u32 size_mask = ring->size_mask;
	struct mlx4_cqe *buf = cq->buf;
	u32 packets = 0;
	u32 bytes = 0;
	int factor = priv->cqe_factor;
	int done = 0;
	int budget = priv->tx_work_limit;
	u32 last_nr_txbb;
	u32 ring_cons;

	if (unlikely(!priv->port_up))
		return true;

	netdev_txq_bql_complete_prefetchw(ring->tx_queue);

	index = cons_index & size_mask;
	cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
	/* Snapshot the ring state; it is written back once after the loop */
	last_nr_txbb = READ_ONCE(ring->last_nr_txbb);
	ring_cons = READ_ONCE(ring->cons);
	ring_index = ring_cons & size_mask;
	stamp_index = ring_index;

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
			cons_index & size) && (done < budget)) {
		u16 new_index;

		/*
		 * make sure we read the CQE after we read the
		 * ownership bit
		 */
		dma_rmb();

		/* Error CQEs are logged, then handled like any other CQE */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
			     MLX4_CQE_OPCODE_ERROR)) {
			struct mlx4_err_cqe *cqe_err = (struct mlx4_err_cqe *)cqe;

			en_err(priv, "CQE error - vendor syndrome: 0x%x syndrome: 0x%x\n",
			       cqe_err->vendor_err_syndrome,
			       cqe_err->syndrome);
		}

		/* Skip over last polled CQE */
		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;

		do {
			u64 timestamp = 0;

			txbbs_skipped += last_nr_txbb;
			ring_index = (ring_index + last_nr_txbb) & size_mask;

			if (unlikely(ring->tx_info[ring_index].ts_requested))
				timestamp = mlx4_en_get_cqe_ts(cqe);

			/* free next descriptor */
			last_nr_txbb = ring->free_tx_desc(
					priv, ring, ring_index,
					timestamp, napi_budget);

			/* Stamp the descriptor freed on the previous pass */
			mlx4_en_stamp_wqe(priv, ring, stamp_index,
					  !!((ring_cons + txbbs_stamp) &
						ring->size));
			stamp_index = ring_index;
			txbbs_stamp = txbbs_skipped;
			packets++;
			bytes += ring->tx_info[ring_index].nr_bytes;
		} while ((++done < budget) && (ring_index != new_index));

		/* The CQE is consumed even if the work limit ended the
		 * inner loop before ring_index reached new_index */
		++cons_index;
		index = cons_index & size_mask;
		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
	}

	/*
	 * To prevent CQ overflow we first update CQ consumer and only then
	 * the ring consumer.
	 */
	mcq->cons_index = cons_index;
	mlx4_cq_set_ci(mcq);
	wmb();

	/* we want to dirty this cache line once */
	WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb);
	WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);

	/* XDP TX queues skip the netdev queue accounting below */
	if (cq->type == TX_XDP)
		return done < budget;

	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);

	/* Wakeup Tx queue if this stopped, and ring is not full.
	 */
	if (netif_tx_queue_stopped(ring->tx_queue) &&
	    !mlx4_en_is_tx_ring_full(ring)) {
		netif_tx_wake_queue(ring->tx_queue);
		ring->wake_queue++;
	}

	return done < budget;
}

/*
 * TX completion interrupt handler: schedule NAPI polling while the
 * port is up, otherwise just re-arm the CQ.
 */
void mlx4_en_tx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);

	if (likely(priv->port_up))
		napi_schedule_irqoff(&cq->napi);
	else
		mlx4_en_arm_cq(priv, cq);
}

/*
 * TX CQ polling - called by NAPI
 *
 * Returns @budget when mlx4_en_process_tx_cq() reports unfinished
 * work; otherwise completes NAPI, re-arms the CQ and returns 0.
 */
int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
{
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct net_device *dev = cq->dev;
	struct mlx4_en_priv *priv = netdev_priv(dev);
	bool clean_complete;

	clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
	if (!clean_complete)
		return budget;

	napi_complete(napi);
	mlx4_en_arm_cq(priv, cq);

	return 0;
}

/*
 * Copy a descriptor of @desc_size bytes from the ring's bounce buffer
 * into the ring buffer, where the descriptor starts at TXBB @index and
 * wraps around the end of the ring.
 *
 * 'copy' is the number of bytes between @index and the end of the
 * ring.  The wrapped tail is written to the start of the ring first,
 * then the head is written at @index.  Both loops copy 32 bits at a
 * time from the highest offset downwards and issue wmb() whenever the
 * offset is TXBB-aligned.  The head loop stops at offset 4, so the
 * first 32-bit word of the descriptor is not written here.
 * NOTE(review): presumably the caller writes that word last to hand
 * the descriptor to HW -- confirm against the caller.
 *
 * Returns the descriptor's real location in the ring buffer.
 */
static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
						      struct mlx4_en_tx_ring *ring,
						      u32 index,
						      unsigned int desc_size)
{
	u32 copy = (ring->size - index) << LOG_TXBB_SIZE;
	int i;

	/* Wrapped part: bounce_buf[copy..desc_size) -> start of ring */
	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
		if ((i & (TXBB_SIZE - 1)) == 0)
			wmb();

		*((u32 *) (ring->buf + i)) =
			*((u32 *) (ring->bounce_buf + copy + i));
	}

	/* Head part: bounce_buf[4..copy) -> ring at @index */
	for (i = copy - 4; i >= 4 ; i -= 4) {
		if ((i & (TXBB_SIZE - 1)) == 0)
			wmb();

		*((u32 *)(ring->buf + (index << LOG_TXBB_SIZE) + i)) =
			*((u32 *) (ring->bounce_buf + i));
	}

	/* Return real descriptor location */
	return ring->buf + (index << LOG_TXBB_SIZE);
}
/*
 * Process completed receive CQEs of @cq, handling at most @budget
 * entries in one call (mbuf-based variant).
 *
 * Each valid completion is converted to an mbuf by mlx4_en_rx_mb(),
 * tagged with flow id, receive interface, VLAN tag and checksum
 * flags, and passed to dev->if_input() unless it was absorbed by
 * tcp_lro_rx() or mlx4_en_rx_frags() (both only with INET).
 *
 * Returns the number of CQEs consumed (0 when the port is down).
 */
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
	struct mbuf **mb_list;
	struct mlx4_en_rx_desc *rx_desc;
	struct mbuf *mb;
#ifdef INET
	struct lro_entry *queued;
#endif
	int index;
	unsigned int length;
	int polled = 0;

	if (!priv->port_up)
		return 0;

	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index' */
	index = cq->mcq.cons_index & ring->size_mask;
	cqe = &cq->buf[index];

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {

		/* Per-descriptor mbuf list and HW descriptor for this index */
		mb_list = ring->rx_info + (index << priv->log_rx_info);
		rx_desc = ring->buf + (index << ring->log_stride);

		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
		rmb();

		if (invalid_cqe(priv, cqe))
			goto next;

		/*
		 * Packet is OK - process it.
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		mb = mlx4_en_rx_mb(priv, rx_desc, mb_list, length);
		if (!mb) {
			ring->errors++;
			goto next;
		}

		/* Byte/packet counters only cover completions that
		 * produced an mbuf */
		ring->bytes += length;
		ring->packets++;

		/* In loopback-validation mode the mbuf goes to the
		 * self-test hook instead of the network stack */
		if (unlikely(priv->validate_loopback)) {
			validate_loopback(priv, mb);
			goto next;
		}

		mb->m_pkthdr.flowid = cq->ring;
		mb->m_flags |= M_FLOWID;
		mb->m_pkthdr.rcvif = dev;
		if (be32_to_cpu(cqe->vlan_my_qpn) &
		    MLX4_CQE_VLAN_PRESENT_MASK) {
			mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->sl_vid);
			mb->m_flags |= M_VLANTAG;
		}
		if (likely(priv->rx_csum) &&
		    (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
		    (cqe->checksum == cpu_to_be16(0xffff))) {
			priv->port_stats.rx_chksum_good++;
			mb->m_pkthdr.csum_flags =
			    CSUM_IP_CHECKED | CSUM_IP_VALID |
			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
			mb->m_pkthdr.csum_data = htons(0xffff);
			/* This packet is eligible for LRO if it is:
			 * - DIX Ethernet (type interpretation)
			 * - TCP/IP (v4)
			 * - without IP options
			 * - not an IP fragment
			 */
#ifdef INET
			if (mlx4_en_can_lro(cqe->status) &&
			    (dev->if_capenable & IFCAP_LRO)) {
				/* A zero return from tcp_lro_rx() means the
				 * mbuf is not delivered here */
				if (ring->lro.lro_cnt != 0 &&
				    tcp_lro_rx(&ring->lro, mb, 0) == 0)
					goto next;
			}
#endif
			/* LRO not possible, complete processing here */
			INC_PERF_COUNTER(priv->pstats.lro_misses);
		} else {
			mb->m_pkthdr.csum_flags = 0;
			priv->port_stats.rx_chksum_none++;
#ifdef INET
			/* With IP reassembly enabled, IPv4 packets are
			 * offered to mlx4_en_rx_frags(); a zero return
			 * skips delivery here */
			if (priv->ip_reasm &&
			    cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4) &&
			    !mlx4_en_rx_frags(priv, ring, mb, cqe))
				goto next;
#endif
		}

		/* Push it up the stack */
		dev->if_input(dev, mb);

next:
		/* Advance to the next CQE; this CQE counts against the budget
		 * whether or not a packet was delivered */
		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = &cq->buf[index];
		if (++polled == budget)
			goto out;
	}
	/* Flush all pending IP reassembly sessions */
out:
#ifdef INET
	mlx4_en_flush_frags(priv, ring);
	/* Drain and flush every active LRO entry */
	while ((queued = SLIST_FIRST(&ring->lro.lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&ring->lro.lro_active, next);
		tcp_lro_flush(&ring->lro, queued);
	}
#endif
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mlx4_cq_set_ci(&cq->mcq);
	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = cq->mcq.cons_index;
	ring->prod += polled; /* Polled descriptors were reallocated in place */
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}
/*
 * Reap all completed CQEs of a TX completion queue and free the
 * transmit descriptors they cover.
 *
 * Does nothing when the port is down.  After the sweep the CQ consumer
 * index is published, the ring consumer is advanced, and the Tx queue
 * is woken if this ring had blocked it and enough room is available.
 */
static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cq *mcq = &cq->mcq;
	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
	struct mlx4_cqe *cqe;
	u16 index;
	u16 new_index, ring_index;
	u32 txbbs_skipped = 0;
	u32 cons_index = mcq->cons_index;
	int size = cq->size;
	u32 size_mask = ring->size_mask;
	struct mlx4_cqe *buf = cq->buf;

	if (!priv->port_up)
		return;

	index = cons_index & size_mask;
	cqe = &buf[index];
	ring_index = ring->cons & size_mask;

	/* Process every CQE that passes the ownership test */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
			cons_index & size)) {
		/*
		 * The CQE contents must be read after the ownership bit.
		 */
		rmb();

		/* Ring position this CQE completes up to */
		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;

		do {
			txbbs_skipped += ring->last_nr_txbb;
			ring_index = (ring_index + ring->last_nr_txbb) & size_mask;
			/* Free the next descriptor; it returns its TXBB count */
			ring->last_nr_txbb = mlx4_en_free_tx_desc(
					priv, ring, ring_index,
					!!((ring->cons + txbbs_skipped) &
							ring->size));
		} while (ring_index != new_index);

		++cons_index;
		index = cons_index & size_mask;
		cqe = &buf[index];
	}

	/*
	 * The CQ consumer is updated first and the ring consumer only
	 * afterwards, with a write barrier in between.
	 */
	mcq->cons_index = cons_index;
	mlx4_cq_set_ci(mcq);
	wmb();
	ring->cons += txbbs_skipped;

	/* Wake the Tx queue if this ring blocked it and room is available */
	if (unlikely(ring->blocked)) {
		if ((u32) (ring->prod - ring->cons) <=
		     ring->size - HEADROOM - MAX_DESC_TXBBS) {
			ring->blocked = 0;
			netif_tx_wake_queue(netdev_get_tx_queue(dev, cq->ring));
			priv->port_stats.wake_queue++;
		}
	}
}
/*
 * Process completed receive CQEs of @cq, handling at most @budget
 * entries in one call (driver-LRO variant).
 *
 * Checksum-verified packets are first offered to mlx4_en_lro_rx() when
 * LRO is configured (mdev->profile.num_lro); everything else is turned
 * into an skb by mlx4_en_rx_skb() and passed up the stack.  LRO
 * sessions are flushed with argument 0 when the budget is exhausted
 * and with argument 1 when the CQ has been drained.
 *
 * Returns the number of CQEs consumed (0 when the port is down).
 */
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
	struct skb_frag_struct *skb_frags;
	struct mlx4_en_rx_desc *rx_desc;
	struct sk_buff *skb;
	int index;
	unsigned int length;
	int polled = 0;
	int ip_summed;

	if (!priv->port_up)
		return 0;

	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index' */
	index = cq->mcq.cons_index & ring->size_mask;
	cqe = &cq->buf[index];

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {

		/* Per-descriptor bookkeeping and HW descriptor for this index */
		skb_frags = ring->rx_info + (index << priv->log_rx_info);
		rx_desc = ring->buf + (index << ring->log_stride);

		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
		rmb();

		/* Drop packet on bad receive or bad checksum */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
						MLX4_CQE_OPCODE_ERROR)) {
			mlx4_err(mdev, "CQE completed in error - vendor "
				  "syndrom:%d syndrom:%d\n",
				  ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
				  ((struct mlx4_err_cqe *) cqe)->syndrome);
			goto next;
		}
		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
			mlx4_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
			goto next;
		}

		/*
		 * Packet is OK - process it.
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		ring->bytes += length;
		ring->packets++;

		if (likely(priv->rx_csum)) {
			/*
			 * NOTE(review): status and checksum are compared
			 * here without cpu_to_be16(); other variants of this
			 * routine wrap both constants. Whether that matters
			 * depends on how the fields and
			 * MLX4_CQE_STATUS_IPOK are declared for this
			 * version -- confirm against the headers.
			 */
			if ((cqe->status & MLX4_CQE_STATUS_IPOK) &&
			    (cqe->checksum == 0xffff)) {
				priv->port_stats.rx_chksum_good++;
				/* A zero return from mlx4_en_lro_rx() means
				 * the packet is not delivered here */
				if (mdev->profile.num_lro &&
				    !mlx4_en_lro_rx(priv, ring, rx_desc,
						    skb_frags, length, cqe))
					goto next;

				/* LRO not possible, complete processing here */
				ip_summed = CHECKSUM_UNNECESSARY;
				INC_PERF_COUNTER(priv->pstats.lro_misses);
			} else {
				ip_summed = CHECKSUM_NONE;
				priv->port_stats.rx_chksum_none++;
			}
		} else {
			ip_summed = CHECKSUM_NONE;
			priv->port_stats.rx_chksum_none++;
		}

		skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags,
				     ring->page_alloc, length);
		if (!skb) {
			priv->stats.rx_dropped++;
			goto next;
		}

		skb->ip_summed = ip_summed;
		skb->protocol = eth_type_trans(skb, dev);

		/* Push it up the stack */
		if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) &
				    MLX4_CQE_VLAN_PRESENT_MASK)) {
			vlan_hwaccel_receive_skb(skb, priv->vlgrp,
						be16_to_cpu(cqe->sl_vid));
		} else
			netif_receive_skb(skb);
		dev->last_rx = jiffies;

next:
		/* Advance to the next CQE; this CQE counts against the budget
		 * whether or not a packet was delivered */
		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = &cq->buf[index];
		if (++polled == budget) {
			/* We are here because we reached the NAPI budget -
			 * flush only pending LRO sessions */
			if (mdev->profile.num_lro)
				mlx4_en_lro_flush(priv, ring, 0);
			goto out;
		}
	}

	/* If CQ is empty flush all LRO sessions unconditionally */
	if (mdev->profile.num_lro)
		mlx4_en_lro_flush(priv, ring, 1);

out:
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mlx4_cq_set_ci(&cq->mcq);
	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = cq->mcq.cons_index;
	ring->prod += polled; /* Polled descriptors were reallocated in place */
	/* A ring that is not full has its polled descriptors copied
	 * (mlx4_en_copy_desc) and is then refilled */
	if (unlikely(!ring->full)) {
		mlx4_en_copy_desc(priv, ring, ring->cons - polled,
				  ring->prod - polled, polled);
		mlx4_en_fill_rx_buf(dev, ring);
	}
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}
static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; struct mlx4_cqe *cqe; u16 index; u16 new_index, ring_index; u32 txbbs_skipped = 0; u32 cons_index = mcq->cons_index; int size = cq->size; u32 size_mask = ring->size_mask; struct mlx4_cqe *buf = cq->buf; if (!priv->port_up) return; index = cons_index & size_mask; cqe = &buf[index]; ring_index = ring->cons & size_mask; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) { /* * make sure we read the CQE after we read the * ownership bit */ rmb(); /* Skip over last polled CQE */ new_index = be16_to_cpu(cqe->wqe_index) & size_mask; do { txbbs_skipped += ring->last_nr_txbb; ring_index = (ring_index + ring->last_nr_txbb) & size_mask; /* free next descriptor */ ring->last_nr_txbb = mlx4_en_free_tx_desc( priv, ring, ring_index, !!((ring->cons + txbbs_skipped) & ring->size)); } while (ring_index != new_index); ++cons_index; index = cons_index & size_mask; cqe = &buf[index]; } /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. */ mcq->cons_index = cons_index; mlx4_cq_set_ci(mcq); wmb(); ring->cons += txbbs_skipped; /* Wakeup Tx queue if this ring stopped it */ if (unlikely(ring->blocked)) { if ((u32) (ring->prod - ring->cons) <= ring->size - HEADROOM - MAX_DESC_TXBBS) { ring->blocked = 0; netif_tx_wake_queue(netdev_get_tx_queue(dev, cq->ring)); priv->port_stats.wake_queue++; } } }