static int iwl_pcie_gen2_tx_add_frags(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_tfh_tfd *tfd, struct iwl_cmd_meta *out_meta) { int i; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr_t tb_phys; int tb_idx; if (!skb_frag_size(frag)) continue; tb_phys = skb_frag_dma_map(trans->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb_phys))) return -ENOMEM; tb_idx = iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, skb_frag_size(frag)); trace_iwlwifi_dev_tx_tb(trans->dev, skb, skb_frag_address(frag), skb_frag_size(frag)); if (tb_idx < 0) return tb_idx; out_meta->tbs |= BIT(tb_idx); } return 0; }
static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, skb_frag_t *frag) { st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { st->dma_flags = 0; st->unmap_len = skb_frag_size(frag); st->in_len = skb_frag_size(frag); st->dma_addr = st->unmap_addr; return 0; } return -ENOMEM; }
static int map_skb(struct device *dev, const struct sk_buff *skb, struct mpodp_tx *tx) { const skb_frag_t *fp, *end; const struct skb_shared_info *si; int count = 1; dma_addr_t handler; sg_init_table(tx->sg, MAX_SKB_FRAGS + 1); handler = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(dev, handler)) goto out_err; sg_dma_address(&tx->sg[0]) = handler; sg_dma_len(&tx->sg[0]) = skb_headlen(skb); si = skb_shinfo(skb); end = &si->frags[si->nr_frags]; for (fp = si->frags; fp < end; fp++, count++) { handler = skb_frag_dma_map(dev, fp, 0, skb_frag_size(fp), DMA_TO_DEVICE); if (dma_mapping_error(dev, handler)) goto unwind; sg_dma_address(&tx->sg[count]) = handler; sg_dma_len(&tx->sg[count]) = skb_frag_size(fp); } sg_mark_end(&tx->sg[count - 1]); tx->sg_len = count; return 0; unwind: while (fp-- > si->frags) dma_unmap_page(dev, sg_dma_address(&tx->sg[--count]), skb_frag_size(fp), DMA_TO_DEVICE); dma_unmap_single(dev, sg_dma_address(&tx->sg[0]), skb_headlen(skb), DMA_TO_DEVICE); out_err: return -ENOMEM; }
static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) { struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg; u8 opcode = MLX5_OPCODE_SEND; dma_addr_t dma_addr = 0; bool bf = false; u16 headlen; u16 ds_cnt; u16 ihs; int i; memset(wqe, 0, sizeof(*wqe)); if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; else sq->stats.csum_offload_none++; if (sq->cc != sq->prev_cc) { sq->prev_cc = sq->cc; sq->bf_budget = (sq->cc == sq->pc) ? MLX5E_SQ_BF_BUDGET : 0; } if (skb_is_gso(skb)) { u32 payload_len; eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); opcode = MLX5_OPCODE_LSO; ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); payload_len = skb->len - ihs; MLX5E_TX_SKB_CB(skb)->num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; } else { bf = sq->bf_budget && !skb->xmit_more && !skb_shinfo(skb)->nr_frags; ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); MLX5E_TX_SKB_CB(skb)->num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } skb_copy_from_linear_data(skb, eseg->inline_hdr_start, ihs); skb_pull_inline(skb, ihs); eseg->inline_hdr_sz = cpu_to_be16(ihs); ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr_start), MLX5_SEND_WQE_DS); dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; MLX5E_TX_SKB_CB(skb)->num_dma = 0; headlen = skb_headlen(skb); if (headlen) { dma_addr = dma_map_single(sq->pdev, skb->data, headlen, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) goto dma_unmap_wqe_err; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(headlen); mlx5e_dma_push(sq, dma_addr, headlen); MLX5E_TX_SKB_CB(skb)->num_dma++; dseg++; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; int fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) goto dma_unmap_wqe_err; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(fsz); mlx5e_dma_push(sq, dma_addr, fsz); MLX5E_TX_SKB_CB(skb)->num_dma++; dseg++; } ds_cnt += MLX5E_TX_SKB_CB(skb)->num_dma; cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); sq->skb[pi] = skb; MLX5E_TX_SKB_CB(skb)->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += MLX5E_TX_SKB_CB(skb)->num_wqebbs; netdev_tx_sent_queue(sq->txq, MLX5E_TX_SKB_CB(skb)->num_bytes); if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) { netif_tx_stop_queue(sq->txq); sq->stats.stopped++; } if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { int bf_sz = 0; if (bf && sq->uar_bf_map) bf_sz = MLX5E_TX_SKB_CB(skb)->num_wqebbs << 3; cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; mlx5e_tx_notify_hw(sq, wqe, bf_sz); } /* fill sq edge with nops to avoid wqe wrap around */ while ((sq->pc & wq->sz_m1) > sq->edge) mlx5e_send_nop(sq, false); sq->bf_budget = bf ? sq->bf_budget - 1 : 0; sq->stats.packets++; return NETDEV_TX_OK; dma_unmap_wqe_err: sq->stats.dropped++; mlx5e_dma_unmap_wqe_err(sq, skb); dev_kfree_skb_any(skb); return NETDEV_TX_OK; }
/** * nfp_net_tx() - Main transmit entry point * @skb: SKB to transmit * @netdev: netdev structure * * Return: NETDEV_TX_OK on success. */ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); const struct skb_frag_struct *frag; struct nfp_net_r_vector *r_vec; struct nfp_net_tx_desc *txd, txdg; struct nfp_net_tx_buf *txbuf; struct nfp_net_tx_ring *tx_ring; struct netdev_queue *nd_q; dma_addr_t dma_addr; unsigned int fsize; int f, nr_frags; int wr_idx; u16 qidx; qidx = skb_get_queue_mapping(skb); tx_ring = &nn->tx_rings[qidx]; r_vec = tx_ring->r_vec; nd_q = netdev_get_tx_queue(nn->netdev, qidx); nr_frags = skb_shinfo(skb)->nr_frags; if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) { nn_warn_ratelimit(nn, "TX ring %d busy. wrp=%u rdp=%u\n", qidx, tx_ring->wr_p, tx_ring->rd_p); netif_tx_stop_queue(nd_q); u64_stats_update_begin(&r_vec->tx_sync); r_vec->tx_busy++; u64_stats_update_end(&r_vec->tx_sync); return NETDEV_TX_BUSY; } /* Start with the head skbuf */ dma_addr = dma_map_single(&nn->pdev->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(&nn->pdev->dev, dma_addr)) goto err_free; wr_idx = tx_ring->wr_p % tx_ring->cnt; /* Stash the soft descriptor of the head then initialize it */ txbuf = &tx_ring->txbufs[wr_idx]; txbuf->skb = skb; txbuf->dma_addr = dma_addr; txbuf->fidx = -1; txbuf->pkt_cnt = 1; txbuf->real_len = skb->len; /* Build TX descriptor */ txd = &tx_ring->txds[wr_idx]; txd->offset_eop = (nr_frags == 0) ? PCIE_DESC_TX_EOP : 0; txd->dma_len = cpu_to_le16(skb_headlen(skb)); nfp_desc_set_dma_addr(txd, dma_addr); txd->data_len = cpu_to_le16(skb->len); txd->flags = 0; txd->mss = 0; txd->l4_offset = 0; nfp_net_tx_tso(nn, r_vec, txbuf, txd, skb); nfp_net_tx_csum(nn, r_vec, txbuf, txd, skb); if (skb_vlan_tag_present(skb) && nn->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { txd->flags |= PCIE_DESC_TX_VLAN; txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); } /* Gather DMA */ if (nr_frags > 0) { /* all descs must match except for in addr, length and eop */ txdg = *txd; for (f = 0; f < nr_frags; f++) { frag = &skb_shinfo(skb)->frags[f]; fsize = skb_frag_size(frag); dma_addr = skb_frag_dma_map(&nn->pdev->dev, frag, 0, fsize, DMA_TO_DEVICE); if (dma_mapping_error(&nn->pdev->dev, dma_addr)) goto err_unmap; wr_idx = (wr_idx + 1) % tx_ring->cnt; tx_ring->txbufs[wr_idx].skb = skb; tx_ring->txbufs[wr_idx].dma_addr = dma_addr; tx_ring->txbufs[wr_idx].fidx = f; txd = &tx_ring->txds[wr_idx]; *txd = txdg; txd->dma_len = cpu_to_le16(fsize); nfp_desc_set_dma_addr(txd, dma_addr); txd->offset_eop = (f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0; } u64_stats_update_begin(&r_vec->tx_sync); r_vec->tx_gather++; u64_stats_update_end(&r_vec->tx_sync); } netdev_tx_sent_queue(nd_q, txbuf->real_len); tx_ring->wr_p += nr_frags + 1; if (nfp_net_tx_ring_should_stop(tx_ring)) nfp_net_tx_ring_stop(nd_q, tx_ring); tx_ring->wr_ptr_add += nr_frags + 1; if (!skb->xmit_more || netif_xmit_stopped(nd_q)) { /* force memory write before we let HW know */ wmb(); nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); tx_ring->wr_ptr_add = 0; } skb_tx_timestamp(skb); return NETDEV_TX_OK; err_unmap: --f; while (f >= 0) { frag = &skb_shinfo(skb)->frags[f]; dma_unmap_page(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr, skb_frag_size(frag), DMA_TO_DEVICE); tx_ring->txbufs[wr_idx].skb = NULL; tx_ring->txbufs[wr_idx].dma_addr = 0; tx_ring->txbufs[wr_idx].fidx = -2; wr_idx = wr_idx - 1; if (wr_idx < 0) wr_idx += tx_ring->cnt; } dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr, skb_headlen(skb), DMA_TO_DEVICE); tx_ring->txbufs[wr_idx].skb = NULL; tx_ring->txbufs[wr_idx].dma_addr = 0; tx_ring->txbufs[wr_idx].fidx = -2; err_free: nn_warn_ratelimit(nn, "Failed to map DMA TX buffer\n"); u64_stats_update_begin(&r_vec->tx_sync); r_vec->tx_errors++; u64_stats_update_end(&r_vec->tx_sync); dev_kfree_skb_any(skb); return NETDEV_TX_OK; }
static netdev_tx_t greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev) { struct greth_private *greth = netdev_priv(dev); struct greth_bd *bdp; u32 status, dma_addr; int curr_tx, nr_frags, i, err = NETDEV_TX_OK; unsigned long flags; u16 tx_last; nr_frags = skb_shinfo(skb)->nr_frags; tx_last = greth->tx_last; rmb(); /* tx_last is updated by the poll task */ if (greth_num_free_bds(tx_last, greth->tx_next) < nr_frags + 1) { netif_stop_queue(dev); err = NETDEV_TX_BUSY; goto out; } if (netif_msg_pktdata(greth)) greth_print_tx_packet(skb); if (unlikely(skb->len > MAX_FRAME_SIZE)) { dev->stats.tx_errors++; goto out; } /* Save skb pointer. */ greth->tx_skbuff[greth->tx_next] = skb; /* Linear buf */ if (nr_frags != 0) status = GRETH_TXBD_MORE; else status = GRETH_BD_IE; if (skb->ip_summed == CHECKSUM_PARTIAL) status |= GRETH_TXBD_CSALL; status |= skb_headlen(skb) & GRETH_BD_LEN; if (greth->tx_next == GRETH_TXBD_NUM_MASK) status |= GRETH_BD_WR; bdp = greth->tx_bd_base + greth->tx_next; greth_write_bd(&bdp->stat, status); dma_addr = dma_map_single(greth->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(greth->dev, dma_addr))) goto map_error; greth_write_bd(&bdp->addr, dma_addr); curr_tx = NEXT_TX(greth->tx_next); /* Frags */ for (i = 0; i < nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; greth->tx_skbuff[curr_tx] = NULL; bdp = greth->tx_bd_base + curr_tx; status = GRETH_BD_EN; if (skb->ip_summed == CHECKSUM_PARTIAL) status |= GRETH_TXBD_CSALL; status |= skb_frag_size(frag) & GRETH_BD_LEN; /* Wrap around descriptor ring */ if (curr_tx == GRETH_TXBD_NUM_MASK) status |= GRETH_BD_WR; /* More fragments left */ if (i < nr_frags - 1) status |= GRETH_TXBD_MORE; else status |= GRETH_BD_IE; /* enable IRQ on last fragment */ greth_write_bd(&bdp->stat, status); dma_addr = skb_frag_dma_map(greth->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(greth->dev, dma_addr))) goto frag_map_error; greth_write_bd(&bdp->addr, dma_addr); curr_tx = NEXT_TX(curr_tx); } wmb(); /* Enable the descriptor chain by enabling the first descriptor */ bdp = greth->tx_bd_base + greth->tx_next; greth_write_bd(&bdp->stat, greth_read_bd(&bdp->stat) | GRETH_BD_EN); spin_lock_irqsave(&greth->devlock, flags); /*save from poll/irq*/ greth->tx_next = curr_tx; greth_enable_tx_and_irq(greth); spin_unlock_irqrestore(&greth->devlock, flags); return NETDEV_TX_OK; frag_map_error: /* Unmap SKB mappings that succeeded and disable descriptor */ for (i = 0; greth->tx_next + i != curr_tx; i++) { bdp = greth->tx_bd_base + greth->tx_next + i; dma_unmap_single(greth->dev, greth_read_bd(&bdp->addr), greth_read_bd(&bdp->stat) & GRETH_BD_LEN, DMA_TO_DEVICE); greth_write_bd(&bdp->stat, 0); } map_error: if (net_ratelimit()) dev_warn(greth->dev, "Could not create TX DMA mapping\n"); dev_kfree_skb(skb); out: return err; }
static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac, struct bgmac_dma_ring *ring, struct sk_buff *skb) { struct device *dma_dev = bgmac->core->dma_dev; struct net_device *net_dev = bgmac->net_dev; int index = ring->end % BGMAC_TX_RING_SLOTS; struct bgmac_slot_info *slot = &ring->slots[index]; int nr_frags; u32 flags; int i; if (skb->len > BGMAC_DESC_CTL1_LEN) { bgmac_err(bgmac, "Too long skb (%d)\n", skb->len); goto err_drop; } if (skb->ip_summed == CHECKSUM_PARTIAL) skb_checksum_help(skb); nr_frags = skb_shinfo(skb)->nr_frags; /* ring->end - ring->start will return the number of valid slots, * even when ring->end overflows */ if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) { bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n"); netif_stop_queue(net_dev); return NETDEV_TX_BUSY; } slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr))) goto err_dma_head; flags = BGMAC_DESC_CTL0_SOF; if (!nr_frags) flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC; bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags); flags = 0; for (i = 0; i < nr_frags; i++) { struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; int len = skb_frag_size(frag); index = (index + 1) % BGMAC_TX_RING_SLOTS; slot = &ring->slots[index]; slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0, len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr))) goto err_dma; if (i == nr_frags - 1) flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC; bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags); } slot->skb = skb; ring->end += nr_frags + 1; netdev_sent_queue(net_dev, skb->len); wmb(); /* Increase ring->end to point empty slot. We tell hardware the first * slot it should *not* read. */ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX, ring->index_base + (ring->end % BGMAC_TX_RING_SLOTS) * sizeof(struct bgmac_dma_desc)); if (ring->end - ring->start >= BGMAC_TX_RING_SLOTS - 8) netif_stop_queue(net_dev); return NETDEV_TX_OK; err_dma: dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb), DMA_TO_DEVICE); while (i > 0) { int index = (ring->end + i) % BGMAC_TX_RING_SLOTS; struct bgmac_slot_info *slot = &ring->slots[index]; u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1); int len = ctl1 & BGMAC_DESC_CTL1_LEN; dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE); } err_dma_head: bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n", ring->mmio_base); err_drop: dev_kfree_skb(skb); return NETDEV_TX_OK; }
static inline int dma_xmit(struct sk_buff *skb, struct net_device *dev, END_DEVICE *ei_local, int gmac_no) { struct netdev_queue *txq; dma_addr_t frag_addr; u32 frag_size, nr_desc; u32 txd_info3, txd_info4; #if defined (CONFIG_RAETH_SG_DMA_TX) u32 i, nr_frags; const skb_frag_t *tx_frag; const struct skb_shared_info *shinfo; #else #define nr_frags 0 #endif #if defined (CONFIG_RA_HW_NAT) || defined (CONFIG_RA_HW_NAT_MODULE) if (ra_sw_nat_hook_tx != NULL) { #if defined (CONFIG_RA_HW_NAT_WIFI) || defined (CONFIG_RA_HW_NAT_PCI) if (IS_DPORT_PPE_VALID(skb)) gmac_no = PSE_PORT_PPE; else #endif if (ra_sw_nat_hook_tx(skb, gmac_no) == 0) { dev_kfree_skb(skb); return NETDEV_TX_OK; } } #endif txd_info3 = TX3_QDMA_SWC; if (gmac_no != PSE_PORT_PPE) { u32 QID = M2Q_table[(skb->mark & 0x3f)]; if (QID < 8 && M2Q_wan_lan) { #if defined (CONFIG_PSEUDO_SUPPORT) if (gmac_no == PSE_PORT_GMAC2) QID += 8; #elif defined (CONFIG_RAETH_HW_VLAN_TX) if ((skb_vlan_tag_get(skb) & VLAN_VID_MASK) > 1) QID += 8; #endif } txd_info3 |= TX3_QDMA_QID(QID); } txd_info4 = TX4_DMA_FPORT(gmac_no); #if defined (CONFIG_RAETH_CHECKSUM_OFFLOAD) if (skb->ip_summed == CHECKSUM_PARTIAL) txd_info4 |= TX4_DMA_TUI_CO(7); #endif #if defined (CONFIG_RAETH_HW_VLAN_TX) if (skb_vlan_tag_present(skb)) txd_info4 |= (0x10000 | skb_vlan_tag_get(skb)); #endif #if defined (CONFIG_RAETH_SG_DMA_TX) shinfo = skb_shinfo(skb); #endif #if defined (CONFIG_RAETH_TSO) /* fill MSS info in tcp checksum field */ if (shinfo->gso_size) { u32 hdr_len; if (!(shinfo->gso_type & (SKB_GSO_TCPV4|SKB_GSO_TCPV6))) { dev_kfree_skb(skb); return NETDEV_TX_OK; } if (skb_header_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { dev_kfree_skb(skb); return NETDEV_TX_OK; } } hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb)); if (hdr_len >= skb->len) { dev_kfree_skb(skb); return NETDEV_TX_OK; } tcp_hdr(skb)->check = htons(shinfo->gso_size); txd_info4 |= TX4_DMA_TSO; } #endif nr_desc = DIV_ROUND_UP(skb_headlen(skb), TXD_MAX_SEG_SIZE); #if defined (CONFIG_RAETH_SG_DMA_TX) nr_frags = (u32)shinfo->nr_frags; for (i = 0; i < nr_frags; i++) { tx_frag = &shinfo->frags[i]; nr_desc += DIV_ROUND_UP(skb_frag_size(tx_frag), TXD_MAX_SEG_SIZE); } #endif txq = netdev_get_tx_queue(dev, 0); /* flush main skb part before spin_lock() */ frag_size = (u32)skb_headlen(skb); frag_addr = dma_map_single(NULL, skb->data, frag_size, DMA_TO_DEVICE); /* protect TX ring access (from eth2/eth3 queues) */ spin_lock(&ei_local->page_lock); /* check nr_desc+2 free descriptors (2 need to prevent head/tail overlap) */ if (ei_local->txd_pool_free_num < (nr_desc+2)) { spin_unlock(&ei_local->page_lock); netif_tx_stop_queue(txq); #if defined (CONFIG_RAETH_DEBUG) if (net_ratelimit()) printk("%s: QDMA TX pool is run out! (GMAC: %d)\n", RAETH_DEV_NAME, gmac_no); #endif return NETDEV_TX_BUSY; } qdma_write_skb_fragment(ei_local, frag_addr, frag_size, txd_info3, txd_info4, skb, nr_frags == 0); #if defined (CONFIG_RAETH_SG_DMA_TX) for (i = 0; i < nr_frags; i++) { tx_frag = &shinfo->frags[i]; frag_size = skb_frag_size(tx_frag); frag_addr = skb_frag_dma_map(NULL, tx_frag, 0, frag_size, DMA_TO_DEVICE); qdma_write_skb_fragment(ei_local, frag_addr, frag_size, txd_info3, txd_info4, skb, i == nr_frags - 1); } #endif #if defined (CONFIG_RAETH_BQL) netdev_tx_sent_queue(txq, skb->len); #endif #if !defined (CONFIG_RAETH_BQL) || !defined (CONFIG_SMP) /* smp_mb() already inlined in netdev_tx_sent_queue */ wmb(); #endif /* kick the QDMA TX */ sysRegWrite(QTX_CTX_PTR, (u32)get_txd_ptr_phy(ei_local, ei_local->txd_last_idx)); spin_unlock(&ei_local->page_lock); return NETDEV_TX_OK; }
static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct c2_port *c2_port = netdev_priv(netdev); struct c2_dev *c2dev = c2_port->c2dev; struct c2_ring *tx_ring = &c2_port->tx_ring; struct c2_element *elem; dma_addr_t mapaddr; u32 maplen; unsigned long flags; unsigned int i; spin_lock_irqsave(&c2_port->tx_lock, flags); if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) { netif_stop_queue(netdev); spin_unlock_irqrestore(&c2_port->tx_lock, flags); pr_debug("%s: Tx ring full when queue awake!\n", netdev->name); return NETDEV_TX_BUSY; } maplen = skb_headlen(skb); mapaddr = pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE); elem = tx_ring->to_use; elem->skb = skb; elem->mapaddr = mapaddr; elem->maplen = maplen; __raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR); __raw_writew((__force u16) cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN); __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS); netdev->stats.tx_packets++; netdev->stats.tx_bytes += maplen; if (skb_shinfo(skb)->nr_frags) { for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; maplen = skb_frag_size(frag); mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag, 0, maplen, DMA_TO_DEVICE); elem = elem->next; elem->skb = NULL; elem->mapaddr = mapaddr; elem->maplen = maplen; __raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR); __raw_writew((__force u16) cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN); __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS); netdev->stats.tx_packets++; netdev->stats.tx_bytes += maplen; } } tx_ring->to_use = elem->next; c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1); if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) { netif_stop_queue(netdev); if (netif_msg_tx_queued(c2_port)) pr_debug("%s: transmit queue full\n", netdev->name); } spin_unlock_irqrestore(&c2_port->tx_lock, flags); netdev->trans_start = jiffies; return NETDEV_TX_OK; }
static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv, struct skb_shared_info *shinfo, struct mlx4_wqe_data_seg *data, struct sk_buff *skb, int lso_header_size, __be32 mr_key, struct mlx4_en_tx_info *tx_info) { struct device *ddev = priv->ddev; dma_addr_t dma = 0; u32 byte_count = 0; int i_frag; /* Map fragments if any */ for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) { const struct skb_frag_struct *frag; frag = &shinfo->frags[i_frag]; byte_count = skb_frag_size(frag); dma = skb_frag_dma_map(ddev, frag, 0, byte_count, DMA_TO_DEVICE); if (dma_mapping_error(ddev, dma)) goto tx_drop_unmap; data->addr = cpu_to_be64(dma); data->lkey = mr_key; dma_wmb(); data->byte_count = cpu_to_be32(byte_count); --data; } /* Map linear part if needed */ if (tx_info->linear) { byte_count = skb_headlen(skb) - lso_header_size; dma = dma_map_single(ddev, skb->data + lso_header_size, byte_count, PCI_DMA_TODEVICE); if (dma_mapping_error(ddev, dma)) goto tx_drop_unmap; data->addr = cpu_to_be64(dma); data->lkey = mr_key; dma_wmb(); data->byte_count = cpu_to_be32(byte_count); } /* tx completion can avoid cache line miss for common cases */ tx_info->map0_dma = dma; tx_info->map0_byte_count = byte_count; return true; tx_drop_unmap: en_err(priv, "DMA mapping error\n"); while (++i_frag < shinfo->nr_frags) { ++data; dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr), be32_to_cpu(data->byte_count), PCI_DMA_TODEVICE); } return false; }
static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); unsigned int desc_flags; union ibmveth_buf_desc descs[6]; int last, i; int force_bounce = 0; dma_addr_t dma_addr; unsigned long mss = 0; /* * veth handles a maximum of 6 segments including the header, so * we have to linearize the skb if there are more than this. */ if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) { netdev->stats.tx_dropped++; goto out; } /* veth can't checksum offload UDP */ if (skb->ip_summed == CHECKSUM_PARTIAL && ((skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->protocol != IPPROTO_TCP) || (skb->protocol == htons(ETH_P_IPV6) && ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) && skb_checksum_help(skb)) { netdev_err(netdev, "tx: failed to checksum packet\n"); netdev->stats.tx_dropped++; goto out; } desc_flags = IBMVETH_BUF_VALID; if (skb_is_gso(skb) && adapter->fw_large_send_support) desc_flags |= IBMVETH_BUF_LRG_SND; if (skb->ip_summed == CHECKSUM_PARTIAL) { unsigned char *buf = skb_transport_header(skb) + skb->csum_offset; desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD); /* Need to zero out the checksum */ buf[0] = 0; buf[1] = 0; } retry_bounce: memset(descs, 0, sizeof(descs)); /* * If a linear packet is below the rx threshold then * copy it into the static bounce buffer. This avoids the * cost of a TCE insert and remove. */ if (force_bounce || (!skb_is_nonlinear(skb) && (skb->len < tx_copybreak))) { skb_copy_from_linear_data(skb, adapter->bounce_buffer, skb->len); descs[0].fields.flags_len = desc_flags | skb->len; descs[0].fields.address = adapter->bounce_buffer_dma; if (ibmveth_send(adapter, descs, 0)) { adapter->tx_send_failed++; netdev->stats.tx_dropped++; } else { netdev->stats.tx_packets++; netdev->stats.tx_bytes += skb->len; } goto out; } /* Map the header */ dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) goto map_failed; descs[0].fields.flags_len = desc_flags | skb_headlen(skb); descs[0].fields.address = dma_addr; /* Map the frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) goto map_failed_frags; descs[i+1].fields.flags_len = desc_flags | skb_frag_size(frag); descs[i+1].fields.address = dma_addr; } if (skb_is_gso(skb)) { if (adapter->fw_large_send_support) { mss = (unsigned long)skb_shinfo(skb)->gso_size; adapter->tx_large_packets++; } else if (!skb_is_gso_v6(skb)) { /* Put -1 in the IP checksum to tell phyp it * is a largesend packet. Put the mss in * the TCP checksum. */ ip_hdr(skb)->check = 0xffff; tcp_hdr(skb)->check = cpu_to_be16(skb_shinfo(skb)->gso_size); adapter->tx_large_packets++; } } if (ibmveth_send(adapter, descs, mss)) { adapter->tx_send_failed++; netdev->stats.tx_dropped++; } else { netdev->stats.tx_packets++; netdev->stats.tx_bytes += skb->len; } dma_unmap_single(&adapter->vdev->dev, descs[0].fields.address, descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK, DMA_TO_DEVICE); for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, DMA_TO_DEVICE); out: dev_consume_skb_any(skb); return NETDEV_TX_OK; map_failed_frags: last = i+1; for (i = 0; i < last; i++) dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address, descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK, DMA_TO_DEVICE); map_failed: if (!firmware_has_feature(FW_FEATURE_CMO)) netdev_err(netdev, "tx: unable to map xmit buffer\n"); adapter->tx_map_failed++; if (skb_linearize(skb)) { netdev->stats.tx_dropped++; goto out; } force_bounce = 1; goto retry_bounce; }
static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb) { struct xgbe_prv_data *pdata = channel->pdata; struct xgbe_ring *ring = channel->tx_ring; struct xgbe_ring_data *rdata; struct xgbe_packet_data *packet; struct skb_frag_struct *frag; dma_addr_t skb_dma; unsigned int start_index, cur_index; unsigned int offset, tso, vlan, datalen, len; unsigned int i; DBGPR("-->xgbe_map_tx_skb: cur = %d\n", ring->cur); offset = 0; start_index = ring->cur; cur_index = ring->cur; packet = &ring->packet_data; packet->rdesc_count = 0; packet->length = 0; tso = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, TSO_ENABLE); vlan = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, VLAN_CTAG); /* Save space for a context descriptor if needed */ if ((tso && (packet->mss != ring->tx.cur_mss)) || (vlan && (packet->vlan_ctag != ring->tx.cur_vlan_ctag))) cur_index++; rdata = XGBE_GET_DESC_DATA(ring, cur_index); if (tso) { DBGPR(" TSO packet\n"); /* Map the TSO header */ skb_dma = dma_map_single(pdata->dev, skb->data, packet->header_len, DMA_TO_DEVICE); if (dma_mapping_error(pdata->dev, skb_dma)) { netdev_alert(pdata->netdev, "dma_map_single failed\n"); goto err_out; } rdata->skb_dma = skb_dma; rdata->skb_dma_len = packet->header_len; rdata->tso_header = 1; offset = packet->header_len; packet->length += packet->header_len; cur_index++; rdata = XGBE_GET_DESC_DATA(ring, cur_index); } /* Map the (remainder of the) packet */ for (datalen = skb_headlen(skb) - offset; datalen; ) { len = min_t(unsigned int, datalen, XGBE_TX_MAX_BUF_SIZE); skb_dma = dma_map_single(pdata->dev, skb->data + offset, len, DMA_TO_DEVICE); if (dma_mapping_error(pdata->dev, skb_dma)) { netdev_alert(pdata->netdev, "dma_map_single failed\n"); goto err_out; } rdata->skb_dma = skb_dma; rdata->skb_dma_len = len; DBGPR(" skb data: index=%u, dma=0x%llx, len=%u\n", cur_index, skb_dma, len); datalen -= len; offset += len; packet->length += len; cur_index++; rdata = XGBE_GET_DESC_DATA(ring, cur_index); } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { DBGPR(" mapping frag %u\n", i); frag = &skb_shinfo(skb)->frags[i]; offset = 0; for (datalen = skb_frag_size(frag); datalen; ) { len = min_t(unsigned int, datalen, XGBE_TX_MAX_BUF_SIZE); skb_dma = skb_frag_dma_map(pdata->dev, frag, offset, len, DMA_TO_DEVICE); if (dma_mapping_error(pdata->dev, skb_dma)) { netdev_alert(pdata->netdev, "skb_frag_dma_map failed\n"); goto err_out; } rdata->skb_dma = skb_dma; rdata->skb_dma_len = len; rdata->mapped_as_page = 1; DBGPR(" skb data: index=%u, dma=0x%llx, len=%u\n", cur_index, skb_dma, len); datalen -= len; offset += len; packet->length += len; cur_index++; rdata = XGBE_GET_DESC_DATA(ring, cur_index); } } /* Save the skb address in the last entry */ rdata->skb = skb; /* Save the number of descriptor entries used */ packet->rdesc_count = cur_index - start_index; DBGPR("<--xgbe_map_tx_skb: count=%u\n", packet->rdesc_count); return packet->rdesc_count; err_out: while (start_index < cur_index) { rdata = XGBE_GET_DESC_DATA(ring, start_index++); xgbe_unmap_skb(pdata, rdata); } DBGPR("<--xgbe_map_tx_skb: count=0\n"); return 0; }
static inline int dma_xmit(struct sk_buff* skb, struct net_device *dev, END_DEVICE *ei_local, int gmac_no) { struct netdev_queue *txq; dma_addr_t frag_addr; u32 frag_size, nr_desc; u32 next_idx, desc_odd = 0; u32 txd_info2 = 0, txd_info4; #if defined (CONFIG_RAETH_SG_DMA_TX) u32 i, nr_frags; const skb_frag_t *tx_frag; const struct skb_shared_info *shinfo; #else #define nr_frags 0 #endif #if defined (CONFIG_RA_HW_NAT) || defined (CONFIG_RA_HW_NAT_MODULE) if (ra_sw_nat_hook_tx != NULL) { #if defined (CONFIG_RA_HW_NAT_WIFI) || defined (CONFIG_RA_HW_NAT_PCI) if (IS_DPORT_PPE_VALID(skb)) gmac_no = PSE_PORT_PPE; else #endif if (ra_sw_nat_hook_tx(skb, gmac_no) == 0) { dev_kfree_skb(skb); return NETDEV_TX_OK; } } #endif #if !defined (RAETH_HW_PADPKT) if (skb->len < ei_local->min_pkt_len) { if (skb_padto(skb, ei_local->min_pkt_len)) { #if defined (CONFIG_RAETH_DEBUG) if (net_ratelimit()) printk(KERN_ERR "%s: skb_padto failed\n", RAETH_DEV_NAME); #endif return NETDEV_TX_OK; } skb_put(skb, ei_local->min_pkt_len - skb->len); } #endif #if defined (CONFIG_RALINK_MT7620) if (gmac_no == PSE_PORT_PPE) txd_info4 = TX4_DMA_FP_BMAP(0x80); /* P7 */ else #if defined (CONFIG_RAETH_HAS_PORT5) && !defined (CONFIG_RAETH_HAS_PORT4) && !defined (CONFIG_RAETH_ESW) txd_info4 = TX4_DMA_FP_BMAP(0x20); /* P5 */ #elif defined (CONFIG_RAETH_HAS_PORT4) && !defined (CONFIG_RAETH_HAS_PORT5) && !defined (CONFIG_RAETH_ESW) txd_info4 = TX4_DMA_FP_BMAP(0x10); /* P4 */ #else txd_info4 = 0; /* routing by DA */ #endif #elif defined (CONFIG_RALINK_MT7621) txd_info4 = TX4_DMA_FPORT(gmac_no); #else txd_info4 = (TX4_DMA_QN(3) | TX4_DMA_PN(gmac_no)); #endif #if defined (CONFIG_RAETH_CHECKSUM_OFFLOAD) && !defined (RAETH_SDMA) if (skb->ip_summed == CHECKSUM_PARTIAL) txd_info4 |= TX4_DMA_TUI_CO(7); #endif #if defined (CONFIG_RAETH_HW_VLAN_TX) if (skb_vlan_tag_present(skb)) { #if defined (RAETH_HW_VLAN4K) txd_info4 |= (0x10000 | skb_vlan_tag_get(skb)); #else u32 vlan_tci = skb_vlan_tag_get(skb); txd_info4 |= (TX4_DMA_INSV | TX4_DMA_VPRI(vlan_tci)); txd_info4 |= (u32)ei_local->vlan_4k_map[(vlan_tci & VLAN_VID_MASK)]; #endif } #endif #if defined (CONFIG_RAETH_SG_DMA_TX) shinfo = skb_shinfo(skb); #endif #if defined (CONFIG_RAETH_TSO) /* fill MSS info in tcp checksum field */ if (shinfo->gso_size) { u32 hdr_len; if (!(shinfo->gso_type & (SKB_GSO_TCPV4|SKB_GSO_TCPV6))) { dev_kfree_skb(skb); return NETDEV_TX_OK; } if (skb_header_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { dev_kfree_skb(skb); return NETDEV_TX_OK; } } hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb)); if (hdr_len >= skb->len) { dev_kfree_skb(skb); return NETDEV_TX_OK; } tcp_hdr(skb)->check = htons(shinfo->gso_size); txd_info4 |= TX4_DMA_TSO; } #endif nr_desc = DIV_ROUND_UP(skb_headlen(skb), TXD_MAX_SEG_SIZE); #if defined (CONFIG_RAETH_SG_DMA_TX) nr_frags = (u32)shinfo->nr_frags; for (i = 0; i < nr_frags; i++) { tx_frag = &shinfo->frags[i]; nr_desc += DIV_ROUND_UP(skb_frag_size(tx_frag), TXD_MAX_SEG_SIZE); } #endif nr_desc = DIV_ROUND_UP(nr_desc, 2); txq = netdev_get_tx_queue(dev, 0); /* flush main skb part before spin_lock() */ frag_size = (u32)skb_headlen(skb); frag_addr = dma_map_single(NULL, skb->data, frag_size, DMA_TO_DEVICE); /* protect TX ring access (from eth2/eth3 queues) */ spin_lock(&ei_local->page_lock); /* check nr_desc+1 free descriptors */ next_idx = (ei_local->txd_last_idx + nr_desc) % NUM_TX_DESC; if (ei_local->txd_buff[ei_local->txd_last_idx] || ei_local->txd_buff[next_idx]) { spin_unlock(&ei_local->page_lock); netif_tx_stop_queue(txq); #if defined (CONFIG_RAETH_DEBUG) if (net_ratelimit()) printk("%s: PDMA TX ring is full! (GMAC: %d)\n", RAETH_DEV_NAME, gmac_no); #endif return NETDEV_TX_BUSY; } pdma_write_skb_fragment(ei_local, frag_addr, frag_size, &desc_odd, &txd_info2, txd_info4, skb, nr_frags == 0); #if defined (CONFIG_RAETH_SG_DMA_TX) for (i = 0; i < nr_frags; i++) { tx_frag = &shinfo->frags[i]; frag_size = skb_frag_size(tx_frag); frag_addr = skb_frag_dma_map(NULL, tx_frag, 0, frag_size, DMA_TO_DEVICE); pdma_write_skb_fragment(ei_local, frag_addr, frag_size, &desc_odd, &txd_info2, txd_info4, skb, i == nr_frags - 1); } #endif #if defined (CONFIG_RAETH_BQL) netdev_tx_sent_queue(txq, skb->len); #endif #if !defined (CONFIG_RAETH_BQL) || !defined (CONFIG_SMP) /* smp_mb() already inlined in netdev_tx_sent_queue */ wmb(); #endif /* kick the DMA TX */ sysRegWrite(TX_CTX_IDX0, cpu_to_le32(ei_local->txd_last_idx)); spin_unlock(&ei_local->page_lock); return NETDEV_TX_OK; }
static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) { struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); struct mlx5e_tx_wqe_info *wi = &sq->wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg; unsigned char *skb_data = skb->data; unsigned int skb_len = skb->len; u8 opcode = MLX5_OPCODE_SEND; dma_addr_t dma_addr = 0; unsigned int num_bytes; bool bf = false; u16 headlen; u16 ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; u16 ihs; int i; memset(wqe, 0, sizeof(*wqe)); if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; if (skb->encapsulation) { eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM; sq->stats.csum_partial_inner++; } else { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; sq->stats.csum_partial++; } } else sq->stats.csum_none++; if (sq->cc != sq->prev_cc) { sq->prev_cc = sq->cc; sq->bf_budget = (sq->cc == sq->pc) ? MLX5E_SQ_BF_BUDGET : 0; } if (skb_is_gso(skb)) { eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); opcode = MLX5_OPCODE_LSO; if (skb->encapsulation) { ihs = skb_inner_transport_header(skb) - skb->data + inner_tcp_hdrlen(skb); sq->stats.tso_inner_packets++; sq->stats.tso_inner_bytes += skb->len - ihs; } else { ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); sq->stats.tso_packets++; sq->stats.tso_bytes += skb->len - ihs; } num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; } else { bf = sq->bf_budget && !skb->xmit_more && !skb_shinfo(skb)->nr_frags; ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } wi->num_bytes = num_bytes; if (skb_vlan_tag_present(skb)) { mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data, &skb_len); ihs += VLAN_HLEN; } else { memcpy(eseg->inline_hdr_start, skb_data, ihs); mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); } eseg->inline_hdr_sz = cpu_to_be16(ihs); ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr_start), MLX5_SEND_WQE_DS); dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; wi->num_dma = 0; headlen = skb_len - skb->data_len; if (headlen) { dma_addr = dma_map_single(sq->pdev, skb_data, headlen, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) goto dma_unmap_wqe_err; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(headlen); mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); wi->num_dma++; dseg++; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; int fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) goto dma_unmap_wqe_err; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(fsz); mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); wi->num_dma++; dseg++; } ds_cnt += wi->num_dma; cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); sq->skb[pi] = skb; wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += wi->num_wqebbs; if (unlikely(MLX5E_TX_HW_STAMP(sq->channel->priv, skb))) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; netdev_tx_sent_queue(sq->txq, wi->num_bytes); if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) { netif_tx_stop_queue(sq->txq); sq->stats.queue_stopped++; } if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { int bf_sz = 0; if (bf && sq->uar_bf_map) bf_sz = wi->num_wqebbs << 3; cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz); } sq->bf_budget = bf ? sq->bf_budget - 1 : 0; /* fill sq edge with nops to avoid wqe wrap around */ while ((sq->pc & wq->sz_m1) > sq->edge) mlx5e_send_nop(sq, false); sq->stats.packets++; sq->stats.bytes += num_bytes; return NETDEV_TX_OK; dma_unmap_wqe_err: sq->stats.queue_dropped++; mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); dev_kfree_skb_any(skb); return NETDEV_TX_OK; }
/* * Add a socket buffer to a TX queue * * This maps all fragments of a socket buffer for DMA and adds them to * the TX queue. The queue's insert pointer will be incremented by * the number of fragments in the socket buffer. * * If any DMA mapping fails, any mapped fragments will be unmapped, * the queue's insert pointer will be restored to its original value. * * This function is split out from efx_hard_start_xmit to allow the * loopback test to direct packets via specific TX queues. * * Returns NETDEV_TX_OK or NETDEV_TX_BUSY * You must hold netif_tx_lock() to call this function. */ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) { struct efx_nic *efx = tx_queue->efx; struct pci_dev *pci_dev = efx->pci_dev; struct efx_tx_buffer *buffer; skb_frag_t *fragment; unsigned int len, unmap_len = 0, fill_level, insert_ptr; dma_addr_t dma_addr, unmap_addr = 0; unsigned int dma_len; bool unmap_single; int q_space, i = 0; netdev_tx_t rc = NETDEV_TX_OK; EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); if (skb_shinfo(skb)->gso_size) return efx_enqueue_skb_tso(tx_queue, skb); /* Get size of the initial fragment */ len = skb_headlen(skb); /* Pad if necessary */ if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) { EFX_BUG_ON_PARANOID(skb->data_len); len = 32 + 1; if (skb_pad(skb, len - skb->len)) return NETDEV_TX_OK; } fill_level = tx_queue->insert_count - tx_queue->old_read_count; q_space = efx->txq_entries - 1 - fill_level; /* Map for DMA. Use pci_map_single rather than pci_map_page * since this is more efficient on machines with sparse * memory. */ unmap_single = true; dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE); /* Process all fragments */ while (1) { if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr))) goto pci_err; /* Store fields for marking in the per-fragment final * descriptor */ unmap_len = len; unmap_addr = dma_addr; /* Add to TX queue, splitting across DMA boundaries */ do { if (unlikely(q_space-- <= 0)) { /* It might be that completions have * happened since the xmit path last * checked. Update the xmit path's * copy of read_count. */ netif_tx_stop_queue(tx_queue->core_txq); /* This memory barrier protects the * change of queue state from the access * of read_count. */ smp_mb(); tx_queue->old_read_count = ACCESS_ONCE(tx_queue->read_count); fill_level = (tx_queue->insert_count - tx_queue->old_read_count); q_space = efx->txq_entries - 1 - fill_level; if (unlikely(q_space-- <= 0)) { rc = NETDEV_TX_BUSY; goto unwind; } smp_mb(); if (likely(!efx->loopback_selftest)) netif_tx_start_queue( tx_queue->core_txq); } insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; efx_tsoh_free(tx_queue, buffer); EFX_BUG_ON_PARANOID(buffer->tsoh); EFX_BUG_ON_PARANOID(buffer->skb); EFX_BUG_ON_PARANOID(buffer->len); EFX_BUG_ON_PARANOID(!buffer->continuation); EFX_BUG_ON_PARANOID(buffer->unmap_len); dma_len = efx_max_tx_len(efx, dma_addr); if (likely(dma_len >= len)) dma_len = len; /* Fill out per descriptor fields */ buffer->len = dma_len; buffer->dma_addr = dma_addr; len -= dma_len; dma_addr += dma_len; ++tx_queue->insert_count; } while (len); /* Transfer ownership of the unmapping to the final buffer */ buffer->unmap_single = unmap_single; buffer->unmap_len = unmap_len; unmap_len = 0; /* Get address and size of next fragment */ if (i >= skb_shinfo(skb)->nr_frags) break; fragment = &skb_shinfo(skb)->frags[i]; len = skb_frag_size(fragment); i++; /* Map for DMA */ unmap_single = false; dma_addr = skb_frag_dma_map(&pci_dev->dev, fragment, 0, len, DMA_TO_DEVICE); } /* Transfer ownership of the skb to the final buffer */ buffer->skb = skb; buffer->continuation = false; /* Pass off to hardware */ efx_nic_push_buffers(tx_queue); return NETDEV_TX_OK; pci_err: netif_err(efx, tx_err, efx->net_dev, " TX queue %d could not map skb with %d bytes %d " "fragments for DMA\n", tx_queue->queue, skb->len, skb_shinfo(skb)->nr_frags + 1); /* Mark the packet as transmitted, and free the SKB ourselves */ dev_kfree_skb_any(skb); unwind: /* Work backwards until we hit the original insert pointer value */ while (tx_queue->insert_count != tx_queue->write_count) { --tx_queue->insert_count; insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; efx_dequeue_buffer(tx_queue, buffer); buffer->len = 0; } /* Free the fragment we were mid-way through pushing */ if (unmap_len) { if (unmap_single) pci_unmap_single(pci_dev, unmap_addr, unmap_len, PCI_DMA_TODEVICE); else pci_unmap_page(pci_dev, unmap_addr, unmap_len, PCI_DMA_TODEVICE); } return rc; }