/* Parse the SKB header and initialise state. */ static int tso_start(struct tso_state *st, struct efx_nic *efx, struct efx_tx_queue *tx_queue, const struct sk_buff *skb) { struct device *dma_dev = &efx->pci_dev->dev; unsigned int header_len, in_len; bool use_opt_desc = false; dma_addr_t dma_addr; if (tx_queue->tso_version == 1) use_opt_desc = true; st->ip_off = skb_network_header(skb) - skb->data; st->tcp_off = skb_transport_header(skb) - skb->data; header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); in_len = skb_headlen(skb) - header_len; st->header_len = header_len; st->in_len = in_len; if (st->protocol == htons(ETH_P_IP)) { st->ip_base_len = st->header_len - st->ip_off; st->ipv4_id = ntohs(ip_hdr(skb)->id); } else { st->ip_base_len = st->header_len - st->tcp_off; st->ipv4_id = 0; } st->seqnum = ntohl(tcp_hdr(skb)->seq); EFX_WARN_ON_ONCE_PARANOID(tcp_hdr(skb)->urg); EFX_WARN_ON_ONCE_PARANOID(tcp_hdr(skb)->syn); EFX_WARN_ON_ONCE_PARANOID(tcp_hdr(skb)->rst); st->out_len = skb->len - header_len; if (!use_opt_desc) { st->header_unmap_len = 0; if (likely(in_len == 0)) { st->dma_flags = 0; st->unmap_len = 0; return 0; } dma_addr = dma_map_single(dma_dev, skb->data + header_len, in_len, DMA_TO_DEVICE); st->dma_flags = EFX_TX_BUF_MAP_SINGLE; st->dma_addr = dma_addr; st->unmap_addr = dma_addr; st->unmap_len = in_len; } else { dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); st->header_dma_addr = dma_addr; st->header_unmap_len = skb_headlen(skb); st->dma_flags = 0; st->dma_addr = dma_addr + header_len; st->unmap_len = 0; } return unlikely(dma_mapping_error(dma_dev, dma_addr)) ? -ENOMEM : 0; }
static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, unsigned int len) { u8 *result; EFX_WARN_ON_ONCE_PARANOID(buffer->len); EFX_WARN_ON_ONCE_PARANOID(buffer->flags); EFX_WARN_ON_ONCE_PARANOID(buffer->unmap_len); result = efx_tx_get_copy_buffer_limited(tx_queue, buffer, len); if (result) { buffer->flags = EFX_TX_BUF_CONT; } else { buffer->buf = kmalloc(NET_IP_ALIGN + len, GFP_ATOMIC); if (unlikely(!buffer->buf)) return NULL; tx_queue->tso_long_headers++; result = (u8 *)buffer->buf + NET_IP_ALIGN; buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP; } buffer->len = len; return result; }
static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue, struct sk_buff *skb) { unsigned int copy_len = skb->len; struct efx_tx_buffer *buffer; u8 *copy_buffer; int rc; EFX_WARN_ON_ONCE_PARANOID(copy_len > EFX_TX_CB_SIZE); buffer = efx_tx_queue_get_insert_buffer(tx_queue); copy_buffer = efx_tx_get_copy_buffer(tx_queue, buffer); if (unlikely(!copy_buffer)) return -ENOMEM; rc = skb_copy_bits(skb, 0, copy_buffer, copy_len); EFX_WARN_ON_PARANOID(rc); buffer->len = copy_len; buffer->skb = skb; buffer->flags = EFX_TX_BUF_SKB; ++tx_queue->insert_count; return rc; }
/** * tso_fill_packet_with_fragment - form descriptors for the current fragment * @tx_queue: Efx TX queue * @skb: Socket buffer * @st: TSO state * * Form descriptors for the current fragment, until we reach the end * of fragment or end-of-packet. */ static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, const struct sk_buff *skb, struct tso_state *st) { struct efx_tx_buffer *buffer; int n; if (st->in_len == 0) return; if (st->packet_space == 0) return; EFX_WARN_ON_ONCE_PARANOID(st->in_len <= 0); EFX_WARN_ON_ONCE_PARANOID(st->packet_space <= 0); n = min(st->in_len, st->packet_space); st->packet_space -= n; st->out_len -= n; st->in_len -= n; efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer); if (st->out_len == 0) { /* Transfer ownership of the skb */ buffer->skb = skb; buffer->flags = EFX_TX_BUF_SKB; } else if (st->packet_space != 0) { buffer->flags = EFX_TX_BUF_CONT; } if (st->in_len == 0) { /* Transfer ownership of the DMA mapping */ buffer->unmap_len = st->unmap_len; buffer->dma_offset = buffer->unmap_len - buffer->len; buffer->flags |= st->dma_flags; st->unmap_len = 0; } st->dma_addr += n; }
/**
 * efx_tx_queue_insert - push descriptors onto the TX queue
 * @tx_queue:		Efx TX queue
 * @dma_addr:		DMA address of fragment
 * @len:		Length of fragment
 * @final_buffer:	Receives the last buffer inserted into the queue
 *
 * Describe the @len bytes at @dma_addr using as many TX buffers as the
 * NIC type's tx_limit_len() hook requires.  Every buffer except the last
 * is flagged EFX_TX_BUF_CONT; the last one has only its address and
 * length set here, leaving its flags to the caller.
 */
static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
				dma_addr_t dma_addr, unsigned int len,
				struct efx_tx_buffer **final_buffer)
{
	struct efx_tx_buffer *cur;
	unsigned int limit;

	EFX_WARN_ON_ONCE_PARANOID(len <= 0);

	for (;;) {
		cur = efx_tx_queue_get_insert_buffer(tx_queue);
		++tx_queue->insert_count;

		EFX_WARN_ON_ONCE_PARANOID(tx_queue->insert_count -
					  tx_queue->read_count >=
					  tx_queue->efx->txq_entries);

		cur->dma_addr = dma_addr;

		limit = tx_queue->efx->type->tx_limit_len(tx_queue,
							  dma_addr, len);

		/* Everything that remains fits: this is the final buffer. */
		if (limit >= len)
			break;

		/* Otherwise emit a partial buffer and carry on after it. */
		cur->flags = EFX_TX_BUF_CONT;
		cur->len = limit;
		len -= limit;
		dma_addr += limit;
	}

	EFX_WARN_ON_ONCE_PARANOID(!len);
	cur->len = len;
	*final_buffer = cur;
}
static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) { /* We need to consider both queues that the net core sees as one */ struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1); struct efx_nic *efx = txq1->efx; unsigned int fill_level; fill_level = max(txq1->insert_count - txq1->old_read_count, txq2->insert_count - txq2->old_read_count); if (likely(fill_level < efx->txq_stop_thresh)) return; /* We used the stale old_read_count above, which gives us a * pessimistic estimate of the fill level (which may even * validly be >= efx->txq_entries). Now try again using * read_count (more likely to be a cache miss). * * If we read read_count and then conditionally stop the * queue, it is possible for the completion path to race with * us and complete all outstanding descriptors in the middle, * after which there will be no more completions to wake it. * Therefore we stop the queue first, then read read_count * (with a memory barrier to ensure the ordering), then * restart the queue if the fill level turns out to be low * enough. */ netif_tx_stop_queue(txq1->core_txq); smp_mb(); txq1->old_read_count = ACCESS_ONCE(txq1->read_count); txq2->old_read_count = ACCESS_ONCE(txq2->read_count); fill_level = max(txq1->insert_count - txq1->old_read_count, txq2->insert_count - txq2->old_read_count); EFX_WARN_ON_ONCE_PARANOID(fill_level >= efx->txq_entries); if (likely(fill_level < efx->txq_stop_thresh)) { smp_mb(); if (likely(!efx->loopback_selftest)) netif_tx_start_queue(txq1->core_txq); } }
/**
 * efx_tx_tso_sw - segment and transmit a TSO socket buffer using SW or FATSOv1
 * @tx_queue:		Efx TX queue
 * @skb:		Socket buffer
 * @data_mapped:	Did we map the data?  Always set to true
 *			by this on success.
 *
 * Context: You must hold netif_tx_lock() to call this function.
 *
 * Add socket buffer @skb to @tx_queue, doing TSO.
 *
 * Return: 0 if @skb was enqueued.  Otherwise a negative error code:
 * whatever efx_tso_check_protocol() reported, -E2BIG if an over-long
 * skb could not be trimmed (compat builds only), or -ENOMEM for any
 * failure after the protocol check.  On the -ENOMEM path the DMA
 * mappings still recorded in the local TSO state are released here;
 * @skb itself is not freed by this function.
 */
int efx_tx_tso_sw(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
		  bool *data_mapped)
{
	struct efx_nic *efx = tx_queue->efx;
	struct tso_state tso;
	struct device *dma_dev;
	int frag_idx, rc;

#if defined(EFX_USE_KCOMPAT) && !defined(EFX_HAVE_GSO_MAX_SEGS)
	/* The stack does not cap the number of segments per skb here, so
	 * do it ourselves.  Otherwise an attacker may be able to make TCP
	 * produce skbs that will never fit in our TX queue, causing
	 * repeated resets.
	 */
	if (unlikely(skb_shinfo(skb)->gso_segs > EFX_TSO_MAX_SEGS)) {
		unsigned int excess =
			(skb_shinfo(skb)->gso_segs - EFX_TSO_MAX_SEGS) *
			skb_shinfo(skb)->gso_size;

		if (__pskb_trim(skb, skb->len - excess))
			return -E2BIG;
	}
#endif

	prefetch(skb->data);

	/* Identify the packet protocol and sanity-check it. */
	rc = efx_tso_check_protocol(skb, &tso.protocol);
	if (rc)
		return rc;

	rc = tso_start(&tso, efx, tx_queue, skb);
	if (rc)
		goto fail;

	if (unlikely(tso.in_len != 0)) {
		/* Payload already begins inside the header area. */
		frag_idx = -1;
	} else {
		/* No payload in the header area: start on fragment 0. */
		EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->nr_frags < 1);
		frag_idx = 0;
		rc = tso_get_fragment(&tso, efx,
				      &skb_shinfo(skb)->frags[frag_idx]);
		if (rc)
			goto fail;
	}

	if (tso_start_new_packet(tx_queue, skb, &tso, true) < 0)
		goto fail;

	prefetch_ptr(tx_queue);

	for (;;) {
		tso_fill_packet_with_fragment(tx_queue, skb, &tso);

		if (tso.in_len == 0) {
			/* Fragment consumed: advance, or stop at the end
			 * of the payload.
			 */
			frag_idx++;
			if (frag_idx >= skb_shinfo(skb)->nr_frags)
				break;
			rc = tso_get_fragment(&tso, efx,
					      &skb_shinfo(skb)->frags[frag_idx]);
			if (rc)
				goto fail;
		}

		/* Room left in the current packet: keep filling it. */
		if (tso.packet_space != 0)
			continue;

		if (tso_start_new_packet(tx_queue, skb, &tso, false) < 0)
			goto fail;
	}

	*data_mapped = true;
	return 0;

fail:
	netif_err(efx, tx_err, efx->net_dev,
		  "Out of memory for TSO headers, or DMA mapping error\n");

	dma_dev = &efx->pci_dev->dev;

	/* Release the payload mapping we were part-way through queueing. */
	if (tso.unmap_len) {
		if (tso.dma_flags & EFX_TX_BUF_MAP_SINGLE)
			dma_unmap_single(dma_dev, tso.unmap_addr,
					 tso.unmap_len, DMA_TO_DEVICE);
		else
			dma_unmap_page(dma_dev, tso.unmap_addr,
				       tso.unmap_len, DMA_TO_DEVICE);
	}

	/* Release the header mapping, if using option descriptors. */
	if (tso.header_unmap_len)
		dma_unmap_single(dma_dev, tso.header_dma_addr,
				 tso.header_unmap_len, DMA_TO_DEVICE);

	return -ENOMEM;
}