static inline struct efx_rx_buffer *
efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
{
        if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
                return efx_rx_buffer(rx_queue, 0);
        else
                return rx_buf + 1;
}
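/* The functions in this listing rely on efx_rx_buffer() and efx_rx_buf_va()
 * to translate a ring index or buffer descriptor into something usable, but
 * neither accessor is part of this excerpt.  The following is a minimal
 * sketch of what they could look like, assuming the descriptor ring is a
 * power-of-two array indexed through ptr_mask and that page-based buffers
 * record their position via page/page_offset.  Illustrative only, not the
 * driver's verbatim definitions.
 */
static inline struct efx_rx_buffer *
efx_rx_buffer(struct efx_rx_queue *rx_queue, unsigned int index)
{
        return &rx_queue->buffer[index];
}

static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
{
        return page_address(buf->page) + buf->page_offset;
}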
/* Handle a received packet.  Second half: Touches packet payload. */
void __efx_rx_packet(struct efx_channel *channel)
{
        struct efx_nic *efx = channel->efx;
        struct efx_rx_buffer *rx_buf =
                efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
        u8 *eh = efx_rx_buf_va(rx_buf);

        /* If we're in loopback test, then pass the packet directly to the
         * loopback layer, and free the rx_buf here
         */
        if (unlikely(efx->loopback_selftest)) {
                efx_loopback_rx_packet(efx, eh, rx_buf->len);
                efx_free_rx_buffer(rx_buf);
                goto out;
        }

        if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
                rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;

        if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
                efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
        else
                efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
out:
        channel->rx_pkt_n_frags = 0;
}
void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
{
        int i;
        struct efx_rx_buffer *rx_buf;

        EFX_LOG(rx_queue->efx, "shutting down RX queue %d\n", rx_queue->queue);

        falcon_fini_rx(rx_queue);

        /* Release RX buffers.  NB: start at index 0, not the current HW ptr */
        if (rx_queue->buffer) {
                for (i = 0; i <= rx_queue->efx->type->rxd_ring_mask; i++) {
                        rx_buf = efx_rx_buffer(rx_queue, i);
                        efx_fini_rx_buffer(rx_queue, rx_buf);
                }
        }

        /* For a page that is part-way through splitting into RX buffers */
        if (rx_queue->buf_page != NULL) {
                pci_unmap_page(rx_queue->efx->pci_dev, rx_queue->buf_dma_addr,
                               efx_rx_buf_size(rx_queue->efx),
                               PCI_DMA_FROMDEVICE);
                __free_pages(rx_queue->buf_page,
                             rx_queue->efx->rx_buffer_order);
                rx_queue->buf_page = NULL;
        }
}
void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
                   unsigned int len, bool checksummed, bool discard)
{
        struct efx_nic *efx = rx_queue->efx;
        struct efx_rx_buffer *rx_buf;
        bool leak_packet = false;

        rx_buf = efx_rx_buffer(rx_queue, index);
        EFX_BUG_ON_PARANOID(!rx_buf->data);
        EFX_BUG_ON_PARANOID(rx_buf->skb && rx_buf->page);
        EFX_BUG_ON_PARANOID(!(rx_buf->skb || rx_buf->page));

        /* This allows the refill path to post another buffer.
         * EFX_RXD_HEAD_ROOM ensures that the slot we are using
         * isn't overwritten yet.
         */
        rx_queue->removed_count++;

        /* Validate the length encoded in the event vs the descriptor pushed */
        efx_rx_packet__check_len(rx_queue, rx_buf, len,
                                 &discard, &leak_packet);

        EFX_TRACE(efx, "RX queue %d received id %x at %llx+%x %s%s\n",
                  rx_queue->queue, index,
                  (unsigned long long)rx_buf->dma_addr, len,
                  (checksummed ? " [SUMMED]" : ""),
                  (discard ? " [DISCARD]" : ""));

        /* Discard packet, if instructed to do so */
        if (unlikely(discard)) {
                if (unlikely(leak_packet))
                        rx_queue->channel->n_skbuff_leaks++;
                else
                        /* We haven't called efx_unmap_rx_buffer yet,
                         * so fini the entire rx_buffer here
                         */
                        efx_fini_rx_buffer(rx_queue, rx_buf);
                return;
        }

        /* Release card resources - assumes all RX buffers consumed in-order
         * per RX queue
         */
        efx_unmap_rx_buffer(efx, rx_buf);

        /* Prefetch nice and early so data will (hopefully) be in cache by
         * the time we look at it.
         */
        prefetch(rx_buf->data);

        /* Pipeline receives so that we give time for packet headers to be
         * prefetched into cache.
         */
        rx_buf->len = len;
        if (rx_queue->channel->rx_pkt)
                __efx_rx_packet(rx_queue->channel,
                                rx_queue->channel->rx_pkt,
                                rx_queue->channel->rx_pkt_csummed);
        rx_queue->channel->rx_pkt = rx_buf;
        rx_queue->channel->rx_pkt_csummed = checksummed;
}
/**
 * efx_init_rx_buffers_page - create EFX_RX_BATCH page-based RX buffers
 *
 * @rx_queue: Efx RX queue
 *
 * This allocates memory for EFX_RX_BATCH receive buffers, maps them for DMA,
 * and populates struct efx_rx_buffers for each one.  Return a negative error
 * code or 0 on success.  If a single page can be split between two buffers,
 * then the page will either be inserted fully, or not at all.
 */
static int efx_init_rx_buffers_page(struct efx_rx_queue *rx_queue)
{
        struct efx_nic *efx = rx_queue->efx;
        struct efx_rx_buffer *rx_buf;
        struct page *page;
        void *page_addr;
        unsigned int page_offset;
        struct efx_rx_page_state *state;
        dma_addr_t dma_addr;
        unsigned index, count;

        /* We can split a page between two buffers */
        BUILD_BUG_ON(EFX_RX_BATCH & 1);

        for (count = 0; count < EFX_RX_BATCH; ++count) {
                page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC,
                                   efx->rx_buffer_order);
                if (unlikely(page == NULL))
                        return -ENOMEM;
                dma_addr = pci_map_page(efx->pci_dev, page, 0,
                                        efx_rx_buf_size(efx),
                                        PCI_DMA_FROMDEVICE);
                if (unlikely(pci_dma_mapping_error(efx->pci_dev, dma_addr))) {
                        __free_pages(page, efx->rx_buffer_order);
                        return -EIO;
                }
                page_addr = page_address(page);
                state = page_addr;
                state->refcnt = 0;
                state->dma_addr = dma_addr;

                page_addr += sizeof(struct efx_rx_page_state);
                dma_addr += sizeof(struct efx_rx_page_state);
                page_offset = sizeof(struct efx_rx_page_state);

        split:
                index = rx_queue->added_count & rx_queue->ptr_mask;
                rx_buf = efx_rx_buffer(rx_queue, index);
                rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN;
                rx_buf->u.page = page;
                rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN;
                rx_buf->len = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN;
                rx_buf->is_page = true;
                ++rx_queue->added_count;
                ++rx_queue->alloc_page_count;
                ++state->refcnt;

                if ((~count & 1) && (efx->rx_buffer_len <= EFX_RX_HALF_PAGE)) {
                        /* Use the second half of the page */
                        get_page(page);
                        dma_addr += (PAGE_SIZE >> 1);
                        page_addr += (PAGE_SIZE >> 1);
                        page_offset += (PAGE_SIZE >> 1);
                        ++count;
                        goto split;
                }
        }

        return 0;
}
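/* The split test above compares efx->rx_buffer_len against EFX_RX_HALF_PAGE,
 * and the DMA mapping uses efx_rx_buf_size(); neither definition appears in
 * this excerpt.  As a rough sketch (assumed, not verbatim from the driver):
 * half a page less the per-page state kept at the start of the page, and the
 * full compound-page size used for the mapping.
 */
#define EFX_RX_HALF_PAGE ((PAGE_SIZE >> 1) - sizeof(struct efx_rx_page_state))

static inline unsigned int efx_rx_buf_size(struct efx_nic *efx)
{
        return PAGE_SIZE << efx->rx_buffer_order;
}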
/**
 * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
 *
 * @rx_queue: Efx RX queue
 *
 * This allocates a batch of pages, maps them for DMA, and populates
 * struct efx_rx_buffers for each one.  Return a negative error code or
 * 0 on success.  If a single page can be used for multiple buffers,
 * then the page will either be inserted fully, or not at all.
 */
static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
{
        struct efx_nic *efx = rx_queue->efx;
        struct efx_rx_buffer *rx_buf;
        struct page *page;
        unsigned int page_offset;
        struct efx_rx_page_state *state;
        dma_addr_t dma_addr;
        unsigned index, count;

        count = 0;
        do {
                page = efx_reuse_page(rx_queue);
                if (page == NULL) {
                        page = alloc_pages(__GFP_COLD | __GFP_COMP |
                                           (atomic ? GFP_ATOMIC : GFP_KERNEL),
                                           efx->rx_buffer_order);
                        if (unlikely(page == NULL))
                                return -ENOMEM;
                        dma_addr =
                                dma_map_page(&efx->pci_dev->dev, page, 0,
                                             PAGE_SIZE << efx->rx_buffer_order,
                                             DMA_FROM_DEVICE);
                        if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
                                                       dma_addr))) {
                                __free_pages(page, efx->rx_buffer_order);
                                return -EIO;
                        }
                        state = page_address(page);
                        state->dma_addr = dma_addr;
                } else {
                        state = page_address(page);
                        dma_addr = state->dma_addr;
                }

                dma_addr += sizeof(struct efx_rx_page_state);
                page_offset = sizeof(struct efx_rx_page_state);

                do {
                        index = rx_queue->added_count & rx_queue->ptr_mask;
                        rx_buf = efx_rx_buffer(rx_queue, index);
                        rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
                        rx_buf->page = page;
                        rx_buf->page_offset = page_offset + efx->rx_ip_align;
                        rx_buf->len = efx->rx_dma_len;
                        rx_buf->flags = 0;
                        ++rx_queue->added_count;
                        get_page(page);
                        dma_addr += efx->rx_page_buf_step;
                        page_offset += efx->rx_page_buf_step;
                } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);

                rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
        } while (++count < efx->rx_pages_per_batch);

        return 0;
}
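/* efx_reuse_page(), called above, is the consumer side of the page recycle
 * ring and is not included in this excerpt.  A rough sketch follows, assuming
 * the page_ring/page_ptr_mask ring that efx_fini_rx_queue() tears down below,
 * together with page_remove/page_add read and write counters (names assumed).
 * A page can only be reused if the driver still holds the sole reference to
 * it, so its DMA mapping (kept in the page state) is still valid.
 */
static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
{
        struct efx_nic *efx = rx_queue->efx;
        struct efx_rx_page_state *state;
        struct page *page;
        unsigned index;

        index = rx_queue->page_remove & rx_queue->page_ptr_mask;
        page = rx_queue->page_ring[index];
        if (page == NULL)
                return NULL;

        rx_queue->page_ring[index] = NULL;
        /* page_remove cannot exceed page_add. */
        if (rx_queue->page_remove != rx_queue->page_add)
                ++rx_queue->page_remove;

        /* Sole owner: hand the page straight back to the allocator. */
        if (page_count(page) == 1)
                return page;

        /* Still referenced elsewhere (e.g. by the stack): unmap it and drop
         * our reference instead of reusing it.
         */
        state = page_address(page);
        dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
                       PAGE_SIZE << efx->rx_buffer_order, DMA_FROM_DEVICE);
        put_page(page);
        return NULL;
}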
void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
{
        int i;
        struct efx_nic *efx = rx_queue->efx;
        struct efx_rx_buffer *rx_buf;

        netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
                  "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));

        /* A flush failure might have left rx_queue->enabled */
        rx_queue->enabled = false;

        del_timer_sync(&rx_queue->slow_fill);
        efx_nic_fini_rx(rx_queue);

        /* Release RX buffers from the current read ptr to the write ptr */
        if (rx_queue->buffer) {
                for (i = rx_queue->removed_count; i < rx_queue->added_count;
                     i++) {
                        unsigned index = i & rx_queue->ptr_mask;
                        rx_buf = efx_rx_buffer(rx_queue, index);
                        efx_fini_rx_buffer(rx_queue, rx_buf);
                }
        }

        /* Unmap and release the pages in the recycle ring.  Remove the ring. */
        for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
                struct page *page = rx_queue->page_ring[i];
                struct efx_rx_page_state *state;

                if (page == NULL)
                        continue;

                state = page_address(page);
                dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
                               PAGE_SIZE << efx->rx_buffer_order,
                               DMA_FROM_DEVICE);
                put_page(page);
        }

        kfree(rx_queue->page_ring);
        rx_queue->page_ring = NULL;
}
/**
 * efx_init_rx_buffers_skb - create EFX_RX_BATCH skb-based RX buffers
 *
 * @rx_queue: Efx RX queue
 *
 * This allocates EFX_RX_BATCH skbs, maps them for DMA, and populates a
 * struct efx_rx_buffer for each one.  Return a negative error code or 0
 * on success.  May fail having only inserted fewer than EFX_RX_BATCH
 * buffers.
 */
static int efx_init_rx_buffers_skb(struct efx_rx_queue *rx_queue)
{
        struct efx_nic *efx = rx_queue->efx;
        struct net_device *net_dev = efx->net_dev;
        struct efx_rx_buffer *rx_buf;
        struct sk_buff *skb;
        int skb_len = efx->rx_buffer_len;
        unsigned index, count;

        for (count = 0; count < EFX_RX_BATCH; ++count) {
                index = rx_queue->added_count & rx_queue->ptr_mask;
                rx_buf = efx_rx_buffer(rx_queue, index);

                rx_buf->u.skb = skb = netdev_alloc_skb(net_dev, skb_len);
                if (unlikely(!skb))
                        return -ENOMEM;

                /* Adjust the SKB for padding and checksum */
                skb_reserve(skb, NET_IP_ALIGN);
                rx_buf->len = skb_len - NET_IP_ALIGN;
                rx_buf->is_page = false;
                skb->ip_summed = CHECKSUM_UNNECESSARY;

                rx_buf->dma_addr = pci_map_single(efx->pci_dev,
                                                  skb->data, rx_buf->len,
                                                  PCI_DMA_FROMDEVICE);
                if (unlikely(pci_dma_mapping_error(efx->pci_dev,
                                                   rx_buf->dma_addr))) {
                        dev_kfree_skb_any(skb);
                        rx_buf->u.skb = NULL;
                        return -EIO;
                }

                ++rx_queue->added_count;
                ++rx_queue->alloc_skb_count;
        }

        return 0;
}
/* Handle a received packet.  Second half: Touches packet payload. */
void __efx_rx_packet(struct efx_channel *channel)
{
        struct efx_nic *efx = channel->efx;
        struct efx_rx_buffer *rx_buf =
                efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
        u8 *eh = efx_rx_buf_va(rx_buf);

        /* Read length from the prefix if necessary.  This already
         * excludes the length of the prefix itself.
         */
        if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN)
                rx_buf->len = le16_to_cpup((__le16 *)
                                           (eh + efx->rx_packet_len_offset));

        /* If we're in loopback test, then pass the packet directly to the
         * loopback layer, and free the rx_buf here
         */
        if (unlikely(efx->loopback_selftest)) {
                struct efx_rx_queue *rx_queue;

                efx_loopback_rx_packet(efx, eh, rx_buf->len);
                rx_queue = efx_channel_get_rx_queue(channel);
                efx_free_rx_buffers(rx_queue, rx_buf,
                                    channel->rx_pkt_n_frags);
                goto out;
        }

        if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
                rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;

        if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb &&
            !efx_channel_busy_polling(channel))
                efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
        else
                efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
out:
        channel->rx_pkt_n_frags = 0;
}
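/* efx_rx_packet() below pipelines receives by handing the previously queued
 * packet to __efx_rx_packet() before recording the new one.  The small helper
 * it uses for that, efx_rx_flush_packet(), is not part of this excerpt; a
 * minimal sketch (assumed, not verbatim from the driver) would be:
 */
static inline void efx_rx_flush_packet(struct efx_channel *channel)
{
        /* Deliver the previous packet, if one is pending. */
        if (channel->rx_pkt_n_frags)
                __efx_rx_packet(channel);
}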
void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
                   unsigned int n_frags, unsigned int len, u16 flags)
{
        struct efx_nic *efx = rx_queue->efx;
        struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
        struct efx_rx_buffer *rx_buf;

        rx_buf = efx_rx_buffer(rx_queue, index);
        rx_buf->flags |= flags;

        /* Validate the number of fragments and completed length */
        if (n_frags == 1) {
                efx_rx_packet__check_len(rx_queue, rx_buf, len);
        } else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) ||
                   unlikely(len <= (n_frags - 1) * EFX_RX_USR_BUF_SIZE) ||
                   unlikely(len > n_frags * EFX_RX_USR_BUF_SIZE) ||
                   unlikely(!efx->rx_scatter)) {
                /* If this isn't an explicit discard request, either
                 * the hardware or the driver is broken.
                 */
                WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD));
                rx_buf->flags |= EFX_RX_PKT_DISCARD;
        }

        netif_vdbg(efx, rx_status, efx->net_dev,
                   "RX queue %d received ids %x-%x len %d %s%s\n",
                   efx_rx_queue_index(rx_queue), index,
                   (index + n_frags - 1) & rx_queue->ptr_mask, len,
                   (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
                   (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : "");

        /* Discard packet, if instructed to do so.  Process the
         * previous receive first.
         */
        if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
                efx_rx_flush_packet(channel);
                efx_discard_rx_packet(channel, rx_buf, n_frags);
                return;
        }

        if (n_frags == 1)
                rx_buf->len = len;

        /* Release and/or sync the DMA mapping - assumes all RX buffers
         * consumed in-order per RX queue.
         */
        efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);

        /* Prefetch nice and early so data will (hopefully) be in cache by
         * the time we look at it.
         */
        prefetch(efx_rx_buf_va(rx_buf));

        rx_buf->page_offset += efx->type->rx_buffer_hash_size;
        rx_buf->len -= efx->type->rx_buffer_hash_size;

        if (n_frags > 1) {
                /* Release/sync DMA mapping for additional fragments.
                 * Fix length for last fragment.
                 */
                unsigned int tail_frags = n_frags - 1;

                for (;;) {
                        rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
                        if (--tail_frags == 0)
                                break;
                        efx_sync_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE);
                }
                rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE;
                efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);
        }

        /* All fragments have been DMA-synced, so recycle pages. */
        rx_buf = efx_rx_buffer(rx_queue, index);
        efx_recycle_rx_pages(channel, rx_buf, n_frags);

        /* Pipeline receives so that we give time for packet headers to be
         * prefetched into cache.
         */
        efx_rx_flush_packet(channel);
        channel->rx_pkt_n_frags = n_frags;
        channel->rx_pkt_index = index;
}
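/* efx_recycle_rx_pages(), called above, is the producer side of the recycle
 * ring: it puts the packet's pages back so efx_reuse_page() (sketched earlier)
 * can hand them to the allocator again.  It is not shown in this excerpt; a
 * simplified, assumed sketch follows.  The recycle-full accounting is omitted,
 * and efx_unmap_rx_buffer() is assumed to release the page's DMA mapping.
 */
static void efx_recycle_rx_page(struct efx_channel *channel,
                                struct efx_rx_buffer *rx_buf)
{
        struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
        struct page *page = rx_buf->page;
        unsigned index;

        /* Only recycle the page after processing the final buffer. */
        if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
                return;

        index = rx_queue->page_add & rx_queue->page_ptr_mask;
        if (rx_queue->page_ring[index] == NULL) {
                rx_queue->page_ring[index] = page;
                ++rx_queue->page_add;
                return;
        }

        /* Ring full: unmap the page and drop our reference instead. */
        efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
        put_page(rx_buf->page);
}

static void efx_recycle_rx_pages(struct efx_channel *channel,
                                 struct efx_rx_buffer *rx_buf,
                                 unsigned int n_frags)
{
        struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);

        do {
                efx_recycle_rx_page(channel, rx_buf);
                rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
        } while (--n_frags);
}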
/**
 * efx_fast_push_rx_descriptors - push new RX descriptors quickly
 * @rx_queue: RX descriptor queue
 * @retry: Recheck the fill level
 * This will aim to fill the RX descriptor queue up to
 * @rx_queue->@fast_fill_limit.  If there is insufficient atomic
 * memory to do so, the caller should retry.
 */
static int __efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue,
                                          int retry)
{
        struct efx_rx_buffer *rx_buf;
        unsigned fill_level, index;
        int i, space, rc = 0;

        /* Calculate current fill level.  Do this outside the lock,
         * because most of the time we'll end up not wanting to do the
         * fill anyway.
         */
        fill_level = (rx_queue->added_count - rx_queue->removed_count);
        EFX_BUG_ON_PARANOID(fill_level >
                            rx_queue->efx->type->rxd_ring_mask + 1);

        /* Don't fill if we don't need to */
        if (fill_level >= rx_queue->fast_fill_trigger)
                return 0;

        /* Record minimum fill level */
        if (unlikely(fill_level < rx_queue->min_fill)) {
                if (fill_level)
                        rx_queue->min_fill = fill_level;
        }

        /* Acquire RX add lock.  If this lock is contended, then a fast
         * fill must already be in progress (e.g. in the refill
         * tasklet), so we don't need to do anything
         */
        if (!spin_trylock_bh(&rx_queue->add_lock))
                return -1;

 retry:
        /* Recalculate current fill level now that we have the lock */
        fill_level = (rx_queue->added_count - rx_queue->removed_count);
        EFX_BUG_ON_PARANOID(fill_level >
                            rx_queue->efx->type->rxd_ring_mask + 1);
        space = rx_queue->fast_fill_limit - fill_level;
        if (space < EFX_RX_BATCH)
                goto out_unlock;

        EFX_TRACE(rx_queue->efx, "RX queue %d fast-filling descriptor ring from"
                  " level %d to level %d using %s allocation\n",
                  rx_queue->queue, fill_level, rx_queue->fast_fill_limit,
                  rx_queue->channel->rx_alloc_push_pages ? "page" : "skb");

        do {
                for (i = 0; i < EFX_RX_BATCH; ++i) {
                        index = (rx_queue->added_count &
                                 rx_queue->efx->type->rxd_ring_mask);
                        rx_buf = efx_rx_buffer(rx_queue, index);
                        rc = efx_init_rx_buffer(rx_queue, rx_buf);
                        if (unlikely(rc))
                                goto out;
                        ++rx_queue->added_count;
                }
        } while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH);

        EFX_TRACE(rx_queue->efx, "RX queue %d fast-filled descriptor ring "
                  "to level %d\n", rx_queue->queue,
                  rx_queue->added_count - rx_queue->removed_count);

 out:
        /* Send write pointer to card. */
        falcon_notify_rx_desc(rx_queue);

        /* If the fast fill is running from the refill tasklet, then
         * for SMP systems it may be running on a different CPU to
         * RX event processing, which means that the fill level may now be
         * out of date.
         */
        if (unlikely(retry && (rc == 0)))
                goto retry;

 out_unlock:
        spin_unlock_bh(&rx_queue->add_lock);
        return rc;
}
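/* __efx_fast_push_rx_descriptors() is normally reached through a thin
 * wrapper that falls back to a deferred slow fill when the atomic refill
 * fails or the add lock is contended.  That wrapper is not part of this
 * excerpt; the following is only an assumed sketch, and the name and
 * signature of efx_schedule_slow_fill() are assumptions for illustration.
 */
void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
{
        int rc;

        rc = __efx_fast_push_rx_descriptors(rx_queue, 0);
        if (rc)
                /* Could not fill atomically (or a fill is already in
                 * progress): schedule the slow-fill work item to top the
                 * queue up later.
                 */
                efx_schedule_slow_fill(rx_queue, 0);
}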