/*
 * netvsc_recv_callback - Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct net_device *net,
			 struct vmbus_channel *channel,
			 void *data, u32 len,
			 const struct ndis_tcp_ip_checksum_info *csum_info,
			 const struct ndis_pkt_8021q_info *vlan)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct netvsc_device *net_device;
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	struct netvsc_channel *nvchan;
	struct net_device *vf_netdev;
	struct sk_buff *skb;
	struct netvsc_stats *rx_stats;

	if (net->reg_state != NETREG_REGISTERED)
		return NVSP_STAT_FAIL;

	/*
	 * If necessary, inject this packet into the VF interface.
	 * On Hyper-V, multicast and broadcast packets are only delivered
	 * to the synthetic interface (after subjecting these to
	 * policy filters on the host). Deliver these via the VF
	 * interface in the guest.
	 */
	rcu_read_lock();

	net_device = rcu_dereference(net_device_ctx->nvdev);
	if (unlikely(!net_device))
		goto drop;

	nvchan = &net_device->chan_table[q_idx];
	vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
	if (vf_netdev && (vf_netdev->flags & IFF_UP))
		net = vf_netdev;

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
				    csum_info, vlan, data, len);
	if (unlikely(!skb)) {
drop:
		++net->stats.rx_dropped;
		rcu_read_unlock();
		return NVSP_STAT_FAIL;
	}

	if (net != vf_netdev)
		skb_record_rx_queue(skb, q_idx);

	/*
	 * Even if injecting the packet, record the statistics
	 * on the synthetic device because modifying the VF device
	 * statistics will not work correctly.
	 */
	rx_stats = &nvchan->rx_stats;
	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += len;

	if (skb->pkt_type == PACKET_BROADCAST)
		++rx_stats->broadcast;
	else if (skb->pkt_type == PACKET_MULTICAST)
		++rx_stats->multicast;
	u64_stats_update_end(&rx_stats->syncp);

	napi_gro_receive(&nvchan->napi, skb);
	rcu_read_unlock();

	return 0;
}
static void ri_tasklet(unsigned long dev)
{
	struct net_device *_dev = (struct net_device *)dev;
	struct ifb_private *dp = netdev_priv(_dev);
	struct netdev_queue *txq;
	struct sk_buff *skb;

	txq = netdev_get_tx_queue(_dev, 0);
	if ((skb = skb_peek(&dp->tq)) == NULL) {
		if (__netif_tx_trylock(txq)) {
			skb_queue_splice_tail_init(&dp->rq, &dp->tq);
			__netif_tx_unlock(txq);
		} else {
			/* reschedule */
			goto resched;
		}
	}

	while ((skb = __skb_dequeue(&dp->tq)) != NULL) {
		u32 from = G_TC_FROM(skb->tc_verd);

		skb->tc_verd = 0;
		skb->tc_verd = SET_TC_NCLS(skb->tc_verd);

		u64_stats_update_begin(&dp->tsync);
		dp->tx_packets++;
		dp->tx_bytes += skb->len;
		u64_stats_update_end(&dp->tsync);

		rcu_read_lock();
		skb->dev = dev_get_by_index_rcu(&init_net, skb->skb_iif);
		if (!skb->dev) {
			rcu_read_unlock();
			dev_kfree_skb(skb);
			_dev->stats.tx_dropped++;
			if (skb_queue_len(&dp->tq) != 0)
				goto resched;
			break;
		}
		rcu_read_unlock();
		skb->skb_iif = _dev->ifindex;

		if (from & AT_EGRESS) {
			dev_queue_xmit(skb);
		} else if (from & AT_INGRESS) {
			skb_pull(skb, skb->dev->hard_header_len);
			netif_receive_skb(skb);
		} else
			BUG();
	}

	if (__netif_tx_trylock(txq)) {
		if ((skb = skb_peek(&dp->rq)) == NULL) {
			dp->tasklet_pending = 0;
			if (netif_queue_stopped(_dev))
				netif_wake_queue(_dev);
		} else {
			__netif_tx_unlock(txq);
			goto resched;
		}
		__netif_tx_unlock(txq);
	} else {
resched:
		dp->tasklet_pending = 1;
		tasklet_schedule(&dp->ifb_tasklet);
	}
}
bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
{
	struct sk_buff *skb = *skbp;
	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
	struct net_device *vlan_dev;
	struct vlan_pcpu_stats *rx_stats;

	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
	if (!vlan_dev) {
		/* Only the last call to vlan_do_receive() should change
		 * pkt_type to PACKET_OTHERHOST
		 */
		if (vlan_id && last_handler)
			skb->pkt_type = PACKET_OTHERHOST;
		return false;
	}

	skb = *skbp = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		return false;

	skb->dev = vlan_dev;
	if (skb->pkt_type == PACKET_OTHERHOST) {
		/* Our lower layer thinks this is not local, let's make sure.
		 * This allows the VLAN to have a different MAC than the
		 * underlying device, and still route correctly.
		 */
		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
					vlan_dev->dev_addr))
			skb->pkt_type = PACKET_HOST;
	}

	if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
		unsigned int offset = skb->data - skb_mac_header(skb);

		/*
		 * vlan_insert_tag expects skb->data to point to the mac
		 * header, so move skb->data there before calling it and
		 * restore the original position afterwards.
		 */
		skb_push(skb, offset);
		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
		if (!skb)
			return false;
		skb_pull(skb, offset + VLAN_HLEN);
		skb_reset_mac_len(skb);
	}

	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
	skb->vlan_tci = 0;

	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->rx_packets++;
	rx_stats->rx_bytes += skb->len;
	if (skb->pkt_type == PACKET_MULTICAST)
		rx_stats->rx_multicast++;
	u64_stats_update_end(&rx_stats->syncp);

	return true;
}
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, int hdr_len, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	secpath_reset(skb);

	skb->protocol = tpi->proto;

	skb->mac_header = skb->network_header;
	__pskb_pull(skb, hdr_len);
	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		iph = ip_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	skb->pkt_type = PACKET_HOST;
	__skb_tunnel_rx(skb, tunnel->dev);

	skb_reset_network_header(skb);
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
	struct mlxsw_sx_port_pcpu_stats *pcpu_stats;
	const struct mlxsw_tx_info tx_info = {
		.local_port = mlxsw_sx_port->local_port,
		.is_emad = false,
	};
	u64 len;
	int err;

	if (mlxsw_core_skb_transmit_busy(mlxsw_sx, &tx_info))
		return NETDEV_TX_BUSY;

	if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) {
		struct sk_buff *skb_orig = skb;

		skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN);
		if (!skb) {
			this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);
			dev_kfree_skb_any(skb_orig);
			return NETDEV_TX_OK;
		}
	}
	mlxsw_sx_txhdr_construct(skb, &tx_info);
	len = skb->len;
	/* Due to a race we might fail here because of a full queue. In that
	 * unlikely case we simply drop the packet.
	 */
	err = mlxsw_core_skb_transmit(mlxsw_sx, skb, &tx_info);

	if (!err) {
		pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats);
		u64_stats_update_begin(&pcpu_stats->syncp);
		pcpu_stats->tx_packets++;
		pcpu_stats->tx_bytes += len;
		u64_stats_update_end(&pcpu_stats->syncp);
	} else {
		this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);
		dev_kfree_skb_any(skb);
	}
	return NETDEV_TX_OK;
}

static int mlxsw_sx_port_change_mtu(struct net_device *dev, int mtu)
{
	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
	int err;

	err = mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
	if (err)
		return err;
	dev->mtu = mtu;
	return 0;
}

static struct rtnl_link_stats64 *
mlxsw_sx_port_get_stats64(struct net_device *dev,
			  struct rtnl_link_stats64 *stats)
{
	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
	struct mlxsw_sx_port_pcpu_stats *p;
	u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
	u32 tx_dropped = 0;
	unsigned int start;
	int i;

	for_each_possible_cpu(i) {
		p = per_cpu_ptr(mlxsw_sx_port->pcpu_stats, i);
		do {
			start = u64_stats_fetch_begin_irq(&p->syncp);
			rx_packets = p->rx_packets;
			rx_bytes = p->rx_bytes;
			tx_packets = p->tx_packets;
			tx_bytes = p->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&p->syncp, start));

		stats->rx_packets += rx_packets;
		stats->rx_bytes += rx_bytes;
		stats->tx_packets += tx_packets;
		stats->tx_bytes += tx_bytes;
		/* tx_dropped is u32, updated without syncp protection. */
		tx_dropped += p->tx_dropped;
	}
	stats->tx_dropped = tx_dropped;
	return stats;
}

static const struct net_device_ops mlxsw_sx_port_netdev_ops = {
	.ndo_open		= mlxsw_sx_port_open,
	.ndo_stop		= mlxsw_sx_port_stop,
	.ndo_start_xmit		= mlxsw_sx_port_xmit,
	.ndo_change_mtu		= mlxsw_sx_port_change_mtu,
	.ndo_get_stats64	= mlxsw_sx_port_get_stats64,
};

static void mlxsw_sx_port_get_drvinfo(struct net_device *dev,
				      struct ethtool_drvinfo *drvinfo)
{
	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;

	strlcpy(drvinfo->driver, mlxsw_sx_driver_name, sizeof(drvinfo->driver));
	strlcpy(drvinfo->version, mlxsw_sx_driver_version,
		sizeof(drvinfo->version));
	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
		 "%d.%d.%d",
		 mlxsw_sx->bus_info->fw_rev.major,
		 mlxsw_sx->bus_info->fw_rev.minor,
		 mlxsw_sx->bus_info->fw_rev.subminor);
	strlcpy(drvinfo->bus_info, mlxsw_sx->bus_info->device_name,
		sizeof(drvinfo->bus_info));
}

struct mlxsw_sx_port_hw_stats {
	char str[ETH_GSTRING_LEN];
	u64 (*getter)(char *payload);
};

static const struct mlxsw_sx_port_hw_stats mlxsw_sx_port_hw_stats[] = {
	{
		.str = "a_frames_transmitted_ok",
		.getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get,
	},
	{
		.str = "a_frames_received_ok",
/*
 * netvsc_recv_callback - Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct hv_device *device_obj,
			 struct hv_netvsc_packet *packet,
			 struct ndis_tcp_ip_checksum_info *csum_info)
{
	struct net_device *net;
	struct net_device_context *net_device_ctx;
	struct sk_buff *skb;
	struct netvsc_stats *rx_stats;

	net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
	if (!net || net->reg_state != NETREG_REGISTERED) {
		packet->status = NVSP_STAT_FAIL;
		return 0;
	}
	net_device_ctx = netdev_priv(net);
	rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		packet->status = NVSP_STAT_FAIL;
		return 0;
	}

	/*
	 * Copy to skb. This copy is needed here since the memory pointed to
	 * by hv_netvsc_packet cannot be deallocated.
	 */
	memcpy(skb_put(skb, packet->total_data_buflen), packet->data,
	       packet->total_data_buflen);

	skb->protocol = eth_type_trans(skb, net);
	if (csum_info) {
		/* We only look at the IP checksum here.
		 * Should we be dropping the packet if checksum
		 * failed? How do we deal with other checksums - TCP/UDP?
		 */
		if (csum_info->receive.ip_checksum_succeeded)
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		else
			skb->ip_summed = CHECKSUM_NONE;
	}

	if (packet->vlan_tci & VLAN_TAG_PRESENT)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       packet->vlan_tci);

	skb_record_rx_queue(skb, packet->channel->
			    offermsg.offer.sub_channel_index);

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += packet->total_data_buflen;
	u64_stats_update_end(&rx_stats->syncp);

	/*
	 * Pass the skb back up. Network stack will deallocate the skb when it
	 * is done.
	 * TODO - use NAPI?
	 */
	netif_rx(skb);

	return 0;
}
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet = NULL;
	int ret;
	unsigned int num_data_pgs;
	struct rndis_message *rndis_msg;
	struct rndis_packet *rndis_pkt;
	u32 rndis_msg_size;
	struct rndis_per_packet_info *ppi;
	u32 hash;
	u32 skb_length;
	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
	struct hv_page_buffer *pb = page_buf;

	/* We will need at most two pages to describe the rndis
	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
	 * of pages in a single packet. If the skb is scattered around
	 * more pages we try linearizing it.
	 */
	skb_length = skb->len;
	num_data_pgs = netvsc_get_slots(skb) + 2;

	if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) {
		++net_device_ctx->eth_stats.tx_scattered;

		if (skb_linearize(skb))
			goto no_memory;

		num_data_pgs = netvsc_get_slots(skb) + 2;
		if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
			++net_device_ctx->eth_stats.tx_too_big;
			goto drop;
		}
	}

	/*
	 * Place the rndis header in the skb head room and
	 * the skb->cb will be used for hv_netvsc_packet
	 * structure.
	 */
	ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
	if (ret)
		goto no_memory;

	/* Use the skb control buffer for building up the packet */
	BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	packet = (struct hv_netvsc_packet *)skb->cb;

	/* TODO: This will likely evaluate to false, since RH7 and
	 * below kernels will set the next pointer to NULL before calling
	 * into here. Should find another way to set this flag.
	 */
	packet->xmit_more = (skb->next != NULL);

	packet->q_idx = skb_get_queue_mapping(skb);

	packet->total_data_buflen = skb->len;

	rndis_msg = (struct rndis_message *)skb->head;

	memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);

	packet->send_completion_ctx = packet;

	/* Add the rndis header */
	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
	rndis_msg->msg_len = packet->total_data_buflen;
	rndis_pkt = &rndis_msg->msg.pkt;
	rndis_pkt->data_offset = sizeof(struct rndis_packet);
	rndis_pkt->data_len = packet->total_data_buflen;
	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);

	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);

#ifdef NOTYET
	// Divergence from upstream commit:
	// 307f099520b66504cf6c5638f3f404c48b9fb45b
	hash = skb_get_hash_raw(skb);
#endif
	hash = skb_get_hash(skb);
	if (hash != 0 && net->real_num_tx_queues > 1) {
		rndis_msg_size += NDIS_HASH_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
				    NBL_HASH_VALUE);
		*(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
	}

	if (skb_vlan_tag_present(skb)) {
		struct ndis_pkt_8021q_info *vlan;

		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
				    IEEE_8021Q_INFO);
		vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
						      ppi->ppi_offset);
		vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
		vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
				VLAN_PRIO_SHIFT;
	}

	if (skb_is_gso(skb)) {
		struct ndis_tcp_lso_info *lso_info;

		rndis_msg_size += NDIS_LSO_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
				    TCP_LARGESEND_PKTINFO);

		lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
							ppi->ppi_offset);

		lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
		if (skb->protocol == htons(ETH_P_IP)) {
			lso_info->lso_v2_transmit.ip_version =
				NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
			ip_hdr(skb)->tot_len = 0;
			ip_hdr(skb)->check = 0;
			tcp_hdr(skb)->check =
				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
						   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
		} else {
			lso_info->lso_v2_transmit.ip_version =
				NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
			ipv6_hdr(skb)->payload_len = 0;
			tcp_hdr(skb)->check =
				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						 &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
		}
		lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb);
		lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (net_checksum_info(skb) & net_device_ctx->tx_checksum_mask) {
			struct ndis_tcp_ip_checksum_info *csum_info;

			rndis_msg_size += NDIS_CSUM_PPI_SIZE;
			ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
					    TCPIP_CHKSUM_PKTINFO);

			csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
									 ppi->ppi_offset);

			csum_info->transmit.tcp_header_offset = skb_transport_offset(skb);

			if (skb->protocol == htons(ETH_P_IP)) {
				csum_info->transmit.is_ipv4 = 1;

				if (ip_hdr(skb)->protocol == IPPROTO_TCP)
					csum_info->transmit.tcp_checksum = 1;
				else
					csum_info->transmit.udp_checksum = 1;
			} else {
				csum_info->transmit.is_ipv6 = 1;

				if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
					csum_info->transmit.tcp_checksum = 1;
				else
					csum_info->transmit.udp_checksum = 1;
			}
		} else {
			/* Can't do offload of this type of checksum */
			if (skb_checksum_help(skb))
				goto drop;
		}
	}

	/* Start filling in the page buffers with the rndis hdr */
	rndis_msg->msg_len += rndis_msg_size;
	packet->total_data_buflen = rndis_msg->msg_len;
	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
					       skb, packet, &pb);

	/* timestamp packet in software */
	skb_tx_timestamp(skb);

	ret = netvsc_send(net_device_ctx->device_ctx, packet,
			  rndis_msg, &pb, skb);
	if (likely(ret == 0)) {
		struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);

		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->packets++;
		tx_stats->bytes += skb_length;
		u64_stats_update_end(&tx_stats->syncp);
		return NETDEV_TX_OK;
	}

	if (ret == -EAGAIN) {
		++net_device_ctx->eth_stats.tx_busy;
		return NETDEV_TX_BUSY;
	}

	if (ret == -ENOSPC)
		++net_device_ctx->eth_stats.tx_no_space;

drop:
	dev_kfree_skb_any(skb);
	net->stats.tx_dropped++;

	return NETDEV_TX_OK;

no_memory:
	++net_device_ctx->eth_stats.tx_no_memory;
	goto drop;
}
/*
 * netvsc_recv_callback - Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct hv_device *device_obj,
			 struct hv_netvsc_packet *packet,
			 void **data,
			 struct ndis_tcp_ip_checksum_info *csum_info,
			 struct vmbus_channel *channel,
			 u16 vlan_tci)
{
	struct net_device *net = hv_get_drvdata(device_obj);
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct sk_buff *skb;
	struct sk_buff *vf_skb;
	struct netvsc_stats *rx_stats;
	struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
	u32 bytes_recvd = packet->total_data_buflen;
	int ret = 0;

	if (!net || net->reg_state != NETREG_REGISTERED)
		return NVSP_STAT_FAIL;

	if (READ_ONCE(netvsc_dev->vf_inject)) {
		atomic_inc(&netvsc_dev->vf_use_cnt);
		if (!READ_ONCE(netvsc_dev->vf_inject)) {
			/*
			 * We raced; just move on.
			 */
			atomic_dec(&netvsc_dev->vf_use_cnt);
			goto vf_injection_done;
		}

		/*
		 * Inject this packet into the VF interface.
		 * On Hyper-V, multicast and broadcast packets
		 * are only delivered on the synthetic interface
		 * (after subjecting these to policy filters on
		 * the host). Deliver these via the VF interface
		 * in the guest.
		 */
		vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
					       csum_info, *data, vlan_tci);
		if (vf_skb != NULL) {
			++netvsc_dev->vf_netdev->stats.rx_packets;
			netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
			netif_receive_skb(vf_skb);
		} else {
			++net->stats.rx_dropped;
			ret = NVSP_STAT_FAIL;
		}
		atomic_dec(&netvsc_dev->vf_use_cnt);
		return ret;
	}

vf_injection_done:
	rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci);
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		return NVSP_STAT_FAIL;
	}
	skb_record_rx_queue(skb, channel->
			    offermsg.offer.sub_channel_index);

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += packet->total_data_buflen;
	u64_stats_update_end(&rx_stats->syncp);

	/*
	 * Pass the skb back up. Network stack will deallocate the skb when it
	 * is done.
	 * TODO - use NAPI?
	 */
	netif_rx(skb);

	return 0;
}
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
					   struct net_device *vrf_dev)
{
	struct iphdr *ip4h = ip_hdr(skb);
	int ret = NET_XMIT_DROP;
	struct flowi4 fl4 = {
		/* needed to match OIF rule */
		.flowi4_oif = vrf_dev->ifindex,
		.flowi4_iif = LOOPBACK_IFINDEX,
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
		.flowi4_proto = ip4h->protocol,
		.daddr = ip4h->daddr,
		.saddr = ip4h->saddr,
	};
	struct net *net = dev_net(vrf_dev);
	struct rtable *rt;

	rt = ip_route_output_flow(net, &fl4, NULL);
	if (IS_ERR(rt))
		goto err;

	skb_dst_drop(skb);

	/* if dst.dev is loopback or the VRF device again this is locally
	 * originated traffic destined to a local address. Short circuit
	 * to Rx path
	 */
	if (rt->dst.dev == vrf_dev)
		return vrf_local_xmit(skb, vrf_dev, &rt->dst);

	skb_dst_set(skb, &rt->dst);

	/* strip the ethernet header added for pass through VRF device */
	__skb_pull(skb, skb_network_offset(skb));

	if (!ip4h->saddr) {
		ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0,
					       RT_SCOPE_LINK);
	}

	ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
	if (unlikely(net_xmit_eval(ret)))
		vrf_dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;

out:
	return ret;
err:
	vrf_tx_error(vrf_dev, skb);
	goto out;
}

static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return vrf_process_v4_outbound(skb, dev);
	case htons(ETH_P_IPV6):
		return vrf_process_v6_outbound(skb, dev);
	default:
		vrf_tx_error(dev, skb);
		return NET_XMIT_DROP;
	}
}

static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
{
	int len = skb->len;
	netdev_tx_t ret = is_ip_tx_frame(skb, dev);

	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
		struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);

		u64_stats_update_begin(&dstats->syncp);
		dstats->tx_pkts++;
		dstats->tx_bytes += len;
		u64_stats_update_end(&dstats->syncp);
	} else {
		this_cpu_inc(dev->dstats->tx_drps);
	}

	return ret;
}
/*
 * netvsc_recv_callback - Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct net_device *net,
			 struct vmbus_channel *channel,
			 void *data, u32 len,
			 const struct ndis_tcp_ip_checksum_info *csum_info,
			 const struct ndis_pkt_8021q_info *vlan)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct netvsc_device *net_device = net_device_ctx->nvdev;
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
	struct sk_buff *skb;
	struct sk_buff *vf_skb;
	struct netvsc_stats *rx_stats;
	int ret = 0;

	if (!net || net->reg_state != NETREG_REGISTERED)
		return NVSP_STAT_FAIL;

	if (READ_ONCE(net_device_ctx->vf_inject)) {
		atomic_inc(&net_device_ctx->vf_use_cnt);
		if (!READ_ONCE(net_device_ctx->vf_inject)) {
			/*
			 * We raced; just move on.
			 */
			atomic_dec(&net_device_ctx->vf_use_cnt);
			goto vf_injection_done;
		}

		/*
		 * Inject this packet into the VF interface.
		 * On Hyper-V, multicast and broadcast packets
		 * are only delivered on the synthetic interface
		 * (after subjecting these to policy filters on
		 * the host). Deliver these via the VF interface
		 * in the guest.
		 */
		vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev,
					       csum_info, vlan, data, len);
		if (vf_skb != NULL) {
			++net_device_ctx->vf_netdev->stats.rx_packets;
			net_device_ctx->vf_netdev->stats.rx_bytes += len;
			netif_receive_skb(vf_skb);
		} else {
			++net->stats.rx_dropped;
			ret = NVSP_STAT_FAIL;
		}
		atomic_dec(&net_device_ctx->vf_use_cnt);
		return ret;
	}

vf_injection_done:
	rx_stats = &nvchan->rx_stats;

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netvsc_alloc_recv_skb(net, csum_info, vlan, data, len);
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		return NVSP_STAT_FAIL;
	}
	skb_record_rx_queue(skb, q_idx);

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += len;

	if (skb->pkt_type == PACKET_BROADCAST)
		++rx_stats->broadcast;
	else if (skb->pkt_type == PACKET_MULTICAST)
		++rx_stats->multicast;
	u64_stats_update_end(&rx_stats->syncp);

	net->stats.rx_packets++;
	net->stats.rx_bytes += len;

	napi_gro_receive(&nvchan->napi, skb);

	return 0;
}
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet = NULL;
	int ret;
	unsigned int num_data_pgs;
	struct rndis_message *rndis_msg;
	struct rndis_packet *rndis_pkt;
	u32 rndis_msg_size;
	bool isvlan;
	bool linear = false;
	struct rndis_per_packet_info *ppi;
	struct ndis_tcp_ip_checksum_info *csum_info;
	struct ndis_tcp_lso_info *lso_info;
	int hdr_offset;
	u32 net_trans_info;
	u32 hash;
	u32 skb_length;
	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
	struct hv_page_buffer *pb = page_buf;
	struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);

	/* We will need at most two pages to describe the rndis
	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
	 * of pages in a single packet. If the skb is scattered around
	 * more pages we try linearizing it.
	 */

check_size:
	skb_length = skb->len;
	num_data_pgs = netvsc_get_slots(skb) + 2;
	if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) {
		net_alert_ratelimited("packet too big: %u pages (%u bytes)\n",
				      num_data_pgs, skb->len);
		ret = -EFAULT;
		goto drop;
	} else if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
		if (skb_linearize(skb)) {
			net_alert_ratelimited("failed to linearize skb\n");
			ret = -ENOMEM;
			goto drop;
		}
		linear = true;
		goto check_size;
	}

	/*
	 * Place the rndis header in the skb head room and
	 * the skb->cb will be used for hv_netvsc_packet
	 * structure.
	 */
	ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
	if (ret) {
		netdev_err(net, "unable to alloc hv_netvsc_packet\n");
		ret = -ENOMEM;
		goto drop;
	}
	/* Use the skb control buffer for building up the packet */
	BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	packet = (struct hv_netvsc_packet *)skb->cb;

	packet->q_idx = skb_get_queue_mapping(skb);

	packet->total_data_buflen = skb->len;

	rndis_msg = (struct rndis_message *)skb->head;

	memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);

	isvlan = skb->vlan_tci & VLAN_TAG_PRESENT;

	/* Add the rndis header */
	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
	rndis_msg->msg_len = packet->total_data_buflen;
	rndis_pkt = &rndis_msg->msg.pkt;
	rndis_pkt->data_offset = sizeof(struct rndis_packet);
	rndis_pkt->data_len = packet->total_data_buflen;
	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);

	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);

	hash = skb_get_hash_raw(skb);
	if (hash != 0 && net->real_num_tx_queues > 1) {
		rndis_msg_size += NDIS_HASH_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
				    NBL_HASH_VALUE);
		*(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
	}

	if (isvlan) {
		struct ndis_pkt_8021q_info *vlan;

		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
				    IEEE_8021Q_INFO);
		vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
						      ppi->ppi_offset);
		vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
		vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
				VLAN_PRIO_SHIFT;
	}

	net_trans_info = get_net_transport_info(skb, &hdr_offset);
	if (net_trans_info == TRANSPORT_INFO_NOT_IP)
		goto do_send;

	/*
	 * Setup the sendside checksum offload only if this is not a
	 * GSO packet.
	 */
	if (skb_is_gso(skb))
		goto do_lso;

	if ((skb->ip_summed == CHECKSUM_NONE) ||
	    (skb->ip_summed == CHECKSUM_UNNECESSARY))
		goto do_send;

	rndis_msg_size += NDIS_CSUM_PPI_SIZE;
	ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
			    TCPIP_CHKSUM_PKTINFO);

	csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
							 ppi->ppi_offset);

	if (net_trans_info & (INFO_IPV4 << 16))
		csum_info->transmit.is_ipv4 = 1;
	else
		csum_info->transmit.is_ipv6 = 1;

	if (net_trans_info & INFO_TCP) {
		csum_info->transmit.tcp_checksum = 1;
		csum_info->transmit.tcp_header_offset = hdr_offset;
	} else if (net_trans_info & INFO_UDP) {
		/* UDP checksum offload is not supported on ws2008r2.
		 * Furthermore, on ws2012 and ws2012r2, there are some
		 * issues with udp checksum offload from Linux guests.
		 * (these are host issues).
		 * For now compute the checksum here.
		 */
		struct udphdr *uh;
		u16 udp_len;

		ret = skb_cow_head(skb, 0);
		if (ret)
			goto drop;

		uh = udp_hdr(skb);
		udp_len = ntohs(uh->len);
		uh->check = 0;
		uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr,
					      ip_hdr(skb)->daddr,
					      udp_len, IPPROTO_UDP,
					      csum_partial(uh, udp_len, 0));
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;

		csum_info->transmit.udp_checksum = 0;
	}
	goto do_send;

do_lso:
	rndis_msg_size += NDIS_LSO_PPI_SIZE;
	ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
			    TCP_LARGESEND_PKTINFO);

	lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
						ppi->ppi_offset);

	lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
	if (net_trans_info & (INFO_IPV4 << 16)) {
		lso_info->lso_v2_transmit.ip_version =
			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
		ip_hdr(skb)->tot_len = 0;
		ip_hdr(skb)->check = 0;
		tcp_hdr(skb)->check =
			~csum_tcpudp_magic(ip_hdr(skb)->saddr,
					   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	} else {
		lso_info->lso_v2_transmit.ip_version =
			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
		ipv6_hdr(skb)->payload_len = 0;
		tcp_hdr(skb)->check =
			~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
					 &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	}
	lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
	lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;

do_send:
	/* Start filling in the page buffers with the rndis hdr */
	rndis_msg->msg_len += rndis_msg_size;
	packet->total_data_buflen = rndis_msg->msg_len;
	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
					       skb, packet, &pb);

	/* timestamp packet in software */
	skb_tx_timestamp(skb);
	ret = netvsc_send(net_device_ctx->device_ctx, packet,
			  rndis_msg, &pb, skb);

drop:
	if (ret == 0) {
		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->packets++;
		tx_stats->bytes += skb_length;
		u64_stats_update_end(&tx_stats->syncp);
	} else {
		if (ret != -EAGAIN) {
			dev_kfree_skb_any(skb);
			net->stats.tx_dropped++;
		}
	}

	return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
}
static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned short id;
	struct netfront_info *np = netdev_priv(dev);
	struct netfront_stats *stats = this_cpu_ptr(np->stats);
	struct xen_netif_tx_request *tx;
	struct xen_netif_extra_info *extra;
	char *data = skb->data;
	RING_IDX i;
	grant_ref_t ref;
	unsigned long mfn;
	int notify;
	int frags = skb_shinfo(skb)->nr_frags;
	unsigned int offset = offset_in_page(data);
	unsigned int len = skb_headlen(skb);
	unsigned long flags;

	frags += DIV_ROUND_UP(offset + len, PAGE_SIZE);
	if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
		printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
		       frags);
		dump_stack();
		goto drop;
	}

	spin_lock_irqsave(&np->tx_lock, flags);

	if (unlikely(!netif_carrier_ok(dev) ||
		     (frags > 1 && !xennet_can_sg(dev)) ||
		     netif_needs_gso(skb, netif_skb_features(skb)))) {
		spin_unlock_irqrestore(&np->tx_lock, flags);
		goto drop;
	}

	i = np->tx.req_prod_pvt;

	id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
	np->tx_skbs[id].skb = skb;

	tx = RING_GET_REQUEST(&np->tx, i);

	tx->id = id;
	ref = gnttab_claim_grant_reference(&np->gref_tx_head);
	BUG_ON((signed short)ref < 0);
	mfn = virt_to_mfn(data);
	gnttab_grant_foreign_access_ref(
		ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
	tx->gref = np->grant_tx_ref[id] = ref;
	tx->offset = offset;
	tx->size = len;
	extra = NULL;

	tx->flags = 0;
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
		tx->flags |= XEN_NETTXF_data_validated;

	if (skb_shinfo(skb)->gso_size) {
		struct xen_netif_extra_info *gso;

		gso = (struct xen_netif_extra_info *)
			RING_GET_REQUEST(&np->tx, ++i);

		if (extra)
			extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
		else
			tx->flags |= XEN_NETTXF_extra_info;

		gso->u.gso.size = skb_shinfo(skb)->gso_size;
		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
		gso->u.gso.pad = 0;
		gso->u.gso.features = 0;

		gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
		gso->flags = 0;
		extra = gso;
	}

	np->tx.req_prod_pvt = i + 1;

	xennet_make_frags(skb, dev, tx);
	tx->size = skb->len;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
	if (notify)
		notify_remote_via_irq(np->netdev->irq);

	u64_stats_update_begin(&stats->syncp);
	stats->tx_bytes += skb->len;
	stats->tx_packets++;
	u64_stats_update_end(&stats->syncp);

	xennet_tx_buf_gc(dev);

	if (!netfront_tx_slot_available(np))
		netif_stop_queue(dev);

	spin_unlock_irqrestore(&np->tx_lock, flags);

	return NETDEV_TX_OK;

 drop:
	dev->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
/*
 *	Determine the packet's protocol ID. The rule here is that we
 *	assume 802.3 if the type field is short enough to be a length.
 *	This is normal practice and works for any 'now in use' protocol.
 *
 *	Also, at this point we assume that we ARE dealing exclusively with
 *	VLAN packets, or packets that should be made into VLAN packets based
 *	on a default VLAN ID.
 *
 *	NOTE: Should be similar to ethernet/eth.c.
 *
 *	SANITY NOTE: This method is called when a packet is moving up the stack
 *	             towards userland. To get here, it would have already passed
 *	             through the ethernet/eth.c eth_type_trans() method.
 *	SANITY NOTE 2: We are referencing the VLAN_HDR fields, which MAY be
 *	               stored UNALIGNED in the memory. RISC systems don't like
 *	               such cases very much...
 *	SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be
 *	                aligned, so there doesn't need to be any of the unaligned
 *	                stuff. It has been commented out now... --Ben
 *
 */
int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
		  struct packet_type *ptype, struct net_device *orig_dev)
{
	struct vlan_hdr *vhdr;
	struct vlan_rx_stats *rx_stats;
	struct net_device *vlan_dev;
	u16 vlan_id;
	u16 vlan_tci;
#if defined(CONFIG_TCSUPPORT_VLAN_TAG)
	u16 *proto = NULL;
#endif
#if defined(CONFIG_TCSUPPORT_PON_VLAN)
	int ret = 0;
#endif

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto err_free;

	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
		goto err_free;

	vhdr = (struct vlan_hdr *)skb->data;
	vlan_tci = ntohs(vhdr->h_vlan_TCI);
	vlan_id = vlan_tci & VLAN_VID_MASK;

#ifdef CONFIG_TCSUPPORT_PON_VLAN
#if 0
	if (orig_dev->name[0] == 'e')
		skb->pon_vlan_flag |= PON_PKT_FROM_LAN;
	else if (orig_dev->name[0] == 'n') {
		skb->pon_vlan_flag |= PON_PKT_FROM_WAN;
		skb->pon_vlan_flag |= PON_PKT_ROUTING_FLAG;
	}
#endif
	if (ptype->type != ETH_P_8021Q) {
		goto Pon_Handle;
	}
#endif
	rcu_read_lock();
	vlan_dev = __find_vlan_dev(dev, vlan_id);

	/* If the VLAN device is defined, we use it.
	 * If not, and the VID is 0, it is a 802.1p packet (not
	 * really a VLAN), so we will just netif_rx it later to the
	 * original interface, but with the skb->proto set to the
	 * wrapped proto: we do nothing here.
	 */
	if (!vlan_dev) {
#ifdef CONFIG_TCSUPPORT_PON_VLAN
Pon_Handle:
		if (pon_store_tag_hook) {
			ret = pon_store_tag_hook(skb, orig_dev);
			if (ret == 0) {
				netif_rx(skb);
				return 0;
			} else if (ret == -1) {
				kfree_skb(skb);
				return -1;
			} else {
				//HGU mode, do nothing
			}
		}
#endif
#ifdef CONFIG_TCSUPPORT_VLAN_TAG
		if (check_vtag_hook && (check_vtag_hook() == 1)) {
			if (remove_vtag_hook) {
				if (remove_vtag_hook(skb, orig_dev) == -1) {
					/* must free skb !! */
					kfree_skb(skb);
					rcu_read_unlock();
					return -1;
				} else {
					netif_rx(skb);
					rcu_read_unlock();
					return 0;
				}
			} else {
				goto Normal_Handle;
			}
		} else {
Normal_Handle:
#if !defined(CONFIG_TCSUPPORT_CT)
			if ((orig_dev != NULL) && ((orig_dev->name[0] == 'b') ||
			    (orig_dev->name[0] == 'n')))
#endif
			{
				proto = vhdr->h_vlan_encapsulated_proto;
				skb->protocol = proto;
				/* Take off the VLAN header (4 bytes currently) */
				skb_pull_rcsum(skb, VLAN_HLEN);
				skb->dev = orig_dev;

				netif_rx(skb);
				rcu_read_unlock();
				return 0;
			} else {
				pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
					 __func__, vlan_id, dev->name);
				kfree_skb(skb);
				rcu_read_unlock();
				return -1;
			}
		}
#else
		if (vlan_id) {
			pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
				 __func__, vlan_id, dev->name);
			goto err_unlock;
		}
		rx_stats = NULL;
#endif
	} else {
		skb->dev = vlan_dev;

#if !defined(CONFIG_TCSUPPORT_CT)
#ifdef CONFIG_PORT_BINDING
		if (skb->dev->name[0] == 'e') {
			// skb->mark |= MASK_ORIGIN_DEV;
			skb->portbind_mark |= MASK_ORIGIN_DEV;
			memcpy(skb->orig_dev_name, skb->dev->name, IFNAMSIZ);
			//printk("vlan_skb_recv: begin orig_dev name is [%s], orig_dev name is [%s]\n", skb->orig_dev_name, orig_dev->name);
		}
#endif
#endif
		rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
				       smp_processor_id());

		u64_stats_update_begin(&rx_stats->syncp);
		rx_stats->rx_packets++;
		rx_stats->rx_bytes += skb->len;

		skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);

		pr_debug("%s: priority: %u for TCI: %hu\n",
			 __func__, skb->priority, vlan_tci);

		switch (skb->pkt_type) {
		case PACKET_BROADCAST:
			/* Yeah, stats collect these together.. */
			/* stats->broadcast ++; // no such counter :-( */
			break;

		case PACKET_MULTICAST:
			rx_stats->rx_multicast++;
			break;

		case PACKET_OTHERHOST:
			/* Our lower layer thinks this is not local, let's make
			 * sure.
			 * This allows the VLAN to have a different MAC than the
			 * underlying device, and still route correctly.
			 */
			if (!compare_ether_addr(eth_hdr(skb)->h_dest,
						skb->dev->dev_addr))
				skb->pkt_type = PACKET_HOST;
			break;
		default:
			break;
		}
		u64_stats_update_end(&rx_stats->syncp);
	}

	skb_pull_rcsum(skb, VLAN_HLEN);
	vlan_set_encap_proto(skb, vhdr);

	if (vlan_dev) {
		skb = vlan_check_reorder_header(skb);
		if (!skb) {
			rx_stats->rx_errors++;
			goto err_unlock;
		}
	}

	netif_rx(skb);

	rcu_read_unlock();
	return NET_RX_SUCCESS;

err_unlock:
	rcu_read_unlock();
err_free:
	kfree_skb(skb);
	return NET_RX_DROP;
}
static void ifb_ri_tasklet(unsigned long _txp)
{
	struct ifb_q_private *txp = (struct ifb_q_private *)_txp;
	struct netdev_queue *txq;
	struct sk_buff *skb;

	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
	skb = skb_peek(&txp->tq);
	if (!skb) {
		if (!__netif_tx_trylock(txq))
			goto resched;
		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
		__netif_tx_unlock(txq);
	}

	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
		skb->tc_redirected = 0;
		skb->tc_skip_classify = 1;

		u64_stats_update_begin(&txp->tsync);
		txp->tx_packets++;
		txp->tx_bytes += skb->len;
		u64_stats_update_end(&txp->tsync);

		rcu_read_lock();
		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
		if (!skb->dev) {
			rcu_read_unlock();
			dev_kfree_skb(skb);
			txp->dev->stats.tx_dropped++;
			if (skb_queue_len(&txp->tq) != 0)
				goto resched;
			break;
		}
		rcu_read_unlock();
		skb->skb_iif = txp->dev->ifindex;

		if (!skb->tc_from_ingress) {
			dev_queue_xmit(skb);
		} else {
			skb_pull_rcsum(skb, skb->mac_len);
			netif_receive_skb(skb);
		}
	}

	if (__netif_tx_trylock(txq)) {
		skb = skb_peek(&txp->rq);
		if (!skb) {
			txp->tasklet_pending = 0;
			if (netif_tx_queue_stopped(txq))
				netif_tx_wake_queue(txq);
		} else {
			__netif_tx_unlock(txq);
			goto resched;
		}
		__netif_tx_unlock(txq);
	} else {
resched:
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}
}
int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
			  struct ixgbe_ring *rx_ring,
			  const int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	struct ixgbe_adapter *adapter = q_vector->adapter;
	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
	unsigned int xdp_res, xdp_xmit = 0;
	bool failure = false;
	struct sk_buff *skb;
	struct xdp_buff xdp;

	xdp.rxq = &rx_ring->xdp_rxq;

	while (likely(total_rx_packets < budget)) {
		union ixgbe_adv_rx_desc *rx_desc;
		struct ixgbe_rx_buffer *bi;
		unsigned int size;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
			failure = failure ||
				  !ixgbe_alloc_rx_buffers_fast_zc(rx_ring,
								  cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
		size = le16_to_cpu(rx_desc->wb.upper.length);
		if (!size)
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * descriptor has been written back
		 */
		dma_rmb();

		bi = ixgbe_get_rx_buffer_zc(rx_ring, size);

		if (unlikely(!ixgbe_test_staterr(rx_desc,
						 IXGBE_RXD_STAT_EOP))) {
			struct ixgbe_rx_buffer *next_bi;

			ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
			ixgbe_inc_ntc(rx_ring);
			next_bi =
			       &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
			next_bi->skb = ERR_PTR(-EINVAL);
			continue;
		}

		if (unlikely(bi->skb)) {
			ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
			ixgbe_inc_ntc(rx_ring);
			continue;
		}

		xdp.data = bi->addr;
		xdp.data_meta = xdp.data;
		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
		xdp.data_end = xdp.data + size;
		xdp.handle = bi->handle;

		xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, &xdp);

		if (xdp_res) {
			if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
				xdp_xmit |= xdp_res;
				bi->addr = NULL;
				bi->skb = NULL;
			} else {
				ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
			}
			total_rx_packets++;
			total_rx_bytes += size;

			cleaned_count++;
			ixgbe_inc_ntc(rx_ring);
			continue;
		}

		/* XDP_PASS path */
		skb = ixgbe_construct_skb_zc(rx_ring, bi, &xdp);
		if (!skb) {
			rx_ring->rx_stats.alloc_rx_buff_failed++;
			break;
		}

		cleaned_count++;
		ixgbe_inc_ntc(rx_ring);

		if (eth_skb_pad(skb))
			continue;

		total_rx_bytes += skb->len;
		total_rx_packets++;

		ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
		ixgbe_rx_skb(q_vector, skb);
	}

	if (xdp_xmit & IXGBE_XDP_REDIR)
		xdp_do_flush_map();

	if (xdp_xmit & IXGBE_XDP_TX) {
		struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.
		 */
		wmb();
		writel(ring->next_to_use, ring->tail);
	}

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	q_vector->rx.total_packets += total_rx_packets;
	q_vector->rx.total_bytes += total_rx_bytes;

	return failure ? budget : (int)total_rx_packets;
}
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
			    struct ixgbe_ring *tx_ring, int napi_budget)
{
	unsigned int total_packets = 0, total_bytes = 0;
	u32 i = tx_ring->next_to_clean, xsk_frames = 0;
	unsigned int budget = q_vector->tx.work_limit;
	struct xdp_umem *umem = tx_ring->xsk_umem;
	union ixgbe_adv_tx_desc *tx_desc;
	struct ixgbe_tx_buffer *tx_bi;
	bool xmit_done;

	tx_bi = &tx_ring->tx_buffer_info[i];
	tx_desc = IXGBE_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	do {
		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
			break;

		/* count each completed descriptor once */
		total_bytes += tx_bi->bytecount;
		total_packets += tx_bi->gso_segs;

		if (tx_bi->xdpf)
			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
		else
			xsk_frames++;

		tx_bi->xdpf = NULL;

		tx_bi++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_bi = tx_ring->tx_buffer_info;
			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;

	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (xsk_frames)
		xsk_umem_complete_tx(umem, xsk_frames);

	xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
	return budget > 0 && xmit_done;
}
static void deth_nl_recv_msg(struct sk_buff *skb)
{
	struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
	int pktsize = 0;
	const char *ptr = nlmsg_data(nlh);
	u32 index = *(u32 *)ptr;
	struct deth_priv *priv = NULL;
	struct deth_net_stats *stats = NULL;
	struct sk_buff *out_skb = NULL;
	struct net *net = sock_net(skb->sk);
	struct net_device *dev = dev_get_by_index(net, index);

	ptr += sizeof(u32);

	if (dev == NULL) {
		printk(KERN_ERR "Error retrieving the interface with index %d.\n", index);
		return;
	}

	switch (nlh->nlmsg_type) {
	case PACKET:
		break;
	case LINK_CHANGE: {
		if (*(u32 *)(ptr)) {
			netif_carrier_on(dev);
		} else {
			netif_carrier_off(dev);
		}
		goto cleanup;
	}
	case LINK_STATS: {
		priv = netdev_priv(dev);
		stats = this_cpu_ptr(priv->stats);

		u64_stats_update_begin(&stats->syncp);
		stats->rx_packets = *(u64 *)(ptr);
		ptr += sizeof(u64);
		stats->rx_bytes = *(u64 *)(ptr);
		ptr += sizeof(u64);
		stats->tx_packets = *(u64 *)(ptr);
		ptr += sizeof(u64);
		stats->tx_bytes = *(u64 *)(ptr);
		u64_stats_update_end(&stats->syncp);

		goto cleanup;
	}
	default:
		goto cleanup;
	}

	pktsize = nlh->nlmsg_len - sizeof(u32);

	out_skb = netdev_alloc_skb(dev, pktsize + 2);
	if (out_skb == NULL) {
		printk(KERN_ERR "Error allocating a sk_buff (%d bytes).\n", pktsize);
		goto rx_drop;
	}

	memcpy(skb_put(out_skb, pktsize), ptr, pktsize);
	out_skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (dev_forward_skb(dev, out_skb) != NET_RX_SUCCESS) {
		printk(KERN_ERR "dev_forward_skb failed to forward a packet of %d bytes.\n", pktsize);
		goto rx_drop;
	}

	priv = netdev_priv(dev);
	stats = this_cpu_ptr(priv->stats);

	u64_stats_update_begin(&stats->syncp);
	stats->rx_bytes += pktsize;
	stats->rx_packets++;
	u64_stats_update_end(&stats->syncp);

	goto cleanup;

rx_drop:
	/* stats has not been set up yet on the error paths that jump here */
	priv = netdev_priv(dev);
	stats = this_cpu_ptr(priv->stats);

	u64_stats_update_begin(&stats->syncp);
	stats->tx_dropped++;
	u64_stats_update_end(&stats->syncp);

cleanup:
	dev_put(dev);
}
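Each of the snippets above follows the same per-CPU statistics idiom: the fast path increments counters between u64_stats_update_begin() and u64_stats_update_end(), and a reader folds the per-CPU copies with the fetch_begin/fetch_retry helpers. The following minimal sketch, not taken from any of the drivers above, shows that idiom in isolation; the struct and function names (demo_pcpu_stats, demo_count_rx, demo_fold_stats) are hypothetical.

/*
 * Minimal sketch of the u64_stats per-CPU counter pattern shared by
 * the snippets above. Writer and reader names are hypothetical.
 */
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct demo_pcpu_stats {
	u64 rx_packets;
	u64 rx_bytes;
	struct u64_stats_sync syncp;
};

/* Writer side: runs in a context that stays on one CPU (e.g. NAPI poll). */
static void demo_count_rx(struct demo_pcpu_stats __percpu *pcpu, unsigned int len)
{
	struct demo_pcpu_stats *stats = this_cpu_ptr(pcpu);

	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets++;
	stats->rx_bytes += len;
	u64_stats_update_end(&stats->syncp);
}

/* Reader side: retry each CPU's snapshot until it is consistent, then sum. */
static void demo_fold_stats(struct demo_pcpu_stats __percpu *pcpu,
			    u64 *packets, u64 *bytes)
{
	int cpu;

	*packets = 0;
	*bytes = 0;
	for_each_possible_cpu(cpu) {
		const struct demo_pcpu_stats *stats = per_cpu_ptr(pcpu, cpu);
		unsigned int start;
		u64 p, b;

		do {
			start = u64_stats_fetch_begin(&stats->syncp);
			p = stats->rx_packets;
			b = stats->rx_bytes;
		} while (u64_stats_fetch_retry(&stats->syncp, start));

		*packets += p;
		*bytes += b;
	}
}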