static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); unsigned int len; int ret; /* Handle non-VLAN frames if they are sent to us, for example by DHCP. * * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ if (veth->h_vlan_proto != vlan->vlan_proto || vlan->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; vlan_tci = vlan->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority); __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci); } skb->dev = vlan->real_dev; len = skb->len; if (unlikely(netpoll_tx_running(dev))) return vlan_netpoll_send_skb(vlan, skb); ret = dev_queue_xmit(skb); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct vlan_pcpu_stats *stats; stats = this_cpu_ptr(vlan->vlan_pcpu_stats); u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += len; u64_stats_update_end(&stats->syncp); } else { this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped); } return ret; }
/** * ovs_vport_send - send a packet on a device * * @vport: vport on which to send the packet * @skb: skb to send * * Sends the given packet and returns the length of data sent. Either ovs * lock or rcu_read_lock must be held. */ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) { int sent = vport->ops->send(vport, skb); if (likely(sent > 0)) { struct pcpu_sw_netstats *stats; stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += sent; u64_stats_update_end(&stats->syncp); } else if (sent < 0) { ovs_vport_record_error(vport, VPORT_E_TX_ERROR); } else { ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); } return sent; }
static noinline void nft_update_chain_stats(const struct nft_chain *chain, const struct nft_pktinfo *pkt) { struct nft_base_chain *base_chain; struct nft_stats *stats; base_chain = nft_base_chain(chain); if (!base_chain->stats) return; local_bh_disable(); stats = this_cpu_ptr(rcu_dereference(base_chain->stats)); if (stats) { u64_stats_update_begin(&stats->syncp); stats->pkts++; stats->bytes += pkt->skb->len; u64_stats_update_end(&stats->syncp); } local_bh_enable(); }
/* Bridge group multicast address 802.1d (pg 51). */ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; int qos_enable = 0; /* added pling 03/13/2007 */ static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; brstats->rx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); indev = skb->dev; /* wklin added, 2010/06/15 @attach_dev */ if (htons(ETH_P_ARP) == eth_hdr(skb)->h_proto) *(pp_bridge_indev(skb)) = indev;/*backup incoming port to be used in arp.c */ skb->dev = brdev; return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, netif_receive_skb); }
/* Called with rcu_read_lock_bh. */ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { int len, err; len = skb->len; rcu_read_lock(); err = ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL); rcu_read_unlock(); if (likely(!err)) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(netdev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); } else { netdev->stats.tx_errors++; } return 0; }
static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; if (!tunnel) return 1; dev = tunnel->dev; if (err) { dev->stats.rx_errors++; dev->stats.rx_dropped++; return 0; } x = xfrm_input_state(skb); family = x->inner_mode->afinfo->family; if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) return -EPERM; skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev))); skb->dev = dev; tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); return 0; }
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct dp_stats_percpu *stats; int dp_ifindex; int err; if (upcall_info->pid == 0) { err = -ENOTCONN; goto err; } dp_ifindex = get_dpifindex(dp); if (!dp_ifindex) { err = -ENODEV; goto err; } forward_ip_summed(skb, true); if (!skb_is_gso(skb)) err = queue_userspace_packet(dp_ifindex, skb, upcall_info); else err = queue_gso_packets(dp_ifindex, skb, upcall_info); if (err) goto err; return 0; err: stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); u64_stats_update_begin(&stats->sync); stats->n_lost++; u64_stats_update_end(&stats->sync); return err; }
/** * ovs_vport_receive - pass up received packet to the datapath for processing * * @vport: vport that received the packet * @skb: skb that was received * @tun_key: tunnel (if any) that carried packet * * Must be called with rcu_read_lock. The packet cannot be shared and * skb->data should point to the Ethernet header. */ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, const struct ovs_tunnel_info *tun_info) { struct pcpu_sw_netstats *stats; struct sw_flow_key key; int error; stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; stats->rx_bytes += skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); u64_stats_update_end(&stats->syncp); OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->egress_tun_info = NULL; /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_key_extract(tun_info, skb, &key); if (unlikely(error)) { kfree_skb(skb); return; } ovs_dp_process_packet(skb, &key); }
/* * The higher levels take care of making this non-reentrant (it's * called with bh's disabled). */ static netdev_tx_t loopback_xmit(struct sk_buff *skb, struct net_device *dev) { struct pcpu_lstats *lb_stats; int len; skb_orphan(skb); skb->protocol = eth_type_trans(skb, dev); /* it's OK to use per_cpu_ptr() because BHs are off */ lb_stats = this_cpu_ptr(dev->lstats); len = skb->len; if (likely(netif_rx(skb) == NET_RX_SUCCESS)) { u64_stats_update_begin(&lb_stats->syncp); lb_stats->bytes += len; lb_stats->packets++; u64_stats_update_end(&lb_stats->syncp); } return NETDEV_TX_OK; }
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct dp_stats_percpu *stats; int dp_ifindex; int err; if (upcall_info->portid == 0) { err = -ENOTCONN; goto err; } dp_ifindex = get_dpifindex(dp); if (!dp_ifindex) { err = -ENODEV; goto err; } if (!skb_is_gso(skb)) err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); else err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); if (err) goto err; return 0; err: stats = this_cpu_ptr(dp->stats_percpu); u64_stats_update_begin(&stats->sync); stats->n_lost++; u64_stats_update_end(&stats->sync); return err; }
static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); struct net_bridge_vlan_group *vg; struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; brstats->rx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); vg = br_vlan_group_rcu(br); /* Bridge is just like any other port. Make sure the * packet is allowed except in promisc modue when someone * may be running packet capture. */ if (!(brdev->flags & IFF_PROMISC) && !br_allowed_egress(vg, skb)) { kfree_skb(skb); return NET_RX_DROP; } indev = skb->dev; skb->dev = brdev; skb = br_handle_vlan(br, vg, skb); if (!skb) return NET_RX_DROP; /* update the multicast stats if the packet is IGMP/MLD */ br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb), BR_MCAST_DIR_TX); return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(indev), NULL, skb, indev, NULL, br_netif_receive_skb); }
/* derived from ip_tunnel_rcv(). */ void ovs_ip_tunnel_rcv(struct net_device *dev, struct sk_buff *skb, struct metadata_dst *tun_dst) { struct pcpu_sw_netstats *tstats; tstats = this_cpu_ptr((struct pcpu_sw_netstats __percpu *)dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); skb_reset_mac_header(skb); skb_scrub_packet(skb, false); skb->protocol = eth_type_trans(skb, dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); ovs_skb_dst_set(skb, (struct dst_entry *)tun_dst); #ifndef USE_UPSTREAM_TUNNEL netdev_port_receive(skb, &tun_dst->u.tun_info); #else netif_rx(skb); #endif }
/* * netvsc_recv_callback - Callback when we receive a packet from the * "wire" on the specified device. */ int netvsc_recv_callback(struct hv_device *device_obj, struct hv_netvsc_packet *packet, void **data, struct ndis_tcp_ip_checksum_info *csum_info, struct vmbus_channel *channel, u16 vlan_tci) { struct net_device *net; struct net_device_context *net_device_ctx; struct sk_buff *skb; struct netvsc_stats *rx_stats; net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev; if (!net || net->reg_state != NETREG_REGISTERED) { return NVSP_STAT_FAIL; } net_device_ctx = netdev_priv(net); rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); /* Allocate a skb - TODO direct I/O to pages? */ skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); if (unlikely(!skb)) { ++net->stats.rx_dropped; return NVSP_STAT_FAIL; } /* * Copy to skb. This copy is needed here since the memory pointed by * hv_netvsc_packet cannot be deallocated */ memcpy(skb_put(skb, packet->total_data_buflen), *data, packet->total_data_buflen); skb->protocol = eth_type_trans(skb, net); if (csum_info) { /* We only look at the IP checksum here. * Should we be dropping the packet if checksum * failed? How do we deal with other checksums - TCP/UDP? */ if (csum_info->receive.ip_checksum_succeeded) skb->ip_summed = CHECKSUM_UNNECESSARY; else skb->ip_summed = CHECKSUM_NONE; } if (vlan_tci & VLAN_TAG_PRESENT) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); skb_record_rx_queue(skb, channel-> offermsg.offer.sub_channel_index); u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += packet->total_data_buflen; u64_stats_update_end(&rx_stats->syncp); /* * Pass the skb back up. Network stack will deallocate the skb when it * is done. * TODO - use NAPI? */ netif_rx(skb); return 0; }
static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned short id; struct netfront_info *np = netdev_priv(dev); struct netfront_stats *stats = this_cpu_ptr(np->stats); struct xen_netif_tx_request *tx; struct xen_netif_extra_info *extra; char *data = skb->data; RING_IDX i; grant_ref_t ref; unsigned long mfn; int notify; int slots; unsigned int offset = offset_in_page(data); unsigned int len = skb_headlen(skb); unsigned long flags; /* If skb->len is too big for wire format, drop skb and alert * user about misconfiguration. */ if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) { net_alert_ratelimited( "xennet: skb->len = %u, too big for wire format\n", skb->len); goto drop; } slots = DIV_ROUND_UP(offset + len, PAGE_SIZE) + xennet_count_skb_frag_slots(skb); if (unlikely(slots > MAX_SKB_FRAGS + 1)) { net_alert_ratelimited( "xennet: skb rides the rocket: %d slots\n", slots); goto drop; } spin_lock_irqsave(&np->tx_lock, flags); if (unlikely(!netif_carrier_ok(dev) || (slots > 1 && !xennet_can_sg(dev)) || netif_needs_gso(skb, netif_skb_features(skb)))) { spin_unlock_irqrestore(&np->tx_lock, flags); goto drop; } i = np->tx.req_prod_pvt; id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); np->tx_skbs[id].skb = skb; tx = RING_GET_REQUEST(&np->tx, i); tx->id = id; ref = gnttab_claim_grant_reference(&np->gref_tx_head); BUG_ON((signed short)ref < 0); mfn = virt_to_mfn(data); gnttab_grant_foreign_access_ref( ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = len; extra = NULL; tx->flags = 0; if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated; else if (skb->ip_summed == CHECKSUM_UNNECESSARY) /* remote but checksummed. */ tx->flags |= XEN_NETTXF_data_validated; if (skb_shinfo(skb)->gso_size) { struct xen_netif_extra_info *gso; gso = (struct xen_netif_extra_info *) RING_GET_REQUEST(&np->tx, ++i); if (extra) extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; else tx->flags |= XEN_NETTXF_extra_info; gso->u.gso.size = skb_shinfo(skb)->gso_size; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; extra = gso; } np->tx.req_prod_pvt = i + 1; xennet_make_frags(skb, dev, tx); tx->size = skb->len; RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); if (notify) notify_remote_via_irq(np->netdev->irq); u64_stats_update_begin(&stats->syncp); stats->tx_bytes += skb->len; stats->tx_packets++; u64_stats_update_end(&stats->syncp); /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */ xennet_tx_buf_gc(dev); if (!netfront_tx_slot_available(np)) netif_stop_queue(dev); spin_unlock_irqrestore(&np->tx_lock, flags); return NETDEV_TX_OK; drop: dev->stats.tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; }
/* * netvsc_recv_callback - Callback when we receive a packet from the * "wire" on the specified device. */ int netvsc_recv_callback(struct hv_device *device_obj, struct hv_netvsc_packet *packet, void **data, struct ndis_tcp_ip_checksum_info *csum_info, struct vmbus_channel *channel, u16 vlan_tci) { struct net_device *net = hv_get_drvdata(device_obj); struct net_device_context *net_device_ctx = netdev_priv(net); struct net_device *vf_netdev; struct sk_buff *skb; struct netvsc_stats *rx_stats; if (net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; /* * If necessary, inject this packet into the VF interface. * On Hyper-V, multicast and brodcast packets are only delivered * to the synthetic interface (after subjecting these to * policy filters on the host). Deliver these via the VF * interface in the guest. */ vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); if (vf_netdev && (vf_netdev->flags & IFF_UP)) net = vf_netdev; /* Allocate a skb - TODO direct I/O to pages? */ skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); if (unlikely(!skb)) { ++net->stats.rx_dropped; return NVSP_STAT_FAIL; } if (net != vf_netdev) skb_record_rx_queue(skb, channel->offermsg.offer.sub_channel_index); /* * Even if injecting the packet, record the statistics * on the synthetic device because modifying the VF device * statistics will not work correctly. */ rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += packet->total_data_buflen; if (skb->pkt_type == PACKET_BROADCAST) ++rx_stats->broadcast; else if (skb->pkt_type == PACKET_MULTICAST) ++rx_stats->multicast; u64_stats_update_end(&rx_stats->syncp); /* * Pass the skb back up. Network stack will deallocate the skb when it * is done. * TODO - use NAPI? */ netif_rx(skb); return 0; }
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_netvsc_packet *packet = NULL; int ret; unsigned int num_data_pgs; struct rndis_message *rndis_msg; struct rndis_packet *rndis_pkt; u32 rndis_msg_size; struct rndis_per_packet_info *ppi; struct ndis_tcp_ip_checksum_info *csum_info; int hdr_offset; u32 net_trans_info; u32 hash; u32 skb_length; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; struct hv_page_buffer *pb = page_buf; /* We will atmost need two pages to describe the rndis * header. We can only transmit MAX_PAGE_BUFFER_COUNT number * of pages in a single packet. If skb is scattered around * more pages we try linearizing it. */ skb_length = skb->len; num_data_pgs = netvsc_get_slots(skb) + 2; if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) { ++net_device_ctx->eth_stats.tx_scattered; if (skb_linearize(skb)) goto no_memory; num_data_pgs = netvsc_get_slots(skb) + 2; if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { ++net_device_ctx->eth_stats.tx_too_big; goto drop; } } /* * Place the rndis header in the skb head room and * the skb->cb will be used for hv_netvsc_packet * structure. */ ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE); if (ret) goto no_memory; /* Use the skb control buffer for building up the packet */ BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > FIELD_SIZEOF(struct sk_buff, cb)); packet = (struct hv_netvsc_packet *)skb->cb; packet->q_idx = skb_get_queue_mapping(skb); packet->total_data_buflen = skb->len; rndis_msg = (struct rndis_message *)skb->head; memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE); /* Add the rndis header */ rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; rndis_msg->msg_len = packet->total_data_buflen; rndis_pkt = &rndis_msg->msg.pkt; rndis_pkt->data_offset = sizeof(struct rndis_packet); rndis_pkt->data_len = packet->total_data_buflen; rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); hash = skb_get_hash_raw(skb); if (hash != 0 && net->real_num_tx_queues > 1) { rndis_msg_size += NDIS_HASH_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, NBL_HASH_VALUE); *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; } if (skb_vlan_tag_present(skb)) { struct ndis_pkt_8021q_info *vlan; rndis_msg_size += NDIS_VLAN_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, IEEE_8021Q_INFO); vlan = (struct ndis_pkt_8021q_info *)((void *)ppi + ppi->ppi_offset); vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK; vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } net_trans_info = get_net_transport_info(skb, &hdr_offset); /* * Setup the sendside checksum offload only if this is not a * GSO packet. */ if ((net_trans_info & (INFO_TCP | INFO_UDP)) && skb_is_gso(skb)) { struct ndis_tcp_lso_info *lso_info; rndis_msg_size += NDIS_LSO_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, TCP_LARGESEND_PKTINFO); lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + ppi->ppi_offset); lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; if (net_trans_info & (INFO_IPV4 << 16)) { lso_info->lso_v2_transmit.ip_version = NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip_hdr(skb)->tot_len = 0; ip_hdr(skb)->check = 0; tcp_hdr(skb)->check = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } else { lso_info->lso_v2_transmit.ip_version = NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; ipv6_hdr(skb)->payload_len = 0; tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { if (net_trans_info & INFO_TCP) { rndis_msg_size += NDIS_CSUM_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, TCPIP_CHKSUM_PKTINFO); csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi + ppi->ppi_offset); if (net_trans_info & (INFO_IPV4 << 16)) csum_info->transmit.is_ipv4 = 1; else csum_info->transmit.is_ipv6 = 1; csum_info->transmit.tcp_checksum = 1; csum_info->transmit.tcp_header_offset = hdr_offset; } else { /* UDP checksum (and other) offload is not supported. */ if (skb_checksum_help(skb)) goto drop; } } /* Start filling in the page buffers with the rndis hdr */ rndis_msg->msg_len += rndis_msg_size; packet->total_data_buflen = rndis_msg->msg_len; packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, skb, packet, &pb); /* timestamp packet in software */ skb_tx_timestamp(skb); ret = netvsc_send(net_device_ctx->device_ctx, packet, rndis_msg, &pb, skb); if (likely(ret == 0)) { struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); u64_stats_update_begin(&tx_stats->syncp); tx_stats->packets++; tx_stats->bytes += skb_length; u64_stats_update_end(&tx_stats->syncp); return NETDEV_TX_OK; } if (ret == -EAGAIN) { ++net_device_ctx->eth_stats.tx_busy; return NETDEV_TX_BUSY; } if (ret == -ENOSPC) ++net_device_ctx->eth_stats.tx_no_space; drop: dev_kfree_skb_any(skb); net->stats.tx_dropped++; return NETDEV_TX_OK; no_memory: ++net_device_ctx->eth_stats.tx_no_memory; goto drop; }
int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *rx_ring, const int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct ixgbe_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbe_desc_unused(rx_ring); unsigned int xdp_res, xdp_xmit = 0; bool failure = false; struct sk_buff *skb; struct xdp_buff xdp; xdp.rxq = &rx_ring->xdp_rxq; while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *bi; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { failure = failure || !ixgbe_alloc_rx_buffers_fast_zc(rx_ring, cleaned_count); cleaned_count = 0; } rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); size = le16_to_cpu(rx_desc->wb.upper.length); if (!size) break; /* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc until we know the * descriptor has been written back */ dma_rmb(); bi = ixgbe_get_rx_buffer_zc(rx_ring, size); if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) { struct ixgbe_rx_buffer *next_bi; ixgbe_reuse_rx_buffer_zc(rx_ring, bi); ixgbe_inc_ntc(rx_ring); next_bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; next_bi->skb = ERR_PTR(-EINVAL); continue; } if (unlikely(bi->skb)) { ixgbe_reuse_rx_buffer_zc(rx_ring, bi); ixgbe_inc_ntc(rx_ring); continue; } xdp.data = bi->addr; xdp.data_meta = xdp.data; xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; xdp.data_end = xdp.data + size; xdp.handle = bi->handle; xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, &xdp); if (xdp_res) { if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) { xdp_xmit |= xdp_res; bi->addr = NULL; bi->skb = NULL; } else { ixgbe_reuse_rx_buffer_zc(rx_ring, bi); } total_rx_packets++; total_rx_bytes += size; cleaned_count++; ixgbe_inc_ntc(rx_ring); continue; } /* XDP_PASS path */ skb = ixgbe_construct_skb_zc(rx_ring, bi, &xdp); if (!skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; break; } cleaned_count++; ixgbe_inc_ntc(rx_ring); if (eth_skb_pad(skb)) continue; total_rx_bytes += skb->len; total_rx_packets++; ixgbe_process_skb_fields(rx_ring, rx_desc, skb); ixgbe_rx_skb(q_vector, skb); } if (xdp_xmit & IXGBE_XDP_REDIR) xdp_do_flush_map(); if (xdp_xmit & IXGBE_XDP_TX) { struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. */ wmb(); writel(ring->next_to_use, ring->tail); } u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; u64_stats_update_end(&rx_ring->syncp); q_vector->rx.total_packets += total_rx_packets; q_vector->rx.total_bytes += total_rx_bytes; return failure ? budget : (int)total_rx_packets; }
bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *tx_ring, int napi_budget) { unsigned int total_packets = 0, total_bytes = 0; u32 i = tx_ring->next_to_clean, xsk_frames = 0; unsigned int budget = q_vector->tx.work_limit; struct xdp_umem *umem = tx_ring->xsk_umem; union ixgbe_adv_tx_desc *tx_desc; struct ixgbe_tx_buffer *tx_bi; bool xmit_done; tx_bi = &tx_ring->tx_buffer_info[i]; tx_desc = IXGBE_TX_DESC(tx_ring, i); i -= tx_ring->count; do { if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) break; total_bytes += tx_bi->bytecount; total_packets += tx_bi->gso_segs; if (tx_bi->xdpf) ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi); else xsk_frames++; tx_bi->xdpf = NULL; total_bytes += tx_bi->bytecount; tx_bi++; tx_desc++; i++; if (unlikely(!i)) { i -= tx_ring->count; tx_bi = tx_ring->tx_buffer_info; tx_desc = IXGBE_TX_DESC(tx_ring, 0); } /* issue prefetch for next Tx descriptor */ prefetch(tx_desc); /* update budget accounting */ budget--; } while (likely(budget)); i += tx_ring->count; tx_ring->next_to_clean = i; u64_stats_update_begin(&tx_ring->syncp); tx_ring->stats.bytes += total_bytes; tx_ring->stats.packets += total_packets; u64_stats_update_end(&tx_ring->syncp); q_vector->tx.total_bytes += total_bytes; q_vector->tx.total_packets += total_packets; if (xsk_frames) xsk_umem_complete_tx(umem, xsk_frames); xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit); return budget > 0 && xmit_done; }
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, struct net_device *vrf_dev) { struct iphdr *ip4h = ip_hdr(skb); int ret = NET_XMIT_DROP; struct flowi4 fl4 = { /* needed to match OIF rule */ .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, .flowi4_proto = ip4h->protocol, .daddr = ip4h->daddr, .saddr = ip4h->saddr, }; struct net *net = dev_net(vrf_dev); struct rtable *rt; rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto err; skb_dst_drop(skb); /* if dst.dev is loopback or the VRF device again this is locally * originated traffic destined to a local address. Short circuit * to Rx path using our local dst */ if (rt->dst.dev == net->loopback_dev || rt->dst.dev == vrf_dev) { struct net_vrf *vrf = netdev_priv(vrf_dev); struct rtable *rth_local; struct dst_entry *dst = NULL; ip_rt_put(rt); rcu_read_lock(); rth_local = rcu_dereference(vrf->rth_local); if (likely(rth_local)) { dst = &rth_local->dst; dst_hold(dst); } rcu_read_unlock(); if (unlikely(!dst)) goto err; return vrf_local_xmit(skb, vrf_dev, dst); } skb_dst_set(skb, &rt->dst); /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); if (!ip4h->saddr) { ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0, RT_SCOPE_LINK); } ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; else ret = NET_XMIT_SUCCESS; out: return ret; err: vrf_tx_error(vrf_dev, skb); goto out; } static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) { switch (skb->protocol) { case htons(ETH_P_IP): return vrf_process_v4_outbound(skb, dev); case htons(ETH_P_IPV6): return vrf_process_v6_outbound(skb, dev); default: vrf_tx_error(dev, skb); return NET_XMIT_DROP; } } static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { netdev_tx_t ret = is_ip_tx_frame(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); dstats->tx_pkts++; dstats->tx_bytes += skb->len; u64_stats_update_end(&dstats->syncp); } else { this_cpu_inc(dev->dstats->tx_drps); } return ret; }
/* geneve receive/decap routine */ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) { struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst; struct geneve_dev *geneve = NULL; #ifdef HAVE_DEV_TSTATS struct pcpu_sw_netstats *stats; #endif struct iphdr *iph; u8 *vni; __be32 addr; int err; union { struct metadata_dst dst; char buf[sizeof(struct metadata_dst) + 256]; } buf; iph = ip_hdr(skb); /* outer IP header... */ if (gs->collect_md) { static u8 zero_vni[3]; vni = zero_vni; addr = 0; } else { vni = gnvh->vni; addr = iph->saddr; } geneve = geneve_lookup(gs, addr, vni); if (!geneve) goto drop; if (ip_tunnel_collect_metadata() || gs->collect_md) { __be16 flags; flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | (gnvh->oam ? TUNNEL_OAM : 0) | (gnvh->critical ? TUNNEL_CRIT_OPT : 0); tun_dst = &buf.dst; ovs_udp_tun_rx_dst(&tun_dst->u.tun_info, skb, AF_INET, flags, vni_to_tunnel_id(gnvh->vni), gnvh->opt_len * 4); /* Update tunnel dst according to Geneve options. */ ip_tunnel_info_opts_set(&tun_dst->u.tun_info, gnvh->options, gnvh->opt_len * 4); } else { /* Drop packets w/ critical options, * since we don't support any... */ tun_dst = NULL; if (gnvh->critical) goto drop; } skb_reset_mac_header(skb); skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev))); skb->protocol = eth_type_trans(skb, geneve->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); if (tun_dst) ovs_skb_dst_set(skb, &tun_dst->dst); else goto drop; /* Ignore packet loops (and multicast echo) */ if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) goto drop; skb_reset_network_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (err > 1) { ++geneve->dev->stats.rx_frame_errors; ++geneve->dev->stats.rx_errors; goto drop; } } #ifdef HAVE_DEV_TSTATS stats = this_cpu_ptr((struct pcpu_sw_netstats __percpu *)geneve->dev->tstats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; stats->rx_bytes += skb->len; u64_stats_update_end(&stats->syncp); #endif netdev_port_receive(skb, &tun_dst->u.tun_info); return; drop: /* Consume bad packet */ kfree_skb(skb); }
bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) { struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; vlan_dev = vlan_find_dev(skb->dev, vlan_id); if (!vlan_dev) { /* */ if (vlan_id && last_handler) skb->pkt_type = PACKET_OTHERHOST; return false; } skb = *skbp = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return false; skb->dev = vlan_dev; if (skb->pkt_type == PACKET_OTHERHOST) { /* */ if (!compare_ether_addr(eth_hdr(skb)->h_dest, vlan_dev->dev_addr)) skb->pkt_type = PACKET_HOST; } if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { unsigned int offset = skb->data - skb_mac_header(skb); /* */ skb_push(skb, offset); skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); if (!skb) return false; skb_pull(skb, offset + VLAN_HLEN); skb_reset_mac_len(skb); } skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; if (skb->pkt_type == PACKET_MULTICAST) rx_stats->rx_multicast++; u64_stats_update_end(&rx_stats->syncp); return true; }
/** * nfp_net_rx() - receive up to @budget packets on @rx_ring * @rx_ring: RX ring to receive from * @budget: NAPI budget * * Note, this function is separated out from the napi poll function to * more cleanly separate packet receive code from other bookkeeping * functions performed in the napi poll function. * * There are differences between the NFP-3200 firmware and the * NFP-6000 firmware. The NFP-3200 firmware uses a dedicated RX queue * to indicate that new packets have arrived. The NFP-6000 does not * have this queue and uses the DD bit in the RX descriptor. This * method cannot be used on the NFP-3200 as it causes a race * condition: The RX ring write pointer on the NFP-3200 is updated * after packets (and descriptors) have been DMAed. If the DD bit is * used and subsequently the read pointer is updated this may lead to * the RX queue to underflow (if the firmware has not yet update the * write pointer). Therefore we use slightly ugly conditional code * below to handle the differences. We may, in the future update the * NFP-3200 firmware to behave the same as the firmware on the * NFP-6000. * * Return: Number of packets received. */ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) { struct nfp_net_r_vector *r_vec = rx_ring->r_vec; struct nfp_net *nn = r_vec->nfp_net; unsigned int data_len, meta_len; int avail = 0, pkts_polled = 0; struct sk_buff *skb, *new_skb; struct nfp_net_rx_desc *rxd; dma_addr_t new_dma_addr; u32 qcp_wr_p; int idx; if (nn->is_nfp3200) { /* Work out how many packets arrived */ qcp_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx); idx = rx_ring->rd_p % rx_ring->cnt; if (qcp_wr_p == idx) /* No new packets */ return 0; if (qcp_wr_p > idx) avail = qcp_wr_p - idx; else avail = qcp_wr_p + rx_ring->cnt - idx; } else { avail = budget + 1; } while (avail > 0 && pkts_polled < budget) { idx = rx_ring->rd_p % rx_ring->cnt; rxd = &rx_ring->rxds[idx]; if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) { if (nn->is_nfp3200) nn_dbg(nn, "RX descriptor not valid (DD)%d:%u rxd[0]=%#x rxd[1]=%#x\n", rx_ring->idx, idx, rxd->vals[0], rxd->vals[1]); break; } /* Memory barrier to ensure that we won't do other reads * before the DD bit. */ dma_rmb(); rx_ring->rd_p++; pkts_polled++; avail--; skb = rx_ring->rxbufs[idx].skb; new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr, nn->fl_bufsz); if (!new_skb) { nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb, rx_ring->rxbufs[idx].dma_addr); u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_drops++; u64_stats_update_end(&r_vec->rx_sync); continue; } dma_unmap_single(&nn->pdev->dev, rx_ring->rxbufs[idx].dma_addr, nn->fl_bufsz, DMA_FROM_DEVICE); nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr); /* < meta_len > * <-- [rx_offset] --> * --------------------------------------------------------- * | [XX] | metadata | packet | XXXX | * --------------------------------------------------------- * <---------------- data_len ---------------> * * The rx_offset is fixed for all packets, the meta_len can vary * on a packet by packet basis. If rx_offset is set to zero * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the * buffer and is immediately followed by the packet (no [XX]). */ meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; data_len = le16_to_cpu(rxd->rxd.data_len); if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) skb_reserve(skb, meta_len); else skb_reserve(skb, nn->rx_offset); skb_put(skb, data_len - meta_len); nfp_net_set_hash(nn->netdev, skb, rxd); /* Pad small frames to minimum */ if (skb_put_padto(skb, 60)) break; /* Stats update */ u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_pkts++; r_vec->rx_bytes += skb->len; u64_stats_update_end(&r_vec->rx_sync); skb_record_rx_queue(skb, rx_ring->idx); skb->protocol = eth_type_trans(skb, nn->netdev); nfp_net_rx_csum(nn, r_vec, rxd, skb); if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(rxd->rxd.vlan)); napi_gro_receive(&rx_ring->r_vec->napi, skb); } if (nn->is_nfp3200) nfp_qcp_rd_ptr_add(rx_ring->qcp_rx, pkts_polled); return pkts_polled; }
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_netvsc_packet *packet = NULL; int ret; unsigned int num_data_pgs; struct rndis_message *rndis_msg; struct rndis_packet *rndis_pkt; u32 rndis_msg_size; bool isvlan; bool linear = false; struct rndis_per_packet_info *ppi; struct ndis_tcp_ip_checksum_info *csum_info; struct ndis_tcp_lso_info *lso_info; int hdr_offset; u32 net_trans_info; u32 hash; u32 skb_length; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; struct hv_page_buffer *pb = page_buf; struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); /* We will atmost need two pages to describe the rndis * header. We can only transmit MAX_PAGE_BUFFER_COUNT number * of pages in a single packet. If skb is scattered around * more pages we try linearizing it. */ check_size: skb_length = skb->len; num_data_pgs = netvsc_get_slots(skb) + 2; if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) { net_alert_ratelimited("packet too big: %u pages (%u bytes)\n", num_data_pgs, skb->len); ret = -EFAULT; goto drop; } else if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { if (skb_linearize(skb)) { net_alert_ratelimited("failed to linearize skb\n"); ret = -ENOMEM; goto drop; } linear = true; goto check_size; } /* * Place the rndis header in the skb head room and * the skb->cb will be used for hv_netvsc_packet * structure. */ ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE); if (ret) { netdev_err(net, "unable to alloc hv_netvsc_packet\n"); ret = -ENOMEM; goto drop; } /* Use the skb control buffer for building up the packet */ BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > FIELD_SIZEOF(struct sk_buff, cb)); packet = (struct hv_netvsc_packet *)skb->cb; packet->q_idx = skb_get_queue_mapping(skb); packet->total_data_buflen = skb->len; rndis_msg = (struct rndis_message *)skb->head; memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE); isvlan = skb->vlan_tci & VLAN_TAG_PRESENT; /* Add the rndis header */ rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; rndis_msg->msg_len = packet->total_data_buflen; rndis_pkt = &rndis_msg->msg.pkt; rndis_pkt->data_offset = sizeof(struct rndis_packet); rndis_pkt->data_len = packet->total_data_buflen; rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); hash = skb_get_hash_raw(skb); if (hash != 0 && net->real_num_tx_queues > 1) { rndis_msg_size += NDIS_HASH_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, NBL_HASH_VALUE); *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; } if (isvlan) { struct ndis_pkt_8021q_info *vlan; rndis_msg_size += NDIS_VLAN_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, IEEE_8021Q_INFO); vlan = (struct ndis_pkt_8021q_info *)((void *)ppi + ppi->ppi_offset); vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK; vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } net_trans_info = get_net_transport_info(skb, &hdr_offset); if (net_trans_info == TRANSPORT_INFO_NOT_IP) goto do_send; /* * Setup the sendside checksum offload only if this is not a * GSO packet. */ if (skb_is_gso(skb)) goto do_lso; if ((skb->ip_summed == CHECKSUM_NONE) || (skb->ip_summed == CHECKSUM_UNNECESSARY)) goto do_send; rndis_msg_size += NDIS_CSUM_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, TCPIP_CHKSUM_PKTINFO); csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi + ppi->ppi_offset); if (net_trans_info & (INFO_IPV4 << 16)) csum_info->transmit.is_ipv4 = 1; else csum_info->transmit.is_ipv6 = 1; if (net_trans_info & INFO_TCP) { csum_info->transmit.tcp_checksum = 1; csum_info->transmit.tcp_header_offset = hdr_offset; } else if (net_trans_info & INFO_UDP) { /* UDP checksum offload is not supported on ws2008r2. * Furthermore, on ws2012 and ws2012r2, there are some * issues with udp checksum offload from Linux guests. * (these are host issues). * For now compute the checksum here. */ struct udphdr *uh; u16 udp_len; ret = skb_cow_head(skb, 0); if (ret) goto drop; uh = udp_hdr(skb); udp_len = ntohs(uh->len); uh->check = 0; uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, udp_len, IPPROTO_UDP, csum_partial(uh, udp_len, 0)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; csum_info->transmit.udp_checksum = 0; } goto do_send; do_lso: rndis_msg_size += NDIS_LSO_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, TCP_LARGESEND_PKTINFO); lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + ppi->ppi_offset); lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; if (net_trans_info & (INFO_IPV4 << 16)) { lso_info->lso_v2_transmit.ip_version = NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip_hdr(skb)->tot_len = 0; ip_hdr(skb)->check = 0; tcp_hdr(skb)->check = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } else { lso_info->lso_v2_transmit.ip_version = NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; ipv6_hdr(skb)->payload_len = 0; tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; do_send: /* Start filling in the page buffers with the rndis hdr */ rndis_msg->msg_len += rndis_msg_size; packet->total_data_buflen = rndis_msg->msg_len; packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, skb, packet, &pb); /* timestamp packet in software */ skb_tx_timestamp(skb); ret = netvsc_send(net_device_ctx->device_ctx, packet, rndis_msg, &pb, skb); drop: if (ret == 0) { u64_stats_update_begin(&tx_stats->syncp); tx_stats->packets++; tx_stats->bytes += skb_length; u64_stats_update_end(&tx_stats->syncp); } else { if (ret != -EAGAIN) { dev_kfree_skb_any(skb); net->stats.tx_dropped++; } } return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK; }
/** * nfp_net_tx_complete() - Handled completed TX packets * @tx_ring: TX ring structure * * Return: Number of completed TX descriptors */ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) { struct nfp_net_r_vector *r_vec = tx_ring->r_vec; struct nfp_net *nn = r_vec->nfp_net; const struct skb_frag_struct *frag; struct netdev_queue *nd_q; u32 done_pkts = 0, done_bytes = 0; struct sk_buff *skb; int todo, nr_frags; u32 qcp_rd_p; int fidx; int idx; /* Work out how many descriptors have been transmitted */ qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); if (qcp_rd_p == tx_ring->qcp_rd_p) return; if (qcp_rd_p > tx_ring->qcp_rd_p) todo = qcp_rd_p - tx_ring->qcp_rd_p; else todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p; while (todo--) { idx = tx_ring->rd_p % tx_ring->cnt; tx_ring->rd_p++; skb = tx_ring->txbufs[idx].skb; if (!skb) continue; nr_frags = skb_shinfo(skb)->nr_frags; fidx = tx_ring->txbufs[idx].fidx; if (fidx == -1) { /* unmap head */ dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[idx].dma_addr, skb_headlen(skb), DMA_TO_DEVICE); done_pkts += tx_ring->txbufs[idx].pkt_cnt; done_bytes += tx_ring->txbufs[idx].real_len; } else { /* unmap fragment */ frag = &skb_shinfo(skb)->frags[fidx]; dma_unmap_page(&nn->pdev->dev, tx_ring->txbufs[idx].dma_addr, skb_frag_size(frag), DMA_TO_DEVICE); } /* check for last gather fragment */ if (fidx == nr_frags - 1) dev_kfree_skb_any(skb); tx_ring->txbufs[idx].dma_addr = 0; tx_ring->txbufs[idx].skb = NULL; tx_ring->txbufs[idx].fidx = -2; } tx_ring->qcp_rd_p = qcp_rd_p; u64_stats_update_begin(&r_vec->tx_sync); r_vec->tx_bytes += done_bytes; r_vec->tx_pkts += done_pkts; u64_stats_update_end(&r_vec->tx_sync); nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx); netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); if (nfp_net_tx_ring_should_wake(tx_ring)) { /* Make sure TX thread will see updated tx_ring->rd_p */ smp_mb(); if (unlikely(netif_tx_queue_stopped(nd_q))) netif_tx_wake_queue(nd_q); } WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); }
/** * nfp_net_tx() - Main transmit entry point * @skb: SKB to transmit * @netdev: netdev structure * * Return: NETDEV_TX_OK on success. */ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); const struct skb_frag_struct *frag; struct nfp_net_r_vector *r_vec; struct nfp_net_tx_desc *txd, txdg; struct nfp_net_tx_buf *txbuf; struct nfp_net_tx_ring *tx_ring; struct netdev_queue *nd_q; dma_addr_t dma_addr; unsigned int fsize; int f, nr_frags; int wr_idx; u16 qidx; qidx = skb_get_queue_mapping(skb); tx_ring = &nn->tx_rings[qidx]; r_vec = tx_ring->r_vec; nd_q = netdev_get_tx_queue(nn->netdev, qidx); nr_frags = skb_shinfo(skb)->nr_frags; if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) { nn_warn_ratelimit(nn, "TX ring %d busy. wrp=%u rdp=%u\n", qidx, tx_ring->wr_p, tx_ring->rd_p); netif_tx_stop_queue(nd_q); u64_stats_update_begin(&r_vec->tx_sync); r_vec->tx_busy++; u64_stats_update_end(&r_vec->tx_sync); return NETDEV_TX_BUSY; } /* Start with the head skbuf */ dma_addr = dma_map_single(&nn->pdev->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(&nn->pdev->dev, dma_addr)) goto err_free; wr_idx = tx_ring->wr_p % tx_ring->cnt; /* Stash the soft descriptor of the head then initialize it */ txbuf = &tx_ring->txbufs[wr_idx]; txbuf->skb = skb; txbuf->dma_addr = dma_addr; txbuf->fidx = -1; txbuf->pkt_cnt = 1; txbuf->real_len = skb->len; /* Build TX descriptor */ txd = &tx_ring->txds[wr_idx]; txd->offset_eop = (nr_frags == 0) ? PCIE_DESC_TX_EOP : 0; txd->dma_len = cpu_to_le16(skb_headlen(skb)); nfp_desc_set_dma_addr(txd, dma_addr); txd->data_len = cpu_to_le16(skb->len); txd->flags = 0; txd->mss = 0; txd->l4_offset = 0; nfp_net_tx_tso(nn, r_vec, txbuf, txd, skb); nfp_net_tx_csum(nn, r_vec, txbuf, txd, skb); if (skb_vlan_tag_present(skb) && nn->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { txd->flags |= PCIE_DESC_TX_VLAN; txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); } /* Gather DMA */ if (nr_frags > 0) { /* all descs must match except for in addr, length and eop */ txdg = *txd; for (f = 0; f < nr_frags; f++) { frag = &skb_shinfo(skb)->frags[f]; fsize = skb_frag_size(frag); dma_addr = skb_frag_dma_map(&nn->pdev->dev, frag, 0, fsize, DMA_TO_DEVICE); if (dma_mapping_error(&nn->pdev->dev, dma_addr)) goto err_unmap; wr_idx = (wr_idx + 1) % tx_ring->cnt; tx_ring->txbufs[wr_idx].skb = skb; tx_ring->txbufs[wr_idx].dma_addr = dma_addr; tx_ring->txbufs[wr_idx].fidx = f; txd = &tx_ring->txds[wr_idx]; *txd = txdg; txd->dma_len = cpu_to_le16(fsize); nfp_desc_set_dma_addr(txd, dma_addr); txd->offset_eop = (f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0; } u64_stats_update_begin(&r_vec->tx_sync); r_vec->tx_gather++; u64_stats_update_end(&r_vec->tx_sync); } netdev_tx_sent_queue(nd_q, txbuf->real_len); tx_ring->wr_p += nr_frags + 1; if (nfp_net_tx_ring_should_stop(tx_ring)) nfp_net_tx_ring_stop(nd_q, tx_ring); tx_ring->wr_ptr_add += nr_frags + 1; if (!skb->xmit_more || netif_xmit_stopped(nd_q)) { /* force memory write before we let HW know */ wmb(); nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); tx_ring->wr_ptr_add = 0; } skb_tx_timestamp(skb); return NETDEV_TX_OK; err_unmap: --f; while (f >= 0) { frag = &skb_shinfo(skb)->frags[f]; dma_unmap_page(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr, skb_frag_size(frag), DMA_TO_DEVICE); tx_ring->txbufs[wr_idx].skb = NULL; tx_ring->txbufs[wr_idx].dma_addr = 0; tx_ring->txbufs[wr_idx].fidx = -2; wr_idx = wr_idx - 1; if (wr_idx < 0) wr_idx += tx_ring->cnt; } dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr, skb_headlen(skb), DMA_TO_DEVICE); tx_ring->txbufs[wr_idx].skb = NULL; tx_ring->txbufs[wr_idx].dma_addr = 0; tx_ring->txbufs[wr_idx].fidx = -2; err_free: nn_warn_ratelimit(nn, "Failed to map DMA TX buffer\n"); u64_stats_update_begin(&r_vec->tx_sync); r_vec->tx_errors++; u64_stats_update_end(&r_vec->tx_sync); dev_kfree_skb_any(skb); return NETDEV_TX_OK; }
/* * netvsc_recv_callback - Callback when we receive a packet from the * "wire" on the specified device. */ int netvsc_recv_callback(struct hv_device *device_obj, struct hv_netvsc_packet *packet, void **data, struct ndis_tcp_ip_checksum_info *csum_info, struct vmbus_channel *channel, u16 vlan_tci) { struct net_device *net = hv_get_drvdata(device_obj); struct net_device_context *net_device_ctx = netdev_priv(net); struct sk_buff *skb; struct sk_buff *vf_skb; struct netvsc_stats *rx_stats; struct netvsc_device *netvsc_dev = net_device_ctx->nvdev; u32 bytes_recvd = packet->total_data_buflen; int ret = 0; if (!net || net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; if (READ_ONCE(netvsc_dev->vf_inject)) { atomic_inc(&netvsc_dev->vf_use_cnt); if (!READ_ONCE(netvsc_dev->vf_inject)) { /* * We raced; just move on. */ atomic_dec(&netvsc_dev->vf_use_cnt); goto vf_injection_done; } /* * Inject this packet into the VF inerface. * On Hyper-V, multicast and brodcast packets * are only delivered on the synthetic interface * (after subjecting these to policy filters on * the host). Deliver these via the VF interface * in the guest. */ vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet, csum_info, *data, vlan_tci); if (vf_skb != NULL) { ++netvsc_dev->vf_netdev->stats.rx_packets; netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd; netif_receive_skb(vf_skb); } else { ++net->stats.rx_dropped; ret = NVSP_STAT_FAIL; } atomic_dec(&netvsc_dev->vf_use_cnt); return ret; } vf_injection_done: net_device_ctx = netdev_priv(net); rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); /* Allocate a skb - TODO direct I/O to pages? */ skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); if (unlikely(!skb)) { ++net->stats.rx_dropped; return NVSP_STAT_FAIL; } skb_record_rx_queue(skb, channel-> offermsg.offer.sub_channel_index); u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += packet->total_data_buflen; u64_stats_update_end(&rx_stats->syncp); /* * Pass the skb back up. Network stack will deallocate the skb when it * is done. * TODO - use NAPI? */ netif_rx(skb); return 0; }
/* * Determine the packet's protocol ID. The rule here is that we * assume 802.3 if the type field is short enough to be a length. * This is normal practice and works for any 'now in use' protocol. * * Also, at this point we assume that we ARE dealing exclusively with * VLAN packets, or packets that should be made into VLAN packets based * on a default VLAN ID. * * NOTE: Should be similar to ethernet/eth.c. * * SANITY NOTE: This method is called when a packet is moving up the stack * towards userland. To get here, it would have already passed * through the ethernet/eth.c eth_type_trans() method. * SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be * stored UNALIGNED in the memory. RISC systems don't like * such cases very much... * SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be * aligned, so there doesn't need to be any of the unaligned * stuff. It has been commented out now... --Ben * */ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { struct vlan_hdr *vhdr; struct vlan_pcpu_stats *rx_stats; struct net_device *vlan_dev; u16 vlan_id; u16 vlan_tci; skb = skb_share_check(skb, GFP_ATOMIC); if (skb == NULL) goto err_free; if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) goto err_free; vhdr = (struct vlan_hdr *)skb->data; vlan_tci = ntohs(vhdr->h_vlan_TCI); vlan_id = vlan_tci & VLAN_VID_MASK; rcu_read_lock(); vlan_dev = vlan_find_dev(dev, vlan_id); /* If the VLAN device is defined, we use it. * If not, and the VID is 0, it is a 802.1p packet (not * really a VLAN), so we will just netif_rx it later to the * original interface, but with the skb->proto set to the * wrapped proto: we do nothing here. */ if (!vlan_dev) { if (vlan_id) { pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n", __func__, vlan_id, dev->name); goto err_unlock; } rx_stats = NULL; } else { skb->dev = vlan_dev; rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); pr_debug("%s: priority: %u for TCI: %hu\n", __func__, skb->priority, vlan_tci); switch (skb->pkt_type) { case PACKET_BROADCAST: /* Yeah, stats collect these together.. */ /* stats->broadcast ++; // no such counter :-( */ break; case PACKET_MULTICAST: rx_stats->rx_multicast++; break; case PACKET_OTHERHOST: /* Our lower layer thinks this is not local, let's make * sure. * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ if (!compare_ether_addr(eth_hdr(skb)->h_dest, skb->dev->dev_addr)) skb->pkt_type = PACKET_HOST; break; default: break; } u64_stats_update_end(&rx_stats->syncp); } skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); if (vlan_dev) { skb = vlan_check_reorder_header(skb); if (!skb) { rx_stats->rx_errors++; goto err_unlock; } } netif_rx(skb); rcu_read_unlock(); return NET_RX_SUCCESS; err_unlock: rcu_read_unlock(); err_free: atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); return NET_RX_DROP; }
bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) { struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; vlan_dev = vlan_find_dev(skb->dev, vlan_id); if (!vlan_dev) { /* Only the last call to vlan_do_receive() should change * pkt_type to PACKET_OTHERHOST */ if (vlan_id && last_handler) skb->pkt_type = PACKET_OTHERHOST; return false; } skb = *skbp = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return false; skb->dev = vlan_dev; if (skb->pkt_type == PACKET_OTHERHOST) { /* Our lower layer thinks this is not local, let's make sure. * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ if (!compare_ether_addr(eth_hdr(skb)->h_dest, vlan_dev->dev_addr)) skb->pkt_type = PACKET_HOST; } if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { unsigned int offset = skb->data - skb_mac_header(skb); /* * vlan_insert_tag expect skb->data pointing to mac header. * So change skb->data before calling it and change back to * original position later */ skb_push(skb, offset); skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); if (!skb) return false; skb_pull(skb, offset + VLAN_HLEN); skb_reset_mac_len(skb); } skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; if (skb->pkt_type == PACKET_MULTICAST) rx_stats->rx_multicast++; u64_stats_update_end(&rx_stats->syncp); return true; }
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, bool log_ecn_error) { struct pcpu_tstats *tstats; const struct iphdr *iph = ip_hdr(skb); int err; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { /* Looped back packet, drop it! */ if (rt_is_output_route(skb_rtable(skb))) goto drop; tunnel->dev->stats.multicast++; skb->pkt_type = PACKET_BROADCAST; } #endif if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { tunnel->dev->stats.rx_crc_errors++; tunnel->dev->stats.rx_errors++; goto drop; } if (tunnel->parms.i_flags&TUNNEL_SEQ) { if (!(tpi->flags&TUNNEL_SEQ) || (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { tunnel->dev->stats.rx_fifo_errors++; tunnel->dev->stats.rx_errors++; goto drop; } tunnel->i_seqno = ntohl(tpi->seq) + 1; } err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", &iph->saddr, iph->tos); if (err > 1) { ++tunnel->dev->stats.rx_frame_errors; ++tunnel->dev->stats.rx_errors; goto drop; } } tstats = this_cpu_ptr(tunnel->dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); if (tunnel->net != dev_net(tunnel->dev)) skb_scrub_packet(skb); if (tunnel->dev->type == ARPHRD_ETHER) { skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { skb->dev = tunnel->dev; } gro_cells_receive(&tunnel->gro_cells, skb); return 0; drop: kfree_skb(skb); return 0; }
static void ri_tasklet(unsigned long dev) { struct net_device *_dev = (struct net_device *)dev; struct ifb_private *dp = netdev_priv(_dev); struct netdev_queue *txq; struct sk_buff *skb; txq = netdev_get_tx_queue(_dev, 0); if ((skb = skb_peek(&dp->tq)) == NULL) { if (__netif_tx_trylock(txq)) { skb_queue_splice_tail_init(&dp->rq, &dp->tq); __netif_tx_unlock(txq); } else { /* reschedule */ goto resched; } } while ((skb = __skb_dequeue(&dp->tq)) != NULL) { u32 from = G_TC_FROM(skb->tc_verd); skb->tc_verd = 0; skb->tc_verd = SET_TC_NCLS(skb->tc_verd); u64_stats_update_begin(&dp->tsync); dp->tx_packets++; dp->tx_bytes += skb->len; u64_stats_update_end(&dp->tsync); rcu_read_lock(); skb->dev = dev_get_by_index_rcu(dev_net(_dev), skb->skb_iif); if (!skb->dev) { rcu_read_unlock(); dev_kfree_skb(skb); _dev->stats.tx_dropped++; if (skb_queue_len(&dp->tq) != 0) goto resched; break; } rcu_read_unlock(); skb->skb_iif = _dev->ifindex; if (from & AT_EGRESS) { dev_queue_xmit(skb); } else if (from & AT_INGRESS) { skb_pull(skb, skb->dev->hard_header_len); netif_receive_skb(skb); } else BUG(); } if (__netif_tx_trylock(txq)) { if ((skb = skb_peek(&dp->rq)) == NULL) { dp->tasklet_pending = 0; if (netif_queue_stopped(_dev)) netif_wake_queue(_dev); } else { __netif_tx_unlock(txq); goto resched; } __netif_tx_unlock(txq); } else { resched: dp->tasklet_pending = 1; tasklet_schedule(&dp->ifb_tasklet); } }