Example #1
0
/*
 * netvsc_recv_callback -  Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct net_device *net,
			 struct vmbus_channel *channel,
			 void  *data, u32 len,
			 const struct ndis_tcp_ip_checksum_info *csum_info,
			 const struct ndis_pkt_8021q_info *vlan)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct netvsc_device *net_device;
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	struct netvsc_channel *nvchan;
	struct net_device *vf_netdev;
	struct sk_buff *skb;
	struct netvsc_stats *rx_stats;

	if (net->reg_state != NETREG_REGISTERED)
		return NVSP_STAT_FAIL;

	/*
	 * If necessary, inject this packet into the VF interface.
	 * On Hyper-V, multicast and broadcast packets are only delivered
	 * to the synthetic interface (after subjecting these to
	 * policy filters on the host). Deliver these via the VF
	 * interface in the guest.
	 */
	rcu_read_lock();
	net_device = rcu_dereference(net_device_ctx->nvdev);
	if (unlikely(!net_device))
		goto drop;

	nvchan = &net_device->chan_table[q_idx];
	vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
	if (vf_netdev && (vf_netdev->flags & IFF_UP))
		net = vf_netdev;

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
				    csum_info, vlan, data, len);
	if (unlikely(!skb)) {
drop:
		++net->stats.rx_dropped;
		rcu_read_unlock();
		return NVSP_STAT_FAIL;
	}

	if (net != vf_netdev)
		skb_record_rx_queue(skb, q_idx);

	/*
	 * Even if injecting the packet, record the statistics
	 * on the synthetic device because modifying the VF device
	 * statistics will not work correctly.
	 */
	rx_stats = &nvchan->rx_stats;
	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += len;

	if (skb->pkt_type == PACKET_BROADCAST)
		++rx_stats->broadcast;
	else if (skb->pkt_type == PACKET_MULTICAST)
		++rx_stats->multicast;
	u64_stats_update_end(&rx_stats->syncp);

	napi_gro_receive(&nvchan->napi, skb);
	rcu_read_unlock();

	return 0;
}
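/*
 * A minimal writer-side sketch of the u64_stats pattern the function
 * above relies on. The struct below is an illustrative stand-in for
 * the driver's netvsc_stats, not the real definition; only the
 * counters touched above are shown.
 */
#include <linux/if_packet.h>
#include <linux/skbuff.h>
#include <linux/u64_stats_sync.h>

struct example_rx_stats {
	u64 packets;
	u64 bytes;
	u64 broadcast;
	u64 multicast;
	struct u64_stats_sync syncp;	/* protects 64-bit counters on 32-bit SMP */
};

static void example_count_rx(struct example_rx_stats *rs,
			     const struct sk_buff *skb)
{
	/* The per-channel writer runs in NAPI context, so begin/end is cheap. */
	u64_stats_update_begin(&rs->syncp);
	rs->packets++;
	rs->bytes += skb->len;
	if (skb->pkt_type == PACKET_BROADCAST)
		rs->broadcast++;
	else if (skb->pkt_type == PACKET_MULTICAST)
		rs->multicast++;
	u64_stats_update_end(&rs->syncp);
}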
Example #2
0
static void ri_tasklet(unsigned long dev)
{
	struct net_device *_dev = (struct net_device *)dev;
	struct ifb_private *dp = netdev_priv(_dev);
	struct netdev_queue *txq;
	struct sk_buff *skb;

	txq = netdev_get_tx_queue(_dev, 0);
	if ((skb = skb_peek(&dp->tq)) == NULL) {
		if (__netif_tx_trylock(txq)) {
			skb_queue_splice_tail_init(&dp->rq, &dp->tq);
			__netif_tx_unlock(txq);
		} else {
			/* reschedule */
			goto resched;
		}
	}

	while ((skb = __skb_dequeue(&dp->tq)) != NULL) {
		u32 from = G_TC_FROM(skb->tc_verd);

		skb->tc_verd = 0;
		skb->tc_verd = SET_TC_NCLS(skb->tc_verd);

		u64_stats_update_begin(&dp->tsync);
		dp->tx_packets++;
		dp->tx_bytes += skb->len;
		u64_stats_update_end(&dp->tsync);

		rcu_read_lock();
		skb->dev = dev_get_by_index_rcu(&init_net, skb->skb_iif);
		if (!skb->dev) {
			rcu_read_unlock();
			dev_kfree_skb(skb);
			_dev->stats.tx_dropped++;
			if (skb_queue_len(&dp->tq) != 0)
				goto resched;
			break;
		}
		rcu_read_unlock();
		skb->skb_iif = _dev->ifindex;

		if (from & AT_EGRESS) {
			dev_queue_xmit(skb);
		} else if (from & AT_INGRESS) {
			skb_pull(skb, skb->dev->hard_header_len);
			netif_receive_skb(skb);
		} else
			BUG();
	}

	if (__netif_tx_trylock(txq)) {
		if ((skb = skb_peek(&dp->rq)) == NULL) {
			dp->tasklet_pending = 0;
			if (netif_queue_stopped(_dev))
				netif_wake_queue(_dev);
		} else {
			__netif_tx_unlock(txq);
			goto resched;
		}
		__netif_tx_unlock(txq);
	} else {
resched:
		dp->tasklet_pending = 1;
		tasklet_schedule(&dp->ifb_tasklet);
	}

}
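/*
 * A hedged sketch of how ri_tasklet() above is typically wired up at
 * device setup time. The private-struct layout is an assumption
 * modeled on the fields the tasklet touches; "example_" names are
 * illustrative, not the ifb driver's own.
 */
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/u64_stats_sync.h>

struct example_ifb_private {
	struct tasklet_struct ifb_tasklet;
	int tasklet_pending;
	struct sk_buff_head rq;		/* filled from ndo_start_xmit */
	struct sk_buff_head tq;		/* drained by the tasklet */
	struct u64_stats_sync tsync;
	u64 tx_packets;
	u64 tx_bytes;
};

static void example_ifb_setup(struct net_device *dev)
{
	struct example_ifb_private *dp = netdev_priv(dev);

	__skb_queue_head_init(&dp->rq);
	__skb_queue_head_init(&dp->tq);
	u64_stats_init(&dp->tsync);
	/* The tasklet gets the net_device pointer cast to unsigned long. */
	tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev);
}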
Example #3
0
bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
{
	struct sk_buff *skb = *skbp;
	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
	struct net_device *vlan_dev;
	struct vlan_pcpu_stats *rx_stats;

	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
	if (!vlan_dev) {
		/* Only the last call to vlan_do_receive() should change
		 * pkt_type to PACKET_OTHERHOST
		 */
		if (vlan_id && last_handler)
			skb->pkt_type = PACKET_OTHERHOST;
		return false;
	}

	skb = *skbp = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		return false;

	skb->dev = vlan_dev;
	if (skb->pkt_type == PACKET_OTHERHOST) {
		/* Our lower layer thinks this is not local, let's make sure.
		 * This allows the VLAN to have a different MAC than the
		 * underlying device, and still route correctly. */
		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
					vlan_dev->dev_addr))
			skb->pkt_type = PACKET_HOST;
	}

	if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
		unsigned int offset = skb->data - skb_mac_header(skb);

		/*
		 * vlan_insert_tag expects skb->data to point to the mac header.
		 * So change skb->data before calling it and change it back to
		 * the original position later
		 */
		skb_push(skb, offset);
		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
		if (!skb)
			return false;
		skb_pull(skb, offset + VLAN_HLEN);
		skb_reset_mac_len(skb);
	}

	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
	skb->vlan_tci = 0;

	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->rx_packets++;
	rx_stats->rx_bytes += skb->len;
	if (skb->pkt_type == PACKET_MULTICAST)
		rx_stats->rx_multicast++;
	u64_stats_update_end(&rx_stats->syncp);

	return true;
}
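/*
 * Reader-side counterpart of the per-CPU accounting done above: a
 * hedged sketch of how an ndo_get_stats64 handler would fold the
 * counters, using the fetch/retry sequence so 64-bit values read
 * consistently on 32-bit hosts. The stats struct is an illustrative
 * stand-in for vlan_pcpu_stats.
 */
#include <linux/netdevice.h>
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct example_vlan_pcpu_stats {
	u64 rx_packets;
	u64 rx_bytes;
	u64 rx_multicast;
	struct u64_stats_sync syncp;
};

static void example_vlan_fold_stats(struct example_vlan_pcpu_stats __percpu *pcpu,
				    struct rtnl_link_stats64 *stats)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct example_vlan_pcpu_stats *p = per_cpu_ptr(pcpu, cpu);
		u64 packets, bytes, mcast;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&p->syncp);
			packets	= p->rx_packets;
			bytes	= p->rx_bytes;
			mcast	= p->rx_multicast;
		} while (u64_stats_fetch_retry_irq(&p->syncp, start));

		stats->rx_packets += packets;
		stats->rx_bytes	  += bytes;
		stats->multicast  += mcast;
	}
}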
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, int hdr_len, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	secpath_reset(skb);

	skb->protocol = tpi->proto;

	skb->mac_header = skb->network_header;
	__pskb_pull(skb, hdr_len);
	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		iph = ip_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	skb->pkt_type = PACKET_HOST;
	__skb_tunnel_rx(skb, tunnel->dev);

	skb_reset_network_header(skb);
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
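/*
 * Hedged sketch of where dev->tstats used above comes from: the tunnel
 * allocates the per-CPU counters at init time and frees them on
 * teardown. struct pcpu_tstats above is what later kernels call
 * pcpu_sw_netstats; the callback names here are illustrative only.
 */
#include <linux/errno.h>
#include <linux/netdevice.h>

static int example_tunnel_init(struct net_device *dev)
{
	/* Allocates the per-CPU stats and runs u64_stats_init() on each CPU. */
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;
	return 0;
}

static void example_tunnel_uninit(struct net_device *dev)
{
	free_percpu(dev->tstats);
}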
static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
	struct mlxsw_sx_port_pcpu_stats *pcpu_stats;
	const struct mlxsw_tx_info tx_info = {
		.local_port = mlxsw_sx_port->local_port,
		.is_emad = false,
	};
	u64 len;
	int err;

	if (mlxsw_core_skb_transmit_busy(mlxsw_sx, &tx_info))
		return NETDEV_TX_BUSY;

	if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) {
		struct sk_buff *skb_orig = skb;

		skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN);
		if (!skb) {
			this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);
			dev_kfree_skb_any(skb_orig);
			return NETDEV_TX_OK;
		}
	}
	mlxsw_sx_txhdr_construct(skb, &tx_info);
	len = skb->len;
	/* Due to a race we might fail here because of a full queue. In that
	 * unlikely case we simply drop the packet.
	 */
	err = mlxsw_core_skb_transmit(mlxsw_sx, skb, &tx_info);

	if (!err) {
		pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats);
		u64_stats_update_begin(&pcpu_stats->syncp);
		pcpu_stats->tx_packets++;
		pcpu_stats->tx_bytes += len;
		u64_stats_update_end(&pcpu_stats->syncp);
	} else {
		this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);
		dev_kfree_skb_any(skb);
	}
	return NETDEV_TX_OK;
}

static int mlxsw_sx_port_change_mtu(struct net_device *dev, int mtu)
{
	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
	int err;

	err = mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
	if (err)
		return err;
	dev->mtu = mtu;
	return 0;
}

static struct rtnl_link_stats64 *
mlxsw_sx_port_get_stats64(struct net_device *dev,
			  struct rtnl_link_stats64 *stats)
{
	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
	struct mlxsw_sx_port_pcpu_stats *p;
	u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
	u32 tx_dropped = 0;
	unsigned int start;
	int i;

	for_each_possible_cpu(i) {
		p = per_cpu_ptr(mlxsw_sx_port->pcpu_stats, i);
		do {
			start = u64_stats_fetch_begin_irq(&p->syncp);
			rx_packets	= p->rx_packets;
			rx_bytes	= p->rx_bytes;
			tx_packets	= p->tx_packets;
			tx_bytes	= p->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&p->syncp, start));

		stats->rx_packets	+= rx_packets;
		stats->rx_bytes		+= rx_bytes;
		stats->tx_packets	+= tx_packets;
		stats->tx_bytes		+= tx_bytes;
		/* tx_dropped is u32, updated without syncp protection. */
		tx_dropped	+= p->tx_dropped;
	}
	stats->tx_dropped	= tx_dropped;
	return stats;
}
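/*
 * The reader above assumes every CPU's syncp was initialized when the
 * per-CPU stats were allocated. A hedged sketch of that setup done by
 * hand with alloc_percpu(); the struct is an illustrative stand-in for
 * mlxsw_sx_port_pcpu_stats.
 */
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct example_port_pcpu_stats {
	u64 rx_packets;
	u64 rx_bytes;
	u64 tx_packets;
	u64 tx_bytes;
	struct u64_stats_sync syncp;
	u32 tx_dropped;		/* 32-bit, bumped with this_cpu_inc() only */
};

static struct example_port_pcpu_stats __percpu *example_alloc_port_stats(void)
{
	struct example_port_pcpu_stats __percpu *stats;
	int cpu;

	stats = alloc_percpu(struct example_port_pcpu_stats);
	if (!stats)
		return NULL;

	for_each_possible_cpu(cpu)
		u64_stats_init(&per_cpu_ptr(stats, cpu)->syncp);

	return stats;
}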

static const struct net_device_ops mlxsw_sx_port_netdev_ops = {
	.ndo_open		= mlxsw_sx_port_open,
	.ndo_stop		= mlxsw_sx_port_stop,
	.ndo_start_xmit		= mlxsw_sx_port_xmit,
	.ndo_change_mtu		= mlxsw_sx_port_change_mtu,
	.ndo_get_stats64	= mlxsw_sx_port_get_stats64,
};

static void mlxsw_sx_port_get_drvinfo(struct net_device *dev,
				      struct ethtool_drvinfo *drvinfo)
{
	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;

	strlcpy(drvinfo->driver, mlxsw_sx_driver_name, sizeof(drvinfo->driver));
	strlcpy(drvinfo->version, mlxsw_sx_driver_version,
		sizeof(drvinfo->version));
	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
		 "%d.%d.%d",
		 mlxsw_sx->bus_info->fw_rev.major,
		 mlxsw_sx->bus_info->fw_rev.minor,
		 mlxsw_sx->bus_info->fw_rev.subminor);
	strlcpy(drvinfo->bus_info, mlxsw_sx->bus_info->device_name,
		sizeof(drvinfo->bus_info));
}

struct mlxsw_sx_port_hw_stats {
	char str[ETH_GSTRING_LEN];
	u64 (*getter)(char *payload);
};

static const struct mlxsw_sx_port_hw_stats mlxsw_sx_port_hw_stats[] = {
	{
		.str = "a_frames_transmitted_ok",
		.getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get,
	},
	{
		.str = "a_frames_received_ok",
/*
 * netvsc_recv_callback -  Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct hv_device *device_obj,
				struct hv_netvsc_packet *packet,
				struct ndis_tcp_ip_checksum_info *csum_info)
{
	struct net_device *net;
	struct net_device_context *net_device_ctx;
	struct sk_buff *skb;
	struct netvsc_stats *rx_stats;

	net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
	if (!net || net->reg_state != NETREG_REGISTERED) {
		packet->status = NVSP_STAT_FAIL;
		return 0;
	}
	net_device_ctx = netdev_priv(net);
	rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		packet->status = NVSP_STAT_FAIL;
		return 0;
	}

	/*
	 * Copy to skb. This copy is needed here since the memory pointed by
	 * hv_netvsc_packet cannot be deallocated
	 */
	memcpy(skb_put(skb, packet->total_data_buflen), packet->data,
		packet->total_data_buflen);

	skb->protocol = eth_type_trans(skb, net);
	if (csum_info) {
		/* We only look at the IP checksum here.
		 * Should we be dropping the packet if checksum
		 * failed? How do we deal with other checksums - TCP/UDP?
		 */
		if (csum_info->receive.ip_checksum_succeeded)
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		else
			skb->ip_summed = CHECKSUM_NONE;
	}

	if (packet->vlan_tci & VLAN_TAG_PRESENT)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       packet->vlan_tci);

	skb_record_rx_queue(skb, packet->channel->
			    offermsg.offer.sub_channel_index);

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += packet->total_data_buflen;
	u64_stats_update_end(&rx_stats->syncp);

	/*
	 * Pass the skb back up. Network stack will deallocate the skb when it
	 * is done.
	 * TODO - use NAPI?
	 */
	netif_rx(skb);

	return 0;
}
Example #7
0
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet = NULL;
	int ret;
	unsigned int num_data_pgs;
	struct rndis_message *rndis_msg;
	struct rndis_packet *rndis_pkt;
	u32 rndis_msg_size;
	struct rndis_per_packet_info *ppi;
	u32 hash;
	u32 skb_length;
	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
	struct hv_page_buffer *pb = page_buf;

	/* We will need at most two pages to describe the rndis
	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
	 * of pages in a single packet. If skb is scattered around
	 * more pages we try linearizing it.
	 */

	skb_length = skb->len;
	num_data_pgs = netvsc_get_slots(skb) + 2;

	if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) {
		++net_device_ctx->eth_stats.tx_scattered;

		if (skb_linearize(skb))
			goto no_memory;

		num_data_pgs = netvsc_get_slots(skb) + 2;
		if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
			++net_device_ctx->eth_stats.tx_too_big;
			goto drop;
		}
	}

	/*
	 * Place the rndis header in the skb head room and
	 * the skb->cb will be used for hv_netvsc_packet
	 * structure.
	 */
	ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
	if (ret)
		goto no_memory;

	/* Use the skb control buffer for building up the packet */
	BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
			FIELD_SIZEOF(struct sk_buff, cb));
	packet = (struct hv_netvsc_packet *)skb->cb;

	/* TODO: This will likely evaluate to false, since RH7 and
	 * below kernels will set next pointer to NULL before calling
	 * into here. Should find another way to set this flag.
	 */
	packet->xmit_more = (skb->next != NULL);
	
	packet->q_idx = skb_get_queue_mapping(skb);

	packet->total_data_buflen = skb->len;

	rndis_msg = (struct rndis_message *)skb->head;

	memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);

	packet->send_completion_ctx = packet;

	/* Add the rndis header */
	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
	rndis_msg->msg_len = packet->total_data_buflen;
	rndis_pkt = &rndis_msg->msg.pkt;
	rndis_pkt->data_offset = sizeof(struct rndis_packet);
	rndis_pkt->data_len = packet->total_data_buflen;
	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);

	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);

#ifdef NOTYET
	// Divergence from upstream commit:
	// 307f099520b66504cf6c5638f3f404c48b9fb45b
	hash = skb_get_hash_raw(skb);
#endif
	hash = skb_get_hash(skb);
	if (hash != 0 && net->real_num_tx_queues > 1) {
		rndis_msg_size += NDIS_HASH_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
				    NBL_HASH_VALUE);
		*(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
	}

	if (skb_vlan_tag_present(skb)) {
		struct ndis_pkt_8021q_info *vlan;

		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
					IEEE_8021Q_INFO);
		vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
						ppi->ppi_offset);
		vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
		vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
				VLAN_PRIO_SHIFT;
	}

	if (skb_is_gso(skb)) {
		struct ndis_tcp_lso_info *lso_info;

		rndis_msg_size += NDIS_LSO_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
				    TCP_LARGESEND_PKTINFO);

		lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
							ppi->ppi_offset);

		lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
		if (skb->protocol == htons(ETH_P_IP)) {
			lso_info->lso_v2_transmit.ip_version =
				NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
			ip_hdr(skb)->tot_len = 0;
			ip_hdr(skb)->check = 0;
			tcp_hdr(skb)->check =
				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
						   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
		} else {
			lso_info->lso_v2_transmit.ip_version =
				NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
			ipv6_hdr(skb)->payload_len = 0;
			tcp_hdr(skb)->check =
				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						 &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
		}
		lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb);
		lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;

	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (net_checksum_info(skb) & net_device_ctx->tx_checksum_mask) {
			struct ndis_tcp_ip_checksum_info *csum_info;

			rndis_msg_size += NDIS_CSUM_PPI_SIZE;
			ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
					    TCPIP_CHKSUM_PKTINFO);

			csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
									 ppi->ppi_offset);

			csum_info->transmit.tcp_header_offset = skb_transport_offset(skb);

			if (skb->protocol == htons(ETH_P_IP)) {
				csum_info->transmit.is_ipv4 = 1;
				if (ip_hdr(skb)->protocol == IPPROTO_TCP)
					csum_info->transmit.tcp_checksum = 1;
				else
					csum_info->transmit.udp_checksum = 1;
			} else {
				csum_info->transmit.is_ipv6 = 1;

				if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
					csum_info->transmit.tcp_checksum = 1;
				else
					csum_info->transmit.udp_checksum = 1;
			}
		} else {
			/* Can't do offload of this type of checksum */
			if (skb_checksum_help(skb))
				goto drop;
		}
	}

	/* Start filling in the page buffers with the rndis hdr */
	rndis_msg->msg_len += rndis_msg_size;
	packet->total_data_buflen = rndis_msg->msg_len;
	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
					       skb, packet, &pb);

	/* timestamp packet in software */
	skb_tx_timestamp(skb);

	ret = netvsc_send(net_device_ctx->device_ctx, packet,
			  rndis_msg, &pb, skb);

	if (likely(ret == 0)) {
		struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);

		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->packets++;
		tx_stats->bytes += skb_length;
		u64_stats_update_end(&tx_stats->syncp);
		return NETDEV_TX_OK;
	}

	if (ret == -EAGAIN) {
		++net_device_ctx->eth_stats.tx_busy;
		return NETDEV_TX_BUSY;
	}

	if (ret == -ENOSPC)
		++net_device_ctx->eth_stats.tx_no_space;

drop:
	dev_kfree_skb_any(skb);
	net->stats.tx_dropped++;

	return NETDEV_TX_OK;

no_memory:
	++net_device_ctx->eth_stats.tx_no_memory;
	goto drop;
}
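/*
 * The function above grows the headroom with skb_cow_head() and then
 * builds the RNDIS header in that reserved space. A hedged,
 * driver-agnostic sketch of the same idea in its more common form,
 * where a small header is pushed in front of the payload;
 * EXAMPLE_HDR_SIZE and the encoding are assumptions for illustration.
 */
#include <linux/skbuff.h>
#include <linux/string.h>

#define EXAMPLE_HDR_SIZE 16

static int example_prepend_header(struct sk_buff *skb, u32 msg_type)
{
	u8 *hdr;
	int ret;

	/* Fails (and the caller drops) if the headroom cannot be grown. */
	ret = skb_cow_head(skb, EXAMPLE_HDR_SIZE);
	if (ret)
		return ret;

	hdr = skb_push(skb, EXAMPLE_HDR_SIZE);
	memset(hdr, 0, EXAMPLE_HDR_SIZE);
	hdr[0] = msg_type & 0xff;	/* illustrative encoding */
	return 0;
}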
Example #8
0
/*
 * netvsc_recv_callback -  Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct hv_device *device_obj,
				struct hv_netvsc_packet *packet,
				void **data,
				struct ndis_tcp_ip_checksum_info *csum_info,
				struct vmbus_channel *channel,
				u16 vlan_tci)
{
	struct net_device *net = hv_get_drvdata(device_obj);
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct sk_buff *skb;
	struct sk_buff *vf_skb;
	struct netvsc_stats *rx_stats;
	struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
	u32 bytes_recvd = packet->total_data_buflen;
	int ret = 0;

	if (!net || net->reg_state != NETREG_REGISTERED)
		return NVSP_STAT_FAIL;

	if (READ_ONCE(netvsc_dev->vf_inject)) {
		atomic_inc(&netvsc_dev->vf_use_cnt);
		if (!READ_ONCE(netvsc_dev->vf_inject)) {
			/*
			 * We raced; just move on.
			 */
			atomic_dec(&netvsc_dev->vf_use_cnt);
			goto vf_injection_done;
		}

		/*
		 * Inject this packet into the VF interface.
		 * On Hyper-V, multicast and broadcast packets
		 * are only delivered on the synthetic interface
		 * (after subjecting these to policy filters on
		 * the host). Deliver these via the VF interface
		 * in the guest.
		 */
		vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
					       csum_info, *data, vlan_tci);
		if (vf_skb != NULL) {
			++netvsc_dev->vf_netdev->stats.rx_packets;
			netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
			netif_receive_skb(vf_skb);
		} else {
			++net->stats.rx_dropped;
			ret = NVSP_STAT_FAIL;
		}
		atomic_dec(&netvsc_dev->vf_use_cnt);
		return ret;
	}

vf_injection_done:
	rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci);
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		return NVSP_STAT_FAIL;
	}
	skb_record_rx_queue(skb, channel->
			    offermsg.offer.sub_channel_index);

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += packet->total_data_buflen;
	u64_stats_update_end(&rx_stats->syncp);

	/*
	 * Pass the skb back up. Network stack will deallocate the skb when it
	 * is done.
	 * TODO - use NAPI?
	 */
	netif_rx(skb);

	return 0;
}
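/*
 * The vf_inject/vf_use_cnt dance above is a "check, pin, re-check"
 * pattern: the flag can flip between the first test and the refcount
 * bump, so it is tested again once the count pins the VF. A hedged,
 * driver-agnostic sketch; the struct and names are assumptions.
 */
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/types.h>

struct example_vf_state {
	bool inject;		/* written by the VF (un)registration path */
	atomic_t use_cnt;	/* pins the VF while a packet is in flight */
};

static bool example_vf_try_pin(struct example_vf_state *vf)
{
	if (!READ_ONCE(vf->inject))
		return false;

	atomic_inc(&vf->use_cnt);
	if (!READ_ONCE(vf->inject)) {
		/* Raced with VF removal; undo and fall back to synthetic path. */
		atomic_dec(&vf->use_cnt);
		return false;
	}
	return true;
}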
Example #9
0
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
					   struct net_device *vrf_dev)
{
	struct iphdr *ip4h = ip_hdr(skb);
	int ret = NET_XMIT_DROP;
	struct flowi4 fl4 = {
		/* needed to match OIF rule */
		.flowi4_oif = vrf_dev->ifindex,
		.flowi4_iif = LOOPBACK_IFINDEX,
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
		.flowi4_proto = ip4h->protocol,
		.daddr = ip4h->daddr,
		.saddr = ip4h->saddr,
	};
	struct net *net = dev_net(vrf_dev);
	struct rtable *rt;

	rt = ip_route_output_flow(net, &fl4, NULL);
	if (IS_ERR(rt))
		goto err;

	skb_dst_drop(skb);

	/* if dst.dev is loopback or the VRF device again this is locally
	 * originated traffic destined to a local address. Short circuit
	 * to Rx path
	 */
	if (rt->dst.dev == vrf_dev)
		return vrf_local_xmit(skb, vrf_dev, &rt->dst);

	skb_dst_set(skb, &rt->dst);

	/* strip the ethernet header added for pass through VRF device */
	__skb_pull(skb, skb_network_offset(skb));

	if (!ip4h->saddr) {
		ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0,
					       RT_SCOPE_LINK);
	}

	ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
	if (unlikely(net_xmit_eval(ret)))
		vrf_dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;

out:
	return ret;
err:
	vrf_tx_error(vrf_dev, skb);
	goto out;
}

static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return vrf_process_v4_outbound(skb, dev);
	case htons(ETH_P_IPV6):
		return vrf_process_v6_outbound(skb, dev);
	default:
		vrf_tx_error(dev, skb);
		return NET_XMIT_DROP;
	}
}

static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
{
	int len = skb->len;
	netdev_tx_t ret = is_ip_tx_frame(skb, dev);

	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
		struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);

		u64_stats_update_begin(&dstats->syncp);
		dstats->tx_pkts++;
		dstats->tx_bytes += len;
		u64_stats_update_end(&dstats->syncp);
	} else {
		this_cpu_inc(dev->dstats->tx_drps);
	}

	return ret;
}
Example #10
0
/*
 * netvsc_recv_callback -  Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct net_device *net,
			 struct vmbus_channel *channel,
			 void  *data, u32 len,
			 const struct ndis_tcp_ip_checksum_info *csum_info,
			 const struct ndis_pkt_8021q_info *vlan)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct netvsc_device *net_device = net_device_ctx->nvdev;
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
	struct sk_buff *skb;
	struct sk_buff *vf_skb;
	struct netvsc_stats *rx_stats;
	int ret = 0;

	if (!net || net->reg_state != NETREG_REGISTERED)
		return NVSP_STAT_FAIL;

	if (READ_ONCE(net_device_ctx->vf_inject)) {
		atomic_inc(&net_device_ctx->vf_use_cnt);
		if (!READ_ONCE(net_device_ctx->vf_inject)) {
			/*
			 * We raced; just move on.
			 */
			atomic_dec(&net_device_ctx->vf_use_cnt);
			goto vf_injection_done;
		}

		/*
		 * Inject this packet into the VF interface.
		 * On Hyper-V, multicast and broadcast packets
		 * are only delivered on the synthetic interface
		 * (after subjecting these to policy filters on
		 * the host). Deliver these via the VF interface
		 * in the guest.
		 */
		vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev,
					       csum_info, vlan, data, len);
		if (vf_skb != NULL) {
			++net_device_ctx->vf_netdev->stats.rx_packets;
			net_device_ctx->vf_netdev->stats.rx_bytes +=
				len;
			netif_receive_skb(vf_skb);
		} else {
			++net->stats.rx_dropped;
			ret = NVSP_STAT_FAIL;
		}
		atomic_dec(&net_device_ctx->vf_use_cnt);
		return ret;
	}

vf_injection_done:
	rx_stats = &nvchan->rx_stats;

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netvsc_alloc_recv_skb(net, csum_info, vlan, data, len);
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		return NVSP_STAT_FAIL;
	}
	skb_record_rx_queue(skb, q_idx);

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += len;

	if (skb->pkt_type == PACKET_BROADCAST)
		++rx_stats->broadcast;
	else if (skb->pkt_type == PACKET_MULTICAST)
		++rx_stats->multicast;
	u64_stats_update_end(&rx_stats->syncp);

	net->stats.rx_packets++;
	net->stats.rx_bytes += len;

	napi_gro_receive(&nvchan->napi, skb);

	return 0;
}
Example #11
0
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet = NULL;
	int ret;
	unsigned int num_data_pgs;
	struct rndis_message *rndis_msg;
	struct rndis_packet *rndis_pkt;
	u32 rndis_msg_size;
	bool isvlan;
	bool linear = false;
	struct rndis_per_packet_info *ppi;
	struct ndis_tcp_ip_checksum_info *csum_info;
	struct ndis_tcp_lso_info *lso_info;
	int  hdr_offset;
	u32 net_trans_info;
	u32 hash;
	u32 skb_length;
	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
	struct hv_page_buffer *pb = page_buf;
	struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);

	/* We will need at most two pages to describe the rndis
	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
	 * of pages in a single packet. If skb is scattered around
	 * more pages we try linearizing it.
	 */

check_size:
	skb_length = skb->len;
	num_data_pgs = netvsc_get_slots(skb) + 2;
	if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) {
		net_alert_ratelimited("packet too big: %u pages (%u bytes)\n",
				      num_data_pgs, skb->len);
		ret = -EFAULT;
		goto drop;
	} else if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
		if (skb_linearize(skb)) {
			net_alert_ratelimited("failed to linearize skb\n");
			ret = -ENOMEM;
			goto drop;
		}
		linear = true;
		goto check_size;
	}

	/*
	 * Place the rndis header in the skb head room and
	 * the skb->cb will be used for hv_netvsc_packet
	 * structure.
	 */
	ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
	if (ret) {
		netdev_err(net, "unable to alloc hv_netvsc_packet\n");
		ret = -ENOMEM;
		goto drop;
	}
	/* Use the skb control buffer for building up the packet */
	BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
			FIELD_SIZEOF(struct sk_buff, cb));
	packet = (struct hv_netvsc_packet *)skb->cb;


	packet->q_idx = skb_get_queue_mapping(skb);

	packet->total_data_buflen = skb->len;

	rndis_msg = (struct rndis_message *)skb->head;

	memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);

	isvlan = skb->vlan_tci & VLAN_TAG_PRESENT;

	/* Add the rndis header */
	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
	rndis_msg->msg_len = packet->total_data_buflen;
	rndis_pkt = &rndis_msg->msg.pkt;
	rndis_pkt->data_offset = sizeof(struct rndis_packet);
	rndis_pkt->data_len = packet->total_data_buflen;
	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);

	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);

	hash = skb_get_hash_raw(skb);
	if (hash != 0 && net->real_num_tx_queues > 1) {
		rndis_msg_size += NDIS_HASH_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
				    NBL_HASH_VALUE);
		*(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
	}

	if (isvlan) {
		struct ndis_pkt_8021q_info *vlan;

		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
					IEEE_8021Q_INFO);
		vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
						ppi->ppi_offset);
		vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
		vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
				VLAN_PRIO_SHIFT;
	}

	net_trans_info = get_net_transport_info(skb, &hdr_offset);
	if (net_trans_info == TRANSPORT_INFO_NOT_IP)
		goto do_send;

	/*
	 * Setup the sendside checksum offload only if this is not a
	 * GSO packet.
	 */
	if (skb_is_gso(skb))
		goto do_lso;

	if ((skb->ip_summed == CHECKSUM_NONE) ||
	    (skb->ip_summed == CHECKSUM_UNNECESSARY))
		goto do_send;

	rndis_msg_size += NDIS_CSUM_PPI_SIZE;
	ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
			    TCPIP_CHKSUM_PKTINFO);

	csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
			ppi->ppi_offset);

	if (net_trans_info & (INFO_IPV4 << 16))
		csum_info->transmit.is_ipv4 = 1;
	else
		csum_info->transmit.is_ipv6 = 1;

	if (net_trans_info & INFO_TCP) {
		csum_info->transmit.tcp_checksum = 1;
		csum_info->transmit.tcp_header_offset = hdr_offset;
	} else if (net_trans_info & INFO_UDP) {
		/* UDP checksum offload is not supported on ws2008r2.
		 * Furthermore, on ws2012 and ws2012r2, there are some
		 * issues with udp checksum offload from Linux guests.
		 * (these are host issues).
		 * For now compute the checksum here.
		 */
		struct udphdr *uh;
		u16 udp_len;

		ret = skb_cow_head(skb, 0);
		if (ret)
			goto drop;

		uh = udp_hdr(skb);
		udp_len = ntohs(uh->len);
		uh->check = 0;
		uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr,
					      ip_hdr(skb)->daddr,
					      udp_len, IPPROTO_UDP,
					      csum_partial(uh, udp_len, 0));
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;

		csum_info->transmit.udp_checksum = 0;
	}
	goto do_send;

do_lso:
	rndis_msg_size += NDIS_LSO_PPI_SIZE;
	ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
			    TCP_LARGESEND_PKTINFO);

	lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
			ppi->ppi_offset);

	lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
	if (net_trans_info & (INFO_IPV4 << 16)) {
		lso_info->lso_v2_transmit.ip_version =
			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
		ip_hdr(skb)->tot_len = 0;
		ip_hdr(skb)->check = 0;
		tcp_hdr(skb)->check =
		~csum_tcpudp_magic(ip_hdr(skb)->saddr,
				   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	} else {
		lso_info->lso_v2_transmit.ip_version =
			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
		ipv6_hdr(skb)->payload_len = 0;
		tcp_hdr(skb)->check =
		~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
				&ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	}
	lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
	lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;

do_send:
	/* Start filling in the page buffers with the rndis hdr */
	rndis_msg->msg_len += rndis_msg_size;
	packet->total_data_buflen = rndis_msg->msg_len;
	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
					       skb, packet, &pb);

	/* timestamp packet in software */
	skb_tx_timestamp(skb);
	ret = netvsc_send(net_device_ctx->device_ctx, packet,
			  rndis_msg, &pb, skb);

drop:
	if (ret == 0) {
		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->packets++;
		tx_stats->bytes += skb_length;
		u64_stats_update_end(&tx_stats->syncp);
	} else {
		if (ret != -EAGAIN) {
			dev_kfree_skb_any(skb);
			net->stats.tx_dropped++;
		}
	}

	return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
}
static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned short id;
	struct netfront_info *np = netdev_priv(dev);
	struct netfront_stats *stats = this_cpu_ptr(np->stats);
	struct xen_netif_tx_request *tx;
	struct xen_netif_extra_info *extra;
	char *data = skb->data;
	RING_IDX i;
	grant_ref_t ref;
	unsigned long mfn;
	int notify;
	int frags = skb_shinfo(skb)->nr_frags;
	unsigned int offset = offset_in_page(data);
	unsigned int len = skb_headlen(skb);
	unsigned long flags;

	frags += DIV_ROUND_UP(offset + len, PAGE_SIZE);
	if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
		printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
		       frags);
		dump_stack();
		goto drop;
	}

	spin_lock_irqsave(&np->tx_lock, flags);

	if (unlikely(!netif_carrier_ok(dev) ||
		     (frags > 1 && !xennet_can_sg(dev)) ||
		     netif_needs_gso(skb, netif_skb_features(skb)))) {
		spin_unlock_irqrestore(&np->tx_lock, flags);
		goto drop;
	}

	i = np->tx.req_prod_pvt;

	id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
	np->tx_skbs[id].skb = skb;

	tx = RING_GET_REQUEST(&np->tx, i);

	tx->id   = id;
	ref = gnttab_claim_grant_reference(&np->gref_tx_head);
	BUG_ON((signed short)ref < 0);
	mfn = virt_to_mfn(data);
	gnttab_grant_foreign_access_ref(
		ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
	tx->gref = np->grant_tx_ref[id] = ref;
	tx->offset = offset;
	tx->size = len;
	extra = NULL;

	tx->flags = 0;
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
		tx->flags |= XEN_NETTXF_data_validated;

	if (skb_shinfo(skb)->gso_size) {
		struct xen_netif_extra_info *gso;

		gso = (struct xen_netif_extra_info *)
			RING_GET_REQUEST(&np->tx, ++i);

		if (extra)
			extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
		else
			tx->flags |= XEN_NETTXF_extra_info;

		gso->u.gso.size = skb_shinfo(skb)->gso_size;
		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
		gso->u.gso.pad = 0;
		gso->u.gso.features = 0;

		gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
		gso->flags = 0;
		extra = gso;
	}

	np->tx.req_prod_pvt = i + 1;

	xennet_make_frags(skb, dev, tx);
	tx->size = skb->len;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
	if (notify)
		notify_remote_via_irq(np->netdev->irq);

	u64_stats_update_begin(&stats->syncp);
	stats->tx_bytes += skb->len;
	stats->tx_packets++;
	u64_stats_update_end(&stats->syncp);

	xennet_tx_buf_gc(dev);

	if (!netfront_tx_slot_available(np))
		netif_stop_queue(dev);

	spin_unlock_irqrestore(&np->tx_lock, flags);

	return NETDEV_TX_OK;

 drop:
	dev->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
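/*
 * Hedged sketch of the flow-control convention used above: the xmit
 * path stops the queue when ring slots run out, and the completion
 * path wakes it once slots are reclaimed. example_tx_slots_available()
 * is a placeholder for the driver's own check (here,
 * netfront_tx_slot_available()).
 */
#include <linux/netdevice.h>

static bool example_tx_slots_available(struct net_device *dev)
{
	return true;	/* placeholder; a real driver compares ring indices */
}

static void example_after_enqueue(struct net_device *dev)
{
	if (!example_tx_slots_available(dev))
		netif_stop_queue(dev);		/* back-pressure the stack */
}

static void example_tx_completion(struct net_device *dev)
{
	if (netif_queue_stopped(dev) && example_tx_slots_available(dev))
		netif_wake_queue(dev);		/* resume transmission */
}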
Example #13
0
/*
 *	Determine the packet's protocol ID. The rule here is that we
 *	assume 802.3 if the type field is short enough to be a length.
 *	This is normal practice and works for any 'now in use' protocol.
 *
 *  Also, at this point we assume that we ARE dealing exclusively with
 *  VLAN packets, or packets that should be made into VLAN packets based
 *  on a default VLAN ID.
 *
 *  NOTE:  Should be similar to ethernet/eth.c.
 *
 *  SANITY NOTE:  This method is called when a packet is moving up the stack
 *                towards userland.  To get here, it would have already passed
 *                through the ethernet/eth.c eth_type_trans() method.
 *  SANITY NOTE 2: We are referencing the VLAN_HDR fields, which MAY be
 *                 stored UNALIGNED in the memory.  RISC systems don't like
 *                 such cases very much...
 *  SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be
 *  		    aligned, so there doesn't need to be any of the unaligned
 *  		    stuff.  It has been commented out now...  --Ben
 *
 */
int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
		  struct packet_type *ptype, struct net_device *orig_dev)
{
	struct vlan_hdr *vhdr;
	struct vlan_rx_stats *rx_stats;
	struct net_device *vlan_dev;
	u16 vlan_id;
	u16 vlan_tci;
#if defined(CONFIG_TCSUPPORT_VLAN_TAG)
	__be16 proto = 0;
#endif
#if defined(CONFIG_TCSUPPORT_PON_VLAN)
	int ret = 0;
#endif

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto err_free;

	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
		goto err_free;

	vhdr = (struct vlan_hdr *)skb->data;
	vlan_tci = ntohs(vhdr->h_vlan_TCI);
	vlan_id = vlan_tci & VLAN_VID_MASK;

#ifdef CONFIG_TCSUPPORT_PON_VLAN
#if 0
	if(orig_dev->name[0] == 'e')
		skb->pon_vlan_flag |= PON_PKT_FROM_LAN;
	else if(orig_dev->name[0] == 'n')
	{
		skb->pon_vlan_flag |= PON_PKT_FROM_WAN;
		skb->pon_vlan_flag |= PON_PKT_ROUTING_FLAG;
	}
#endif
	if (ptype->type != htons(ETH_P_8021Q))
	{
		goto Pon_Handle;
	}
#endif

	rcu_read_lock();
	vlan_dev = __find_vlan_dev(dev, vlan_id);

	/* If the VLAN device is defined, we use it.
	 * If not, and the VID is 0, it is an 802.1p packet (not
	 * really a VLAN), so we will just netif_rx it later to the
	 * original interface, but with the skb->proto set to the
	 * wrapped proto: we do nothing here.
	 */

	if (!vlan_dev) {
#ifdef CONFIG_TCSUPPORT_PON_VLAN
Pon_Handle:
	if(pon_store_tag_hook)
	{
		ret = pon_store_tag_hook(skb, orig_dev);
		if(ret == 0)
		{
			netif_rx(skb);
			return 0;
		}
		else if(ret == -1)
		{
			kfree_skb(skb);
			return -1;
		}
		else
		{
			//HGU mode,do nothing
		}
	}
#endif
	#ifdef CONFIG_TCSUPPORT_VLAN_TAG
		if (check_vtag_hook && (check_vtag_hook() == 1)) {
			if (remove_vtag_hook) {
				 if (remove_vtag_hook(skb, orig_dev) == -1) {
				 	/* must free skb !! */
					kfree_skb(skb);
					rcu_read_unlock();
					return -1;
				 }
				 else {
				 	netif_rx(skb);
					rcu_read_unlock();
					return 0;
				 }
			}
			else {
				goto Normal_Handle;
			}
		}
		else {
Normal_Handle:
#if !defined(CONFIG_TCSUPPORT_CT) 
			if((orig_dev != NULL) && ((orig_dev->name[0] == 'b') || (orig_dev->name[0] == 'n'))) 
#endif
			{
				proto = vhdr->h_vlan_encapsulated_proto;
				skb->protocol = proto;
				/* Take off the VLAN header (4 bytes currently) */
				skb_pull_rcsum(skb, VLAN_HLEN);
				skb->dev = orig_dev;
				netif_rx(skb);
				rcu_read_unlock();
				return 0;
			}
			else {
				pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
					 __func__, vlan_id, dev->name);
				kfree_skb(skb);
				rcu_read_unlock();
				return -1;

			}
		}
	#else
		if (vlan_id) {
			pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
				 __func__, vlan_id, dev->name);
			goto err_unlock;
		}
		rx_stats = NULL;
	#endif
	} else {
		skb->dev = vlan_dev;

#if !defined(CONFIG_TCSUPPORT_CT) 
#ifdef CONFIG_PORT_BINDING
		if (skb->dev->name[0] == 'e') {
		//	skb->mark |= MASK_ORIGIN_DEV;
			skb->portbind_mark |= MASK_ORIGIN_DEV;
			memcpy(skb->orig_dev_name, skb->dev->name, IFNAMSIZ);
			//printk("vlan_skb_recv: begin orig_dev name is [%s], orig_dev name is [%s]\n", skb->orig_dev_name, orig_dev->name);
		}
#endif
#endif

		rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
					smp_processor_id());
		u64_stats_update_begin(&rx_stats->syncp);
		rx_stats->rx_packets++;
		rx_stats->rx_bytes += skb->len;

		skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);

		pr_debug("%s: priority: %u for TCI: %hu\n",
			 __func__, skb->priority, vlan_tci);

		switch (skb->pkt_type) {
		case PACKET_BROADCAST:
			/* Yeah, stats collect these together.. */
			/* stats->broadcast ++; // no such counter :-( */
			break;

		case PACKET_MULTICAST:
			rx_stats->rx_multicast++;
			break;

		case PACKET_OTHERHOST:
			/* Our lower layer thinks this is not local, let's make
			 * sure.
			 * This allows the VLAN to have a different MAC than the
			 * underlying device, and still route correctly.
			 */
			if (!compare_ether_addr(eth_hdr(skb)->h_dest,
						skb->dev->dev_addr))
				skb->pkt_type = PACKET_HOST;
			break;
		default:
			break;
		}
		u64_stats_update_end(&rx_stats->syncp);
	}

	skb_pull_rcsum(skb, VLAN_HLEN);
	vlan_set_encap_proto(skb, vhdr);

	if (vlan_dev) {
		skb = vlan_check_reorder_header(skb);
		if (!skb) {
			rx_stats->rx_errors++;
			goto err_unlock;
		}
	}

	netif_rx(skb);
	rcu_read_unlock();
	return NET_RX_SUCCESS;

err_unlock:
	rcu_read_unlock();
err_free:
	kfree_skb(skb);
	return NET_RX_DROP;
}
Example #14
0
static void ifb_ri_tasklet(unsigned long _txp)
{
	struct ifb_q_private *txp = (struct ifb_q_private *)_txp;
	struct netdev_queue *txq;
	struct sk_buff *skb;

	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
	skb = skb_peek(&txp->tq);
	if (!skb) {
		if (!__netif_tx_trylock(txq))
			goto resched;
		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
		__netif_tx_unlock(txq);
	}

	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
		skb->tc_redirected = 0;
		skb->tc_skip_classify = 1;

		u64_stats_update_begin(&txp->tsync);
		txp->tx_packets++;
		txp->tx_bytes += skb->len;
		u64_stats_update_end(&txp->tsync);

		rcu_read_lock();
		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
		if (!skb->dev) {
			rcu_read_unlock();
			dev_kfree_skb(skb);
			txp->dev->stats.tx_dropped++;
			if (skb_queue_len(&txp->tq) != 0)
				goto resched;
			break;
		}
		rcu_read_unlock();
		skb->skb_iif = txp->dev->ifindex;

		if (!skb->tc_from_ingress) {
			dev_queue_xmit(skb);
		} else {
			skb_pull_rcsum(skb, skb->mac_len);
			netif_receive_skb(skb);
		}
	}

	if (__netif_tx_trylock(txq)) {
		skb = skb_peek(&txp->rq);
		if (!skb) {
			txp->tasklet_pending = 0;
			if (netif_tx_queue_stopped(txq))
				netif_tx_wake_queue(txq);
		} else {
			__netif_tx_unlock(txq);
			goto resched;
		}
		__netif_tx_unlock(txq);
	} else {
resched:
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}

}
Example #15
0
int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
			  struct ixgbe_ring *rx_ring,
			  const int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	struct ixgbe_adapter *adapter = q_vector->adapter;
	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
	unsigned int xdp_res, xdp_xmit = 0;
	bool failure = false;
	struct sk_buff *skb;
	struct xdp_buff xdp;

	xdp.rxq = &rx_ring->xdp_rxq;

	while (likely(total_rx_packets < budget)) {
		union ixgbe_adv_rx_desc *rx_desc;
		struct ixgbe_rx_buffer *bi;
		unsigned int size;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
			failure = failure ||
				  !ixgbe_alloc_rx_buffers_fast_zc(rx_ring,
								 cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
		size = le16_to_cpu(rx_desc->wb.upper.length);
		if (!size)
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * descriptor has been written back
		 */
		dma_rmb();

		bi = ixgbe_get_rx_buffer_zc(rx_ring, size);

		if (unlikely(!ixgbe_test_staterr(rx_desc,
						 IXGBE_RXD_STAT_EOP))) {
			struct ixgbe_rx_buffer *next_bi;

			ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
			ixgbe_inc_ntc(rx_ring);
			next_bi =
			       &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
			next_bi->skb = ERR_PTR(-EINVAL);
			continue;
		}

		if (unlikely(bi->skb)) {
			ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
			ixgbe_inc_ntc(rx_ring);
			continue;
		}

		xdp.data = bi->addr;
		xdp.data_meta = xdp.data;
		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
		xdp.data_end = xdp.data + size;
		xdp.handle = bi->handle;

		xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, &xdp);

		if (xdp_res) {
			if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
				xdp_xmit |= xdp_res;
				bi->addr = NULL;
				bi->skb = NULL;
			} else {
				ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
			}
			total_rx_packets++;
			total_rx_bytes += size;

			cleaned_count++;
			ixgbe_inc_ntc(rx_ring);
			continue;
		}

		/* XDP_PASS path */
		skb = ixgbe_construct_skb_zc(rx_ring, bi, &xdp);
		if (!skb) {
			rx_ring->rx_stats.alloc_rx_buff_failed++;
			break;
		}

		cleaned_count++;
		ixgbe_inc_ntc(rx_ring);

		if (eth_skb_pad(skb))
			continue;

		total_rx_bytes += skb->len;
		total_rx_packets++;

		ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
		ixgbe_rx_skb(q_vector, skb);
	}

	if (xdp_xmit & IXGBE_XDP_REDIR)
		xdp_do_flush_map();

	if (xdp_xmit & IXGBE_XDP_TX) {
		struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.
		 */
		wmb();
		writel(ring->next_to_use, ring->tail);
	}

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_rx_packets;
	rx_ring->stats.bytes += total_rx_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	q_vector->rx.total_packets += total_rx_packets;
	q_vector->rx.total_bytes += total_rx_bytes;

	return failure ? budget : (int)total_rx_packets;
}
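/*
 * Hedged sketch of how a NAPI poll callback typically wraps a ring
 * cleaner like the one above: interrupts are re-armed only when the
 * ring was drained within the budget. example_clean_rx() and
 * example_irq_rearm() are placeholders for driver helpers, not ixgbe
 * API.
 */
#include <linux/netdevice.h>

static int example_clean_rx(struct napi_struct *napi, int budget)
{
	return 0;	/* placeholder for an ixgbe_clean_rx_irq_zc()-style cleaner */
}

static void example_irq_rearm(struct napi_struct *napi)
{
	/* placeholder for the driver-specific interrupt re-arm */
}

static int example_napi_poll(struct napi_struct *napi, int budget)
{
	int work_done = example_clean_rx(napi, budget);

	if (work_done >= budget)
		return budget;		/* more work pending; stay scheduled */

	if (napi_complete_done(napi, work_done))
		example_irq_rearm(napi);

	return work_done;
}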
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
Example #17
0
bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
			    struct ixgbe_ring *tx_ring, int napi_budget)
{
	unsigned int total_packets = 0, total_bytes = 0;
	u32 i = tx_ring->next_to_clean, xsk_frames = 0;
	unsigned int budget = q_vector->tx.work_limit;
	struct xdp_umem *umem = tx_ring->xsk_umem;
	union ixgbe_adv_tx_desc *tx_desc;
	struct ixgbe_tx_buffer *tx_bi;
	bool xmit_done;

	tx_bi = &tx_ring->tx_buffer_info[i];
	tx_desc = IXGBE_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	do {
		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
			break;

		total_bytes += tx_bi->bytecount;
		total_packets += tx_bi->gso_segs;

		if (tx_bi->xdpf)
			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
		else
			xsk_frames++;

		tx_bi->xdpf = NULL;

		tx_bi++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_bi = tx_ring->tx_buffer_info;
			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;

	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (xsk_frames)
		xsk_umem_complete_tx(umem, xsk_frames);

	xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
	return budget > 0 && xmit_done;
}
Example #18
0
static void deth_nl_recv_msg(struct sk_buff* skb)
{
    struct nlmsghdr* nlh = (struct nlmsghdr*) skb->data;
    
    int pktsize = 0;

    const char* ptr = nlmsg_data(nlh);
    u32 index = *(u32*)ptr;

    struct deth_priv* priv = NULL;
    struct deth_net_stats* stats = NULL;
    struct sk_buff* out_skb = NULL;
    struct net* net = sock_net(skb->sk);
    struct net_device* dev = dev_get_by_index(net, index);

    ptr += sizeof(u32);

    if (dev == NULL)
    {
        printk(KERN_ERR "Error retrieving the interface with index %d.\n", index);

        return;
    }

    switch (nlh->nlmsg_type)
    {
    case PACKET:
        break;
    case LINK_CHANGE:
        {
            if (*(u32*)(ptr))
            {
                netif_carrier_on(dev);
            }
            else
            {
                netif_carrier_off(dev);
            }
    
            goto cleanup;
        }
	case LINK_STATS:
		{
			priv = netdev_priv(dev);
			stats = this_cpu_ptr(priv->stats);
			u64_stats_update_begin(&stats->syncp);

			stats->rx_packets = *(u64*)(ptr);
			ptr += sizeof(u64);
			stats->rx_bytes = *(u64*)(ptr);
			ptr += sizeof(u64);
			stats->tx_packets = *(u64*)(ptr);
			ptr += sizeof(u64);
			stats->tx_bytes = *(u64*)(ptr);

			u64_stats_update_end(&stats->syncp);

			goto cleanup;
		}
    default:
        goto cleanup;
    }

    pktsize = nlh->nlmsg_len - sizeof(u32);
    out_skb = netdev_alloc_skb(dev, pktsize + 2);

    if (out_skb == NULL)
    {
        printk(KERN_ERR "Error allocating a sk_buff (%d bytes).\n", pktsize);

        goto rx_drop;
    }

    memcpy(skb_put(out_skb, pktsize), ptr, pktsize);

    out_skb->ip_summed = CHECKSUM_UNNECESSARY;

    if (dev_forward_skb(dev, out_skb) != NET_RX_SUCCESS)
    {
        printk(KERN_ERR "dev_forward_skb failed to forward a packet of %d bytes.\n", pktsize);

        goto rx_drop;
    }

	priv = netdev_priv(dev);
    stats = this_cpu_ptr(priv->stats);
    u64_stats_update_begin(&stats->syncp);
    stats->rx_bytes += pktsize;
    stats->rx_packets++;
    u64_stats_update_end(&stats->syncp);

    goto cleanup;

rx_drop:
    /* stats is not yet set on these error paths; fetch it before use */
    priv = netdev_priv(dev);
    stats = this_cpu_ptr(priv->stats);
    u64_stats_update_begin(&stats->syncp);
    stats->tx_dropped++;
    u64_stats_update_end(&stats->syncp);

cleanup:
    dev_put(dev);
}
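/*
 * deth_nl_recv_msg() above is a netlink input callback. A hedged
 * sketch of how such a handler gets registered; the protocol number
 * and "example_" names are assumptions, not part of the deth driver.
 */
#include <linux/errno.h>
#include <linux/netlink.h>
#include <net/net_namespace.h>

#define EXAMPLE_NETLINK_PROTO 31	/* assumption: an otherwise unused unit */

static struct sock *example_nl_sock;

static int example_nl_init(void)
{
	struct netlink_kernel_cfg cfg = {
		.input = deth_nl_recv_msg,	/* called for every received skb */
	};

	example_nl_sock = netlink_kernel_create(&init_net,
						EXAMPLE_NETLINK_PROTO, &cfg);
	return example_nl_sock ? 0 : -ENOMEM;
}

static void example_nl_exit(void)
{
	netlink_kernel_release(example_nl_sock);
}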