Beispiel #1
0
static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
				      struct net_device *netdev)
{
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	unsigned int desc_flags;
	union ibmveth_buf_desc descs[6];
	int last, i;
	int force_bounce = 0;
	dma_addr_t dma_addr;
	unsigned long mss = 0;

	/*
	 * veth handles a maximum of 6 segments including the header, so
	 * we have to linearize the skb if there are more than this.
	 */
	if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) {
		netdev->stats.tx_dropped++;
		goto out;
	}

	/* veth can't checksum offload UDP */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    ((skb->protocol == htons(ETH_P_IP) &&
	      ip_hdr(skb)->protocol != IPPROTO_TCP) ||
	     (skb->protocol == htons(ETH_P_IPV6) &&
	      ipv6_hdr(skb)->nexthdr != IPPROTO_TCP)) &&
	    skb_checksum_help(skb)) {

		netdev_err(netdev, "tx: failed to checksum packet\n");
		netdev->stats.tx_dropped++;
		goto out;
	}

	desc_flags = IBMVETH_BUF_VALID;

	if (skb_is_gso(skb) && adapter->fw_large_send_support)
		desc_flags |= IBMVETH_BUF_LRG_SND;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		unsigned char *buf = skb_transport_header(skb) +
						skb->csum_offset;

		desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD);

		/* Need to zero out the checksum */
		buf[0] = 0;
		buf[1] = 0;
	}

retry_bounce:
	memset(descs, 0, sizeof(descs));

	/*
	 * If a linear packet is below the rx threshold then
	 * copy it into the static bounce buffer. This avoids the
	 * cost of a TCE insert and remove.
	 */
	if (force_bounce || (!skb_is_nonlinear(skb) &&
				(skb->len < tx_copybreak))) {
		skb_copy_from_linear_data(skb, adapter->bounce_buffer,
					  skb->len);

		descs[0].fields.flags_len = desc_flags | skb->len;
		descs[0].fields.address = adapter->bounce_buffer_dma;

		if (ibmveth_send(adapter, descs, 0)) {
			adapter->tx_send_failed++;
			netdev->stats.tx_dropped++;
		} else {
			netdev->stats.tx_packets++;
			netdev->stats.tx_bytes += skb->len;
		}

		goto out;
	}

	/* Map the header */
	dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
				  skb_headlen(skb), DMA_TO_DEVICE);
	if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
		goto map_failed;

	descs[0].fields.flags_len = desc_flags | skb_headlen(skb);
	descs[0].fields.address = dma_addr;

	/* Map the frags */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0,
					    skb_frag_size(frag), DMA_TO_DEVICE);

		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
			goto map_failed_frags;

		descs[i+1].fields.flags_len = desc_flags | skb_frag_size(frag);
		descs[i+1].fields.address = dma_addr;
	}

	if (skb_is_gso(skb)) {
		if (adapter->fw_large_send_support) {
			mss = (unsigned long)skb_shinfo(skb)->gso_size;
			adapter->tx_large_packets++;
		} else if (!skb_is_gso_v6(skb)) {
			/* Put -1 in the IP checksum to tell phyp it
			 * is a largesend packet. Put the mss in
			 * the TCP checksum.
			 */
			ip_hdr(skb)->check = 0xffff;
			tcp_hdr(skb)->check =
				cpu_to_be16(skb_shinfo(skb)->gso_size);
			adapter->tx_large_packets++;
		}
	}

	if (ibmveth_send(adapter, descs, mss)) {
		adapter->tx_send_failed++;
		netdev->stats.tx_dropped++;
	} else {
		netdev->stats.tx_packets++;
		netdev->stats.tx_bytes += skb->len;
	}

	dma_unmap_single(&adapter->vdev->dev,
			 descs[0].fields.address,
			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
			 DMA_TO_DEVICE);

	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
			       DMA_TO_DEVICE);

out:
	dev_consume_skb_any(skb);
	return NETDEV_TX_OK;

map_failed_frags:
	last = i+1;
	for (i = 0; i < last; i++)
		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
			       DMA_TO_DEVICE);

map_failed:
	if (!firmware_has_feature(FW_FEATURE_CMO))
		netdev_err(netdev, "tx: unable to map xmit buffer\n");
	adapter->tx_map_failed++;
	if (skb_linearize(skb)) {
		netdev->stats.tx_dropped++;
		goto out;
	}
	force_bounce = 1;
	goto retry_bounce;
}
static int netdev_send(struct vport *vport, struct sk_buff *skb)
{
	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
	int mtu = netdev_vport->dev->mtu;
	int len;

	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
		if (net_ratelimit())
			pr_warn("%s: dropped over-mtu packet: %d > %d\n",
				ovs_dp_name(vport->dp), packet_length(skb), mtu);
		goto error;
	}

	if (unlikely(skb_warn_if_lro(skb)))
		goto error;

	skb->dev = netdev_vport->dev;
	forward_ip_summed(skb, true);

	if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) {
		int features;

		features = netif_skb_features(skb);

		if (!vlan_tso)
			features &= ~(NETIF_F_TSO | NETIF_F_TSO6 |
				      NETIF_F_UFO | NETIF_F_FSO);

		if (netif_needs_gso(skb, features)) {
			struct sk_buff *nskb;

			nskb = skb_gso_segment(skb, features);
			if (!nskb) {
				if (unlikely(skb_cloned(skb) &&
				    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) {
					kfree_skb(skb);
					return 0;
				}

				skb_shinfo(skb)->gso_type &= ~SKB_GSO_DODGY;
				goto tag;
			}

			if (IS_ERR(nskb)) {
				kfree_skb(skb);
				return 0;
			}
			consume_skb(skb);
			skb = nskb;

			len = 0;
			do {
				nskb = skb->next;
				skb->next = NULL;

				skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
				if (likely(skb)) {
					len += skb->len;
					vlan_set_tci(skb, 0);
					dev_queue_xmit(skb);
				}

				skb = nskb;
			} while (skb);

			return len;
		}

tag:
		skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
		if (unlikely(!skb))
			return 0;
		vlan_set_tci(skb, 0);
	}

	len = skb->len;
	dev_queue_xmit(skb);

	return len;

error:
	kfree_skb(skb);
	ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
	return 0;
}
Beispiel #3
0
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if ((net->ipv6.devconf_all->proxy_ndp == 1 &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0))
	    || net->ipv6.devconf_all->proxy_ndp >= 2) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
			ndisc_send_redirect(skb, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
Beispiel #4
0
static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
{
	struct mlx5_wq_cyc       *wq   = &sq->wq;

	u16 pi = sq->pc & wq->sz_m1;
	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg;

	u8  opcode = MLX5_OPCODE_SEND;
	dma_addr_t dma_addr = 0;
	u16 headlen;
	u16 ds_cnt;
	u16 ihs;
	int i;

	memset(wqe, 0, sizeof(*wqe));

	if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
		eseg->cs_flags	= MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
	else
		sq->stats.csum_offload_none++;

	if (skb_is_gso(skb)) {
		u32 payload_len;

		eseg->mss    = cpu_to_be16(skb_shinfo(skb)->gso_size);
		opcode       = MLX5_OPCODE_LSO;
		ihs          = skb_transport_offset(skb) + tcp_hdrlen(skb);
		payload_len  = skb->len - ihs;
		MLX5E_TX_SKB_CB(skb)->num_bytes = skb->len +
					(skb_shinfo(skb)->gso_segs - 1) * ihs;
		sq->stats.tso_packets++;
		sq->stats.tso_bytes += payload_len;
	} else {
		ihs = mlx5e_get_inline_hdr_size(sq, skb);
		MLX5E_TX_SKB_CB(skb)->num_bytes = max_t(unsigned int, skb->len,
							ETH_ZLEN);
	}

	skb_copy_from_linear_data(skb, eseg->inline_hdr_start, ihs);
	skb_pull_inline(skb, ihs);

	eseg->inline_hdr_sz = cpu_to_be16(ihs);

	ds_cnt  = sizeof(*wqe) / MLX5_SEND_WQE_DS;
	ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr_start),
			       MLX5_SEND_WQE_DS);
	dseg    = (struct mlx5_wqe_data_seg *)cseg + ds_cnt;

	MLX5E_TX_SKB_CB(skb)->num_dma = 0;

	headlen = skb_headlen(skb);
	if (headlen) {
		dma_addr = dma_map_single(sq->pdev, skb->data, headlen,
					  DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->lkey       = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(headlen);

		mlx5e_dma_push(sq, dma_addr, headlen);
		MLX5E_TX_SKB_CB(skb)->num_dma++;

		dseg++;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
		int fsz = skb_frag_size(frag);

		dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
					    DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->lkey       = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(fsz);

		mlx5e_dma_push(sq, dma_addr, fsz);
		MLX5E_TX_SKB_CB(skb)->num_dma++;

		dseg++;
	}

	ds_cnt += MLX5E_TX_SKB_CB(skb)->num_dma;

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);

	sq->skb[pi] = skb;

	MLX5E_TX_SKB_CB(skb)->num_wqebbs = DIV_ROUND_UP(ds_cnt,
							MLX5_SEND_WQEBB_NUM_DS);
	sq->pc += MLX5E_TX_SKB_CB(skb)->num_wqebbs;

	netdev_tx_sent_queue(sq->txq, MLX5E_TX_SKB_CB(skb)->num_bytes);

	if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) {
		netif_tx_stop_queue(sq->txq);
		sq->stats.stopped++;
	}

	if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) {
		cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
		mlx5e_tx_notify_hw(sq, wqe);
	}

	/* fill sq edge with nops to avoid wqe wrap around */
	while ((sq->pc & wq->sz_m1) > sq->edge)
		mlx5e_send_nop(sq, false);

	sq->stats.packets++;
	return NETDEV_TX_OK;

dma_unmap_wqe_err:
	sq->stats.dropped++;
	mlx5e_dma_unmap_wqe_err(sq, skb);

	dev_kfree_skb_any(skb);

	return NETDEV_TX_OK;
}
Beispiel #5
0
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct xenvif *vif = netdev_priv(dev);
	struct xenvif_queue *queue = NULL;
	unsigned int num_queues = vif->num_queues;
	u16 index;
	int min_slots_needed;

	BUG_ON(skb->dev != dev);

	/* Drop the packet if queues are not set up */
	if (num_queues < 1)
		goto drop;

	/* Obtain the queue to be used to transmit this packet */
	index = skb_get_queue_mapping(skb);
	if (index >= num_queues) {
		pr_warn_ratelimited("Invalid queue %hu for packet on interface %s\n.",
				    index, vif->dev->name);
		index %= num_queues;
	}
	queue = &vif->queues[index];

	/* Drop the packet if queue is not ready */
	if (queue->task == NULL ||
	    queue->dealloc_task == NULL ||
	    !xenvif_schedulable(vif))
		goto drop;

	/* At best we'll need one slot for the header and one for each
	 * frag.
	 */
	min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;

	/* If the skb is GSO then we'll also need an extra slot for the
	 * metadata.
	 */
	if (skb_is_gso(skb))
		min_slots_needed++;

	/* If the skb can't possibly fit in the remaining slots
	 * then turn off the queue to give the ring a chance to
	 * drain.
	 */
	if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
		queue->wake_queue.function = xenvif_wake_queue_callback;
		queue->wake_queue.data = (unsigned long)queue;
		xenvif_stop_queue(queue);
		mod_timer(&queue->wake_queue,
			jiffies + rx_drain_timeout_jiffies);
	}

	skb_queue_tail(&queue->rx_queue, skb);
	xenvif_kick_thread(queue);

	return NETDEV_TX_OK;

 drop:
	vif->dev->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
Beispiel #6
0
/**
 *  stmmac_xmit:
 *  @skb : the socket buffer
 *  @dev : device pointer
 *  Description : Tx entry point of the driver.
 */
static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct stmmac_priv *priv = netdev_priv(dev);
	unsigned int txsize = priv->dma_tx_size;
	unsigned int entry;
	int i, csum_insertion = 0;
	int nfrags = skb_shinfo(skb)->nr_frags;
	struct dma_desc *desc, *first;

	if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
		if (!netif_queue_stopped(dev)) {
			netif_stop_queue(dev);
			/* This is a hard error, log it. */
			pr_err("%s: BUG! Tx Ring full when queue awake\n",
				__func__);
		}
		return NETDEV_TX_BUSY;
	}

	entry = priv->cur_tx % txsize;

#ifdef STMMAC_XMIT_DEBUG
	if ((skb->len > ETH_FRAME_LEN) || nfrags)
		pr_info("stmmac xmit:\n"
		       "\tskb addr %p - len: %d - nopaged_len: %d\n"
		       "\tn_frags: %d - ip_summed: %d - %s gso\n",
		       skb, skb->len, skb_headlen(skb), nfrags, skb->ip_summed,
		       !skb_is_gso(skb) ? "isn't" : "is");
#endif

	csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);

	desc = priv->dma_tx + entry;
	first = desc;

#ifdef STMMAC_XMIT_DEBUG
	if ((nfrags > 0) || (skb->len > ETH_FRAME_LEN))
		pr_debug("stmmac xmit: skb len: %d, nopaged_len: %d,\n"
		       "\t\tn_frags: %d, ip_summed: %d\n",
		       skb->len, skb_headlen(skb), nfrags, skb->ip_summed);
#endif
	priv->tx_skbuff[entry] = skb;
	if (unlikely(skb->len >= BUF_SIZE_4KiB)) {
		entry = stmmac_handle_jumbo_frames(skb, dev, csum_insertion);
		desc = priv->dma_tx + entry;
	} else {
		unsigned int nopaged_len = skb_headlen(skb);
		desc->des2 = dma_map_single(priv->device, skb->data,
					nopaged_len, DMA_TO_DEVICE);
		priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len,
						csum_insertion);
	}

	for (i = 0; i < nfrags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		int len = frag->size;

		entry = (++priv->cur_tx) % txsize;
		desc = priv->dma_tx + entry;

		TX_DBG("\t[entry %d] segment len: %d\n", entry, len);
		desc->des2 = dma_map_page(priv->device, frag->page,
					  frag->page_offset,
					  len, DMA_TO_DEVICE);
		priv->tx_skbuff[entry] = NULL;
		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion);
		wmb();
		priv->hw->desc->set_tx_owner(desc);
	}

	/* Interrupt on completition only for the latest segment */
	priv->hw->desc->close_tx_desc(desc);

#ifdef CONFIG_STMMAC_TIMER
	/* Clean IC while using timer */
	if (likely(priv->tm->enable))
		priv->hw->desc->clear_tx_ic(desc);
#endif

	wmb();

	/* To avoid raise condition */
	priv->hw->desc->set_tx_owner(first);

	priv->cur_tx++;

#ifdef STMMAC_XMIT_DEBUG
	if (netif_msg_pktdata(priv)) {
		pr_info("stmmac xmit: current=%d, dirty=%d, entry=%d, "
		       "first=%p, nfrags=%d\n",
		       (priv->cur_tx % txsize), (priv->dirty_tx % txsize),
		       entry, first, nfrags);
		display_ring(priv->dma_tx, txsize);
		pr_info(">>> frame to be transmitted: ");
		print_pkt(skb->data, skb->len);
	}
#endif
	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
		TX_DBG("%s: stop transmitted packets\n", __func__);
		netif_stop_queue(dev);
	}

	dev->stats.tx_bytes += skb->len;

	skb_tx_timestamp(skb);

	priv->hw->dma->enable_dma_transmission(priv->ioaddr);

	return NETDEV_TX_OK;
}
Beispiel #7
0
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     np->autoflowlabel, fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
Beispiel #8
0
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	int mtu;
	int local;

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
				       sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
					 0, (IP_VS_RT_MODE_LOCAL |
					     IP_VS_RT_MODE_NON_LOCAL |
					     IP_VS_RT_MODE_RDR))))
		goto tx_error_icmp;
	local = __ip_vs_is_local_route6(rt);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
					 "ip_vs_nat_xmit_v6(): "
					 "stopping DNAT to local address");
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): "
				 "stopping DNAT to loopback address");
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu && !skb_is_gso(skb)) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): frag needed for");
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error;
	ipv6_hdr(skb)->daddr = cp->daddr.in6;

	if (!local || !skb->dev) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		/* destined to loopback, do we need to change route? */
		dst_release(&rt->dst);
	}

	IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
Beispiel #9
0
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, processe
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
	struct rtable *rt;			/* Route to the other host */
	__be32 saddr;				/* Source for tunnel */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	u8     tos = old_iph->tos;
	__be16 df;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    mtu;
	int ret;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
						   IP_VS_RT_MODE_NON_LOCAL |
						   IP_VS_RT_MODE_CONNECT,
						   &saddr)))
		goto tx_error_icmp;
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
	}

	tdev = rt->dst.dev;

	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
	if (mtu < 68) {
		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
		goto tx_error_put;
	}
	if (rt_is_output_route(skb_rtable(skb)))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	/* Copy DF, reset fragment offset and MF */
	df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;

	if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		consume_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	/* fix old IP header checksum */
	ip_send_check(old_iph);

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	cp->daddr.ip;
	iph->saddr		=	saddr;
	iph->ttl		=	old_iph->ttl;
	ip_select_ident(iph, &rt->dst, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = IP_VS_XMIT_TUNNEL(skb, cp);
	if (ret == NF_ACCEPT)
		ip_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}
Beispiel #10
0
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
{
	struct rt6_info	*rt;	/* Route to the other host */
	int mtu;
	int rc;
	int local;
	int rt_mode;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
					 0, rt_mode)))
		goto tx_error_icmp;

	local = __ip_vs_is_local_route6(rt);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI6\n",
				  __func__, &cp->daddr.in6);
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI6\n",
			  __func__, &cp->daddr.in6);
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu && !skb_is_gso(skb)) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	if (!local || !skb->dev) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		/* destined to loopback, do we need to change route? */
		dst_release(&rt->dst);
	}

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);

	rc = NF_STOLEN;
	goto out;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
Beispiel #11
0
/*
 *      NAT transmitter (only for outside-to-inside nat forwarding)
 *      Not used for related ICMP
 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	       struct ip_vs_protocol *pp)
{
	struct rtable *rt;		/* Route to the other host */
	int mtu;
	struct iphdr *iph = ip_hdr(skb);
	int local;

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      RT_TOS(iph->tos),
				      IP_VS_RT_MODE_LOCAL |
					IP_VS_RT_MODE_NON_LOCAL |
					IP_VS_RT_MODE_RDR, NULL)))
		goto tx_error_icmp;
	local = rt->rt_flags & RTCF_LOCAL;
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
					 "ip_vs_nat_xmit(): "
					 "stopping DNAT to local address");
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(cp->daddr.ip) &&
	    rt_is_input_route(skb_rtable(skb))) {
		IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
				 "stopping DNAT to loopback address");
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
	    !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
				 "ip_vs_nat_xmit(): frag needed for");
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error_put;
	ip_hdr(skb)->daddr = cp->daddr.ip;
	ip_send_check(ip_hdr(skb));

	if (!local) {
		/* drop old route */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		ip_rt_put(rt);
		/*
		 * Some IPv4 replies get local address from routes,
		 * not from iph, so while we DNAT after routing
		 * we need this second input/output route.
		 */
		if (!__ip_vs_reroute_locally(skb))
			goto tx_error;
	}

	IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
  tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}
Beispiel #12
0
/*
 *	ICMP packet transmitter
 *	called by the ip_vs_in_icmp
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
{
	struct rtable	*rt;	/* Route to the other host */
	int mtu;
	int rc;
	int local;
	int rt_mode;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      RT_TOS(ip_hdr(skb)->tos),
				      rt_mode, NULL)))
		goto tx_error_icmp;
	local = rt->rt_flags & RTCF_LOCAL;

	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI4\n",
				  __func__, &cp->daddr.ip);
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(cp->daddr.ip) &&
	    rt_is_input_route(skb_rtable(skb))) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI4\n",
			  __func__, &cp->daddr.ip);
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
	    !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	ip_vs_nat_icmp(skb, pp, cp, 0);

	if (!local) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		ip_rt_put(rt);
		/*
		 * Some IPv4 replies get local address from routes,
		 * not from iph, so while we DNAT after routing
		 * we need this second input/output route.
		 */
		if (!__ip_vs_reroute_locally(skb))
			goto tx_error;
	}

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);

	rc = NF_STOLEN;
	goto out;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
  out:
	LeaveFunction(10);
	return rc;
  tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM)
		sock_tx_timestamp(sk, &tx_flags);

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
					   sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
		return -EMSGSIZE;
	}

	skb = skb_peek_tail(&sk->sk_write_queue);
	cork->length += length;
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		err = ip6_ufo_append_data(sk, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    np->pmtudisc ==
						    IPV6_PMTUDISC_PROBE);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
Beispiel #14
0
/**
 * ixgbe_ipsec_tx - setup Tx flags for ipsec offload
 * @tx_ring: outgoing context
 * @first: current data packet
 * @itd: ipsec Tx data for later use in building context descriptor
 **/
int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
		   struct ixgbe_tx_buffer *first,
		   struct ixgbe_ipsec_tx_data *itd)
{
	struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev);
	struct ixgbe_ipsec *ipsec = adapter->ipsec;
	struct xfrm_state *xs;
	struct tx_sa *tsa;

	if (unlikely(!first->skb->sp->len)) {
		netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
			   __func__, first->skb->sp->len);
		return 0;
	}

	xs = xfrm_input_state(first->skb);
	if (unlikely(!xs)) {
		netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs = %p\n",
			   __func__, xs);
		return 0;
	}

	itd->sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
	if (unlikely(itd->sa_idx > IXGBE_IPSEC_MAX_SA_COUNT)) {
		netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d handle=%lu\n",
			   __func__, itd->sa_idx, xs->xso.offload_handle);
		return 0;
	}

	tsa = &ipsec->tx_tbl[itd->sa_idx];
	if (unlikely(!tsa->used)) {
		netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n",
			   __func__, itd->sa_idx);
		return 0;
	}

	first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC;

	if (xs->id.proto == IPPROTO_ESP) {

		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
			      IXGBE_ADVTXD_TUCMD_L4T_TCP;
		if (first->protocol == htons(ETH_P_IP))
			itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;

		/* The actual trailer length is authlen (16 bytes) plus
		 * 2 bytes for the proto and the padlen values, plus
		 * padlen bytes of padding.  This ends up not the same
		 * as the static value found in xs->props.trailer_len (21).
		 *
		 * ... but if we're doing GSO, don't bother as the stack
		 * doesn't add a trailer for those.
		 */
		if (!skb_is_gso(first->skb)) {
			/* The "correct" way to get the auth length would be
			 * to use
			 *    authlen = crypto_aead_authsize(xs->data);
			 * but since we know we only have one size to worry
			 * about * we can let the compiler use the constant
			 * and save us a few CPU cycles.
			 */
			const int authlen = IXGBE_IPSEC_AUTH_BITS / 8;
			struct sk_buff *skb = first->skb;
			u8 padlen;
			int ret;

			ret = skb_copy_bits(skb, skb->len - (authlen + 2),
					    &padlen, 1);
			if (unlikely(ret))
				return 0;
			itd->trailer_len = authlen + 2 + padlen;
		}
	}
	if (tsa->encrypt)
		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;

	return 1;
}
Beispiel #15
0
int ip_forward(struct sk_buff *skb)
{
	struct iphdr *iph;	/* Our header */
	struct rtable *rt;	/* Route we use */
	struct ip_options * opt	= &(IPCB(skb)->opt);

	if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
		goto drop;

	if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
		return NET_RX_SUCCESS;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	According to the RFC, we must first decrease the TTL field. If
	 *	that reaches zero, we must reply an ICMP control message telling
	 *	that the packet's lifetime expired.
	 */
	if (ip_hdr(skb)->ttl <= 1)
		goto too_many_hops;

	if (!xfrm4_route_forward(skb))
		goto drop;

	rt = (struct rtable*)skb->dst;

	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
		goto sr_failed;

	if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) &&
	             (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(dst_mtu(&rt->u.dst)));
		goto drop;
	}

	/* We are about to mangle packet. Copy it! */
	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
		goto drop;
	iph = ip_hdr(skb);

	/* Decrease ttl after skb cow done */
	ip_decrease_ttl(iph);

	/*
	 *	We now generate an ICMP HOST REDIRECT giving the route
	 *	we calculated.
	 */
	if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb->sp)
		ip_rt_send_redirect(skb);

	skb->priority = rt_tos2priority(iph->tos);

	return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, rt->u.dst.dev,
		       ip_forward_finish);

sr_failed:
	/*
	 *	Strict routing permits no gatewaying
	 */
	 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0);
	 goto drop;

too_many_hops:
	/* Tell the sender its packet died... */
	IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
	icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}
Beispiel #16
0
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	struct in6_addr saddr;		/* Source for tunnel */
	struct net_device *tdev;	/* Device to other host */
	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
	struct ipv6hdr  *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int    mtu;
	int ret;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
					 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
						     IP_VS_RT_MODE_NON_LOCAL))))
		goto tx_error_icmp;
	if (__ip_vs_is_local_route6(rt)) {
		dst_release(&rt->dst);
		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
	}

	tdev = rt->dst.dev;

	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
	if (mtu < IPV6_MIN_MTU) {
		IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
			     IPV6_MIN_MTU);
		goto tx_error_put;
	}
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
	    !skb_is_gso(skb)) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			dst_release(&rt->dst);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		consume_skb(skb);
		skb = new_skb;
		old_iph = ipv6_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ipv6_hdr(skb);
	iph->version		=	6;
	iph->nexthdr		=	IPPROTO_IPV6;
	iph->payload_len	=	old_iph->payload_len;
	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
	iph->priority		=	old_iph->priority;
	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
	iph->daddr = cp->daddr.in6;
	iph->saddr = saddr;
	iph->hop_limit		=	old_iph->hop_limit;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = IP_VS_XMIT_TUNNEL(skb, cp);
	if (ret == NF_ACCEPT)
		ip6_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);

	LeaveFunction(10);

	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
Beispiel #17
0
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet = NULL;
	int ret;
	unsigned int num_data_pgs;
	struct rndis_message *rndis_msg;
	struct rndis_packet *rndis_pkt;
	u32 rndis_msg_size;
	bool isvlan;
	bool linear = false;
	struct rndis_per_packet_info *ppi;
	struct ndis_tcp_ip_checksum_info *csum_info;
	struct ndis_tcp_lso_info *lso_info;
	int  hdr_offset;
	u32 net_trans_info;
	u32 hash;
	u32 skb_length;
	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
	struct hv_page_buffer *pb = page_buf;
	struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);

	/* We will atmost need two pages to describe the rndis
	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
	 * of pages in a single packet. If skb is scattered around
	 * more pages we try linearizing it.
	 */

check_size:
	skb_length = skb->len;
	num_data_pgs = netvsc_get_slots(skb) + 2;
	if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) {
		net_alert_ratelimited("packet too big: %u pages (%u bytes)\n",
				      num_data_pgs, skb->len);
		ret = -EFAULT;
		goto drop;
	} else if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
		if (skb_linearize(skb)) {
			net_alert_ratelimited("failed to linearize skb\n");
			ret = -ENOMEM;
			goto drop;
		}
		linear = true;
		goto check_size;
	}

	/*
	 * Place the rndis header in the skb head room and
	 * the skb->cb will be used for hv_netvsc_packet
	 * structure.
	 */
	ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
	if (ret) {
		netdev_err(net, "unable to alloc hv_netvsc_packet\n");
		ret = -ENOMEM;
		goto drop;
	}
	/* Use the skb control buffer for building up the packet */
	BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
			FIELD_SIZEOF(struct sk_buff, cb));
	packet = (struct hv_netvsc_packet *)skb->cb;


	packet->q_idx = skb_get_queue_mapping(skb);

	packet->total_data_buflen = skb->len;

	rndis_msg = (struct rndis_message *)skb->head;

	memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);

	isvlan = skb->vlan_tci & VLAN_TAG_PRESENT;

	/* Add the rndis header */
	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
	rndis_msg->msg_len = packet->total_data_buflen;
	rndis_pkt = &rndis_msg->msg.pkt;
	rndis_pkt->data_offset = sizeof(struct rndis_packet);
	rndis_pkt->data_len = packet->total_data_buflen;
	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);

	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);

	hash = skb_get_hash_raw(skb);
	if (hash != 0 && net->real_num_tx_queues > 1) {
		rndis_msg_size += NDIS_HASH_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
				    NBL_HASH_VALUE);
		*(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
	}

	if (isvlan) {
		struct ndis_pkt_8021q_info *vlan;

		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
					IEEE_8021Q_INFO);
		vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
						ppi->ppi_offset);
		vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
		vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
				VLAN_PRIO_SHIFT;
	}

	net_trans_info = get_net_transport_info(skb, &hdr_offset);
	if (net_trans_info == TRANSPORT_INFO_NOT_IP)
		goto do_send;

	/*
	 * Setup the sendside checksum offload only if this is not a
	 * GSO packet.
	 */
	if (skb_is_gso(skb))
		goto do_lso;

	if ((skb->ip_summed == CHECKSUM_NONE) ||
	    (skb->ip_summed == CHECKSUM_UNNECESSARY))
		goto do_send;

	rndis_msg_size += NDIS_CSUM_PPI_SIZE;
	ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
			    TCPIP_CHKSUM_PKTINFO);

	csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
			ppi->ppi_offset);

	if (net_trans_info & (INFO_IPV4 << 16))
		csum_info->transmit.is_ipv4 = 1;
	else
		csum_info->transmit.is_ipv6 = 1;

	if (net_trans_info & INFO_TCP) {
		csum_info->transmit.tcp_checksum = 1;
		csum_info->transmit.tcp_header_offset = hdr_offset;
	} else if (net_trans_info & INFO_UDP) {
		/* UDP checksum offload is not supported on ws2008r2.
		 * Furthermore, on ws2012 and ws2012r2, there are some
		 * issues with udp checksum offload from Linux guests.
		 * (these are host issues).
		 * For now compute the checksum here.
		 */
		struct udphdr *uh;
		u16 udp_len;

		ret = skb_cow_head(skb, 0);
		if (ret)
			goto drop;

		uh = udp_hdr(skb);
		udp_len = ntohs(uh->len);
		uh->check = 0;
		uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr,
					      ip_hdr(skb)->daddr,
					      udp_len, IPPROTO_UDP,
					      csum_partial(uh, udp_len, 0));
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;

		csum_info->transmit.udp_checksum = 0;
	}
	goto do_send;

do_lso:
	rndis_msg_size += NDIS_LSO_PPI_SIZE;
	ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
			    TCP_LARGESEND_PKTINFO);

	lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
			ppi->ppi_offset);

	lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
	if (net_trans_info & (INFO_IPV4 << 16)) {
		lso_info->lso_v2_transmit.ip_version =
			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
		ip_hdr(skb)->tot_len = 0;
		ip_hdr(skb)->check = 0;
		tcp_hdr(skb)->check =
		~csum_tcpudp_magic(ip_hdr(skb)->saddr,
				   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	} else {
		lso_info->lso_v2_transmit.ip_version =
			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
		ipv6_hdr(skb)->payload_len = 0;
		tcp_hdr(skb)->check =
		~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
				&ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	}
	lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
	lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;

do_send:
	/* Start filling in the page buffers with the rndis hdr */
	rndis_msg->msg_len += rndis_msg_size;
	packet->total_data_buflen = rndis_msg->msg_len;
	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
					       skb, packet, &pb);

	/* timestamp packet in software */
	skb_tx_timestamp(skb);
	ret = netvsc_send(net_device_ctx->device_ctx, packet,
			  rndis_msg, &pb, skb);

drop:
	if (ret == 0) {
		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->packets++;
		tx_stats->bytes += skb_length;
		u64_stats_update_end(&tx_stats->syncp);
	} else {
		if (ret != -EAGAIN) {
			dev_kfree_skb_any(skb);
			net->stats.tx_dropped++;
		}
	}

	return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
}
Beispiel #18
0
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet;
	int ret;
	unsigned int num_data_pgs;
	struct rndis_message *rndis_msg;
	struct rndis_packet *rndis_pkt;
	u32 rndis_msg_size;
	bool isvlan;
	struct rndis_per_packet_info *ppi;
	struct ndis_tcp_ip_checksum_info *csum_info;
	struct ndis_tcp_lso_info *lso_info;
	int  hdr_offset;
	u32 net_trans_info;


	/* We will atmost need two pages to describe the rndis
	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
	 * of pages in a single packet.
	 */
	num_data_pgs = netvsc_get_slots(skb) + 2;
	if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
		netdev_err(net, "Packet too big: %u\n", skb->len);
		dev_kfree_skb(skb);
		net->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	/* Allocate a netvsc packet based on # of frags. */
	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
			 (num_data_pgs * sizeof(struct hv_page_buffer)) +
			 sizeof(struct rndis_message) +
			 NDIS_VLAN_PPI_SIZE +
			 NDIS_CSUM_PPI_SIZE +
			 NDIS_LSO_PPI_SIZE, GFP_ATOMIC);
	if (!packet) {
		/* out of memory, drop packet */
		netdev_err(net, "unable to allocate hv_netvsc_packet\n");

		dev_kfree_skb(skb);
		net->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	packet->vlan_tci = skb->vlan_tci;

	packet->is_data_pkt = true;
	packet->total_data_buflen = skb->len;

	packet->rndis_msg = (struct rndis_message *)((unsigned long)packet +
				sizeof(struct hv_netvsc_packet) +
				(num_data_pgs * sizeof(struct hv_page_buffer)));

	/* Set the completion routine */
	packet->completion.send.send_completion = netvsc_xmit_completion;
	packet->completion.send.send_completion_ctx = packet;
	packet->completion.send.send_completion_tid = (unsigned long)skb;

	isvlan = packet->vlan_tci & VLAN_TAG_PRESENT;

	/* Add the rndis header */
	rndis_msg = packet->rndis_msg;
	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
	rndis_msg->msg_len = packet->total_data_buflen;
	rndis_pkt = &rndis_msg->msg.pkt;
	rndis_pkt->data_offset = sizeof(struct rndis_packet);
	rndis_pkt->data_len = packet->total_data_buflen;
	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);

	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);

	if (isvlan) {
		struct ndis_pkt_8021q_info *vlan;

		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
					IEEE_8021Q_INFO);
		vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
						ppi->ppi_offset);
		vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK;
		vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >>
				VLAN_PRIO_SHIFT;
	}

	net_trans_info = get_net_transport_info(skb, &hdr_offset);
	if (net_trans_info == TRANSPORT_INFO_NOT_IP)
		goto do_send;

	/*
	 * Setup the sendside checksum offload only if this is not a
	 * GSO packet.
	 */
	if (skb_is_gso(skb))
		goto do_lso;

	if ((skb->ip_summed == CHECKSUM_NONE) ||
	    (skb->ip_summed == CHECKSUM_UNNECESSARY))
		goto do_send;

	rndis_msg_size += NDIS_CSUM_PPI_SIZE;
	ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
			    TCPIP_CHKSUM_PKTINFO);

	csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
			ppi->ppi_offset);

	if (net_trans_info & (INFO_IPV4 << 16))
		csum_info->transmit.is_ipv4 = 1;
	else
		csum_info->transmit.is_ipv6 = 1;

	if (net_trans_info & INFO_TCP) {
		csum_info->transmit.tcp_checksum = 1;
		csum_info->transmit.tcp_header_offset = hdr_offset;
	} else if (net_trans_info & INFO_UDP) {
		/* UDP checksum offload is not supported on ws2008r2.
		 * Furthermore, on ws2012 and ws2012r2, there are some
		 * issues with udp checksum offload from Linux guests.
		 * (these are host issues).
		 * For now compute the checksum here.
		 */
		struct udphdr *uh;
		u16 udp_len;

		ret = skb_cow_head(skb, 0);
		if (ret)
			goto drop;

		uh = udp_hdr(skb);
		udp_len = ntohs(uh->len);
		uh->check = 0;
		uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr,
					      ip_hdr(skb)->daddr,
					      udp_len, IPPROTO_UDP,
					      csum_partial(uh, udp_len, 0));
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;

		csum_info->transmit.udp_checksum = 0;
	}
	goto do_send;

do_lso:
	rndis_msg_size += NDIS_LSO_PPI_SIZE;
	ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
			    TCP_LARGESEND_PKTINFO);

	lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
			ppi->ppi_offset);

	lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
	if (net_trans_info & (INFO_IPV4 << 16)) {
		lso_info->lso_v2_transmit.ip_version =
			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
		ip_hdr(skb)->tot_len = 0;
		ip_hdr(skb)->check = 0;
		tcp_hdr(skb)->check =
		~csum_tcpudp_magic(ip_hdr(skb)->saddr,
				   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	} else {
		lso_info->lso_v2_transmit.ip_version =
			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
		ipv6_hdr(skb)->payload_len = 0;
		tcp_hdr(skb)->check =
		~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
				&ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	}
	lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
	lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;

do_send:
	/* Start filling in the page buffers with the rndis hdr */
	rndis_msg->msg_len += rndis_msg_size;
	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
					skb, &packet->page_buf[0]);

	ret = netvsc_send(net_device_ctx->device_ctx, packet);

drop:
	if (ret == 0) {
		net->stats.tx_bytes += skb->len;
		net->stats.tx_packets++;
	} else {
		kfree(packet);
		if (ret != -EAGAIN) {
			dev_kfree_skb_any(skb);
			net->stats.tx_dropped++;
		}
	}

	return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
}
Beispiel #19
0
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	if (cork->length + length > mtu - headersize && dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		ipv6_local_error(sk, EMSGSIZE, fl6,
				 mtu - headersize +
				 sizeof(struct ipv6hdr));
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length < mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
Beispiel #20
0
/* Get route to destination or remote server */
static int
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
		   __be32 daddr, int rt_mode, __be32 *ret_saddr)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_dest_dst *dest_dst;
	struct rtable *rt;			/* Route to the other host */
	struct rtable *ort;			/* Original route */
	struct iphdr *iph;
	__be16 df;
	int mtu;
	int local, noref = 1;

	if (dest) {
		dest_dst = __ip_vs_dst_check(dest);
		if (likely(dest_dst))
			rt = (struct rtable *) dest_dst->dst_cache;
		else {
			dest_dst = ip_vs_dest_dst_alloc();
			spin_lock_bh(&dest->dst_lock);
			if (!dest_dst) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				goto err_unreach;
			}
			rt = do_output_route4(net, dest->addr.ip, rt_mode,
					      &dest_dst->dst_saddr.ip);
			if (!rt) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				ip_vs_dest_dst_free(dest_dst);
				goto err_unreach;
			}
			__ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
			spin_unlock_bh(&dest->dst_lock);
			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
				  &dest->addr.ip, &dest_dst->dst_saddr.ip,
				  atomic_read(&rt->dst.__refcnt));
		}
		daddr = dest->addr.ip;
		if (ret_saddr)
			*ret_saddr = dest_dst->dst_saddr.ip;
	} else {
		__be32 saddr = htonl(INADDR_ANY);

		noref = 0;

		/* For such unconfigured boxes avoid many route lookups
		 * for performance reasons because we do not remember saddr
		 */
		rt_mode &= ~IP_VS_RT_MODE_CONNECT;
		rt = do_output_route4(net, daddr, rt_mode, &saddr);
		if (!rt)
			goto err_unreach;
		if (ret_saddr)
			*ret_saddr = saddr;
	}

	local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
			     (rt->rt_flags & RTCF_LOCAL) ?
			     "local":"non-local", &daddr);
		goto err_put;
	}
	iph = ip_hdr(skb);
	if (likely(!local)) {
		if (unlikely(ipv4_is_loopback(iph->saddr))) {
			IP_VS_DBG_RL("Stopping traffic from loopback address "
				     "%pI4 to non-local address, dest: %pI4\n",
				     &iph->saddr, &daddr);
			goto err_put;
		}
	} else {
		ort = skb_rtable(skb);
		if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
		    !(ort->rt_flags & RTCF_LOCAL)) {
			IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
				     "local requires NAT method, dest: %pI4\n",
				     &iph->daddr, &daddr);
			goto err_put;
		}
		/* skb to local stack, preserve old route */
		if (!noref)
			ip_rt_put(rt);
		return local;
	}

	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
		mtu = dst_mtu(&rt->dst);
		df = iph->frag_off & htons(IP_DF);
	} else {
		struct sock *sk = skb->sk;

		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
		if (mtu < 68) {
			IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
			goto err_put;
		}
		ort = skb_rtable(skb);
		if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
			ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
		/* MTU check allowed? */
		df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
	}

	/* MTU checking */
	if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
		goto err_put;
	}

	skb_dst_drop(skb);
	if (noref) {
		if (!local)
			skb_dst_set_noref_force(skb, &rt->dst);
		else
			skb_dst_set(skb, dst_clone(&rt->dst));
	} else
		skb_dst_set(skb, &rt->dst);

	return local;

err_put:
	if (!noref)
		ip_rt_put(rt);
	return -1;

err_unreach:
	dst_link_failure(skb);
	return -1;
}
Beispiel #21
0
static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge;
	unsigned int mtu_reserved;

	mtu_reserved = nf_bridge_mtu_reduction(skb);

	if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
		nf_bridge_info_free(skb);
		return br_dev_queue_push_xmit(net, sk, skb);
	}

	nf_bridge = nf_bridge_info_get(skb);

	/* This is wrong! We should preserve the original fragment
	 * boundaries by preserving frag_list rather than refragmenting.
	 */
	if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) &&
	    skb->protocol == htons(ETH_P_IP)) {
		struct brnf_frag_data *data;

		if (br_validate_ipv4(net, skb))
			goto drop;

		IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;

		nf_bridge_update_protocol(skb);

		data = this_cpu_ptr(&brnf_frag_data_storage);

		data->vlan_tci = skb->vlan_tci;
		data->vlan_proto = skb->vlan_proto;
		data->encap_size = nf_bridge_encap_header_len(skb);
		data->size = ETH_HLEN + data->encap_size;

		skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
						 data->size);

		return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);
	}
	if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) &&
	    skb->protocol == htons(ETH_P_IPV6)) {
		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
		struct brnf_frag_data *data;

		if (br_validate_ipv6(net, skb))
			goto drop;

		IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;

		nf_bridge_update_protocol(skb);

		data = this_cpu_ptr(&brnf_frag_data_storage);
		data->encap_size = nf_bridge_encap_header_len(skb);
		data->size = ETH_HLEN + data->encap_size;

		skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
						 data->size);

		if (v6ops)
			return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit);

		kfree_skb(skb);
		return -EMSGSIZE;
	}
	nf_bridge_info_free(skb);
	return br_dev_queue_push_xmit(net, sk, skb);
 drop:
	kfree_skb(skb);
	return 0;
}
Beispiel #22
0
static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
				       netdev_features_t features)
{
	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	u16 mac_offset = skb->mac_header;
	__be16 protocol = skb->protocol;
	u16 mac_len = skb->mac_len;
	int gre_offset, outer_hlen;
	bool need_csum, ufo;

	if (!skb->encapsulation)
		goto out;

	if (unlikely(tnl_hlen < sizeof(struct gre_base_hdr)))
		goto out;

	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
		goto out;

	/* setup inner skb. */
	skb->encapsulation = 0;
	SKB_GSO_CB(skb)->encap_level = 0;
	__skb_pull(skb, tnl_hlen);
	skb_reset_mac_header(skb);
	skb_set_network_header(skb, skb_inner_network_offset(skb));
	skb->mac_len = skb_inner_network_offset(skb);
	skb->protocol = skb->inner_protocol;

	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
	skb->encap_hdr_csum = need_csum;

	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);

	features &= skb->dev->hw_enc_features;

	/* The only checksum offload we care about from here on out is the
	 * outer one so strip the existing checksum feature flags based
	 * on the fact that we will be computing our checksum in software.
	 */
	if (ufo) {
		features &= ~NETIF_F_CSUM_MASK;
		if (!need_csum)
			features |= NETIF_F_HW_CSUM;
	}

	/* segment inner packet. */
	segs = skb_mac_gso_segment(skb, features);
	if (IS_ERR_OR_NULL(segs)) {
		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
				     mac_len);
		goto out;
	}

	outer_hlen = skb_tnl_header_len(skb);
	gre_offset = outer_hlen - tnl_hlen;
	skb = segs;
	do {
		struct gre_base_hdr *greh;
		__sum16 *pcsum;

		/* Set up inner headers if we are offloading inner checksum */
		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			skb_reset_inner_headers(skb);
			skb->encapsulation = 1;
		}

		skb->mac_len = mac_len;
		skb->protocol = protocol;

		__skb_push(skb, outer_hlen);
		skb_reset_mac_header(skb);
		skb_set_network_header(skb, mac_len);
		skb_set_transport_header(skb, gre_offset);

		if (!need_csum)
			continue;

		greh = (struct gre_base_hdr *)skb_transport_header(skb);
		pcsum = (__sum16 *)(greh + 1);

		if (skb_is_gso(skb)) {
			unsigned int partial_adj;

			/* Adjust checksum to account for the fact that
			 * the partial checksum is based on actual size
			 * whereas headers should be based on MSS size.
			 */
			partial_adj = skb->len + skb_headroom(skb) -
				      SKB_GSO_CB(skb)->data_offset -
				      skb_shinfo(skb)->gso_size;
			*pcsum = ~csum_fold((__force __wsum)htonl(partial_adj));
		} else {
			*pcsum = 0;
		}

		*(pcsum + 1) = 0;
		*pcsum = gso_make_checksum(skb, 0);
	} while ((skb = skb->next));
out:
	return segs;
}
static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
{
	struct mlx5_wq_cyc       *wq   = &sq->wq;

	u16 pi = sq->pc & wq->sz_m1;
	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
	struct mlx5e_tx_wqe_info *wi   = &sq->wqe_info[pi];

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg;

	unsigned char *skb_data = skb->data;
	unsigned int skb_len = skb->len;
	u8  opcode = MLX5_OPCODE_SEND;
	dma_addr_t dma_addr = 0;
	unsigned int num_bytes;
	bool bf = false;
	u16 headlen;
	u16 ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
	u16 ihs;
	int i;

	memset(wqe, 0, sizeof(*wqe));

	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		eseg->cs_flags	= MLX5_ETH_WQE_L3_CSUM;
		if (skb->encapsulation) {
			eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
					  MLX5_ETH_WQE_L4_INNER_CSUM;
			sq->stats.csum_partial_inner++;
		} else {
			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
			sq->stats.csum_partial++;
		}
	} else
		sq->stats.csum_none++;

	if (sq->cc != sq->prev_cc) {
		sq->prev_cc = sq->cc;
		sq->bf_budget = (sq->cc == sq->pc) ? MLX5E_SQ_BF_BUDGET : 0;
	}

	if (skb_is_gso(skb)) {
		eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
		opcode    = MLX5_OPCODE_LSO;

		if (skb->encapsulation) {
			ihs = skb_inner_transport_header(skb) - skb->data +
			      inner_tcp_hdrlen(skb);
			sq->stats.tso_inner_packets++;
			sq->stats.tso_inner_bytes += skb->len - ihs;
		} else {
			ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
			sq->stats.tso_packets++;
			sq->stats.tso_bytes += skb->len - ihs;
		}

		num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
	} else {
		bf = sq->bf_budget   &&
		     !skb->xmit_more &&
		     !skb_shinfo(skb)->nr_frags;
		ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
		num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
	}

	wi->num_bytes = num_bytes;

	if (skb_vlan_tag_present(skb)) {
		mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data,
				  &skb_len);
		ihs += VLAN_HLEN;
	} else {
		memcpy(eseg->inline_hdr_start, skb_data, ihs);
		mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs);
	}

	eseg->inline_hdr_sz = cpu_to_be16(ihs);

	ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr_start),
			       MLX5_SEND_WQE_DS);

	dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt;

	wi->num_dma = 0;

	headlen = skb_len - skb->data_len;
	if (headlen) {
		dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
					  DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->lkey       = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(headlen);

		mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
		wi->num_dma++;

		dseg++;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
		int fsz = skb_frag_size(frag);

		dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
					    DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->lkey       = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(fsz);

		mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
		wi->num_dma++;

		dseg++;
	}

	ds_cnt += wi->num_dma;

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);

	sq->skb[pi] = skb;

	wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->pc += wi->num_wqebbs;

	if (unlikely(MLX5E_TX_HW_STAMP(sq->channel->priv, skb)))
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;

	netdev_tx_sent_queue(sq->txq, wi->num_bytes);

	if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) {
		netif_tx_stop_queue(sq->txq);
		sq->stats.queue_stopped++;
	}

	if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) {
		int bf_sz = 0;

		if (bf && sq->uar_bf_map)
			bf_sz = wi->num_wqebbs << 3;

		cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
		mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz);
	}

	sq->bf_budget = bf ? sq->bf_budget - 1 : 0;

	/* fill sq edge with nops to avoid wqe wrap around */
	while ((sq->pc & wq->sz_m1) > sq->edge)
		mlx5e_send_nop(sq, false);

	sq->stats.packets++;
	sq->stats.bytes += num_bytes;
	return NETDEV_TX_OK;

dma_unmap_wqe_err:
	sq->stats.queue_dropped++;
	mlx5e_dma_unmap_wqe_err(sq, wi->num_dma);

	dev_kfree_skb_any(skb);

	return NETDEV_TX_OK;
}
Beispiel #24
0
static bool is_gre_gso(struct sk_buff *skb)
{
	return skb_is_gso(skb);
}
Beispiel #25
0
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet = NULL;
	int ret;
	unsigned int num_data_pgs;
	struct rndis_message *rndis_msg;
	struct rndis_packet *rndis_pkt;
	u32 rndis_msg_size;
	struct rndis_per_packet_info *ppi;
	u32 hash;
	u32 skb_length;
	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
	struct hv_page_buffer *pb = page_buf;

	/* We will atmost need two pages to describe the rndis
	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
	 * of pages in a single packet. If skb is scattered around
	 * more pages we try linearizing it.
	 */

	skb_length = skb->len;
	num_data_pgs = netvsc_get_slots(skb) + 2;

	if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) {
		++net_device_ctx->eth_stats.tx_scattered;

		if (skb_linearize(skb))
			goto no_memory;

		num_data_pgs = netvsc_get_slots(skb) + 2;
		if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
			++net_device_ctx->eth_stats.tx_too_big;
			goto drop;
		}
	}

	/*
	 * Place the rndis header in the skb head room and
	 * the skb->cb will be used for hv_netvsc_packet
	 * structure.
	 */
	ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
	if (ret)
		goto no_memory;

	/* Use the skb control buffer for building up the packet */
	BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
			FIELD_SIZEOF(struct sk_buff, cb));
	packet = (struct hv_netvsc_packet *)skb->cb;

	/* TODO: This will likely evaluate to false, since RH7 and
	 * below kernels will set next pointer to NULL before calling
	 * into here. Should find another way to set this flag.
	 */
	packet->xmit_more = (skb->next != NULL);
	
	packet->q_idx = skb_get_queue_mapping(skb);

	packet->total_data_buflen = skb->len;

	rndis_msg = (struct rndis_message *)skb->head;

	memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);

	packet->send_completion_ctx = packet;

	/* Add the rndis header */
	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
	rndis_msg->msg_len = packet->total_data_buflen;
	rndis_pkt = &rndis_msg->msg.pkt;
	rndis_pkt->data_offset = sizeof(struct rndis_packet);
	rndis_pkt->data_len = packet->total_data_buflen;
	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);

	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);

#ifdef NOTYET
	// Divergence from upstream commit:
	// 307f099520b66504cf6c5638f3f404c48b9fb45b
	hash = skb_get_hash_raw(skb);
#endif
	hash = skb_get_hash(skb);
	if (hash != 0 && net->real_num_tx_queues > 1) {
		rndis_msg_size += NDIS_HASH_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
				    NBL_HASH_VALUE);
		*(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
	}

	if (skb_vlan_tag_present(skb)) {
		struct ndis_pkt_8021q_info *vlan;

		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
					IEEE_8021Q_INFO);
		vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
						ppi->ppi_offset);
		vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
		vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
				VLAN_PRIO_SHIFT;
	}

	if (skb_is_gso(skb)) {
		struct ndis_tcp_lso_info *lso_info;

		rndis_msg_size += NDIS_LSO_PPI_SIZE;
		ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
				    TCP_LARGESEND_PKTINFO);

		lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
							ppi->ppi_offset);

		lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
		if (skb->protocol == htons(ETH_P_IP)) {
			lso_info->lso_v2_transmit.ip_version =
				NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
			ip_hdr(skb)->tot_len = 0;
			ip_hdr(skb)->check = 0;
			tcp_hdr(skb)->check =
				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
						   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
		} else {
			lso_info->lso_v2_transmit.ip_version =
				NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
			ipv6_hdr(skb)->payload_len = 0;
			tcp_hdr(skb)->check =
				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						 &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
		}
		lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb);
		lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;

	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (net_checksum_info(skb) & net_device_ctx->tx_checksum_mask) {
			struct ndis_tcp_ip_checksum_info *csum_info;

			rndis_msg_size += NDIS_CSUM_PPI_SIZE;
			ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
					    TCPIP_CHKSUM_PKTINFO);

			csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
									 ppi->ppi_offset);

			csum_info->transmit.tcp_header_offset = skb_transport_offset(skb);

			if (skb->protocol == htons(ETH_P_IP)) {
				csum_info->transmit.is_ipv4 = 1;
				if (ip_hdr(skb)->protocol == IPPROTO_TCP)
					csum_info->transmit.tcp_checksum = 1;
				else
					csum_info->transmit.udp_checksum = 1;
			} else {
				csum_info->transmit.is_ipv6 = 1;

				if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
					csum_info->transmit.tcp_checksum = 1;
				else
					csum_info->transmit.udp_checksum = 1;
			}
		} else {
			/* Can't do offload of this type of checksum */
			if (skb_checksum_help(skb))
				goto drop;
		}
	}

	/* Start filling in the page buffers with the rndis hdr */
	rndis_msg->msg_len += rndis_msg_size;
	packet->total_data_buflen = rndis_msg->msg_len;
	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
					       skb, packet, &pb);

	/* timestamp packet in software */
	skb_tx_timestamp(skb);

	ret = netvsc_send(net_device_ctx->device_ctx, packet,
			  rndis_msg, &pb, skb);

	if (likely(ret == 0)) {
		struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);

		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->packets++;
		tx_stats->bytes += skb_length;
		u64_stats_update_end(&tx_stats->syncp);
		return NETDEV_TX_OK;
	}

	if (ret == -EAGAIN) {
		++net_device_ctx->eth_stats.tx_busy;
		return NETDEV_TX_BUSY;
	}

	if (ret == -ENOSPC)
		++net_device_ctx->eth_stats.tx_no_space;

drop:
	dev_kfree_skb_any(skb);
	net->stats.tx_dropped++;

	return NETDEV_TX_OK;

no_memory:
	++net_device_ctx->eth_stats.tx_no_memory;
	goto drop;
}
Beispiel #26
0
static int start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int num, err;
	struct scatterlist sg[1+MAX_SKB_FRAGS];
	struct virtio_net_hdr *hdr;
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;

	sg_init_table(sg, 1+MAX_SKB_FRAGS);

	pr_debug("%s: xmit %p " MAC_FMT "\n", dev->name, skb,
		 dest[0], dest[1], dest[2],
		 dest[3], dest[4], dest[5]);

	/* Encode metadata header at front. */
	hdr = skb_vnet_hdr(skb);
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		hdr->csum_start = skb->csum_start - skb_headroom(skb);
		hdr->csum_offset = skb->csum_offset;
	} else {
		hdr->flags = 0;
		hdr->csum_offset = hdr->csum_start = 0;
	}

	if (skb_is_gso(skb)) {
		hdr->hdr_len = skb_transport_header(skb) - skb->data;
		hdr->gso_size = skb_shinfo(skb)->gso_size;
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
		else
			BUG();
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
			hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
	} else {
		hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
		hdr->gso_size = hdr->hdr_len = 0;
	}

	vnet_hdr_to_sg(sg, skb);
	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
	__skb_queue_head(&vi->send, skb);

again:
	/* Free up any pending old buffers before queueing new ones. */
	free_old_xmit_skbs(vi);
	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
	if (err) {
		pr_debug("%s: virtio not prepared to send\n", dev->name);
		netif_stop_queue(dev);

		/* Activate callback for using skbs: if this returns false it
		 * means some were used in the meantime. */
		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
			vi->svq->vq_ops->disable_cb(vi->svq);
			netif_start_queue(dev);
			goto again;
		}
		__skb_unlink(skb, &vi->send);

		return NETDEV_TX_BUSY;
	}
	vi->svq->vq_ops->kick(vi->svq);

	return 0;
}
Beispiel #27
0
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hlimit < 0)
		hlimit = ipv6_get_hoplimit(dst->dev);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
				dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
Beispiel #29
0
struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
{
	int err;
	unsigned long flags;
	struct xfrm_state *x;
	struct sk_buff *skb2;
	struct softnet_data *sd;
	netdev_features_t esp_features = features;
	struct xfrm_offload *xo = xfrm_offload(skb);

	if (!xo)
		return skb;

	if (!(features & NETIF_F_HW_ESP))
		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);

	x = skb->sp->xvec[skb->sp->len - 1];
	if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
		return skb;

	local_irq_save(flags);
	sd = this_cpu_ptr(&softnet_data);
	err = !skb_queue_empty(&sd->xfrm_backlog);
	local_irq_restore(flags);

	if (err) {
		*again = true;
		return skb;
	}

	if (skb_is_gso(skb)) {
		struct net_device *dev = skb->dev;

		if (unlikely(!x->xso.offload_handle || (x->xso.dev != dev))) {
			struct sk_buff *segs;

			/* Packet got rerouted, fixup features and segment it. */
			esp_features = esp_features & ~(NETIF_F_HW_ESP
							| NETIF_F_GSO_ESP);

			segs = skb_gso_segment(skb, esp_features);
			if (IS_ERR(segs)) {
				kfree_skb(skb);
				atomic_long_inc(&dev->tx_dropped);
				return NULL;
			} else {
				consume_skb(skb);
				skb = segs;
			}
		}
	}

	if (!skb->next) {
		x->outer_mode->xmit(x, skb);

		xo->flags |= XFRM_DEV_RESUME;

		err = x->type_offload->xmit(x, skb, esp_features);
		if (err) {
			if (err == -EINPROGRESS)
				return NULL;

			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			kfree_skb(skb);
			return NULL;
		}

		skb_push(skb, skb->data - skb_mac_header(skb));

		return skb;
	}

	skb2 = skb;

	do {
		struct sk_buff *nskb = skb2->next;
		skb2->next = NULL;

		xo = xfrm_offload(skb2);
		xo->flags |= XFRM_DEV_RESUME;

		x->outer_mode->xmit(x, skb2);

		err = x->type_offload->xmit(x, skb2, esp_features);
		if (!err) {
			skb2->next = nskb;
		} else if (err != -EINPROGRESS) {
			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			skb2->next = nskb;
			kfree_skb_list(skb2);
			return NULL;
		} else {
			if (skb == skb2)
				skb = nskb;

			if (!skb)
				return NULL;

			goto skip_push;
		}

		skb_push(skb2, skb2->data - skb_mac_header(skb2));

skip_push:
		skb2 = nskb;
	} while (skb2);

	return skb;
}
Beispiel #30
0
static int xenvif_tx_submit(struct xenvif *vif)
{
	struct gnttab_copy *gop = vif->tx_copy_ops;
	struct sk_buff *skb;
	int work_done = 0;

	while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
		struct xen_netif_tx_request *txp;
		u16 pending_idx;
		unsigned data_len;

		pending_idx = *((u16 *)skb->data);
		txp = &vif->pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) {
			netdev_dbg(vif->dev, "netback grant failed.\n");
			skb_shinfo(skb)->nr_frags = 0;
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		memcpy(skb->data,
		       (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
		       data_len);
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			xenvif_idx_release(vif, pending_idx,
					   XEN_NETIF_RSP_OKAY);
		}

		if (txp->flags & XEN_NETTXF_csum_blank)
			skb->ip_summed = CHECKSUM_PARTIAL;
		else if (txp->flags & XEN_NETTXF_data_validated)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		xenvif_fill_frags(vif, skb);

		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
			int target = min_t(int, skb->len, PKT_PROT_LEN);
			__pskb_pull_tail(skb, target - skb_headlen(skb));
		}

		skb->dev      = vif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);
		skb_reset_network_header(skb);

		if (checksum_setup(vif, skb)) {
			netdev_dbg(vif->dev,
				   "Can't setup checksum in net_tx_action\n");
			kfree_skb(skb);
			continue;
		}

		skb_probe_transport_header(skb, 0);

		/* If the packet is GSO then we will have just set up the
		 * transport header offset in checksum_setup so it's now
		 * straightforward to calculate gso_segs.
		 */
		if (skb_is_gso(skb)) {
			int mss = skb_shinfo(skb)->gso_size;
			int hdrlen = skb_transport_header(skb) -
				skb_mac_header(skb) +
				tcp_hdrlen(skb);

			skb_shinfo(skb)->gso_segs =
				DIV_ROUND_UP(skb->len - hdrlen, mss);
		}

		vif->dev->stats.rx_bytes += skb->len;
		vif->dev->stats.rx_packets++;

		work_done++;

		netif_receive_skb(skb);
	}