Exemplo n.º 1
0
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
				  struct sk_buff *skb)
{
	u32 hash = skb_get_hash_perturb(skb, q->perturbation);

	return reciprocal_scale(hash, q->flows_cnt);
}
Exemplo n.º 2
0
/**
 *  reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
 *  @sk: First socket in the group.
 *  @hash: When no BPF filter is available, use this hash to select.
 *  @skb: skb to run through BPF filter.
 *  @hdr_len: BPF filter expects skb data pointer at payload data.  If
 *    the skb does not yet point at the payload, this parameter represents
 *    how far the pointer needs to advance to reach the payload.
 *  Returns a socket that should receive the packet (or NULL on error).
 */
struct sock *reuseport_select_sock(struct sock *sk,
				   u32 hash,
				   struct sk_buff *skb,
				   int hdr_len)
{
	struct sock_reuseport *reuse;
	struct bpf_prog *prog;
	struct sock *sk2 = NULL;
	u16 socks;

	rcu_read_lock();
	reuse = rcu_dereference(sk->sk_reuseport_cb);

	/* if memory allocation failed or add call is not yet complete */
	if (!reuse)
		goto out;

	prog = rcu_dereference(reuse->prog);
	socks = READ_ONCE(reuse->num_socks);
	if (likely(socks)) {
		/* paired with smp_wmb() in reuseport_add_sock() */
		smp_rmb();

		if (prog && skb)
			sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
		else
			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
	}

out:
	rcu_read_unlock();
	return sk2;
}
Exemplo n.º 3
0
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
		       void *accel_priv, select_queue_fallback_t fallback)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	int channel_ix = fallback(dev, skb);
	int up = 0;

	if (priv->params.num_rl_txqs) {
		u16 ix = mlx5e_select_queue_assigned(priv, skb);

		if (ix) {
			sk_tx_queue_set(skb->sk, ix);
			return ix;
		}
	}

	if (!netdev_get_num_tc(dev))
		return channel_ix;

	if (skb_vlan_tag_present(skb))
		up = skb->vlan_tci >> VLAN_PRIO_SHIFT;

	/* channel_ix can be larger than num_channels since
	 * dev->num_real_tx_queues = num_channels * num_tc
	 */
	if (channel_ix >= priv->params.num_channels)
		channel_ix = reciprocal_scale(channel_ix,
					      priv->params.num_channels);

	return priv->tc_to_txq_map[channel_ix][up];
}
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *dev_maps;
	struct xps_map *map;
	int queue_index = -1;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps);
	if (dev_maps) {
		map = rcu_dereference(
		    dev_maps->cpu_map[raw_smp_processor_id()]);
		if (map) {
			if (map->len == 1)
				queue_index = map->queues[0];
			else
				queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
									   map->len)];
			if (unlikely(queue_index >= dev->real_num_tx_queues))
				queue_index = -1;
		}
	}
	rcu_read_unlock();

	return queue_index;
#else
	return -1;
#endif
}
Exemplo n.º 5
0
/*
 * CoDel control_law is t + interval/sqrt(count)
 * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
 * both sqrt() and divide operation.
 *
 * Borrow from codel_control_law in Linux kernel
 */
static codel_time_t codel_control_law(codel_time_t t,
				      codel_time_t interval,
				      u32 rec_inv_sqrt)
{
	return t + reciprocal_scale(interval,
				    rec_inv_sqrt << REC_INV_SQRT_SHIFT);
}
Exemplo n.º 6
0
    static void nft_ng_random_eval(const struct nft_expr *expr,
                                   struct nft_regs *regs,
                                   const struct nft_pktinfo *pkt)
{
    struct nft_ng_random *priv = nft_expr_priv(expr);
    struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
    u32 val;

    val = reciprocal_scale(prandom_u32_state(state), priv->modulus);
    regs->data[priv->dreg] = val + priv->offset;
}
Exemplo n.º 7
0
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
				  const struct sk_buff *skb)
{
	struct flow_keys keys;
	unsigned int hash;

	skb_flow_dissect(skb, &keys);
	hash = jhash_3words((__force u32)keys.dst,
			    (__force u32)keys.src ^ keys.ip_proto,
			    (__force u32)keys.ports, q->perturbation);

	return reciprocal_scale(hash, q->flows_cnt);
}
Exemplo n.º 8
0
static inline u_int32_t
xt_cluster_hash(const struct nf_conn *ct,
		const struct xt_cluster_match_info *info)
{
	u_int32_t hash = 0;

	switch(nf_ct_l3num(ct)) {
	case AF_INET:
		hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info);
		break;
	case AF_INET6:
		hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info);
		break;
	default:
		WARN_ON(1);
		break;
	}

	return reciprocal_scale(hash, info->total_nodes);
}
Exemplo n.º 9
0
/**
 *  reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
 *  @sk: First socket in the group.
 *  @hash: When no BPF filter is available, use this hash to select.
 *  @skb: skb to run through BPF filter.
 *  @hdr_len: BPF filter expects skb data pointer at payload data.  If
 *    the skb does not yet point at the payload, this parameter represents
 *    how far the pointer needs to advance to reach the payload.
 *  Returns a socket that should receive the packet (or NULL on error).
 */
struct sock *reuseport_select_sock(struct sock *sk,
				   u32 hash,
				   struct sk_buff *skb,
				   int hdr_len)
{
	struct sock_reuseport *reuse;
	struct bpf_prog *prog;
	struct sock *sk2 = NULL;
	u16 socks;

	rcu_read_lock();
	reuse = rcu_dereference(sk->sk_reuseport_cb);

	/* if memory allocation failed or add call is not yet complete */
	if (!reuse)
		goto out;

	prog = rcu_dereference(reuse->prog);
	socks = READ_ONCE(reuse->num_socks);
	if (likely(socks)) {
		/* paired with smp_wmb() in reuseport_add_sock() */
		smp_rmb();

		if (!prog || !skb)
			goto select_by_hash;

		if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
			sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
		else
			sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);

select_by_hash:
		/* no bpf or invalid bpf result: fall back to hash usage */
		if (!sk2)
			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
	}

out:
	rcu_read_unlock();
	return sk2;
}
/*
 * Returns a Tx hash based on the given packet descriptor a Tx queues' number
 * to be used as a distribution range.
 */
u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
		  unsigned int num_tx_queues)
{
	u32 hash;
	u16 qoffset = 0;
	u16 qcount = num_tx_queues;

	if (skb_rx_queue_recorded(skb)) {
		hash = skb_get_rx_queue(skb);
		while (unlikely(hash >= num_tx_queues))
			hash -= num_tx_queues;
		return hash;
	}

	if (dev->num_tc) {
		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
		qoffset = dev->tc_to_txq[tc].offset;
		qcount = dev->tc_to_txq[tc].count;
	}

	return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
Exemplo n.º 11
0
/* For [FUTURE] fragmentation handling, we want the least-used
 * src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
 * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
 * 1-65535, we don't do pro-rata allocation based on ports; we choose
 * the ip with the lowest src-ip/dst-ip/proto usage.
 */
static void
find_best_ips_proto(const struct nf_conntrack_zone *zone,
		    struct nf_conntrack_tuple *tuple,
		    const struct nf_nat_range *range,
		    const struct nf_conn *ct,
		    enum nf_nat_manip_type maniptype)
{
	union nf_inet_addr *var_ipp;
	unsigned int i, max;
	/* Host order */
	u32 minip, maxip, j, dist;
	bool full_range;

	/* No IP mapping?  Do nothing. */
	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
		return;

	if (maniptype == NF_NAT_MANIP_SRC)
		var_ipp = &tuple->src.u3;
	else
		var_ipp = &tuple->dst.u3;

	/* Fast path: only one choice. */
	if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) {
		*var_ipp = range->min_addr;
		return;
	}

	if (nf_ct_l3num(ct) == NFPROTO_IPV4)
		max = sizeof(var_ipp->ip) / sizeof(u32) - 1;
	else
		max = sizeof(var_ipp->ip6) / sizeof(u32) - 1;

	/* Hashing source and destination IPs gives a fairly even
	 * spread in practice (if there are a small number of IPs
	 * involved, there usually aren't that many connections
	 * anyway).  The consistency means that servers see the same
	 * client coming from the same IP (some Internet Banking sites
	 * like this), even across reboots.
	 */
	j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
		   range->flags & NF_NAT_RANGE_PERSISTENT ?
			0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id);

	full_range = false;
	for (i = 0; i <= max; i++) {
		/* If first bytes of the address are at the maximum, use the
		 * distance. Otherwise use the full range.
		 */
		if (!full_range) {
			minip = ntohl((__force __be32)range->min_addr.all[i]);
			maxip = ntohl((__force __be32)range->max_addr.all[i]);
			dist  = maxip - minip + 1;
		} else {
			minip = 0;
			dist  = ~0;
		}

		var_ipp->all[i] = (__force __u32)
			htonl(minip + reciprocal_scale(j, dist));
		if (var_ipp->all[i] != range->max_addr.all[i])
			full_range = true;

		if (!(range->flags & NF_NAT_RANGE_PERSISTENT))
			j ^= (__force u32)tuple->dst.u3.all[i];
	}
}
Exemplo n.º 12
0
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
				  struct sk_buff *skb)
{
	return reciprocal_scale(skb_get_hash(skb), q->flows_cnt);
}
Exemplo n.º 13
0
static inline unsigned int
cake_hash(struct cake_bin_data *q, const struct sk_buff *skb, int flow_mode)
{
#if KERNEL_VERSION(4, 2, 0) > LINUX_VERSION_CODE
	struct flow_keys keys;
#else
	struct flow_keys keys, host_keys;
#endif
	u32 flow_hash, host_hash, reduced_hash;

	if (unlikely(flow_mode == CAKE_FLOW_NONE ||
		     q->flows_cnt < CAKE_SET_WAYS))
		return 0;

#if KERNEL_VERSION(4, 2, 0) > LINUX_VERSION_CODE
	skb_flow_dissect(skb, &keys);

	host_hash = jhash_3words(
		(__force u32)((flow_mode & CAKE_FLOW_DST_IP) ? keys.dst : 0),
		(__force u32)((flow_mode & CAKE_FLOW_SRC_IP) ? keys.src : 0),
		(__force u32)0, q->perturbation);

	if (!(flow_mode & CAKE_FLOW_FLOWS))
		flow_hash = host_hash;
	else
		flow_hash = jhash_3words(
			(__force u32)keys.dst,
			(__force u32)keys.src ^ keys.ip_proto,
			(__force u32)keys.ports, q->perturbation);

#else

/* Linux kernel 4.2.x have skb_flow_dissect_flow_keys which takes only 2
 * arguments
 */
#if (KERNEL_VERSION(4, 2, 0) <= LINUX_VERSION_CODE) && (KERNEL_VERSION(4,3,0) >  LINUX_VERSION_CODE)
	skb_flow_dissect_flow_keys(skb, &keys);
#else
	skb_flow_dissect_flow_keys(skb, &keys,
				FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
#endif
	/* flow_hash_from_keys() sorts the addresses by value, so we have
	 * to preserve their order in a separate data structure to treat
	 * src and dst host addresses as independently selectable.
	 */
	host_keys = keys;
	host_keys.ports.ports     = 0;
	host_keys.basic.ip_proto  = 0;
	host_keys.keyid.keyid     = 0;
	host_keys.tags.vlan_id    = 0;
	host_keys.tags.flow_label = 0;

	if (!(flow_mode & CAKE_FLOW_SRC_IP)) {
		switch (host_keys.control.addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
			host_keys.addrs.v4addrs.src = 0;
			break;

		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			memset(&host_keys.addrs.v6addrs.src, 0,
			       sizeof(host_keys.addrs.v6addrs.src));
			break;
		};
	}

	if (!(flow_mode & CAKE_FLOW_DST_IP)) {
		switch (host_keys.control.addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
			host_keys.addrs.v4addrs.dst = 0;
			break;

		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			memset(&host_keys.addrs.v6addrs.dst, 0,
			       sizeof(host_keys.addrs.v6addrs.dst));
			break;
		};
	}

	host_hash = flow_hash_from_keys(&host_keys);
	if (!(flow_mode & CAKE_FLOW_FLOWS)) {
		flow_hash = host_hash;
	} else {		
		flow_hash = flow_hash_from_keys(&keys);
	}
#endif
	reduced_hash = reciprocal_scale(flow_hash, q->flows_cnt);

	/* set-associative hashing */
	/* fast path if no hash collision (direct lookup succeeds) */
	if (likely(q->tags[reduced_hash] == flow_hash)) {
		q->way_directs++;
	} else {
		u32 inner_hash = reduced_hash % CAKE_SET_WAYS;
		u32 outer_hash = reduced_hash - inner_hash;
		u32 i, j, k;

		/* check if any active queue in the set is reserved for
		 * this flow. count the empty queues in the set, too
		 */

		for (i = j = 0, k = inner_hash; i < CAKE_SET_WAYS;
		     i++, k = (k + 1) % CAKE_SET_WAYS) {
			if (q->tags[outer_hash + k] == flow_hash) {
				q->way_hits++;
				goto found;
			} else if (list_empty(&q->flows[outer_hash + k].
					      flowchain)) {
				j++;
			}
		}

		/* no queue is reserved for this flow */
		if (j) {
			/* there's at least one empty queue, so find one
			 * to reserve.
			 */
			q->way_misses++;

			for (i = 0; i < CAKE_SET_WAYS; i++, k = (k + 1)
				     % CAKE_SET_WAYS)
				if (list_empty(&q->flows[outer_hash + k].
					       flowchain))
					goto found;
		} else {
			/* With no empty queues default to the original
			 * queue and accept the collision.
			 */
			q->way_collisions++;
		}

found:
		/* reserve queue for future packets in same flow */
		reduced_hash = outer_hash + k;
		q->tags[reduced_hash] = flow_hash;
	}

	return reduced_hash;
}