static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
                                  struct sk_buff *skb)
{
        u32 hash = skb_get_hash_perturb(skb, q->perturbation);

        return reciprocal_scale(hash, q->flows_cnt);
}
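Every snippet in this collection funnels a 32-bit hash (or counter) through reciprocal_scale(), which maps the value into [0, ep_ro) with a multiply and a shift instead of a modulo. A minimal user-space sketch of the same trick (the kernel helper in include/linux/kernel.h is equivalent up to the u32/u64 type names):

#include <stdint.h>

/* Scale a full-range 32-bit value into [0, ep_ro):
 * result = floor(val * ep_ro / 2^32), so the high bits of val decide
 * the bucket and ep_ro does not have to be a power of two.
 */
static inline uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
        return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

So fq_codel_hash() above returns a bucket index in [0, q->flows_cnt) without a divide; the trade-off is that the bucket choice leans on the high-order hash bits, which is fine for a well-mixed hash such as skb_get_hash_perturb().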
/**
 * reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
 * @sk: First socket in the group.
 * @hash: When no BPF filter is available, use this hash to select.
 * @skb: skb to run through BPF filter.
 * @hdr_len: BPF filter expects skb data pointer at payload data. If
 *           the skb does not yet point at the payload, this parameter
 *           represents how far the pointer needs to advance to reach
 *           the payload.
 * Returns a socket that should receive the packet (or NULL on error).
 */
struct sock *reuseport_select_sock(struct sock *sk,
                                   u32 hash,
                                   struct sk_buff *skb,
                                   int hdr_len)
{
        struct sock_reuseport *reuse;
        struct bpf_prog *prog;
        struct sock *sk2 = NULL;
        u16 socks;

        rcu_read_lock();
        reuse = rcu_dereference(sk->sk_reuseport_cb);

        /* if memory allocation failed or add call is not yet complete */
        if (!reuse)
                goto out;

        prog = rcu_dereference(reuse->prog);
        socks = READ_ONCE(reuse->num_socks);
        if (likely(socks)) {
                /* paired with smp_wmb() in reuseport_add_sock() */
                smp_rmb();

                if (prog && skb)
                        sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
                else
                        sk2 = reuse->socks[reciprocal_scale(hash, socks)];
        }
out:
        rcu_read_unlock();
        return sk2;
}
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
                       void *accel_priv, select_queue_fallback_t fallback)
{
        struct mlx5e_priv *priv = netdev_priv(dev);
        int channel_ix = fallback(dev, skb);
        int up = 0;

        if (priv->params.num_rl_txqs) {
                u16 ix = mlx5e_select_queue_assigned(priv, skb);

                if (ix) {
                        sk_tx_queue_set(skb->sk, ix);
                        return ix;
                }
        }

        if (!netdev_get_num_tc(dev))
                return channel_ix;

        if (skb_vlan_tag_present(skb))
                up = skb->vlan_tci >> VLAN_PRIO_SHIFT;

        /* channel_ix can be larger than num_channels since
         * dev->num_real_tx_queues = num_channels * num_tc
         */
        if (channel_ix >= priv->params.num_channels)
                channel_ix = reciprocal_scale(channel_ix,
                                              priv->params.num_channels);

        return priv->tc_to_txq_map[channel_ix][up];
}
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
        struct xps_dev_maps *dev_maps;
        struct xps_map *map;
        int queue_index = -1;

        rcu_read_lock();
        dev_maps = rcu_dereference(dev->xps_maps);
        if (dev_maps) {
                map = rcu_dereference(
                    dev_maps->cpu_map[raw_smp_processor_id()]);
                if (map) {
                        if (map->len == 1)
                                queue_index = map->queues[0];
                        else
                                queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
                                                                           map->len)];
                        if (unlikely(queue_index >= dev->real_num_tx_queues))
                                queue_index = -1;
                }
        }
        rcu_read_unlock();

        return queue_index;
#else
        return -1;
#endif
}
/*
 * CoDel control_law is t + interval/sqrt(count)
 * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
 * both the sqrt() and the divide operation.
 *
 * Borrowed from codel_control_law in the Linux kernel.
 */
static codel_time_t codel_control_law(codel_time_t t,
                                      codel_time_t interval,
                                      u32 rec_inv_sqrt)
{
        return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT);
}
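A quick sanity check on the fixed-point math, assuming the kernel's encoding (rec_inv_sqrt is a Q0.16 value and REC_INV_SQRT_SHIFT is 16): for count == 4, 1/sqrt(count) == 0.5, so rec_inv_sqrt is about 0x8000. Shifting left by 16 gives 0x80000000, the Q0.32 encoding of 0.5, and reciprocal_scale(interval, 0x80000000) == ((u64)interval * 0x80000000) >> 32 == interval / 2, which is exactly the interval/sqrt(count) the control law calls for.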
static void nft_ng_random_eval(const struct nft_expr *expr,
                               struct nft_regs *regs,
                               const struct nft_pktinfo *pkt)
{
        struct nft_ng_random *priv = nft_expr_priv(expr);
        struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
        u32 val;

        val = reciprocal_scale(prandom_u32_state(state), priv->modulus);
        regs->data[priv->dreg] = val + priv->offset;
}
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
                                  const struct sk_buff *skb)
{
        struct flow_keys keys;
        unsigned int hash;

        skb_flow_dissect(skb, &keys);
        hash = jhash_3words((__force u32)keys.dst,
                            (__force u32)keys.src ^ keys.ip_proto,
                            (__force u32)keys.ports,
                            q->perturbation);

        return reciprocal_scale(hash, q->flows_cnt);
}
static inline u_int32_t
xt_cluster_hash(const struct nf_conn *ct,
                const struct xt_cluster_match_info *info)
{
        u_int32_t hash = 0;

        switch (nf_ct_l3num(ct)) {
        case AF_INET:
                hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info);
                break;
        case AF_INET6:
                hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info);
                break;
        default:
                WARN_ON(1);
                break;
        }

        return reciprocal_scale(hash, info->total_nodes);
}
/**
 * reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
 * @sk: First socket in the group.
 * @hash: When no BPF filter is available, use this hash to select.
 * @skb: skb to run through BPF filter.
 * @hdr_len: BPF filter expects skb data pointer at payload data. If
 *           the skb does not yet point at the payload, this parameter
 *           represents how far the pointer needs to advance to reach
 *           the payload.
 * Returns a socket that should receive the packet (or NULL on error).
 */
struct sock *reuseport_select_sock(struct sock *sk,
                                   u32 hash,
                                   struct sk_buff *skb,
                                   int hdr_len)
{
        struct sock_reuseport *reuse;
        struct bpf_prog *prog;
        struct sock *sk2 = NULL;
        u16 socks;

        rcu_read_lock();
        reuse = rcu_dereference(sk->sk_reuseport_cb);

        /* if memory allocation failed or add call is not yet complete */
        if (!reuse)
                goto out;

        prog = rcu_dereference(reuse->prog);
        socks = READ_ONCE(reuse->num_socks);
        if (likely(socks)) {
                /* paired with smp_wmb() in reuseport_add_sock() */
                smp_rmb();

                if (!prog || !skb)
                        goto select_by_hash;

                if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
                        sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
                else
                        sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);

select_by_hash:
                /* no bpf or invalid bpf result: fall back to hash usage */
                if (!sk2)
                        sk2 = reuse->socks[reciprocal_scale(hash, socks)];
        }

out:
        rcu_read_unlock();
        return sk2;
}
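When the BPF path yields no socket (or there is no program attached), the select_by_hash fallback indexes the array with reciprocal_scale(hash, socks), so the group is split proportionally over the hash space. As a hypothetical example with socks == 3: hashes up to about 0x55555555 land on socks[0], the middle third on socks[1], and the top third on socks[2], and a given flow hash keeps selecting the same member as long as the group size does not change.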
/*
 * Returns a Tx hash based on the given packet descriptor and a Tx queues'
 * number to be used as a distribution range.
 */
u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
                  unsigned int num_tx_queues)
{
        u32 hash;
        u16 qoffset = 0;
        u16 qcount = num_tx_queues;

        if (skb_rx_queue_recorded(skb)) {
                hash = skb_get_rx_queue(skb);
                while (unlikely(hash >= num_tx_queues))
                        hash -= num_tx_queues;
                return hash;
        }

        if (dev->num_tc) {
                u8 tc = netdev_get_prio_tc_map(dev, skb->priority);

                qoffset = dev->tc_to_txq[tc].offset;
                qcount = dev->tc_to_txq[tc].count;
        }

        return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
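To make the scaling concrete (hypothetical numbers): with qcount == 8 and skb_get_hash() returning 0x80000000, reciprocal_scale(0x80000000, 8) == (0x80000000ULL * 8) >> 32 == 4, so the packet is steered to queue qoffset + 4; a hash in the upper half of the 32-bit space always lands in the upper half of that traffic class's queue range.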
/* For [FUTURE] fragmentation handling, we want the least-used
 * src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
 * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
 * 1-65535, we don't do pro-rata allocation based on ports; we choose
 * the ip with the lowest src-ip/dst-ip/proto usage.
 */
static void find_best_ips_proto(const struct nf_conntrack_zone *zone,
                                struct nf_conntrack_tuple *tuple,
                                const struct nf_nat_range *range,
                                const struct nf_conn *ct,
                                enum nf_nat_manip_type maniptype)
{
        union nf_inet_addr *var_ipp;
        unsigned int i, max;
        /* Host order */
        u32 minip, maxip, j, dist;
        bool full_range;

        /* No IP mapping? Do nothing. */
        if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
                return;

        if (maniptype == NF_NAT_MANIP_SRC)
                var_ipp = &tuple->src.u3;
        else
                var_ipp = &tuple->dst.u3;

        /* Fast path: only one choice. */
        if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) {
                *var_ipp = range->min_addr;
                return;
        }

        if (nf_ct_l3num(ct) == NFPROTO_IPV4)
                max = sizeof(var_ipp->ip) / sizeof(u32) - 1;
        else
                max = sizeof(var_ipp->ip6) / sizeof(u32) - 1;

        /* Hashing source and destination IPs gives a fairly even
         * spread in practice (if there are a small number of IPs
         * involved, there usually aren't that many connections
         * anyway). The consistency means that servers see the same
         * client coming from the same IP (some Internet Banking sites
         * like this), even across reboots.
         */
        j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
                   range->flags & NF_NAT_RANGE_PERSISTENT ?
                        0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id);

        full_range = false;
        for (i = 0; i <= max; i++) {
                /* If first bytes of the address are at the maximum, use the
                 * distance. Otherwise use the full range.
                 */
                if (!full_range) {
                        minip = ntohl((__force __be32)range->min_addr.all[i]);
                        maxip = ntohl((__force __be32)range->max_addr.all[i]);
                        dist  = maxip - minip + 1;
                } else {
                        minip = 0;
                        dist  = ~0;
                }

                var_ipp->all[i] = (__force __u32)
                        htonl(minip + reciprocal_scale(j, dist));
                if (var_ipp->all[i] != range->max_addr.all[i])
                        full_range = true;

                if (!(range->flags & NF_NAT_RANGE_PERSISTENT))
                        j ^= (__force u32)tuple->dst.u3.all[i];
        }
}
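For a single 32-bit address word the mapping is easy to follow (hypothetical range): with min_addr 10.0.0.1 and max_addr 10.0.0.4, minip == 0x0a000001, maxip == 0x0a000004 and dist == 4, so reciprocal_scale(j, 4) == j >> 30 yields an offset of 0..3 and the chosen address always stays inside [10.0.0.1, 10.0.0.4]. With NF_NAT_RANGE_PERSISTENT set, j does not depend on the destination, so the same client keeps mapping to the same NAT address, as the comment above describes.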
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
                                  struct sk_buff *skb)
{
        return reciprocal_scale(skb_get_hash(skb), q->flows_cnt);
}
static inline unsigned int
cake_hash(struct cake_bin_data *q, const struct sk_buff *skb, int flow_mode)
{
#if KERNEL_VERSION(4, 2, 0) > LINUX_VERSION_CODE
        struct flow_keys keys;
#else
        struct flow_keys keys, host_keys;
#endif
        u32 flow_hash, host_hash, reduced_hash;

        if (unlikely(flow_mode == CAKE_FLOW_NONE ||
                     q->flows_cnt < CAKE_SET_WAYS))
                return 0;

#if KERNEL_VERSION(4, 2, 0) > LINUX_VERSION_CODE
        skb_flow_dissect(skb, &keys);
        host_hash = jhash_3words(
                (__force u32)((flow_mode & CAKE_FLOW_DST_IP) ? keys.dst : 0),
                (__force u32)((flow_mode & CAKE_FLOW_SRC_IP) ? keys.src : 0),
                (__force u32)0, q->perturbation);

        if (!(flow_mode & CAKE_FLOW_FLOWS))
                flow_hash = host_hash;
        else
                flow_hash = jhash_3words(
                        (__force u32)keys.dst,
                        (__force u32)keys.src ^ keys.ip_proto,
                        (__force u32)keys.ports, q->perturbation);
#else
/* Linux kernel 4.2.x has skb_flow_dissect_flow_keys() which takes only
 * two arguments.
 */
#if (KERNEL_VERSION(4, 2, 0) <= LINUX_VERSION_CODE) && \
    (KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE)
        skb_flow_dissect_flow_keys(skb, &keys);
#else
        skb_flow_dissect_flow_keys(skb, &keys,
                                   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
#endif

        /* flow_hash_from_keys() sorts the addresses by value, so we have
         * to preserve their order in a separate data structure to treat
         * src and dst host addresses as independently selectable.
         */
        host_keys = keys;
        host_keys.ports.ports     = 0;
        host_keys.basic.ip_proto  = 0;
        host_keys.keyid.keyid     = 0;
        host_keys.tags.vlan_id    = 0;
        host_keys.tags.flow_label = 0;

        if (!(flow_mode & CAKE_FLOW_SRC_IP)) {
                switch (host_keys.control.addr_type) {
                case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
                        host_keys.addrs.v4addrs.src = 0;
                        break;
                case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
                        memset(&host_keys.addrs.v6addrs.src, 0,
                               sizeof(host_keys.addrs.v6addrs.src));
                        break;
                }
        }

        if (!(flow_mode & CAKE_FLOW_DST_IP)) {
                switch (host_keys.control.addr_type) {
                case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
                        host_keys.addrs.v4addrs.dst = 0;
                        break;
                case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
                        memset(&host_keys.addrs.v6addrs.dst, 0,
                               sizeof(host_keys.addrs.v6addrs.dst));
                        break;
                }
        }

        host_hash = flow_hash_from_keys(&host_keys);

        if (!(flow_mode & CAKE_FLOW_FLOWS))
                flow_hash = host_hash;
        else
                flow_hash = flow_hash_from_keys(&keys);
#endif

        reduced_hash = reciprocal_scale(flow_hash, q->flows_cnt);

        /* set-associative hashing */
        /* fast path if no hash collision (direct lookup succeeds) */
        if (likely(q->tags[reduced_hash] == flow_hash)) {
                q->way_directs++;
        } else {
                u32 inner_hash = reduced_hash % CAKE_SET_WAYS;
                u32 outer_hash = reduced_hash - inner_hash;
                u32 i, j, k;

                /* check if any active queue in the set is reserved for
                 * this flow. count the empty queues in the set, too
                 */
                for (i = j = 0, k = inner_hash; i < CAKE_SET_WAYS;
                     i++, k = (k + 1) % CAKE_SET_WAYS) {
                        if (q->tags[outer_hash + k] == flow_hash) {
                                q->way_hits++;
                                goto found;
                        } else if (list_empty(&q->flows[outer_hash + k].
                                              flowchain)) {
                                j++;
                        }
                }

                /* no queue is reserved for this flow */
                if (j) {
                        /* there's at least one empty queue, so find one
                         * to reserve.
                         */
                        q->way_misses++;

                        for (i = 0; i < CAKE_SET_WAYS;
                             i++, k = (k + 1) % CAKE_SET_WAYS)
                                if (list_empty(&q->flows[outer_hash + k].
                                               flowchain))
                                        goto found;
                } else {
                        /* With no empty queues, default to the original
                         * queue and accept the collision.
                         */
                        q->way_collisions++;
                }

found:
                /* reserve queue for future packets in same flow */
                reduced_hash = outer_hash + k;
                q->tags[reduced_hash] = flow_hash;
        }

        return reduced_hash;
}
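The set-associative step keeps collisions local, as a small worked example shows (assuming CAKE_SET_WAYS == 8, the value used in the sch_cake sources): reduced_hash == 1027 gives inner_hash == 3 and outer_hash == 1024, so the candidate set is queues 1024..1031. The way search starts at way 3 and wraps around within that block, and whichever way is chosen, the returned reduced_hash (and the tag written into q->tags[]) stays inside the same eight-queue set.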