/* udp_v4_get_port - bind a UDP socket to local port @snum.
 *
 * If @snum is 0, pick an ephemeral port: first a random candidate, then a
 * two-pass scan (shortest hash chain, then a free slot on that chain).
 * Runs under udp_hash_lock for the whole selection.
 *
 * NOTE(review): this fragment is truncated here (the "} else {" branch for
 * an explicit @snum continues outside this view); @inet is also unused in
 * the visible portion — presumably used by the missing tail.
 */
static int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
	struct hlist_node *node;
	struct sock *sk2;
	struct inet_sock *inet = inet_sk(sk);

	/* Serialize against all other UDP binds. */
	write_lock_bh(&udp_hash_lock);
	if (!snum) {
		int i, low, high, remaining;
		unsigned rover, best, best_size_so_far;

		/* Ephemeral range is [low, high], inclusive. */
		inet_get_local_port_range(&low, &high);
		remaining = (high - low) + 1;

		best_size_so_far = UINT_MAX;
		/* Start from a random port to spread allocations. */
		best = rover = net_random() % remaining + low;

		/* Fast path: random candidate is free and not reserved. */
		if (!udp_lport_inuse(rover) &&
		    !inet_is_reserved_local_port(rover))
			goto gotit;

		/* 1st pass: look for empty (or shortest) hash chain */
		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
			struct hlist_head *list;
			int size = 0;

			list = &udp_hash[rover & (UDP_HTABLE_SIZE - 1)];
			if (hlist_empty(list) &&
			    !inet_is_reserved_local_port(rover))
				goto gotit;

			/* Count chain length, bail as soon as it is no
			 * better than the best chain seen so far. */
			sk_for_each(sk2, node, list)
				if (++size >= best_size_so_far)
					goto next;
			best_size_so_far = size;
			best = rover;
		next:
			/* fold back if end of range */
			if (++rover > high)
				rover = low + ((rover - low) &
					       (UDP_HTABLE_SIZE - 1));
		}

		/* 2nd pass: find hole in shortest hash chain */
		rover = best;
		/* Step by table size so every probe stays on that chain. */
		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
			if (!udp_lport_inuse(rover) &&
			    !inet_is_reserved_local_port(rover))
				goto gotit;
			rover += UDP_HTABLE_SIZE;
			if (rover > high)
				rover = low + ((rover - low) &
					       (UDP_HTABLE_SIZE - 1));
		}

		/* All ports in use! */
		goto fail;

	gotit:
		snum = rover;
	} else {
/* vxlan_tnl_send - VXLAN-encapsulate @skb and transmit it out of @vport.
 *
 * Requires flow-provided tunnel metadata in OVS_CB(skb)->tun_key (outer
 * addresses, TOS/TTL, flags, tunnel id); returns -EINVAL without it.
 * Returns the vxlan_xmit_skb() result (bytes sent, or negative errno).
 *
 * Reference handling: on success vxlan_xmit_skb() consumes both @skb and
 * the route; on failure the route reference taken here is dropped.
 * NOTE(review): whether @skb is freed by vxlan_xmit_skb() on its error
 * path is not visible here — confirm against that helper.
 */
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
	struct net *net = ovs_dp_get_net(vport->dp);
	struct vxlan_port *vxlan_port = vxlan_vport(vport);
	/* Local UDP port of the shared vxlan socket = outer dst port. */
	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->sport;
	struct rtable *rt;
	struct flowi fl;
	__be16 src_port;
	int port_min;
	int port_max;
	__be16 df;
	int err;

	if (unlikely(!OVS_CB(skb)->tun_key)) {
		err = -EINVAL;
		goto error;
	}

	/* Route lookup */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_dst = OVS_CB(skb)->tun_key->ipv4_dst;
	fl.fl4_src = OVS_CB(skb)->tun_key->ipv4_src;
	fl.fl4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
	fl.mark = skb->mark;
	fl.proto = IPPROTO_UDP;

	err = ip_route_output_key(net, &rt, &fl);
	if (err)
		goto error;

	/* Set IP_DF on the outer header only when the flow asked for it. */
	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
	     htons(IP_DF) : 0;

	/* Allow local fragmentation of the encapsulated packet. */
	skb->local_df = 1;

	/* Source port is hashed from the inner flow within the local
	 * ephemeral range, keeping one flow on one outer port. */
	inet_get_local_port_range(&port_min, &port_max);
	src_port = vxlan_src_port(port_min, port_max, skb);

	err = vxlan_xmit_skb(net, vxlan_port->vs, rt, skb,
			     fl.fl4_src, OVS_CB(skb)->tun_key->ipv4_dst,
			     OVS_CB(skb)->tun_key->ipv4_tos,
			     OVS_CB(skb)->tun_key->ipv4_ttl, df,
			     src_port, dst_port,
			     /* VNI occupies the upper 24 bits of the header word. */
			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
	if (err < 0)
		ip_rt_put(rt);
error:
	return err;
}
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sport(vxlan_port->vs->sock->sk); struct rtable *rt; __be16 src_port; __be32 saddr; __be16 df; int port_min; int port_max; int err; if (unlikely(!OVS_CB(skb)->tun_key)) { err = -EINVAL; goto error; } /* Route lookup */ saddr = OVS_CB(skb)->tun_key->ipv4_src; rt = find_route(ovs_dp_get_net(vport->dp), &saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_UDP, OVS_CB(skb)->tun_key->ipv4_tos, skb->mark); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; } df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; skb->local_df = 1; inet_get_local_port_range(&port_min, &port_max); src_port = vxlan_src_port(port_min, port_max, skb); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, saddr, OVS_CB(skb)->tun_key->ipv4_dst, OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, src_port, dst_port, htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8)); if (err < 0) ip_rt_put(rt); error: return err; }
static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, struct ovs_tunnel_info *egress_tun_info) { struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; __be16 src_port; int port_min; int port_max; inet_get_local_port_range(net, &port_min, &port_max); src_port = udp_flow_src_port(net, skb, 0, 0, true); return ovs_tunnel_get_egress_info(egress_tun_info, net, OVS_CB(skb)->egress_tun_info, IPPROTO_UDP, skb->mark, src_port, dst_port); }
/* Validate changes from /proc interface. */
/* ipv4_local_port_range - sysctl handler for ip_local_port_range.
 *
 * Reads go through a seqlock-consistent snapshot; writes are range-checked
 * by proc_dointvec_minmax() against the module min/max limits and are
 * rejected with -EINVAL if the upper bound is below the lower one.
 */
static int ipv4_local_port_range(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;
	int range[2];
	/* Shadow table: operate on a local copy so a failed/partial write
	 * never exposes an inconsistent range to readers. */
	ctl_table tmp = {
		.data = &range,
		.maxlen = sizeof(range),
		.mode = table->mode,
		.extra1 = &ip_local_port_range_min,
		.extra2 = &ip_local_port_range_max,
	};

	/* Snapshot the current range as the read value / write baseline. */
	inet_get_local_port_range(range, range + 1);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		if (range[1] < range[0])
			ret = -EINVAL;
		else
			/* Commit under the writer-side seqlock. */
			set_local_port_range(range);
	}

	return ret;
}

/* proc_tcp_congestion_control - sysctl handler for the default TCP
 * congestion control algorithm name (read and write via a local buffer,
 * committed with tcp_set_default_congestion_control on successful write).
 */
static int proc_tcp_congestion_control(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char val[TCP_CA_NAME_MAX];
	ctl_table tbl = {
		.data = val,
		.maxlen = TCP_CA_NAME_MAX,
	};
	int ret;

	tcp_get_default_congestion_control(val);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		/* May fail if the named algorithm is not registered. */
		ret = tcp_set_default_congestion_control(val);
	return ret;
}
/* Validate changes from /proc interface. */
/* ipv4_local_port_range - per-netns sysctl handler for
 * ip_local_port_range.  Recovers the owning netns from the table data
 * pointer, snapshots the current range into a local shadow table, lets
 * proc_dointvec_minmax() bounds-check any write, and commits only a
 * well-ordered (low <= high) range; otherwise returns -EINVAL.
 */
static int ipv4_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = container_of(table->data, struct net,
				       ipv4.ip_local_ports.range);
	int ret;
	int range[2];
	/* Local shadow table so a rejected write never touches the
	 * live netns range. */
	struct ctl_table tmp = {
		.data = &range,
		.maxlen = sizeof(range),
		.mode = table->mode,
		.extra1 = &ip_local_port_range_min,
		.extra2 = &ip_local_port_range_max,
	};

	inet_get_local_port_range(net, &range[0], &range[1]);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		if (range[1] < range[0])
			ret = -EINVAL;
		else
			set_local_port_range(net, range);
	}

	return ret;
}

/* inet_get_ping_group_range_table - seqlock-consistent snapshot of the
 * per-netns ping_group_range [low, high] gid pair.
 *
 * Fix: the retry loop previously used net->ipv4.ip_local_ports.lock,
 * but the writer (set_ping_group_range) publishes under
 * net->ipv4.ping_group_range.lock — with reader and writer on different
 * seqlocks the loop never retried and a concurrent update could yield a
 * torn low/high pair.  Read under ping_group_range.lock to match the
 * writer.
 */
static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
{
	kgid_t *data = table->data;
	struct net *net = container_of(table->data, struct net,
				       ipv4.ping_group_range.range);
	unsigned int seq;
	do {
		seq = read_seqbegin(&net->ipv4.ping_group_range.lock);

		*low = data[0];
		*high = data[1];
	} while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
}
/* Validate changes from /proc interface. */
/* ipv4_local_port_range - sysctl handler for the global (pre-netns)
 * ip_local_port_range.  Uses a local shadow table so a rejected write
 * never modifies the live range; writes are bounds-checked by
 * proc_dointvec_minmax() and must satisfy low <= high.
 */
static int ipv4_local_port_range(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;
	int range[2];
	ctl_table tmp = {
		.data = &range,
		.maxlen = sizeof(range),
		.mode = table->mode,
		.extra1 = &ip_local_port_range_min,
		.extra2 = &ip_local_port_range_max,
	};

	/* Snapshot the current range as read value / write baseline. */
	inet_get_local_port_range(range, range + 1);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		if (range[1] < range[0])
			ret = -EINVAL;
		else
			set_local_port_range(range);
	}

	return ret;
}

/* inet_get_ping_group_range_table - consistent snapshot of the
 * ping group range [low, high] gid pair from @table->data.
 *
 * NOTE(review): the retry loop uses sysctl_local_ports.lock, i.e. the
 * local-port-range seqlock, for ping group data.  That is only correct
 * if the corresponding writer takes the same lock — the writer is not
 * visible here; confirm, otherwise this read can tear.
 */
void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
{
	gid_t *data = table->data;
	unsigned seq;
	do {
		seq = read_seqbegin(&sysctl_local_ports.lock);

		*low = data[0];
		*high = data[1];
	} while (read_seqretry(&sysctl_local_ports.lock, seq));
}
/**
 *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
 *
 *  @sk:          socket struct in question
 *  @snum:        port number to look up
 *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
 *
 *  NOTE(review): this fragment is truncated here — the "} else {" branch
 *  handling an explicit @snum (where @saddr_comp and @error are actually
 *  used) continues outside this view.
 */
int udp_lib_get_port(struct sock *sk, unsigned short snum,
		     int (*saddr_comp)(const struct sock *sk1,
				       const struct sock *sk2 ) )
{
	struct hlist_head *udptable = sk->sk_prot->h.udp_hash;
	struct hlist_node *node;
	struct hlist_head *head;
	struct sock *sk2;
	int    error = 1;
	struct net *net = sock_net(sk);

	/* Serialize against all other UDP/UDP-Lite binds. */
	write_lock_bh(&udp_hash_lock);
	if (!snum) {
		int i, low, high, remaining;
		unsigned rover, best, best_size_so_far;

		/* Ephemeral range is [low, high], inclusive. */
		inet_get_local_port_range(&low, &high);
		remaining = (high - low) + 1;

		best_size_so_far = UINT_MAX;
		/* Random start point spreads port allocations. */
		best = rover = net_random() % remaining + low;

		/* 1st pass: look for empty (or shortest) hash chain */
		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
			int size = 0;

			head = &udptable[udp_hashfn(net, rover)];
			if (hlist_empty(head))
				goto gotit;

			/* Bail early once this chain is no shorter than
			 * the best one seen so far. */
			sk_for_each(sk2, node, head) {
				if (++size >= best_size_so_far)
					goto next;
			}
			best_size_so_far = size;
			best = rover;
		next:
			/* fold back if end of range */
			if (++rover > high)
				rover = low + ((rover - low) & (UDP_HTABLE_SIZE - 1));
		}

		/* 2nd pass: find hole in shortest hash chain */
		rover = best;
		/* Step by table size so every probe hashes to that chain. */
		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
			if (! __udp_lib_lport_inuse(net, rover, udptable))
				goto gotit;
			rover += UDP_HTABLE_SIZE;
			if (rover > high)
				rover = low + ((rover - low) & (UDP_HTABLE_SIZE - 1));
		}

		/* All ports in use! */
		goto fail;

gotit:
		snum = rover;
	} else {
/* Validate changes from /proc interface. */
/* ipv4_local_port_range - per-netns sysctl handler for
 * ip_local_port_range.  Writes must keep low <= high and must not dip
 * into the privileged port range (ip_unprivileged_port_start).
 */
static int ipv4_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net =
		container_of(table->data, struct net, ipv4.ip_local_ports.range);
	int ret;
	int range[2];
	/* Shadow table: validate on a local copy before committing. */
	struct ctl_table tmp = {
		.data = &range,
		.maxlen = sizeof(range),
		.mode = table->mode,
		.extra1 = &ip_local_port_range_min,
		.extra2 = &ip_local_port_range_max,
	};

	inet_get_local_port_range(net, &range[0], &range[1]);

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		/* Ensure that the upper limit is not smaller than the lower,
		 * and that the lower does not encroach upon the privileged
		 * port limit.
		 */
		if ((range[1] < range[0]) ||
		    (range[0] < net->ipv4.sysctl_ip_prot_sock))
			ret = -EINVAL;
		else
			set_local_port_range(net, range);
	}

	return ret;
}

/* Validate changes from /proc interface. */
/* ipv4_privileged_ports - per-netns sysctl handler for the privileged
 * port boundary.  A new boundary is rejected if it would overlap the
 * current local (ephemeral) port range.
 */
static int ipv4_privileged_ports(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = container_of(table->data, struct net,
		ipv4.sysctl_ip_prot_sock);
	int ret;
	int pports;
	int range[2];
	/* Shadow table bounded by the absolute privileged-port limits. */
	struct ctl_table tmp = {
		.data = &pports,
		.maxlen = sizeof(pports),
		.mode = table->mode,
		.extra1 = &ip_privileged_port_min,
		.extra2 = &ip_privileged_port_max,
	};

	pports = net->ipv4.sysctl_ip_prot_sock;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		inet_get_local_port_range(net, &range[0], &range[1]);
		/* Ensure that the local port range doesn't overlap with the
		 * privileged port range.
		 */
		if (range[0] < pports)
			ret = -EINVAL;
		else
			net->ipv4.sysctl_ip_prot_sock = pports;
	}

	return ret;
}

/* inet_get_ping_group_range_table - seqlock-consistent snapshot of the
 * per-netns ping group range [low, high] gid pair (reader side of the
 * ping_group_range seqlock; pairs with set_ping_group_range below).
 */
static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
{
	kgid_t *data = table->data;
	struct net *net =
		container_of(table->data, struct net, ipv4.ping_group_range.range);
	unsigned int seq;
	do {
		seq = read_seqbegin(&net->ipv4.ping_group_range.lock);

		*low = data[0];
		*high = data[1];
	} while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
}

/* Update system visible IP port range */
/* set_ping_group_range - writer side: publish a new gid pair under the
 * ping_group_range seqlock so readers never observe a torn update.
 */
static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
{
	kgid_t *data = table->data;
	struct net *net =
		container_of(table->data, struct net, ipv4.ping_group_range.range);
	write_seqlock(&net->ipv4.ping_group_range.lock);
	data[0] = low;
	data[1] = high;
	write_sequnlock(&net->ipv4.ping_group_range.lock);
}
/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 */
/* NOTE(review): this fragment is truncated (the "} else {" branch and the
 * tb_found/have_snum/fail labels continue outside this view).  More
 * seriously, the bind-hash bucket locking and bh disabling are commented
 * out (//local_bh_disable, //spin_lock/spin_unlock(&head->lock)), as is
 * the uid check for SO_REUSEPORT (// && uid_eq(tb->fastuid, uid)).  As
 * written, the bucket chain is walked with no lock held — looks like
 * debugging leftovers that remove SMP protection and let unrelated users
 * share a reuseport port; confirm intent and restore before merging.
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret, attempts = 5;
	struct net *net = sock_net(sk);
	int smallest_size = -1, smallest_rover;
	//kuid_t uid = sock_i_uid(sk);

	//local_bh_disable();
	if (!snum) {
		int remaining, rover, low, high;

again:
		/* Ephemeral range is [low, high], inclusive; start at a
		 * random port to spread allocations. */
		inet_get_local_port_range(net,&low, &high);
		remaining = (high - low) + 1;
		smallest_rover = rover = net_random() % remaining + low;

		smallest_size = -1;
		do {
			if (inet_is_reserved_local_port(net,rover))
				goto next_nolock;
			head = &hashinfo->bhash[inet_bhashfn(net, rover,
					hashinfo->bhash_size)];
			//spin_lock(&head->lock);
			/* Scan the bucket for an existing bind of this port
			 * in this netns. */
			inet_bind_bucket_for_each(tb, &head->chain)
				if (net_eq(ib_net(tb), net) && tb->port == rover) {
					/* Port is bound but reusable: remember
					 * the least-shared such port as a
					 * fallback. */
					if (((tb->fastreuse > 0 &&
					      sk->sk_reuse &&
					      sk->sk_state != TCP_LISTEN) ||
					     (tb->fastreuseport > 0 &&
					      sk->sk_reuseport )) // && uid_eq(tb->fastuid, uid)
					     && (tb->num_owners < smallest_size || smallest_size == -1)) {
						smallest_size = tb->num_owners;
						smallest_rover = rover;
						/* Under port pressure, take the
						 * fallback immediately if it
						 * does not conflict. */
						if ((atomic_read(&hashinfo->bsockets) > (high - low) + 1) &&
						    (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false))) {
							snum = smallest_rover;
							goto tb_found;
						}
					}
					if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
						snum = rover;
						goto tb_found;
					}
					goto next;
				}
			/* Bucket has no entry for this port: it is free. */
			break;
		next:
			//spin_unlock(&head->lock);
		next_nolock:
			if (++rover > high)
				rover = low;
		} while (--remaining > 0);

		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
		ret = 1;
		if (remaining <= 0) {
			if (smallest_size != -1) {
				snum = smallest_rover;
				goto have_snum;
			}
			goto fail;
		}
		/* OK, here is the one we will use.  HEAD is
		 * non-NULL and we hold it's mutex.
		 */
		snum = rover;
	} else {
/* vxlan_tnl_create - create the backing vxlan net_device for a new OVS
 * VXLAN vport from the netlink-supplied @parms.
 *
 * OVS_TUNNEL_ATTR_DST_PORT is mandatory; optional extension attributes
 * are applied via vxlan_configure_exts().  The device is created in
 * metadata-collection mode (VXLAN_F_COLLECT_METADATA) and brought up
 * under RTNL.  Returns the vport or an ERR_PTR; all error paths free the
 * partially-constructed vport.
 */
static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct nlattr *options = parms->options;
	struct net_device *dev;
	struct vport *vport;
	struct nlattr *a;
	int err;
	struct vxlan_config conf = {
		.no_share = true,
		.flags = VXLAN_F_COLLECT_METADATA,
	};

	if (!options) {
		err = -EINVAL;
		goto error;
	}

	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
	if (a && nla_len(a) == sizeof(u16)) {
		conf.dst_port = htons(nla_get_u16(a));
	} else {
		/* Require destination port from userspace. */
		err = -EINVAL;
		goto error;
	}

	vport = ovs_vport_alloc(0, &ovs_vxlan_netdev_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
	if (a) {
		err = vxlan_configure_exts(vport, a, &conf);
		if (err) {
			ovs_vport_free(vport);
			goto error;
		}
	}

	/* Device creation and IFF_UP both require RTNL. */
	rtnl_lock();
	dev = vxlan_dev_create(net, parms->name, NET_NAME_USER, &conf);
	if (IS_ERR(dev)) {
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_CAST(dev);
	}

	dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();
	return vport;
error:
	return ERR_PTR(err);
}

/* vxlan_create - vport_ops create hook: build the tunnel device, then
 * link the vport to its netdev. */
static struct vport *vxlan_create(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = vxlan_tnl_create(parms);
	if (IS_ERR(vport))
		return vport;

	return ovs_netdev_link(vport, parms->name);
}

/* vxlan_get_egress_tun_info - report the UDP ports a packet like @skb
 * would egress with, for an upcall to userspace.
 *
 * NOTE(review): port_min/port_max are filled but never used —
 * udp_flow_src_port() is called with min/max of 0 (use-defaults);
 * the inet_get_local_port_range() call looks like dead code.
 */
static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
				     struct dp_upcall_info *upcall)
{
	struct vxlan_dev *vxlan = netdev_priv(vport->dev);
	struct net *net = ovs_dp_get_net(vport->dp);
	/* Address family of the egress tunnel decides which dst port
	 * config applies. */
	unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info);
	__be16 dst_port = vxlan_dev_dst_port(vxlan, family);
	__be16 src_port;
	int port_min;
	int port_max;

	inet_get_local_port_range(net, &port_min, &port_max);
	src_port = udp_flow_src_port(net, skb, 0, 0, true);

	return ovs_tunnel_get_egress_info(upcall, net, skb, IPPROTO_UDP,
					  src_port, dst_port);
}

/* vport_ops vtable registering the VXLAN vport type with OVS. */
static struct vport_ops ovs_vxlan_netdev_vport_ops = {
	.type			= OVS_VPORT_TYPE_VXLAN,
	.create			= vxlan_create,
	.destroy		= ovs_netdev_tunnel_destroy,
	.get_options		= vxlan_get_options,
	.send			= ovs_netdev_send,
	.get_egress_tun_info	= vxlan_get_egress_tun_info,
};

/* Module init: register the VXLAN vport type. */
static int __init ovs_vxlan_tnl_init(void)
{
	return ovs_vport_ops_register(&ovs_vxlan_netdev_vport_ops);
}

/* Module exit: unregister the VXLAN vport type. */
static void __exit ovs_vxlan_tnl_exit(void)
{
	ovs_vport_ops_unregister(&ovs_vxlan_netdev_vport_ops);
}