示例#1
0
static int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
	struct hlist_node *node;
	struct sock *sk2;
	struct inet_sock *inet = inet_sk(sk);

	write_lock_bh(&udp_hash_lock);
	if (!snum) {
		int i, low, high, remaining;
		unsigned rover, best, best_size_so_far;

		inet_get_local_port_range(&low, &high);
		remaining = (high - low) + 1;

		best_size_so_far = UINT_MAX;
		best = rover = net_random() % remaining + low;

		if (!udp_lport_inuse(rover) &&
		    !inet_is_reserved_local_port(rover))
			goto gotit;

		/* 1st pass: look for empty (or shortest) hash chain */
		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
			struct hlist_head *list;
			int size = 0;

			list = &udp_hash[rover & (UDP_HTABLE_SIZE - 1)];
			if (hlist_empty(list) &&
			    !inet_is_reserved_local_port(rover))
				goto gotit;

			sk_for_each(sk2, node, list)
				if (++size >= best_size_so_far)
					goto next;
			best_size_so_far = size;
			best = rover;
		next:
			/* fold back if end of range */
			if (++rover > high)
				rover = low + ((rover - low)
				            & (UDP_HTABLE_SIZE - 1));
		}
		/* 2nd pass: find hole in shortest hash chain */
		rover = best;
		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
			if (!udp_lport_inuse(rover) &&
			    !inet_is_reserved_local_port(rover))
				goto gotit;
			rover += UDP_HTABLE_SIZE;
			if (rover > high)
				rover = low + ((rover - low)
				            & (UDP_HTABLE_SIZE - 1));
		}
		/* All ports in use! */
		goto fail;

gotit:
		snum = rover;
	} else {
示例#2
0
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
	struct net *net = ovs_dp_get_net(vport->dp);
	struct vxlan_port *vxlan_port = vxlan_vport(vport);
	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->sport;
	struct rtable *rt;
	struct flowi fl;
	__be16 src_port;
	int port_min;
	int port_max;
	__be16 df;
	int err;

	if (unlikely(!OVS_CB(skb)->tun_key)) {
		err = -EINVAL;
		goto error;
	}

	/* Route lookup */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_dst = OVS_CB(skb)->tun_key->ipv4_dst;
	fl.fl4_src = OVS_CB(skb)->tun_key->ipv4_src;
	fl.fl4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
	fl.mark = skb->mark;
	fl.proto = IPPROTO_UDP;

	err = ip_route_output_key(net, &rt, &fl);
	if (err)
		goto error;

	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
		htons(IP_DF) : 0;

	skb->local_df = 1;

	inet_get_local_port_range(&port_min, &port_max);
	src_port = vxlan_src_port(port_min, port_max, skb);

	err = vxlan_xmit_skb(net, vxlan_port->vs, rt, skb,
			     fl.fl4_src, OVS_CB(skb)->tun_key->ipv4_dst,
			     OVS_CB(skb)->tun_key->ipv4_tos,
			     OVS_CB(skb)->tun_key->ipv4_ttl, df,
			     src_port, dst_port,
			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
	if (err < 0)
		ip_rt_put(rt);
error:
	return err;
}
示例#3
0
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
	struct vxlan_port *vxlan_port = vxlan_vport(vport);
	__be16 dst_port = inet_sport(vxlan_port->vs->sock->sk);
	struct rtable *rt;
	__be16 src_port;
	__be32 saddr;
	__be16 df;
	int port_min;
	int port_max;
	int err;

	if (unlikely(!OVS_CB(skb)->tun_key)) {
		err = -EINVAL;
		goto error;
	}

	/* Route lookup */
	saddr = OVS_CB(skb)->tun_key->ipv4_src;
	rt = find_route(ovs_dp_get_net(vport->dp),
			&saddr,
			OVS_CB(skb)->tun_key->ipv4_dst,
			IPPROTO_UDP,
			OVS_CB(skb)->tun_key->ipv4_tos,
			skb->mark);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto error;
	}

	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
		htons(IP_DF) : 0;

	skb->local_df = 1;

	inet_get_local_port_range(&port_min, &port_max);
	src_port = vxlan_src_port(port_min, port_max, skb);

	err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
			     saddr, OVS_CB(skb)->tun_key->ipv4_dst,
			     OVS_CB(skb)->tun_key->ipv4_tos,
			     OVS_CB(skb)->tun_key->ipv4_ttl, df,
			     src_port, dst_port,
			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
	if (err < 0)
		ip_rt_put(rt);
error:
	return err;
}
示例#4
0
static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
				     struct ovs_tunnel_info *egress_tun_info)
{
	struct net *net = ovs_dp_get_net(vport->dp);
	struct vxlan_port *vxlan_port = vxlan_vport(vport);
	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
	__be16 src_port;
	int port_min;
	int port_max;

	inet_get_local_port_range(net, &port_min, &port_max);
	src_port = udp_flow_src_port(net, skb, 0, 0, true);

	return ovs_tunnel_get_egress_info(egress_tun_info, net,
					  OVS_CB(skb)->egress_tun_info,
					  IPPROTO_UDP, skb->mark,
					  src_port, dst_port);
}
/* Validate changes from /proc interface. */
static int ipv4_local_port_range(ctl_table *table, int write,
				 void __user *buffer,
				 size_t *lenp, loff_t *ppos)
{
	int ret;
	int range[2];
	ctl_table tmp = {
		.data = &range,
		.maxlen = sizeof(range),
		.mode = table->mode,
		.extra1 = &ip_local_port_range_min,
		.extra2 = &ip_local_port_range_max,
	};

	inet_get_local_port_range(range, range + 1);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		if (range[1] < range[0])
			ret = -EINVAL;
		else
			set_local_port_range(range);
	}

	return ret;
}

static int proc_tcp_congestion_control(ctl_table *ctl, int write,
				       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	char val[TCP_CA_NAME_MAX];
	ctl_table tbl = {
		.data = val,
		.maxlen = TCP_CA_NAME_MAX,
	};
	int ret;

	tcp_get_default_congestion_control(val);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = tcp_set_default_congestion_control(val);
	return ret;
}
示例#6
0
/* Validate changes from /proc interface. */
static int ipv4_local_port_range(struct ctl_table *table, int write,
				 void __user *buffer,
				 size_t *lenp, loff_t *ppos)
{
	struct net *net =
		container_of(table->data, struct net, ipv4.ip_local_ports.range);
	int ret;
	int range[2];
	struct ctl_table tmp = {
		.data = &range,
		.maxlen = sizeof(range),
		.mode = table->mode,
		.extra1 = &ip_local_port_range_min,
		.extra2 = &ip_local_port_range_max,
	};

	inet_get_local_port_range(net, &range[0], &range[1]);

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		if (range[1] < range[0])
			ret = -EINVAL;
		else
			set_local_port_range(net, range);
	}

	return ret;
}


static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
{
	kgid_t *data = table->data;
	struct net *net =
		container_of(table->data, struct net, ipv4.ping_group_range.range);
	unsigned int seq;
	do {
		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);

		*low = data[0];
		*high = data[1];
	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
}
/* Validate changes from /proc interface. */
static int ipv4_local_port_range(ctl_table *table, int write,
                                 void __user *buffer,
                                 size_t *lenp, loff_t *ppos)
{
    int ret;
    int range[2];
    ctl_table tmp = {
        .data = &range,
        .maxlen = sizeof(range),
        .mode = table->mode,
        .extra1 = &ip_local_port_range_min,
        .extra2 = &ip_local_port_range_max,
    };

    inet_get_local_port_range(range, range + 1);
    ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

    if (write && ret == 0) {
        if (range[1] < range[0])
            ret = -EINVAL;
        else
            set_local_port_range(range);
    }

    return ret;
}


void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
{
    gid_t *data = table->data;
    unsigned seq;
    do {
        seq = read_seqbegin(&sysctl_local_ports.lock);

        *low = data[0];
        *high = data[1];
    } while (read_seqretry(&sysctl_local_ports.lock, seq));
}
示例#8
0
/**
 *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
 *
 *  @sk:          socket struct in question
 *  @snum:        port number to look up
 *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
 */
int udp_lib_get_port(struct sock *sk, unsigned short snum,
		       int (*saddr_comp)(const struct sock *sk1,
					 const struct sock *sk2 )    )
{
	struct hlist_head *udptable = sk->sk_prot->h.udp_hash;
	struct hlist_node *node;
	struct hlist_head *head;
	struct sock *sk2;
	int    error = 1;
	struct net *net = sock_net(sk);

	write_lock_bh(&udp_hash_lock);

	if (!snum) {
		int i, low, high, remaining;
		unsigned rover, best, best_size_so_far;

		inet_get_local_port_range(&low, &high);
		remaining = (high - low) + 1;

		best_size_so_far = UINT_MAX;
		best = rover = net_random() % remaining + low;

		/* 1st pass: look for empty (or shortest) hash chain */
		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
			int size = 0;

			head = &udptable[udp_hashfn(net, rover)];
			if (hlist_empty(head))
				goto gotit;

			sk_for_each(sk2, node, head) {
				if (++size >= best_size_so_far)
					goto next;
			}
			best_size_so_far = size;
			best = rover;
		next:
			/* fold back if end of range */
			if (++rover > high)
				rover = low + ((rover - low)
					       & (UDP_HTABLE_SIZE - 1));


		}

		/* 2nd pass: find hole in shortest hash chain */
		rover = best;
		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
			if (! __udp_lib_lport_inuse(net, rover, udptable))
				goto gotit;
			rover += UDP_HTABLE_SIZE;
			if (rover > high)
				rover = low + ((rover - low)
					       & (UDP_HTABLE_SIZE - 1));
		}


		/* All ports in use! */
		goto fail;

gotit:
		snum = rover;
	} else {
示例#9
0
/* Validate changes from /proc interface. */
static int ipv4_local_port_range(struct ctl_table *table, int write,
				 void __user *buffer,
				 size_t *lenp, loff_t *ppos)
{
	struct net *net =
		container_of(table->data, struct net, ipv4.ip_local_ports.range);
	int ret;
	int range[2];
	struct ctl_table tmp = {
		.data = &range,
		.maxlen = sizeof(range),
		.mode = table->mode,
		.extra1 = &ip_local_port_range_min,
		.extra2 = &ip_local_port_range_max,
	};

	inet_get_local_port_range(net, &range[0], &range[1]);

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		/* Ensure that the upper limit is not smaller than the lower,
		 * and that the lower does not encroach upon the privileged
		 * port limit.
		 */
		if ((range[1] < range[0]) ||
		    (range[0] < net->ipv4.sysctl_ip_prot_sock))
			ret = -EINVAL;
		else
			set_local_port_range(net, range);
	}

	return ret;
}

/* Validate changes from /proc interface. */
static int ipv4_privileged_ports(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = container_of(table->data, struct net,
	    ipv4.sysctl_ip_prot_sock);
	int ret;
	int pports;
	int range[2];
	struct ctl_table tmp = {
		.data = &pports,
		.maxlen = sizeof(pports),
		.mode = table->mode,
		.extra1 = &ip_privileged_port_min,
		.extra2 = &ip_privileged_port_max,
	};

	pports = net->ipv4.sysctl_ip_prot_sock;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		inet_get_local_port_range(net, &range[0], &range[1]);
		/* Ensure that the local port range doesn't overlap with the
		 * privileged port range.
		 */
		if (range[0] < pports)
			ret = -EINVAL;
		else
			net->ipv4.sysctl_ip_prot_sock = pports;
	}

	return ret;
}

static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
{
	kgid_t *data = table->data;
	struct net *net =
		container_of(table->data, struct net, ipv4.ping_group_range.range);
	unsigned int seq;
	do {
		seq = read_seqbegin(&net->ipv4.ping_group_range.lock);

		*low = data[0];
		*high = data[1];
	} while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
}

/* Update system visible IP port range */
static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
{
	kgid_t *data = table->data;
	struct net *net =
		container_of(table->data, struct net, ipv4.ping_group_range.range);
	write_seqlock(&net->ipv4.ping_group_range.lock);
	data[0] = low;
	data[1] = high;
	write_sequnlock(&net->ipv4.ping_group_range.lock);
}
示例#10
0
/* Obtain a reference to a local port for the given sock,
 * if snum is zero it means select any available local port.
 */
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{	
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret, attempts = 5;
	
	struct net *net = sock_net(sk);
	int smallest_size = -1, smallest_rover;
	//kuid_t uid = sock_i_uid(sk);

	//local_bh_disable();
	if (!snum) {
		int remaining, rover, low, high;

again:
		inet_get_local_port_range(net,&low, &high);
		remaining = (high - low) + 1;
		smallest_rover = rover = net_random() % remaining + low;

		smallest_size = -1;
		do {
			if (inet_is_reserved_local_port(net,rover))
				goto next_nolock;
			head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)];
			//spin_lock(&head->lock);
			inet_bind_bucket_for_each(tb, &head->chain)
				if (net_eq(ib_net(tb), net) && tb->port == rover) {
					if (((tb->fastreuse > 0 && sk->sk_reuse && sk->sk_state != TCP_LISTEN) 
						|| (tb->fastreuseport > 0 && sk->sk_reuseport ))  //  && uid_eq(tb->fastuid, uid)
						&& (tb->num_owners < smallest_size || smallest_size == -1)) {
						smallest_size = tb->num_owners;
						smallest_rover = rover;
						if ((atomic_read(&hashinfo->bsockets) > (high - low) + 1) 
							&& (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false))) {
							snum = smallest_rover;
							goto tb_found;
						}
					}
					if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
						snum = rover;
						goto tb_found;
					}
					goto next;
				}
				
			break;
		next:
			//spin_unlock(&head->lock);
		next_nolock:
			if (++rover > high)
				rover = low;
		} while (--remaining > 0);

		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
		ret = 1;
		if (remaining <= 0) {
			if (smallest_size != -1) {
				snum = smallest_rover;
				goto have_snum;
			}
			goto fail;
		}
		/* OK, here is the one we will use.  HEAD is
		 * non-NULL and we hold it's mutex.
		 */
		snum = rover;
	} else {
示例#11
0
static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct nlattr *options = parms->options;
	struct net_device *dev;
	struct vport *vport;
	struct nlattr *a;
	int err;
	struct vxlan_config conf = {
		.no_share = true,
		.flags = VXLAN_F_COLLECT_METADATA,
	};

	if (!options) {
		err = -EINVAL;
		goto error;
	}

	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
	if (a && nla_len(a) == sizeof(u16)) {
		conf.dst_port = htons(nla_get_u16(a));
	} else {
		/* Require destination port from userspace. */
		err = -EINVAL;
		goto error;
	}

	vport = ovs_vport_alloc(0, &ovs_vxlan_netdev_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
	if (a) {
		err = vxlan_configure_exts(vport, a, &conf);
		if (err) {
			ovs_vport_free(vport);
			goto error;
		}
	}

	rtnl_lock();
	dev = vxlan_dev_create(net, parms->name, NET_NAME_USER, &conf);
	if (IS_ERR(dev)) {
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_CAST(dev);
	}

	dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();
	return vport;
error:
	return ERR_PTR(err);
}

static struct vport *vxlan_create(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = vxlan_tnl_create(parms);
	if (IS_ERR(vport))
		return vport;

	return ovs_netdev_link(vport, parms->name);
}

static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
				     struct dp_upcall_info *upcall)
{
	struct vxlan_dev *vxlan = netdev_priv(vport->dev);
	struct net *net = ovs_dp_get_net(vport->dp);
	unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info);
	__be16 dst_port = vxlan_dev_dst_port(vxlan, family);
	__be16 src_port;
	int port_min;
	int port_max;

	inet_get_local_port_range(net, &port_min, &port_max);
	src_port = udp_flow_src_port(net, skb, 0, 0, true);

	return ovs_tunnel_get_egress_info(upcall, net,
					  skb, IPPROTO_UDP,
					  src_port, dst_port);
}

static struct vport_ops ovs_vxlan_netdev_vport_ops = {
	.type			= OVS_VPORT_TYPE_VXLAN,
	.create			= vxlan_create,
	.destroy		= ovs_netdev_tunnel_destroy,
	.get_options		= vxlan_get_options,
	.send			= ovs_netdev_send,
	.get_egress_tun_info	= vxlan_get_egress_tun_info,
};

static int __init ovs_vxlan_tnl_init(void)
{
	return ovs_vport_ops_register(&ovs_vxlan_netdev_vport_ops);
}

static void __exit ovs_vxlan_tnl_exit(void)
{
	ovs_vport_ops_unregister(&ovs_vxlan_netdev_vport_ops);
}