Example #1
0
int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
{
	int uninitialized_var(err);
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_flowlabel_req freq;
	struct ipv6_fl_socklist *sfl1 = NULL;
	struct ipv6_fl_socklist *sfl;
	struct ipv6_fl_socklist __rcu **sflp;
	struct ip6_flowlabel *fl, *fl1 = NULL;


	if (optlen < sizeof(freq))
		return -EINVAL;

	if (copy_from_user(&freq, optval, sizeof(freq)))
		return -EFAULT;

	switch (freq.flr_action) {
	case IPV6_FL_A_PUT:
		if (freq.flr_flags & IPV6_FL_F_REFLECT) {
			if (sk->sk_protocol != IPPROTO_TCP)
				return -ENOPROTOOPT;
			if (!np->repflow)
				return -ESRCH;
			np->flow_label = 0;
			np->repflow = 0;
			return 0;
		}
		spin_lock_bh(&ip6_sk_fl_lock);
		for (sflp = &np->ipv6_fl_list;
		     (sfl = rcu_dereference_protected(*sflp,
						      lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
		     sflp = &sfl->next) {
			if (sfl->fl->label == freq.flr_label) {
				if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
					np->flow_label &= ~IPV6_FLOWLABEL_MASK;
				*sflp = sfl->next;
				spin_unlock_bh(&ip6_sk_fl_lock);
				fl_release(sfl->fl);
				kfree_rcu(sfl, rcu);
				return 0;
			}
		}
		spin_unlock_bh(&ip6_sk_fl_lock);
		return -ESRCH;

	case IPV6_FL_A_RENEW:
		rcu_read_lock_bh();
		for_each_sk_fl_rcu(np, sfl) {
			if (sfl->fl->label == freq.flr_label) {
				err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
				rcu_read_unlock_bh();
				return err;
			}
		}
		rcu_read_unlock_bh();

		if (freq.flr_share == IPV6_FL_S_NONE &&
		    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
			fl = fl_lookup(net, freq.flr_label);
			if (fl) {
				err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
				fl_release(fl);
				return err;
			}
		}
		return -ESRCH;

	case IPV6_FL_A_GET:
		if (freq.flr_flags & IPV6_FL_F_REFLECT) {
			struct net *net = sock_net(sk);
			if (net->ipv6.sysctl.flowlabel_consistency) {
				net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
				return -EPERM;
			}

			if (sk->sk_protocol != IPPROTO_TCP)
				return -ENOPROTOOPT;

			np->repflow = 1;
			return 0;
		}

		if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
			return -EINVAL;

		if (net->ipv6.sysctl.flowlabel_state_ranges &&
		    (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
			return -ERANGE;

		fl = fl_create(net, sk, &freq, optval, optlen, &err);
		if (!fl)
			return err;
		sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);

		if (freq.flr_label) {
			err = -EEXIST;
			rcu_read_lock_bh();
			for_each_sk_fl_rcu(np, sfl) {
				if (sfl->fl->label == freq.flr_label) {
					if (freq.flr_flags&IPV6_FL_F_EXCL) {
						rcu_read_unlock_bh();
						goto done;
					}
					fl1 = sfl->fl;
					atomic_inc(&fl1->users);
					break;
				}
			}
			rcu_read_unlock_bh();

			if (!fl1)
				fl1 = fl_lookup(net, freq.flr_label);
			if (fl1) {
recheck:
				err = -EEXIST;
				if (freq.flr_flags&IPV6_FL_F_EXCL)
					goto release;
				err = -EPERM;
				if (fl1->share == IPV6_FL_S_EXCL ||
				    fl1->share != fl->share ||
				    ((fl1->share == IPV6_FL_S_PROCESS) &&
				     (fl1->owner.pid == fl->owner.pid)) ||
				    ((fl1->share == IPV6_FL_S_USER) &&
				     uid_eq(fl1->owner.uid, fl->owner.uid)))
					goto release;

				err = -ENOMEM;
				if (!sfl1)
					goto release;
				if (fl->linger > fl1->linger)
					fl1->linger = fl->linger;
				if ((long)(fl->expires - fl1->expires) > 0)
					fl1->expires = fl->expires;
				fl_link(np, sfl1, fl1);
				fl_free(fl);
				return 0;

release:
				fl_release(fl1);
				goto done;
			}
		}
		err = -ENOENT;
		if (!(freq.flr_flags&IPV6_FL_F_CREATE))
			goto done;

		err = -ENOMEM;
		if (!sfl1)
			goto done;

		err = mem_check(sk);
		if (err != 0)
			goto done;

		fl1 = fl_intern(net, fl, freq.flr_label);
		if (fl1)
			goto recheck;

		if (!freq.flr_label) {
			if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
					 &fl->label, sizeof(fl->label))) {
				/* Intentionally ignore fault. */
			}
		}

		fl_link(np, sfl1, fl);
		return 0;

	default:
		return -EINVAL;
	}
}
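
The switch above implements the IPV6_FLOWLABEL_MGR socket option. As a rough illustration of the IPV6_FL_A_GET path it services, a hypothetical userspace request could look like the sketch below (the struct in6_flowlabel_req fields match those used above; the header mix and error handling are illustrative, not normative):

/* Hypothetical userspace caller of the IPV6_FL_A_GET branch above. */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/in6.h>		/* struct in6_flowlabel_req, IPV6_FL_* */

static int request_flowlabel(int fd, const struct in6_addr *dst)
{
	struct in6_flowlabel_req freq;

	memset(&freq, 0, sizeof(freq));
	freq.flr_dst = *dst;
	freq.flr_label = 0;			/* 0: let the kernel pick a label */
	freq.flr_action = IPV6_FL_A_GET;
	freq.flr_share = IPV6_FL_S_EXCL;
	freq.flr_flags = IPV6_FL_F_CREATE;

	/* On success the kernel copies the chosen label back into flr_label,
	 * just as the copy_to_user() near the end of the function does. */
	return setsockopt(fd, IPPROTO_IPV6, IPV6_FLOWLABEL_MGR,
			  &freq, sizeof(freq));
}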
Example #2
0
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	spinlock_t *lock = rht_bucket_lock(tbl, hash);

	return (debug_locks) ? lockdep_is_held(lock) : 1;
}
Example #3
0
/* This is analogous to rtnl_dereference for the tunnel cache.  It checks that
 * cache_lock is held, so it is only for update side code.
 */
static inline struct tnl_cache *cache_dereference(struct tnl_vport *tnl_vport)
{
	return rcu_dereference_protected(tnl_vport->cache,
					 lockdep_is_held(&tnl_vport->cache_lock));
}
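
Example #3 shows the common idiom of wrapping rcu_dereference_protected() in a tiny accessor so that every update-side reader documents, and lockdep verifies, which lock protects the pointer. A minimal sketch of defining and using such an accessor (hypothetical names throughout):

/* Hypothetical RCU-managed object protected by cfg_lock on the update side. */
struct my_cfg {
	int value;
};

static DEFINE_SPINLOCK(cfg_lock);
static struct my_cfg __rcu *cur_cfg;

/* Update-side accessor: lockdep complains if cfg_lock is not actually held. */
static struct my_cfg *cfg_dereference(void)
{
	return rcu_dereference_protected(cur_cfg, lockdep_is_held(&cfg_lock));
}

static void cfg_dump(void)
{
	struct my_cfg *cfg;

	spin_lock(&cfg_lock);
	cfg = cfg_dereference();	/* safe: writers are serialized here */
	if (cfg)
		pr_info("cfg value=%d\n", cfg->value);
	spin_unlock(&cfg_lock);
}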
Example #4
0
static int rps_sock_flow_sysctl(ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<30) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}

				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(rps_sock_flow_table, sock_table);
			if (sock_table)
				static_key_slow_inc(&rps_needed);
			if (orig_sock_table) {
				static_key_slow_dec(&rps_needed);
				synchronize_rcu();
				vfree(orig_sock_table);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
	{
		.procname	= "wmem_max",
		.data		= &sysctl_wmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "rmem_max",
Example #5
0
int lockdep_genl_is_held(void)
{
	return lockdep_is_held(&genl_mutex);
}
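
Helpers like lockdep_genl_is_held() (and lockdep_nfnl_is_held() in the next example) exist so that pointers guarded by those mutexes can be dereferenced on the update side with the locking requirement checked; Example #10 below passes lockdep_is_held(&table[subsys_id].mutex) directly, while callers of a helper typically hide it behind a wrapper. A sketch of such a wrapper, with a hypothetical name:

/* Hypothetical wrapper: fetch a genl_mutex-protected pointer and let
 * lockdep_genl_is_held() prove the mutex really is held at that point. */
#define my_genl_dereference(p) \
	rcu_dereference_protected(p, lockdep_genl_is_held())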
Example #6
0
int lockdep_nfnl_is_held(u8 subsys_id)
{
	return lockdep_is_held(&table[subsys_id].mutex);
}
Example #7
0
/* Expects to be always run from workqueue - which acts as
 * read-side critical section for our kind of RCU. */
static void handle_rx(struct vhost_net *net)
{
	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
	unsigned uninitialized_var(in), log;
	struct vhost_log *vq_log;
	struct msghdr msg = {
		.msg_name = NULL,
		.msg_namelen = 0,
		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
		.msg_controllen = 0,
		.msg_iov = vq->iov,
		.msg_flags = MSG_DONTWAIT,
	};
	struct virtio_net_hdr_mrg_rxbuf hdr = {
		.hdr.flags = 0,
		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
	};
	size_t total_len = 0;
	int err, mergeable;
	s16 headcount;
	size_t vhost_hlen, sock_hlen;
	size_t vhost_len, sock_len;
	/* TODO: check that we are running from vhost_worker? */
	struct socket *sock = rcu_dereference_check(vq->private_data, 1);

	if (!sock)
		return;

	mutex_lock(&vq->mutex);
	vhost_disable_notify(&net->dev, vq);
	vhost_hlen = vq->vhost_hlen;
	sock_hlen = vq->sock_hlen;

	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
		vq->log : NULL;
	mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);

	while ((sock_len = peek_head_len(sock->sk))) {
		sock_len += sock_hlen;
		vhost_len = sock_len + vhost_hlen;
		headcount = get_rx_bufs(vq, vq->heads, vhost_len,
					&in, vq_log, &log,
					likely(mergeable) ? UIO_MAXIOV : 1);
		/* On error, stop handling until the next kick. */
		if (unlikely(headcount < 0))
			break;
		/* OK, now we need to know about added descriptors. */
		if (!headcount) {
			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
				/* They have slipped one in as we were
				 * doing that: check again. */
				vhost_disable_notify(&net->dev, vq);
				continue;
			}
			/* Nothing new?  Wait for eventfd to tell us
			 * they refilled. */
			break;
		}
		/* We don't need to be notified again. */
		if (unlikely((vhost_hlen)))
			/* Skip header. TODO: support TSO. */
			move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in);
		else
			/* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
			 * needed because recvmsg can modify msg_iov. */
			copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in);
		msg.msg_iovlen = in;
		err = sock->ops->recvmsg(NULL, sock, &msg,
					 sock_len, MSG_DONTWAIT | MSG_TRUNC);
		/* Userspace might have consumed the packet meanwhile:
		 * it's not supposed to do this usually, but might be hard
		 * to prevent. Discard data we got (if any) and keep going. */
		if (unlikely(err != sock_len)) {
			pr_debug("Discarded rx packet: "
				 " len %d, expected %zd\n", err, sock_len);
			vhost_discard_vq_desc(vq, headcount);
			continue;
		}
		if (unlikely(vhost_hlen) &&
		    memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0,
				      vhost_hlen)) {
			vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
			       vq->iov->iov_base);
			break;
		}
		/* TODO: Should check and handle checksum. */
		if (likely(mergeable) &&
		    memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
				      offsetof(typeof(hdr), num_buffers),
				      sizeof hdr.num_buffers)) {
			vq_err(vq, "Failed num_buffers write");
			vhost_discard_vq_desc(vq, headcount);
			break;
		}
		vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
					    headcount);
		if (unlikely(vq_log))
			vhost_log_write(vq, vq_log, log, vhost_len);
		total_len += vhost_len;
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
			break;
		}
	}

	mutex_unlock(&vq->mutex);
}

static void handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);

	handle_tx(net);
}

static void handle_rx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);

	handle_rx(net);
}

static void handle_tx_net(struct vhost_work *work)
{
	struct vhost_net *net = container_of(work, struct vhost_net,
					     poll[VHOST_NET_VQ_TX].work);
	handle_tx(net);
}

static void handle_rx_net(struct vhost_work *work)
{
	struct vhost_net *net = container_of(work, struct vhost_net,
					     poll[VHOST_NET_VQ_RX].work);
	handle_rx(net);
}

static int vhost_net_open(struct inode *inode, struct file *f)
{
	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
	struct vhost_dev *dev;
	int r;

	if (!n)
		return -ENOMEM;

	dev = &n->dev;
	n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
	n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
	r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX);
	if (r < 0) {
		kfree(n);
		return r;
	}

	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
	n->tx_poll_state = VHOST_NET_POLL_DISABLED;

	f->private_data = n;

	return 0;
}

static void vhost_net_disable_vq(struct vhost_net *n,
				 struct vhost_virtqueue *vq)
{
	if (!vq->private_data)
		return;
	if (vq == n->vqs + VHOST_NET_VQ_TX) {
		tx_poll_stop(n);
		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
	} else
		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
}

static void vhost_net_enable_vq(struct vhost_net *n,
				struct vhost_virtqueue *vq)
{
	struct socket *sock;

	sock = rcu_dereference_protected(vq->private_data,
					 lockdep_is_held(&vq->mutex));
	if (!sock)
		return;
	if (vq == n->vqs + VHOST_NET_VQ_TX) {
		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
		tx_poll_start(n, sock);
	} else
		vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
}

static struct socket *vhost_net_stop_vq(struct vhost_net *n,
					struct vhost_virtqueue *vq)
{
	struct socket *sock;

	mutex_lock(&vq->mutex);
	sock = rcu_dereference_protected(vq->private_data,
					 lockdep_is_held(&vq->mutex));
	vhost_net_disable_vq(n, vq);
	rcu_assign_pointer(vq->private_data, NULL);
	mutex_unlock(&vq->mutex);
	return sock;
}

static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
			   struct socket **rx_sock)
{
	*tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX);
	*rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX);
}

static void vhost_net_flush_vq(struct vhost_net *n, int index)
{
	vhost_poll_flush(n->poll + index);
	vhost_poll_flush(&n->dev.vqs[index].poll);
}
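
The vhost code above reads vq->private_data in two different ways: the worker (handle_rx) relies on the workqueue itself serializing against flushes and uses rcu_dereference_check(..., 1), while the control paths hold vq->mutex and use rcu_dereference_protected(..., lockdep_is_held(&vq->mutex)). A miniature sketch of that split, with hypothetical names:

/* Hypothetical miniature of the two vq->private_data access modes above. */
struct vq_like {
	struct mutex mutex;
	void __rcu *private_data;
};

/* Control path: the caller must hold vq->mutex, and lockdep checks it. */
static void *backend_get_locked(struct vq_like *vq)
{
	return rcu_dereference_protected(vq->private_data,
					 lockdep_is_held(&vq->mutex));
}

/* Worker path: serialized by the work/flush machinery rather than a lock,
 * so the check is deliberately relaxed to "1", as handle_rx does. */
static void *backend_get_from_worker(struct vq_like *vq)
{
	return rcu_dereference_check(vq->private_data, 1);
}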
Example #8
0
int dns_query(const char *type, const char *name, size_t namelen,
	      const char *options, char **_result, time_t *_expiry)
{
	struct key *rkey;
	struct user_key_payload *upayload;
	const struct cred *saved_cred;
	size_t typelen, desclen;
	char *desc, *cp;
	int ret, len;

	kenter("%s,%*.*s,%zu,%s",
	       type, (int)namelen, (int)namelen, name, namelen, options);

	if (!name || namelen == 0 || !_result)
		return -EINVAL;

	/* construct the query key description as "[<type>:]<name>" */
	typelen = 0;
	desclen = 0;
	if (type) {
		typelen = strlen(type);
		if (typelen < 1)
			return -EINVAL;
		desclen += typelen + 1;
	}

	if (!namelen)
		namelen = strlen(name);
	if (namelen < 3)
		return -EINVAL;
	desclen += namelen + 1;

	desc = kmalloc(desclen, GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	cp = desc;
	if (type) {
		memcpy(cp, type, typelen);
		cp += typelen;
		*cp++ = ':';
	}
	memcpy(cp, name, namelen);
	cp += namelen;
	*cp = '\0';

	if (!options)
		options = "";
	kdebug("call request_key(,%s,%s)", desc, options);

	/* make the upcall, using special credentials to prevent the use of
	 * add_key() to preload the upcall's keyring
	 */
	saved_cred = override_creds(dns_resolver_cache);
	rkey = request_key(&key_type_dns_resolver, desc, options);
	revert_creds(saved_cred);
	kfree(desc);
	if (IS_ERR(rkey)) {
		ret = PTR_ERR(rkey);
		goto out;
	}

	down_read(&rkey->sem);
	rkey->perm |= KEY_USR_VIEW;

	ret = key_validate(rkey);
	if (ret < 0)
		goto put;

	/* If the DNS server gave an error, return that to the caller */
	ret = rkey->type_data.x[0];
	if (ret)
		goto put;

	upayload = rcu_dereference_protected(rkey->payload.data,
					     lockdep_is_held(&rkey->sem));
	len = upayload->datalen;

	ret = -ENOMEM;
	*_result = kmalloc(len + 1, GFP_KERNEL);
	if (!*_result)
		goto put;

	memcpy(*_result, upayload->data, len + 1);
	if (_expiry)
		*_expiry = rkey->expiry;

	ret = len;
put:
	up_read(&rkey->sem);
	key_put(rkey);
out:
	kleave(" = %d", ret);
	return ret;
}
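
dns_query() upcalls to userspace through request_key() and hands back a kmalloc'd, NUL-terminated copy of the payload that the caller must free (see the kerneldoc reproduced in Example #23). A hypothetical in-kernel caller:

/* Hypothetical caller: straight hostname -> address lookup, no options. */
static int resolve_host(const char *host)
{
	char *result = NULL;
	int len;

	len = dns_query(NULL, host, strlen(host), NULL, &result, NULL);
	if (len < 0)
		return len;		/* negative error code from the resolver */

	pr_info("resolved %s to %*.*s\n", host, len, len, result);
	kfree(result);			/* caller owns the returned buffer */
	return 0;
}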
Example #9
0
/**
 * nx842_OF_upd -- Handle OF property updates for the device.
 *
 * Set all properties from the OF tree.  Optionally, a new property
 * can be provided by the @new_prop pointer to overwrite an existing value.
 * The device will remain disabled until all values are valid; this function
 * will return an error for updates unless all values are valid.
 *
 * @new_prop: If not NULL, this property is being updated.  If NULL, update
 *  all properties from the current values in the OF tree.
 *
 * Returns:
 *  0 - Success
 *  -ENOMEM - Could not allocate memory for new devdata structure
 *  -EINVAL - property value not found, new_prop is not a recognized
 *	property for the device, or the property value is not valid.
 *  -ENODEV - Device is not available
 */
static int nx842_OF_upd(struct property *new_prop)
{
	struct nx842_devdata *old_devdata = NULL;
	struct nx842_devdata *new_devdata = NULL;
	struct device_node *of_node = NULL;
	struct property *status = NULL;
	struct property *maxsglen = NULL;
	struct property *maxsyncop = NULL;
	int ret = 0;
	unsigned long flags;

	new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
	if (!new_devdata)
		return -ENOMEM;

	spin_lock_irqsave(&devdata_mutex, flags);
	old_devdata = rcu_dereference_check(devdata,
			lockdep_is_held(&devdata_mutex));
	if (old_devdata)
		of_node = old_devdata->dev->of_node;

	if (!old_devdata || !of_node) {
		pr_err("%s: device is not available\n", __func__);
		spin_unlock_irqrestore(&devdata_mutex, flags);
		kfree(new_devdata);
		return -ENODEV;
	}

	memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
	new_devdata->counters = old_devdata->counters;

	/* Set ptrs for existing properties */
	status = of_find_property(of_node, "status", NULL);
	maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL);
	maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL);
	if (!status || !maxsglen || !maxsyncop) {
		dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__);
		ret = -EINVAL;
		goto error_out;
	}

	/*
	 * If this is a property update, there are only certain properties that
	 * we care about. Bail if it isn't in the below list
	 */
	if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) ||
		         strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) ||
		         strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length)))
		goto out;

	/* Perform property updates */
	ret = nx842_OF_upd_status(status);
	if (ret)
		goto error_out;

	ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
	if (ret)
		goto error_out;

	ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
	if (ret)
		goto error_out;

out:
	dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n",
			__func__, new_devdata->max_sync_size,
			old_devdata->max_sync_size);
	dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n",
			__func__, new_devdata->max_sync_sg,
			old_devdata->max_sync_sg);
	dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n",
			__func__, new_devdata->max_sg_len,
			old_devdata->max_sg_len);

	rcu_assign_pointer(devdata, new_devdata);
	spin_unlock_irqrestore(&devdata_mutex, flags);
	synchronize_rcu();
	dev_set_drvdata(new_devdata->dev, new_devdata);
	kfree(old_devdata);
	return 0;

error_out:
	if (new_devdata) {
		dev_info(old_devdata->dev, "%s: device disabled\n", __func__);
		nx842_OF_set_defaults(new_devdata);
		rcu_assign_pointer(devdata, new_devdata);
		spin_unlock_irqrestore(&devdata_mutex, flags);
		synchronize_rcu();
		dev_set_drvdata(new_devdata->dev, new_devdata);
		kfree(old_devdata);
	} else {
		dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__);
		spin_unlock_irqrestore(&devdata_mutex, flags);
	}

	if (!ret)
		ret = -EINVAL;
	return ret;
}
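
nx842_OF_upd() follows the classic RCU update sequence: build a new devdata copy, publish it with rcu_assign_pointer() while holding the devdata lock, drop the lock, wait out pre-existing readers with synchronize_rcu(), and only then free the old copy. Reduced to its skeleton (hypothetical names; a sketch rather than the driver's code):

struct my_devdata {
	int max_sync_size;
};

static DEFINE_SPINLOCK(devdata_lock);
static struct my_devdata __rcu *devdata;

/* Hypothetical skeleton of the publish-then-reclaim sequence used above. */
static int publish_devdata(const struct my_devdata *templ)
{
	struct my_devdata *old, *new;
	unsigned long flags;

	new = kmemdup(templ, sizeof(*templ), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	spin_lock_irqsave(&devdata_lock, flags);
	old = rcu_dereference_protected(devdata,
					lockdep_is_held(&devdata_lock));
	rcu_assign_pointer(devdata, new);	/* readers now see 'new' */
	spin_unlock_irqrestore(&devdata_lock, flags);

	synchronize_rcu();			/* wait for pre-existing readers */
	kfree(old);				/* no reader can still hold 'old' */
	return 0;
}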
Example #10
0
static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
				u_int16_t subsys_id)
{
	struct sk_buff *nskb, *oskb = skb;
	struct net *net = sock_net(skb->sk);
	const struct nfnetlink_subsystem *ss;
	const struct nfnl_callback *nc;
	bool success = true, done = false;
	int err;

	if (subsys_id >= NFNL_SUBSYS_COUNT)
		return netlink_ack(skb, nlh, -EINVAL);
replay:
	nskb = netlink_skb_clone(oskb, GFP_KERNEL);
	if (!nskb)
		return netlink_ack(oskb, nlh, -ENOMEM);

	nskb->sk = oskb->sk;
	skb = nskb;

	nfnl_lock(subsys_id);
	ss = rcu_dereference_protected(table[subsys_id].subsys,
				       lockdep_is_held(&table[subsys_id].mutex));
	if (!ss) {
#ifdef CONFIG_MODULES
		nfnl_unlock(subsys_id);
		request_module("nfnetlink-subsys-%d", subsys_id);
		nfnl_lock(subsys_id);
		ss = rcu_dereference_protected(table[subsys_id].subsys,
					       lockdep_is_held(&table[subsys_id].mutex));
		if (!ss)
#endif
		{
			nfnl_unlock(subsys_id);
			netlink_ack(skb, nlh, -EOPNOTSUPP);
			return kfree_skb(nskb);
		}
	}

	if (!ss->commit || !ss->abort) {
		nfnl_unlock(subsys_id);
		netlink_ack(skb, nlh, -EOPNOTSUPP);
		return kfree_skb(skb);
	}

	while (skb->len >= nlmsg_total_size(0)) {
		int msglen, type;

		nlh = nlmsg_hdr(skb);
		err = 0;

		if (nlh->nlmsg_len < NLMSG_HDRLEN) {
			err = -EINVAL;
			goto ack;
		}

		/* Only requests are handled by the kernel */
		if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
			err = -EINVAL;
			goto ack;
		}

		type = nlh->nlmsg_type;
		if (type == NFNL_MSG_BATCH_BEGIN) {
			/* Malformed: Batch begin twice */
			success = false;
			goto done;
		} else if (type == NFNL_MSG_BATCH_END) {
			done = true;
			goto done;
		} else if (type < NLMSG_MIN_TYPE) {
			err = -EINVAL;
			goto ack;
		}

		/* We only accept a batch with messages for the same
		 * subsystem.
		 */
		if (NFNL_SUBSYS_ID(type) != subsys_id) {
			err = -EINVAL;
			goto ack;
		}

		nc = nfnetlink_find_client(type, ss);
		if (!nc) {
			err = -EINVAL;
			goto ack;
		}

		{
			int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
			u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
			struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
			struct nlattr *attr = (void *)nlh + min_len;
			int attrlen = nlh->nlmsg_len - min_len;

			err = nla_parse(cda, ss->cb[cb_id].attr_count,
					attr, attrlen, ss->cb[cb_id].policy);
			if (err < 0)
				goto ack;

			if (nc->call_batch) {
				err = nc->call_batch(net->nfnl, skb, nlh,
						     (const struct nlattr **)cda);
			}

			/* The lock was released to autoload some module, we
			 * have to abort and start from scratch using the
			 * original skb.
			 */
			if (err == -EAGAIN) {
				ss->abort(skb);
				nfnl_unlock(subsys_id);
				kfree_skb(nskb);
				goto replay;
			}
		}
ack:
		if (nlh->nlmsg_flags & NLM_F_ACK || err) {
			/* We don't stop processing the batch on errors, thus,
			 * userspace gets all the errors that the batch
			 * triggers.
			 */
			netlink_ack(skb, nlh, err);
			if (err)
				success = false;
		}

		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (msglen > skb->len)
			msglen = skb->len;
		skb_pull(skb, msglen);
	}
done:
	if (success && done)
		ss->commit(skb);
	else
		ss->abort(skb);

	nfnl_unlock(subsys_id);
	kfree_skb(nskb);
}
Example #11
0
/*
 * connection-level Rx packet processor
 */
static int rxrpc_process_event(struct rxrpc_connection *conn,
			       struct sk_buff *skb,
			       u32 *_abort_code)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	__be32 wtmp;
	u32 abort_code;
	int loop, ret;

	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
		_leave(" = -ECONNABORTED [%u]", conn->state);
		return -ECONNABORTED;
	}

	_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);

	switch (sp->hdr.type) {
	case RXRPC_PACKET_TYPE_DATA:
	case RXRPC_PACKET_TYPE_ACK:
		rxrpc_conn_retransmit_call(conn, skb,
					   sp->hdr.cid & RXRPC_CHANNELMASK);
		return 0;

	case RXRPC_PACKET_TYPE_BUSY:
		/* Just ignore BUSY packets for now. */
		return 0;

	case RXRPC_PACKET_TYPE_ABORT:
		if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
				  &wtmp, sizeof(wtmp)) < 0) {
			trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
					      tracepoint_string("bad_abort"));
			return -EPROTO;
		}
		abort_code = ntohl(wtmp);
		_proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);

		conn->error = -ECONNABORTED;
		conn->abort_code = abort_code;
		conn->state = RXRPC_CONN_REMOTELY_ABORTED;
		rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED);
		return -ECONNABORTED;

	case RXRPC_PACKET_TYPE_CHALLENGE:
		return conn->security->respond_to_challenge(conn, skb,
							    _abort_code);

	case RXRPC_PACKET_TYPE_RESPONSE:
		ret = conn->security->verify_response(conn, skb, _abort_code);
		if (ret < 0)
			return ret;

		ret = conn->security->init_connection_security(conn);
		if (ret < 0)
			return ret;

		ret = conn->security->prime_packet_security(conn);
		if (ret < 0)
			return ret;

		spin_lock(&conn->channel_lock);
		spin_lock(&conn->state_lock);

		if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
			conn->state = RXRPC_CONN_SERVICE;
			spin_unlock(&conn->state_lock);
			for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
				rxrpc_call_is_secure(
					rcu_dereference_protected(
						conn->channels[loop].call,
						lockdep_is_held(&conn->channel_lock)));
		} else {
			spin_unlock(&conn->state_lock);
		}

		spin_unlock(&conn->channel_lock);
		return 0;

	default:
		trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
				      tracepoint_string("bad_conn_pkt"));
		return -EPROTO;
	}
}
Example #12
0
/* Expects to be always run from workqueue - which acts as
 * read-side critical section for our kind of RCU. */
static void handle_rx_big(struct vhost_net *net)
{
	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
	unsigned out, in, log, s;
	int head;
	struct vhost_log *vq_log;
	struct msghdr msg = {
		.msg_name = NULL,
		.msg_namelen = 0,
		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
		.msg_controllen = 0,
		.msg_iov = vq->iov,
		.msg_flags = MSG_DONTWAIT,
	};

	struct virtio_net_hdr hdr = {
		.flags = 0,
		.gso_type = VIRTIO_NET_HDR_GSO_NONE
	};

	size_t len, total_len = 0;
	int err;
	size_t hdr_size;
	struct socket *sock = rcu_dereference(vq->private_data);
	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
		return;

	use_mm(net->dev.mm);
	mutex_lock(&vq->mutex);
	vhost_disable_notify(vq);
	hdr_size = vq->vhost_hlen;

	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
		vq->log : NULL;

	for (;;) {
		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
					 ARRAY_SIZE(vq->iov),
					 &out, &in,
					 vq_log, &log);
		/* On error, stop handling until the next kick. */
		if (unlikely(head < 0))
			break;
		/* OK, now we need to know about added descriptors. */
		if (head == vq->num) {
			if (unlikely(vhost_enable_notify(vq))) {
				/* They have slipped one in as we were
				 * doing that: check again. */
				vhost_disable_notify(vq);
				continue;
			}
			/* Nothing new?  Wait for eventfd to tell us
			 * they refilled. */
			break;
		}
		/* We don't need to be notified again. */
		if (out) {
			vq_err(vq, "Unexpected descriptor format for RX: "
			       "out %d, int %d\n",
			       out, in);
			break;
		}
		/* Skip header. TODO: support TSO/mergeable rx buffers. */
		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
		msg.msg_iovlen = in;
		len = iov_length(vq->iov, in);
		/* Sanity check */
		if (!len) {
			vq_err(vq, "Unexpected header len for RX: "
			       "%zd expected %zd\n",
			       iov_length(vq->hdr, s), hdr_size);
			break;
		}
		err = sock->ops->recvmsg(NULL, sock, &msg,
					 len, MSG_DONTWAIT | MSG_TRUNC);
		/* TODO: Check specific error and bomb out unless EAGAIN? */
		if (err < 0) {
			vhost_discard_vq_desc(vq, 1);
			break;
		}
		/* TODO: Should check and handle checksum. */
		if (err > len) {
			pr_debug("Discarded truncated rx packet: "
				 " len %d > %zd\n", err, len);
			vhost_discard_vq_desc(vq, 1);
			continue;
		}
		len = err;
		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
		if (err) {
			vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
			       vq->iov->iov_base, err);
			break;
		}
		len += hdr_size;
		vhost_add_used_and_signal(&net->dev, vq, head, len);
		if (unlikely(vq_log))
			vhost_log_write(vq, vq_log, log, len);
		total_len += len;
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
			break;
		}
	}

	mutex_unlock(&vq->mutex);
	unuse_mm(net->dev.mm);
}

/* Expects to be always run from workqueue - which acts as
 * read-side critical section for our kind of RCU. */
static void handle_rx_mergeable(struct vhost_net *net)
{
	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
	unsigned uninitialized_var(in), log;
	struct vhost_log *vq_log;
	struct msghdr msg = {
		.msg_name = NULL,
		.msg_namelen = 0,
		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
		.msg_controllen = 0,
		.msg_iov = vq->iov,
		.msg_flags = MSG_DONTWAIT,
	};

	struct virtio_net_hdr_mrg_rxbuf hdr = {
		.hdr.flags = 0,
		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
	};

	size_t total_len = 0;
	int err, headcount;
	size_t vhost_hlen, sock_hlen;
	size_t vhost_len, sock_len;
	struct socket *sock = rcu_dereference(vq->private_data);
	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
		return;

	use_mm(net->dev.mm);
	mutex_lock(&vq->mutex);
	vhost_disable_notify(vq);
	vhost_hlen = vq->vhost_hlen;
	sock_hlen = vq->sock_hlen;

	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
		vq->log : NULL;

	while ((sock_len = peek_head_len(sock->sk))) {
		sock_len += sock_hlen;
		vhost_len = sock_len + vhost_hlen;
		headcount = get_rx_bufs(vq, vq->heads, vhost_len,
					&in, vq_log, &log);
		/* On error, stop handling until the next kick. */
		if (unlikely(headcount < 0))
			break;
		/* OK, now we need to know about added descriptors. */
		if (!headcount) {
			if (unlikely(vhost_enable_notify(vq))) {
				/* They have slipped one in as we were
				 * doing that: check again. */
				vhost_disable_notify(vq);
				continue;
			}
			/* Nothing new?  Wait for eventfd to tell us
			 * they refilled. */
			break;
		}
		/* We don't need to be notified again. */
		if (unlikely((vhost_hlen)))
			/* Skip header. TODO: support TSO. */
			move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in);
		else
			/* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
			 * needed because recvmsg can modify msg_iov. */
			copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in);
		msg.msg_iovlen = in;
		err = sock->ops->recvmsg(NULL, sock, &msg,
					 sock_len, MSG_DONTWAIT | MSG_TRUNC);
		/* Userspace might have consumed the packet meanwhile:
		 * it's not supposed to do this usually, but might be hard
		 * to prevent. Discard data we got (if any) and keep going. */
		if (unlikely(err != sock_len)) {
			pr_debug("Discarded rx packet: "
				 " len %d, expected %zd\n", err, sock_len);
			vhost_discard_vq_desc(vq, headcount);
			continue;
		}
		if (unlikely(vhost_hlen) &&
		    memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0,
				      vhost_hlen)) {
			vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
			       vq->iov->iov_base);
			break;
		}
		/* TODO: Should check and handle checksum. */
		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) &&
		    memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
				      offsetof(typeof(hdr), num_buffers),
				      sizeof hdr.num_buffers)) {
			vq_err(vq, "Failed num_buffers write");
			vhost_discard_vq_desc(vq, headcount);
			break;
		}
		vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
					    headcount);
		if (unlikely(vq_log))
			vhost_log_write(vq, vq_log, log, vhost_len);
		total_len += vhost_len;
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
			break;
		}
	}

	mutex_unlock(&vq->mutex);
	unuse_mm(net->dev.mm);
}

static void handle_rx(struct vhost_net *net)
{
	if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
		handle_rx_mergeable(net);
	else
		handle_rx_big(net);
}

static void handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);

	handle_tx(net);
}

static void handle_rx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);

	handle_rx(net);
}

static void handle_tx_net(struct vhost_work *work)
{
	struct vhost_net *net = container_of(work, struct vhost_net,
					     poll[VHOST_NET_VQ_TX].work);
	handle_tx(net);
}

static void handle_rx_net(struct vhost_work *work)
{
	struct vhost_net *net = container_of(work, struct vhost_net,
					     poll[VHOST_NET_VQ_RX].work);
	handle_rx(net);
}

static int vhost_net_open(struct inode *inode, struct file *f)
{
	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
	struct vhost_dev *dev;
	int r;

	if (!n)
		return -ENOMEM;

	dev = &n->dev;
	n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
	n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
	r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX);
	if (r < 0) {
		kfree(n);
		return r;
	}

	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
	n->tx_poll_state = VHOST_NET_POLL_DISABLED;

	f->private_data = n;

	return 0;
}

static void vhost_net_disable_vq(struct vhost_net *n,
				 struct vhost_virtqueue *vq)
{
	if (!vq->private_data)
		return;
	if (vq == n->vqs + VHOST_NET_VQ_TX) {
		tx_poll_stop(n);
		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
	} else
		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
}

static void vhost_net_enable_vq(struct vhost_net *n,
				struct vhost_virtqueue *vq)
{
	struct socket *sock;

	sock = rcu_dereference_protected(vq->private_data,
					 lockdep_is_held(&vq->mutex));
	if (!sock)
		return;
	if (vq == n->vqs + VHOST_NET_VQ_TX) {
		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
		tx_poll_start(n, sock);
	} else
		vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
}

static struct socket *vhost_net_stop_vq(struct vhost_net *n,
					struct vhost_virtqueue *vq)
{
	struct socket *sock;

	mutex_lock(&vq->mutex);
	sock = rcu_dereference_protected(vq->private_data,
					 lockdep_is_held(&vq->mutex));
	vhost_net_disable_vq(n, vq);
	rcu_assign_pointer(vq->private_data, NULL);
	mutex_unlock(&vq->mutex);
	return sock;
}

static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
			   struct socket **rx_sock)
{
	*tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX);
	*rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX);
}

static void vhost_net_flush_vq(struct vhost_net *n, int index)
{
	vhost_poll_flush(n->poll + index);
	vhost_poll_flush(&n->dev.vqs[index].poll);
}

static void vhost_net_flush(struct vhost_net *n)
{
	vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
	vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
}

static int vhost_net_release(struct inode *inode, struct file *f)
{
	struct vhost_net *n = f->private_data;
	struct socket *tx_sock;
	struct socket *rx_sock;

	vhost_net_stop(n, &tx_sock, &rx_sock);
	vhost_net_flush(n);
	vhost_dev_cleanup(&n->dev);
	if (tx_sock)
		fput(tx_sock->file);
	if (rx_sock)
		fput(rx_sock->file);
	/* We do an extra flush before freeing memory,
	 * since jobs can re-queue themselves. */
	vhost_net_flush(n);
	kfree(n);
	return 0;
}

static struct socket *get_raw_socket(int fd)
{
	struct {
		struct sockaddr_ll sa;
		char  buf[MAX_ADDR_LEN];
	} uaddr;
	int uaddr_len = sizeof uaddr, r;
	struct socket *sock = sockfd_lookup(fd, &r);
	if (!sock)
		return ERR_PTR(-ENOTSOCK);

	/* Parameter checking */
	if (sock->sk->sk_type != SOCK_RAW) {
		r = -ESOCKTNOSUPPORT;
		goto err;
	}

	r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
			       &uaddr_len, 0);
	if (r)
		goto err;

	if (uaddr.sa.sll_family != AF_PACKET) {
		r = -EPFNOSUPPORT;
		goto err;
	}
	return sock;
err:
	fput(sock->file);
	return ERR_PTR(r);
}

static struct socket *get_tap_socket(int fd)
{
	struct file *file = fget(fd);
	struct socket *sock;
	if (!file)
		return ERR_PTR(-EBADF);
	sock = tun_get_socket(file);
	if (!IS_ERR(sock))
		return sock;
	sock = macvtap_get_socket(file);
	if (IS_ERR(sock))
		fput(file);
	return sock;
}

static struct socket *get_socket(int fd)
{
	struct socket *sock;
	/* special case to disable backend */
	if (fd == -1)
		return NULL;
	sock = get_raw_socket(fd);
	if (!IS_ERR(sock))
		return sock;
	sock = get_tap_socket(fd);
	if (!IS_ERR(sock))
		return sock;
	return ERR_PTR(-ENOTSOCK);
}

static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
{
	struct socket *sock, *oldsock;
	struct vhost_virtqueue *vq;
	int r;

	mutex_lock(&n->dev.mutex);
	r = vhost_dev_check_owner(&n->dev);
	if (r)
		goto err;

	if (index >= VHOST_NET_VQ_MAX) {
		r = -ENOBUFS;
		goto err;
	}
	vq = n->vqs + index;
	mutex_lock(&vq->mutex);

	/* Verify that ring has been setup correctly. */
	if (!vhost_vq_access_ok(vq)) {
		r = -EFAULT;
		goto err_vq;
	}
	sock = get_socket(fd);
	if (IS_ERR(sock)) {
		r = PTR_ERR(sock);
		goto err_vq;
	}

	/* start polling new socket */
	oldsock = rcu_dereference_protected(vq->private_data,
					    lockdep_is_held(&vq->mutex));
	if (sock != oldsock) {
                vhost_net_disable_vq(n, vq);
                rcu_assign_pointer(vq->private_data, sock);
                vhost_net_enable_vq(n, vq);
	}

	mutex_unlock(&vq->mutex);

	if (oldsock) {
		vhost_net_flush_vq(n, index);
		fput(oldsock->file);
	}

	mutex_unlock(&n->dev.mutex);
	return 0;

err_vq:
	mutex_unlock(&vq->mutex);
err:
	mutex_unlock(&n->dev.mutex);
	return r;
}

static long vhost_net_reset_owner(struct vhost_net *n)
{
	struct socket *tx_sock = NULL;
	struct socket *rx_sock = NULL;
	long err;
	mutex_lock(&n->dev.mutex);
	err = vhost_dev_check_owner(&n->dev);
	if (err)
		goto done;
	vhost_net_stop(n, &tx_sock, &rx_sock);
	vhost_net_flush(n);
	err = vhost_dev_reset_owner(&n->dev);
done:
	mutex_unlock(&n->dev.mutex);
	if (tx_sock)
		fput(tx_sock->file);
	if (rx_sock)
		fput(rx_sock->file);
	return err;
}

static int vhost_net_set_features(struct vhost_net *n, u64 features)
{
	size_t vhost_hlen, sock_hlen, hdr_len;
	int i;

	hdr_len = (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ?
			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
			sizeof(struct virtio_net_hdr);
	if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {
		/* vhost provides vnet_hdr */
		vhost_hlen = hdr_len;
		sock_hlen = 0;
	} else {
		/* socket provides vnet_hdr */
		vhost_hlen = 0;
		sock_hlen = hdr_len;
	}
	mutex_lock(&n->dev.mutex);
	if ((features & (1 << VHOST_F_LOG_ALL)) &&
	    !vhost_log_access_ok(&n->dev)) {
		mutex_unlock(&n->dev.mutex);
		return -EFAULT;
	}
	n->dev.acked_features = features;
	smp_wmb();
	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
		mutex_lock(&n->vqs[i].mutex);
		n->vqs[i].vhost_hlen = vhost_hlen;
		n->vqs[i].sock_hlen = sock_hlen;
		mutex_unlock(&n->vqs[i].mutex);
	}
	vhost_net_flush(n);
	mutex_unlock(&n->dev.mutex);
	return 0;
}
Example #13
0
/* Expects to be always run from workqueue - which acts as
 * read-side critical section for our kind of RCU. */
static void handle_tx(struct vhost_net *net)
{
	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
	unsigned out, in, s;
	int head;
	struct msghdr msg = {
		.msg_name = NULL,
		.msg_namelen = 0,
		.msg_control = NULL,
		.msg_controllen = 0,
		.msg_iov = vq->iov,
		.msg_flags = MSG_DONTWAIT,
	};
	size_t len, total_len = 0;
	int err, wmem;
	size_t hdr_size;
	struct socket *sock;

	sock = rcu_dereference_check(vq->private_data,
				     lockdep_is_held(&vq->mutex));
	if (!sock)
		return;

	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
	if (wmem >= sock->sk->sk_sndbuf) {
		mutex_lock(&vq->mutex);
		tx_poll_start(net, sock);
		mutex_unlock(&vq->mutex);
		return;
	}

	use_mm(net->dev.mm);
	mutex_lock(&vq->mutex);
	vhost_disable_notify(vq);

	if (wmem < sock->sk->sk_sndbuf / 2)
		tx_poll_stop(net);
	hdr_size = vq->vhost_hlen;

	for (;;) {
		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
					 ARRAY_SIZE(vq->iov),
					 &out, &in,
					 NULL, NULL);
		/* On error, stop handling until the next kick. */
		if (unlikely(head < 0))
			break;
		/* Nothing new?  Wait for eventfd to tell us they refilled. */
		if (head == vq->num) {
			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
				tx_poll_start(net, sock);
				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
				break;
			}
			if (unlikely(vhost_enable_notify(vq))) {
				vhost_disable_notify(vq);
				continue;
			}
			break;
		}
		if (in) {
			vq_err(vq, "Unexpected descriptor format for TX: "
			       "out %d, int %d\n", out, in);
			break;
		}
		/* Skip header. TODO: support TSO. */
		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out);
		msg.msg_iovlen = out;
		len = iov_length(vq->iov, out);
		/* Sanity check */
		if (!len) {
			vq_err(vq, "Unexpected header len for TX: "
			       "%zd expected %zd\n",
			       iov_length(vq->hdr, s), hdr_size);
			break;
		}
		/* TODO: Check specific error and bomb out unless ENOBUFS? */
		err = sock->ops->sendmsg(NULL, sock, &msg, len);
		if (unlikely(err < 0)) {
			vhost_discard_vq_desc(vq, 1);
			tx_poll_start(net, sock);
			break;
		}
		if (err != len)
			pr_debug("Truncated TX packet: "
				 " len %d != %zd\n", err, len);
		vhost_add_used_and_signal(&net->dev, vq, head, 0);
		total_len += len;
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
			break;
		}
	}

	mutex_unlock(&vq->mutex);
	unuse_mm(net->dev.mm);
}

static int peek_head_len(struct sock *sk)
{
	struct sk_buff *head;
	int len = 0;

	lock_sock(sk);
	head = skb_peek(&sk->sk_receive_queue);
	if (head)
		len = head->len;
	release_sock(sk);
	return len;
}

/* This is a multi-buffer version of vhost_get_desc, that works if
 *	vq has read descriptors only.
 * @vq		- the relevant virtqueue
 * @datalen	- data length we'll be reading
 * @iovcount	- returned count of io vectors we fill
 * @log		- vhost log
 * @log_num	- log offset
 *	returns number of buffer heads allocated, negative on error
 */
static int get_rx_bufs(struct vhost_virtqueue *vq,
		       struct vring_used_elem *heads,
		       int datalen,
		       unsigned *iovcount,
		       struct vhost_log *log,
		       unsigned *log_num)
{
	unsigned int out, in;
	int seg = 0;
	int headcount = 0;
	unsigned d;
	int r, nlogs = 0;

	while (datalen > 0) {
		if (unlikely(seg >= VHOST_NET_MAX_SG)) {
			r = -ENOBUFS;
			goto err;
		}
		d = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg,
				      ARRAY_SIZE(vq->iov) - seg, &out,
				      &in, log, log_num);
		if (d == vq->num) {
			r = 0;
			goto err;
		}
		if (unlikely(out || in <= 0)) {
			vq_err(vq, "unexpected descriptor format for RX: "
				"out %d, in %d\n", out, in);
			r = -EINVAL;
			goto err;
		}
		if (unlikely(log)) {
			nlogs += *log_num;
			log += *log_num;
		}
		heads[headcount].id = d;
		heads[headcount].len = iov_length(vq->iov + seg, in);
		datalen -= heads[headcount].len;
		++headcount;
		seg += in;
	}
	heads[headcount - 1].len += datalen;
	*iovcount = seg;
	if (unlikely(log))
		*log_num = nlogs;
	return headcount;
err:
	vhost_discard_vq_desc(vq, headcount);
	return r;
}
Example #14
0
/* Called from syscall or from eBPF program */
static int trie_update_elem(struct bpf_map *map,
			    void *_key, void *value, u64 flags)
{
	struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
	struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
	struct lpm_trie_node __rcu **slot;
	struct bpf_lpm_trie_key *key = _key;
	unsigned long irq_flags;
	unsigned int next_bit;
	size_t matchlen = 0;
	int ret = 0;

	if (unlikely(flags > BPF_EXIST))
		return -EINVAL;

	if (key->prefixlen > trie->max_prefixlen)
		return -EINVAL;

	raw_spin_lock_irqsave(&trie->lock, irq_flags);

	/* Allocate and fill a new node */

	if (trie->n_entries == trie->map.max_entries) {
		ret = -ENOSPC;
		goto out;
	}

	new_node = lpm_trie_node_alloc(trie, value);
	if (!new_node) {
		ret = -ENOMEM;
		goto out;
	}

	trie->n_entries++;

	new_node->prefixlen = key->prefixlen;
	RCU_INIT_POINTER(new_node->child[0], NULL);
	RCU_INIT_POINTER(new_node->child[1], NULL);
	memcpy(new_node->data, key->data, trie->data_size);

	/* Now find a slot to attach the new node. To do that, walk the tree
	 * from the root and match as many bits as possible for each node until
	 * we either find an empty slot or a slot that needs to be replaced by
	 * an intermediate node.
	 */
	slot = &trie->root;

	while ((node = rcu_dereference_protected(*slot,
					lockdep_is_held(&trie->lock)))) {
		matchlen = longest_prefix_match(trie, node, key);

		if (node->prefixlen != matchlen ||
		    node->prefixlen == key->prefixlen ||
		    node->prefixlen == trie->max_prefixlen)
			break;

		next_bit = extract_bit(key->data, node->prefixlen);
		slot = &node->child[next_bit];
	}

	/* If the slot is empty (a free child pointer or an empty root),
	 * simply assign the @new_node to that slot and be done.
	 */
	if (!node) {
		rcu_assign_pointer(*slot, new_node);
		goto out;
	}

	/* If the slot we picked already exists, replace it with @new_node
	 * which already has the correct data array set.
	 */
	if (node->prefixlen == matchlen) {
		new_node->child[0] = node->child[0];
		new_node->child[1] = node->child[1];

		if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
			trie->n_entries--;

		rcu_assign_pointer(*slot, new_node);
		kfree_rcu(node, rcu);

		goto out;
	}

	/* If the new node matches the prefix completely, it must be inserted
	 * as an ancestor. Simply insert it between @node and *@slot.
	 */
	if (matchlen == key->prefixlen) {
		next_bit = extract_bit(node->data, matchlen);
		rcu_assign_pointer(new_node->child[next_bit], node);
		rcu_assign_pointer(*slot, new_node);
		goto out;
	}

	im_node = lpm_trie_node_alloc(trie, NULL);
	if (!im_node) {
		ret = -ENOMEM;
		goto out;
	}

	im_node->prefixlen = matchlen;
	im_node->flags |= LPM_TREE_NODE_FLAG_IM;
	memcpy(im_node->data, node->data, trie->data_size);

	/* Now determine which child to install in which slot */
	if (extract_bit(key->data, matchlen)) {
		rcu_assign_pointer(im_node->child[0], node);
		rcu_assign_pointer(im_node->child[1], new_node);
	} else {
		rcu_assign_pointer(im_node->child[0], new_node);
		rcu_assign_pointer(im_node->child[1], node);
	}

	/* Finally, assign the intermediate node to the determined spot */
	rcu_assign_pointer(*slot, im_node);

out:
	if (ret) {
		if (new_node)
			trie->n_entries--;

		kfree(new_node);
		kfree(im_node);
	}

	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);

	return ret;
}
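
To make the slot-selection logic concrete, here is a small standalone sketch (userspace, hypothetical helper spellings) of the bit arithmetic trie_update_elem relies on, applied to the case where an intermediate node has to be created:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Same MSB-first bit extraction the trie code uses. */
static int bit_at(const uint8_t *data, size_t index)
{
	return !!(data[index / 8] & (1 << (7 - (index % 8))));
}

static size_t common_prefix(const uint8_t *a, const uint8_t *b, size_t limit)
{
	size_t i;

	for (i = 0; i < limit; i++)
		if (bit_at(a, i) != bit_at(b, i))
			break;
	return i;
}

int main(void)
{
	uint8_t node[4] = { 192, 168, 128, 0 };	/* existing 192.168.128.0/24 */
	uint8_t key[4]  = { 192, 168, 0, 0 };	/* key being inserted: 192.168.0.0/24 */
	size_t matchlen = common_prefix(node, key, 24);

	/* Only 16 bits match, so neither prefix contains the other: an
	 * intermediate /16 node is allocated and the two /24 nodes become
	 * its children, split on bit 16 of their respective keys. */
	printf("matchlen=%zu: old node -> child[%d], new node -> child[%d]\n",
	       matchlen, bit_at(node, matchlen), bit_at(key, matchlen));
	return 0;
}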
Example #15
0
	spin_unlock_bh(&ct->lock);

out_unlock:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);

int nf_conntrack_register_notifier(struct net *net,
				   struct nf_ct_event_notifier *new)
{
	int ret;
	struct nf_ct_event_notifier *notify;

	mutex_lock(&nf_ct_ecache_mutex);
	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
					   lockdep_is_held(&nf_ct_ecache_mutex));
	if (notify != NULL) {
		ret = -EBUSY;
		goto out_unlock;
	}
	rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
	ret = 0;

out_unlock:
	mutex_unlock(&nf_ct_ecache_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);

void nf_conntrack_unregister_notifier(struct net *net,
				      struct nf_ct_event_notifier *new)
Example #16
0
static int nx842_probe(struct vio_dev *viodev,
		       const struct vio_device_id *id)
{
	struct nx842_devdata *old_devdata, *new_devdata = NULL;
	unsigned long flags;
	int ret = 0;

	new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
	if (!new_devdata)
		return -ENOMEM;

	new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
			GFP_NOFS);
	if (!new_devdata->counters) {
		kfree(new_devdata);
		return -ENOMEM;
	}

	spin_lock_irqsave(&devdata_mutex, flags);
	old_devdata = rcu_dereference_check(devdata,
			lockdep_is_held(&devdata_mutex));

	if (old_devdata && old_devdata->vdev != NULL) {
		dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__);
		ret = -1;
		goto error_unlock;
	}

	dev_set_drvdata(&viodev->dev, NULL);

	new_devdata->vdev = viodev;
	new_devdata->dev = &viodev->dev;
	nx842_OF_set_defaults(new_devdata);

	rcu_assign_pointer(devdata, new_devdata);
	spin_unlock_irqrestore(&devdata_mutex, flags);
	synchronize_rcu();
	kfree(old_devdata);

	of_reconfig_notifier_register(&nx842_of_nb);

	ret = nx842_OF_upd(NULL);
	if (ret)
		goto error;

	ret = crypto_register_alg(&nx842_pseries_alg);
	if (ret) {
		dev_err(&viodev->dev, "could not register comp alg: %d\n", ret);
		goto error;
	}

	rcu_read_lock();
	dev_set_drvdata(&viodev->dev, rcu_dereference(devdata));
	rcu_read_unlock();

	if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) {
		dev_err(&viodev->dev, "could not create sysfs device attributes\n");
		ret = -1;
		goto error;
	}

	return 0;

error_unlock:
	spin_unlock_irqrestore(&devdata_mutex, flags);
	if (new_devdata)
		kfree(new_devdata->counters);
	kfree(new_devdata);
error:
	return ret;
}
Example #17
0
/* Process one complete nfnetlink message. */
static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	const struct nfnl_callback *nc;
	const struct nfnetlink_subsystem *ss;
	int type, err;

	/* All the messages must at least contain nfgenmsg */
	if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
		return 0;

	type = nlh->nlmsg_type;
replay:
	rcu_read_lock();
	ss = nfnetlink_get_subsys(type);
	if (!ss) {
#ifdef CONFIG_MODULES
		rcu_read_unlock();
		request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
		rcu_read_lock();
		ss = nfnetlink_get_subsys(type);
		if (!ss)
#endif
		{
			rcu_read_unlock();
			return -EINVAL;
		}
	}

	nc = nfnetlink_find_client(type, ss);
	if (!nc) {
		rcu_read_unlock();
		return -EINVAL;
	}

	{
		int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
		u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
		struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
		struct nlattr *attr = (void *)nlh + min_len;
		int attrlen = nlh->nlmsg_len - min_len;
		__u8 subsys_id = NFNL_SUBSYS_ID(type);

		err = nla_parse(cda, ss->cb[cb_id].attr_count,
				attr, attrlen, ss->cb[cb_id].policy);
		if (err < 0) {
			rcu_read_unlock();
			return err;
		}

		if (nc->call_rcu) {
			err = nc->call_rcu(net->nfnl, skb, nlh,
					   (const struct nlattr **)cda);
			rcu_read_unlock();
		} else {
			rcu_read_unlock();
			nfnl_lock(subsys_id);
			if (rcu_dereference_protected(table[subsys_id].subsys,
				lockdep_is_held(&table[subsys_id].mutex)) != ss ||
			    nfnetlink_find_client(type, ss) != nc)
				err = -EAGAIN;
			else if (nc->call)
				err = nc->call(net->nfnl, skb, nlh,
						   (const struct nlattr **)cda);
			else
				err = -EINVAL;
			nfnl_unlock(subsys_id);
		}
		if (err == -EAGAIN)
			goto replay;
		return err;
	}
}
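
The non-call_rcu branch above captures a useful idiom: the subsystem was found under rcu_read_lock(), so once RCU is dropped and the per-subsystem mutex is taken, the pointer is fetched again with rcu_dereference_protected() and the message is replayed (-EAGAIN) if the subsystem or callback changed in the meantime. A minimal sketch of that revalidate-after-relocking idiom, with hypothetical names:

struct handler {
	void (*fn)(void);
};

static DEFINE_MUTEX(reg_mutex);
static struct handler __rcu *active_handler;

/* Hypothetical: look the handler up under RCU, then revalidate under the
 * update-side mutex before using it; the caller retries on -EAGAIN. */
static int call_active_handler(void)
{
	struct handler *h;
	int err = 0;

	rcu_read_lock();
	h = rcu_dereference(active_handler);
	rcu_read_unlock();

	mutex_lock(&reg_mutex);
	if (rcu_dereference_protected(active_handler,
				      lockdep_is_held(&reg_mutex)) != h)
		err = -EAGAIN;		/* replaced meanwhile: replay */
	else if (h)
		h->fn();
	else
		err = -ENOENT;
	mutex_unlock(&reg_mutex);
	return err;
}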
Example #18
0
struct nfp_reprs *
nfp_reprs_get_locked(struct nfp_app *app, enum nfp_repr_type type)
{
	return rcu_dereference_protected(app->reprs[type],
					 lockdep_is_held(&app->pf->lock));
}
Example #19
0
static ssize_t iwl_dbgfs_mac_params_read(struct file *file,
					 char __user *user_buf,
					 size_t count, loff_t *ppos)
{
	struct ieee80211_vif *vif = file->private_data;
	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
	struct iwl_mvm *mvm = mvmvif->mvm;
	u8 ap_sta_id;
	struct ieee80211_chanctx_conf *chanctx_conf;
	char buf[512];
	int bufsz = sizeof(buf);
	int pos = 0;
	int i;

	mutex_lock(&mvm->mutex);

	ap_sta_id = mvmvif->ap_sta_id;

	switch (ieee80211_vif_type_p2p(vif)) {
	case NL80211_IFTYPE_ADHOC:
		pos += scnprintf(buf+pos, bufsz-pos, "type: ibss\n");
		break;
	case NL80211_IFTYPE_STATION:
		pos += scnprintf(buf+pos, bufsz-pos, "type: bss\n");
		break;
	case NL80211_IFTYPE_AP:
		pos += scnprintf(buf+pos, bufsz-pos, "type: ap\n");
		break;
	case NL80211_IFTYPE_P2P_CLIENT:
		pos += scnprintf(buf+pos, bufsz-pos, "type: p2p client\n");
		break;
	case NL80211_IFTYPE_P2P_GO:
		pos += scnprintf(buf+pos, bufsz-pos, "type: p2p go\n");
		break;
	case NL80211_IFTYPE_P2P_DEVICE:
		pos += scnprintf(buf+pos, bufsz-pos, "type: p2p dev\n");
		break;
	default:
		break;
	}

	pos += scnprintf(buf+pos, bufsz-pos, "mac id/color: %d / %d\n",
			 mvmvif->id, mvmvif->color);
	pos += scnprintf(buf+pos, bufsz-pos, "bssid: %pM\n",
			 vif->bss_conf.bssid);
	pos += scnprintf(buf+pos, bufsz-pos, "QoS:\n");
	for (i = 0; i < ARRAY_SIZE(mvmvif->queue_params); i++)
		pos += scnprintf(buf+pos, bufsz-pos,
				 "\t%d: txop:%d - cw_min:%d - cw_max = %d - aifs = %d upasd = %d\n",
				 i, mvmvif->queue_params[i].txop,
				 mvmvif->queue_params[i].cw_min,
				 mvmvif->queue_params[i].cw_max,
				 mvmvif->queue_params[i].aifs,
				 mvmvif->queue_params[i].uapsd);

	if (vif->type == NL80211_IFTYPE_STATION &&
	    ap_sta_id != IWL_MVM_STATION_COUNT) {
		struct ieee80211_sta *sta;

		sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[ap_sta_id],
						lockdep_is_held(&mvm->mutex));
		if (!IS_ERR_OR_NULL(sta)) {
			struct iwl_mvm_sta *mvm_sta = (void *)sta->drv_priv;

			pos += scnprintf(buf+pos, bufsz-pos,
					 "ap_sta_id %d - reduced Tx power %d force %d\n",
					 ap_sta_id,
					 mvm_sta->bt_reduced_txpower,
					 mvm_sta->bt_reduced_txpower_dbg);
		}
	}

	rcu_read_lock();
	chanctx_conf = rcu_dereference(vif->chanctx_conf);
	if (chanctx_conf)
		pos += scnprintf(buf+pos, bufsz-pos,
				 "idle rx chains %d, active rx chains: %d\n",
				 chanctx_conf->rx_chains_static,
				 chanctx_conf->rx_chains_dynamic);
	rcu_read_unlock();

	mutex_unlock(&mvm->mutex);

	return simple_read_from_buffer(user_buf, count, ppos, buf, pos);
}
Example #20
0
struct net_device *
nfp_repr_get_locked(struct nfp_app *app, struct nfp_reprs *set, unsigned int id)
{
	return rcu_dereference_protected(set->reprs[id],
					 lockdep_is_held(&app->pf->lock));
}
Example #21
0
static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
{
	struct socket *sock, *oldsock;
	struct vhost_virtqueue *vq;
	struct vhost_ubuf_ref *ubufs, *oldubufs = NULL;
	int r;

	mutex_lock(&n->dev.mutex);
	r = vhost_dev_check_owner(&n->dev);
	if (r)
		goto err;

	if (index >= VHOST_NET_VQ_MAX) {
		r = -ENOBUFS;
		goto err;
	}
	vq = n->vqs + index;
	mutex_lock(&vq->mutex);

	/* Verify that ring has been setup correctly. */
	if (!vhost_vq_access_ok(vq)) {
		r = -EFAULT;
		goto err_vq;
	}
	sock = get_socket(fd);
	if (IS_ERR(sock)) {
		r = PTR_ERR(sock);
		goto err_vq;
	}

	/* start polling new socket */
	oldsock = rcu_dereference_protected(vq->private_data,
					    lockdep_is_held(&vq->mutex));
	if (sock != oldsock) {
		ubufs = vhost_ubuf_alloc(vq, sock && vhost_sock_zcopy(sock));
		if (IS_ERR(ubufs)) {
			r = PTR_ERR(ubufs);
			goto err_ubufs;
		}
		oldubufs = vq->ubufs;
		vq->ubufs = ubufs;
		vhost_net_disable_vq(n, vq);
		rcu_assign_pointer(vq->private_data, sock);
		vhost_net_enable_vq(n, vq);

		r = vhost_init_used(vq);
		if (r)
			goto err_vq;
	}

	mutex_unlock(&vq->mutex);

	if (oldubufs) {
		vhost_ubuf_put_and_wait(oldubufs);
		mutex_lock(&vq->mutex);
		vhost_zerocopy_signal_used(vq);
		mutex_unlock(&vq->mutex);
	}

	if (oldsock) {
		vhost_net_flush_vq(n, index);
		fput(oldsock->file);
	}

	mutex_unlock(&n->dev.mutex);
	return 0;

err_ubufs:
	fput(sock->file);
err_vq:
	mutex_unlock(&vq->mutex);
err:
	mutex_unlock(&n->dev.mutex);
	return r;
}
Example #22
0
static int tunnel_key_init(struct net *net, struct nlattr *nla,
			   struct nlattr *est, struct tc_action **a,
			   int ovr, int bind, bool rtnl_held,
			   struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
	struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
	struct tcf_tunnel_key_params *params_new;
	struct metadata_dst *metadata = NULL;
	struct tc_tunnel_key *parm;
	struct tcf_tunnel_key *t;
	bool exists = false;
	__be16 dst_port = 0;
	__be64 key_id = 0;
	int opts_len = 0;
	__be16 flags = 0;
	u8 tos, ttl;
	int ret = 0;
	int err;

	if (!nla) {
		NL_SET_ERR_MSG(extack, "Tunnel requires attributes to be passed");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy,
			       extack);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes");
		return err;
	}

	if (!tb[TCA_TUNNEL_KEY_PARMS]) {
		NL_SET_ERR_MSG(extack, "Missing tunnel key parameters");
		return -EINVAL;
	}

	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
	if (err < 0)
		return err;
	exists = err;
	if (exists && bind)
		return 0;

	switch (parm->t_action) {
	case TCA_TUNNEL_KEY_ACT_RELEASE:
		break;
	case TCA_TUNNEL_KEY_ACT_SET:
		if (tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
			__be32 key32;

			key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]);
			key_id = key32_to_tunnel_id(key32);
			flags = TUNNEL_KEY;
		}

		flags |= TUNNEL_CSUM;
		if (tb[TCA_TUNNEL_KEY_NO_CSUM] &&
		    nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM]))
			flags &= ~TUNNEL_CSUM;

		if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
			dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);

		if (tb[TCA_TUNNEL_KEY_ENC_OPTS]) {
			opts_len = tunnel_key_get_opts_len(tb[TCA_TUNNEL_KEY_ENC_OPTS],
							   extack);
			if (opts_len < 0) {
				ret = opts_len;
				goto err_out;
			}
		}

		tos = 0;
		if (tb[TCA_TUNNEL_KEY_ENC_TOS])
			tos = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TOS]);
		ttl = 0;
		if (tb[TCA_TUNNEL_KEY_ENC_TTL])
			ttl = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TTL]);

		if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
		    tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
			__be32 saddr;
			__be32 daddr;

			saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
			daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);

			metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl,
						    dst_port, flags,
						    key_id, opts_len);
		} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
			   tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
			struct in6_addr saddr;
			struct in6_addr daddr;

			saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
			daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);

			metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
						      0, flags,
						      key_id, 0);
		} else {
			NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst");
			ret = -EINVAL;
			goto err_out;
		}

		if (!metadata) {
			NL_SET_ERR_MSG(extack, "Cannot allocate tunnel metadata dst");
			ret = -ENOMEM;
			goto err_out;
		}

		if (opts_len) {
			ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
						  &metadata->u.tun_info,
						  opts_len, extack);
			if (ret < 0)
				goto release_tun_meta;
		}

		metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
		break;
	default:
		NL_SET_ERR_MSG(extack, "Unknown tunnel key action");
		ret = -EINVAL;
		goto err_out;
	}

	if (!exists) {
		ret = tcf_idr_create(tn, parm->index, est, a,
				     &act_tunnel_key_ops, bind, true);
		if (ret) {
			NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
			goto release_tun_meta;
		}

		ret = ACT_P_CREATED;
	} else if (!ovr) {
		NL_SET_ERR_MSG(extack, "TC IDR already exists");
		ret = -EEXIST;
		goto release_tun_meta;
	}

	t = to_tunnel_key(*a);

	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
	if (unlikely(!params_new)) {
		NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
		ret = -ENOMEM;
		exists = true;
		goto release_tun_meta;
	}
	params_new->tcft_action = parm->t_action;
	params_new->tcft_enc_metadata = metadata;

	spin_lock_bh(&t->tcf_lock);
	t->tcf_action = parm->action;
	rcu_swap_protected(t->params, params_new,
			   lockdep_is_held(&t->tcf_lock));
	spin_unlock_bh(&t->tcf_lock);
	if (params_new)
		kfree_rcu(params_new, rcu);

	if (ret == ACT_P_CREATED)
		tcf_idr_insert(tn, *a);

	return ret;

release_tun_meta:
	dst_release(&metadata->dst);

err_out:
	if (exists)
		tcf_idr_release(*a, bind);
	else
		tcf_idr_cleanup(tn, parm->index);
	return ret;
}
Example #23
/**
 * dns_query - Query the DNS
 * @type: Query type (or NULL for straight host->IP lookup)
 * @name: Name to look up
 * @namelen: Length of name
 * @options: Request options (or NULL if no options)
 * @_result: Where to place the returned data.
 * @_expiry: Where to store the result expiry time (or NULL)
 *
 * The data will be returned in the pointer at *_result, and the caller is
 * responsible for freeing it.
 *
 * The description should be of the form "[<query_type>:]<domain_name>", and
 * the options need to be appropriate for the query type requested.  If no
 * query_type is given, then the query is a straight hostname to IP address
 * lookup.
 *
 * The DNS resolution lookup is performed by upcalling to userspace by way of
 * requesting a key of type dns_resolver.
 *
 * Returns the size of the result on success, -ve error code otherwise.
 */
int dns_query(const char *type, const char *name, size_t namelen,
	      const char *options, char **_result, time_t *_expiry)
{
	struct key *rkey;
	struct user_key_payload *upayload;
	const struct cred *saved_cred;
	size_t typelen, desclen;
	char *desc, *cp;
	int ret, len;

	kenter("%s,%*.*s,%zu,%s",
	       type, (int)namelen, (int)namelen, name, namelen, options);

	if (!name || namelen == 0 || !_result)
		return -EINVAL;

	/* construct the query key description as "[<type>:]<name>" */
	typelen = 0;
	desclen = 0;
	if (type) {
		typelen = strlen(type);
		if (typelen < 1)
			return -EINVAL;
		desclen += typelen + 1;
	}

	if (!namelen)
		namelen = strlen(name);
	if (namelen < 3)
		return -EINVAL;
	desclen += namelen + 1;

	desc = kmalloc(desclen, GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	cp = desc;
	if (type) {
		memcpy(cp, type, typelen);
		cp += typelen;
		*cp++ = ':';
	}
	memcpy(cp, name, namelen);
	cp += namelen;
	*cp = '\0';

	if (!options)
		options = "";
	kdebug("call request_key(,%s,%s)", desc, options);

	/* make the upcall, using special credentials to prevent the use of
	 * add_key() to preinstall malicious redirections
	 */
	saved_cred = override_creds(dns_resolver_cache);
	rkey = request_key(&key_type_dns_resolver, desc, options);
	revert_creds(saved_cred);
	kfree(desc);
	if (IS_ERR(rkey)) {
		ret = PTR_ERR(rkey);
		goto out;
	}

	down_read(&rkey->sem);
	set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
	rkey->perm |= KEY_USR_VIEW;

	ret = key_validate(rkey);
	if (ret < 0)
		goto put;

	/* If the DNS server gave an error, return that to the caller */
	ret = rkey->type_data.x[0];
	if (ret)
		goto put;

	upayload = rcu_dereference_protected(rkey->payload.data,
					     lockdep_is_held(&rkey->sem));
	len = upayload->datalen;

	ret = -ENOMEM;
	*_result = kmalloc(len + 1, GFP_KERNEL);
	if (!*_result)
		goto put;

	memcpy(*_result, upayload->data, len + 1);
	if (_expiry)
		*_expiry = rkey->expiry;

	ret = len;
put:
	up_read(&rkey->sem);
	key_put(rkey);
out:
	kleave(" = %d", ret);
	return ret;
}
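/*
 * Illustrative sketch only (hypothetical caller, not part of the snippet
 * above): per the kernel-doc, dns_query() returns the payload length on
 * success, a negative errno otherwise, and the caller must kfree() the
 * buffer returned through *_result.
 */
static int demo_lookup_host(const char *hostname)
{
	char *payload = NULL;
	time_t expiry;
	int len;

	/* A NULL type requests a plain hostname -> IP address lookup. */
	len = dns_query(NULL, hostname, strlen(hostname), NULL,
			&payload, &expiry);
	if (len < 0)
		return len;

	pr_info("resolved %s to %.*s (expires %ld)\n",
		hostname, len, payload, (long)expiry);
	kfree(payload);
	return 0;
}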

static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
			   int bind, int ref)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tcf_tunnel_key *t = to_tunnel_key(a);
	struct tcf_tunnel_key_params *params;
	struct tc_tunnel_key opt = {
		.index    = t->tcf_index,
		.refcnt   = refcount_read(&t->tcf_refcnt) - ref,
		.bindcnt  = atomic_read(&t->tcf_bindcnt) - bind,
	};
	struct tcf_t tm;

	spin_lock_bh(&t->tcf_lock);
	params = rcu_dereference_protected(t->params,
					   lockdep_is_held(&t->tcf_lock));
	opt.action   = t->tcf_action;
	opt.t_action = params->tcft_action;

	if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
		struct ip_tunnel_info *info =
			&params->tcft_enc_metadata->u.tun_info;
		struct ip_tunnel_key *key = &info->key;
		__be32 key_id = tunnel_id_to_key32(key->tun_id);

		if (((key->tun_flags & TUNNEL_KEY) &&
		     nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) ||
		    tunnel_key_dump_addresses(skb,
					      &params->tcft_enc_metadata->u.tun_info) ||
		    (key->tp_dst &&
		      nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT,
				   key->tp_dst)) ||
		    nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
			       !(key->tun_flags & TUNNEL_CSUM)) ||
		    tunnel_key_opts_dump(skb, info))
			goto nla_put_failure;

		if (key->tos && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TOS, key->tos))
			goto nla_put_failure;

		if (key->ttl && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TTL, key->ttl))
			goto nla_put_failure;
	}

	tcf_tm_dump(&tm, &t->tcf_tm);
	if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm),
			  &tm, TCA_TUNNEL_KEY_PAD))
		goto nla_put_failure;
	spin_unlock_bh(&t->tcf_lock);

	return skb->len;

nla_put_failure:
	spin_unlock_bh(&t->tcf_lock);
	nlmsg_trim(skb, b);
	return -1;
}

static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
			     struct netlink_callback *cb, int type,
			     const struct tc_action_ops *ops,
			     struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);

	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}

static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
{
	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);

	return tcf_idr_search(tn, a, index);
}

static struct tc_action_ops act_tunnel_key_ops = {
	.kind		=	"tunnel_key",
	.type		=	TCA_ACT_TUNNEL_KEY,
	.owner		=	THIS_MODULE,
	.act		=	tunnel_key_act,
	.dump		=	tunnel_key_dump,
	.init		=	tunnel_key_init,
	.cleanup	=	tunnel_key_release,
	.walk		=	tunnel_key_walker,
	.lookup		=	tunnel_key_search,
	.size		=	sizeof(struct tcf_tunnel_key),
};

static __net_init int tunnel_key_init_net(struct net *net)
{
	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);

	return tc_action_net_init(tn, &act_tunnel_key_ops);
}

static void __net_exit tunnel_key_exit_net(struct list_head *net_list)
{
	tc_action_net_exit(net_list, tunnel_key_net_id);
}

static struct pernet_operations tunnel_key_net_ops = {
	.init = tunnel_key_init_net,
	.exit_batch = tunnel_key_exit_net,
	.id   = &tunnel_key_net_id,
	.size = sizeof(struct tc_action_net),
};

static int __init tunnel_key_init_module(void)
{
	return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
}

static void __exit tunnel_key_cleanup_module(void)
{
	tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
}

module_init(tunnel_key_init_module);
module_exit(tunnel_key_cleanup_module);

MODULE_AUTHOR("Amir Vadai <*****@*****.**>");
MODULE_DESCRIPTION("ip tunnel manipulation actions");
MODULE_LICENSE("GPL v2");

int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
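/*
 * Illustrative sketch only (the macro name is made up): a helper like
 * lockdep_rht_mutex_is_held() is typically wrapped in a dereference macro
 * so that updaters holding ht->mutex can reach RCU-protected pointers
 * without tripping lockdep.
 */
#define demo_rht_dereference(p, ht) \
	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))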
Example #26
static ssize_t sta_tx_latency_stat_write(struct file *file,
					 const char __user *userbuf,
					 size_t count, loff_t *ppos)
{
	struct ieee80211_local *local = file->private_data;
	char buf[128] = {};
	char *bins = buf;
	char *token;
	int buf_size, i, alloc_size;
	int prev_bin = 0;
	int n_ranges = 0;
	int ret = count;
	struct ieee80211_tx_latency_bin_ranges  *tx_latency;

	if (sizeof(buf) <= count)
		return -EINVAL;
	buf_size = count;
	if (copy_from_user(buf, userbuf, buf_size))
		return -EFAULT;

	mutex_lock(&local->sta_mtx);

	/* cannot change config once we have stations */
	if (local->num_sta)
		goto unlock;

	tx_latency =
		rcu_dereference_protected(local->tx_latency,
					  lockdep_is_held(&local->sta_mtx));

	/* disable Tx statistics */
	if (!strcmp(buf, TX_LATENCY_DISABLED)) {
		if (!tx_latency)
			goto unlock;
		RCU_INIT_POINTER(local->tx_latency, NULL);
		synchronize_rcu();
		kfree(tx_latency);
		goto unlock;
	}

	/* Tx latency already enabled */
	if (tx_latency)
		goto unlock;

	if (strcmp(TX_LATENCY_BINS_DISABLED, buf)) {
		/* check how many bins and between what ranges user requested */
		token = buf;
		while (*token != '\0') {
			if (*token == TX_LATENCY_BIN_DELIMTER_C)
				n_ranges++;
			token++;
		}
		n_ranges++;
	}

	alloc_size = sizeof(struct ieee80211_tx_latency_bin_ranges) +
		     n_ranges * sizeof(u32);
	tx_latency = kzalloc(alloc_size, GFP_ATOMIC);
	if (!tx_latency) {
		ret = -ENOMEM;
		goto unlock;
	}
	tx_latency->n_ranges = n_ranges;
	for (i = 0; i < n_ranges; i++) { /* setting bin ranges */
		token = strsep(&bins, TX_LATENCY_BIN_DELIMTER_S);
		sscanf(token, "%d", &tx_latency->ranges[i]);
		/* bins values should be in ascending order */
		if (prev_bin >= tx_latency->ranges[i]) {
			ret = -EINVAL;
			kfree(tx_latency);
			goto unlock;
		}
		prev_bin = tx_latency->ranges[i];
	}
	rcu_assign_pointer(local->tx_latency, tx_latency);

unlock:
	mutex_unlock(&local->sta_mtx);

	return ret;
}
Example #27
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<29) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}
				rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(rps_sock_flow_table, sock_table);
			if (sock_table) {
				static_key_slow_inc(&rps_needed);
				static_key_slow_inc(&rfs_needed);
			}
			if (orig_sock_table) {
				static_key_slow_dec(&rps_needed);
				static_key_slow_dec(&rfs_needed);
				synchronize_rcu();
				vfree(orig_sock_table);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp,
				 loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse_user(buffer, *lenp, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				synchronize_rcu();
				kfree(cur);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->num_buckets = netdev_flow_limit_table_len;
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		char kbuf[128];

		if (*ppos || !*lenp) {
			*lenp = 0;
			goto done;
		}

		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		len = min(sizeof(kbuf) - 1, *lenp);
		len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
		if (!len) {
			*lenp = 0;
			goto done;
		}
		if (len < *lenp)
			kbuf[len++] = '\n';
		if (copy_to_user(buffer, kbuf, len)) {
			ret = -EFAULT;
			goto done;
		}
		*lenp = len;
		*ppos += len;
	}

done:
	free_cpumask_var(mask);
	return ret;
}
Example #28
static inline struct sta_info *
next_hop_deref_protected(struct mesh_path *mpath)
{
	return rcu_dereference_protected(mpath->next_hop,
					 lockdep_is_held(&mpath->state_lock));
}
Example #29
int lockdep_tasklist_lock_is_held(void)
{
	return lockdep_is_held(&tasklist_lock);
}
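/*
 * Illustrative sketch only (demo_node is a made-up structure, not kernel
 * code): a predicate like lockdep_tasklist_lock_is_held() is usually fed
 * to rcu_dereference_check(), which accepts the access either from an RCU
 * read-side critical section or while tasklist_lock is held.
 */
struct demo_node {
	struct demo_node __rcu *next;
};

static struct demo_node *demo_next_node(struct demo_node *n)
{
	return rcu_dereference_check(n->next,
				     lockdep_tasklist_lock_is_held());
}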
Example #30
/*
 * Probe all of a vlserver's addresses to find out the best route and to
 * query its capabilities.
 */
static bool afs_do_probe_vlserver(struct afs_net *net,
				  struct afs_vlserver *server,
				  struct key *key,
				  unsigned int server_index,
				  struct afs_error *_e)
{
	struct afs_addr_cursor ac = {
		.index = 0,
	};
	struct afs_call *call;
	bool in_progress = false;

	_enter("%s", server->name);

	read_lock(&server->lock);
	ac.alist = rcu_dereference_protected(server->addresses,
					     lockdep_is_held(&server->lock));
	read_unlock(&server->lock);

	atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
	memset(&server->probe, 0, sizeof(server->probe));
	server->probe.rtt = UINT_MAX;

	for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
		call = afs_vl_get_capabilities(net, &ac, key, server,
					       server_index);
		if (!IS_ERR(call)) {
			afs_put_call(call);
			in_progress = true;
		} else {
			afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code);
		}
	}

	if (!in_progress)
		afs_vl_probe_done(server);
	return in_progress;
}

/*
 * Send off probes to all unprobed servers.
 */
int afs_send_vl_probes(struct afs_net *net, struct key *key,
		       struct afs_vlserver_list *vllist)
{
	struct afs_vlserver *server;
	struct afs_error e;
	bool in_progress = false;
	int i;

	e.error = 0;
	e.responded = false;
	for (i = 0; i < vllist->nr_servers; i++) {
		server = vllist->servers[i].server;
		if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
			continue;

		if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags) &&
		    afs_do_probe_vlserver(net, server, key, i, &e))
			in_progress = true;
	}

	return in_progress ? 0 : e.error;
}

/*
 * Wait for the first as-yet untried server to respond.
 */
int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
			   unsigned long untried)
{
	struct wait_queue_entry *waits;
	struct afs_vlserver *server;
	unsigned int rtt = UINT_MAX;
	bool have_responders = false;
	int pref = -1, i;

	_enter("%u,%lx", vllist->nr_servers, untried);

	/* Only wait for servers that have a probe outstanding. */
	for (i = 0; i < vllist->nr_servers; i++) {
		if (test_bit(i, &untried)) {
			server = vllist->servers[i].server;
			if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
				__clear_bit(i, &untried);
			if (server->probe.responded)
				have_responders = true;
		}
	}
	if (have_responders || !untried)
		return 0;

	waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL);
	if (!waits)
		return -ENOMEM;

	for (i = 0; i < vllist->nr_servers; i++) {
		if (test_bit(i, &untried)) {
			server = vllist->servers[i].server;
			init_waitqueue_entry(&waits[i], current);
			add_wait_queue(&server->probe_wq, &waits[i]);
		}
	}

	for (;;) {
		bool still_probing = false;

		set_current_state(TASK_INTERRUPTIBLE);
		for (i = 0; i < vllist->nr_servers; i++) {
			if (test_bit(i, &untried)) {
				server = vllist->servers[i].server;
				if (server->probe.responded)
					goto stop;
				if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
					still_probing = true;
			}
		}

		if (!still_probing || signal_pending(current))
			goto stop;
		schedule();
	}

stop:
	set_current_state(TASK_RUNNING);

	for (i = 0; i < vllist->nr_servers; i++) {
		if (test_bit(i, &untried)) {
			server = vllist->servers[i].server;
			if (server->probe.responded &&
			    server->probe.rtt < rtt) {
				pref = i;
				rtt = server->probe.rtt;
			}

			remove_wait_queue(&server->probe_wq, &waits[i]);
		}
	}

	kfree(waits);

	if (pref == -1 && signal_pending(current))
		return -ERESTARTSYS;

	if (pref >= 0)
		vllist->preferred = pref;

	_leave(" = 0 [%u]", pref);
	return 0;
}
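/*
 * Illustrative sketch only (hypothetical caller, not taken from fs/afs):
 * the two helpers above are meant to be used back to back -- kick off the
 * probes, then sleep until the first untried server answers, after which
 * vllist->preferred indexes the fastest responder.
 */
static int demo_probe_and_pick(struct afs_net *net, struct key *key,
			       struct afs_vlserver_list *vllist)
{
	/* Assumes nr_servers fits in one bitmask word. */
	unsigned long untried = (1UL << vllist->nr_servers) - 1;
	int ret;

	ret = afs_send_vl_probes(net, key, vllist);
	if (ret < 0)
		return ret;

	ret = afs_wait_for_vl_probes(vllist, untried);
	if (ret < 0)
		return ret;

	return vllist->preferred;
}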