Example #1
void vhost_net_stop(struct vhost_net *net,
                    VirtIODevice *dev)
{
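    /* An fd of -1 makes VHOST_NET_SET_BACKEND detach the backend from each queue. */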
    struct vhost_vring_file file = { .fd = -1 };

    for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
        int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
        assert(r >= 0);
    }
    net->vc->info->poll(net->vc, true);
    vhost_dev_stop(&net->dev, dev);
    if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
        tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
    }
    vhost_dev_disable_notifiers(&net->dev, dev);
}

void vhost_net_cleanup(struct vhost_net *net)
{
    vhost_dev_cleanup(&net->dev);
    if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
        tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr));
    }
    g_free(net);
}
Example #2
struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd)
{
    int r;
    struct vhost_net *net = qemu_malloc(sizeof *net);
    if (!backend) {
        fprintf(stderr, "vhost-net requires backend to be setup\n");
        goto fail;
    }
    r = vhost_net_get_fd(backend);
    if (r < 0) {
        goto fail;
    }
    net->vc = backend;
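    /* If the tap backend cannot supply a vnet header, vhost must generate one itself. */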
    net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
        (1 << VHOST_NET_F_VIRTIO_NET_HDR);
    net->backend = r;

    r = vhost_dev_init(&net->dev, devfd);
    if (r < 0) {
        goto fail;
    }
    if (~net->dev.features & net->dev.backend_features) {
        fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n",
                (uint64_t)(~net->dev.features & net->dev.backend_features));
        vhost_dev_cleanup(&net->dev);
        goto fail;
    }

    /* Set sane init value. Override when guest acks. */
    vhost_net_ack_features(net, 0);
    return net;
fail:
    qemu_free(net);
    return NULL;
}
Example #3
struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
    int r;
    bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
    struct vhost_net *net = g_malloc(sizeof *net);

    if (!options->net_backend) {
        fprintf(stderr, "vhost-net requires net backend to be setup\n");
        goto fail;
    }
    net->nc = options->net_backend;

    net->dev.max_queues = 1;
    net->dev.nvqs = 2;
    net->dev.vqs = net->vqs;

    if (backend_kernel) {
        r = vhost_net_get_fd(options->net_backend);
        if (r < 0) {
            goto fail;
        }
        net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
            ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
        net->backend = r;
        net->dev.protocol_features = 0;
    } else {
        net->dev.backend_features = 0;
        net->dev.protocol_features = 0;
        net->backend = -1;

        /* vhost-user needs vq_index to initiate a specific queue pair */
        net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
    }

    r = vhost_dev_init(&net->dev, options->opaque,
                       options->backend_type);
    if (r < 0) {
        goto fail;
    }
    if (backend_kernel) {
        if (!qemu_has_vnet_hdr_len(options->net_backend,
                                   sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
            net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
        }
        if (~net->dev.features & net->dev.backend_features) {
            fprintf(stderr, "vhost lacks feature mask %" PRIu64
                   " for backend\n",
                   (uint64_t)(~net->dev.features & net->dev.backend_features));
            vhost_dev_cleanup(&net->dev);
            goto fail;
        }
    }
    /* Set sane init value. Override when guest acks. */
    vhost_net_ack_features(net, 0);
    return net;
fail:
    g_free(net);
    return NULL;
}
Example #4
static void vhost_vsock_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VHostVSock *vsock = VHOST_VSOCK(dev);

    vhost_vsock_post_load_timer_cleanup(vsock);

    /* This will stop vhost backend if appropriate. */
    vhost_vsock_set_status(vdev, 0);

    vhost_dev_cleanup(&vsock->vhost_dev);
    virtio_cleanup(vdev);
}
Example #5
/* Caller should have device mutex */
long vhost_dev_reset_owner(struct vhost_dev *dev)
{
	struct vhost_memory *memory;

	/* Restore memory to default empty mapping. */
	memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
	if (!memory)
		return -ENOMEM;

	vhost_dev_cleanup(dev);

	memory->nregions = 0;
	dev->memory = memory;
	return 0;
}
Example #6
static void vhost_scsi_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VHostSCSI *s = VHOST_SCSI(dev);

    migrate_del_blocker(s->migration_blocker);
    error_free(s->migration_blocker);

    /* This will stop vhost backend. */
    vhost_scsi_set_status(vdev, 0);

    vhost_dev_cleanup(&s->dev);
    g_free(s->dev.vqs);

    virtio_scsi_common_unrealize(dev, errp);
}
Example #7
static void vhost_user_scsi_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VHostUserSCSI *s = VHOST_USER_SCSI(dev);
    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
    struct vhost_virtqueue *vqs = vsc->dev.vqs;

    /* This will stop the vhost backend. */
    vhost_user_scsi_set_status(vdev, 0);

    vhost_dev_cleanup(&vsc->dev);
    g_free(vqs);

    virtio_scsi_common_unrealize(dev, errp);
    vhost_user_cleanup(&s->vhost_user);
}
Example #8
void vhost_net_stop(struct vhost_net *net,
                    VirtIODevice *dev)
{
    struct vhost_vring_file file = { .fd = -1 };

    for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
        int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
        assert(r >= 0);
    }
    net->vc->info->poll(net->vc, true);
    vhost_dev_stop(&net->dev, dev);
}

void vhost_net_cleanup(struct vhost_net *net)
{
    vhost_dev_cleanup(&net->dev);
    qemu_free(net);
}
Example #9
static int vhost_net_release(struct inode *inode, struct file *f)
{
	struct vhost_net *n = f->private_data;
	struct socket *tx_sock;
	struct socket *rx_sock;

	vhost_net_stop(n, &tx_sock, &rx_sock);
	vhost_net_flush(n);
	vhost_dev_cleanup(&n->dev, false);
	if (tx_sock)
		fput(tx_sock->file);
	if (rx_sock)
		fput(rx_sock->file);
	/* We do an extra flush before freeing memory,
	 * since jobs can re-queue themselves. */
	vhost_net_flush(n);
	kfree(n);
	return 0;
}
Example #10
struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
                                 bool force)
{
    int r;
    struct vhost_net *net = g_malloc(sizeof *net);
    if (!backend) {
        fprintf(stderr, "vhost-net requires backend to be setup\n");
        goto fail;
    }
    r = vhost_net_get_fd(backend);
    if (r < 0) {
        goto fail;
    }
    net->nc = backend;
    net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
        (1 << VHOST_NET_F_VIRTIO_NET_HDR);
    net->backend = r;

    net->dev.nvqs = 2;
    net->dev.vqs = net->vqs;

    r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
    if (r < 0) {
        goto fail;
    }
    if (!tap_has_vnet_hdr_len(backend,
                              sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
        net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
    }
    if (~net->dev.features & net->dev.backend_features) {
        fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n",
                (uint64_t)(~net->dev.features & net->dev.backend_features));
        vhost_dev_cleanup(&net->dev);
        goto fail;
    }

    /* Set sane init value. Override when guest acks. */
    vhost_net_ack_features(net, 0);
    return net;
fail:
    g_free(net);
    return NULL;
}
Example #11
static int vhost_net_release(struct inode *inode, struct file *f)
{
	struct vhost_net *n = f->private_data;
	struct socket *tx_sock;
	struct socket *rx_sock;

	vhost_net_stop(n, &tx_sock, &rx_sock);
	vhost_net_flush(n);
	vhost_dev_stop(&n->dev);
	vhost_dev_cleanup(&n->dev, false);
	vhost_net_vq_reset(n);
	if (tx_sock)
		sockfd_put(tx_sock);
	if (rx_sock)
		sockfd_put(rx_sock);
	/* Make sure no callbacks are outstanding */
	synchronize_rcu_bh();
	/* We do an extra flush before freeing memory,
	 * since jobs can re-queue themselves. */
	vhost_net_flush(n);
	kfree(n->dev.vqs);
	kvfree(n);
	return 0;
}
Example #12
void vhost_net_cleanup(struct vhost_net *net)
{
    vhost_dev_cleanup(&net->dev);
    g_free(net);
}
Example #13
static void vhost_vsock_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VHostVSock *vsock = VHOST_VSOCK(dev);
    int vhostfd;
    int ret;

    /* Refuse to use reserved CID numbers */
    if (vsock->conf.guest_cid <= 2) {
        error_setg(errp, "guest-cid property must be greater than 2");
        return;
    }

    if (vsock->conf.guest_cid > UINT32_MAX) {
        error_setg(errp, "guest-cid property must be a 32-bit number");
        return;
    }

    if (vsock->conf.vhostfd) {
        vhostfd = monitor_fd_param(cur_mon, vsock->conf.vhostfd, errp);
        if (vhostfd == -1) {
            error_prepend(errp, "vhost-vsock: unable to parse vhostfd: ");
            return;
        }
    } else {
        vhostfd = open("/dev/vhost-vsock", O_RDWR);
        if (vhostfd < 0) {
            error_setg_errno(errp, -errno,
                             "vhost-vsock: failed to open vhost device");
            return;
        }
    }

    virtio_init(vdev, "vhost-vsock", VIRTIO_ID_VSOCK,
                sizeof(struct virtio_vsock_config));

    /* Receive and transmit queues belong to vhost */
    virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, vhost_vsock_handle_output);
    virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, vhost_vsock_handle_output);

    /* The event queue belongs to QEMU */
    vsock->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
                                       vhost_vsock_handle_output);

    vsock->vhost_dev.nvqs = ARRAY_SIZE(vsock->vhost_vqs);
    vsock->vhost_dev.vqs = vsock->vhost_vqs;
    ret = vhost_dev_init(&vsock->vhost_dev, (void *)(uintptr_t)vhostfd,
                         VHOST_BACKEND_TYPE_KERNEL, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed");
        goto err_virtio;
    }

    ret = vhost_vsock_set_guest_cid(vsock);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost-vsock: unable to set guest cid");
        goto err_vhost_dev;
    }

    vsock->post_load_timer = NULL;
    return;

err_vhost_dev:
    vhost_dev_cleanup(&vsock->vhost_dev);
err_virtio:
    virtio_cleanup(vdev);
    close(vhostfd);
    return;
}
Example #14
/* Expects to be always run from workqueue - which acts as
 * read-side critical section for our kind of RCU. */
static void handle_rx_big(struct vhost_net *net)
{
	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
	unsigned out, in, log, s;
	int head;
	struct vhost_log *vq_log;
	struct msghdr msg = {
		.msg_name = NULL,
		.msg_namelen = 0,
		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
		.msg_controllen = 0,
		.msg_iov = vq->iov,
		.msg_flags = MSG_DONTWAIT,
	};

	struct virtio_net_hdr hdr = {
		.flags = 0,
		.gso_type = VIRTIO_NET_HDR_GSO_NONE
	};

	size_t len, total_len = 0;
	int err;
	size_t hdr_size;
	struct socket *sock = rcu_dereference(vq->private_data);
	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
		return;

	use_mm(net->dev.mm);
	mutex_lock(&vq->mutex);
	vhost_disable_notify(vq);
	hdr_size = vq->vhost_hlen;

	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
		vq->log : NULL;

	for (;;) {
		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
					 ARRAY_SIZE(vq->iov),
					 &out, &in,
					 vq_log, &log);
		/* On error, stop handling until the next kick. */
		if (unlikely(head < 0))
			break;
		/* OK, now we need to know about added descriptors. */
		if (head == vq->num) {
			if (unlikely(vhost_enable_notify(vq))) {
				/* They have slipped one in as we were
				 * doing that: check again. */
				vhost_disable_notify(vq);
				continue;
			}
			/* Nothing new?  Wait for eventfd to tell us
			 * they refilled. */
			break;
		}
		/* We don't need to be notified again. */
		if (out) {
			vq_err(vq, "Unexpected descriptor format for RX: "
			       "out %d, int %d\n",
			       out, in);
			break;
		}
		/* Skip header. TODO: support TSO/mergeable rx buffers. */
		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
		msg.msg_iovlen = in;
		len = iov_length(vq->iov, in);
		/* Sanity check */
		if (!len) {
			vq_err(vq, "Unexpected header len for RX: "
			       "%zd expected %zd\n",
			       iov_length(vq->hdr, s), hdr_size);
			break;
		}
		err = sock->ops->recvmsg(NULL, sock, &msg,
					 len, MSG_DONTWAIT | MSG_TRUNC);
		/* TODO: Check specific error and bomb out unless EAGAIN? */
		if (err < 0) {
			vhost_discard_vq_desc(vq, 1);
			break;
		}
		/* TODO: Should check and handle checksum. */
		if (err > len) {
			pr_debug("Discarded truncated rx packet: "
				 " len %d > %zd\n", err, len);
			vhost_discard_vq_desc(vq, 1);
			continue;
		}
		len = err;
		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
		if (err) {
			vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
			       vq->iov->iov_base, err);
			break;
		}
		len += hdr_size;
		vhost_add_used_and_signal(&net->dev, vq, head, len);
		if (unlikely(vq_log))
			vhost_log_write(vq, vq_log, log, len);
		total_len += len;
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
			break;
		}
	}

	mutex_unlock(&vq->mutex);
	unuse_mm(net->dev.mm);
}

/* Expects to be always run from workqueue - which acts as
 * read-side critical section for our kind of RCU. */
static void handle_rx_mergeable(struct vhost_net *net)
{
	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
	unsigned uninitialized_var(in), log;
	struct vhost_log *vq_log;
	struct msghdr msg = {
		.msg_name = NULL,
		.msg_namelen = 0,
		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
		.msg_controllen = 0,
		.msg_iov = vq->iov,
		.msg_flags = MSG_DONTWAIT,
	};

	struct virtio_net_hdr_mrg_rxbuf hdr = {
		.hdr.flags = 0,
		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
	};

	size_t total_len = 0;
	int err, headcount;
	size_t vhost_hlen, sock_hlen;
	size_t vhost_len, sock_len;
	struct socket *sock = rcu_dereference(vq->private_data);
	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
		return;

	use_mm(net->dev.mm);
	mutex_lock(&vq->mutex);
	vhost_disable_notify(vq);
	vhost_hlen = vq->vhost_hlen;
	sock_hlen = vq->sock_hlen;

	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
		vq->log : NULL;

	while ((sock_len = peek_head_len(sock->sk))) {
		sock_len += sock_hlen;
		vhost_len = sock_len + vhost_hlen;
		headcount = get_rx_bufs(vq, vq->heads, vhost_len,
					&in, vq_log, &log);
		/* On error, stop handling until the next kick. */
		if (unlikely(headcount < 0))
			break;
		/* OK, now we need to know about added descriptors. */
		if (!headcount) {
			if (unlikely(vhost_enable_notify(vq))) {
				/* They have slipped one in as we were
				 * doing that: check again. */
				vhost_disable_notify(vq);
				continue;
			}
			/* Nothing new?  Wait for eventfd to tell us
			 * they refilled. */
			break;
		}
		/* We don't need to be notified again. */
		if (unlikely(vhost_hlen))
			/* Skip header. TODO: support TSO. */
			move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in);
		else
			/* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
			 * needed because recvmsg can modify msg_iov. */
			copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in);
		msg.msg_iovlen = in;
		err = sock->ops->recvmsg(NULL, sock, &msg,
					 sock_len, MSG_DONTWAIT | MSG_TRUNC);
		/* Userspace might have consumed the packet meanwhile:
		 * it's not supposed to do this usually, but might be hard
		 * to prevent. Discard data we got (if any) and keep going. */
		if (unlikely(err != sock_len)) {
			pr_debug("Discarded rx packet: "
				 " len %d, expected %zd\n", err, sock_len);
			vhost_discard_vq_desc(vq, headcount);
			continue;
		}
		if (unlikely(vhost_hlen) &&
		    memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0,
				      vhost_hlen)) {
			vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
			       vq->iov->iov_base);
			break;
		}
		/* TODO: Should check and handle checksum. */
		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) &&
		    memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
				      offsetof(typeof(hdr), num_buffers),
				      sizeof hdr.num_buffers)) {
			vq_err(vq, "Failed num_buffers write");
			vhost_discard_vq_desc(vq, headcount);
			break;
		}
		vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
					    headcount);
		if (unlikely(vq_log))
			vhost_log_write(vq, vq_log, log, vhost_len);
		total_len += vhost_len;
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
			break;
		}
	}

	mutex_unlock(&vq->mutex);
	unuse_mm(net->dev.mm);
}

static void handle_rx(struct vhost_net *net)
{
	if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
		handle_rx_mergeable(net);
	else
		handle_rx_big(net);
}

static void handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);

	handle_tx(net);
}

static void handle_rx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);

	handle_rx(net);
}

static void handle_tx_net(struct vhost_work *work)
{
	struct vhost_net *net = container_of(work, struct vhost_net,
					     poll[VHOST_NET_VQ_TX].work);
	handle_tx(net);
}

static void handle_rx_net(struct vhost_work *work)
{
	struct vhost_net *net = container_of(work, struct vhost_net,
					     poll[VHOST_NET_VQ_RX].work);
	handle_rx(net);
}

static int vhost_net_open(struct inode *inode, struct file *f)
{
	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
	struct vhost_dev *dev;
	int r;

	if (!n)
		return -ENOMEM;

	dev = &n->dev;
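	/* Guest kicks are handled per-virtqueue; socket readiness is polled via n->poll below. */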
	n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
	n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
	r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX);
	if (r < 0) {
		kfree(n);
		return r;
	}

	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
	n->tx_poll_state = VHOST_NET_POLL_DISABLED;

	f->private_data = n;

	return 0;
}

static void vhost_net_disable_vq(struct vhost_net *n,
				 struct vhost_virtqueue *vq)
{
	if (!vq->private_data)
		return;
	if (vq == n->vqs + VHOST_NET_VQ_TX) {
		tx_poll_stop(n);
		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
	} else
		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
}

static void vhost_net_enable_vq(struct vhost_net *n,
				struct vhost_virtqueue *vq)
{
	struct socket *sock;

	sock = rcu_dereference_protected(vq->private_data,
					 lockdep_is_held(&vq->mutex));
	if (!sock)
		return;
	if (vq == n->vqs + VHOST_NET_VQ_TX) {
		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
		tx_poll_start(n, sock);
	} else
		vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
}

static struct socket *vhost_net_stop_vq(struct vhost_net *n,
					struct vhost_virtqueue *vq)
{
	struct socket *sock;

	mutex_lock(&vq->mutex);
	sock = rcu_dereference_protected(vq->private_data,
					 lockdep_is_held(&vq->mutex));
	vhost_net_disable_vq(n, vq);
	rcu_assign_pointer(vq->private_data, NULL);
	mutex_unlock(&vq->mutex);
	return sock;
}

static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
			   struct socket **rx_sock)
{
	*tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX);
	*rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX);
}

static void vhost_net_flush_vq(struct vhost_net *n, int index)
{
	vhost_poll_flush(n->poll + index);
	vhost_poll_flush(&n->dev.vqs[index].poll);
}

static void vhost_net_flush(struct vhost_net *n)
{
	vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
	vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
}

static int vhost_net_release(struct inode *inode, struct file *f)
{
	struct vhost_net *n = f->private_data;
	struct socket *tx_sock;
	struct socket *rx_sock;

	vhost_net_stop(n, &tx_sock, &rx_sock);
	vhost_net_flush(n);
	vhost_dev_cleanup(&n->dev);
	if (tx_sock)
		fput(tx_sock->file);
	if (rx_sock)
		fput(rx_sock->file);
	/* We do an extra flush before freeing memory,
	 * since jobs can re-queue themselves. */
	vhost_net_flush(n);
	kfree(n);
	return 0;
}

static struct socket *get_raw_socket(int fd)
{
	struct {
		struct sockaddr_ll sa;
		char  buf[MAX_ADDR_LEN];
	} uaddr;
	int uaddr_len = sizeof uaddr, r;
	struct socket *sock = sockfd_lookup(fd, &r);
	if (!sock)
		return ERR_PTR(-ENOTSOCK);

	/* Parameter checking */
	if (sock->sk->sk_type != SOCK_RAW) {
		r = -ESOCKTNOSUPPORT;
		goto err;
	}

	r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
			       &uaddr_len, 0);
	if (r)
		goto err;

	if (uaddr.sa.sll_family != AF_PACKET) {
		r = -EPFNOSUPPORT;
		goto err;
	}
	return sock;
err:
	fput(sock->file);
	return ERR_PTR(r);
}

static struct socket *get_tap_socket(int fd)
{
	struct file *file = fget(fd);
	struct socket *sock;
	if (!file)
		return ERR_PTR(-EBADF);
	sock = tun_get_socket(file);
	if (!IS_ERR(sock))
		return sock;
	sock = macvtap_get_socket(file);
	if (IS_ERR(sock))
		fput(file);
	return sock;
}

static struct socket *get_socket(int fd)
{
	struct socket *sock;
	/* special case to disable backend */
	if (fd == -1)
		return NULL;
	sock = get_raw_socket(fd);
	if (!IS_ERR(sock))
		return sock;
	sock = get_tap_socket(fd);
	if (!IS_ERR(sock))
		return sock;
	return ERR_PTR(-ENOTSOCK);
}

static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
{
	struct socket *sock, *oldsock;
	struct vhost_virtqueue *vq;
	int r;

	mutex_lock(&n->dev.mutex);
	r = vhost_dev_check_owner(&n->dev);
	if (r)
		goto err;

	if (index >= VHOST_NET_VQ_MAX) {
		r = -ENOBUFS;
		goto err;
	}
	vq = n->vqs + index;
	mutex_lock(&vq->mutex);

	/* Verify that ring has been setup correctly. */
	if (!vhost_vq_access_ok(vq)) {
		r = -EFAULT;
		goto err_vq;
	}
	sock = get_socket(fd);
	if (IS_ERR(sock)) {
		r = PTR_ERR(sock);
		goto err_vq;
	}

	/* start polling new socket */
	oldsock = rcu_dereference_protected(vq->private_data,
					    lockdep_is_held(&vq->mutex));
	if (sock != oldsock) {
		vhost_net_disable_vq(n, vq);
		rcu_assign_pointer(vq->private_data, sock);
		vhost_net_enable_vq(n, vq);
	}

	mutex_unlock(&vq->mutex);

	if (oldsock) {
		vhost_net_flush_vq(n, index);
		fput(oldsock->file);
	}

	mutex_unlock(&n->dev.mutex);
	return 0;

err_vq:
	mutex_unlock(&vq->mutex);
err:
	mutex_unlock(&n->dev.mutex);
	return r;
}

static long vhost_net_reset_owner(struct vhost_net *n)
{
	struct socket *tx_sock = NULL;
	struct socket *rx_sock = NULL;
	long err;
	mutex_lock(&n->dev.mutex);
	err = vhost_dev_check_owner(&n->dev);
	if (err)
		goto done;
	vhost_net_stop(n, &tx_sock, &rx_sock);
	vhost_net_flush(n);
	err = vhost_dev_reset_owner(&n->dev);
done:
	mutex_unlock(&n->dev.mutex);
	if (tx_sock)
		fput(tx_sock->file);
	if (rx_sock)
		fput(rx_sock->file);
	return err;
}

static int vhost_net_set_features(struct vhost_net *n, u64 features)
{
	size_t vhost_hlen, sock_hlen, hdr_len;
	int i;

	hdr_len = (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ?
			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
			sizeof(struct virtio_net_hdr);
	if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {
		/* vhost provides vnet_hdr */
		vhost_hlen = hdr_len;
		sock_hlen = 0;
	} else {
		/* socket provides vnet_hdr */
		vhost_hlen = 0;
		sock_hlen = hdr_len;
	}
	mutex_lock(&n->dev.mutex);
	if ((features & (1 << VHOST_F_LOG_ALL)) &&
	    !vhost_log_access_ok(&n->dev)) {
		mutex_unlock(&n->dev.mutex);
		return -EFAULT;
	}
	n->dev.acked_features = features;
	smp_wmb();
	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
		mutex_lock(&n->vqs[i].mutex);
		n->vqs[i].vhost_hlen = vhost_hlen;
		n->vqs[i].sock_hlen = sock_hlen;
		mutex_unlock(&n->vqs[i].mutex);
	}
	vhost_net_flush(n);
	mutex_unlock(&n->dev.mutex);
	return 0;
}