Example #1
/* Release ethdev TX queue */
static void
dpdk_ethdev_tx_queue_release(unsigned lcore_id, struct vr_interface *vif)
{
    int i;
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    struct vr_dpdk_queue *tx_queue = &lcore->lcore_tx_queues[vif->vif_idx];
    struct vr_dpdk_queue_params *tx_queue_params
                        = &lcore->lcore_tx_queue_params[vif->vif_idx];

    /* remove queue params from the list of bonds to TX */
    for (i = 0; i < lcore->lcore_nb_bonds_to_tx; i++) {
        if (likely(lcore->lcore_bonds_to_tx[i] == tx_queue_params)) {
            lcore->lcore_bonds_to_tx[i] = NULL;
            lcore->lcore_nb_bonds_to_tx--;
            RTE_VERIFY(lcore->lcore_nb_bonds_to_tx <= VR_DPDK_MAX_BONDS);
            /* copy the last element to the empty spot */
            lcore->lcore_bonds_to_tx[i] = lcore->lcore_bonds_to_tx[lcore->lcore_nb_bonds_to_tx];
            break;
        }
    }

    tx_queue->txq_ops.f_tx = NULL;
    rte_wmb();

    /* flush and free the queue */
    if (tx_queue->txq_ops.f_free(tx_queue->q_queue_h)) {
        RTE_LOG(ERR, VROUTER, "    error freeing lcore %u eth device TX queue\n",
                    lcore_id);
    }

    /* reset the queue */
    vrouter_put_interface(tx_queue->q_vif);
    memset(tx_queue, 0, sizeof(*tx_queue));
    memset(tx_queue_params, 0, sizeof(*tx_queue_params));
}
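The order above matters: the TX op is unpublished first, rte_wmb() flushes that store, and only then is the underlying queue handle flushed and freed. Below is a minimal sketch of the same write ordering; the struct and function names are illustrative, not the vrouter ones, and note that the barrier only orders this core's stores — later examples pair it with synchronize_rcu() or a sleep so concurrent readers actually drain before the memory is reused.

/*
 * Minimal sketch of the "unpublish, barrier, free" pattern (illustrative
 * types; not the actual vrouter structures).
 */
#include <stdlib.h>
#include <rte_atomic.h>     /* rte_wmb() */

struct toy_queue {
    int (*f_tx)(void *handle, void *pkt);   /* polled by forwarding cores */
    void *handle;                            /* backing resource */
};

static void
toy_queue_release(struct toy_queue *q)
{
    q->f_tx = NULL;     /* 1. unpublish the TX op */
    rte_wmb();          /* 2. the NULL store is visible before teardown */
    free(q->handle);    /* 3. release the backing resource */
    q->handle = NULL;
}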
Example #2
File: mlx4.c Project: btw616/dpdk
/**
 * DPDK callback to close the device.
 *
 * Destroy all queues and objects, free memory.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx4_dev_close(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;

	DEBUG("%p: closing device \"%s\"",
	      (void *)dev,
	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
	dev->rx_pkt_burst = mlx4_rx_burst_removed;
	dev->tx_pkt_burst = mlx4_tx_burst_removed;
	rte_wmb();
	mlx4_flow_clean(priv);
	mlx4_rss_deinit(priv);
	for (i = 0; i != dev->data->nb_rx_queues; ++i)
		mlx4_rx_queue_release(dev->data->rx_queues[i]);
	for (i = 0; i != dev->data->nb_tx_queues; ++i)
		mlx4_tx_queue_release(dev->data->tx_queues[i]);
	mlx4_mr_release(dev);
	if (priv->pd != NULL) {
		assert(priv->ctx != NULL);
		claim_zero(mlx4_glue->dealloc_pd(priv->pd));
		claim_zero(mlx4_glue->close_device(priv->ctx));
	} else
		assert(priv->ctx == NULL);
	mlx4_intr_uninstall(priv);
	memset(priv, 0, sizeof(*priv));
}
Example #3
void
set_burst_fn(struct rte_eth_dev *dev, int force_safe)
{
	struct sub_device *sdev;
	uint8_t i;
	int need_safe;
	int safe_set;

	need_safe = force_safe;
	FOREACH_SUBDEV(sdev, i, dev)
		need_safe |= fs_rx_unsafe(sdev);
	safe_set = (dev->rx_pkt_burst == &failsafe_rx_burst);
	if (need_safe && !safe_set) {
		DEBUG("Using safe RX bursts%s",
		      (force_safe ? " (forced)" : ""));
		dev->rx_pkt_burst = &failsafe_rx_burst;
	} else if (!need_safe && safe_set) {
		DEBUG("Using fast RX bursts");
		dev->rx_pkt_burst = &failsafe_rx_burst_fast;
	}
	need_safe = force_safe || fs_tx_unsafe(TX_SUBDEV(dev));
	safe_set = (dev->tx_pkt_burst == &failsafe_tx_burst);
	if (need_safe && !safe_set) {
		DEBUG("Using safe TX bursts%s",
		      (force_safe ? " (forced)" : ""));
		dev->tx_pkt_burst = &failsafe_tx_burst;
	} else if (!need_safe && safe_set) {
		DEBUG("Using fast TX bursts");
		dev->tx_pkt_burst = &failsafe_tx_burst_fast;
	}
	rte_wmb();
}
Example #4
/* Init eth TX queue */
struct vr_dpdk_queue *
vr_dpdk_ethdev_tx_queue_init(unsigned lcore_id, struct vr_interface *vif,
    unsigned queue_or_lcore_id)
{
    uint8_t port_id;
    uint16_t tx_queue_id = queue_or_lcore_id;
    unsigned int vif_idx = vif->vif_idx, dpdk_queue_index;
    const unsigned int socket_id = rte_lcore_to_socket_id(lcore_id);

    struct vr_dpdk_ethdev *ethdev;
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    struct vr_dpdk_queue *tx_queue;
    struct vr_dpdk_queue_params *tx_queue_params;

    ethdev = (struct vr_dpdk_ethdev *)vif->vif_os;
    port_id = ethdev->ethdev_port_id;

    if (lcore->lcore_hw_queue_to_dpdk_index[vif->vif_idx]) {
        dpdk_queue_index =
            lcore->lcore_hw_queue_to_dpdk_index[vif->vif_idx][tx_queue_id];
    } else {
        dpdk_queue_index = 0;
    }

    tx_queue = &lcore->lcore_tx_queues[vif_idx][dpdk_queue_index];
    tx_queue_params = &lcore->lcore_tx_queue_params[vif_idx][dpdk_queue_index];

    /* init queue */
    tx_queue->txq_ops = rte_port_ethdev_writer_ops;
    tx_queue->q_queue_h = NULL;
    tx_queue->q_vif = vrouter_get_interface(vif->vif_rid, vif_idx);

    /* create the queue */
    struct rte_port_ethdev_writer_params writer_params = {
        .port_id = port_id,
        .queue_id = tx_queue_id,
        .tx_burst_sz = VR_DPDK_TX_BURST_SZ,
    };
    tx_queue->q_queue_h = tx_queue->txq_ops.f_create(&writer_params, socket_id);
    if (tx_queue->q_queue_h == NULL) {
        RTE_LOG(ERR, VROUTER, "    error creating eth device %" PRIu8
                " TX queue %" PRIu16 "\n", port_id, tx_queue_id);
        return NULL;
    }

    /* store queue params */
    tx_queue_params->qp_release_op = &dpdk_ethdev_tx_queue_release;
    tx_queue_params->qp_ethdev.queue_id = tx_queue_id;
    tx_queue_params->qp_ethdev.port_id = port_id;

    /* for queue 0, add the queue params to the list of bonds to TX */
    if (ethdev->ethdev_nb_slaves > 0 && tx_queue_id == 0) {
        /* make sure queue params have been stored */
        rte_wmb();
        lcore->lcore_bonds_to_tx[lcore->lcore_nb_bonds_to_tx++] = tx_queue_params;
        RTE_VERIFY(lcore->lcore_nb_bonds_to_tx <= VR_DPDK_MAX_BONDS);
    }

    return tx_queue;
}
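Here the ordering is the mirror image of the release path: the queue params are filled in completely, rte_wmb() makes those stores visible, and only then is the pointer appended to lcore_bonds_to_tx where other code can find it. A hedged sketch of that initialize-then-publish pattern, with illustrative types:

/*
 * Hedged sketch of the "initialize, barrier, publish" order used above;
 * the element type and list are illustrative assumptions.
 */
#include <stdint.h>
#include <rte_atomic.h>

struct toy_params {
    uint16_t port_id;
    uint16_t queue_id;
};

static struct toy_params *toy_bond_list[8];
static unsigned int toy_bond_list_len;

static void
toy_publish_params(struct toy_params *p, uint16_t port, uint16_t queue)
{
    p->port_id = port;
    p->queue_id = queue;
    rte_wmb();                               /* params complete before ... */
    toy_bond_list[toy_bond_list_len++] = p;  /* ... the pointer is reachable */
}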
Example #5
static int
i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
{
	struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
	struct i40e_hmc_obj_rxq rx_ctx;
	int err = I40E_SUCCESS;

	memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq));
	/* Init the RX queue in hardware */
	rx_ctx.dbuff = I40E_RXBUF_SZ_1024 >> I40E_RXQ_CTX_DBUFF_SHIFT;
	rx_ctx.hbuff = 0;
	rx_ctx.base = rxq->rx_ring_phys_addr / I40E_QUEUE_BASE_ADDR_UNIT;
	rx_ctx.qlen = rxq->nb_rx_desc;
#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
	rx_ctx.dsize = 1;
#endif
	rx_ctx.dtype = i40e_header_split_none;
	rx_ctx.hsplit_0 = I40E_HEADER_SPLIT_NONE;
	rx_ctx.rxmax = ETHER_MAX_LEN;
	rx_ctx.tphrdesc_ena = 1;
	rx_ctx.tphwdesc_ena = 1;
	rx_ctx.tphdata_ena = 1;
	rx_ctx.tphhead_ena = 1;
	rx_ctx.lrxqthresh = 2;
	rx_ctx.crcstrip = 0;
	rx_ctx.l2tsel = 1;
	rx_ctx.showiv = 0;
	rx_ctx.prefena = 1;

	err = i40e_clear_lan_rx_queue_context(hw, rxq->reg_idx);
	if (err != I40E_SUCCESS) {
		PMD_DRV_LOG(ERR, "Failed to clear FDIR RX queue context.");
		return err;
	}
	err = i40e_set_lan_rx_queue_context(hw, rxq->reg_idx, &rx_ctx);
	if (err != I40E_SUCCESS) {
		PMD_DRV_LOG(ERR, "Failed to set FDIR RX queue context.");
		return err;
	}
	rxq->qrx_tail = hw->hw_addr +
		I40E_QRX_TAIL(rxq->vsi->base_queue);

	rte_wmb();
	/* Init the RX tail register. */
	I40E_PCI_REG_WRITE(rxq->qrx_tail, 0);
	I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);

	return err;
}
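In this example rte_wmb() sits between descriptor-ring setup in host memory and the write to the RX tail register, so the NIC never sees a tail value that points at descriptors it cannot yet read. A small sketch of the same descriptors-first, doorbell-last rule; the descriptor layout and register mapping are illustrative, not i40e's:

/*
 * Sketch of the "descriptors first, doorbell last" rule; the descriptor
 * layout and the tail register mapping are illustrative, not i40e's.
 */
#include <stdint.h>
#include <rte_atomic.h>

struct toy_rx_desc {
    uint64_t addr;
    uint32_t len;
    uint32_t flags;
};

static void
toy_post_rx_desc(struct toy_rx_desc *ring, volatile uint32_t *tail_reg,
                 uint64_t buf_iova, uint16_t idx)
{
    ring[idx].addr  = buf_iova;   /* fill the descriptor in host memory */
    ring[idx].len   = 2048;
    ring[idx].flags = 0;
    rte_wmb();                    /* descriptor visible before the doorbell */
    *tail_reg = idx;              /* only now tell the NIC about it */
}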
Example #6
/*
 * Create a scheduler on the current lcore
 */
struct lthread_sched *_lthread_sched_create(size_t stack_size)
{
	int status;
	struct lthread_sched *new_sched;
	unsigned lcoreid = rte_lcore_id();

	RTE_ASSERT(stack_size <= LTHREAD_MAX_STACK_SIZE);

	if (stack_size == 0)
		stack_size = LTHREAD_MAX_STACK_SIZE;

	new_sched =
	     rte_calloc_socket(NULL, 1, sizeof(struct lthread_sched),
				RTE_CACHE_LINE_SIZE,
				rte_socket_id());
	if (new_sched == NULL) {
		RTE_LOG(CRIT, LTHREAD,
			"Failed to allocate memory for scheduler\n");
		return NULL;
	}

	_lthread_key_pool_init();

	new_sched->stack_size = stack_size;
	new_sched->birth = rte_rdtsc();
	THIS_SCHED = new_sched;

	status = _lthread_sched_alloc_resources(new_sched);
	if (status != SCHED_ALLOC_OK) {
		RTE_LOG(CRIT, LTHREAD,
			"Failed to allocate resources for scheduler code = %d\n",
			status);
		rte_free(new_sched);
		return NULL;
	}

	bzero(&new_sched->ctx, sizeof(struct ctx));

	new_sched->lcore_id = lcoreid;

	schedcore[lcoreid] = new_sched;

	new_sched->run_flag = 1;

	DIAG_EVENT(new_sched, LT_DIAG_SCHED_CREATE, rte_lcore_id(), 0);

	rte_wmb();
	return new_sched;
}
Example #7
File: mlx4.c Project: btw616/dpdk
/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx4_dev_stop(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	if (!priv->started)
		return;
	DEBUG("%p: detaching flows from all RX queues", (void *)dev);
	priv->started = 0;
	dev->tx_pkt_burst = mlx4_tx_burst_removed;
	dev->rx_pkt_burst = mlx4_rx_burst_removed;
	rte_wmb();
	mlx4_flow_sync(priv, NULL);
	mlx4_rxq_intr_disable(priv);
	mlx4_rss_deinit(priv);
}
/*
 * vr_dpdk_virtio_stop - stop the virtio interface.
 *
 * Returns 0 on success, -1 otherwise.
 */
int
vr_dpdk_virtio_stop(unsigned int vif_idx)
{
    int i;

    if (vif_idx >= VR_MAX_INTERFACES) {
        return -1;
    }

    /* Disable and reset all the virtio queues. */
    for (i = 0; i < VR_DPDK_VIRTIO_MAX_QUEUES*2; i++) {
        vr_dpdk_set_virtq_ready(vif_idx, i, VQ_NOT_READY);
    }
    rte_wmb();
    synchronize_rcu();

    return 0;
}
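The stop path combines two steps: rte_wmb() orders the not-ready stores, and synchronize_rcu() waits until every forwarding lcore has left its read-side critical section before the caller touches the queues again. A hedged sketch of that unpublish/grace-period pattern, assuming a liburcu-style synchronize_rcu() and an illustrative ready-flag array:

/*
 * Hedged sketch of the stop pattern above: clear the ready flags, order
 * the stores, then wait a grace period before reclaiming anything.
 * Assumes a liburcu-style synchronize_rcu(); the flag array is illustrative.
 */
#include <rte_atomic.h>
#include <urcu-qsbr.h>              /* synchronize_rcu() (assumption) */

#define TOY_NB_QUEUES 4
static volatile int toy_queue_ready[TOY_NB_QUEUES];

static void
toy_stop_all_queues(void)
{
    int i;

    for (i = 0; i < TOY_NB_QUEUES; i++)
        toy_queue_ready[i] = 0;     /* readers now see "not ready" ... */
    rte_wmb();                      /* ... before we start waiting */
    synchronize_rcu();              /* readers have left their critical sections */
    /* it is now safe to reset or free whatever the queues point at */
}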
Example #9
File: mlx4.c Project: btw616/dpdk
/**
 * DPDK callback to start the device.
 *
 * Simulate device start by initializing common RSS resources and attaching
 * all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_dev_start(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow_error error;
	int ret;

	if (priv->started)
		return 0;
	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
	priv->started = 1;
	ret = mlx4_rss_init(priv);
	if (ret) {
		ERROR("%p: cannot initialize RSS resources: %s",
		      (void *)dev, strerror(-ret));
		goto err;
	}
#ifndef NDEBUG
	mlx4_mr_dump_dev(dev);
#endif
	ret = mlx4_rxq_intr_enable(priv);
	if (ret) {
		ERROR("%p: interrupt handler installation failed",
		     (void *)dev);
		goto err;
	}
	ret = mlx4_flow_sync(priv, &error);
	if (ret) {
		ERROR("%p: cannot attach flow rules (code %d, \"%s\"),"
		      " flow error type %d, cause %p, message: %s",
		      (void *)dev,
		      -ret, strerror(-ret), error.type, error.cause,
		      error.message ? error.message : "(unspecified)");
		goto err;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx4_tx_burst;
	dev->rx_pkt_burst = mlx4_rx_burst;
	return 0;
err:
	mlx4_dev_stop(dev);
	return ret;
}
/*
 * vr_dpdk_knidev_release - release KNI interface and remove it from the
 * global list.
 * Returns 0 on success, < 0 otherwise.
 */
int
vr_dpdk_knidev_release(struct vr_interface *vif)
{
    int i;
    struct rte_kni *kni = vif->vif_os;

    vif->vif_os = NULL;

    /* delete the interface from the table of KNIs */
    for (i = 0; i < VR_DPDK_MAX_KNI_INTERFACES; i++) {
        if (vr_dpdk.knis[i] == kni) {
            vr_dpdk.knis[i] = NULL;
            break;
        }
    }
    rte_wmb();

    return rte_kni_release(kni);
}
Example #11
/*
 * vr_dpdk_virtio_get_vring_base - gets the vring base for the specified vring
 * sent by the vhost client.
 *
 * Returns 0 on success, -1 otherwise.
 */
int
vr_dpdk_virtio_get_vring_base(unsigned int vif_idx, unsigned int vring_idx,
                               unsigned int *vring_basep)
{
    vr_dpdk_virtioq_t *vq;

    if ((vif_idx >= VR_MAX_INTERFACES)
        || (vring_idx >= (2 * VR_DPDK_VIRTIO_MAX_QUEUES))) {
        return -1;
    }

    /*
     * RX rings are even numbered and TX rings are odd numbered from the
     * VM's point of view. From vrouter's point of view, VM's TX ring is
     * vrouter's RX ring and vice versa.
     */
    if (vring_idx & 1) {
        vq = &vr_dpdk_virtio_rxqs[vif_idx][vring_idx/2];
    } else {
        vq = &vr_dpdk_virtio_txqs[vif_idx][vring_idx/2];
    }

    *vring_basep = vq->vdv_last_used_idx;

    /*
     * This is usually called when qemu shuts down a virtio queue. Set the
     * state to indicate that this queue should not be used any more.
     */
    vq->vdv_ready_state = VQ_NOT_READY;
    rte_wmb();
    synchronize_rcu();

    /* Reset the queue. We reset only those values we analyze in
     * uvhm_check_vring_ready()
     */
    vq->vdv_desc = NULL;
    if (vq->vdv_callfd) {
        close(vq->vdv_callfd);
        vq->vdv_callfd = 0;
    }

    return 0;
}
Example #12
/*
 * dpdk_virtio_tx_queue_release - releases a virtio TX queue.
 *
 * Returns nothing.
 */
static void
dpdk_virtio_tx_queue_release(unsigned lcore_id, struct vr_interface *vif)
{
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    struct vr_dpdk_queue *tx_queue = &lcore->lcore_tx_queues[vif->vif_idx];
    struct vr_dpdk_queue_params *tx_queue_params
                        = &lcore->lcore_tx_queue_params[vif->vif_idx];

    tx_queue->txq_ops.f_tx = NULL;
    rte_wmb();

    /* flush and free the queue */
    if (tx_queue->txq_ops.f_free(tx_queue->q_queue_h)) {
        RTE_LOG(ERR, VROUTER, "    error freeing lcore %u virtio device TX queue\n",
                    lcore_id);
    }

    /* reset the queue */
    vrouter_put_interface(tx_queue->q_vif);
    memset(tx_queue, 0, sizeof(*tx_queue));
    memset(tx_queue_params, 0, sizeof(*tx_queue_params));
}
/* Init KNI RX queue */
struct vr_dpdk_queue *
vr_dpdk_kni_rx_queue_init(unsigned lcore_id, struct vr_interface *vif,
    unsigned host_lcore_id)
{
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    const unsigned socket_id = rte_lcore_to_socket_id(lcore_id);
    uint8_t port_id = 0;
    unsigned vif_idx = vif->vif_idx;
    struct vr_dpdk_queue *rx_queue = &lcore->lcore_rx_queues[vif_idx];
    struct vr_dpdk_queue_params *rx_queue_params
                    = &lcore->lcore_rx_queue_params[vif_idx];

    if (vif->vif_type == VIF_TYPE_HOST) {
        port_id = (((struct vr_dpdk_ethdev *)(vif->vif_bridge->vif_os))->
                ethdev_port_id);
    }

    /* init queue */
    rx_queue->rxq_ops = dpdk_knidev_reader_ops;
    rx_queue->q_queue_h = NULL;
    rx_queue->q_vif = vrouter_get_interface(vif->vif_rid, vif_idx);

    /* create the queue */
    struct dpdk_knidev_reader_params reader_params = {
        .kni = vif->vif_os,
    };
    rx_queue->q_queue_h = rx_queue->rxq_ops.f_create(&reader_params, socket_id);
    if (rx_queue->q_queue_h == NULL) {
        RTE_LOG(ERR, VROUTER, "    error creating KNI device %s RX queue"
            " at eth device %" PRIu8 "\n", vif->vif_name, port_id);
        return NULL;
    }

    /* store queue params */
    rx_queue_params->qp_release_op = &dpdk_kni_rx_queue_release;

    return rx_queue;
}

/* Release KNI TX queue */
static void
dpdk_kni_tx_queue_release(unsigned lcore_id, struct vr_interface *vif)
{
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    struct vr_dpdk_queue *tx_queue = &lcore->lcore_tx_queues[vif->vif_idx];
    struct vr_dpdk_queue_params *tx_queue_params
                        = &lcore->lcore_tx_queue_params[vif->vif_idx];

    tx_queue->txq_ops.f_tx = NULL;
    rte_wmb();

    /* flush and free the queue */
    if (tx_queue->txq_ops.f_free(tx_queue->q_queue_h)) {
        RTE_LOG(ERR, VROUTER, "    error freeing lcore %u KNI device TX queue\n",
                    lcore_id);
    }

    /* reset the queue */
    vrouter_put_interface(tx_queue->q_vif);
    memset(tx_queue, 0, sizeof(*tx_queue));
    memset(tx_queue_params, 0, sizeof(*tx_queue_params));
}

/* Init KNI TX queue */
struct vr_dpdk_queue *
vr_dpdk_kni_tx_queue_init(unsigned lcore_id, struct vr_interface *vif,
    unsigned host_lcore_id)
{
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    const unsigned socket_id = rte_lcore_to_socket_id(lcore_id);
    uint8_t port_id = 0;
    unsigned vif_idx = vif->vif_idx;
    struct vr_dpdk_queue *tx_queue = &lcore->lcore_tx_queues[vif_idx];
    struct vr_dpdk_queue_params *tx_queue_params
                    = &lcore->lcore_tx_queue_params[vif_idx];
    struct vr_dpdk_ethdev *ethdev;

    if (vif->vif_type == VIF_TYPE_HOST) {
        ethdev = vif->vif_bridge->vif_os;
        if (ethdev == NULL) {
            RTE_LOG(ERR, VROUTER, "    error creating KNI device %s TX queue:"
                " bridge vif %u ethdev is not initialized\n",
                vif->vif_name, vif->vif_bridge->vif_idx);
            return NULL;
        }
        port_id = ethdev->ethdev_port_id;
    }

    /* init queue */
    tx_queue->txq_ops = dpdk_knidev_writer_ops;
    tx_queue->q_queue_h = NULL;
    tx_queue->q_vif = vrouter_get_interface(vif->vif_rid, vif_idx);

    /* create the queue */
    struct dpdk_knidev_writer_params writer_params = {
        .kni = vif->vif_os,
        .tx_burst_sz = VR_DPDK_TX_BURST_SZ,
    };
    tx_queue->q_queue_h = tx_queue->txq_ops.f_create(&writer_params, socket_id);
    if (tx_queue->q_queue_h == NULL) {
        RTE_LOG(ERR, VROUTER, "    error creating KNI device %s TX queue"
            " at eth device %" PRIu8 "\n", vif->vif_name, port_id);
        return NULL;
    }

    /* store queue params */
    tx_queue_params->qp_release_op = &dpdk_kni_tx_queue_release;

    return tx_queue;
}

/* Change KNI MTU size callback */
static int
dpdk_knidev_change_mtu(uint8_t port_id, unsigned new_mtu)
{
    struct vrouter *router = vrouter_get(0);
    struct vr_interface *vif;
    int i, ret;
    uint8_t ethdev_port_id, slave_port_id;
    struct vr_dpdk_ethdev *ethdev = NULL;

    RTE_LOG(INFO, VROUTER, "Changing eth device %" PRIu8 " MTU to %u\n",
                    port_id, new_mtu);
    if (port_id >= rte_eth_dev_count()) {
        RTE_LOG(ERR, VROUTER, "Error changing eth device %"PRIu8" MTU: invalid eth device\n", port_id);
        return -EINVAL;
    }

    /*
     * TODO: DPDK bond PMD does not implement mtu_set op, so we need to
     * set the MTU manually for all the slaves.
     */
    /* Bond vif uses first slave port ID. */
    if (router->vr_eth_if) {
        ethdev = (struct vr_dpdk_ethdev *)router->vr_eth_if->vif_os;
        if (ethdev && ethdev->ethdev_nb_slaves > 0) {
            for (i = 0; i < ethdev->ethdev_nb_slaves; i++) {
                if (port_id == ethdev->ethdev_slaves[i])
                    break;
            }
            /* Clear ethdev if no port match. */
            if (i >= ethdev->ethdev_nb_slaves)
                ethdev = NULL;
        }
    }
    if (ethdev && ethdev->ethdev_nb_slaves > 0) {
        for (i = 0; i < ethdev->ethdev_nb_slaves; i++) {
            slave_port_id = ethdev->ethdev_slaves[i];
            RTE_LOG(INFO, VROUTER, "    changing bond member eth device %" PRIu8
                " MTU to %u\n", slave_port_id, new_mtu);

            ret =  rte_eth_dev_set_mtu(slave_port_id, new_mtu);
            if (ret < 0) {
                RTE_LOG(ERR, VROUTER, "    error changing bond member eth device %" PRIu8
                    " MTU: %s (%d)\n", slave_port_id, rte_strerror(-ret), -ret);
                return ret;
            }
        }
    } else {
        ret =  rte_eth_dev_set_mtu(port_id, new_mtu);
        if (ret < 0) {
            RTE_LOG(ERR, VROUTER, "Error changing eth device %" PRIu8
                " MTU: %s (%d)\n", port_id, rte_strerror(-ret), -ret);
        }
        return ret;
    }

    /* On success, inform vrouter about new MTU */
    for (i = 0; i < router->vr_max_interfaces; i++) {
        vif = __vrouter_get_interface(router, i);
        if (vif && (vif->vif_type == VIF_TYPE_PHYSICAL)) {
            ethdev_port_id = (((struct vr_dpdk_ethdev *)(vif->vif_os))->
                        ethdev_port_id);
            if (ethdev_port_id == port_id) {
                /* Ethernet header size */
                new_mtu += sizeof(struct vr_eth);
                if (vr_dpdk.vlan_tag != VLAN_ID_INVALID) {
                    /* 802.1q header size */
                    new_mtu += sizeof(uint32_t);
                }
                vif->vif_mtu = new_mtu;
                if (vif->vif_bridge)
                    vif->vif_bridge->vif_mtu = new_mtu;
            }
        }
    }

    return 0;
}


/* Configure KNI state callback */
static int
dpdk_knidev_config_network_if(uint8_t port_id, uint8_t if_up)
{
    int ret = 0;

    RTE_LOG(INFO, VROUTER, "Configuring eth device %" PRIu8 " %s\n",
                    port_id, if_up ? "UP" : "DOWN");
    if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) {
        RTE_LOG(ERR, VROUTER, "Invalid eth device %" PRIu8 "\n", port_id);
        return -EINVAL;
    }

    if (if_up)
        ret = rte_eth_dev_start(port_id);
    else
        rte_eth_dev_stop(port_id);

    if (ret < 0) {
        RTE_LOG(ERR, VROUTER, "Configuring eth device %" PRIu8 " UP"
                    " failed (%d)\n", port_id, ret);
    }

    return ret;
}

/* Init KNI */
int
vr_dpdk_knidev_init(uint8_t port_id, struct vr_interface *vif)
{
    int i;
    struct rte_eth_dev_info dev_info;
    struct rte_kni_conf kni_conf;
    struct rte_kni_ops kni_ops;
    struct rte_kni *kni;
    struct rte_config *rte_conf = rte_eal_get_configuration();

    if (!vr_dpdk.kni_inited) {
        /*
         * If the host does not support KNIs (e.g. RedHat), we'll get
         * a panic here.
         */
        rte_kni_init(VR_DPDK_MAX_KNI_INTERFACES);
        vr_dpdk.kni_inited = true;
    }

    /* get eth device info */
    memset(&dev_info, 0, sizeof(dev_info));
    rte_eth_dev_info_get(port_id, &dev_info);

    /* create KNI configuration */
    memset(&kni_conf, 0, sizeof(kni_conf));
    strncpy(kni_conf.name, (char *)vif->vif_name, sizeof(kni_conf.name) - 1);

    kni_conf.addr = dev_info.pci_dev->addr;
    kni_conf.id = dev_info.pci_dev->id;
    kni_conf.group_id = port_id;
    kni_conf.mbuf_size = VR_DPDK_MAX_PACKET_SZ;
    /*
     * Due to DPDK commit 41a6ebd, to prevent packet reordering in KNI
     * we have to bind the KNI kernel thread to the first online unused CPU.
     */
    for (i = 0; i < RTE_MAX_LCORE; i++) {
        if (lcore_config[i].detected
                && rte_conf->lcore_role[VR_DPDK_FWD_LCORE_ID + i] == ROLE_OFF) {
            kni_conf.force_bind = 1;
            kni_conf.core_id = i;
            RTE_LOG(INFO, VROUTER, "    bind KNI kernel thread to CPU %d\n", i);
            break;
        }
    }

    /* KNI options
     *
     * Changing state of the KNI interface can change state of the physical
     * interface. This is useful for the vhost, but not for the VLAN
     * forwarding interface.
     */
    if (vif->vif_type == VIF_TYPE_VLAN) {
        memset(&kni_ops, 0, sizeof(kni_ops));
    } else {
        kni_ops.port_id = port_id;
        kni_ops.change_mtu = dpdk_knidev_change_mtu;
        kni_ops.config_network_if = dpdk_knidev_config_network_if;
    }

    /* allocate KNI device */
    kni = rte_kni_alloc(vr_dpdk.rss_mempool, &kni_conf, &kni_ops);
    if (kni == NULL) {
        RTE_LOG(ERR, VROUTER, "    error allocating KNI device %s"
            " at eth device %" PRIu8 "\n", vif->vif_name, port_id);
        return -ENOMEM;
    }

    /* store pointer to KNI for further use */
    vif->vif_os = kni;

    /* add interface to the table of KNIs */
    for (i = 0; i < VR_DPDK_MAX_KNI_INTERFACES; i++) {
        if (vr_dpdk.knis[i] == NULL) {
            vr_dpdk.knis[i] = vif->vif_os;
            break;
        }
    }

    return 0;
}
Example #14
/*
 * vr_uvh_cl_msg_handler - handler for messages from user space vhost
 * clients. Calls the appropriate handler based on the message type.
 *
 * Returns 0 on success, -1 on error.
 *
 * TODO: upon error, this function currently makes the process exit.
 * Instead, it should close the socket and continue serving other clients.
 */
static int
vr_uvh_cl_msg_handler(int fd, void *arg)
{
    vr_uvh_client_t *vru_cl = (vr_uvh_client_t *) arg;
    struct msghdr mhdr;
    struct iovec iov;
    int i, err, ret = 0, read_len = 0;
    struct cmsghdr *cmsg;

    memset(&mhdr, 0, sizeof(mhdr));

    if (vru_cl->vruc_msg_bytes_read == 0) {
        mhdr.msg_control = &vru_cl->vruc_cmsg;
        mhdr.msg_controllen = sizeof(vru_cl->vruc_cmsg);

        iov.iov_base = (void *) &vru_cl->vruc_msg;
        iov.iov_len = VHOST_USER_HSIZE;

        mhdr.msg_iov = &iov;
        mhdr.msg_iovlen = 1;

        ret = recvmsg(fd, &mhdr, MSG_DONTWAIT);
        if (ret < 0) {
            if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
                ret = 0;
                goto cleanup;
            }

            vr_uvhost_log("Receive returned %d in vhost server for client %s\n",
                          ret, vru_cl->vruc_path);
            ret = -1;
            goto cleanup;
        } else if (ret > 0) {
            if (mhdr.msg_flags & MSG_CTRUNC) {
                vr_uvhost_log("Truncated control message from vhost client %s\n",
                             vru_cl->vruc_path);
                ret = -1;
                goto cleanup;
            }

            cmsg = CMSG_FIRSTHDR(&mhdr);
            if (cmsg && (cmsg->cmsg_len > 0) &&
                   (cmsg->cmsg_level == SOL_SOCKET) &&
                   (cmsg->cmsg_type == SCM_RIGHTS)) {
                   vru_cl->vruc_num_fds_sent = (cmsg->cmsg_len - CMSG_LEN(0))/
                                                   sizeof(int);
                   if (vru_cl->vruc_num_fds_sent > VHOST_MEMORY_MAX_NREGIONS) {
                        vr_uvhost_log("Too many FDs sent for client %s: %d\n",
                                vru_cl->vruc_path,  vru_cl->vruc_num_fds_sent);
                       vru_cl->vruc_num_fds_sent = VHOST_MEMORY_MAX_NREGIONS;
                   }

                   memcpy(vru_cl->vruc_fds_sent, CMSG_DATA(cmsg),
                          vru_cl->vruc_num_fds_sent*sizeof(int));
            }

            vru_cl->vruc_msg_bytes_read = ret;
            if (ret < VHOST_USER_HSIZE) {
                ret = 0;
                goto cleanup;
            }

            read_len = vru_cl->vruc_msg.size;
        } else {
            /*
             * recvmsg returned 0, so return error.
             */
            vr_uvhost_log("Receive returned %d in vhost server for client %s\n",
                          ret, vru_cl->vruc_path);
            ret = -1;
            goto cleanup;
        }
    } else if (vru_cl->vruc_msg_bytes_read < VHOST_USER_HSIZE) {
        read_len = VHOST_USER_HSIZE - vru_cl->vruc_msg_bytes_read;
    } else {
        read_len = vru_cl->vruc_msg.size -
                       (vru_cl->vruc_msg_bytes_read - VHOST_USER_HSIZE);
    }

    if (read_len) {
        if (vru_cl->vruc_owner != pthread_self()) {
            if (vru_cl->vruc_owner)
                RTE_LOG(WARNING, UVHOST, "WARNING: thread %lx is trying to read"
                    " uvhost client FD %d owned by thread %lx\n",
                    pthread_self(), fd, vru_cl->vruc_owner);
            vru_cl->vruc_owner = pthread_self();
        }
        ret = read(fd, (((char *)&vru_cl->vruc_msg) + vru_cl->vruc_msg_bytes_read),
                   read_len);
#ifdef VR_DPDK_RX_PKT_DUMP
        if (ret > 0) {
            RTE_LOG(DEBUG, UVHOST, "%s[%lx]: FD %d read %d bytes\n", __func__,
                pthread_self(), fd, ret);
            rte_hexdump(stdout, "uvhost full message dump:",
                (((char *)&vru_cl->vruc_msg)),
                    ret + vru_cl->vruc_msg_bytes_read);
        } else if (ret < 0) {
            RTE_LOG(DEBUG, UVHOST, "%s[%lx]: FD %d read returned error %d: %s (%d)\n", __func__,
                pthread_self(), fd, ret, rte_strerror(errno), errno);
        }
#endif
        if (ret < 0) {
            if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
                ret = 0;
                goto cleanup;
            }

            vr_uvhost_log(
                "Error: read returned %d, %d %d %d in vhost server for client %s\n",
                ret, errno, read_len,
                vru_cl->vruc_msg_bytes_read, vru_cl->vruc_path);
            ret = -1;
            goto cleanup;
        } else if (ret == 0) {
             vr_uvhost_log("Read returned %d in vhost server for client %s\n",
                           ret, vru_cl->vruc_path);
            ret = -1;
            goto cleanup;
        }

        vru_cl->vruc_msg_bytes_read += ret;
        if (vru_cl->vruc_msg_bytes_read < VHOST_USER_HSIZE) {
            ret = 0;
            goto cleanup;
        }

        if (vru_cl->vruc_msg_bytes_read <
                (vru_cl->vruc_msg.size + VHOST_USER_HSIZE)) {
            ret = 0;
            goto cleanup;
        }
    }

    ret = vr_uvh_cl_call_handler(vru_cl);
    if (ret < 0) {
        vr_uvhost_log("Error handling message %d client %s\n",
                      vru_cl->vruc_msg.request, vru_cl->vruc_path);
        ret = -1;
        goto cleanup;
    }

    ret = vr_uvh_cl_send_reply(fd, vru_cl);
    if (ret < 0) {
        vr_uvhost_log("Error sending reply for message %d client %s\n",
                      vru_cl->vruc_msg.request, vru_cl->vruc_path);
        ret = -1;
        goto cleanup;
    }

cleanup:
    err = errno;
    /* close all the FDs received */
    for (i = 0; i < vru_cl->vruc_num_fds_sent; i++) {
        if (vru_cl->vruc_fds_sent[i] > 0)
            close(vru_cl->vruc_fds_sent[i]);
    }
    if (ret == -1) {
        /* Set the vif's queues to VQ_NOT_READY. */
        for (i = 0; i < VR_DPDK_VIRTIO_MAX_QUEUES; i++) {
            vr_dpdk_virtio_rxqs[vru_cl->vruc_idx][i].vdv_ready_state = VQ_NOT_READY;
            vr_dpdk_virtio_txqs[vru_cl->vruc_idx][i].vdv_ready_state = VQ_NOT_READY;
        }
        rte_wmb();
        synchronize_rcu();
        /*
         * Unmaps qemu's FDs.
         */
        vr_dpdk_virtio_uvh_vif_munmap(&vr_dpdk_virtio_uvh_vif_mmap[vru_cl->vruc_idx]);
    }
    /* clear state for next message from this client. */
    vru_cl->vruc_msg_bytes_read = 0;
    memset(&vru_cl->vruc_msg, 0, sizeof(vru_cl->vruc_msg));
    memset(vru_cl->vruc_cmsg, 0, sizeof(vru_cl->vruc_cmsg));
    memset(vru_cl->vruc_fds_sent, 0, sizeof(vru_cl->vruc_fds_sent));
    vru_cl->vruc_num_fds_sent = 0;
    errno = err;
    return ret;
}
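Most of the function above is bookkeeping around a standard vhost-user detail: memory-region file descriptors arrive as SCM_RIGHTS ancillary data on the Unix socket and must be pulled out of the control message. A self-contained sketch of just that part, with illustrative buffer sizes and without the client state machine:

/*
 * Minimal sketch of the SCM_RIGHTS handling above: pull the file
 * descriptors passed by the vhost client out of the control message.
 * Buffer sizes and the function itself are illustrative, not the
 * vrouter handler.
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#define TOY_MAX_FDS 8

static int
toy_recv_fds(int sock, int *fds, int max_fds)
{
    char data[64];
    char cbuf[CMSG_SPACE(TOY_MAX_FDS * sizeof(int))];
    struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
    struct msghdr mh = {
        .msg_iov = &iov, .msg_iovlen = 1,
        .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
    };
    struct cmsghdr *cm;
    int nfds = 0;

    if (recvmsg(sock, &mh, MSG_DONTWAIT) <= 0)
        return 0;

    cm = CMSG_FIRSTHDR(&mh);
    if (cm != NULL && cm->cmsg_level == SOL_SOCKET &&
            cm->cmsg_type == SCM_RIGHTS) {
        nfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(int);
        if (nfds > max_fds)
            nfds = max_fds;
        memcpy(fds, CMSG_DATA(cm), nfds * sizeof(int));
    }
    return nfds;
}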
Example #15
/**
 * DPDK callback to change the MTU.
 *
 * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be
 * received). Use this as a hint to enable/disable scattered packets support
 * and improve performance when not needed.
 * Since failure is not an option, reconfiguring queues on the fly is not
 * recommended.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mtu
 *   New MTU.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct priv *priv = dev->data->dev_private;
	int ret = 0;
	unsigned int i;
	uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
		mlx5_rx_burst;

	if (mlx5_is_secondary())
		return -E_RTE_SECONDARY;

	priv_lock(priv);
	/* Set kernel interface MTU first. */
	if (priv_set_mtu(priv, mtu)) {
		ret = errno;
		WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
		     strerror(ret));
		goto out;
	} else
		DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
	priv->mtu = mtu;
	/* Temporarily replace RX handler with a fake one, assuming it has not
	 * been copied elsewhere. */
	dev->rx_pkt_burst = removed_rx_burst;
	/* Make sure everyone has left mlx5_rx_burst() and uses
	 * removed_rx_burst() instead. */
	rte_wmb();
	usleep(1000);
	/* Reconfigure each RX queue. */
	for (i = 0; (i != priv->rxqs_n); ++i) {
		struct rxq *rxq = (*priv->rxqs)[i];
		unsigned int max_frame_len;
		int sp;

		if (rxq == NULL)
			continue;
		/* Calculate new maximum frame length according to MTU and
		 * toggle scattered support (sp) if necessary. */
		max_frame_len = (priv->mtu + ETHER_HDR_LEN +
				 (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
		sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM));
		/* Provide new values to rxq_setup(). */
		dev->data->dev_conf.rxmode.jumbo_frame = sp;
		dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
		ret = rxq_rehash(dev, rxq);
		if (ret) {
			/* Force SP RX if that queue requires it and abort. */
			if (rxq->sp)
				rx_func = mlx5_rx_burst_sp;
			break;
		}
		/* Scattered burst function takes priority. */
		if (rxq->sp)
			rx_func = mlx5_rx_burst_sp;
	}
	/* Burst functions can now be called again. */
	rte_wmb();
	dev->rx_pkt_burst = rx_func;
out:
	priv_unlock(priv);
	assert(ret >= 0);
	return -ret;
}
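mlx5 uses a crude but effective quiescence trick here: point rx_pkt_burst at a stub, issue rte_wmb(), and sleep long enough for any in-flight burst calls to return before reconfiguring the queues. A sketch of the idea, assuming the pre-21.11 rte_eth_dev layout used throughout these examples, where rx_pkt_burst is a direct member; the stub and delay are illustrative:

/*
 * Sketch of the quiescence trick above: install a stub burst function,
 * order the store, then give in-flight pollers time to drain.
 */
#include <unistd.h>
#include <rte_atomic.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>

static uint16_t
toy_removed_rx_burst(void *rxq, struct rte_mbuf **pkts, uint16_t nb_pkts)
{
    (void)rxq;
    (void)pkts;
    (void)nb_pkts;
    return 0;                                  /* pretend the queue is empty */
}

static void
toy_quiesce_rx(struct rte_eth_dev *dev)
{
    dev->rx_pkt_burst = toy_removed_rx_burst;  /* new polls hit the stub */
    rte_wmb();
    usleep(1000);                              /* crude wait for old polls */
    /* the RX queues can now be torn down or reconfigured */
}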
Example #16
/*
 * dpdk_virtio_from_vm_rx - receive packets from a virtio client so that
 * the packets can be handed to vrouter for forwarding. The virtio client
 * is usually a VM.
 *
 * Returns the number of packets received from the virtio.
 */
static int
dpdk_virtio_from_vm_rx(void *port, struct rte_mbuf **pkts, uint32_t max_pkts)
{
    struct dpdk_virtio_reader *p = (struct dpdk_virtio_reader *)port;
    vr_dpdk_virtioq_t *vq = p->rx_virtioq;
    uint16_t vq_hard_avail_idx, i;
    uint16_t avail_pkts, next_desc_idx, next_avail_idx;
    struct vring_desc *desc;
    char *pkt_addr, *tail_addr;
    struct rte_mbuf *mbuf;
    uint32_t pkt_len, nb_pkts = 0;
    vr_uvh_client_t *vru_cl;

    if (unlikely(vq->vdv_ready_state == VQ_NOT_READY)) {
        DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p is not ready\n",
                __func__, vq);
        return 0;
    }

    vru_cl = vr_dpdk_virtio_get_vif_client(vq->vdv_vif_idx);
    if (unlikely(vru_cl == NULL))
        return 0;

    vq_hard_avail_idx = (*((volatile uint16_t *)&vq->vdv_avail->idx));

    /* Unsigned subtraction gives the right result even with wrap around. */
    avail_pkts = vq_hard_avail_idx - vq->vdv_last_used_idx;
    avail_pkts = RTE_MIN(avail_pkts, max_pkts);
    if (unlikely(avail_pkts == 0)) {
        DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p has no packets\n",
                    __func__, vq);
        return 0;
    }

    DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p AVAILABLE %u packets\n",
            __func__, vq, avail_pkts);
    for (i = 0; i < avail_pkts; i++) {
        /* Allocate a mbuf. */
        mbuf = rte_pktmbuf_alloc(vr_dpdk.rss_mempool);
        if (unlikely(mbuf == NULL)) {
            p->nb_nombufs++;
            DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p no_mbufs=%"PRIu64"\n",
                    __func__, vq, p->nb_nombufs);
            break;
        }

        next_avail_idx = (vq->vdv_last_used_idx + i) & (vq->vdv_size - 1);
        next_desc_idx = vq->vdv_avail->ring[next_avail_idx];
        /*
         * Move the (chain of) descriptors to the used list. The used
         * index will, however, only be updated at the end of the loop.
         */
        vq->vdv_used->ring[next_avail_idx].id = next_desc_idx;
        vq->vdv_used->ring[next_avail_idx].len = 0;

        desc = &vq->vdv_desc[next_desc_idx];
        pkt_len = desc->len;
        pkt_addr = vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
        /* Check the descriptor is sane. */
        if (unlikely(desc->len < sizeof(struct virtio_net_hdr) ||
                desc->addr == 0 || pkt_addr == NULL)) {
            goto free_mbuf;
        }
        /* Now pkt_addr points to the virtio_net_hdr. */

        if (((struct virtio_net_hdr *)pkt_addr)->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
                mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;

        /* Skip virtio_net_hdr as we don't support mergeable receive buffers. */
        if (likely(desc->flags & VRING_DESC_F_NEXT &&
                pkt_len == sizeof(struct virtio_net_hdr))) {
            DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p pkt %u F_NEXT\n",
                __func__, vq, i);
            desc = &vq->vdv_desc[desc->next];
            pkt_len = desc->len;
            pkt_addr = vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
        } else {
            DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p pkt %u no F_NEXT\n",
                __func__, vq, i);
            pkt_addr += sizeof(struct virtio_net_hdr);
            pkt_len -= sizeof(struct virtio_net_hdr);
        }
        /* Now pkt_addr points to the packet data. */

        tail_addr = rte_pktmbuf_append(mbuf, pkt_len);
        /* Check we are ready to copy the data. */
        if (unlikely(desc->addr == 0 || pkt_addr == NULL ||
                tail_addr == NULL)) {
            goto free_mbuf;
        }
        /* Copy first descriptor data. */
        rte_memcpy(tail_addr, pkt_addr, pkt_len);

        /*
         * Gather mbuf from several virtio buffers. We do not support mbuf
         * chains, so all virtio buffers should fit into one mbuf.
         */
        while (unlikely(desc->flags & VRING_DESC_F_NEXT)) {
            desc = &vq->vdv_desc[desc->next];
            pkt_len = desc->len;
            pkt_addr = vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
            tail_addr = rte_pktmbuf_append(mbuf, pkt_len);
            /* Check we are ready to copy the data. */
            if (unlikely(desc->addr == 0 || pkt_addr == NULL ||
                    tail_addr == NULL)) {
                goto free_mbuf;
            }
            /* Append next descriptor(s) data. */
            rte_memcpy(tail_addr, pkt_addr, pkt_len);
        }

        pkts[nb_pkts] = mbuf;
        nb_pkts++;
        continue;

    free_mbuf:
        DPDK_VIRTIO_READER_STATS_PKTS_DROP_ADD(p, 1);
        rte_pktmbuf_free(mbuf);
    }

    /*
     * Do not notify the guest if no descriptors were processed.
     *
     * If there are no free mbufs on the host, the TX queue in the guest
     * fills up. This makes the guest kernel switch to interrupt mode
     * and clear the VRING_AVAIL_F_NO_INTERRUPT flag.
     *
     * Meanwhile the host polls the virtio queue, sees the available
     * descriptors and interrupts the guest. Those interrupts go unhandled
     * by the guest virtio driver, so after 100K such interrupts the IRQ
     * gets reported and disabled by the guest kernel.
     */
    if (likely(i > 0)) {
        vq->vdv_last_used_idx += i;
        rte_wmb();
        vq->vdv_used->idx += i;
        RTE_LOG(DEBUG, VROUTER,
                "%s: vif %d vq %p last_used_idx %d used->idx %u avail->idx %u\n",
                __func__, vq->vdv_vif_idx, vq, vq->vdv_last_used_idx,
                vq->vdv_used->idx, vq->vdv_avail->idx);

        /* Call guest if required. */
        if (unlikely(!(vq->vdv_avail->flags & VRING_AVAIL_F_NO_INTERRUPT))) {
            p->nb_syscalls++;
            eventfd_write(vq->vdv_callfd, 1);
        }
    }

    DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p RETURNS %u pkts\n",
            __func__, vq, nb_pkts);

    DPDK_VIRTIO_READER_STATS_PKTS_IN_ADD(p, nb_pkts);

    return nb_pkts;
}
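Two details of the loop above are easy to miss. First, the avail/used indices are free-running uint16_t counters, so vq_hard_avail_idx - vq->vdv_last_used_idx is correct even across wrap-around. Second, the used-ring entries are written before rte_wmb(), and only then is vdv_used->idx advanced, so the guest can never observe an index that points past initialized entries. A tiny demo of the first point, with illustrative values:

/*
 * Tiny demo of the unsigned wrap-around comment above: the difference of
 * two free-running uint16_t indices stays correct after the available
 * index wraps past zero.
 */
#include <stdint.h>
#include <assert.h>

static void
toy_wraparound_demo(void)
{
    uint16_t last_used_idx = 65534;            /* just before the wrap */
    uint16_t hard_avail_idx = 2;               /* already wrapped */
    uint16_t avail = hard_avail_idx - last_used_idx;

    assert(avail == 4);                        /* 4 new descriptors */
}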