/* Release ethdev TX queue */
static void
dpdk_ethdev_tx_queue_release(unsigned lcore_id, struct vr_interface *vif)
{
    int i;
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    struct vr_dpdk_queue *tx_queue = &lcore->lcore_tx_queues[vif->vif_idx];
    struct vr_dpdk_queue_params *tx_queue_params
                    = &lcore->lcore_tx_queue_params[vif->vif_idx];

    /* remove queue params from the list of bonds to TX */
    for (i = 0; i < lcore->lcore_nb_bonds_to_tx; i++) {
        if (likely(lcore->lcore_bonds_to_tx[i] == tx_queue_params)) {
            lcore->lcore_bonds_to_tx[i] = NULL;
            lcore->lcore_nb_bonds_to_tx--;
            RTE_VERIFY(lcore->lcore_nb_bonds_to_tx <= VR_DPDK_MAX_BONDS);
            /* copy the last element to the empty spot */
            lcore->lcore_bonds_to_tx[i] =
                lcore->lcore_bonds_to_tx[lcore->lcore_nb_bonds_to_tx];
            break;
        }
    }

    tx_queue->txq_ops.f_tx = NULL;
    rte_wmb();

    /* flush and free the queue */
    if (tx_queue->txq_ops.f_free(tx_queue->q_queue_h)) {
        RTE_LOG(ERR, VROUTER, " error freeing lcore %u eth device TX queue\n",
                lcore_id);
    }

    /* reset the queue */
    vrouter_put_interface(tx_queue->q_vif);
    memset(tx_queue, 0, sizeof(*tx_queue));
    memset(tx_queue_params, 0, sizeof(*tx_queue_params));
}
/**
 * DPDK callback to close the device.
 *
 * Destroy all queues and objects, free memory.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx4_dev_close(struct rte_eth_dev *dev)
{
    struct priv *priv = dev->data->dev_private;
    unsigned int i;

    DEBUG("%p: closing device \"%s\"",
          (void *)dev,
          ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
    dev->rx_pkt_burst = mlx4_rx_burst_removed;
    dev->tx_pkt_burst = mlx4_tx_burst_removed;
    rte_wmb();
    mlx4_flow_clean(priv);
    mlx4_rss_deinit(priv);
    for (i = 0; i != dev->data->nb_rx_queues; ++i)
        mlx4_rx_queue_release(dev->data->rx_queues[i]);
    for (i = 0; i != dev->data->nb_tx_queues; ++i)
        mlx4_tx_queue_release(dev->data->tx_queues[i]);
    mlx4_mr_release(dev);
    if (priv->pd != NULL) {
        assert(priv->ctx != NULL);
        claim_zero(mlx4_glue->dealloc_pd(priv->pd));
        claim_zero(mlx4_glue->close_device(priv->ctx));
    } else
        assert(priv->ctx == NULL);
    mlx4_intr_uninstall(priv);
    memset(priv, 0, sizeof(*priv));
}
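/*
 * The close/stop paths above share one pattern: publish a no-op burst
 * function, fence with rte_wmb(), and only then tear resources down. Below
 * is a minimal sketch of that pattern, not code from any driver here:
 * my_dev, my_burst_removed and my_dev_close are hypothetical names, and the
 * sketch assumes the datapath re-reads the burst pointer on every poll.
 * Note the fence alone does not wait for cores already inside the old
 * function; mlx5 adds a usleep() grace period and vRouter uses
 * synchronize_rcu() for that, as later functions in this collection show.
 */
#include <rte_atomic.h>
#include <rte_mbuf.h>

struct my_dev {
    uint16_t (*rx_burst)(void *, struct rte_mbuf **, uint16_t);
    void *queues;
};

/* Stub burst function: returns no packets once the device is gone. */
static uint16_t
my_burst_removed(void *q, struct rte_mbuf **pkts, uint16_t n)
{
    (void)q; (void)pkts; (void)n;
    return 0;
}

static void
my_dev_close(struct my_dev *dev)
{
    dev->rx_burst = my_burst_removed; /* stop accepting new work */
    rte_wmb();                        /* pointer visible before teardown */
    /* ... wait out in-flight bursts here (grace period) ... */
    /* ... now it is safe to free dev->queues ... */
}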
void
set_burst_fn(struct rte_eth_dev *dev, int force_safe)
{
    struct sub_device *sdev;
    uint8_t i;
    int need_safe;
    int safe_set;

    need_safe = force_safe;
    FOREACH_SUBDEV(sdev, i, dev)
        need_safe |= fs_rx_unsafe(sdev);
    safe_set = (dev->rx_pkt_burst == &failsafe_rx_burst);
    if (need_safe && !safe_set) {
        DEBUG("Using safe RX bursts%s",
              (force_safe ? " (forced)" : ""));
        dev->rx_pkt_burst = &failsafe_rx_burst;
    } else if (!need_safe && safe_set) {
        DEBUG("Using fast RX bursts");
        dev->rx_pkt_burst = &failsafe_rx_burst_fast;
    }
    need_safe = force_safe || fs_tx_unsafe(TX_SUBDEV(dev));
    safe_set = (dev->tx_pkt_burst == &failsafe_tx_burst);
    if (need_safe && !safe_set) {
        DEBUG("Using safe TX bursts%s",
              (force_safe ? " (forced)" : ""));
        dev->tx_pkt_burst = &failsafe_tx_burst;
    } else if (!need_safe && safe_set) {
        DEBUG("Using fast TX bursts");
        dev->tx_pkt_burst = &failsafe_tx_burst_fast;
    }
    rte_wmb();
}
/* Init eth TX queue */
struct vr_dpdk_queue *
vr_dpdk_ethdev_tx_queue_init(unsigned lcore_id, struct vr_interface *vif,
    unsigned queue_or_lcore_id)
{
    uint8_t port_id;
    uint16_t tx_queue_id = queue_or_lcore_id;
    unsigned int vif_idx = vif->vif_idx, dpdk_queue_index;
    const unsigned int socket_id = rte_lcore_to_socket_id(lcore_id);
    struct vr_dpdk_ethdev *ethdev;
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    struct vr_dpdk_queue *tx_queue;
    struct vr_dpdk_queue_params *tx_queue_params;

    ethdev = (struct vr_dpdk_ethdev *)vif->vif_os;
    port_id = ethdev->ethdev_port_id;

    if (lcore->lcore_hw_queue_to_dpdk_index[vif->vif_idx]) {
        dpdk_queue_index =
            lcore->lcore_hw_queue_to_dpdk_index[vif->vif_idx][tx_queue_id];
    } else {
        dpdk_queue_index = 0;
    }

    tx_queue = &lcore->lcore_tx_queues[vif_idx][dpdk_queue_index];
    tx_queue_params = &lcore->lcore_tx_queue_params[vif_idx][dpdk_queue_index];

    /* init queue */
    tx_queue->txq_ops = rte_port_ethdev_writer_ops;
    tx_queue->q_queue_h = NULL;
    tx_queue->q_vif = vrouter_get_interface(vif->vif_rid, vif_idx);

    /* create the queue */
    struct rte_port_ethdev_writer_params writer_params = {
        .port_id = port_id,
        .queue_id = tx_queue_id,
        .tx_burst_sz = VR_DPDK_TX_BURST_SZ,
    };
    tx_queue->q_queue_h = tx_queue->txq_ops.f_create(&writer_params, socket_id);
    if (tx_queue->q_queue_h == NULL) {
        RTE_LOG(ERR, VROUTER, " error creating eth device %" PRIu8
                " TX queue %" PRIu16 "\n", port_id, tx_queue_id);
        return NULL;
    }

    /* store queue params */
    tx_queue_params->qp_release_op = &dpdk_ethdev_tx_queue_release;
    tx_queue_params->qp_ethdev.queue_id = tx_queue_id;
    tx_queue_params->qp_ethdev.port_id = port_id;

    /* for the queue 0 add queue params to the list of bonds to TX */
    if (ethdev->ethdev_nb_slaves > 0 && tx_queue_id == 0) {
        /* make sure queue params have been stored */
        rte_wmb();
        lcore->lcore_bonds_to_tx[lcore->lcore_nb_bonds_to_tx++] =
            tx_queue_params;
        RTE_VERIFY(lcore->lcore_nb_bonds_to_tx <= VR_DPDK_MAX_BONDS);
    }

    return tx_queue;
}
static int
i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
{
    struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
    struct i40e_hmc_obj_rxq rx_ctx;
    int err = I40E_SUCCESS;

    memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq));
    /* Init the RX queue in hardware */
    rx_ctx.dbuff = I40E_RXBUF_SZ_1024 >> I40E_RXQ_CTX_DBUFF_SHIFT;
    rx_ctx.hbuff = 0;
    rx_ctx.base = rxq->rx_ring_phys_addr / I40E_QUEUE_BASE_ADDR_UNIT;
    rx_ctx.qlen = rxq->nb_rx_desc;
#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
    rx_ctx.dsize = 1;
#endif
    rx_ctx.dtype = i40e_header_split_none;
    rx_ctx.hsplit_0 = I40E_HEADER_SPLIT_NONE;
    rx_ctx.rxmax = ETHER_MAX_LEN;
    rx_ctx.tphrdesc_ena = 1;
    rx_ctx.tphwdesc_ena = 1;
    rx_ctx.tphdata_ena = 1;
    rx_ctx.tphhead_ena = 1;
    rx_ctx.lrxqthresh = 2;
    rx_ctx.crcstrip = 0;
    rx_ctx.l2tsel = 1;
    rx_ctx.showiv = 0;
    rx_ctx.prefena = 1;

    err = i40e_clear_lan_rx_queue_context(hw, rxq->reg_idx);
    if (err != I40E_SUCCESS) {
        PMD_DRV_LOG(ERR, "Failed to clear FDIR RX queue context.");
        return err;
    }
    err = i40e_set_lan_rx_queue_context(hw, rxq->reg_idx, &rx_ctx);
    if (err != I40E_SUCCESS) {
        PMD_DRV_LOG(ERR, "Failed to set FDIR RX queue context.");
        return err;
    }
    rxq->qrx_tail = hw->hw_addr + I40E_QRX_TAIL(rxq->vsi->base_queue);

    rte_wmb();
    /* Init the RX tail register. */
    I40E_PCI_REG_WRITE(rxq->qrx_tail, 0);
    I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);

    return err;
}
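/*
 * i40e_fdir_rx_queue_init() above shows the classic doorbell ordering rule:
 * rte_wmb() sits between the writes to descriptor-ring memory and the write
 * to the tail register, so the NIC never observes the doorbell before the
 * ring contents. A minimal sketch of that rule follows, mirroring the
 * rte_wmb() use above; my_ring and my_ring_post are hypothetical, and a
 * plain volatile MMIO store stands in for the driver's I40E_PCI_REG_WRITE
 * macro.
 */
#include <stdint.h>
#include <rte_atomic.h>

struct my_ring {
    volatile uint32_t *tail_reg; /* device tail doorbell (MMIO) */
    uint64_t *descs;             /* descriptor ring in DMA memory */
};

static void
my_ring_post(struct my_ring *r, uint16_t idx, uint64_t desc)
{
    r->descs[idx] = desc; /* 1. fill the descriptor */
    rte_wmb();            /* 2. ring memory visible before the doorbell */
    *r->tail_reg = idx;   /* 3. tell the device about the new descriptor */
}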
/*
 * Create a scheduler on the current lcore
 */
struct lthread_sched *
_lthread_sched_create(size_t stack_size)
{
    int status;
    struct lthread_sched *new_sched;
    unsigned lcoreid = rte_lcore_id();

    RTE_ASSERT(stack_size <= LTHREAD_MAX_STACK_SIZE);

    if (stack_size == 0)
        stack_size = LTHREAD_MAX_STACK_SIZE;

    new_sched =
        rte_calloc_socket(NULL, 1, sizeof(struct lthread_sched),
                          RTE_CACHE_LINE_SIZE, rte_socket_id());
    if (new_sched == NULL) {
        RTE_LOG(CRIT, LTHREAD,
                "Failed to allocate memory for scheduler\n");
        return NULL;
    }

    _lthread_key_pool_init();

    new_sched->stack_size = stack_size;
    new_sched->birth = rte_rdtsc();
    THIS_SCHED = new_sched;

    status = _lthread_sched_alloc_resources(new_sched);
    if (status != SCHED_ALLOC_OK) {
        RTE_LOG(CRIT, LTHREAD,
                "Failed to allocate resources for scheduler code = %d\n",
                status);
        rte_free(new_sched);
        return NULL;
    }

    bzero(&new_sched->ctx, sizeof(struct ctx));

    new_sched->lcore_id = lcoreid;

    schedcore[lcoreid] = new_sched;

    new_sched->run_flag = 1;
    DIAG_EVENT(new_sched, LT_DIAG_SCHED_CREATE, rte_lcore_id(), 0);

    rte_wmb();
    return new_sched;
}
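/*
 * _lthread_sched_create() makes a freshly built scheduler reachable through
 * the global schedcore[] array. The underlying idea is "initialize fully,
 * fence, publish": a reader that finds the pointer must also see every
 * field written before it. A minimal sketch of that shape follows, with
 * hypothetical names (my_obj, g_slots, my_publish); note it fences before
 * the publishing store, whereas the function above issues its rte_wmb()
 * after the stores and before returning.
 */
#include <rte_atomic.h>

struct my_obj {
    int ready;
    /* ... other fields initialized before publication ... */
};

static struct my_obj *g_slots[64]; /* hypothetical shared directory */

static void
my_publish(unsigned slot, struct my_obj *obj)
{
    obj->ready = 1;      /* finish initializing the object */
    rte_wmb();           /* fields visible before the pointer */
    g_slots[slot] = obj; /* readers may now discover obj */
}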
/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx4_dev_stop(struct rte_eth_dev *dev)
{
    struct priv *priv = dev->data->dev_private;

    if (!priv->started)
        return;
    DEBUG("%p: detaching flows from all RX queues", (void *)dev);
    priv->started = 0;
    dev->tx_pkt_burst = mlx4_tx_burst_removed;
    dev->rx_pkt_burst = mlx4_rx_burst_removed;
    rte_wmb();
    mlx4_flow_sync(priv, NULL);
    mlx4_rxq_intr_disable(priv);
    mlx4_rss_deinit(priv);
}
/*
 * vr_dpdk_virtio_stop - stop the virtio interface.
 *
 * Returns 0 on success, -1 otherwise.
 */
int
vr_dpdk_virtio_stop(unsigned int vif_idx)
{
    int i;

    if (vif_idx >= VR_MAX_INTERFACES) {
        return -1;
    }

    /* Disable and reset all the virtio queues. */
    for (i = 0; i < VR_DPDK_VIRTIO_MAX_QUEUES*2; i++) {
        vr_dpdk_set_virtq_ready(vif_idx, i, VQ_NOT_READY);
    }
    rte_wmb();
    synchronize_rcu();

    return 0;
}
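/*
 * vr_dpdk_virtio_stop() pairs rte_wmb() with synchronize_rcu(): the fence
 * makes the VQ_NOT_READY stores visible, and the RCU grace period then
 * waits until every forwarding lcore has passed a quiescent state, i.e. can
 * no longer be mid-poll on a queue it saw as ready. A minimal sketch of the
 * same quiesce step follows; it assumes the QSBR flavor of liburcu (the
 * include path and flavor depend on how liburcu is configured, and readers
 * must be registered with it), and my_queues_stop/vq_state are hypothetical
 * stand-ins for the per-queue ready flags above.
 */
#include <rte_atomic.h>
#include <urcu-qsbr.h> /* assumption: QSBR flavor of userspace RCU */

enum { MY_VQ_NOT_READY = 0, MY_VQ_READY = 1 };
static volatile int vq_state[16]; /* hypothetical per-queue ready flags */

static void
my_queues_stop(void)
{
    int i;

    for (i = 0; i < 16; i++)
        vq_state[i] = MY_VQ_NOT_READY; /* datapath checks this per poll */
    rte_wmb();                         /* flags visible on all cores */
    synchronize_rcu();                 /* wait out in-flight pollers */
}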
/**
 * DPDK callback to start the device.
 *
 * Simulate device start by initializing common RSS resources and attaching
 * all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_dev_start(struct rte_eth_dev *dev)
{
    struct priv *priv = dev->data->dev_private;
    struct rte_flow_error error;
    int ret;

    if (priv->started)
        return 0;
    DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
    priv->started = 1;
    ret = mlx4_rss_init(priv);
    if (ret) {
        ERROR("%p: cannot initialize RSS resources: %s",
              (void *)dev, strerror(-ret));
        goto err;
    }
#ifndef NDEBUG
    mlx4_mr_dump_dev(dev);
#endif
    ret = mlx4_rxq_intr_enable(priv);
    if (ret) {
        ERROR("%p: interrupt handler installation failed",
              (void *)dev);
        goto err;
    }
    ret = mlx4_flow_sync(priv, &error);
    if (ret) {
        ERROR("%p: cannot attach flow rules (code %d, \"%s\"),"
              " flow error type %d, cause %p, message: %s",
              (void *)dev,
              -ret, strerror(-ret), error.type, error.cause,
              error.message ? error.message : "(unspecified)");
        goto err;
    }
    rte_wmb();
    dev->tx_pkt_burst = mlx4_tx_burst;
    dev->rx_pkt_burst = mlx4_rx_burst;
    return 0;
err:
    mlx4_dev_stop(dev);
    return ret;
}
/*
 * vr_dpdk_knidev_release - release KNI interface and remove it from the
 * global list.
 *
 * Returns 0 on success, < 0 otherwise.
 */
int
vr_dpdk_knidev_release(struct vr_interface *vif)
{
    int i;
    struct rte_kni *kni = vif->vif_os;

    vif->vif_os = NULL;

    /* delete the interface from the table of KNIs */
    for (i = 0; i < VR_DPDK_MAX_KNI_INTERFACES; i++) {
        if (vr_dpdk.knis[i] == kni) {
            vr_dpdk.knis[i] = NULL;
            break;
        }
    }

    rte_wmb();

    return rte_kni_release(kni);
}
/*
 * vr_dpdk_virtio_get_vring_base - gets the vring base for the specified
 * vring sent by the vhost client.
 *
 * Returns 0 on success, -1 otherwise.
 */
int
vr_dpdk_virtio_get_vring_base(unsigned int vif_idx, unsigned int vring_idx,
                              unsigned int *vring_basep)
{
    vr_dpdk_virtioq_t *vq;

    if ((vif_idx >= VR_MAX_INTERFACES) ||
            (vring_idx >= (2 * VR_DPDK_VIRTIO_MAX_QUEUES))) {
        return -1;
    }

    /*
     * RX rings are even numbered and TX rings are odd numbered from the
     * VM's point of view. From vrouter's point of view, VM's TX ring is
     * vrouter's RX ring and vice versa.
     */
    if (vring_idx & 1) {
        vq = &vr_dpdk_virtio_rxqs[vif_idx][vring_idx/2];
    } else {
        vq = &vr_dpdk_virtio_txqs[vif_idx][vring_idx/2];
    }

    *vring_basep = vq->vdv_last_used_idx;

    /*
     * This is usually called when qemu shuts down a virtio queue. Set the
     * state to indicate that this queue should not be used any more.
     */
    vq->vdv_ready_state = VQ_NOT_READY;
    rte_wmb();
    synchronize_rcu();

    /* Reset the queue. We reset only those values we analyze in
     * uvhm_check_vring_ready()
     */
    vq->vdv_desc = NULL;
    if (vq->vdv_callfd) {
        close(vq->vdv_callfd);
        vq->vdv_callfd = 0;
    }

    return 0;
}
/*
 * dpdk_virtio_tx_queue_release - releases a virtio TX queue.
 *
 * Returns nothing.
 */
static void
dpdk_virtio_tx_queue_release(unsigned lcore_id, struct vr_interface *vif)
{
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    struct vr_dpdk_queue *tx_queue = &lcore->lcore_tx_queues[vif->vif_idx];
    struct vr_dpdk_queue_params *tx_queue_params
                    = &lcore->lcore_tx_queue_params[vif->vif_idx];

    tx_queue->txq_ops.f_tx = NULL;
    rte_wmb();

    /* flush and free the queue */
    if (tx_queue->txq_ops.f_free(tx_queue->q_queue_h)) {
        RTE_LOG(ERR, VROUTER, " error freeing lcore %u virtio device TX queue\n",
                lcore_id);
    }

    /* reset the queue */
    vrouter_put_interface(tx_queue->q_vif);
    memset(tx_queue, 0, sizeof(*tx_queue));
    memset(tx_queue_params, 0, sizeof(*tx_queue_params));
}
/* Init KNI RX queue */
struct vr_dpdk_queue *
vr_dpdk_kni_rx_queue_init(unsigned lcore_id, struct vr_interface *vif,
    unsigned host_lcore_id)
{
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    const unsigned socket_id = rte_lcore_to_socket_id(lcore_id);
    uint8_t port_id = 0;
    unsigned vif_idx = vif->vif_idx;
    struct vr_dpdk_queue *rx_queue = &lcore->lcore_rx_queues[vif_idx];
    struct vr_dpdk_queue_params *rx_queue_params
                    = &lcore->lcore_rx_queue_params[vif_idx];

    if (vif->vif_type == VIF_TYPE_HOST) {
        port_id = (((struct vr_dpdk_ethdev *)(vif->vif_bridge->vif_os))->
                ethdev_port_id);
    }

    /* init queue */
    rx_queue->rxq_ops = dpdk_knidev_reader_ops;
    rx_queue->q_queue_h = NULL;
    rx_queue->q_vif = vrouter_get_interface(vif->vif_rid, vif_idx);

    /* create the queue */
    struct dpdk_knidev_reader_params reader_params = {
        .kni = vif->vif_os,
    };
    rx_queue->q_queue_h = rx_queue->rxq_ops.f_create(&reader_params, socket_id);
    if (rx_queue->q_queue_h == NULL) {
        RTE_LOG(ERR, VROUTER, " error creating KNI device %s RX queue"
                " at eth device %" PRIu8 "\n", vif->vif_name, port_id);
        return NULL;
    }

    /* store queue params */
    rx_queue_params->qp_release_op = &dpdk_kni_rx_queue_release;

    return rx_queue;
}

/* Release KNI TX queue */
static void
dpdk_kni_tx_queue_release(unsigned lcore_id, struct vr_interface *vif)
{
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    struct vr_dpdk_queue *tx_queue = &lcore->lcore_tx_queues[vif->vif_idx];
    struct vr_dpdk_queue_params *tx_queue_params
                    = &lcore->lcore_tx_queue_params[vif->vif_idx];

    tx_queue->txq_ops.f_tx = NULL;
    rte_wmb();

    /* flush and free the queue */
    if (tx_queue->txq_ops.f_free(tx_queue->q_queue_h)) {
        RTE_LOG(ERR, VROUTER, " error freeing lcore %u KNI device TX queue\n",
                lcore_id);
    }

    /* reset the queue */
    vrouter_put_interface(tx_queue->q_vif);
    memset(tx_queue, 0, sizeof(*tx_queue));
    memset(tx_queue_params, 0, sizeof(*tx_queue_params));
}

/* Init KNI TX queue */
struct vr_dpdk_queue *
vr_dpdk_kni_tx_queue_init(unsigned lcore_id, struct vr_interface *vif,
    unsigned host_lcore_id)
{
    struct vr_dpdk_lcore *lcore = vr_dpdk.lcores[lcore_id];
    const unsigned socket_id = rte_lcore_to_socket_id(lcore_id);
    uint8_t port_id = 0;
    unsigned vif_idx = vif->vif_idx;
    struct vr_dpdk_queue *tx_queue = &lcore->lcore_tx_queues[vif_idx];
    struct vr_dpdk_queue_params *tx_queue_params
                    = &lcore->lcore_tx_queue_params[vif_idx];
    struct vr_dpdk_ethdev *ethdev;

    if (vif->vif_type == VIF_TYPE_HOST) {
        ethdev = vif->vif_bridge->vif_os;
        if (ethdev == NULL) {
            RTE_LOG(ERR, VROUTER, " error creating KNI device %s TX queue:"
                    " bridge vif %u ethdev is not initialized\n",
                    vif->vif_name, vif->vif_bridge->vif_idx);
            return NULL;
        }
        port_id = ethdev->ethdev_port_id;
    }

    /* init queue */
    tx_queue->txq_ops = dpdk_knidev_writer_ops;
    tx_queue->q_queue_h = NULL;
    tx_queue->q_vif = vrouter_get_interface(vif->vif_rid, vif_idx);

    /* create the queue */
    struct dpdk_knidev_writer_params writer_params = {
        .kni = vif->vif_os,
        .tx_burst_sz = VR_DPDK_TX_BURST_SZ,
    };
    tx_queue->q_queue_h = tx_queue->txq_ops.f_create(&writer_params, socket_id);
    if (tx_queue->q_queue_h == NULL) {
        RTE_LOG(ERR, VROUTER, " error creating KNI device %s TX queue"
                " at eth device %" PRIu8 "\n", vif->vif_name, port_id);
        return NULL;
    }

    /* store queue params */
    tx_queue_params->qp_release_op = &dpdk_kni_tx_queue_release;

    return tx_queue;
}

/* Change KNI MTU size callback */
static int
dpdk_knidev_change_mtu(uint8_t port_id, unsigned new_mtu)
{
    struct vrouter *router = vrouter_get(0);
    struct vr_interface *vif;
    int i, ret;
    uint8_t ethdev_port_id, slave_port_id;
    struct vr_dpdk_ethdev *ethdev = NULL;

    RTE_LOG(INFO, VROUTER, "Changing eth device %" PRIu8 " MTU to %u\n",
            port_id, new_mtu);
    if (port_id >= rte_eth_dev_count()) {
        RTE_LOG(ERR, VROUTER,
                "Error changing eth device %" PRIu8 " MTU: invalid eth device\n",
                port_id);
        return -EINVAL;
    }

    /*
     * TODO: DPDK bond PMD does not implement mtu_set op, so we need to
     * set the MTU manually for all the slaves.
     */
    /* Bond vif uses first slave port ID. */
    if (router->vr_eth_if) {
        ethdev = (struct vr_dpdk_ethdev *)router->vr_eth_if->vif_os;
        if (ethdev && ethdev->ethdev_nb_slaves > 0) {
            for (i = 0; i < ethdev->ethdev_nb_slaves; i++) {
                if (port_id == ethdev->ethdev_slaves[i])
                    break;
            }
            /* Clear ethdev if no port matches. */
            if (i >= ethdev->ethdev_nb_slaves)
                ethdev = NULL;
        }
    }

    if (ethdev && ethdev->ethdev_nb_slaves > 0) {
        for (i = 0; i < ethdev->ethdev_nb_slaves; i++) {
            slave_port_id = ethdev->ethdev_slaves[i];
            RTE_LOG(INFO, VROUTER,
                    " changing bond member eth device %" PRIu8 " MTU to %u\n",
                    slave_port_id, new_mtu);

            ret = rte_eth_dev_set_mtu(slave_port_id, new_mtu);
            if (ret < 0) {
                RTE_LOG(ERR, VROUTER,
                        " error changing bond member eth device %" PRIu8
                        " MTU: %s (%d)\n",
                        slave_port_id, rte_strerror(-ret), -ret);
                return ret;
            }
        }
    } else {
        ret = rte_eth_dev_set_mtu(port_id, new_mtu);
        if (ret < 0) {
            RTE_LOG(ERR, VROUTER,
                    "Error changing eth device %" PRIu8 " MTU: %s (%d)\n",
                    port_id, rte_strerror(-ret), -ret);
        }
        return ret;
    }

    /* On success, inform vrouter about new MTU */
    for (i = 0; i < router->vr_max_interfaces; i++) {
        vif = __vrouter_get_interface(router, i);
        if (vif && (vif->vif_type == VIF_TYPE_PHYSICAL)) {
            ethdev_port_id = (((struct vr_dpdk_ethdev *)(vif->vif_os))->
                    ethdev_port_id);
            if (ethdev_port_id == port_id) {
                /* Ethernet header size */
                new_mtu += sizeof(struct vr_eth);
                if (vr_dpdk.vlan_tag != VLAN_ID_INVALID) {
                    /* 802.1q header size */
                    new_mtu += sizeof(uint32_t);
                }
                vif->vif_mtu = new_mtu;
                if (vif->vif_bridge)
                    vif->vif_bridge->vif_mtu = new_mtu;
            }
        }
    }

    return 0;
}

/* Configure KNI state callback */
static int
dpdk_knidev_config_network_if(uint8_t port_id, uint8_t if_up)
{
    int ret = 0;

    RTE_LOG(INFO, VROUTER, "Configuring eth device %" PRIu8 " %s\n",
            port_id, if_up ? "UP" : "DOWN");
    if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) {
        RTE_LOG(ERR, VROUTER, "Invalid eth device %" PRIu8 "\n", port_id);
        return -EINVAL;
    }

    if (if_up)
        ret = rte_eth_dev_start(port_id);
    else
        rte_eth_dev_stop(port_id);

    if (ret < 0) {
        RTE_LOG(ERR, VROUTER, "Configuring eth device %" PRIu8 " UP"
                " failed (%d)\n", port_id, ret);
    }

    return ret;
}

/* Init KNI */
int
vr_dpdk_knidev_init(uint8_t port_id, struct vr_interface *vif)
{
    int i;
    struct rte_eth_dev_info dev_info;
    struct rte_kni_conf kni_conf;
    struct rte_kni_ops kni_ops;
    struct rte_kni *kni;
    struct rte_config *rte_conf = rte_eal_get_configuration();

    if (!vr_dpdk.kni_inited) {
        /*
         * If the host does not support KNIs (e.g. RedHat), we'll get
         * a panic here.
         */
        rte_kni_init(VR_DPDK_MAX_KNI_INTERFACES);
        vr_dpdk.kni_inited = true;
    }

    /* get eth device info */
    memset(&dev_info, 0, sizeof(dev_info));
    rte_eth_dev_info_get(port_id, &dev_info);

    /* create KNI configuration */
    memset(&kni_conf, 0, sizeof(kni_conf));
    strncpy(kni_conf.name, (char *)vif->vif_name, sizeof(kni_conf.name) - 1);

    kni_conf.addr = dev_info.pci_dev->addr;
    kni_conf.id = dev_info.pci_dev->id;
    kni_conf.group_id = port_id;
    kni_conf.mbuf_size = VR_DPDK_MAX_PACKET_SZ;

    /*
     * Due to DPDK commit 41a6ebd, to prevent packet reordering in KNI
     * we have to bind the KNI kernel thread to the first online unused CPU.
     */
    for (i = 0; i < RTE_MAX_LCORE; i++) {
        if (lcore_config[i].detected
                && rte_conf->lcore_role[VR_DPDK_FWD_LCORE_ID + i] == ROLE_OFF) {
            kni_conf.force_bind = 1;
            kni_conf.core_id = i;
            RTE_LOG(INFO, VROUTER, " bind KNI kernel thread to CPU %d\n", i);
            break;
        }
    }

    /* KNI options
     *
     * Changing state of the KNI interface can change state of the physical
     * interface. This is useful for the vhost, but not for the VLAN
     * forwarding interface.
     */
    if (vif->vif_type == VIF_TYPE_VLAN) {
        memset(&kni_ops, 0, sizeof(kni_ops));
    } else {
        kni_ops.port_id = port_id;
        kni_ops.change_mtu = dpdk_knidev_change_mtu;
        kni_ops.config_network_if = dpdk_knidev_config_network_if;
    }

    /* allocate KNI device */
    kni = rte_kni_alloc(vr_dpdk.rss_mempool, &kni_conf, &kni_ops);
    if (kni == NULL) {
        RTE_LOG(ERR, VROUTER, " error allocating KNI device %s"
                " at eth device %" PRIu8 "\n", vif->vif_name, port_id);
        return -ENOMEM;
    }

    /* store pointer to KNI for further use */
    vif->vif_os = kni;

    /* add interface to the table of KNIs */
    for (i = 0; i < VR_DPDK_MAX_KNI_INTERFACES; i++) {
        if (vr_dpdk.knis[i] == NULL) {
            vr_dpdk.knis[i] = vif->vif_os;
            break;
        }
    }

    return 0;
}
/*
 * vr_uvh_cl_msg_handler - handler for messages from user space vhost
 * clients. Calls the appropriate handler based on the message type.
 *
 * Returns 0 on success, -1 on error.
 *
 * TODO: upon error, this function currently makes the process exit.
 * Instead, it should close the socket and continue serving other clients.
 */
static int
vr_uvh_cl_msg_handler(int fd, void *arg)
{
    vr_uvh_client_t *vru_cl = (vr_uvh_client_t *) arg;
    struct msghdr mhdr;
    struct iovec iov;
    int i, err, ret = 0, read_len = 0;
    struct cmsghdr *cmsg;

    memset(&mhdr, 0, sizeof(mhdr));

    if (vru_cl->vruc_msg_bytes_read == 0) {
        mhdr.msg_control = &vru_cl->vruc_cmsg;
        mhdr.msg_controllen = sizeof(vru_cl->vruc_cmsg);

        iov.iov_base = (void *) &vru_cl->vruc_msg;
        iov.iov_len = VHOST_USER_HSIZE;

        mhdr.msg_iov = &iov;
        mhdr.msg_iovlen = 1;

        ret = recvmsg(fd, &mhdr, MSG_DONTWAIT);
        if (ret < 0) {
            if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
                ret = 0;
                goto cleanup;
            }

            vr_uvhost_log("Receive returned %d in vhost server for client %s\n",
                          ret, vru_cl->vruc_path);
            ret = -1;
            goto cleanup;
        } else if (ret > 0) {
            if (mhdr.msg_flags & MSG_CTRUNC) {
                vr_uvhost_log("Truncated control message from vhost client %s\n",
                              vru_cl->vruc_path);
                ret = -1;
                goto cleanup;
            }

            cmsg = CMSG_FIRSTHDR(&mhdr);
            if (cmsg && (cmsg->cmsg_len > 0) &&
                    (cmsg->cmsg_level == SOL_SOCKET) &&
                    (cmsg->cmsg_type == SCM_RIGHTS)) {
                vru_cl->vruc_num_fds_sent = (cmsg->cmsg_len - CMSG_LEN(0))/
                                                sizeof(int);
                if (vru_cl->vruc_num_fds_sent > VHOST_MEMORY_MAX_NREGIONS) {
                    vr_uvhost_log("Too many FDs sent for client %s: %d\n",
                                  vru_cl->vruc_path,
                                  vru_cl->vruc_num_fds_sent);
                    vru_cl->vruc_num_fds_sent = VHOST_MEMORY_MAX_NREGIONS;
                }
                memcpy(vru_cl->vruc_fds_sent, CMSG_DATA(cmsg),
                       vru_cl->vruc_num_fds_sent*sizeof(int));
            }

            vru_cl->vruc_msg_bytes_read = ret;
            if (ret < VHOST_USER_HSIZE) {
                ret = 0;
                goto cleanup;
            }

            read_len = vru_cl->vruc_msg.size;
        } else {
            /*
             * recvmsg returned 0, so return error.
             */
            vr_uvhost_log("Receive returned %d in vhost server for client %s\n",
                          ret, vru_cl->vruc_path);
            ret = -1;
            goto cleanup;
        }
    } else if (vru_cl->vruc_msg_bytes_read < VHOST_USER_HSIZE) {
        read_len = VHOST_USER_HSIZE - vru_cl->vruc_msg_bytes_read;
    } else {
        read_len = vru_cl->vruc_msg.size -
                       (vru_cl->vruc_msg_bytes_read - VHOST_USER_HSIZE);
    }

    if (read_len) {
        if (vru_cl->vruc_owner != pthread_self()) {
            if (vru_cl->vruc_owner)
                RTE_LOG(WARNING, UVHOST,
                        "WARNING: thread %lx is trying to read"
                        " uvhost client FD %d owned by thread %lx\n",
                        pthread_self(), fd, vru_cl->vruc_owner);
            vru_cl->vruc_owner = pthread_self();
        }
        ret = read(fd,
                   (((char *)&vru_cl->vruc_msg) + vru_cl->vruc_msg_bytes_read),
                   read_len);
#ifdef VR_DPDK_RX_PKT_DUMP
        if (ret > 0) {
            RTE_LOG(DEBUG, UVHOST, "%s[%lx]: FD %d read %d bytes\n",
                    __func__, pthread_self(), fd, ret);
            rte_hexdump(stdout, "uvhost full message dump:",
                        (((char *)&vru_cl->vruc_msg)),
                        ret + vru_cl->vruc_msg_bytes_read);
        } else if (ret < 0) {
            RTE_LOG(DEBUG, UVHOST,
                    "%s[%lx]: FD %d read returned error %d: %s (%d)\n",
                    __func__, pthread_self(), fd, ret,
                    rte_strerror(errno), errno);
        }
#endif
        if (ret < 0) {
            if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
                ret = 0;
                goto cleanup;
            }

            vr_uvhost_log(
                "Error: read returned %d, %d %d %d in vhost server for client %s\n",
                ret, errno, read_len, vru_cl->vruc_msg_bytes_read,
                vru_cl->vruc_path);
            ret = -1;
            goto cleanup;
        } else if (ret == 0) {
            vr_uvhost_log("Read returned %d in vhost server for client %s\n",
                          ret, vru_cl->vruc_path);
            ret = -1;
            goto cleanup;
        }

        vru_cl->vruc_msg_bytes_read += ret;
        if (vru_cl->vruc_msg_bytes_read < VHOST_USER_HSIZE) {
            ret = 0;
            goto cleanup;
        }

        if (vru_cl->vruc_msg_bytes_read <
                (vru_cl->vruc_msg.size + VHOST_USER_HSIZE)) {
            ret = 0;
            goto cleanup;
        }
    }

    ret = vr_uvh_cl_call_handler(vru_cl);
    if (ret < 0) {
        vr_uvhost_log("Error handling message %d client %s\n",
                      vru_cl->vruc_msg.request, vru_cl->vruc_path);
        ret = -1;
        goto cleanup;
    }

    ret = vr_uvh_cl_send_reply(fd, vru_cl);
    if (ret < 0) {
        vr_uvhost_log("Error sending reply for message %d client %s\n",
                      vru_cl->vruc_msg.request, vru_cl->vruc_path);
        ret = -1;
        goto cleanup;
    }

cleanup:
    err = errno;
    /* close all the FDs received */
    for (i = 0; i < vru_cl->vruc_num_fds_sent; i++) {
        if (vru_cl->vruc_fds_sent[i] > 0)
            close(vru_cl->vruc_fds_sent[i]);
    }
    if (ret == -1) {
        /* set VQ_NOT_READY state to vif's queues. */
        for (i = 0; i < VR_DPDK_VIRTIO_MAX_QUEUES; i++) {
            vr_dpdk_virtio_rxqs[vru_cl->vruc_idx][i].vdv_ready_state =
                VQ_NOT_READY;
            vr_dpdk_virtio_txqs[vru_cl->vruc_idx][i].vdv_ready_state =
                VQ_NOT_READY;
        }
        rte_wmb();
        synchronize_rcu();
        /*
         * Unmap qemu's FDs.
         */
        vr_dpdk_virtio_uvh_vif_munmap(
                &vr_dpdk_virtio_uvh_vif_mmap[vru_cl->vruc_idx]);
    }
    /* clear state for next message from this client. */
    vru_cl->vruc_msg_bytes_read = 0;
    memset(&vru_cl->vruc_msg, 0, sizeof(vru_cl->vruc_msg));
    memset(vru_cl->vruc_cmsg, 0, sizeof(vru_cl->vruc_cmsg));
    memset(vru_cl->vruc_fds_sent, 0, sizeof(vru_cl->vruc_fds_sent));
    vru_cl->vruc_num_fds_sent = 0;
    errno = err;

    return ret;
}
/**
 * DPDK callback to change the MTU.
 *
 * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be
 * received). Use this as a hint to enable/disable scattered packets support
 * and improve performance when not needed.
 * Since failure is not an option, reconfiguring queues on the fly is not
 * recommended.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param in_mtu
 *   New MTU.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
    struct priv *priv = dev->data->dev_private;
    int ret = 0;
    unsigned int i;
    uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
        mlx5_rx_burst;

    if (mlx5_is_secondary())
        return -E_RTE_SECONDARY;

    priv_lock(priv);
    /* Set kernel interface MTU first. */
    if (priv_set_mtu(priv, mtu)) {
        ret = errno;
        WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
             strerror(ret));
        goto out;
    } else
        DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
    priv->mtu = mtu;
    /* Temporarily replace RX handler with a fake one, assuming it has not
     * been copied elsewhere. */
    dev->rx_pkt_burst = removed_rx_burst;
    /* Make sure everyone has left mlx5_rx_burst() and uses
     * removed_rx_burst() instead. */
    rte_wmb();
    usleep(1000);
    /* Reconfigure each RX queue. */
    for (i = 0; (i != priv->rxqs_n); ++i) {
        struct rxq *rxq = (*priv->rxqs)[i];
        unsigned int max_frame_len;
        int sp;

        if (rxq == NULL)
            continue;
        /* Calculate new maximum frame length according to MTU and
         * toggle scattered support (sp) if necessary. */
        max_frame_len = (priv->mtu + ETHER_HDR_LEN +
                         (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
        sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM));
        /* Provide new values to rxq_setup(). */
        dev->data->dev_conf.rxmode.jumbo_frame = sp;
        dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
        ret = rxq_rehash(dev, rxq);
        if (ret) {
            /* Force SP RX if that queue requires it and abort. */
            if (rxq->sp)
                rx_func = mlx5_rx_burst_sp;
            break;
        }
        /* Scattered burst function takes priority. */
        if (rxq->sp)
            rx_func = mlx5_rx_burst_sp;
    }
    /* Burst functions can now be called again. */
    rte_wmb();
    dev->rx_pkt_burst = rx_func;
out:
    priv_unlock(priv);
    assert(ret >= 0);
    return -ret;
}
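/*
 * mlx5_dev_set_mtu() above shows a way to wait out the datapath when no RCU
 * machinery is available: swap in a stub burst function, fence, then sleep
 * for longer than any single burst can take before touching the queues. A
 * minimal sketch of that sequence follows; the stub-then-usleep(1000) shape
 * mirrors the function above, while my_port, removed_burst and
 * my_reconfigure are hypothetical names.
 */
#include <unistd.h>
#include <rte_atomic.h>
#include <rte_mbuf.h>

struct my_port {
    uint16_t (*rx_burst)(void *, struct rte_mbuf **, uint16_t);
};

/* Stub burst function used while the queues are being reconfigured. */
static uint16_t
removed_burst(void *q, struct rte_mbuf **pkts, uint16_t n)
{
    (void)q; (void)pkts; (void)n;
    return 0;
}

static void
my_reconfigure(struct my_port *port,
               uint16_t (*new_burst)(void *, struct rte_mbuf **, uint16_t))
{
    port->rx_burst = removed_burst; /* park the datapath */
    rte_wmb();                      /* stub visible before we wait */
    usleep(1000);                   /* let in-flight bursts drain */
    /* ... reconfigure the RX queues safely here ... */
    rte_wmb();                      /* queue state visible before pointer */
    port->rx_burst = new_burst;     /* bursts may be called again */
}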
/*
 * dpdk_virtio_from_vm_rx - receive packets from a virtio client so that
 * the packets can be handed to vrouter for forwarding. The virtio client
 * is usually a VM.
 *
 * Returns the number of packets received from the virtio.
 */
static int
dpdk_virtio_from_vm_rx(void *port, struct rte_mbuf **pkts, uint32_t max_pkts)
{
    struct dpdk_virtio_reader *p = (struct dpdk_virtio_reader *)port;
    vr_dpdk_virtioq_t *vq = p->rx_virtioq;
    uint16_t vq_hard_avail_idx, i;
    uint16_t avail_pkts, next_desc_idx, next_avail_idx;
    struct vring_desc *desc;
    char *pkt_addr, *tail_addr;
    struct rte_mbuf *mbuf;
    uint32_t pkt_len, nb_pkts = 0;
    vr_uvh_client_t *vru_cl;

    if (unlikely(vq->vdv_ready_state == VQ_NOT_READY)) {
        DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p is not ready\n",
                __func__, vq);
        return 0;
    }

    vru_cl = vr_dpdk_virtio_get_vif_client(vq->vdv_vif_idx);
    if (unlikely(vru_cl == NULL))
        return 0;

    vq_hard_avail_idx = (*((volatile uint16_t *)&vq->vdv_avail->idx));

    /* Unsigned subtraction gives the right result even with wrap around. */
    avail_pkts = vq_hard_avail_idx - vq->vdv_last_used_idx;
    avail_pkts = RTE_MIN(avail_pkts, max_pkts);
    if (unlikely(avail_pkts == 0)) {
        DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p has no packets\n",
                    __func__, vq);
        return 0;
    }

    DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p AVAILABLE %u packets\n",
            __func__, vq, avail_pkts);

    for (i = 0; i < avail_pkts; i++) {
        /* Allocate a mbuf. */
        mbuf = rte_pktmbuf_alloc(vr_dpdk.rss_mempool);
        if (unlikely(mbuf == NULL)) {
            p->nb_nombufs++;
            DPDK_UDEBUG(VROUTER, &vq->vdv_hash,
                    "%s: queue %p no_mbufs=%"PRIu64"\n",
                    __func__, vq, p->nb_nombufs);
            break;
        }

        next_avail_idx = (vq->vdv_last_used_idx + i) & (vq->vdv_size - 1);
        next_desc_idx = vq->vdv_avail->ring[next_avail_idx];
        /*
         * Move the (chain of) descriptors to the used list. The used
         * index will, however, only be updated at the end of the loop.
         */
        vq->vdv_used->ring[next_avail_idx].id = next_desc_idx;
        vq->vdv_used->ring[next_avail_idx].len = 0;

        desc = &vq->vdv_desc[next_desc_idx];
        pkt_len = desc->len;
        pkt_addr = vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
        /* Check the descriptor is sane. */
        if (unlikely(desc->len < sizeof(struct virtio_net_hdr) ||
                desc->addr == 0 || pkt_addr == NULL)) {
            goto free_mbuf;
        }
        /* Now pkt_addr points to the virtio_net_hdr. */
        if (((struct virtio_net_hdr *)pkt_addr)->flags
                & VIRTIO_NET_HDR_F_NEEDS_CSUM)
            mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;

        /* Skip virtio_net_hdr as we don't support mergeable receive
         * buffers. */
        if (likely(desc->flags & VRING_DESC_F_NEXT &&
                pkt_len == sizeof(struct virtio_net_hdr))) {
            DPDK_UDEBUG(VROUTER, &vq->vdv_hash,
                    "%s: queue %p pkt %u F_NEXT\n", __func__, vq, i);
            desc = &vq->vdv_desc[desc->next];
            pkt_len = desc->len;
            pkt_addr = vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
        } else {
            DPDK_UDEBUG(VROUTER, &vq->vdv_hash,
                    "%s: queue %p pkt %u no F_NEXT\n", __func__, vq, i);
            pkt_addr += sizeof(struct virtio_net_hdr);
            pkt_len -= sizeof(struct virtio_net_hdr);
        }
        /* Now pkt_addr points to the packet data. */

        tail_addr = rte_pktmbuf_append(mbuf, pkt_len);
        /* Check we are ready to copy the data. */
        if (unlikely(desc->addr == 0 || pkt_addr == NULL ||
                tail_addr == NULL)) {
            goto free_mbuf;
        }
        /* Copy first descriptor data. */
        rte_memcpy(tail_addr, pkt_addr, pkt_len);

        /*
         * Gather mbuf from several virtio buffers. We do not support mbuf
         * chains, so all virtio buffers should fit into one mbuf.
         */
        while (unlikely(desc->flags & VRING_DESC_F_NEXT)) {
            desc = &vq->vdv_desc[desc->next];
            pkt_len = desc->len;
            pkt_addr = vr_dpdk_guest_phys_to_host_virt(vru_cl, desc->addr);
            tail_addr = rte_pktmbuf_append(mbuf, pkt_len);
            /* Check we are ready to copy the data. */
            if (unlikely(desc->addr == 0 || pkt_addr == NULL ||
                    tail_addr == NULL)) {
                goto free_mbuf;
            }
            /* Append next descriptor(s) data. */
            rte_memcpy(tail_addr, pkt_addr, pkt_len);
        }

        pkts[nb_pkts] = mbuf;
        nb_pkts++;
        continue;

    free_mbuf:
        DPDK_VIRTIO_READER_STATS_PKTS_DROP_ADD(p, 1);
        rte_pktmbuf_free(mbuf);
    }

    /*
     * Do not call the guest if there are no descriptors processed.
     *
     * If there are no free mbufs on the host, the TX queue in the guest
     * fills up. This makes the guest kernel switch to interrupt mode and
     * clear the VRING_AVAIL_F_NO_INTERRUPT flag.
     *
     * Meanwhile the host polls the virtio queue, sees the available
     * descriptors and interrupts the guest. Those interrupts go unhandled
     * by the guest virtio driver, so after 100K interrupts the IRQ gets
     * reported and disabled by the guest kernel.
     */
    if (likely(i > 0)) {
        vq->vdv_last_used_idx += i;
        rte_wmb();
        vq->vdv_used->idx += i;
        RTE_LOG(DEBUG, VROUTER,
                "%s: vif %d vq %p last_used_idx %d used->idx %u avail->idx %u\n",
                __func__, vq->vdv_vif_idx, vq, vq->vdv_last_used_idx,
                vq->vdv_used->idx, vq->vdv_avail->idx);

        /* Call the guest if required. */
        if (unlikely(!(vq->vdv_avail->flags
                        & VRING_AVAIL_F_NO_INTERRUPT))) {
            p->nb_syscalls++;
            eventfd_write(vq->vdv_callfd, 1);
        }
    }

    DPDK_UDEBUG(VROUTER, &vq->vdv_hash, "%s: queue %p RETURNS %u pkts\n",
            __func__, vq, nb_pkts);

    DPDK_VIRTIO_READER_STATS_PKTS_IN_ADD(p, nb_pkts);
    return nb_pkts;
}
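/*
 * dpdk_virtio_from_vm_rx() ends with the producer half of the vring
 * protocol: fill the used-ring entries, rte_wmb(), then bump used->idx, so
 * the guest can never read an index that points at unwritten entries. A
 * minimal sketch of that publish step follows, with simplified hypothetical
 * structures (the real layouts live in linux/virtio_ring.h) and the same
 * kick-suppression check via the NO_INTERRUPT avail flag as in the
 * function above.
 */
#include <stdint.h>
#include <sys/eventfd.h>
#include <rte_atomic.h>

struct my_used_elem { uint32_t id; uint32_t len; };
struct my_used  { volatile uint16_t idx; struct my_used_elem ring[256]; };
struct my_avail { volatile uint16_t flags; };
#define MY_AVAIL_F_NO_INTERRUPT 1 /* stands in for VRING_AVAIL_F_NO_INTERRUPT */

static void
my_vring_publish(struct my_used *used, struct my_avail *avail,
                 uint16_t first, uint16_t count, int callfd)
{
    uint16_t i;

    for (i = 0; i < count; i++) {               /* 1. fill used entries */
        used->ring[(first + i) & 255].id = first + i;
        used->ring[(first + i) & 255].len = 0;
    }
    rte_wmb();                                  /* 2. entries before index */
    used->idx += count;                         /* 3. publish to the guest */
    if (!(avail->flags & MY_AVAIL_F_NO_INTERRUPT))
        eventfd_write(callfd, 1);               /* 4. kick if not suppressed */
}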