/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	int err;

	if (mlx5_is_secondary())
		return -E_RTE_SECONDARY;
	priv_lock(priv);
	if (priv->started) {
		priv_unlock(priv);
		return 0;
	}
	DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
	err = priv_create_hash_rxqs(priv);
	if (!err)
		err = priv_rehash_flows(priv);
	if (!err)
		priv->started = 1;
	else {
		ERROR("%p: an error occurred while configuring hash RX queues:"
		      " %s",
		      (void *)priv, strerror(err));
		/* Rollback. */
		priv_special_flow_disable_all(priv);
		priv_mac_addrs_disable(priv);
		priv_destroy_hash_rxqs(priv);
	}
	if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE)
		priv_fdir_enable(priv);
	priv_dev_interrupt_handler_install(priv, dev);
	priv_unlock(priv);
	return -err;
}
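/*
 * Usage sketch (editor's illustration, not part of the PMD): applications do
 * not call mlx5_dev_start() directly; it is reached through
 * rte_eth_dev_start() after configuration and queue setup. The port id,
 * queue sizes and mempool below are hypothetical, and the snippet assumes
 * <rte_ethdev.h> and the ethdev API contemporary with this code (uint8_t
 * port ids).
 *
 * @code
 * static int
 * example_port_start(uint8_t port_id, struct rte_mempool *mp)
 * {
 *	struct rte_eth_conf conf;
 *	int ret;
 *
 *	memset(&conf, 0, sizeof(conf));
 *	// One RX and one TX queue with default settings.
 *	ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
 *	if (ret < 0)
 *		return ret;
 *	ret = rte_eth_rx_queue_setup(port_id, 0, 512,
 *				     rte_eth_dev_socket_id(port_id),
 *				     NULL, mp);
 *	if (ret < 0)
 *		return ret;
 *	ret = rte_eth_tx_queue_setup(port_id, 0, 512,
 *				     rte_eth_dev_socket_id(port_id), NULL);
 *	if (ret < 0)
 *		return ret;
 *	// Ends up in mlx5_dev_start() for mlx5 ports.
 *	return rte_eth_dev_start(port_id);
 * }
 * @endcode
 */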
/**
 * DPDK callback to retrieve physical link information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion (ignored).
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	struct priv *priv = mlx5_get_priv(dev);
	int ret;

	priv_lock(priv);
	ret = mlx5_link_update_unlocked(dev, wait_to_complete);
	priv_unlock(priv);
	return ret;
}
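/*
 * Usage sketch (editor's illustration): the callback above backs
 * rte_eth_link_get() and rte_eth_link_get_nowait(). A hypothetical check,
 * assuming <stdio.h> and the ethdev API of the same era:
 *
 * @code
 * struct rte_eth_link link;
 *
 * memset(&link, 0, sizeof(link));
 * rte_eth_link_get(port_id, &link);	// may wait for completion
 * if (link.link_status)
 *	printf("port %u up, %u Mbps, %s duplex\n", port_id,
 *	       link.link_speed,
 *	       (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? "full" : "half");
 * else
 *	printf("port %u down\n", port_id);
 * @endcode
 */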
/**
 * DPDK callback to get information about the device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] info
 *   Info structure output buffer.
 */
void
mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct priv *priv = mlx5_get_priv(dev);
	unsigned int max;
	char ifname[IF_NAMESIZE];

	priv_lock(priv);
	/* FIXME: we should ask the device for these values. */
	info->min_rx_bufsize = 32;
	info->max_rx_pktlen = 65536;
	/*
	 * Since we need one CQ per QP, the limit is the minimum number
	 * between the two values.
	 */
	max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ?
	       priv->device_attr.max_qp : priv->device_attr.max_cq);
	/* max_rx_queues is uint16_t: cap the value, otherwise truncation
	 * would wrap it around (e.g. 65536 would become 0). */
	if (max >= 65535)
		max = 65535;
	info->max_rx_queues = max;
	info->max_tx_queues = max;
	info->max_mac_addrs = RTE_DIM(priv->mac);
	info->rx_offload_capa =
		(priv->hw_csum ?
		 (DEV_RX_OFFLOAD_IPV4_CKSUM |
		  DEV_RX_OFFLOAD_UDP_CKSUM |
		  DEV_RX_OFFLOAD_TCP_CKSUM) :
		 0);
	info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
	if (priv->hw_csum)
		info->tx_offload_capa |=
			(DEV_TX_OFFLOAD_IPV4_CKSUM |
			 DEV_TX_OFFLOAD_UDP_CKSUM |
			 DEV_TX_OFFLOAD_TCP_CKSUM);
	if (priv_get_ifname(priv, &ifname) == 0)
		info->if_index = if_nametoindex(ifname);
	/* FIXME: RETA update/query API expects the callee to know the size of
	 * the indirection table, for this PMD the size varies depending on
	 * the number of RX queues, it becomes impossible to find the correct
	 * size if it is not fixed.
	 * The API should be updated to solve this problem. */
	info->reta_size = priv->ind_table_max_size;
	info->speed_capa =
			ETH_LINK_SPEED_1G |
			ETH_LINK_SPEED_10G |
			ETH_LINK_SPEED_20G |
			ETH_LINK_SPEED_25G |
			ETH_LINK_SPEED_40G |
			ETH_LINK_SPEED_50G |
			ETH_LINK_SPEED_56G |
			ETH_LINK_SPEED_100G;
	priv_unlock(priv);
}
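/*
 * Usage sketch (editor's illustration): rte_eth_dev_info_get() fills the
 * structure above; an application typically clamps its desired queue counts
 * to the reported maxima before calling rte_eth_dev_configure():
 *
 * @code
 * struct rte_eth_dev_info dev_info;
 * uint16_t nb_rxq = 8;	// hypothetical desired queue count
 *
 * rte_eth_dev_info_get(port_id, &dev_info);
 * if (nb_rxq > dev_info.max_rx_queues)
 *	nb_rxq = dev_info.max_rx_queues;
 * @endcode
 */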
/**
 * DPDK callback to bring the link UP.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
mlx5_set_link_up(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	int err;

	priv_lock(priv);
	err = priv_set_link(priv, 1);
	priv_unlock(priv);
	return err;
}
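/*
 * Usage sketch (editor's illustration): exposed as rte_eth_dev_set_link_up(),
 * with rte_eth_dev_set_link_down() as its counterpart, e.g. to force a link
 * bounce:
 *
 * @code
 * if (rte_eth_dev_set_link_down(port_id) == 0)
 *	rte_eth_dev_set_link_up(port_id);
 * @endcode
 */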
/**
 * DPDK callback to configure a VLAN filter.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param vlan_id
 *   VLAN ID to filter.
 * @param on
 *   Toggle filter.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
	struct priv *priv = dev->data->dev_private;
	int ret;

	priv_lock(priv);
	ret = vlan_filter_set(dev, vlan_id, on);
	priv_unlock(priv);
	assert(ret >= 0);
	return -ret;
}
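/*
 * Usage sketch (editor's illustration): reached through
 * rte_eth_dev_vlan_filter(); the VLAN id below is hypothetical:
 *
 * @code
 * // Accept VLAN 100, then stop filtering it again.
 * int ret = rte_eth_dev_vlan_filter(port_id, 100, 1);
 *
 * if (ret == 0)
 *	ret = rte_eth_dev_vlan_filter(port_id, 100, 0);
 * @endcode
 */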
/**
 * Handle interrupts from the NIC.
 *
 * @param[in] intr_handle
 *   Interrupt handle.
 * @param cb_arg
 *   Callback argument.
 */
void
mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg)
{
	struct rte_eth_dev *dev = cb_arg;
	struct priv *priv = dev->data->dev_private;
	int ret;

	(void)intr_handle;
	priv_lock(priv);
	ret = priv_dev_link_status_handler(priv, dev);
	priv_unlock(priv);
	if (ret)
		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
}
/**
 * Handle delayed link status event.
 *
 * @param arg
 *   Registered argument.
 */
void
mlx5_dev_link_status_handler(void *arg)
{
	struct rte_eth_dev *dev = arg;
	struct priv *priv = dev->data->dev_private;
	int ret;

	priv_lock(priv);
	assert(priv->pending_alarm == 1);
	ret = priv_dev_link_status_handler(priv, dev);
	priv_unlock(priv);
	if (ret)
		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
}
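/*
 * Usage sketch (editor's illustration): both handlers above end up raising
 * RTE_ETH_EVENT_INTR_LSC, which applications consume by registering a
 * callback and enabling LSC interrupts at configure time. The handler name
 * is hypothetical; the callback signature is the pre-17.x one matching
 * _rte_eth_dev_callback_process() as used above. Assumes <stdio.h>.
 *
 * @code
 * static void
 * example_lsc_cb(uint8_t port_id, enum rte_eth_event_type type, void *arg)
 * {
 *	struct rte_eth_link link;
 *
 *	(void)arg;
 *	if (type != RTE_ETH_EVENT_INTR_LSC)
 *		return;
 *	rte_eth_link_get_nowait(port_id, &link);
 *	printf("port %u link %s\n", port_id,
 *	       link.link_status ? "up" : "down");
 * }
 *
 * // At init time, set conf.intr_conf.lsc = 1 before
 * // rte_eth_dev_configure(), then:
 * rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *				 example_lsc_cb, NULL);
 * @endcode
 */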
/**
 * DPDK callback for Ethernet device configuration.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_configure(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	int ret;

	if (mlx5_is_secondary())
		return -E_RTE_SECONDARY;
	priv_lock(priv);
	ret = dev_configure(dev);
	assert(ret >= 0);
	priv_unlock(priv);
	return -ret;
}
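/*
 * Usage sketch (editor's illustration): the rte_eth_conf passed to
 * rte_eth_dev_configure() is what dev_configure() consumes. A hypothetical
 * RSS + LSC configuration (nb_rxq/nb_txq are assumed variables):
 *
 * @code
 * struct rte_eth_conf conf = {
 *	.rxmode = {
 *		.mq_mode = ETH_MQ_RX_RSS,
 *	},
 *	.rx_adv_conf = {
 *		.rss_conf = {
 *			.rss_key = NULL,	// PMD default key
 *			.rss_hf = ETH_RSS_IP,
 *		},
 *	},
 *	.intr_conf = {
 *		.lsc = 1,	// enable link status interrupts
 *	},
 * };
 *
 * int ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 * @endcode
 */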
/**
 * DPDK callback to modify flow control parameters.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[in] fc_conf
 *   Flow control parameters.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct priv *priv = dev->data->dev_private;
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_SPAUSEPARAM
	};
	int ret;

	if (mlx5_is_secondary())
		return -E_RTE_SECONDARY;
	ifr.ifr_data = (void *)&ethpause;
	ethpause.autoneg = fc_conf->autoneg;
	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
	    (fc_conf->mode & RTE_FC_RX_PAUSE))
		ethpause.rx_pause = 1;
	else
		ethpause.rx_pause = 0;
	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
	    (fc_conf->mode & RTE_FC_TX_PAUSE))
		ethpause.tx_pause = 1;
	else
		ethpause.tx_pause = 0;
	priv_lock(priv);
	if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
		ret = errno;
		WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
		     " failed: %s",
		     strerror(ret));
		goto out;
	}
	ret = 0;
out:
	priv_unlock(priv);
	assert(ret >= 0);
	return -ret;
}
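/*
 * Usage sketch (editor's illustration): reached through
 * rte_eth_dev_flow_ctrl_set(). Only the mode and autoneg fields matter to
 * this implementation; the remaining fields are ignored:
 *
 * @code
 * struct rte_eth_fc_conf fc_conf = {
 *	.mode = RTE_FC_FULL,	// pause frames in both directions
 *	.autoneg = 1,
 * };
 *
 * int ret = rte_eth_dev_flow_ctrl_set(port_id, &fc_conf);
 * @endcode
 */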
/**
 * DPDK callback to close the device.
 *
 * Destroy all queues and objects, free memory.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_dev_close(struct rte_eth_dev *dev)
{
	struct priv *priv = mlx5_get_priv(dev);
	unsigned int i;

	priv_lock(priv);
	DEBUG("%p: closing device \"%s\"",
	      (void *)dev,
	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
	/* In case mlx5_dev_stop() has not been called. */
	priv_dev_interrupt_handler_uninstall(priv, dev);
	priv_special_flow_disable_all(priv);
	priv_mac_addrs_disable(priv);
	priv_destroy_hash_rxqs(priv);
	/* Remove flow director elements. */
	priv_fdir_disable(priv);
	priv_fdir_delete_filters_list(priv);
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	if (priv->rxqs != NULL) {
		/* XXX race condition if mlx5_rx_burst() is still running. */
		usleep(1000);
		for (i = 0; (i != priv->rxqs_n); ++i) {
			struct rxq *rxq = (*priv->rxqs)[i];
			struct rxq_ctrl *rxq_ctrl;

			if (rxq == NULL)
				continue;
			rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
			(*priv->rxqs)[i] = NULL;
			rxq_cleanup(rxq_ctrl);
			rte_free(rxq_ctrl);
		}
		priv->rxqs_n = 0;
		priv->rxqs = NULL;
	}
	/* Release TX queues following the same pattern as RX queues, then
	 * the verbs resources, mirroring the older variant of this function
	 * kept below. */
	if (priv->txqs != NULL) {
		/* XXX race condition if mlx5_tx_burst() is still running. */
		usleep(1000);
		for (i = 0; (i != priv->txqs_n); ++i) {
			struct txq *txq = (*priv->txqs)[i];
			struct txq_ctrl *txq_ctrl;

			if (txq == NULL)
				continue;
			txq_ctrl = container_of(txq, struct txq_ctrl, txq);
			(*priv->txqs)[i] = NULL;
			txq_cleanup(txq_ctrl);
			rte_free(txq_ctrl);
		}
		priv->txqs_n = 0;
		priv->txqs = NULL;
	}
	if (priv->pd != NULL) {
		assert(priv->ctx != NULL);
		claim_zero(ibv_dealloc_pd(priv->pd));
		claim_zero(ibv_close_device(priv->ctx));
	} else
		assert(priv->ctx == NULL);
	priv_unlock(priv);
	memset(priv, 0, sizeof(*priv));
}
/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	if (mlx5_is_secondary())
		return;
	priv_lock(priv);
	if (!priv->started) {
		priv_unlock(priv);
		return;
	}
	DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
	priv_special_flow_disable_all(priv);
	priv_mac_addrs_disable(priv);
	priv_destroy_hash_rxqs(priv);
	priv_fdir_disable(priv);
	priv_dev_interrupt_handler_uninstall(priv, dev);
	priv->started = 0;
	priv_unlock(priv);
}
/**
 * DPDK callback to close the device.
 *
 * Destroy all queues and objects, free memory.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_dev_close(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	void *tmp;
	unsigned int i;

	priv_lock(priv);
	DEBUG("%p: closing device \"%s\"",
	      (void *)dev,
	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
	/* In case mlx5_dev_stop() has not been called. */
	priv_dev_interrupt_handler_uninstall(priv, dev);
	priv_allmulticast_disable(priv);
	priv_promiscuous_disable(priv);
	priv_mac_addrs_disable(priv);
	priv_destroy_hash_rxqs(priv);
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	if (priv->rxqs != NULL) {
		/* XXX race condition if mlx5_rx_burst() is still running. */
		usleep(1000);
		for (i = 0; (i != priv->rxqs_n); ++i) {
			tmp = (*priv->rxqs)[i];
			if (tmp == NULL)
				continue;
			(*priv->rxqs)[i] = NULL;
			rxq_cleanup(tmp);
			rte_free(tmp);
		}
		priv->rxqs_n = 0;
		priv->rxqs = NULL;
	}
	if (priv->txqs != NULL) {
		/* XXX race condition if mlx5_tx_burst() is still running. */
		usleep(1000);
		for (i = 0; (i != priv->txqs_n); ++i) {
			tmp = (*priv->txqs)[i];
			if (tmp == NULL)
				continue;
			(*priv->txqs)[i] = NULL;
			txq_cleanup(tmp);
			rte_free(tmp);
		}
		priv->txqs_n = 0;
		priv->txqs = NULL;
	}
	if (priv->pd != NULL) {
		assert(priv->ctx != NULL);
		claim_zero(ibv_dealloc_pd(priv->pd));
		claim_zero(ibv_close_device(priv->ctx));
	} else
		assert(priv->ctx == NULL);
	if (priv->rss_conf != NULL) {
		for (i = 0; (i != hash_rxq_init_n); ++i)
			rte_free((*priv->rss_conf)[i]);
		rte_free(priv->rss_conf);
	}
	priv_unlock(priv);
	memset(priv, 0, sizeof(*priv));
}
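/*
 * Usage sketch (editor's illustration): the stop/close pair above is driven
 * by the generic ethdev API at application shutdown:
 *
 * @code
 * rte_eth_dev_stop(port_id);	// detaches flows (mlx5_dev_stop)
 * rte_eth_dev_close(port_id);	// frees queues and objects (mlx5_dev_close)
 * @endcode
 */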
/**
 * DPDK callback to change the MTU.
 *
 * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be
 * received). Use this as a hint to enable/disable scattered packets support
 * and improve performance when not needed.
 * Since failure is not an option, reconfiguring queues on the fly is not
 * recommended.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mtu
 *   New MTU.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct priv *priv = dev->data->dev_private;
	int ret = 0;
	unsigned int i;
	uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
		mlx5_rx_burst;

	if (mlx5_is_secondary())
		return -E_RTE_SECONDARY;
	priv_lock(priv);
	/* Set kernel interface MTU first. */
	if (priv_set_mtu(priv, mtu)) {
		ret = errno;
		WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
		     strerror(ret));
		goto out;
	} else
		DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
	priv->mtu = mtu;
	/* Temporarily replace RX handler with a fake one, assuming it has not
	 * been copied elsewhere. */
	dev->rx_pkt_burst = removed_rx_burst;
	/* Make sure everyone has left mlx5_rx_burst() and uses
	 * removed_rx_burst() instead. */
	rte_wmb();
	usleep(1000);
	/* Reconfigure each RX queue. */
	for (i = 0; (i != priv->rxqs_n); ++i) {
		struct rxq *rxq = (*priv->rxqs)[i];
		unsigned int max_frame_len;
		int sp;

		if (rxq == NULL)
			continue;
		/* Calculate new maximum frame length according to MTU and
		 * toggle scattered support (sp) if necessary. */
		max_frame_len = (priv->mtu + ETHER_HDR_LEN +
				 (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
		sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM));
		/* Provide new values to rxq_setup(). */
		dev->data->dev_conf.rxmode.jumbo_frame = sp;
		dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
		ret = rxq_rehash(dev, rxq);
		if (ret) {
			/* Force SP RX if that queue requires it and abort. */
			if (rxq->sp)
				rx_func = mlx5_rx_burst_sp;
			break;
		}
		/* Scattered burst function takes priority. */
		if (rxq->sp)
			rx_func = mlx5_rx_burst_sp;
	}
	/* Burst functions can now be called again. */
	rte_wmb();
	dev->rx_pkt_burst = rx_func;
out:
	priv_unlock(priv);
	assert(ret >= 0);
	return -ret;
}
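/*
 * Usage sketch (editor's illustration): reached through
 * rte_eth_dev_set_mtu(); note the negative errno return convention matching
 * the wrapper above. The MTU value is hypothetical. Assumes <stdio.h>.
 *
 * @code
 * int ret = rte_eth_dev_set_mtu(port_id, 9000);
 *
 * if (ret < 0)
 *	fprintf(stderr, "cannot set MTU: %s\n", strerror(-ret));
 * @endcode
 */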
/**
 * Configure secondary process queues from a private data pointer (primary
 * or secondary) and update burst callbacks. Can take place only once.
 *
 * All queues must have been previously created by the primary process to
 * avoid undefined behavior.
 *
 * @param priv
 *   Private data pointer from either primary or secondary process.
 *
 * @return
 *   Private data pointer from secondary process, NULL in case of error.
 */
struct priv *
mlx5_secondary_data_setup(struct priv *priv)
{
	unsigned int port_id = 0;
	struct mlx5_secondary_data *sd;
	void **tx_queues;
	void **rx_queues;
	unsigned int nb_tx_queues;
	unsigned int nb_rx_queues;
	unsigned int i;

	/* priv must be valid at this point. */
	assert(priv != NULL);
	/* priv->dev must also be valid but may point to local memory from
	 * another process, possibly with the same address and must not
	 * be dereferenced yet. */
	assert(priv->dev != NULL);
	/* Determine port ID by finding out where priv comes from. */
	while (1) {
		sd = &mlx5_secondary_data[port_id];
		rte_spinlock_lock(&sd->lock);
		/* Primary process? */
		if (sd->primary_priv == priv)
			break;
		/* Secondary process? */
		if (sd->data.dev_private == priv)
			break;
		rte_spinlock_unlock(&sd->lock);
		if (++port_id == RTE_DIM(mlx5_secondary_data))
			port_id = 0;
	}
	/* Switch to secondary private structure. If private data has already
	 * been updated by another thread, there is nothing else to do. */
	priv = sd->data.dev_private;
	if (priv->dev->data == &sd->data)
		goto end;
	/* Sanity checks. Secondary private structure is supposed to point
	 * to local eth_dev, itself still pointing to the shared device data
	 * structure allocated by the primary process. */
	assert(sd->shared_dev_data != &sd->data);
	assert(sd->data.nb_tx_queues == 0);
	assert(sd->data.tx_queues == NULL);
	assert(sd->data.nb_rx_queues == 0);
	assert(sd->data.rx_queues == NULL);
	assert(priv != sd->primary_priv);
	assert(priv->dev->data == sd->shared_dev_data);
	assert(priv->txqs_n == 0);
	assert(priv->txqs == NULL);
	assert(priv->rxqs_n == 0);
	assert(priv->rxqs == NULL);
	nb_tx_queues = sd->shared_dev_data->nb_tx_queues;
	nb_rx_queues = sd->shared_dev_data->nb_rx_queues;
	/* Allocate local storage for queues. */
	tx_queues = rte_zmalloc("secondary ethdev->tx_queues",
				sizeof(sd->data.tx_queues[0]) * nb_tx_queues,
				RTE_CACHE_LINE_SIZE);
	rx_queues = rte_zmalloc("secondary ethdev->rx_queues",
				sizeof(sd->data.rx_queues[0]) * nb_rx_queues,
				RTE_CACHE_LINE_SIZE);
	if (tx_queues == NULL || rx_queues == NULL)
		goto error;
	/* Lock to prevent control operations during setup. */
	priv_lock(priv);
	/* TX queues. */
	for (i = 0; i != nb_tx_queues; ++i) {
		struct txq *primary_txq = (*sd->primary_priv->txqs)[i];
		struct txq *txq;

		if (primary_txq == NULL)
			continue;
		txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0,
					primary_txq->socket);
		if (txq != NULL) {
			if (txq_setup(priv->dev,
				      txq,
				      primary_txq->elts_n * MLX5_PMD_SGE_WR_N,
				      primary_txq->socket,
				      NULL) == 0) {
				txq->stats.idx = primary_txq->stats.idx;
				tx_queues[i] = txq;
				continue;
			}
			rte_free(txq);
		}
		while (i) {
			txq = tx_queues[--i];
			txq_cleanup(txq);
			rte_free(txq);
		}
		goto error;
	}
	/* RX queues. */
	for (i = 0; i != nb_rx_queues; ++i) {
		struct rxq *primary_rxq = (*sd->primary_priv->rxqs)[i];

		if (primary_rxq == NULL)
			continue;
		/* Not supported yet. */
		rx_queues[i] = NULL;
	}
	/* Update everything. */
	priv->txqs = (void *)tx_queues;
	priv->txqs_n = nb_tx_queues;
	priv->rxqs = (void *)rx_queues;
	priv->rxqs_n = nb_rx_queues;
	sd->data.rx_queues = rx_queues;
	sd->data.tx_queues = tx_queues;
	sd->data.nb_rx_queues = nb_rx_queues;
	sd->data.nb_tx_queues = nb_tx_queues;
	sd->data.dev_link = sd->shared_dev_data->dev_link;
	sd->data.mtu = sd->shared_dev_data->mtu;
	memcpy(sd->data.rx_queue_state,
	       sd->shared_dev_data->rx_queue_state,
	       sizeof(sd->data.rx_queue_state));
	memcpy(sd->data.tx_queue_state,
	       sd->shared_dev_data->tx_queue_state,
	       sizeof(sd->data.tx_queue_state));
	sd->data.dev_flags = sd->shared_dev_data->dev_flags;
	/* Use local data from now on. */
	rte_mb();
	priv->dev->data = &sd->data;
	rte_mb();
	priv->dev->tx_pkt_burst = mlx5_tx_burst;
	priv->dev->rx_pkt_burst = removed_rx_burst;
	priv_unlock(priv);
end:
	/* More sanity checks. */
	assert(priv->dev->tx_pkt_burst == mlx5_tx_burst);
	assert(priv->dev->rx_pkt_burst == removed_rx_burst);
	assert(priv->dev->data == &sd->data);
	rte_spinlock_unlock(&sd->lock);
	return priv;
error:
	priv_unlock(priv);
	rte_free(tx_queues);
	rte_free(rx_queues);
	rte_spinlock_unlock(&sd->lock);
	return NULL;
}
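/*
 * Usage sketch (editor's illustration): this setup runs implicitly the first
 * time a secondary process touches the port, typically on its first TX
 * burst. A hypothetical secondary process, launched with
 * --proc-type=secondary and the same --file-prefix as the primary, only
 * needs something like:
 *
 * @code
 * // After rte_eal_init() in the secondary process:
 * struct rte_mbuf *pkts[32];
 * uint16_t n = 16;	// hypothetical number of ready mbufs in pkts[]
 *
 * // TX is supported; first use ends up in mlx5_secondary_data_setup().
 * uint16_t sent = rte_eth_tx_burst(port_id, 0, pkts, n);
 *
 * // RX from a secondary process is not supported yet: rx_pkt_burst is
 * // removed_rx_burst and the local RX queues are left NULL above.
 * @endcode
 */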