int rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) { struct rte_pci_device *pci_dev = NULL; struct bond_dev_private *internals = NULL; struct rte_eth_dev *eth_dev = NULL; struct eth_driver *eth_drv = NULL; struct rte_pci_driver *pci_drv = NULL; struct rte_pci_id *pci_id_table = NULL; /* now do all data allocation - for eth_dev structure, dummy pci driver * and internal (private) data */ if (name == NULL) { RTE_BOND_LOG(ERR, "Invalid name specified"); goto err; } if (socket_id >= number_of_sockets()) { RTE_BOND_LOG(ERR, "Invalid socket id specified to create bonded device on."); goto err; } pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, socket_id); if (pci_dev == NULL) { RTE_BOND_LOG(ERR, "Unable to malloc pci dev on socket"); goto err; } eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, socket_id); if (eth_drv == NULL) { RTE_BOND_LOG(ERR, "Unable to malloc eth_drv on socket"); goto err; } pci_drv = ð_drv->pci_drv; pci_id_table = rte_zmalloc_socket(name, sizeof(*pci_id_table), 0, socket_id); if (pci_id_table == NULL) { RTE_BOND_LOG(ERR, "Unable to malloc pci_id_table on socket"); goto err; } pci_drv->id_table = pci_id_table; pci_drv->id_table->device_id = PCI_ANY_ID; pci_drv->id_table->subsystem_device_id = PCI_ANY_ID; pci_drv->id_table->vendor_id = PCI_ANY_ID; pci_drv->id_table->subsystem_vendor_id = PCI_ANY_ID; pci_drv->drv_flags = RTE_PCI_DRV_INTR_LSC; internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id); if (internals == NULL) { RTE_BOND_LOG(ERR, "Unable to malloc internals on socket"); goto err; } /* reserve an ethdev entry */ eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); if (eth_dev == NULL) { RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev"); goto err; } pci_dev->numa_node = socket_id; pci_drv->name = driver_name; eth_dev->driver = eth_drv; eth_dev->data->dev_private = internals; eth_dev->data->nb_rx_queues = (uint16_t)1; eth_dev->data->nb_tx_queues = (uint16_t)1; TAILQ_INIT(&(eth_dev->link_intr_cbs)); eth_dev->data->dev_link.link_status = 0; eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0, socket_id); eth_dev->data->dev_started = 0; eth_dev->data->promiscuous = 0; eth_dev->data->scattered_rx = 0; eth_dev->data->all_multicast = 0; eth_dev->dev_ops = &default_dev_ops; eth_dev->pci_dev = pci_dev; rte_spinlock_init(&internals->lock); internals->port_id = eth_dev->data->port_id; internals->mode = BONDING_MODE_INVALID; internals->current_primary_port = 0; internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; internals->xmit_hash = xmit_l2_hash; internals->user_defined_mac = 0; internals->link_props_set = 0; internals->link_status_polling_enabled = 0; internals->link_status_polling_interval_ms = DEFAULT_POLLING_INTERVAL_10_MS; internals->link_down_delay_ms = 0; internals->link_up_delay_ms = 0; internals->slave_count = 0; internals->active_slave_count = 0; internals->rx_offload_capa = 0; internals->tx_offload_capa = 0; memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); memset(internals->slaves, 0, sizeof(internals->slaves)); /* Set mode 4 default configuration */ bond_mode_8023ad_setup(eth_dev, NULL); if (bond_ethdev_mode_set(eth_dev, mode)) { RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", eth_dev->data->port_id, mode); goto err; } return eth_dev->data->port_id; err: if (pci_dev) rte_free(pci_dev); if (pci_id_table) rte_free(pci_id_table); if (eth_drv) rte_free(eth_drv); if (internals) rte_free(internals); return -1; }
/** * DPDK callback to register a PCI device. * * This function creates an Ethernet device for each port of a given * PCI device. * * @param[in] pci_drv * PCI driver structure (mlx5_driver). * @param[in] pci_dev * PCI device information. * * @return * 0 on success, negative errno value on failure. */ static int mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) { struct ibv_device **list; struct ibv_device *ibv_dev; int err = 0; struct ibv_context *attr_ctx = NULL; struct ibv_device_attr device_attr; unsigned int vf; int idx; int i; (void)pci_drv; assert(pci_drv == &mlx5_driver.pci_drv); /* Get mlx5_dev[] index. */ idx = mlx5_dev_idx(&pci_dev->addr); if (idx == -1) { ERROR("this driver cannot support any more adapters"); return -ENOMEM; } DEBUG("using driver device index %d", idx); /* Save PCI address. */ mlx5_dev[idx].pci_addr = pci_dev->addr; list = ibv_get_device_list(&i); if (list == NULL) { assert(errno); if (errno == ENOSYS) { WARN("cannot list devices, is ib_uverbs loaded?"); return 0; } return -errno; } assert(i >= 0); /* * For each listed device, check related sysfs entry against * the provided PCI ID. */ while (i != 0) { struct rte_pci_addr pci_addr; --i; DEBUG("checking device \"%s\"", list[i]->name); if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr)) continue; if ((pci_dev->addr.domain != pci_addr.domain) || (pci_dev->addr.bus != pci_addr.bus) || (pci_dev->addr.devid != pci_addr.devid) || (pci_dev->addr.function != pci_addr.function)) continue; vf = ((pci_dev->id.device_id == PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) || (pci_dev->id.device_id == PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)); INFO("PCI information matches, using device \"%s\" (VF: %s)", list[i]->name, (vf ? "true" : "false")); attr_ctx = ibv_open_device(list[i]); err = errno; break; } if (attr_ctx == NULL) { ibv_free_device_list(list); switch (err) { case 0: WARN("cannot access device, is mlx5_ib loaded?"); return 0; case EINVAL: WARN("cannot use device, are drivers up to date?"); return 0; } assert(err > 0); return -err; } ibv_dev = list[i]; DEBUG("device opened"); if (ibv_query_device(attr_ctx, &device_attr)) goto error; INFO("%u port(s) detected", device_attr.phys_port_cnt); for (i = 0; i < device_attr.phys_port_cnt; i++) { uint32_t port = i + 1; /* ports are indexed from one */ uint32_t test = (1 << i); struct ibv_context *ctx = NULL; struct ibv_port_attr port_attr; struct ibv_pd *pd = NULL; struct priv *priv = NULL; struct rte_eth_dev *eth_dev; #ifdef HAVE_EXP_QUERY_DEVICE struct ibv_exp_device_attr exp_device_attr; #endif /* HAVE_EXP_QUERY_DEVICE */ struct ether_addr mac; #ifdef HAVE_EXP_QUERY_DEVICE exp_device_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | IBV_EXP_DEVICE_ATTR_RX_HASH; #endif /* HAVE_EXP_QUERY_DEVICE */ DEBUG("using port %u (%08" PRIx32 ")", port, test); ctx = ibv_open_device(ibv_dev); if (ctx == NULL) goto port_error; /* Check port status. */ err = ibv_query_port(ctx, port, &port_attr); if (err) { ERROR("port query failed: %s", strerror(err)); goto port_error; } if (port_attr.state != IBV_PORT_ACTIVE) DEBUG("port %d is not active: \"%s\" (%d)", port, ibv_port_state_str(port_attr.state), port_attr.state); /* Allocate protection domain. */ pd = ibv_alloc_pd(ctx); if (pd == NULL) { ERROR("PD allocation failure"); err = ENOMEM; goto port_error; } mlx5_dev[idx].ports |= test; /* from rte_ethdev.c */ priv = rte_zmalloc("ethdev private structure", sizeof(*priv), RTE_CACHE_LINE_SIZE); if (priv == NULL) { ERROR("priv allocation failure"); err = ENOMEM; goto port_error; } priv->ctx = ctx; priv->device_attr = device_attr; priv->port = port; priv->pd = pd; priv->mtu = ETHER_MTU; #ifdef HAVE_EXP_QUERY_DEVICE if (ibv_exp_query_device(ctx, &exp_device_attr)) { ERROR("ibv_exp_query_device() failed"); goto port_error; } priv->hw_csum = ((exp_device_attr.exp_device_cap_flags & IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) && (exp_device_attr.exp_device_cap_flags & IBV_EXP_DEVICE_RX_CSUM_IP_PKT)); DEBUG("checksum offloading is %ssupported", (priv->hw_csum ? "" : "not ")); priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags & IBV_EXP_DEVICE_VXLAN_SUPPORT); DEBUG("L2 tunnel checksum offloads are %ssupported", (priv->hw_csum_l2tun ? "" : "not ")); priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size; DEBUG("maximum RX indirection table size is %u", priv->ind_table_max_size); #else /* HAVE_EXP_QUERY_DEVICE */ priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE; #endif /* HAVE_EXP_QUERY_DEVICE */ priv->vf = vf; /* Allocate and register default RSS hash keys. */ priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n, sizeof((*priv->rss_conf)[0]), 0); if (priv->rss_conf == NULL) { err = ENOMEM; goto port_error; } err = rss_hash_rss_conf_new_key(priv, rss_hash_default_key, rss_hash_default_key_len, ETH_RSS_PROTO_MASK); if (err) goto port_error; /* Configure the first MAC address by default. */ if (priv_get_mac(priv, &mac.addr_bytes)) { ERROR("cannot get MAC address, is mlx5_en loaded?" " (errno: %s)", strerror(errno)); goto port_error; } INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", priv->port, mac.addr_bytes[0], mac.addr_bytes[1], mac.addr_bytes[2], mac.addr_bytes[3], mac.addr_bytes[4], mac.addr_bytes[5]); /* Register MAC and broadcast addresses. */ claim_zero(priv_mac_addr_add(priv, 0, (const uint8_t (*)[ETHER_ADDR_LEN]) mac.addr_bytes)); claim_zero(priv_mac_addr_add(priv, (RTE_DIM(priv->mac) - 1), &(const uint8_t [ETHER_ADDR_LEN]) { "\xff\xff\xff\xff\xff\xff" })); #ifndef NDEBUG { char ifname[IF_NAMESIZE]; if (priv_get_ifname(priv, &ifname) == 0) DEBUG("port %u ifname is \"%s\"", priv->port, ifname); else DEBUG("port %u ifname is unknown", priv->port); } #endif /* Get actual MTU if possible. */ priv_get_mtu(priv, &priv->mtu); DEBUG("port %u MTU is %u", priv->port, priv->mtu); /* from rte_ethdev.c */ { char name[RTE_ETH_NAME_MAX_LEN]; snprintf(name, sizeof(name), "%s port %u", ibv_get_device_name(ibv_dev), port); eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI); } if (eth_dev == NULL) { ERROR("can not allocate rte ethdev"); err = ENOMEM; goto port_error; } eth_dev->data->dev_private = priv; eth_dev->pci_dev = pci_dev; eth_dev->driver = &mlx5_driver; eth_dev->data->rx_mbuf_alloc_failed = 0; eth_dev->data->mtu = ETHER_MTU; priv->dev = eth_dev; eth_dev->dev_ops = &mlx5_dev_ops; eth_dev->data->mac_addrs = priv->mac; TAILQ_INIT(ð_dev->link_intr_cbs); /* Bring Ethernet device up. */ DEBUG("forcing Ethernet interface up"); priv_set_flags(priv, ~IFF_UP, IFF_UP); continue; port_error: rte_free(priv->rss_conf); rte_free(priv); if (pd) claim_zero(ibv_dealloc_pd(pd)); if (ctx) claim_zero(ibv_close_device(ctx)); break; }
int rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) { struct bond_dev_private *internals = NULL; struct rte_eth_dev *eth_dev = NULL; uint32_t vlan_filter_bmp_size; /* now do all data allocation - for eth_dev structure, dummy pci driver * and internal (private) data */ if (name == NULL) { RTE_BOND_LOG(ERR, "Invalid name specified"); goto err; } if (socket_id >= number_of_sockets()) { RTE_BOND_LOG(ERR, "Invalid socket id specified to create bonded device on."); goto err; } internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id); if (internals == NULL) { RTE_BOND_LOG(ERR, "Unable to malloc internals on socket"); goto err; } /* reserve an ethdev entry */ eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) { RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev"); goto err; } eth_dev->data->dev_private = internals; eth_dev->data->nb_rx_queues = (uint16_t)1; eth_dev->data->nb_tx_queues = (uint16_t)1; eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0, socket_id); if (eth_dev->data->mac_addrs == NULL) { RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs"); goto err; } eth_dev->dev_ops = &default_dev_ops; eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC | RTE_ETH_DEV_DETACHABLE; eth_dev->driver = NULL; eth_dev->data->kdrv = RTE_KDRV_NONE; eth_dev->data->drv_name = pmd_bond_drv.driver.name; eth_dev->data->numa_node = socket_id; rte_spinlock_init(&internals->lock); internals->port_id = eth_dev->data->port_id; internals->mode = BONDING_MODE_INVALID; internals->current_primary_port = RTE_MAX_ETHPORTS + 1; internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; internals->xmit_hash = xmit_l2_hash; internals->user_defined_mac = 0; internals->link_props_set = 0; internals->link_status_polling_enabled = 0; internals->link_status_polling_interval_ms = DEFAULT_POLLING_INTERVAL_10_MS; internals->link_down_delay_ms = 0; internals->link_up_delay_ms = 0; internals->slave_count = 0; internals->active_slave_count = 0; internals->rx_offload_capa = 0; internals->tx_offload_capa = 0; internals->candidate_max_rx_pktlen = 0; internals->max_rx_pktlen = 0; /* Initially allow to choose any offload type */ internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK; memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); memset(internals->slaves, 0, sizeof(internals->slaves)); /* Set mode 4 default configuration */ bond_mode_8023ad_setup(eth_dev, NULL); if (bond_ethdev_mode_set(eth_dev, mode)) { RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", eth_dev->data->port_id, mode); goto err; } vlan_filter_bmp_size = rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1); internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size, RTE_CACHE_LINE_SIZE); if (internals->vlan_filter_bmpmem == NULL) { RTE_BOND_LOG(ERR, "Failed to allocate vlan bitmap for bonded device %u\n", eth_dev->data->port_id); goto err; } internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1, internals->vlan_filter_bmpmem, vlan_filter_bmp_size); if (internals->vlan_filter_bmp == NULL) { RTE_BOND_LOG(ERR, "Failed to init vlan bitmap for bonded device %u\n", eth_dev->data->port_id); rte_free(internals->vlan_filter_bmpmem); goto err; } return eth_dev->data->port_id; err: rte_free(internals); if (eth_dev != NULL) { rte_free(eth_dev->data->mac_addrs); rte_eth_dev_release_port(eth_dev); } return -1; }