/* Create a new flow table made of an rte_hash table and a fixed size
 * data array for storing values. Only supports IPv4 5-tuple lookups. */
struct onvm_ft*
onvm_ft_create(int cnt, int entry_size) {
	struct rte_hash* hash;
	struct onvm_ft* ft;
	struct rte_hash_parameters ipv4_hash_params = {
		.name = NULL,
		.entries = cnt,
		.key_len = sizeof(struct onvm_ft_ipv4_5tuple),
		.hash_func = NULL,
		.hash_func_init_val = 0,
	};
	char s[64];

	/* create ipv4 hash table. use core number and cycle counter to get a unique name. */
	ipv4_hash_params.name = s;
	ipv4_hash_params.socket_id = rte_socket_id();
	snprintf(s, sizeof(s), "onvm_ft_%d-%"PRIu64, rte_lcore_id(), rte_get_tsc_cycles());
	hash = rte_hash_create(&ipv4_hash_params);
	if (hash == NULL) {
		return NULL;
	}
	ft = (struct onvm_ft*)rte_calloc("table", 1, sizeof(struct onvm_ft), 0);
	if (ft == NULL) {
		rte_hash_free(hash);
		return NULL;
	}
	ft->hash = hash;
	ft->cnt = cnt;
	ft->entry_size = entry_size;
	/* Create data array for storing values */
	ft->data = rte_calloc("entry", cnt, entry_size, 0);
	if (ft->data == NULL) {
		rte_hash_free(hash);
		rte_free(ft);
		return NULL;
	}
	return ft;
}

/* Add an entry in flow table and set data to point to the new value.
 * Returns:
 *   index in the array on success
 *   -EPROTONOSUPPORT if packet is not ipv4.
 *   -EINVAL if the parameters are invalid.
 *   -ENOSPC if there is no space in the hash for this key.
 */
int
onvm_ft_add_pkt(struct onvm_ft* table, struct rte_mbuf *pkt, char** data) {
	int32_t tbl_index;
	struct onvm_ft_ipv4_5tuple key;
	int err;

	err = onvm_ft_fill_key(&key, pkt);
	if (err < 0) {
		return err;
	}
	tbl_index = rte_hash_add_key_with_hash(table->hash, (const void *)&key, pkt->hash.rss);
	if (tbl_index >= 0) {
		*data = &table->data[tbl_index * table->entry_size];
	}
	return tbl_index;
}
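/* A minimal usage sketch of the flow table API above (hypothetical NF code,
 * not part of this file): size the table for 1024 flows and keep a packet
 * counter in each flow's data slot. struct flow_stats and count_packet() are
 * illustrative names only; error handling is abbreviated. */
struct flow_stats {
	uint64_t packet_count;
};

static void
count_packet(struct onvm_ft *ft, struct rte_mbuf *pkt) {
	struct flow_stats *stats = NULL;
	int ret;

	/* Adding an existing 5-tuple returns the same index, so the counter
	 * accumulates per flow. */
	ret = onvm_ft_add_pkt(ft, pkt, (char **)&stats);
	if (ret >= 0)
		stats->packet_count++;
	/* ret < 0: packet is not IPv4, arguments are invalid, or the hash is full */
}

/* The table would be created once at startup, e.g.:
 *     struct onvm_ft *ft = onvm_ft_create(1024, sizeof(struct flow_stats));
 */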
/*---------------------------------------------------------------------------*/
sb_queue_t
CreateSBQueue(int capacity)
{
	sb_queue_t sq;

	sq = (sb_queue_t)rte_calloc("sb_queue", 1, sizeof(struct sb_queue), 0);
	if (!sq)
		return NULL;

	sq->_q = (struct tcp_send_buffer **)
		rte_calloc("tcp_send_buffer", capacity + 1, sizeof(struct tcp_send_buffer *), 0);
	if (!sq->_q) {
		rte_free(sq);
		return NULL;
	}

	sq->_capacity = capacity;
	sq->_head = sq->_tail = 0;

	return sq;
}
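/* Why capacity + 1 slots: a head/tail ring that keeps one slot empty can tell
 * "full" apart from "empty" without a separate element count. The helpers
 * below only illustrate that convention under that assumption; they are
 * hypothetical and not part of the mTCP sb_queue API. */
static inline int
sbq_sketch_is_empty(const struct sb_queue *sq)
{
	return sq->_head == sq->_tail;
}

static inline int
sbq_sketch_is_full(const struct sb_queue *sq)
{
	return (sq->_tail + 1) % (sq->_capacity + 1) == sq->_head;
}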
struct onvm_service_chain*
onvm_sc_create(void) {
	struct onvm_service_chain *chain;

	chain = rte_calloc("ONVM_sercice_chain", 1, sizeof(struct onvm_service_chain), 0);
	if (chain == NULL) {
		rte_exit(EXIT_FAILURE, "Cannot allocate memory for service chain\n");
	}
	return chain;
}
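/* Usage sketch: init() later in this section builds the default chain exactly
 * this way, appending a single "send to NF" action. make_default_chain_sketch()
 * is an illustrative wrapper only; onvm_sc_append_entry() and
 * ONVM_NF_ACTION_TONF come from the surrounding openNetVM code. */
static struct onvm_service_chain *
make_default_chain_sketch(void) {
	struct onvm_service_chain *chain = onvm_sc_create();

	if (onvm_sc_append_entry(chain, ONVM_NF_ACTION_TONF, 1) == ENOSPC)
		rte_exit(EXIT_FAILURE, "Default chain exceeds the maximum chain length\n");
	return chain;
}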
int32_t
rte_service_init(void)
{
	if (rte_service_library_initialized) {
		printf("service library init() called, init flag %d\n",
			rte_service_library_initialized);
		return -EALREADY;
	}

	rte_services = rte_calloc("rte_services", RTE_SERVICE_NUM_MAX,
			sizeof(struct rte_service_spec_impl),
			RTE_CACHE_LINE_SIZE);
	if (!rte_services) {
		printf("error allocating rte services array\n");
		return -ENOMEM;
	}

	lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE,
			sizeof(struct core_state), RTE_CACHE_LINE_SIZE);
	if (!lcore_states) {
		printf("error allocating core states array\n");
		rte_free(rte_services); /* avoid leaking the services array on failure */
		return -ENOMEM;
	}

	int i;
	int count = 0;

	struct rte_config *cfg = rte_eal_get_configuration();
	for (i = 0; i < RTE_MAX_LCORE; i++) {
		if (lcore_config[i].core_role == ROLE_SERVICE) {
			if ((unsigned int)i == cfg->master_lcore)
				continue;
			rte_service_lcore_add(i);
			count++;
		}
	}

	rte_service_library_initialized = 1;
	return 0;
}
/**
 * DPDK callback to register a PCI device.
 *
 * This function creates an Ethernet device for each port of a given
 * PCI device.
 *
 * @param[in] pci_drv
 *   PCI driver structure (mlx5_driver).
 * @param[in] pci_dev
 *   PCI device information.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
static int
mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
{
	struct ibv_device **list;
	struct ibv_device *ibv_dev;
	int err = 0;
	struct ibv_context *attr_ctx = NULL;
	struct ibv_device_attr device_attr;
	unsigned int vf;
	int idx;
	int i;

	(void)pci_drv;
	assert(pci_drv == &mlx5_driver.pci_drv);
	/* Get mlx5_dev[] index. */
	idx = mlx5_dev_idx(&pci_dev->addr);
	if (idx == -1) {
		ERROR("this driver cannot support any more adapters");
		return -ENOMEM;
	}
	DEBUG("using driver device index %d", idx);

	/* Save PCI address. */
	mlx5_dev[idx].pci_addr = pci_dev->addr;
	list = ibv_get_device_list(&i);
	if (list == NULL) {
		assert(errno);
		if (errno == ENOSYS) {
			WARN("cannot list devices, is ib_uverbs loaded?");
			return 0;
		}
		return -errno;
	}
	assert(i >= 0);
	/*
	 * For each listed device, check related sysfs entry against
	 * the provided PCI ID.
	 */
	while (i != 0) {
		struct rte_pci_addr pci_addr;

		--i;
		DEBUG("checking device \"%s\"", list[i]->name);
		if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr))
			continue;
		if ((pci_dev->addr.domain != pci_addr.domain) ||
		    (pci_dev->addr.bus != pci_addr.bus) ||
		    (pci_dev->addr.devid != pci_addr.devid) ||
		    (pci_dev->addr.function != pci_addr.function))
			continue;
		vf = ((pci_dev->id.device_id ==
		       PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) ||
		      (pci_dev->id.device_id ==
		       PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF));
		INFO("PCI information matches, using device \"%s\" (VF: %s)",
		     list[i]->name, (vf ? "true" : "false"));
		attr_ctx = ibv_open_device(list[i]);
		err = errno;
		break;
	}
	if (attr_ctx == NULL) {
		ibv_free_device_list(list);
		switch (err) {
		case 0:
			WARN("cannot access device, is mlx5_ib loaded?");
			return 0;
		case EINVAL:
			WARN("cannot use device, are drivers up to date?");
			return 0;
		}
		assert(err > 0);
		return -err;
	}
	ibv_dev = list[i];

	DEBUG("device opened");
	if (ibv_query_device(attr_ctx, &device_attr))
		goto error;
	INFO("%u port(s) detected", device_attr.phys_port_cnt);

	for (i = 0; i < device_attr.phys_port_cnt; i++) {
		uint32_t port = i + 1; /* ports are indexed from one */
		uint32_t test = (1 << i);
		struct ibv_context *ctx = NULL;
		struct ibv_port_attr port_attr;
		struct ibv_pd *pd = NULL;
		struct priv *priv = NULL;
		struct rte_eth_dev *eth_dev;
#ifdef HAVE_EXP_QUERY_DEVICE
		struct ibv_exp_device_attr exp_device_attr;
#endif /* HAVE_EXP_QUERY_DEVICE */
		struct ether_addr mac;

#ifdef HAVE_EXP_QUERY_DEVICE
		exp_device_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
			IBV_EXP_DEVICE_ATTR_RX_HASH;
#endif /* HAVE_EXP_QUERY_DEVICE */

		DEBUG("using port %u (%08" PRIx32 ")", port, test);

		ctx = ibv_open_device(ibv_dev);
		if (ctx == NULL)
			goto port_error;

		/* Check port status. */
		err = ibv_query_port(ctx, port, &port_attr);
		if (err) {
			ERROR("port query failed: %s", strerror(err));
			goto port_error;
		}
		if (port_attr.state != IBV_PORT_ACTIVE)
			DEBUG("port %d is not active: \"%s\" (%d)",
			      port, ibv_port_state_str(port_attr.state),
			      port_attr.state);

		/* Allocate protection domain. */
		pd = ibv_alloc_pd(ctx);
		if (pd == NULL) {
			ERROR("PD allocation failure");
			err = ENOMEM;
			goto port_error;
		}

		mlx5_dev[idx].ports |= test;

		/* from rte_ethdev.c */
		priv = rte_zmalloc("ethdev private structure",
				   sizeof(*priv),
				   RTE_CACHE_LINE_SIZE);
		if (priv == NULL) {
			ERROR("priv allocation failure");
			err = ENOMEM;
			goto port_error;
		}

		priv->ctx = ctx;
		priv->device_attr = device_attr;
		priv->port = port;
		priv->pd = pd;
		priv->mtu = ETHER_MTU;
#ifdef HAVE_EXP_QUERY_DEVICE
		if (ibv_exp_query_device(ctx, &exp_device_attr)) {
			ERROR("ibv_exp_query_device() failed");
			goto port_error;
		}

		priv->hw_csum =
			((exp_device_attr.exp_device_cap_flags &
			  IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) &&
			 (exp_device_attr.exp_device_cap_flags &
			  IBV_EXP_DEVICE_RX_CSUM_IP_PKT));
		DEBUG("checksum offloading is %ssupported",
		      (priv->hw_csum ? "" : "not "));

		priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
					 IBV_EXP_DEVICE_VXLAN_SUPPORT);
		DEBUG("L2 tunnel checksum offloads are %ssupported",
		      (priv->hw_csum_l2tun ? "" : "not "));

		priv->ind_table_max_size =
			exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
		DEBUG("maximum RX indirection table size is %u",
		      priv->ind_table_max_size);
#else /* HAVE_EXP_QUERY_DEVICE */
		priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE;
#endif /* HAVE_EXP_QUERY_DEVICE */
		priv->vf = vf;
		/* Allocate and register default RSS hash keys. */
		priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
					    sizeof((*priv->rss_conf)[0]), 0);
		if (priv->rss_conf == NULL) {
			err = ENOMEM;
			goto port_error;
		}
		err = rss_hash_rss_conf_new_key(priv,
						rss_hash_default_key,
						rss_hash_default_key_len,
						ETH_RSS_PROTO_MASK);
		if (err)
			goto port_error;
		/* Configure the first MAC address by default. */
		if (priv_get_mac(priv, &mac.addr_bytes)) {
			ERROR("cannot get MAC address, is mlx5_en loaded?"
			      " (errno: %s)", strerror(errno));
			goto port_error;
		}
		INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
		     priv->port,
		     mac.addr_bytes[0], mac.addr_bytes[1],
		     mac.addr_bytes[2], mac.addr_bytes[3],
		     mac.addr_bytes[4], mac.addr_bytes[5]);
		/* Register MAC and broadcast addresses. */
		claim_zero(priv_mac_addr_add(priv, 0,
					     (const uint8_t (*)[ETHER_ADDR_LEN])
					     mac.addr_bytes));
		claim_zero(priv_mac_addr_add(priv, (RTE_DIM(priv->mac) - 1),
					     &(const uint8_t [ETHER_ADDR_LEN])
					     { "\xff\xff\xff\xff\xff\xff" }));
#ifndef NDEBUG
		{
			char ifname[IF_NAMESIZE];

			if (priv_get_ifname(priv, &ifname) == 0)
				DEBUG("port %u ifname is \"%s\"",
				      priv->port, ifname);
			else
				DEBUG("port %u ifname is unknown", priv->port);
		}
#endif
		/* Get actual MTU if possible. */
		priv_get_mtu(priv, &priv->mtu);
		DEBUG("port %u MTU is %u", priv->port, priv->mtu);

		/* from rte_ethdev.c */
		{
			char name[RTE_ETH_NAME_MAX_LEN];

			snprintf(name, sizeof(name), "%s port %u",
				 ibv_get_device_name(ibv_dev), port);
			eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI);
		}
		if (eth_dev == NULL) {
			ERROR("can not allocate rte ethdev");
			err = ENOMEM;
			goto port_error;
		}

		eth_dev->data->dev_private = priv;
		eth_dev->pci_dev = pci_dev;
		eth_dev->driver = &mlx5_driver;
		eth_dev->data->rx_mbuf_alloc_failed = 0;
		eth_dev->data->mtu = ETHER_MTU;

		priv->dev = eth_dev;
		eth_dev->dev_ops = &mlx5_dev_ops;
		eth_dev->data->mac_addrs = priv->mac;
		TAILQ_INIT(&eth_dev->link_intr_cbs);

		/* Bring Ethernet device up. */
		DEBUG("forcing Ethernet interface up");
		priv_set_flags(priv, ~IFF_UP, IFF_UP);
		continue;

port_error:
		/* priv may still be NULL if the context or priv allocation failed */
		if (priv != NULL) {
			rte_free(priv->rss_conf);
			rte_free(priv);
		}
		if (pd)
			claim_zero(ibv_dealloc_pd(pd));
		if (ctx)
			claim_zero(ibv_close_device(ctx));
		break;
	}
int
init(int argc, char *argv[]) {
	int retval;
	const struct rte_memzone *mz_nf;
	const struct rte_memzone *mz_port;
	const struct rte_memzone *mz_cores;
	const struct rte_memzone *mz_scp;
	const struct rte_memzone *mz_services;
	const struct rte_memzone *mz_nf_per_service;
	uint8_t i, total_ports, port_id;

	/* init EAL, parsing EAL args */
	retval = rte_eal_init(argc, argv);
	if (retval < 0)
		return -1;
	argc -= retval;
	argv += retval;

#ifdef RTE_LIBRTE_PDUMP
	rte_pdump_init(NULL);
#endif

	/* get total number of ports */
	total_ports = rte_eth_dev_count_avail();

	/* set up array for NF tx data */
	mz_nf = rte_memzone_reserve(MZ_NF_INFO, sizeof(*nfs) * MAX_NFS,
				    rte_socket_id(), NO_FLAGS);
	if (mz_nf == NULL)
		rte_exit(EXIT_FAILURE, "Cannot reserve memory zone for nf information\n");
	memset(mz_nf->addr, 0, sizeof(*nfs) * MAX_NFS);
	nfs = mz_nf->addr;

	/* set up ports info */
	mz_port = rte_memzone_reserve(MZ_PORT_INFO, sizeof(*ports),
				      rte_socket_id(), NO_FLAGS);
	if (mz_port == NULL)
		rte_exit(EXIT_FAILURE, "Cannot reserve memory zone for port information\n");
	ports = mz_port->addr;

	/* set up core status */
	mz_cores = rte_memzone_reserve(MZ_CORES_STATUS,
				       sizeof(*cores) * onvm_threading_get_num_cores(),
				       rte_socket_id(), NO_FLAGS);
	if (mz_cores == NULL)
		rte_exit(EXIT_FAILURE, "Cannot reserve memory zone for core information\n");
	/* clear the same number of entries that were reserved above */
	memset(mz_cores->addr, 0, sizeof(*cores) * onvm_threading_get_num_cores());
	cores = mz_cores->addr;

	/* set up per-service arrays of NF ids */
	mz_services = rte_memzone_reserve(MZ_SERVICES_INFO,
					  sizeof(uint16_t *) * num_services,
					  rte_socket_id(), NO_FLAGS);
	if (mz_services == NULL)
		rte_exit(EXIT_FAILURE, "Cannot reserve memory zone for services information\n");
	services = mz_services->addr;
	for (i = 0; i < num_services; i++) {
		services[i] = rte_calloc("one service NFs", MAX_NFS_PER_SERVICE, sizeof(uint16_t), 0);
	}
	mz_nf_per_service = rte_memzone_reserve(MZ_NF_PER_SERVICE_INFO,
						sizeof(uint16_t) * num_services,
						rte_socket_id(), NO_FLAGS);
	if (mz_nf_per_service == NULL) {
		rte_exit(EXIT_FAILURE, "Cannot reserve memory zone for NF per service information.\n");
	}
	nf_per_service_count = mz_nf_per_service->addr;

	/* parse additional, application arguments */
	retval = parse_app_args(total_ports, argc, argv);
	if (retval != 0)
		return -1;

	/* initialise mbuf pools */
	retval = init_mbuf_pools();
	if (retval != 0)
		rte_exit(EXIT_FAILURE, "Cannot create needed mbuf pools\n");

	/* initialise nf info pool */
	retval = init_nf_info_pool();
	if (retval != 0) {
		rte_exit(EXIT_FAILURE, "Cannot create nf info mbuf pool: %s\n", rte_strerror(rte_errno));
	}

	/* initialise pool for NF messages */
	retval = init_nf_msg_pool();
	if (retval != 0) {
		rte_exit(EXIT_FAILURE, "Cannot create nf message pool: %s\n", rte_strerror(rte_errno));
	}

	/* now initialise the ports we will use */
	for (i = 0; i < ports->num_ports; i++) {
		char event_msg_buf[32];

		port_id = ports->id[i];
		rte_eth_macaddr_get(port_id, &ports->mac[port_id]);
		retval = init_port(port_id);
		if (retval != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialise port %u\n", port_id);
		/* snprintf so a three-digit port id cannot overrun the buffer */
		snprintf(event_msg_buf, sizeof(event_msg_buf), "Port %u initialized", port_id);
		onvm_stats_add_event(event_msg_buf, NULL);
	}
	check_all_ports_link_status(ports->num_ports, (~0x0));

	/* initialise the NF queues/rings for inter-NF comms */
	init_shm_rings();

	/* initialise a queue for newly created NFs */
	init_info_queue();

	/* initialize a default service chain */
	default_chain = onvm_sc_create();
	retval = onvm_sc_append_entry(default_chain, ONVM_NF_ACTION_TONF, 1);
	if (retval == ENOSPC) {
		printf("chain length can not be larger than the maximum chain length\n");
		exit(1);
	}
	printf("Default service chain: send to sdn NF\n");

	/* set up service chain pointer shared to NFs */
	mz_scp = rte_memzone_reserve(MZ_SCP_INFO, sizeof(struct onvm_service_chain *),
				     rte_socket_id(), NO_FLAGS);
	if (mz_scp == NULL)
		rte_exit(EXIT_FAILURE, "Cannot reserve memory zone for service chain pointer\n");
	memset(mz_scp->addr, 0, sizeof(struct onvm_service_chain *));
	default_sc_p = mz_scp->addr;
	*default_sc_p = default_chain;
	onvm_sc_print(default_chain);

	onvm_flow_dir_init();

	return 0;
}