Example 1
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
		int socket_id, unsigned flags, unsigned align, unsigned bound)
{
	struct rte_mem_config *mcfg;
	size_t requested_len;
	int socket, i;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* no more room in config */
	if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
		RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
		rte_errno = ENOSPC;
		return NULL;
	}

	/* zone already exists */
	if ((memzone_lookup_thread_unsafe(name)) != NULL) {
		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
			__func__, name);
		rte_errno = EEXIST;
		return NULL;
	}

	/* if alignment is not a power of two */
	if (align && !rte_is_power_of_2(align)) {
		RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
				align);
		rte_errno = EINVAL;
		return NULL;
	}

	/* alignments smaller than the cache line size are rounded up to it */
	if (align < RTE_CACHE_LINE_SIZE)
		align = RTE_CACHE_LINE_SIZE;

	/* align length on cache boundary. Check for overflow before doing so */
	if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
		rte_errno = EINVAL; /* requested size too big */
		return NULL;
	}

	len += RTE_CACHE_LINE_MASK;
	len &= ~((size_t) RTE_CACHE_LINE_MASK);

	/* save minimal requested length */
	requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);

	/* check that boundary condition is valid */
	if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
		rte_errno = EINVAL;
		return NULL;
	}

	if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (!rte_eal_has_hugepages())
		socket_id = SOCKET_ID_ANY;

	if (len == 0) {
		if (bound != 0)
			requested_len = bound;
		else
			requested_len = find_heap_max_free_elem(&socket_id, align);
	}

	if (socket_id == SOCKET_ID_ANY)
		socket = malloc_get_numa_socket();
	else
		socket = socket_id;

	/* allocate memory on heap */
	void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
			requested_len, flags, align, bound);

	if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
		/* try other heaps */
		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
			if (socket == i)
				continue;

			mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
					NULL, requested_len, flags, align, bound);
			if (mz_addr != NULL)
				break;
		}
	}

	if (mz_addr == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

	const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);

	/* fill the zone in config */
	struct rte_memzone *mz = get_next_free_memzone();

	if (mz == NULL) {
		RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
				"in config!\n", __func__);
		rte_errno = ENOSPC;
		return NULL;
	}

	mcfg->memzone_cnt++;
	snprintf(mz->name, sizeof(mz->name), "%s", name);
	mz->phys_addr = rte_malloc_virt2phy(mz_addr);
	mz->addr = mz_addr;
	mz->len = (requested_len == 0 ? elem->size : requested_len);
	mz->hugepage_sz = elem->ms->hugepage_sz;
	mz->socket_id = elem->ms->socket_id;
	mz->flags = 0;
	mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;

	return mz;
}
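A minimal caller-side sketch, assuming a DPDK build where the public wrapper rte_memzone_reserve_aligned() funnels into the thread-unsafe function above; the zone name, size and alignment are illustrative only:

#include <rte_memzone.h>
#include <rte_memory.h>
#include <rte_errno.h>
#include <rte_log.h>

static const struct rte_memzone *
reserve_example_zone(void)
{
	/* 64 KB on any socket, default flags, 4 KB alignment */
	const struct rte_memzone *mz = rte_memzone_reserve_aligned(
			"example_zone", 64 * 1024, SOCKET_ID_ANY, 0, 4096);

	if (mz == NULL) {
		/* rte_errno was set by the path above:
		 * ENOSPC, EEXIST, EINVAL or ENOMEM */
		RTE_LOG(ERR, EAL, "memzone reserve failed: %d\n", rte_errno);
		return NULL;
	}
	return mz;
}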
Example 2
void
log_receiver(struct receiver_t *receiver) {
    RTE_LOG(INFO, RECEIVER, "------------- Receiver -------------\n");
    RTE_LOG(INFO, RECEIVER, "| Core ID:               %"PRIu32"\n", receiver->core_id);
    RTE_LOG(INFO, RECEIVER, "| In port:               %"PRIu32"\n", receiver->in_port);
    RTE_LOG(INFO, RECEIVER, "| MAC:                   "FORMAT_MAC"\n", ARG_V_MAC(receiver->mac));
    RTE_LOG(INFO, RECEIVER, "| Packets received:      %"PRIu64"\n", receiver->pkts_received);
    if (receiver->nb_polls != 0)
        RTE_LOG(INFO, RECEIVER, "| Load:                  %f\n", receiver->nb_rec / (float) receiver->nb_polls);
    RTE_LOG(INFO, RECEIVER, "| sum Time (CPU Cycles): %f\n", receiver->time_a /(float) receiver->nb_measurements);
    RTE_LOG(INFO, RECEIVER, "| rec Time (CPU Cycles): %f\n", receiver->time_b /(float) receiver->nb_measurements);
    RTE_LOG(INFO, RECEIVER, "|***********************************\n");

    receiver->nb_polls = 0;
    receiver->nb_rec = 0;

    struct rte_eth_stats stats;
    rte_eth_stats_get(receiver->in_port, &stats);

    RTE_LOG(INFO, RECEIVER, "| RX: %"PRIu64" TX: %"PRIu64" \n", stats.ipackets, stats.opackets);
    RTE_LOG(INFO, RECEIVER, "| RX dropped: %"PRIu64" RX error: %"PRIu64" TX error: %"PRIu64"\n", stats.imissed, stats.ierrors, stats.oerrors);
    RTE_LOG(INFO, RECEIVER, "------------------------------------\n");
}
Example 3
static void *
rte_table_lpm_ipv6_create(void *params, int socket_id, uint32_t entry_size)
{
	struct rte_table_lpm_ipv6_params *p = params;
	struct rte_table_lpm_ipv6 *lpm;
	struct rte_lpm6_config lpm6_config;
	uint32_t total_size, nht_size;

	/* Check input parameters */
	if (p == NULL) {
		RTE_LOG(ERR, TABLE, "%s: NULL input parameters\n", __func__);
		return NULL;
	}
	if (p->n_rules == 0) {
		RTE_LOG(ERR, TABLE, "%s: Invalid n_rules\n", __func__);
		return NULL;
	}
	if (p->number_tbl8s == 0) {
		RTE_LOG(ERR, TABLE, "%s: Invalid number_tbl8s\n", __func__);
		return NULL;
	}
	if (p->entry_unique_size == 0) {
		RTE_LOG(ERR, TABLE, "%s: Invalid entry_unique_size\n",
			__func__);
		return NULL;
	}
	if (p->entry_unique_size > entry_size) {
		RTE_LOG(ERR, TABLE, "%s: Invalid entry_unique_size\n",
			__func__);
		return NULL;
	}
	if (p->name == NULL) {
		RTE_LOG(ERR, TABLE, "%s: Table name is NULL\n",
			__func__);
		return NULL;
	}
	entry_size = RTE_ALIGN(entry_size, sizeof(uint64_t));

	/* Memory allocation */
	nht_size = RTE_TABLE_LPM_MAX_NEXT_HOPS * entry_size;
	total_size = sizeof(struct rte_table_lpm_ipv6) + nht_size;
	lpm = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE,
		socket_id);
	if (lpm == NULL) {
		RTE_LOG(ERR, TABLE,
			"%s: Cannot allocate %u bytes for LPM IPv6 table\n",
			__func__, total_size);
		return NULL;
	}

	/* LPM low-level table creation */
	lpm6_config.max_rules = p->n_rules;
	lpm6_config.number_tbl8s = p->number_tbl8s;
	lpm6_config.flags = 0;
	lpm->lpm = rte_lpm6_create(p->name, socket_id, &lpm6_config);
	if (lpm->lpm == NULL) {
		rte_free(lpm);
		RTE_LOG(ERR, TABLE,
			"Unable to create low-level LPM IPv6 table\n");
		return NULL;
	}

	/* Memory initialization */
	lpm->entry_size = entry_size;
	lpm->entry_unique_size = p->entry_unique_size;
	lpm->n_rules = p->n_rules;
	lpm->offset = p->offset;

	return lpm;
}
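The next-hop table is sized only after entry_size has been rounded up to a multiple of 8 bytes by RTE_ALIGN(). A standalone sketch of that arithmetic (ALIGN_UP mimics RTE_ALIGN for power-of-two boundaries; the next-hop count is a made-up stand-in for RTE_TABLE_LPM_MAX_NEXT_HOPS):

#include <stdio.h>
#include <stdint.h>

/* round x up to the next multiple of the power-of-two a, like RTE_ALIGN */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint32_t)(a) - 1))

int main(void)
{
	uint32_t entry_size = 13;	/* caller-provided entry size */
	uint32_t max_next_hops = 256;	/* stand-in value for the example */
	uint32_t nht_size;

	entry_size = ALIGN_UP(entry_size, sizeof(uint64_t));	/* 13 -> 16 */
	nht_size = max_next_hops * entry_size;			/* 256 * 16 = 4096 */

	printf("entry_size=%u nht_size=%u\n", entry_size, nht_size);
	return 0;
}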
Example 4
/*
 * process mempool node idx#_mempool_gref, idx = 0, 1, 2...
 * until we encounter a node that doesn't exist.
 */
int
parse_mempoolnode(struct xen_guest *guest)
{
	uint32_t i, len;
	char path[PATH_MAX] = {0};
	struct xen_gntnode *gntnode = NULL;
	struct xen_mempool *mempool = NULL;
	char *buf;

	bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
	guest->pool_num = 0;

	while (1) {
		/* build the path of the next mempool node to probe */
		snprintf(path, sizeof(path),
			XEN_VM_ROOTNODE_FMT"/%d_"XEN_MEMPOOL_SUFFIX,
			guest->dom_id,
			guest->pool_num);

		if ((buf = xen_read_node(path, &len)) != NULL) {
			/* this node exists */
			free(buf);
		} else {
			if (guest->pool_num == 0) {
				RTE_LOG(ERR, PMD, "no mempool found\n");
				return -1;
			}
			break;
		}

		mempool = &guest->mempool[guest->pool_num];
		mempool->dom_id = guest->dom_id;
		mempool->pool_idx = guest->pool_num;

		RTE_LOG(INFO, XENHOST, "  %s: mempool %u parse gntnode %s\n", __func__, guest->pool_num, path);
		gntnode = parse_gntnode(guest->dom_id, path);
		if (gntnode == NULL)
			goto err;

		if (parse_mpool_va(mempool))
			goto err;

		RTE_LOG(INFO, XENHOST, "  %s: mempool %u map gntnode %s\n", __func__, guest->pool_num, path);
		if (map_mempoolnode(gntnode, mempool))
			goto err;

		xen_free_gntnode(gntnode);
		guest->pool_num++;
	}

	return 0;
err:
	if (gntnode)
		xen_free_gntnode(gntnode);
	for (i = 0; i <  MAX_XENVIRT_MEMPOOL ; i++) {
		cleanup_mempool(&guest->mempool[i]);
	}
	/* reinitialise mempool */
	bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
	return -1;
}
Example 5
/*
 * Generate the runtime structure using build structure
 */
int
rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint32_t num_categories, uint32_t data_index_sz, int match_num)
{
	void *mem;
	size_t total_size;
	uint64_t *node_array, no_match;
	uint32_t n, match_index;
	struct rte_acl_match_results *match;
	struct acl_node_counters counts;
	struct rte_acl_indices indices;

	/* Fill counts and indices arrays from the nodes. */
	match_num = acl_calc_counts_indices(&counts, &indices, trie,
		node_bld_trie, num_tries, match_num);

	/* Allocate runtime memory (align to cache boundary) */
	total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) +
		indices.match_index * sizeof(uint64_t) +
		(match_num + 2) * sizeof(struct rte_acl_match_results) +
		XMM_SIZE;

	mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
			ctx->socket_id);
	if (mem == NULL) {
		RTE_LOG(ERR, ACL,
			"allocation of %zu bytes on socket %d for %s failed\n",
			total_size, ctx->socket_id, ctx->name);
		return -ENOMEM;
	}

	/* Fill the runtime structure */
	match_index = indices.match_index;
	node_array = (uint64_t *)((uintptr_t)mem +
		RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE));

	/*
	 * Setup the NOMATCH node (a SINGLE at the
	 * highest index, that points to itself)
	 */

	node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_DFA_SIZE | RTE_ACL_NODE_SINGLE;
	no_match = RTE_ACL_NODE_MATCH;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		node_array[n] = no_match;

	match = ((struct rte_acl_match_results *)(node_array + match_index));
	memset(match, 0, sizeof(*match));

	for (n = 0; n < num_tries; n++) {

		acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
			&indices, num_categories);

		if (node_bld_trie[n].trie->node_index == no_match)
			trie[n].root_index = 0;
		else
			trie[n].root_index = node_bld_trie[n].trie->node_index;
	}

	ctx->mem = mem;
	ctx->mem_sz = total_size;
	ctx->data_indexes = mem;
	ctx->num_tries = num_tries;
	ctx->num_categories = num_categories;
	ctx->match_index = match_index;
	ctx->no_match = no_match;
	ctx->idle = node_array[RTE_ACL_DFA_SIZE];
	ctx->trans_table = node_array;
	memcpy(ctx->trie, trie, sizeof(ctx->trie));

	acl_gen_log_stats(ctx, &counts);
	return 0;
}
Example 6
int
user_set_mem_table(int vid, struct VhostUserMsg *pmsg)
{
	struct VhostUserMemory memory = pmsg->payload.memory;
	struct virtio_memory_regions *pregion;
	uint64_t mapped_address, mapped_size;
	struct virtio_net *dev;
	unsigned int idx = 0;
	struct orig_region_map *pregion_orig;
	uint64_t alignment;

	/* unmap old memory regions one by one */
	dev = get_device(vid);
	if (dev == NULL)
		return -1;

	/* Remove from the data plane. */
	if (dev->flags & VIRTIO_DEV_RUNNING) {
		dev->flags &= ~VIRTIO_DEV_RUNNING;
		notify_ops->destroy_device(vid);
	}

	if (dev->mem) {
		free_mem_region(dev);
		free(dev->mem);
		dev->mem = NULL;
	}

	dev->mem = calloc(1,
		sizeof(struct virtio_memory) +
		sizeof(struct virtio_memory_regions) * memory.nregions +
		sizeof(struct orig_region_map) * memory.nregions);
	if (dev->mem == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"(%d) failed to allocate memory for dev->mem\n",
			dev->vid);
		return -1;
	}
	dev->mem->nregions = memory.nregions;

	pregion_orig = orig_region(dev->mem, memory.nregions);
	for (idx = 0; idx < memory.nregions; idx++) {
		pregion = &dev->mem->regions[idx];
		pregion->guest_phys_address =
			memory.regions[idx].guest_phys_addr;
		pregion->guest_phys_address_end =
			memory.regions[idx].guest_phys_addr +
			memory.regions[idx].memory_size;
		pregion->memory_size =
			memory.regions[idx].memory_size;
		pregion->userspace_address =
			memory.regions[idx].userspace_addr;

		/* This is ugly */
		mapped_size = memory.regions[idx].memory_size +
			memory.regions[idx].mmap_offset;

		/* mmap() without MAP_ANONYMOUS must be called with a length
		 * aligned to the hugepage size on older long-term Linux
		 * kernels (e.g. 2.6.32 and 3.2.72), otherwise it fails with
		 * EINVAL.
		 *
		 * To avoid that failure, make sure the length passed by the
		 * caller stays aligned.
		 */
		alignment = get_blk_size(pmsg->fds[idx]);
		if (alignment == (uint64_t)-1) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"couldn't get hugepage size through fstat\n");
			goto err_mmap;
		}
		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);

		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
			mapped_size,
			PROT_READ | PROT_WRITE, MAP_SHARED,
			pmsg->fds[idx],
			0);

		RTE_LOG(INFO, VHOST_CONFIG,
			"mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
			"off:0x%"PRIx64" align:0x%"PRIx64"\n",
			idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
			mapped_size, memory.regions[idx].mmap_offset,
			alignment);

		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"mmap qemu guest failed.\n");
			goto err_mmap;
		}

		pregion_orig[idx].mapped_address = mapped_address;
		pregion_orig[idx].mapped_size = mapped_size;
		pregion_orig[idx].blksz = alignment;
		pregion_orig[idx].fd = pmsg->fds[idx];

		mapped_address +=  memory.regions[idx].mmap_offset;

		pregion->address_offset = mapped_address -
			pregion->guest_phys_address;

		if (memory.regions[idx].guest_phys_addr == 0) {
			dev->mem->base_address =
				memory.regions[idx].userspace_addr;
			dev->mem->mapped_address =
				pregion->address_offset;
		}

		LOG_DEBUG(VHOST_CONFIG,
			"REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
			idx,
			(void *)(uintptr_t)pregion->guest_phys_address,
			(void *)(uintptr_t)pregion->userspace_address,
			 pregion->memory_size);
	}

	return 0;

err_mmap:
	while (idx--) {
		munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address,
				pregion_orig[idx].mapped_size);
		close(pregion_orig[idx].fd);
	}
	free(dev->mem);
	dev->mem = NULL;
	return -1;
}
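get_blk_size() is not part of this listing; a plausible sketch of such a helper (an assumption, not the original), together with the ceiling alignment that RTE_ALIGN_CEIL applies to the mmap() length:

#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

/* Hypothetical helper: return the block size reported by fstat() for the
 * hugepage-backed fd, or (uint64_t)-1 on failure, as used above. */
static uint64_t
get_blk_size(int fd)
{
	struct stat stat_buf;

	if (fstat(fd, &stat_buf) == -1)
		return (uint64_t)-1;
	return (uint64_t)stat_buf.st_blksize;
}

/* Same rounding RTE_ALIGN_CEIL performs for a power-of-two alignment. */
static uint64_t
align_ceil(uint64_t len, uint64_t alignment)
{
	return (len + alignment - 1) & ~(alignment - 1);
}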
Example 7
/*
 * Parse a grant node.
 * @param domid
 *  Guest domain id.
 * @param path
 *  Full path string of a grant node, e.g. for the following (key, val) pair:
 *  idx#_mempool_gref = "gref#, gref#, gref#"
 *  path = 'local/domain/domid/control/dpdk/idx#_mempool_gref'
 *  gref# is a shared page containing packed (gref, pfn) entries
 * @return
 *  Pointer to the parsed xen_gntnode, or NULL on failure
 */
static struct xen_gntnode *
parse_gntnode(int dom_id, char *path)
{
	char **gref_list = NULL;
	uint32_t i, len, gref_num;
	void *addr = NULL;
	char *buf = NULL;
	struct xen_gntnode *gntnode = NULL;
	struct xen_gnt *gnt = NULL;
	int pg_sz = getpagesize();
	char *end;
	uint64_t index;

	if ((buf = xen_read_node(path, &len)) == NULL)
		goto err;

	gref_list = malloc(MAX_GREF_PER_NODE * sizeof(char *));
	if (gref_list == NULL)
		goto err;

	gref_num = rte_strsplit(buf, len, gref_list, MAX_GREF_PER_NODE,
			XEN_GREF_SPLITTOKEN);
	if (gref_num == 0) {
		RTE_LOG(ERR, XENHOST, "  %s: invalid grant node format\n", __func__);
		goto err;
	}

	gntnode = calloc(1, sizeof(struct xen_gntnode));
	gnt = calloc(gref_num, sizeof(struct xen_gnt));
	if (gnt == NULL || gntnode == NULL)
		goto err;

	for (i = 0; i < gref_num; i++) {
		errno = 0;
		gnt[i].gref = strtol(gref_list[i], &end, 0);
		if (errno != 0 || end == NULL || end == gref_list[i] ||
			(*end != '\0' &&  *end != XEN_GREF_SPLITTOKEN)) {
			RTE_LOG(ERR, XENHOST, "  %s: parse grant node item failed\n", __func__);
			goto err;
		}
		addr = xen_grant_mmap(NULL, dom_id, gnt[i].gref, &index);
		if (addr == NULL) {
			RTE_LOG(ERR, XENHOST, "  %s: map gref %u failed\n", __func__, gnt[i].gref);
			goto err;
		}
		RTE_LOG(INFO, XENHOST, "      %s: map gref %u to %p\n", __func__, gnt[i].gref, addr);
		memcpy(gnt[i].gref_pfn, addr, pg_sz);
		if (munmap(addr, pg_sz)) {
			RTE_LOG(INFO, XENHOST, "  %s: unmap gref %u failed\n", __func__, gnt[i].gref);
			goto err;
		}
		if (xen_unmap_grant_ref(index)) {
			RTE_LOG(INFO, XENHOST, "  %s: release gref %u failed\n", __func__, gnt[i].gref);
			goto err;
		}

	}

	gntnode->gnt_num  = gref_num;
	gntnode->gnt_info = gnt;

	free(buf);
	free(gref_list);
	return gntnode;

err:
	free(gnt);
	free(gntnode);
	free(gref_list);
	free(buf);
	return NULL;
}
Example 8
/*
 * This is a complex function. What it does is the following:
 *  1. Goes through metadata and gets list of hugepages involved
 *  2. Sorts the hugepages by size (1G first)
 *  3. Goes through metadata again and writes correct offsets
 *  4. Goes through pages and finds out their filenames, offsets etc.
 */
static int
build_config(struct rte_ivshmem_metadata * metadata)
{
	struct rte_ivshmem_metadata_entry * e_local;
	struct memseg_cache_entry * ms_local;
	struct rte_memseg pages[IVSHMEM_MAX_PAGES];
	struct rte_ivshmem_metadata_entry *entry;
	struct memseg_cache_entry * c_entry, * prev_entry;
	struct ivshmem_config * config;
	unsigned i, j, mz_iter, ms_iter;
	uint64_t biggest_len;
	int biggest_idx;

	/* return error if we try to use an unknown config file */
	config = get_config_by_name(metadata->name);
	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name);
		goto fail_e;
	}

	memset(pages, 0, sizeof(pages));

	e_local = malloc(sizeof(config->metadata->entry));
	if (e_local == NULL)
		goto fail_e;
	ms_local = malloc(sizeof(config->memseg_cache));
	if (ms_local == NULL)
		goto fail_ms;


	/* make local copies before doing anything */
	memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry));
	memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache));

	qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry),
			entry_compare);

	/* first pass - collect all huge pages */
	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {

		entry = &e_local[mz_iter];

		uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64,
				entry->mz.hugepage_sz);
		uint64_t offset = entry->mz.addr_64 - start_addr;
		uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset,
				entry->mz.hugepage_sz);

		if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0)
			continue;

		int start_page;

		/* find first unused page - mz are phys_addr sorted so we don't have to
		 * look out for holes */
		for (i = 0; i < RTE_DIM(pages); i++) {

			/* skip if we already have this page */
			if (pages[i].addr_64 == start_addr) {
				start_addr += entry->mz.hugepage_sz;
				len -= entry->mz.hugepage_sz;
				continue;
			}
			/* we found a new page */
			else if (pages[i].addr_64 == 0) {
				start_page = i;
				break;
			}
		}
		if (i == RTE_DIM(pages)) {
			RTE_LOG(ERR, EAL, "Cannot find unused page!\n");
			goto fail;
		}

		/* populate however many pages the memzone has */
		for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) {

			pages[i].addr_64 = start_addr;
			pages[i].len = entry->mz.hugepage_sz;
			start_addr += entry->mz.hugepage_sz;
			len -= entry->mz.hugepage_sz;
		}
		/* if there's still length left */
		if (len != 0) {
			RTE_LOG(ERR, EAL, "Not enough space for pages!\n");
			goto fail;
		}
	}

	/* second pass - sort pages by size */
	for (i = 0; i < RTE_DIM(pages); i++) {

		if (pages[i].addr == NULL)
			break;

		biggest_len = 0;
		biggest_idx = -1;

		/*
		 * browse all entries starting at 'i', and find the
		 * entry with the biggest length
		 */
		for (j = i; j < RTE_DIM(pages); j++) {
			if (pages[j].addr == NULL)
				break;
			if (biggest_len == 0 ||
				pages[j].len > biggest_len) {
				biggest_len = pages[j].len;
				biggest_idx = j;
			}
		}

		/* should not happen */
		if (biggest_idx == -1) {
			RTE_LOG(ERR, EAL, "Error sorting by size!\n");
			goto fail;
		}
		if (i != (unsigned) biggest_idx) {
			struct rte_memseg tmp;

			memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg));

			/* we don't want to break contiguousness, so instead of just
			 * swapping segments, we move all the preceding segments to the
			 * right and then put the old segment @ biggest_idx in place of
			 * segment @ i */
			for (j = biggest_idx - 1; j >= i; j--) {
				memcpy(&pages[j+1], &pages[j], sizeof(struct rte_memseg));
				memset(&pages[j], 0, sizeof(struct rte_memseg));
				if (j == 0)
					break;
			}

			/* put old biggest segment to its new place */
			memcpy(&pages[i], &tmp, sizeof(struct rte_memseg));
		}
	}

	/* third pass - write correct offsets */
	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {

		uint64_t offset = 0;

		entry = &e_local[mz_iter];

		if (entry->mz.addr_64 == 0)
			break;

		/* find page for current memzone */
		for (i = 0; i < RTE_DIM(pages); i++) {
			/* we found our page */
			if (entry->mz.addr_64 >= pages[i].addr_64 &&
					entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) {
				entry->offset = (entry->mz.addr_64 - pages[i].addr_64) +
						offset;
				break;
			}
			offset += pages[i].len;
		}
		if (i == RTE_DIM(pages)) {
			RTE_LOG(ERR, EAL, "Page not found!\n");
			goto fail;
		}
	}

	ms_iter = 0;
	prev_entry = NULL;

	/* fourth pass - create proper memseg cache */
	for (i = 0; i < RTE_DIM(pages) &&
			ms_iter <= RTE_DIM(config->memseg_cache); i++) {
		if (pages[i].addr_64 == 0)
			break;


		if (ms_iter == RTE_DIM(pages)) {
			RTE_LOG(ERR, EAL, "The universe has collapsed!\n");
			goto fail;
		}

		c_entry = &ms_local[ms_iter];
		c_entry->len = pages[i].len;

		if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0)
			goto fail;

		/* if previous entry has the same filename and is contiguous,
		 * clear current entry and increase previous entry's length
		 */
		if (prev_entry != NULL &&
				strncmp(c_entry->filepath, prev_entry->filepath,
				sizeof(c_entry->filepath)) == 0 &&
				prev_entry->offset + prev_entry->len == c_entry->offset) {
			prev_entry->len += pages[i].len;
			memset(c_entry, 0, sizeof(struct memseg_cache_entry));
		}
		else {
			prev_entry = c_entry;
			ms_iter++;
		}
	}

	/* update current configuration with new valid data */
	memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry));
	memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache));

	free(ms_local);
	free(e_local);

	return 0;
fail:
	free(ms_local);
fail_ms:
	free(e_local);
fail_e:
	return -1;
}
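entry_compare() is referenced by the qsort() call but not included in this listing. Since the first pass relies on the memzones being address-sorted ("mz are phys_addr sorted"), a plausible comparator could look like the sketch below; it is an assumption, not the original helper, and needs the ivshmem metadata definitions from this file's headers.

/* Hypothetical qsort() comparator: order metadata entries by memzone
 * address, pushing empty entries (addr_64 == 0) to the end. */
static int
entry_compare(const void *a, const void *b)
{
	const struct rte_ivshmem_metadata_entry *e1 = a;
	const struct rte_ivshmem_metadata_entry *e2 = b;

	if (e1->mz.addr_64 == 0)
		return (e2->mz.addr_64 == 0) ? 0 : 1;
	if (e2->mz.addr_64 == 0)
		return -1;
	if (e1->mz.addr_64 < e2->mz.addr_64)
		return -1;
	return e1->mz.addr_64 > e2->mz.addr_64;
}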
Example 9
int
rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name)
{
	const struct memseg_cache_entry * ms_cache, *entry;
	struct ivshmem_config * config;
	char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
	char cfg_file_path[PATH_MAX];
	unsigned remaining_len, tmplen, iter;
	uint64_t shared_mem_size, zero_size, total_size;

	if (buffer == NULL || name == NULL)
		return -1;

	config = get_config_by_name(name);

	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
		return -1;
	}

	rte_spinlock_lock(&config->sl);

	/* prepare metadata file path */
	snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT,
			config->metadata->name);

	ms_cache = config->memseg_cache;

	cmdline_ptr = cmdline;
	remaining_len = sizeof(cmdline);

	shared_mem_size = 0;
	iter = 0;

	while ((iter < RTE_DIM(config->metadata->entry)) && (ms_cache[iter].len != 0)) {

		entry = &ms_cache[iter];

		/* Offset and sizes within the current pathname */
		tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
				entry->filepath, entry->offset, entry->len);

		shared_mem_size += entry->len;

		cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
		remaining_len -= tmplen;

		if (remaining_len == 0) {
			RTE_LOG(ERR, EAL, "Command line too long!\n");
			rte_spinlock_unlock(&config->sl);
			return -1;
		}

		iter++;
	}

	total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED);
	zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED;

	/* add /dev/zero to command-line to fill the space */
	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
			"/dev/zero",
			(uint64_t)0x0,
			zero_size);

	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
	remaining_len -= tmplen;

	if (remaining_len == 0) {
		RTE_LOG(ERR, EAL, "Command line too long!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}

	/* add metadata file to the end of command-line */
	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
			cfg_file_path,
			(uint64_t)0x0,
			METADATA_SIZE_ALIGNED);

	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
	remaining_len -= tmplen;

	if (remaining_len == 0) {
		RTE_LOG(ERR, EAL, "Command line too long!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}

	/* bail out if the current length of the command line is bigger than the
	 * buffer supplied by the user, or than what IVSHMEM accepts */
	if ((sizeof(cmdline) - remaining_len) > size) {
		RTE_LOG(ERR, EAL, "Buffer is too short!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}
	/* complete the command-line */
	snprintf(buffer, size,
			IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
			total_size >> 20,
			cmdline);

	rte_spinlock_unlock(&config->sl);

	return 0;
}
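The command line is assembled by repeatedly appending with snprintf() while shrinking the remaining length, exactly like cmdline_ptr/remaining_len above. A self-contained illustration of that pattern (buffer size, format and paths are made up, and the conventional n >= remaining truncation check is used):

#include <stdio.h>

int main(void)
{
	char cmdline[64];
	char *ptr = cmdline;
	unsigned int remaining = sizeof(cmdline);
	const char *paths[] = { "/mnt/huge/file_0", "/mnt/huge/file_1" };
	unsigned int i;

	for (i = 0; i < 2; i++) {
		int n = snprintf(ptr, remaining, " fd=%s", paths[i]);

		if (n < 0 || (unsigned int)n >= remaining) {
			fprintf(stderr, "Command line too long!\n");
			return -1;
		}
		ptr += n;
		remaining -= (unsigned int)n;
	}
	printf("cmdline:%s\n", cmdline);
	return 0;
}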
Example 10
/*
 * This creates the memory mappings in the secondary process to match that of
 * the server process. It goes through each memory segment in the DPDK runtime
 * configuration, mapping them in order to form a contiguous block in the
 * virtual memory space
 */
int
rte_xen_dom0_memory_attach(void)
{
	const struct rte_mem_config *mcfg;
	unsigned s = 0; /* s used to track the segment number */
	int xen_fd = -1;
	int ret = -1;
	void *vir_addr;
	char name[DOM0_NAME_MAX] = {0};
	int page_size = getpagesize();

	mcfg = rte_eal_get_configuration()->mem_config;

	/* Check FD and open once */
	if (xen_fd < 0) {
		xen_fd = open(DOM0_MM_DEV, O_RDWR);
		if (xen_fd < 0) {
			RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
			goto error;
		}
	}

	/* construct memory management name for Dom0 */
	snprintf(name, DOM0_NAME_MAX, "%s-%s",
		internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
	/* attach to memory segments of primary process */
	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name);
	if (ret) {
		RTE_LOG(ERR, EAL,"attach memory segments fail.\n");
		goto error;
	}

	/* map all segments into memory to make sure we get the addrs */
	for (s = 0; s < RTE_MAX_MEMSEG; ++s) {

		/*
		 * the first memory segment with len==0 is the one that
		 * follows the last valid segment.
		 */
		if (mcfg->memseg[s].len == 0)
			break;

		vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
				PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd,
				s * page_size);
		if (vir_addr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
				"in %s to requested address [%p]\n",
				(unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV,
				mcfg->memseg[s].addr);
			goto error;
		}
	}
	return 0;

error:
	if (xen_fd >= 0) {
		close(xen_fd);
		xen_fd = -1;
	}
	return -1;
}
Example 11
/* fills hugepage cache entry for a given start virt_addr */
static int
get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e)
{
	uint64_t start_addr, end_addr;
	char *start,*path_end;
	char buf[PATH_MAX*2];
	FILE *f;

	start = NULL;
	path_end = NULL;
	start_addr = 0;

	memset(e->filepath, 0, sizeof(e->filepath));

	/* open /proc/self/maps */
	f = fopen("/proc/self/maps", "r");
	if (f == NULL) {
		RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n");
		return -1;
	}

	/* parse maps */
	while (fgets(buf, sizeof(buf), f) != NULL) {

		/* get endptr to end of start addr */
		start = buf;

		GET_PAGEMAP_ADDR(start,start_addr,'-',
				"Cannot find start address in maps!\n");

		/* if start address is bigger than our address, skip */
		if (start_addr > virt_addr)
			continue;

		GET_PAGEMAP_ADDR(start,end_addr,' ',
				"Cannot find end address in maps!\n");

		/* if end address is less than our address, skip */
		if (end_addr <= virt_addr)
			continue;

		/* find where the path starts */
		start = strstr(start, "/");

		if (start == NULL)
			continue;

		/* at this point, we know that this is our map.
		 * now let's find the file */
		path_end = strstr(start, "\n");
		break;
	}

	if (path_end == NULL) {
		RTE_LOG(ERR, EAL, "Hugefile path not found!\n");
		goto error;
	}

	/* calculate offset and copy the file path */
	snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1, "%s", start);

	e->offset = virt_addr - start_addr;

	fclose(f);

	return 0;
error:
	fclose(f);
	return -1;
}
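GET_PAGEMAP_ADDR() is a macro defined elsewhere; presumably it parses a hex address up to a delimiter and advances the cursor. A standalone sketch of doing the same for the "start-end" range of one /proc/self/maps line with strtoull() (illustration only, not the original macro):

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
	char line[] = "7f2c40000000-7f2c80000000 rw-s 00000000 00:27 42 /mnt/huge/rtemap_0\n";
	char *cursor = line, *end;
	uint64_t start, stop;

	start = strtoull(cursor, &end, 16);
	if (end == cursor || *end != '-') {
		fprintf(stderr, "Cannot find start address in maps!\n");
		return -1;
	}
	cursor = end + 1;	/* skip the '-' separator */

	stop = strtoull(cursor, &end, 16);
	if (end == cursor || *end != ' ') {
		fprintf(stderr, "Cannot find end address in maps!\n");
		return -1;
	}

	printf("range: 0x%llx-0x%llx\n",
		(unsigned long long)start, (unsigned long long)stop);
	return 0;
}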
Example 12
int
rte_xen_dom0_memory_init(void)
{
	void *vir_addr, *vma_addr = NULL;
	int err, ret = 0;
	uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0;
	size_t vma_len = 0;
	struct memory_info meminfo;
	struct memseg_info seginfo[RTE_MAX_MEMSEG];
	int flags, page_size = getpagesize();
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg *memseg = mcfg->memseg;
	uint64_t total_mem = internal_config.memory;

	memset(seginfo, 0, sizeof(seginfo));
	memset(&meminfo, 0, sizeof(struct memory_info));

	mem_size = get_xen_memory_size();
	requested = (unsigned) (total_mem / 0x100000);
	if (requested > mem_size)
		/* if we didn't satisfy total memory requirements */
		rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB,"
				" available: %uMB\n", requested, mem_size);
	else if (total_mem != 0)
		mem_size = requested;

	/* Check FD and open once */
	if (xen_fd < 0) {
		xen_fd = open(DOM0_MM_DEV, O_RDWR);
		if (xen_fd < 0) {
			RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
			return -1;
		}
	}

	meminfo.size = mem_size;

	/* construct memory management name for Dom0 */
	snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s",
		internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);

	/* Notify kernel driver to allocate memory */
	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo);
	if (ret < 0) {
		RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n");
		err = -EIO;
		goto fail;
	}

	/* Get number of memory segment from driver */
	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg);
	if (ret < 0) {
		RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n");
		err = -EIO;
		goto fail;
	}

	if (num_memseg > RTE_MAX_MEMSEG) {
		RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater"
			" than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG);
		err = -EIO;
		goto fail;
	}

	/* get information on all memory segments */
	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo);
	if (ret < 0) {
		RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n");
		err = -EIO;
		goto fail;
	}

	/* map all memory segments to contiguous user space */
	for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++)
	{
		vma_len = seginfo[memseg_idx].size;

		/**
		 * get the biggest virtual memory area up to vma_len. If it fails,
		 * vma_addr is NULL, so let the kernel provide the address.
		 */
		vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M);
		if (vma_addr == NULL) {
			flags = MAP_SHARED;
			vma_len = RTE_PGSIZE_2M;
		} else
			flags = MAP_SHARED | MAP_FIXED;

		seginfo[memseg_idx].size = vma_len;
		vir_addr = mmap(vma_addr, seginfo[memseg_idx].size,
			PROT_READ|PROT_WRITE, flags, xen_fd,
			memseg_idx * page_size);
		if (vir_addr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n",
				DOM0_MM_DEV);
			err = -EIO;
			goto fail;
		}

		memseg[memseg_idx].addr = vir_addr;
		memseg[memseg_idx].phys_addr = page_size *
			seginfo[memseg_idx].pfn ;
		memseg[memseg_idx].len = seginfo[memseg_idx].size;
		for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++)
			memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i];

		/* MFNs are contiguous within a 2M page, so assume the page size is 2M */
		memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M;

		memseg[memseg_idx].nchannel = mcfg->nchannel;
		memseg[memseg_idx].nrank = mcfg->nrank;

		/* NUMA is not supported in Xen Dom0, so only set socket 0 */
		memseg[memseg_idx].socket_id = 0;
	}

	return 0;
fail:
	if (xen_fd > 0) {
		close(xen_fd);
		xen_fd = -1;
	}
	return err;
}
Example 13
int
cperf_verify_test_runner(void *test_ctx)
{
	struct cperf_verify_ctx *ctx = test_ctx;

	uint64_t ops_enqd = 0, ops_enqd_total = 0, ops_enqd_failed = 0;
	uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0;
	uint64_t ops_failed = 0;

	static int only_once;

	uint64_t i;
	uint16_t ops_unused = 0;

	struct rte_crypto_op *ops[ctx->options->max_burst_size];
	struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];

	uint32_t lcore = rte_lcore_id();

#ifdef CPERF_LINEARIZATION_ENABLE
	struct rte_cryptodev_info dev_info;
	int linearize = 0;

	/* Check if source mbufs require coalescing */
	if (ctx->options->segment_sz < ctx->options->max_buffer_size) {
		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
		if ((dev_info.feature_flags &
				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
			linearize = 1;
	}
#endif /* CPERF_LINEARIZATION_ENABLE */

	ctx->lcore_id = lcore;

	if (!ctx->options->csv)
		printf("\n# Running verify test on device: %u, lcore: %u\n",
			ctx->dev_id, lcore);

	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op);

	while (ops_enqd_total < ctx->options->total_ops) {

		uint16_t burst_size = ((ops_enqd_total + ctx->options->max_burst_size)
				<= ctx->options->total_ops) ?
						ctx->options->max_burst_size :
						ctx->options->total_ops -
						ops_enqd_total;

		uint16_t ops_needed = burst_size - ops_unused;

		/* Allocate objects containing crypto operations and mbufs */
		if (rte_mempool_get_bulk(ctx->pool, (void **)ops,
					ops_needed) != 0) {
			RTE_LOG(ERR, USER1,
				"Failed to allocate more crypto operations "
				"from the the crypto operation pool.\n"
				"Consider increasing the pool size "
				"with --pool-sz\n");
			return -1;
		}

		/* Setup crypto op, attach mbuf etc */
		(ctx->populate_ops)(ops, ctx->src_buf_offset,
				ctx->dst_buf_offset,
				ops_needed, ctx->sess, ctx->options,
				ctx->test_vector, iv_offset);


		/* Populate the mbuf with the test vector, for verification */
		for (i = 0; i < ops_needed; i++)
			cperf_mbuf_set(ops[i]->sym->m_src,
					ctx->options,
					ctx->test_vector);

#ifdef CPERF_LINEARIZATION_ENABLE
		if (linearize) {
			/* PMD doesn't support scatter-gather and source buffer
			 * is segmented.
			 * We need to linearize it before enqueuing.
			 */
			for (i = 0; i < burst_size; i++)
				rte_pktmbuf_linearize(ops[i]->sym->m_src);
		}
#endif /* CPERF_LINEARIZATION_ENABLE */

		/* Enqueue burst of ops on crypto device */
		ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
				ops, burst_size);
		if (ops_enqd < burst_size)
			ops_enqd_failed++;

		/**
		 * Calculate number of ops not enqueued (mainly for hw
		 * accelerators whose ingress queue can fill up).
		 */
		ops_unused = burst_size - ops_enqd;
		ops_enqd_total += ops_enqd;


		/* Dequeue processed burst of ops from crypto device */
		ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
				ops_processed, ctx->options->max_burst_size);

		if (ops_deqd == 0) {
			/**
			 * Count dequeue polls which didn't return any
			 * processed operations. This statistic is mainly
			 * relevant to hw accelerators.
			 */
			ops_deqd_failed++;
			continue;
		}

		for (i = 0; i < ops_deqd; i++) {
			if (cperf_verify_op(ops_processed[i], ctx->options,
						ctx->test_vector))
				ops_failed++;
		}
		/* Free crypto ops so they can be reused. */
		rte_mempool_put_bulk(ctx->pool,
					(void **)ops_processed, ops_deqd);
		ops_deqd_total += ops_deqd;
	}

	/* Dequeue any operations still in the crypto device */

	while (ops_deqd_total < ctx->options->total_ops) {
		/* Sending 0 length burst to flush sw crypto device */
		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

		/* dequeue burst */
		ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
				ops_processed, ctx->options->max_burst_size);
		if (ops_deqd == 0) {
			ops_deqd_failed++;
			continue;
		}

		for (i = 0; i < ops_deqd; i++) {
			if (cperf_verify_op(ops_processed[i], ctx->options,
						ctx->test_vector))
				ops_failed++;
		}
		/* Free crypto ops so they can be reused. */
		rte_mempool_put_bulk(ctx->pool,
					(void **)ops_processed, ops_deqd);
		ops_deqd_total += ops_deqd;
	}

	if (!ctx->options->csv) {
		if (!only_once)
			printf("%12s%12s%12s%12s%12s%12s%12s%12s\n\n",
				"lcore id", "Buf Size", "Burst size",
				"Enqueued", "Dequeued", "Failed Enq",
				"Failed Deq", "Failed Ops");
		only_once = 1;

		printf("%12u%12u%12u%12"PRIu64"%12"PRIu64"%12"PRIu64
				"%12"PRIu64"%12"PRIu64"\n",
				ctx->lcore_id,
				ctx->options->max_buffer_size,
				ctx->options->max_burst_size,
				ops_enqd_total,
				ops_deqd_total,
				ops_enqd_failed,
				ops_deqd_failed,
				ops_failed);
	} else {
		if (!only_once)
			printf("\n# lcore id, Buffer Size(B), "
				"Burst Size,Enqueued,Dequeued,Failed Enq,"
				"Failed Deq,Failed Ops\n");
		only_once = 1;

		printf("%10u;%10u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";"
				"%"PRIu64"\n",
				ctx->lcore_id,
				ctx->options->max_buffer_size,
				ctx->options->max_burst_size,
				ops_enqd_total,
				ops_deqd_total,
				ops_enqd_failed,
				ops_deqd_failed,
				ops_failed);
	}

	return 0;
}
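The burst sizing at the top of the main loop simply clamps the final burst so the total never exceeds total_ops. A small worked sketch of that arithmetic with made-up numbers:

#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	uint64_t total_ops = 1000, enqd_total = 0;
	uint16_t max_burst = 64, burst = 0;

	while (enqd_total < total_ops) {
		/* full burst while it fits, the remainder on the last pass */
		burst = (enqd_total + max_burst <= total_ops) ?
				max_burst : (uint16_t)(total_ops - enqd_total);
		enqd_total += burst;	/* the real runner enqueues/dequeues here */
	}
	/* 1000 = 15 * 64 + 40, so the final burst is 40 */
	printf("last burst: %u, total enqueued: %" PRIu64 "\n", burst, enqd_total);
	return 0;
}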
Example 14
int main(int argc, char ** argv)
{
    int ret, socket;
    unsigned pid, nb_ports, lcore_id, rx_lcore_id;
    struct sock_parameter sk_param;
    struct sock *sk;
    struct txrx_queue *rxq;
    struct port_queue_conf *port_q;
    struct lcore_queue_conf *lcore_q;

    ret = rte_eal_init(argc, argv);
    if (ret < 0)
        return -1;
    argc -= ret;
    argv += ret;

    /* parse gateway IP and MAC from the command line */
    if (argc > 1) {
        default_host_addr = argv[1];
        if (argc == 3)
            default_gw_addr = argv[2];
        else if (argc == 4)
            default_gw_mac = argv[3];
        else
            rte_exit(EXIT_FAILURE, "invalid arguments\n");
    }

    /*config nic*/
    nb_ports = rte_eth_dev_count();
    if (nb_ports == 0)
        rte_exit(EXIT_FAILURE, "No available NIC\n");
    for (pid = 0; pid < nb_ports; pid++) {
        ret = net_device_init(pid);
        if (ret) {
            RTE_LOG(WARNING, LDNS, "fail to initialize port %u\n", pid);
            goto release_net_device;
        }
    }
    pkt_rx_pool = rte_pktmbuf_pool_create("ldns rx pkt pool",
            PKT_RX_NB,
            32,
            0,
            RTE_MBUF_DEFAULT_BUF_SIZE,
            rte_socket_id());
    if (pkt_rx_pool == NULL)
        rte_exit(EXIT_FAILURE, "cannot alloc rx_mbuf_pool");
    
    /*sock create*/
    sk_param.mode = SOCK_MODE_COMPLETE;
    sk_param.func = dns_process;
    sk = create_sock(0, SOCK_PTOTO_IPPROTO_UDP, &sk_param);
    if (sk == NULL)
        rte_exit(EXIT_FAILURE, "cannot create sock\n");
    if (sock_bind(sk, inet_network(default_host_addr), DNS_PORT))
        rte_exit(EXIT_FAILURE, "cannot bind addr:%s port:%u",
                default_host_addr, DNS_PORT);

    /*init ethdev*/
    rx_lcore_id = 0;
    lcore_q = lcore_q_conf_get(rx_lcore_id);
    for (pid = 0; pid < nb_ports; pid++) {
        port_q = port_q_conf_get(pid);
        ret = rte_eth_dev_configure(pid, rx_rings, tx_rings, &default_rte_eth_conf);
        if (ret != 0)
            rte_exit(EXIT_FAILURE, "port %u configure error\n", pid);

        while (rx_lcore_id == rte_get_master_lcore()
                || !rte_lcore_is_enabled(rx_lcore_id)
                || lcore_q->nb_rxq == nb_rx_queue_per_core) {
            rx_lcore_id++;
            if (rx_lcore_id == RTE_MAX_LCORE)
                rte_exit(EXIT_FAILURE, "not enough core for port %u\n", pid);
            lcore_q = lcore_q_conf_get(rx_lcore_id);
        }

        rxq = &lcore_q->rxq[lcore_q->nb_rxq];
        rxq->port = pid;
        rxq->lcore = rx_lcore_id;
        rxq->qid = port_q->nb_rxq;
        lcore_q->nb_rxq++;
        port_q->nb_rxq++;

        socket = rte_lcore_to_socket_id(rx_lcore_id);
        if (socket == SOCKET_ID_ANY)
            socket = 0;

        ret = rte_eth_tx_queue_setup(pid, rxq->qid, nb_txd, socket, NULL);
        if (ret < 0)
            rte_exit(EXIT_FAILURE, "fail to setup txq %u on port %u",
                    rxq->qid, pid);
        ret = rte_eth_rx_queue_setup(pid, rxq->qid, nb_rxd, socket, NULL, pkt_rx_pool);
        if (ret < 0)
            rte_exit(EXIT_FAILURE, "failt to setup rxq %u on port %u",
                    rxq->qid, pid);

        ret = rte_eth_dev_start(pid);
        if (ret < 0)
            rte_exit(EXIT_FAILURE, "fail to start port %u\n", pid);
    }

    if (dns_set_cfg(&default_dns_cfg))
        rte_exit(EXIT_FAILURE, "fail to set dns configuration\n");

    rte_eal_mp_remote_launch(packet_launch_one_lcore, NULL, SKIP_MASTER);
    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        if (rte_eal_wait_lcore(lcore_id) < 0)
            return -1;
    }

    return 0;

release_net_device:
    for (; pid != 0; pid--) {
        net_device_release(pid - 1);
    }
    return -1;
}
Example 15
/*
 * socket listening thread for primary process
 */
static __attribute__((noreturn)) void *
vfio_mp_sync_thread(void __rte_unused * arg)
{
	int ret, fd, vfio_group_no;

	/* wait for requests on the socket */
	for (;;) {
		int conn_sock;
		struct sockaddr_un addr;
		socklen_t sockaddr_len = sizeof(addr);

		/* this is a blocking call */
		conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr,
				&sockaddr_len);

		/* just restart on error */
		if (conn_sock == -1)
			continue;

		/* set socket to linger after close */
		struct linger l;
		l.l_onoff = 1;
		l.l_linger = 60;

		if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0)
			RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option "
					"on listen socket (%s)\n", strerror(errno));

		ret = vfio_mp_sync_receive_request(conn_sock);

		switch (ret) {
		case SOCKET_REQ_CONTAINER:
			fd = vfio_get_container_fd();
			if (fd < 0)
				vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
			else
				vfio_mp_sync_send_fd(conn_sock, fd);
			break;
		case SOCKET_REQ_GROUP:
			/* wait for group number */
			vfio_group_no = vfio_mp_sync_receive_request(conn_sock);
			if (vfio_group_no < 0) {
				close(conn_sock);
				continue;
			}

			fd = vfio_get_group_fd(vfio_group_no);

			if (fd < 0)
				vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
			/* if VFIO group exists but isn't bound to VFIO driver */
			else if (fd == 0)
				vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
			/* if group exists and is bound to VFIO driver */
			else {
				vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
				vfio_mp_sync_send_fd(conn_sock, fd);
			}
			break;
		default:
			vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
			break;
		}
		close(conn_sock);
	}
}
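The secondary-process side of this exchange is not included here. A hedged sketch of such a client using plain AF_UNIX sockets is below; the socket path handling and the request constant are placeholders, not the library's actual protocol values, and the real code passes the fd back via SCM_RIGHTS ancillary data rather than a plain send().

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>

#define REQ_CONTAINER 1	/* placeholder for SOCKET_REQ_CONTAINER */

int
request_container_fd(const char *sock_path)
{
	struct sockaddr_un addr;
	int sock, req = REQ_CONTAINER;

	sock = socket(AF_UNIX, SOCK_STREAM, 0);
	if (sock < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strncpy(addr.sun_path, sock_path, sizeof(addr.sun_path) - 1);

	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
			send(sock, &req, sizeof(req), 0) != sizeof(req)) {
		close(sock);
		return -1;
	}
	/* ... receive the answer / fd here ... */
	close(sock);
	return 0;
}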
Example 16
static struct virtio_net*
numa_realloc(struct virtio_net *dev, int index)
{
	int oldnode, newnode;
	struct virtio_net *old_dev;
	struct vhost_virtqueue *old_vq, *vq;
	int ret;

	/*
	 * vqs are allocated in pairs, so only attempt the realloc
	 * on the first queue of each pair.
	 */
	if (index % VIRTIO_QNUM != 0)
		return dev;

	old_dev = dev;
	vq = old_vq = dev->virtqueue[index];

	ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
			    MPOL_F_NODE | MPOL_F_ADDR);

	/* check if we need to reallocate vq */
	ret |= get_mempolicy(&oldnode, NULL, 0, old_vq,
			     MPOL_F_NODE | MPOL_F_ADDR);
	if (ret) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Unable to get vq numa information.\n");
		return dev;
	}
	if (oldnode != newnode) {
		RTE_LOG(INFO, VHOST_CONFIG,
			"reallocate vq from %d to %d node\n", oldnode, newnode);
		vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0,
				       newnode);
		if (!vq)
			return dev;

		memcpy(vq, old_vq, sizeof(*vq) * VIRTIO_QNUM);
		rte_free(old_vq);
	}

	/* check if we need to reallocate dev */
	ret = get_mempolicy(&oldnode, NULL, 0, old_dev,
			    MPOL_F_NODE | MPOL_F_ADDR);
	if (ret) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Unable to get dev numa information.\n");
		goto out;
	}
	if (oldnode != newnode) {
		RTE_LOG(INFO, VHOST_CONFIG,
			"reallocate dev from %d to %d node\n",
			oldnode, newnode);
		dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode);
		if (!dev) {
			dev = old_dev;
			goto out;
		}

		memcpy(dev, old_dev, sizeof(*dev));
		rte_free(old_dev);
	}

out:
	dev->virtqueue[index] = vq;
	dev->virtqueue[index + 1] = vq + 1;
	vhost_devices[dev->device_fh] = dev;

	return dev;
}
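The NUMA query used twice above is the standard get_mempolicy() call with MPOL_F_NODE | MPOL_F_ADDR, which reports the node backing a given address. A standalone sketch (needs libnuma's numaif.h, link with -lnuma):

#include <stdio.h>
#include <stdlib.h>
#include <numaif.h>

int main(void)
{
	int node = -1;
	char *buf = malloc(4096);

	if (buf == NULL)
		return -1;
	buf[0] = 0;	/* touch the page so it is actually backed by some node */

	if (get_mempolicy(&node, NULL, 0, buf, MPOL_F_NODE | MPOL_F_ADDR) != 0) {
		perror("get_mempolicy");
		free(buf);
		return -1;
	}
	printf("buffer resides on NUMA node %d\n", node);
	free(buf);
	return 0;
}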
Example 17
void
app_main_loop_worker_pipeline_lpm_ipv6(void) {
	struct rte_pipeline_params pipeline_params = {
		.name = "pipeline",
		.socket_id = rte_socket_id(),
	};

	struct rte_pipeline *p;
	uint32_t port_in_id[APP_MAX_PORTS];
	uint32_t port_out_id[APP_MAX_PORTS];
	uint32_t table_id;
	uint32_t i;

	RTE_LOG(INFO, USER1,
		"Core %u is doing work (pipeline with IPv6 LPM table)\n",
		rte_lcore_id());

	/* Pipeline configuration */
	p = rte_pipeline_create(&pipeline_params);
	if (p == NULL)
		rte_panic("Unable to configure the pipeline\n");

	/* Input port configuration */
	for (i = 0; i < app.n_ports; i++) {
		struct rte_port_ring_reader_params port_ring_params = {
			.ring = app.rings_rx[i],
		};

		struct rte_pipeline_port_in_params port_params = {
			.ops = &rte_port_ring_reader_ops,
			.arg_create = (void *) &port_ring_params,
			.f_action = NULL,
			.arg_ah = NULL,
			.burst_size = app.burst_size_worker_read,
		};

		if (rte_pipeline_port_in_create(p, &port_params,
			&port_in_id[i]))
			rte_panic("Unable to configure input port for "
				"ring %d\n", i);
	}

	/* Output port configuration */
	for (i = 0; i < app.n_ports; i++) {
		struct rte_port_ring_writer_params port_ring_params = {
			.ring = app.rings_tx[i],
			.tx_burst_sz = app.burst_size_worker_write,
		};

		struct rte_pipeline_port_out_params port_params = {
			.ops = &rte_port_ring_writer_ops,
			.arg_create = (void *) &port_ring_params,
			.f_action = NULL,
			.arg_ah = NULL,
		};

		if (rte_pipeline_port_out_create(p, &port_params,
			&port_out_id[i]))
			rte_panic("Unable to configure output port for "
				"ring %d\n", i);
	}

	/* Table configuration */
	{
		struct rte_table_lpm_ipv6_params table_lpm_ipv6_params = {
			.name = "LPM",
			.n_rules = 1 << 24,
			.number_tbl8s = 1 << 21,
			.entry_unique_size =
				sizeof(struct rte_pipeline_table_entry),
			.offset = APP_METADATA_OFFSET(32),
		};

		struct rte_pipeline_table_params table_params = {
			.ops = &rte_table_lpm_ipv6_ops,
			.arg_create = &table_lpm_ipv6_params,
			.f_action_hit = NULL,
			.f_action_miss = NULL,
			.arg_ah = NULL,
			.action_data_size = 0,
		};

		if (rte_pipeline_table_create(p, &table_params, &table_id))
			rte_panic("Unable to configure the IPv6 LPM table\n");
	}

	/* Interconnecting ports and tables */
	for (i = 0; i < app.n_ports; i++)
		if (rte_pipeline_port_in_connect_to_table(p, port_in_id[i],
			table_id))
			rte_panic("Unable to connect input port %u to "
				"table %u\n", port_in_id[i],  table_id);

	/* Add entries to tables */
	for (i = 0; i < app.n_ports; i++) {
		struct rte_pipeline_table_entry entry = {
			.action = RTE_PIPELINE_ACTION_PORT,
			{.port_id = port_out_id[i & (app.n_ports - 1)]},
		};

		struct rte_table_lpm_ipv6_key key;
		struct rte_pipeline_table_entry *entry_ptr;
		uint32_t ip;
		int key_found, status;

		key.depth = 8 + __builtin_popcount(app.n_ports - 1);

		ip = rte_bswap32(i << (24 -
			__builtin_popcount(app.n_ports - 1)));
		memcpy(key.ip, &ip, sizeof(uint32_t));

		printf("Adding rule to IPv6 LPM table (IPv6 destination = "
			"%.2x%.2x:%.2x%.2x:%.2x%.2x:%.2x%.2x:"
			"%.2x%.2x:%.2x%.2x:%.2x%.2x:%.2x%.2x/%u => "
			"port out = %u)\n",
			key.ip[0], key.ip[1], key.ip[2], key.ip[3],
			key.ip[4], key.ip[5], key.ip[6], key.ip[7],
			key.ip[8], key.ip[9], key.ip[10], key.ip[11],
			key.ip[12], key.ip[13], key.ip[14], key.ip[15],
			key.depth, i);

		status = rte_pipeline_table_entry_add(p, table_id, &key, &entry,
			&key_found, &entry_ptr);
		if (status < 0)
			rte_panic("Unable to add entry to table %u (%d)\n",
				table_id, status);
	}

	/* Enable input ports */
	for (i = 0; i < app.n_ports; i++)
		if (rte_pipeline_port_in_enable(p, port_in_id[i]))
			rte_panic("Unable to enable input port %u\n",
				port_in_id[i]);

	/* Check pipeline consistency */
	if (rte_pipeline_check(p) < 0)
		rte_panic("Pipeline consistency check failed\n");

	/* Run-time */
#if APP_FLUSH == 0
	for ( ; ; )
		rte_pipeline_run(p);
#else
	for (i = 0; ; i++) {
		rte_pipeline_run(p);

		if ((i & APP_FLUSH) == 0)
			rte_pipeline_flush(p);
	}
#endif
}
Example 18
/*
 * Application main function - loops through
 * receiving and processing packets. Never returns
 */
int
main(int argc, char *argv[])
{
    const struct rte_memzone *mz;
    struct rte_ring *rx_ring;
    struct rte_mempool *mp;
    struct port_info *ports;
    int need_flush = 0; /* indicates whether we have unsent packets */
    int retval;
    void *pkts[PKT_READ_SIZE];
    uint16_t sent;

    if ((retval = rte_eal_init(argc, argv)) < 0)
        return -1;
    argc -= retval;
    argv += retval;

    if (parse_app_args(argc, argv) < 0)
        rte_exit(EXIT_FAILURE, "Invalid command-line arguments\n");

    if (rte_eth_dev_count() == 0)
        rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");

    rx_ring = rte_ring_lookup(get_rx_queue_name(client_id));
    if (rx_ring == NULL)
        rte_exit(EXIT_FAILURE, "Cannot get RX ring - is server process running?\n");

    mp = rte_mempool_lookup(PKTMBUF_POOL_NAME);
    if (mp == NULL)
        rte_exit(EXIT_FAILURE, "Cannot get mempool for mbufs\n");

    mz = rte_memzone_lookup(MZ_PORT_INFO);
    if (mz == NULL)
        rte_exit(EXIT_FAILURE, "Cannot get port info structure\n");
    ports = mz->addr;
    tx_stats = &(ports->tx_stats[client_id]);

    configure_output_ports(ports);

    RTE_LOG(INFO, APP, "Finished Process Init.\n");

    printf("\nClient process %d handling packets\n", client_id);
    printf("[Press Ctrl-C to quit ...]\n");

    for (;;) {
        uint16_t i, rx_pkts = PKT_READ_SIZE;
        uint8_t port;

        /* try dequeuing max possible packets first, if that fails, get the
         * most we can. Loop body should only execute once, maximum */
        while (rx_pkts > 0 &&
                unlikely(rte_ring_dequeue_bulk(rx_ring, pkts, rx_pkts) != 0))
            rx_pkts = (uint16_t)RTE_MIN(rte_ring_count(rx_ring), PKT_READ_SIZE);

        if (unlikely(rx_pkts == 0)) {
            if (need_flush)
                for (port = 0; port < ports->num_ports; port++) {
                    sent = rte_eth_tx_buffer_flush(ports->id[port], client_id,
                                                   tx_buffer[port]);
                    if (unlikely(sent))
                        tx_stats->tx[port] += sent;
                }
            need_flush = 0;
            continue;
        }

        for (i = 0; i < rx_pkts; i++)
            handle_packet(pkts[i]);

        need_flush = 1;
    }
}
Example 19
/* ring_grp usage:
 * [0] = default completion ring
 * [1 .. rx_cp_nr_rings] = rx_cp, rx rings
 * [1 + rx_cp_nr_rings .. rx_cp_nr_rings + tx_cp_nr_rings] = tx_cp, tx rings
 */
int bnxt_alloc_hwrm_rings(struct bnxt *bp)
{
	unsigned int i;
	int rc = 0;

	/* Default completion ring */
	{
		struct bnxt_cp_ring_info *cpr = bp->def_cp_ring;
		struct bnxt_ring *cp_ring = cpr->cp_ring_struct;

		rc = bnxt_hwrm_ring_alloc(bp, cp_ring,
					  HWRM_RING_ALLOC_INPUT_RING_TYPE_CMPL,
					  0, HWRM_NA_SIGNATURE);
		if (rc)
			goto err_out;
		cpr->cp_doorbell =
		    (char *)bp->eth_dev->pci_dev->mem_resource[2].addr;
		B_CP_DIS_DB(cpr, cpr->cp_raw_cons);
		bp->grp_info[0].cp_fw_ring_id = cp_ring->fw_ring_id;
	}

	for (i = 0; i < bp->rx_cp_nr_rings; i++) {
		struct bnxt_rx_queue *rxq = bp->rx_queues[i];
		struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
		struct bnxt_ring *cp_ring = cpr->cp_ring_struct;
		struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
		struct bnxt_ring *ring = rxr->rx_ring_struct;
		unsigned int idx = i + 1;

		/* Rx cmpl */
		rc = bnxt_hwrm_ring_alloc(bp, cp_ring,
					HWRM_RING_ALLOC_INPUT_RING_TYPE_CMPL,
					idx, HWRM_NA_SIGNATURE);
		if (rc)
			goto err_out;
		cpr->cp_doorbell =
		    (char *)bp->eth_dev->pci_dev->mem_resource[2].addr +
		    idx * 0x80;
		bp->grp_info[idx].cp_fw_ring_id = cp_ring->fw_ring_id;
		B_CP_DIS_DB(cpr, cpr->cp_raw_cons);

		/* Rx ring */
		rc = bnxt_hwrm_ring_alloc(bp, ring,
					HWRM_RING_ALLOC_INPUT_RING_TYPE_RX,
					idx, cpr->hw_stats_ctx_id);
		if (rc)
			goto err_out;
		rxr->rx_prod = 0;
		rxr->rx_doorbell =
		    (char *)bp->eth_dev->pci_dev->mem_resource[2].addr +
		    idx * 0x80;
		bp->grp_info[idx].rx_fw_ring_id = ring->fw_ring_id;
		B_RX_DB(rxr->rx_doorbell, rxr->rx_prod);
		if (bnxt_init_one_rx_ring(rxq)) {
			RTE_LOG(ERR, PMD, "bnxt_init_one_rx_ring failed!");
			bnxt_rx_queue_release_op(rxq);
			return -ENOMEM;
		}
		B_RX_DB(rxr->rx_doorbell, rxr->rx_prod);
	}

	for (i = 0; i < bp->tx_cp_nr_rings; i++) {
		struct bnxt_tx_queue *txq = bp->tx_queues[i];
		struct bnxt_cp_ring_info *cpr = txq->cp_ring;
		struct bnxt_ring *cp_ring = cpr->cp_ring_struct;
		struct bnxt_tx_ring_info *txr = txq->tx_ring;
		struct bnxt_ring *ring = txr->tx_ring_struct;
		unsigned int idx = 1 + bp->rx_cp_nr_rings + i;

		/* Tx cmpl */
		rc = bnxt_hwrm_ring_alloc(bp, cp_ring,
					HWRM_RING_ALLOC_INPUT_RING_TYPE_CMPL,
					idx, HWRM_NA_SIGNATURE);
		if (rc)
			goto err_out;

		cpr->cp_doorbell =
		    (char *)bp->eth_dev->pci_dev->mem_resource[2].addr +
		    idx * 0x80;
		bp->grp_info[idx].cp_fw_ring_id = cp_ring->fw_ring_id;
		B_CP_DIS_DB(cpr, cpr->cp_raw_cons);

		/* Tx ring */
		rc = bnxt_hwrm_ring_alloc(bp, ring,
					HWRM_RING_ALLOC_INPUT_RING_TYPE_TX,
					idx, cpr->hw_stats_ctx_id);
		if (rc)
			goto err_out;

		txr->tx_doorbell =
		    (char *)bp->eth_dev->pci_dev->mem_resource[2].addr +
		    idx * 0x80;
	}

err_out:
	return rc;
}
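A tiny standalone sketch that reproduces the grp_info[] index layout described in the comment above (ring counts are made up): index 0 is the default completion ring, rx rings occupy 1..rx_cp_nr_rings, and tx rings follow immediately after.

#include <stdio.h>

int main(void)
{
	unsigned int rx_cp_nr_rings = 4, tx_cp_nr_rings = 2, i;

	printf("default completion ring -> idx 0\n");
	for (i = 0; i < rx_cp_nr_rings; i++)
		printf("rx ring %u -> idx %u\n", i, i + 1);
	for (i = 0; i < tx_cp_nr_rings; i++)
		printf("tx ring %u -> idx %u\n", i, 1 + rx_cp_nr_rings + i);
	return 0;
}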
Example 20
/*
 * Mmap all hugepages of hugepage table: it first opens a file in
 * hugetlbfs, then mmap()s hugepage_sz bytes of it. If orig is set, the
 * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
 * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
 * map contiguous physical blocks into contiguous virtual blocks.
 */
static int
map_all_hugepages(struct hugepage *hugepg_tbl,
		struct hugepage_info *hpi, int orig)
{
	int fd;
	unsigned i;
	void *virtaddr;
	void *vma_addr = NULL;
	size_t vma_len = 0;

	for (i = 0; i < hpi->num_pages[0]; i++) {
		size_t hugepage_sz = hpi->hugepage_sz;

		if (orig) {
			hugepg_tbl[i].file_id = i;
			hugepg_tbl[i].size = hugepage_sz;
			eal_get_hugefile_path(hugepg_tbl[i].filepath,
					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
					hugepg_tbl[i].file_id);
			hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
		}
#ifndef RTE_ARCH_X86_64
		/* for 32-bit systems, don't remap 1G pages, just reuse original
		 * map address as final map address.
		 */
		else if (hugepage_sz == RTE_PGSIZE_1G){
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			continue;
		}
#endif
		else if (vma_len == 0) {
			unsigned j, num_pages;

			/* reserve a virtual area for next contiguous
			 * physical block: count the number of
			 * contiguous physical pages. */
			for (j = i+1; j < hpi->num_pages[0] ; j++) {
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr + hugepage_sz)
					break;
			}
			num_pages = j - i;
			vma_len = num_pages * hugepage_sz;

			/* get the biggest virtual memory area up to
			 * vma_len. If it fails, vma_addr is NULL, so
			 * let the kernel provide the address. */
			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
			if (vma_addr == NULL)
				vma_len = hugepage_sz;
		}

		/* try to create hugepage file */
		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}

		virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
				MAP_SHARED, fd, 0);
		if (virtaddr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__,
					strerror(errno));
			close(fd);
			return -1;
		}

		if (orig) {
			hugepg_tbl[i].orig_va = virtaddr;
			memset(virtaddr, 0, hugepage_sz);
		}
		else {
			hugepg_tbl[i].final_va = virtaddr;
		}

		/* set shared flock on the file. */
		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
				__func__, strerror(errno));
			close(fd);
			return -1;
		}

		close(fd);

		vma_addr = (char *)vma_addr + hugepage_sz;
		vma_len -= hugepage_sz;
	}
	return 0;
}
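As the header comment explains, the table is walked twice: once with orig set to create the files and record the kernel-chosen addresses, and once with orig cleared to remap physically contiguous runs into contiguous virtual ranges. A minimal sketch of that calling sequence (error handling reduced; hugepg_tbl and hpi are owned by the surrounding EAL code, and the intermediate physical-address gathering is elided):

static int
map_hugepages_twice(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
{
	/* pass 1: create hugepage files, record original virtual addresses */
	if (map_all_hugepages(hugepg_tbl, hpi, 1) < 0)
		return -1;

	/* ... physical addresses are gathered and the table sorted here ... */

	/* pass 2: remap physically contiguous runs into contiguous VA */
	if (map_all_hugepages(hugepg_tbl, hpi, 0) < 0)
		return -1;

	return 0;
}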
Example 21
/*
 * This function maps the grant node of a vring or mbuf pool into a contiguous
 * virtual address space, and returns the mapped address, pfn array and index array.
 * @param gntnode
 *  Pointer to grant node
 * @param domid
 *  Guest domain id
 * @param ppfn
 *  Pointer to pfn array, caller should free this array
 * @param pgs
 *  Pointer to number of pages
 * @param ppindex
 *  Pointer to index array, used to release grefs when to free this node
 * @return
 *  Pointer to mapped virtual address, NULL on failure
 */
static void *
map_gntnode(struct xen_gntnode *gntnode, int domid, uint32_t **ppfn, uint32_t *pgs, uint64_t **ppindex)
{
	struct xen_gnt *gnt;
	uint32_t i, j;
	size_t total_pages = 0;
	void *addr;
	uint32_t *pfn;
	uint64_t *pindex;
	uint32_t pfn_num = 0;
	int pg_sz;

	if (gntnode == NULL)
		return NULL;

	pg_sz = getpagesize();
	for (i = 0; i < gntnode->gnt_num; i++) {
		gnt = gntnode->gnt_info + i;
		total_pages += cal_pagenum(gnt);
	}
	if ((addr = get_xen_virtual(total_pages * pg_sz, pg_sz)) == NULL) {
		RTE_LOG(ERR, XENHOST, "  %s: failed get_xen_virtual\n", __func__);
		return NULL;
	}
	pfn = calloc(total_pages, (size_t)sizeof(uint32_t));
	pindex = calloc(total_pages, (size_t)sizeof(uint64_t));
	if (pfn == NULL || pindex == NULL) {
		free_xen_virtual(addr, total_pages * pg_sz, pg_sz);
		free(pfn);
		free(pindex);
		return NULL;
	}

	RTE_LOG(INFO, XENHOST, "    %s: total pages:%zu, map to [%p, %p]\n", __func__, total_pages, addr, RTE_PTR_ADD(addr, total_pages * pg_sz - 1));
	for (i = 0; i < gntnode->gnt_num; i++) {
		gnt = gntnode->gnt_info + i;
		for (j = 0; j < (PAGE_PFNNUM) / 2; j++) {
			if ((gnt->gref_pfn[j * 2].gref) <= 0)
				goto _end;
			/*alternative: batch map, or through libxc*/
			if (xen_grant_mmap(RTE_PTR_ADD(addr, pfn_num * pg_sz),
					domid,
					gnt->gref_pfn[j * 2].gref,
					&pindex[pfn_num]) == NULL) {
				goto mmap_failed;
			}
			pfn[pfn_num] = gnt->gref_pfn[j * 2 + 1].pfn_num;
			pfn_num++;
		}
	}

mmap_failed:
	if (pfn_num)
		munmap(addr, pfn_num * pg_sz);
	for (i = 0; i < pfn_num; i++) {
		xen_unmap_grant_ref(pindex[i]);
	}
	free(pindex);
	free(pfn);
	return NULL;

_end:
	if (ppindex)
		*ppindex = pindex;
	else
		free(pindex);
	if (ppfn)
		*ppfn = pfn;
	else
		free(pfn);
	if (pgs)
		*pgs = total_pages;

	return addr;
}
Example No. 22
/*
 * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
 * page.
 */
static int
find_numasocket(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
{
	int socket_id;
	char *end, *nodestr;
	unsigned i, hp_count = 0;
	uint64_t virt_addr;
	char buf[BUFSIZ];
	char hugedir_str[PATH_MAX];
	FILE *f;

	f = fopen("/proc/self/numa_maps", "r");
	if (f == NULL) {
		RTE_LOG(INFO, EAL, "cannot open /proc/self/numa_maps,"
				" consider that all memory is in socket_id 0\n");
		return 0;
	}

	rte_snprintf(hugedir_str, sizeof(hugedir_str),
			"%s/", hpi->hugedir);

	/* parse numa map */
	while (fgets(buf, sizeof(buf), f) != NULL) {

		/* ignore non huge page */
		if (strstr(buf, " huge ") == NULL &&
				strstr(buf, hugedir_str) == NULL)
			continue;

		/* get zone addr */
		virt_addr = strtoull(buf, &end, 16);
		if (virt_addr == 0 || end == buf) {
			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
			goto error;
		}

		/* get node id (socket id) */
		nodestr = strstr(buf, " N");
		if (nodestr == NULL) {
			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
			goto error;
		}
		nodestr += 2;
		end = strstr(nodestr, "=");
		if (end == NULL) {
			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
			goto error;
		}
		end[0] = '\0';
		end = NULL;

		socket_id = strtoul(nodestr, &end, 0);
		if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
			goto error;
		}

		/* if we find this page in our mappings, set socket_id */
		for (i = 0; i < hpi->num_pages[0]; i++) {
			void *va = (void *)(unsigned long)virt_addr;
			if (hugepg_tbl[i].orig_va == va) {
				hugepg_tbl[i].socket_id = socket_id;
				hp_count++;
			}
		}
	}

	if (hp_count < hpi->num_pages[0])
		goto error;

	fclose(f);
	return 0;

error:
	fclose(f);
	return -1;
}
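
For reference, a hugetlbfs entry in /proc/self/numa_maps typically looks something like "7f6d40000000 default file=/mnt/huge/rtemap_0 huge dirty=1 N0=1 kernelpagesize_kB=2048" (exact fields vary by kernel version). The sketch below runs the same extraction on a hard-coded sample line instead of the real file, so it is illustrative only:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	char buf[] = "7f6d40000000 default file=/mnt/huge/rtemap_0 huge dirty=1 N0=1 kernelpagesize_kB=2048";
	char *end, *nodestr;
	unsigned long long virt_addr;
	int socket_id;

	virt_addr = strtoull(buf, &end, 16);   /* leading hex address */
	nodestr = strstr(buf, " N");           /* " N<node>=<pages>" token */
	if (end == buf || nodestr == NULL)
		return 1;
	nodestr += 2;
	end = strchr(nodestr, '=');
	if (end == NULL)
		return 1;
	*end = '\0';
	socket_id = atoi(nodestr);

	printf("va=0x%llx socket=%d\n", virt_addr, socket_id);
	return 0;
}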
Example No. 23
int
parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx)
{
	char path[PATH_MAX] = {0};
	struct xen_gntnode *rx_gntnode = NULL;
	struct xen_gntnode *tx_gntnode = NULL;
	struct xen_vring *vring = NULL;

	/*check if null terminated */
	snprintf(path, sizeof(path),
		XEN_VM_ROOTNODE_FMT"/%d_"XEN_RXVRING_SUFFIX,
		guest->dom_id,
		virtio_idx);

	RTE_LOG(INFO, XENHOST, "  %s: virtio %u parse rx gntnode %s\n", __func__, virtio_idx, path);
	rx_gntnode = parse_gntnode(guest->dom_id, path);
	if (rx_gntnode == NULL)
		goto err;

	/*check if null terminated */
	snprintf(path, sizeof(path),
		XEN_VM_ROOTNODE_FMT"/%d_"XEN_TXVRING_SUFFIX,
		guest->dom_id,
		virtio_idx);

	RTE_LOG(INFO, XENHOST, "  %s: virtio %u parse tx gntnode %s\n", __func__, virtio_idx, path);
	tx_gntnode = parse_gntnode(guest->dom_id, path);
	if (tx_gntnode == NULL)
		goto err;

	vring = &guest->vring[virtio_idx];
	bzero(vring, sizeof(*vring));
	vring->dom_id = guest->dom_id;
	vring->virtio_idx = virtio_idx;

	if (xen_parse_etheraddr(vring) != 0)
		goto err;

	RTE_LOG(INFO, XENHOST, "  %s: virtio %u map rx gntnode %s\n", __func__, virtio_idx, path);
	if (xen_map_rxvringnode(rx_gntnode, vring) != 0)
		goto err;

	RTE_LOG(INFO, XENHOST, "  %s: virtio %u map tx gntnode %s\n", __func__, virtio_idx, path);
	if (xen_map_txvringnode(tx_gntnode, vring) != 0)
		goto err;

	if (xen_map_vringflag(vring) != 0)
		goto err;

	guest->vring_num++;

	xen_free_gntnode(rx_gntnode);
	xen_free_gntnode(tx_gntnode);

	return 0;

err:
	if (rx_gntnode)
		xen_free_gntnode(rx_gntnode);
	if (tx_gntnode)
		xen_free_gntnode(tx_gntnode);
	if (vring) {
		cleanup_vring(vring);
		bzero(vring, sizeof(*vring));
	}
	return -1;
}
Example No. 24
/*
 * This function is a NUMA-aware equivalent of calc_num_pages.
 * It takes in the list of hugepage sizes and the
 * number of pages thereof, and calculates the best number of
 * pages of each size to fulfill the request for <memory> RAM.
 */
static int
calc_num_pages_per_socket(uint64_t * memory,
		struct hugepage_info *hp_info,
		struct hugepage_info *hp_used,
		unsigned num_hp_info)
{
	unsigned socket, j, i = 0;
	unsigned requested, available;
	int total_num_pages = 0;
	uint64_t remaining_mem, cur_mem;
	uint64_t total_mem = internal_config.memory;

	if (num_hp_info == 0)
		return -1;

	for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) {
		/* if specific memory amounts per socket weren't requested */
		if (internal_config.force_sockets == 0) {
			/* take whatever is available */
			memory[socket] = RTE_MIN(get_socket_mem_size(socket),
					total_mem);
		}
		/* the loop below is skipped if no memory was requested for this socket */
		for (i = 0; i < num_hp_info && memory[socket] != 0; i++){
			hp_used[i].hugedir = hp_info[i].hugedir;
			hp_used[i].num_pages[socket] = RTE_MIN(
					memory[socket] / hp_info[i].hugepage_sz,
					hp_info[i].num_pages[socket]);

			cur_mem = hp_used[i].num_pages[socket] *
					hp_used[i].hugepage_sz;

			memory[socket] -= cur_mem;
			total_mem -= cur_mem;

			total_num_pages += hp_used[i].num_pages[socket];

			/* check if we have met all memory requests */
			if (memory[socket] == 0)
				break;

			/* check if we have any more pages left at this size, if so
			 * move on to next size */
			if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket])
				continue;
			/* At this point we know that there are more pages of this size
			 * available, but one such page is bigger than the memory we still
			 * want, so let's see if we can get enough from other page sizes.
			 */
			remaining_mem = 0;
			for (j = i+1; j < num_hp_info; j++)
				remaining_mem += hp_info[j].hugepage_sz *
				hp_info[j].num_pages[socket];

			/* is there enough other memory, if not allocate another page and quit */
			if (remaining_mem < memory[socket]){
				cur_mem = RTE_MIN(memory[socket],
						hp_info[i].hugepage_sz);
				memory[socket] -= cur_mem;
				total_mem -= cur_mem;
				hp_used[i].num_pages[socket]++;
				total_num_pages++;
				break; /* we are done with this socket*/
			}
		}
		/* if we didn't satisfy all memory requirements per socket */
		if (memory[socket] > 0) {
			/* to prevent icc errors */
			requested = (unsigned) (internal_config.socket_mem[socket] /
					0x100000);
			available = requested -
					((unsigned) (memory[socket] / 0x100000));
			RTE_LOG(INFO, EAL, "Not enough memory available on socket %u! "
					"Requested: %uMB, available: %uMB\n", socket,
					requested, available);
			return -1;
		}
	}

	/* if we didn't satisfy total memory requirements */
	if (total_mem > 0) {
		requested = (unsigned) (internal_config.memory / 0x100000);
		available = requested - (unsigned) (total_mem / 0x100000);
		RTE_LOG(INFO, EAL, "Not enough memory available! Requested: %uMB,"
				" available: %uMB\n", requested, available);
		return -1;
	}
	return total_num_pages;
}
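
To make the greedy calculation concrete, here is a toy single-socket sketch with made-up page sizes and counts; it omits the "allocate one extra page and stop" corner case handled above when the remaining smaller sizes cannot cover the request:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t page_sz[] = { 1ULL << 30, 2ULL << 20 }; /* 1 GB, 2 MB */
	uint64_t avail[]   = { 4, 1024 };                /* pages available */
	uint64_t request   = 2560ULL << 20;              /* 2.5 GB requested */
	unsigned i;

	for (i = 0; i < 2 && request > 0; i++) {
		uint64_t take = request / page_sz[i];

		if (take > avail[i])
			take = avail[i];
		request -= take * page_sz[i];
		printf("size %llu MB: %llu page(s)\n",
				(unsigned long long)(page_sz[i] >> 20),
				(unsigned long long)take);
	}
	if (request > 0)
		printf("unsatisfied: %llu MB\n", (unsigned long long)(request >> 20));
	return 0;
}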
Example No. 25
/*
 * Application main function - loops through
 * receiving and processing packets. Never returns
 */
int
main(int argc, char *argv[])
{
    struct rte_ring *rx_ring = NULL;
    struct rte_ring *tx_ring = NULL;
    int retval = 0;
    void *pkts[PKT_READ_SIZE];
    int rslt = 0;

    if ((retval = rte_eal_init(argc, argv)) < 0) {
        return -1;
    }

    argc -= retval;
    argv += retval;

    if (parse_app_args(argc, argv) < 0) {
        rte_exit(EXIT_FAILURE, "Invalid command-line arguments\n");
    }

    rx_ring = rte_ring_lookup(get_rx_queue_name(client_id));
    if (rx_ring == NULL) {
        rte_exit(EXIT_FAILURE,
            "Cannot get RX ring - is server process running?\n");
    }

    tx_ring = rte_ring_lookup(get_tx_queue_name(client_id));
    if (tx_ring == NULL) {
        rte_exit(EXIT_FAILURE,
            "Cannot get TX ring - is server process running?\n");
    }

    RTE_LOG(INFO, APP, "Finished Process Init.\n");

    printf("\nClient process %d handling packets\n", client_id);
    printf("[Press Ctrl-C to quit ...]\n");

    for (;;) {
        unsigned rx_pkts = PKT_READ_SIZE;

        /* Try dequeuing max possible packets first, if that fails, get the
         * most we can. Loop body should only execute once, maximum.
         */
        while (unlikely(rte_ring_dequeue_bulk(rx_ring, pkts, rx_pkts) != 0) &&
            rx_pkts > 0) {
            rx_pkts = (uint16_t)RTE_MIN(rte_ring_count(rx_ring), PKT_READ_SIZE);
        }

        if (rx_pkts > 0) {
            pkt++;
            /* blocking enqueue */
            do {
                rslt = rte_ring_enqueue_bulk(tx_ring, pkts, rx_pkts);
            } while (rslt == -ENOBUFS);
        } else {
            no_pkt++;
        }

        if (!(pkt % 100000)) {
            printf("pkt %d %d\n", pkt, no_pkt);
            pkt = no_pkt = 0;
        }
    }
}
Example No. 26
/*
 * Prepare physical memory mapping: fill configuration structure with
 * these infos, return 0 on success.
 *  1. map N huge pages in separate files in hugetlbfs
 *  2. find associated physical addr
 *  3. find associated NUMA socket ID
 *  4. sort all huge pages by physical address
 *  5. remap these N huge pages in the correct order
 *  6. unmap the first mapping
 *  7. fill memsegs in configuration with contiguous zones
 */
static int
rte_eal_hugepage_init(void)
{
	struct rte_mem_config *mcfg;
	struct hugepage *hugepage, *tmp_hp = NULL;
	struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];

	uint64_t memory[RTE_MAX_NUMA_NODES];

	unsigned hp_offset;
	int i, j, new_memseg;
	int nrpages, total_pages = 0;
	void *addr;

	memset(used_hp, 0, sizeof(used_hp));

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* for debug purposes, hugetlbfs can be disabled */
	if (internal_config.no_hugetlbfs) {
		addr = malloc(internal_config.memory);
		if (addr == NULL)
			return -1;
		mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;
		mcfg->memseg[0].addr = addr;
		mcfg->memseg[0].len = internal_config.memory;
		mcfg->memseg[0].socket_id = 0;
		return 0;
	}


	/* calculate total number of hugepages available. at this point we haven't
	 * yet started sorting them so they all are on socket 0 */
	for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
		/* meanwhile, also initialize used_hp hugepage sizes in used_hp */
		used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz;

		total_pages += internal_config.hugepage_info[i].num_pages[0];
	}

	/*
	 * allocate a memory area for hugepage table.
	 * this isn't shared memory yet. due to the fact that we need some
	 * processing done on these pages, shared memory will be created
	 * at a later stage.
	 */
	tmp_hp = malloc(total_pages * sizeof(struct hugepage));
	if (tmp_hp == NULL)
		goto fail;

	memset(tmp_hp, 0, total_pages * sizeof(struct hugepage));

	hp_offset = 0; /* where we start the current page size entries */

	/* map all hugepages and sort them */
	for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){
		struct hugepage_info *hpi;

		/*
		 * we don't yet mark hugepages as used at this stage, so
		 * we just map all hugepages available to the system
		 * all hugepages are still located on socket 0
		 */
		hpi = &internal_config.hugepage_info[i];

		if (hpi->num_pages[0] == 0)
			continue;

		/* map all hugepages available */
		if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){
			RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n",
					(unsigned)(hpi->hugepage_sz / 0x100000));
			goto fail;
		}

		/* find physical addresses and sockets for each hugepage */
		if (find_physaddr(&tmp_hp[hp_offset], hpi) < 0){
			RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
					(unsigned)(hpi->hugepage_sz / 0x100000));
			goto fail;
		}

		if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
			RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
					(unsigned)(hpi->hugepage_sz / 0x100000));
			goto fail;
		}

		if (sort_by_physaddr(&tmp_hp[hp_offset], hpi) < 0)
			goto fail;

		/* remap all hugepages */
		if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){
			RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
					(unsigned)(hpi->hugepage_sz / 0x100000));
			goto fail;
		}

		/* unmap original mappings */
		if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0)
			goto fail;

		/* we have processed a num of hugepages of this size, so inc offset */
		hp_offset += hpi->num_pages[0];
	}

	/* clean out the numbers of pages */
	for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++)
		for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
			internal_config.hugepage_info[i].num_pages[j] = 0;

	/* get hugepages for each socket */
	for (i = 0; i < total_pages; i++) {
		int socket = tmp_hp[i].socket_id;

		/* find a hugepage info with right size and increment num_pages */
		for (j = 0; j < (int) internal_config.num_hugepage_sizes; j++) {
			if (tmp_hp[i].size ==
					internal_config.hugepage_info[j].hugepage_sz) {
				internal_config.hugepage_info[j].num_pages[socket]++;
			}
		}
	}

	/* make a copy of socket_mem, needed for number of pages calculation */
	for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
		memory[i] = internal_config.socket_mem[i];

	/* calculate final number of pages */
	nrpages = calc_num_pages_per_socket(memory,
			internal_config.hugepage_info, used_hp,
			internal_config.num_hugepage_sizes);

	/* error if not enough memory available */
	if (nrpages < 0)
		goto fail;

	/* reporting in! */
	for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
		for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
			if (used_hp[i].num_pages[j] > 0) {
				RTE_LOG(INFO, EAL,
						"Requesting %u pages of size %uMB"
						" from socket %i\n",
						used_hp[i].num_pages[j],
						(unsigned)
							(used_hp[i].hugepage_sz / 0x100000),
						j);
			}
		}
	}

	/* create shared memory */
	hugepage = create_shared_memory(eal_hugepage_info_path(),
					nrpages * sizeof(struct hugepage));

	if (hugepage == NULL) {
		RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
		goto fail;
	}

	/*
	 * unmap pages that we won't need (looks at used_hp).
	 * also, sets final_va to NULL on pages that were unmapped.
	 */
	if (unmap_unneeded_hugepages(tmp_hp, used_hp,
			internal_config.num_hugepage_sizes) < 0) {
		RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n");
		goto fail;
	}

	/*
	 * copy stuff from malloc'd hugepage* to the actual shared memory.
	 * this procedure only copies those hugepages that have final_va
	 * not NULL. has overflow protection.
	 */
	if (copy_hugepages_to_shared_mem(hugepage, nrpages,
			tmp_hp, total_pages) < 0) {
		RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
		goto fail;
	}

	/* free the temporary hugepage table */
	free(tmp_hp);
	tmp_hp = NULL;

	memset(mcfg->memseg, 0, sizeof(mcfg->memseg));
	j = -1;
	for (i = 0; i < nrpages; i++) {
		new_memseg = 0;

		/* if this is a new section, create a new memseg */
		if (i == 0)
			new_memseg = 1;
		else if (hugepage[i].socket_id != hugepage[i-1].socket_id)
			new_memseg = 1;
		else if (hugepage[i].size != hugepage[i-1].size)
			new_memseg = 1;
		else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) !=
		    hugepage[i].size)
			new_memseg = 1;
		else if (((unsigned long)hugepage[i].final_va -
		    (unsigned long)hugepage[i-1].final_va) != hugepage[i].size)
			new_memseg = 1;

		if (new_memseg) {
			j += 1;
			if (j == RTE_MAX_MEMSEG)
				break;

			mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
			mcfg->memseg[j].addr = hugepage[i].final_va;
			mcfg->memseg[j].len = hugepage[i].size;
			mcfg->memseg[j].socket_id = hugepage[i].socket_id;
			mcfg->memseg[j].hugepage_sz = hugepage[i].size;
		}
		/* continuation of previous memseg */
		else {
			mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
		}
		hugepage[i].memseg_id = j;
	}

	if (i < nrpages) {
		RTE_LOG(ERR, EAL, "Can only reserve %d pages "
			"from %d requested\n"
			"Current %s=%d is not enough\n"
			"Please either increase it or request less amount "
			"of memory.\n",
			i, nrpages, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
			RTE_MAX_MEMSEG);
		return (-ENOMEM);
	}

	return 0;

fail:
	if (tmp_hp)
		free(tmp_hp);
	return -1;
}
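
The memseg-building loop above merges consecutive sorted pages into one segment only while the socket, page size, and both physical and virtual adjacency hold. A minimal sketch of that merge test, with a hypothetical hp_rec struct standing in for struct hugepage:

#include <stdint.h>
#include <stdbool.h>

/* hypothetical minimal page record: only the fields the merge test uses */
struct hp_rec {
	uint64_t physaddr;
	void *final_va;
	uint64_t size;
	int socket_id;
};

/* mirror of the "new_memseg" test above: two consecutive pages may share
 * a memseg only if socket and size match and they are adjacent both
 * physically and virtually */
static bool
same_memseg(const struct hp_rec *prev, const struct hp_rec *cur)
{
	return cur->socket_id == prev->socket_id &&
		cur->size == prev->size &&
		cur->physaddr - prev->physaddr == cur->size &&
		(uintptr_t)cur->final_va - (uintptr_t)prev->final_va == cur->size;
}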
Example No. 27
static int
rte_table_lpm_ipv6_entry_add(
	void *table,
	void *key,
	void *entry,
	int *key_found,
	void **entry_ptr)
{
	struct rte_table_lpm_ipv6 *lpm = table;
	struct rte_table_lpm_ipv6_key *ip_prefix =
		key;
	uint32_t nht_pos, nht_pos0, nht_pos0_valid;
	int status;

	/* Check input parameters */
	if (lpm == NULL) {
		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
		return -EINVAL;
	}
	if (ip_prefix == NULL) {
		RTE_LOG(ERR, TABLE, "%s: ip_prefix parameter is NULL\n",
			__func__);
		return -EINVAL;
	}
	if (entry == NULL) {
		RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__);
		return -EINVAL;
	}

	if ((ip_prefix->depth == 0) || (ip_prefix->depth > 128)) {
		RTE_LOG(ERR, TABLE, "%s: invalid depth (%d)\n", __func__,
			ip_prefix->depth);
		return -EINVAL;
	}

	/* Check if rule is already present in the table */
	status = rte_lpm6_is_rule_present(lpm->lpm, ip_prefix->ip,
		ip_prefix->depth, &nht_pos0);
	nht_pos0_valid = status > 0;

	/* Find existing or free NHT entry */
	if (nht_find_existing(lpm, entry, &nht_pos) == 0) {
		uint8_t *nht_entry;

		if (nht_find_free(lpm, &nht_pos) == 0) {
			RTE_LOG(ERR, TABLE, "%s: NHT full\n", __func__);
			return -1;
		}

		nht_entry = &lpm->nht[nht_pos * lpm->entry_size];
		memcpy(nht_entry, entry, lpm->entry_size);
	}

	/* Add rule to low level LPM table */
	if (rte_lpm6_add(lpm->lpm, ip_prefix->ip, ip_prefix->depth,
		nht_pos) < 0) {
		RTE_LOG(ERR, TABLE, "%s: LPM IPv6 rule add failed\n", __func__);
		return -1;
	}

	/* Commit NHT changes */
	lpm->nht_users[nht_pos]++;
	lpm->nht_users[nht_pos0] -= nht_pos0_valid;

	*key_found = nht_pos0_valid;
	*entry_ptr = (void *) &lpm->nht[nht_pos * lpm->entry_size];
	return 0;
}
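
The NHT bookkeeping above is essentially a reference-counted entry table: identical next-hop entries share one slot, and per-slot user counts decide when a slot may be reused. A rough sketch of that idea with illustrative sizes (not the actual DPDK structures or helper names):

#include <stdint.h>
#include <string.h>

#define NHT_SIZE   64  /* illustrative sizes only */
#define ENTRY_SIZE 8

static uint8_t  nht[NHT_SIZE * ENTRY_SIZE];
static uint32_t nht_users[NHT_SIZE];

/* return the slot already holding an identical entry, or copy the entry
 * into the first unused slot; -1 if the table is full. This mirrors the
 * nht_find_existing()/nht_find_free() pair used above; the caller is
 * expected to increment nht_users[] on success. */
static int
nht_slot_for(const void *entry)
{
	int i, free_slot = -1;

	for (i = 0; i < NHT_SIZE; i++) {
		if (nht_users[i] == 0) {
			if (free_slot < 0)
				free_slot = i;
		} else if (memcmp(&nht[i * ENTRY_SIZE], entry, ENTRY_SIZE) == 0) {
			return i;      /* share the existing entry */
		}
	}
	if (free_slot >= 0)
		memcpy(&nht[free_slot * ENTRY_SIZE], entry, ENTRY_SIZE);
	return free_slot;
}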
Example No. 28
/*
 * This creates the memory mappings in the secondary process to match that of
 * the server process. It goes through each memory segment in the DPDK runtime
 * configuration and finds the hugepages which form that segment, mapping them
 * in order to form a contiguous block in the virtual memory space
 */
static int
rte_eal_hugepage_attach(void)
{
	const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	const struct hugepage *hp = NULL;
	unsigned num_hp = 0;
	unsigned i, s = 0; /* s used to track the segment number */
	off_t size;
	int fd, fd_zero = -1, fd_hugepage = -1;

	if (aslr_enabled() > 0) {
		RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
				"(ASLR) is enabled in the kernel.\n");
		RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory "
				"into secondary processes\n");
	}

	fd_zero = open("/dev/zero", O_RDONLY);
	if (fd_zero < 0) {
		RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
		goto error;
	}
	fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY);
	if (fd_hugepage < 0) {
		RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
		goto error;
	}

	/* map all segments into memory to make sure we get the addrs */
	for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
		void *base_addr;

		/*
		 * the first memory segment with len==0 is the one that
		 * follows the last valid segment.
		 */
		if (mcfg->memseg[s].len == 0)
			break;

		/*
		 * fd_zero is mmapped to get a contiguous block of virtual
		 * addresses of the appropriate memseg size.
		 * use mmap to get the same addresses as the primary process.
		 */
		base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
				 PROT_READ, MAP_PRIVATE, fd_zero, 0);
		if (base_addr == MAP_FAILED ||
		    base_addr != mcfg->memseg[s].addr) {
			RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
				"in /dev/zero to requested address [%p]\n",
				(unsigned long long)mcfg->memseg[s].len,
				mcfg->memseg[s].addr);
			if (aslr_enabled() > 0) {
				RTE_LOG(ERR, EAL, "It is recommended to "
					"disable ASLR in the kernel "
					"and retry running both primary "
					"and secondary processes\n");
			}
			goto error;
		}
	}

	size = getFileSize(fd_hugepage);
	hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
	if (hp == MAP_FAILED) {
		RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
		goto error;
	}

	num_hp = size / sizeof(struct hugepage);
	RTE_LOG(DEBUG, EAL, "Analysing %u hugepages\n", num_hp);

	s = 0;
	while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){
		void *addr, *base_addr;
		uintptr_t offset = 0;

		/*
		 * free previously mapped memory so we can map the
		 * hugepages into the space
		 */
		base_addr = mcfg->memseg[s].addr;
		munmap(base_addr, mcfg->memseg[s].len);

		/* find the hugepages for this segment and map them
		 * we don't need to worry about order, as the server sorted the
		 * entries before it did the second mmap of them */
		for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){
			if (hp[i].memseg_id == (int)s){
				fd = open(hp[i].filepath, O_RDWR);
				if (fd < 0) {
					RTE_LOG(ERR, EAL, "Could not open %s\n",
						hp[i].filepath);
					goto error;
				}
				addr = mmap(RTE_PTR_ADD(base_addr, offset),
						hp[i].size, PROT_READ | PROT_WRITE,
						MAP_SHARED | MAP_FIXED, fd, 0);
				close(fd); /* close file both on success and on failure */
				if (addr == MAP_FAILED) {
					RTE_LOG(ERR, EAL, "Could not mmap %s\n",
						hp[i].filepath);
					goto error;
				}
				offset+=hp[i].size;
			}
		}
		RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,
				(unsigned long long)mcfg->memseg[s].len);
		s++;
	}
	/* unmap the hugepage config file, since we are done using it */
	munmap((void *)(uintptr_t)hp, size);
	close(fd_zero);
	close(fd_hugepage);
	return 0;

error:
	if (fd_zero >= 0)
		close(fd_zero);
	if (fd_hugepage >= 0)
		close(fd_hugepage);
	return -1;
}
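
The attach path above follows a reserve-then-replace pattern: first grab a block of virtual addresses (here via /dev/zero), then map the real hugepage files over it with MAP_FIXED so the secondary process ends up at the primary's addresses. A standalone Linux sketch of the same pattern, using anonymous mappings instead of /dev/zero and hugepage files:

#include <stdio.h>
#include <sys/mman.h>

int
main(void)
{
	size_t len = 2 * 1024 * 1024;

	/* 1. reserve a range of virtual addresses without committing memory */
	void *reserved = mmap(NULL, len, PROT_NONE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (reserved == MAP_FAILED)
		return 1;

	/* 2. replace the reservation in place; MAP_FIXED keeps the address,
	 * just as the hugepage files are mapped over the /dev/zero area */
	void *addr = mmap(reserved, len, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
	if (addr == MAP_FAILED)
		return 1;

	printf("reserved %p, remapped at %p\n", reserved, addr);
	return munmap(addr, len);
}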
Example No. 29
/* Setup ethdev hardware queues */
static int
dpdk_ethdev_queues_setup(struct vr_dpdk_ethdev *ethdev)
{
    int ret, i;
    uint8_t port_id = ethdev->ethdev_port_id;
    struct rte_mempool *mempool;

    /* configure RX queues */
    RTE_LOG(DEBUG, VROUTER, "%s: nb_rx_queues=%u nb_tx_queues=%u\n",
        __func__, (unsigned)ethdev->ethdev_nb_rx_queues,
            (unsigned)ethdev->ethdev_nb_tx_queues);

    for (i = 0; i < VR_DPDK_MAX_NB_RX_QUEUES; i++) {
        if (i < ethdev->ethdev_nb_rss_queues) {
            mempool = vr_dpdk.rss_mempool;
            ethdev->ethdev_queue_states[i] = VR_DPDK_QUEUE_RSS_STATE;
        } else if (i < ethdev->ethdev_nb_rx_queues) {
            if (vr_dpdk.nb_free_mempools == 0) {
                RTE_LOG(ERR, VROUTER, "    error assigning mempool to eth device %"
                    PRIu8 " RX queue %d\n", port_id, i);
                return -ENOMEM;
            }
            vr_dpdk.nb_free_mempools--;
            mempool = vr_dpdk.free_mempools[vr_dpdk.nb_free_mempools];
            ethdev->ethdev_queue_states[i] = VR_DPDK_QUEUE_READY_STATE;
        } else {
            ethdev->ethdev_queue_states[i] = VR_DPDK_QUEUE_NONE;
            continue;
        }

        ret = rte_eth_rx_queue_setup(port_id, i, VR_DPDK_NB_RXD,
            SOCKET_ID_ANY, &rx_queue_conf, mempool);
        if (ret < 0) {
            /* return mempool to the list */
            if (mempool != vr_dpdk.rss_mempool)
                vr_dpdk.nb_free_mempools++;
            RTE_LOG(ERR, VROUTER, "    error setting up eth device %" PRIu8 " RX queue %d"
                    ": %s (%d)\n", port_id, i, rte_strerror(-ret), -ret);
            return ret;
        }
        /* map RX queue to stats counter ignoring any errors */
        rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);

        /* save queue mempool pointer */
        ethdev->ethdev_mempools[i] = mempool;
    }
    i = ethdev->ethdev_nb_rx_queues - ethdev->ethdev_nb_rss_queues;
    RTE_LOG(INFO, VROUTER, "    setup %d RSS queue(s) and %d filtering queue(s)\n",
        (int)ethdev->ethdev_nb_rss_queues, i);

    /* configure TX queues */
    for (i = 0; i < ethdev->ethdev_nb_tx_queues; i++) {
        ret = rte_eth_tx_queue_setup(port_id, i, VR_DPDK_NB_TXD,
            SOCKET_ID_ANY, &tx_queue_conf);
        if (ret < 0) {
            RTE_LOG(ERR, VROUTER, "    error setting up eth device %" PRIu8 " TX queue %d"
                    ": %s (%d)\n", port_id, i, rte_strerror(-ret), -ret);
            return ret;
        }
        /* map TX queue to stats counter ignoring any errors */
        rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
    }
    return 0;
}
Example No. 30
static int
cperf_initialize_cryptodev(struct cperf_options *opts, uint8_t *enabled_cdevs,
			struct rte_mempool *session_pool_socket[])
{
	uint8_t enabled_cdev_count = 0, nb_lcores, cdev_id;
	unsigned int i, j;
	int ret;

	enabled_cdev_count = rte_cryptodev_devices_get(opts->device_type,
			enabled_cdevs, RTE_CRYPTO_MAX_DEVS);
	if (enabled_cdev_count == 0) {
		printf("No crypto devices type %s available\n",
				opts->device_type);
		return -EINVAL;
	}

	nb_lcores = rte_lcore_count() - 1;

	if (nb_lcores < 1) {
		RTE_LOG(ERR, USER1,
			"Number of enabled cores needs to be higher than 1\n");
		return -EINVAL;
	}

	/*
	 * Use fewer devices if there are
	 * more available than cores.
	 */
	if (enabled_cdev_count > nb_lcores)
		enabled_cdev_count = nb_lcores;

	/* Create a mempool shared by all the devices */
	uint32_t max_sess_size = 0, sess_size;

	for (cdev_id = 0; cdev_id < rte_cryptodev_count(); cdev_id++) {
		sess_size = rte_cryptodev_get_private_session_size(cdev_id);
		if (sess_size > max_sess_size)
			max_sess_size = sess_size;
	}

	/*
	 * Calculate number of needed queue pairs, based on the amount
	 * of available number of logical cores and crypto devices.
	 * For instance, if there are 4 cores and 2 crypto devices,
	 * 2 queue pairs will be set up per device.
	 */
	opts->nb_qps = (nb_lcores % enabled_cdev_count) ?
				(nb_lcores / enabled_cdev_count) + 1 :
				nb_lcores / enabled_cdev_count;

	for (i = 0; i < enabled_cdev_count &&
			i < RTE_CRYPTO_MAX_DEVS; i++) {
		cdev_id = enabled_cdevs[i];
#ifdef RTE_LIBRTE_PMD_CRYPTO_SCHEDULER
		/*
		 * If multi-core scheduler is used, limit the number
		 * of queue pairs to 1, as there is no way to know
		 * how many cores are being used by the PMD, and
		 * how many will be available for the application.
		 */
		if (!strcmp((const char *)opts->device_type, "crypto_scheduler") &&
				rte_cryptodev_scheduler_mode_get(cdev_id) ==
				CDEV_SCHED_MODE_MULTICORE)
			opts->nb_qps = 1;
#endif

		struct rte_cryptodev_info cdev_info;
		uint8_t socket_id = rte_cryptodev_socket_id(cdev_id);

		rte_cryptodev_info_get(cdev_id, &cdev_info);
		if (opts->nb_qps > cdev_info.max_nb_queue_pairs) {
			printf("Number of needed queue pairs is higher "
				"than the maximum number of queue pairs "
				"per device.\n");
			printf("Lower the number of cores or increase "
				"the number of crypto devices\n");
			return -EINVAL;
		}
		struct rte_cryptodev_config conf = {
			.nb_queue_pairs = opts->nb_qps,
			.socket_id = socket_id
		};

		struct rte_cryptodev_qp_conf qp_conf = {
			.nb_descriptors = opts->nb_descriptors
		};

		if (session_pool_socket[socket_id] == NULL) {
			char mp_name[RTE_MEMPOOL_NAMESIZE];
			struct rte_mempool *sess_mp;

			snprintf(mp_name, RTE_MEMPOOL_NAMESIZE,
				"sess_mp_%u", socket_id);

			sess_mp = rte_mempool_create(mp_name,
						NUM_SESSIONS,
						max_sess_size,
						SESS_MEMPOOL_CACHE_SIZE,
						0, NULL, NULL, NULL,
						NULL, socket_id,
						0);

			if (sess_mp == NULL) {
				printf("Cannot create session pool on socket %d\n",
					socket_id);
				return -ENOMEM;
			}

			printf("Allocated session pool on socket %d\n", socket_id);
			session_pool_socket[socket_id] = sess_mp;
		}

		ret = rte_cryptodev_configure(cdev_id, &conf);
		if (ret < 0) {
			printf("Failed to configure cryptodev %u", cdev_id);
			return -EINVAL;
		}

		for (j = 0; j < opts->nb_qps; j++) {
			ret = rte_cryptodev_queue_pair_setup(cdev_id, j,
				&qp_conf, socket_id,
				session_pool_socket[socket_id]);
			if (ret < 0) {
				printf("Failed to setup queue pair %u on "
					"cryptodev %u",	j, cdev_id);
				return -EINVAL;
			}
		}

		ret = rte_cryptodev_start(cdev_id);
		if (ret < 0) {
			printf("Failed to start device %u: error %d\n",
					cdev_id, ret);
			return -EPERM;
		}
	}

	return enabled_cdev_count;
}
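
/*
 * Illustration only (not part of the original file): the nb_qps formula in
 * cperf_initialize_cryptodev() above is simply a ceiling division of the
 * worker lcores over the enabled crypto devices, e.g. 5 lcores over 2
 * devices gives 3 queue pairs per device.
 */
static inline unsigned
qps_per_device(unsigned nb_lcores, unsigned nb_cdevs)
{
	return (nb_lcores + nb_cdevs - 1) / nb_cdevs;
}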

static int
cperf_verify_devices_capabilities(struct cperf_options *opts,
		uint8_t *enabled_cdevs, uint8_t nb_cryptodevs)
{
	struct rte_cryptodev_sym_capability_idx cap_idx;
	const struct rte_cryptodev_symmetric_capability *capability;

	uint8_t i, cdev_id;
	int ret;

	for (i = 0; i < nb_cryptodevs; i++) {

		cdev_id = enabled_cdevs[i];

		if (opts->op_type == CPERF_AUTH_ONLY ||
				opts->op_type == CPERF_CIPHER_THEN_AUTH ||
				opts->op_type == CPERF_AUTH_THEN_CIPHER) {

			cap_idx.type = RTE_CRYPTO_SYM_XFORM_AUTH;
			cap_idx.algo.auth = opts->auth_algo;

			capability = rte_cryptodev_sym_capability_get(cdev_id,
					&cap_idx);
			if (capability == NULL)
				return -1;

			ret = rte_cryptodev_sym_capability_check_auth(
					capability,
					opts->auth_key_sz,
					opts->digest_sz,
					opts->auth_iv_sz);
			if (ret != 0)
				return ret;
		}

		if (opts->op_type == CPERF_CIPHER_ONLY ||
				opts->op_type == CPERF_CIPHER_THEN_AUTH ||
				opts->op_type == CPERF_AUTH_THEN_CIPHER) {

			cap_idx.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
			cap_idx.algo.cipher = opts->cipher_algo;

			capability = rte_cryptodev_sym_capability_get(cdev_id,
					&cap_idx);
			if (capability == NULL)
				return -1;

			ret = rte_cryptodev_sym_capability_check_cipher(
					capability,
					opts->cipher_key_sz,
					opts->cipher_iv_sz);
			if (ret != 0)
				return ret;
		}

		if (opts->op_type == CPERF_AEAD) {

			cap_idx.type = RTE_CRYPTO_SYM_XFORM_AEAD;
			cap_idx.algo.aead = opts->aead_algo;

			capability = rte_cryptodev_sym_capability_get(cdev_id,
					&cap_idx);
			if (capability == NULL)
				return -1;

			ret = rte_cryptodev_sym_capability_check_aead(
					capability,
					opts->aead_key_sz,
					opts->digest_sz,
					opts->aead_aad_sz,
					opts->aead_iv_sz);
			if (ret != 0)
				return ret;
		}
	}

	return 0;
}

static int
cperf_check_test_vector(struct cperf_options *opts,
		struct cperf_test_vector *test_vec)
{
	if (opts->op_type == CPERF_CIPHER_ONLY) {
		if (opts->cipher_algo == RTE_CRYPTO_CIPHER_NULL) {
			if (test_vec->plaintext.data == NULL)
				return -1;
		} else if (opts->cipher_algo != RTE_CRYPTO_CIPHER_NULL) {
			if (test_vec->plaintext.data == NULL)
				return -1;
			if (test_vec->plaintext.length < opts->max_buffer_size)
				return -1;
			if (test_vec->ciphertext.data == NULL)
				return -1;
			if (test_vec->ciphertext.length < opts->max_buffer_size)
				return -1;
			if (test_vec->cipher_iv.data == NULL)
				return -1;
			if (test_vec->cipher_iv.length != opts->cipher_iv_sz)
				return -1;
			if (test_vec->cipher_key.data == NULL)
				return -1;
			if (test_vec->cipher_key.length != opts->cipher_key_sz)
				return -1;
		}
	} else if (opts->op_type == CPERF_AUTH_ONLY) {
		if (opts->auth_algo != RTE_CRYPTO_AUTH_NULL) {
			if (test_vec->plaintext.data == NULL)
				return -1;
			if (test_vec->plaintext.length < opts->max_buffer_size)
				return -1;
			if (test_vec->auth_key.data == NULL)
				return -1;
			if (test_vec->auth_key.length != opts->auth_key_sz)
				return -1;
			if (test_vec->auth_iv.length != opts->auth_iv_sz)
				return -1;
			/* Auth IV is only required for some algorithms */
			if (opts->auth_iv_sz && test_vec->auth_iv.data == NULL)
				return -1;
			if (test_vec->digest.data == NULL)
				return -1;
			if (test_vec->digest.length < opts->digest_sz)
				return -1;
		}

	} else if (opts->op_type == CPERF_CIPHER_THEN_AUTH ||
			opts->op_type == CPERF_AUTH_THEN_CIPHER) {
		if (opts->cipher_algo == RTE_CRYPTO_CIPHER_NULL) {
			if (test_vec->plaintext.data == NULL)
				return -1;
			if (test_vec->plaintext.length < opts->max_buffer_size)
				return -1;
		} else if (opts->cipher_algo != RTE_CRYPTO_CIPHER_NULL) {
			if (test_vec->plaintext.data == NULL)
				return -1;
			if (test_vec->plaintext.length < opts->max_buffer_size)
				return -1;
			if (test_vec->ciphertext.data == NULL)
				return -1;
			if (test_vec->ciphertext.length < opts->max_buffer_size)
				return -1;
			if (test_vec->cipher_iv.data == NULL)
				return -1;
			if (test_vec->cipher_iv.length != opts->cipher_iv_sz)
				return -1;
			if (test_vec->cipher_key.data == NULL)
				return -1;
			if (test_vec->cipher_key.length != opts->cipher_key_sz)
				return -1;
		}
		if (opts->auth_algo != RTE_CRYPTO_AUTH_NULL) {
			if (test_vec->auth_key.data == NULL)
				return -1;
			if (test_vec->auth_key.length != opts->auth_key_sz)
				return -1;
			if (test_vec->auth_iv.length != opts->auth_iv_sz)
				return -1;
			/* Auth IV is only required for some algorithms */
			if (opts->auth_iv_sz && test_vec->auth_iv.data == NULL)
				return -1;
			if (test_vec->digest.data == NULL)
				return -1;
			if (test_vec->digest.length < opts->digest_sz)
				return -1;
		}
	} else if (opts->op_type == CPERF_AEAD) {
		if (test_vec->plaintext.data == NULL)
			return -1;
		if (test_vec->plaintext.length < opts->max_buffer_size)
			return -1;
		if (test_vec->ciphertext.data == NULL)
			return -1;
		if (test_vec->ciphertext.length < opts->max_buffer_size)
			return -1;
		if (test_vec->aead_iv.data == NULL)
			return -1;
		if (test_vec->aead_iv.length != opts->aead_iv_sz)
			return -1;
		if (test_vec->aad.data == NULL)
			return -1;
		if (test_vec->aad.length != opts->aead_aad_sz)
			return -1;
		if (test_vec->digest.data == NULL)
			return -1;
		if (test_vec->digest.length < opts->digest_sz)
			return -1;
	}
	return 0;
}

int
main(int argc, char **argv)
{
	struct cperf_options opts = {0};
	struct cperf_test_vector *t_vec = NULL;
	struct cperf_op_fns op_fns;

	void *ctx[RTE_MAX_LCORE] = { };
	struct rte_mempool *session_pool_socket[RTE_MAX_NUMA_NODES] = { 0 };

	int nb_cryptodevs = 0;
	uint16_t total_nb_qps = 0;
	uint8_t cdev_id, i;
	uint8_t enabled_cdevs[RTE_CRYPTO_MAX_DEVS] = { 0 };

	uint8_t buffer_size_idx = 0;

	int ret;
	uint32_t lcore_id;

	/* Initialise DPDK EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL arguments!\n");
	argc -= ret;
	argv += ret;

	cperf_options_default(&opts);

	ret = cperf_options_parse(&opts, argc, argv);
	if (ret) {
		RTE_LOG(ERR, USER1, "Parsing on or more user options failed\n");
		goto err;
	}

	ret = cperf_options_check(&opts);
	if (ret) {
		RTE_LOG(ERR, USER1,
				"Checking one or more user options failed\n");
		goto err;
	}

	nb_cryptodevs = cperf_initialize_cryptodev(&opts, enabled_cdevs,
			session_pool_socket);

	if (!opts.silent)
		cperf_options_dump(&opts);

	if (nb_cryptodevs < 1) {
		RTE_LOG(ERR, USER1, "Failed to initialise requested crypto "
				"device type\n");
		nb_cryptodevs = 0;
		goto err;
	}

	ret = cperf_verify_devices_capabilities(&opts, enabled_cdevs,
			nb_cryptodevs);
	if (ret) {
		RTE_LOG(ERR, USER1, "Crypto device type does not support "
				"capabilities requested\n");
		goto err;
	}

	if (opts.test_file != NULL) {
		t_vec = cperf_test_vector_get_from_file(&opts);
		if (t_vec == NULL) {
			RTE_LOG(ERR, USER1,
					"Failed to create test vector for"
					" specified file\n");
			goto err;
		}

		if (cperf_check_test_vector(&opts, t_vec)) {
			RTE_LOG(ERR, USER1, "Incomplete necessary test vectors"
					"\n");
			goto err;
		}
	} else {
		t_vec = cperf_test_vector_get_dummy(&opts);
		if (t_vec == NULL) {
			RTE_LOG(ERR, USER1,
					"Failed to create test vector for"
					" specified algorithms\n");
			goto err;
		}
	}

	ret = cperf_get_op_functions(&opts, &op_fns);
	if (ret) {
		RTE_LOG(ERR, USER1, "Failed to find function ops set for "
				"specified algorithms combination\n");
		goto err;
	}

	if (!opts.silent)
		show_test_vector(t_vec);

	total_nb_qps = nb_cryptodevs * opts.nb_qps;

	i = 0;
	uint8_t qp_id = 0, cdev_index = 0;
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {

		if (i == total_nb_qps)
			break;

		cdev_id = enabled_cdevs[cdev_index];

		uint8_t socket_id = rte_cryptodev_socket_id(cdev_id);

		ctx[i] = cperf_testmap[opts.test].constructor(
				session_pool_socket[socket_id], cdev_id, qp_id,
				&opts, t_vec, &op_fns);
		if (ctx[i] == NULL) {
			RTE_LOG(ERR, USER1, "Test run constructor failed\n");
			goto err;
		}
		qp_id = (qp_id + 1) % opts.nb_qps;
		if (qp_id == 0)
			cdev_index++;
		i++;
	}

	/* Get first size from range or list */
	if (opts.inc_buffer_size != 0)
		opts.test_buffer_size = opts.min_buffer_size;
	else
		opts.test_buffer_size = opts.buffer_size_list[0];

	while (opts.test_buffer_size <= opts.max_buffer_size) {
		i = 0;
		RTE_LCORE_FOREACH_SLAVE(lcore_id) {

			if (i == total_nb_qps)
				break;

			rte_eal_remote_launch(cperf_testmap[opts.test].runner,
				ctx[i], lcore_id);
			i++;
		}
		i = 0;
		RTE_LCORE_FOREACH_SLAVE(lcore_id) {

			if (i == total_nb_qps)
				break;
			rte_eal_wait_lcore(lcore_id);
			i++;
		}

		/* Get next size from range or list */
		if (opts.inc_buffer_size != 0)
			opts.test_buffer_size += opts.inc_buffer_size;
		else {
			if (++buffer_size_idx == opts.buffer_size_count)
				break;
			opts.test_buffer_size = opts.buffer_size_list[buffer_size_idx];
		}
	}

	i = 0;
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {

		if (i == total_nb_qps)
			break;

		cperf_testmap[opts.test].destructor(ctx[i]);
		i++;
	}

	for (i = 0; i < nb_cryptodevs &&
			i < RTE_CRYPTO_MAX_DEVS; i++)
		rte_cryptodev_stop(enabled_cdevs[i]);

	free_test_vector(t_vec, &opts);

	printf("\n");
	return EXIT_SUCCESS;

err:
	i = 0;
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (i == total_nb_qps)
			break;

		cdev_id = enabled_cdevs[i];

		if (ctx[i] && cperf_testmap[opts.test].destructor)
			cperf_testmap[opts.test].destructor(ctx[i]);
		i++;
	}

	for (i = 0; i < nb_cryptodevs &&
			i < RTE_CRYPTO_MAX_DEVS; i++)
		rte_cryptodev_stop(enabled_cdevs[i]);

	free_test_vector(t_vec, &opts);

	printf("\n");
	return EXIT_FAILURE;
}