Example #1
0
/*
 * Join two struct malloc_elem together. elem1 and elem2 must
 * be contiguous in memory.
 */
static inline void
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
{
	struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size);
	elem1->size += elem2->size;
	next->prev = elem1;
}
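All of these examples lean on DPDK's byte-granular pointer helpers from rte_common.h. The standalone sketch below uses simplified stand-in definitions (assumed, not copied verbatim from the header) to show the arithmetic RTE_PTR_ADD and its relatives perform.
#include <stdint.h>
#include <stdio.h>

/* simplified stand-ins for the rte_common.h helpers: plain byte arithmetic
 * over uintptr_t, returning a type-erased pointer */
#define RTE_PTR_ADD(ptr, x)      ((void *)((uintptr_t)(ptr) + (x)))
#define RTE_PTR_SUB(ptr, x)      ((void *)((uintptr_t)(ptr) - (x)))
#define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2))

int main(void)
{
	char buf[256];
	void *p = RTE_PTR_ADD(buf, 64);   /* advance 64 bytes into buf */

	/* distance back to the start of the buffer: prints 64 */
	printf("diff = %zu\n", (size_t)RTE_PTR_DIFF(p, buf));
	return 0;
}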
Example #2
0
int
inbound_sa_check(struct sa_ctx *sa_ctx, struct rte_mbuf *m, uint32_t sa_idx)
{
	struct ipsec_mbuf_metadata *priv;

	priv = RTE_PTR_ADD(m, sizeof(struct rte_mbuf));

	return (sa_ctx->sa[sa_idx].spi == priv->sa->spi);
}
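Example #2 assumes the IPsec metadata sits immediately after the fixed-size struct rte_mbuf header. A minimal sketch of that "private area after the header" layout, using hypothetical stand-in types rather than the real rte_mbuf/ipsec structures:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RTE_PTR_ADD(ptr, x) ((void *)((uintptr_t)(ptr) + (x)))  /* stand-in */

struct pkt_hdr {        /* hypothetical stand-in for struct rte_mbuf */
	uint32_t id;
};

struct pkt_priv {       /* hypothetical stand-in for the IPsec metadata */
	uint32_t spi;
};

int main(void)
{
	/* one allocation: fixed header followed by its private area */
	struct pkt_hdr *m = calloc(1, sizeof(*m) + sizeof(struct pkt_priv));
	struct pkt_priv *priv;

	if (m == NULL)
		return 1;
	priv = RTE_PTR_ADD(m, sizeof(*m));

	priv->spi = 42;
	printf("spi = %u\n", (unsigned)priv->spi);   /* prints 42 */
	free(m);
	return 0;
}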
Example #3
0
/*
 * split an existing element into two smaller elements at the given
 * split_pt parameter.
 */
static void
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
{
	struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
	const size_t new_elem_size = elem->size - old_elem_size;

	malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
	split_pt->prev = elem;
	next_elem->prev = split_pt;
	elem->size = old_elem_size;
	set_trailer(elem);
}
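The split-point arithmetic above is just byte offsets: whatever precedes split_pt stays with elem, and the remainder becomes the new element. A tiny self-contained illustration with made-up numbers:
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical element, standing in for a struct malloc_elem */
	uintptr_t elem = 0x30000000;
	size_t elem_size = 0x40000;            /* elem->size before the split */
	uintptr_t split_pt = elem + 0x10000;   /* chosen split point */

	size_t old_elem_size = split_pt - elem;            /* stays with elem */
	size_t new_elem_size = elem_size - old_elem_size;  /* owned by split_pt */

	/* prints old = 0x10000, new = 0x30000 */
	printf("old = 0x%zx, new = 0x%zx\n", old_elem_size, new_elem_size);
	return 0;
}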
Example #4
0
/*
 * attempt to resize a malloc_elem by expanding into any free space
 * immediately after it in memory.
 */
int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
	const size_t new_size = size + MALLOC_ELEM_OVERHEAD;
	/* if we request a smaller size, then always return ok */
	const size_t current_size = elem->size - elem->pad;
	if (current_size >= new_size)
		return 0;

	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
	rte_spinlock_lock(&elem->heap->lock);
	if (next->state != ELEM_FREE)
		goto err_return;
	if (current_size + next->size < new_size)
		goto err_return;

	/* we now know the element fits, so remove from free list,
	 * join the two
	 */
	elem_free_list_remove(next);
	join_elem(elem, next);

	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
		/* now we have a big block together. Let's cut it down a bit by splitting */
		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
		split_elem(elem, split_pt);
		malloc_elem_free_list_insert(split_pt);
	}
	rte_spinlock_unlock(&elem->heap->lock);
	return 0;

err_return:
	rte_spinlock_unlock(&elem->heap->lock);
	return -1;
}
Example #5
0
/*
 * reserve a block of data in an existing malloc_elem. If the malloc_elem
 * is much larger than the data block requested, we split the element in two.
 * This function is only called from malloc_heap_alloc, so parameter checking
 * is not done here, as it has already been done there.
 */
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound)
{
	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
	const size_t trailer_size = elem->size - old_elem_size - size -
		MALLOC_ELEM_OVERHEAD;

	elem_free_list_remove(elem);

	if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* split it, too much free space after elem */
		struct malloc_elem *new_free_elem =
				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);

		split_elem(elem, new_free_elem);
		malloc_elem_free_list_insert(new_free_elem);
	}

	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* don't split it, pad the element instead */
		elem->state = ELEM_BUSY;
		elem->pad = old_elem_size;

		/* put a dummy header in padding, to point to real element header */
		if (elem->pad > 0) { /* pad will be at least 64 bytes, as everything
		                      * is cache-line aligned */
			new_elem->pad = elem->pad;
			new_elem->state = ELEM_PAD;
			new_elem->size = elem->size - elem->pad;
			set_header(new_elem);
		}

		return new_elem;
	}

	/* we are going to split the element in two. The original element
	 * remains free, and the new element is the one allocated.
	 * Re-insert original element, in case its new size makes it
	 * belong on a different list.
	 */
	split_elem(elem, new_elem);
	new_elem->state = ELEM_BUSY;
	malloc_elem_free_list_insert(elem);

	return new_elem;
}
Example #6
0
/*
 * Expand the heap with a memseg.
 * This sets a dummy malloc_elem header at the end of the segment to prevent
 * overflow. The rest of the segment is added to the free list as a single
 * large free block.
 */
static void
malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
{
	/* allocate the memory block headers, one at end, one at start */
	struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
	struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
			ms->len - MALLOC_ELEM_OVERHEAD);
	end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
	const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;

	malloc_elem_init(start_elem, heap, ms, elem_size);
	malloc_elem_mkend(end_elem, start_elem);
	malloc_elem_free_list_insert(start_elem);

	heap->total_size += elem_size;
}
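The placement arithmetic here (start element at the beginning of the segment, dummy end element floor-aligned near its end, everything in between becoming one free element) can be checked in isolation. A sketch with hypothetical numbers and a simplified alignment macro:
#include <stdint.h>
#include <stdio.h>

/* simplified stand-in for RTE_PTR_ALIGN_FLOOR */
#define PTR_ALIGN_FLOOR(p, a) ((uintptr_t)(p) & ~((uintptr_t)(a) - 1))

int main(void)
{
	/* hypothetical segment, standing in for ms->addr / ms->len */
	uintptr_t seg_addr = 0x20000000;
	size_t seg_len = 64 * 1024 * 1024;
	size_t overhead = 128;    /* assumed value for MALLOC_ELEM_OVERHEAD */
	size_t cacheline = 64;    /* RTE_CACHE_LINE_SIZE */

	uintptr_t start_elem = seg_addr;
	uintptr_t end_elem = PTR_ALIGN_FLOOR(seg_addr + seg_len - overhead,
			cacheline);

	/* everything between the two headers becomes one big free element */
	printf("free element size = 0x%zx\n", (size_t)(end_elem - start_elem));
	return 0;
}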
Example #7
0
/*
 * reserve an extra memory zone and make it available for use by a particular
 * heap. This reserves the zone and sets a dummy malloc_elem header at the end
 * to prevent overflow. The rest of the zone is added to the free list as a
 * single large free block.
 */
static int
malloc_heap_add_memzone(struct malloc_heap *heap, size_t size, unsigned align)
{
	const unsigned mz_flags = 0;
	const size_t block_size = get_malloc_memzone_size();
	/* ensure the data we want to allocate will fit in the memzone */
	const size_t min_size = size + align + MALLOC_ELEM_OVERHEAD * 2;
	const struct rte_memzone *mz = NULL;
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	unsigned numa_socket = heap - mcfg->malloc_heaps;

	size_t mz_size = min_size;
	if (mz_size < block_size)
		mz_size = block_size;

	char mz_name[RTE_MEMZONE_NAMESIZE];
	snprintf(mz_name, sizeof(mz_name), "MALLOC_S%u_HEAP_%u",
		     numa_socket, heap->mz_count++);

	/* try getting a block. if we fail and we don't need as big a block
	 * as given in the config, we can shrink our request and try again
	 */
	do {
		mz = rte_memzone_reserve(mz_name, mz_size, numa_socket,
					 mz_flags);
		if (mz == NULL)
			mz_size /= 2;
	} while (mz == NULL && mz_size > min_size);
	if (mz == NULL)
		return -1;

	/* allocate the memory block headers, one at end, one at start */
	struct malloc_elem *start_elem = (struct malloc_elem *)mz->addr;
	struct malloc_elem *end_elem = RTE_PTR_ADD(mz->addr,
			mz_size - MALLOC_ELEM_OVERHEAD);
	end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);

	const unsigned elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
	malloc_elem_init(start_elem, heap, mz, elem_size);
	malloc_elem_mkend(end_elem, start_elem);
	malloc_elem_free_list_insert(start_elem);

	/* increase heap total size by size of new memzone */
	heap->total_size += mz_size - MALLOC_ELEM_OVERHEAD;
	return 0;
}
Example #8
0
static struct rte_memseg *
virt2memseg(const void *addr, const struct rte_memseg_list *msl)
{
	const struct rte_fbarray *arr;
	void *start, *end;
	int ms_idx;

	if (msl == NULL)
		return NULL;

	/* a memseg list was specified, check if it's the right one */
	start = msl->base_va;
	end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);

	if (addr < start || addr >= end)
		return NULL;

	/* now, calculate index */
	arr = &msl->memseg_arr;
	ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;
	return rte_fbarray_get(arr, ms_idx);
}
Example #9
0
static struct rte_memseg_list *
virt2memseg_list(const void *addr)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *msl;
	int msl_idx;

	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
		void *start, *end;
		msl = &mcfg->memsegs[msl_idx];

		start = msl->base_va;
		end = RTE_PTR_ADD(start,
				(size_t)msl->page_sz * msl->memseg_arr.len);
		if (addr >= start && addr < end)
			break;
	}
	/* if we didn't find our memseg list */
	if (msl_idx == RTE_MAX_MEMSEG_LISTS)
		return NULL;
	return msl;
}
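Examples #8 and #9 both reduce to the same interval arithmetic: a memseg list covers [base_va, base_va + page_sz * len), and an address inside it maps to index (addr - base_va) / page_sz. A standalone sketch with made-up values:
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical list geometry, standing in for msl->base_va,
	 * msl->page_sz and msl->memseg_arr.len */
	uintptr_t base_va = 0x10000000;
	size_t page_sz = 2 * 1024 * 1024;            /* 2 MB pages */
	unsigned int len = 512;                      /* entries in the fbarray */
	uintptr_t addr = base_va + 5 * page_sz + 4096;

	uintptr_t end = base_va + page_sz * len;
	if (addr >= base_va && addr < end)           /* the Example #9 range test */
		printf("memseg index = %zu\n",       /* prints 5, as in Example #8 */
		       (size_t)((addr - base_va) / page_sz));
	return 0;
}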
Example #10
0
/*
 * unmaps hugepages that are not going to be used. since we originally allocate
 * ALL hugepages (not just those we need), additional unmapping needs to be done.
 */
static int
unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
		struct hugepage_info *hpi,
		unsigned num_hp_info)
{
	unsigned socket, size;
	int page, nrpages = 0;

	/* get total number of hugepages */
	for (size = 0; size < num_hp_info; size++)
		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
			nrpages += internal_config.hugepage_info[size].num_pages[socket];

	for (size = 0; size < num_hp_info; size++) {
		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
			unsigned pages_found = 0;

			/* traverse until we have unmapped all the unused pages */
			for (page = 0; page < nrpages; page++) {
				struct hugepage_file *hp = &hugepg_tbl[page];

#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
				/* if this page was already cleared */
				if (hp->final_va == NULL)
					continue;
#endif

				/* find a page that matches the criteria */
				if ((hp->size == hpi[size].hugepage_sz) &&
						(hp->socket_id == (int) socket)) {

					/* if we skipped enough pages, unmap the rest */
					if (pages_found == hpi[size].num_pages[socket]) {
						uint64_t unmap_len;

#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
						unmap_len = hp->size * hp->repeated;
#else
						unmap_len = hp->size;
#endif

						/* get start addr and len of the remaining segment */
						munmap(hp->final_va, (size_t) unmap_len);

						hp->final_va = NULL;
						if (unlink(hp->filepath) == -1) {
							RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
									__func__, hp->filepath, strerror(errno));
							return -1;
						}
					}
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
					/* else, check how much do we need to map */
					else {
						int nr_pg_left =
								hpi[size].num_pages[socket] - pages_found;

						/* if we need enough memory to fit into the segment */
						if (hp->repeated <= nr_pg_left) {
							pages_found += hp->repeated;
						}
						/* truncate the segment */
						else {
							uint64_t final_size = nr_pg_left * hp->size;
							uint64_t seg_size = hp->repeated * hp->size;

							void * unmap_va = RTE_PTR_ADD(hp->final_va,
									final_size);
							int fd;

							munmap(unmap_va, seg_size - final_size);

							fd = open(hp->filepath, O_RDWR);
							if (fd < 0) {
								RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
										hp->filepath, strerror(errno));
								return -1;
							}
							if (ftruncate(fd, final_size) < 0) {
								RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n",
										hp->filepath, strerror(errno));
								return -1;
							}
							close(fd);

							pages_found += nr_pg_left;
							hp->repeated = nr_pg_left;
						}
					}
#else
					/* else, lock the page and skip */
					else
						pages_found++;
#endif

				} /* match page */
			} /* foreach page */
		} /* foreach socket */
	} /* foreach pagesize */

	return 0;
}
Example #11
0
/*
 * Remaps all hugepages into single file segments
 */
static int
remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
	int fd;
	unsigned i = 0, j, num_pages, page_idx = 0;
	void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
	size_t vma_len = 0;
	size_t hugepage_sz = hpi->hugepage_sz;
	size_t total_size, offset;
	char filepath[MAX_HUGEPAGE_PATH];
	phys_addr_t physaddr;
	int socket;

	while (i < hpi->num_pages[0]) {

#ifndef RTE_ARCH_64
		/* for 32-bit systems, don't remap 1G pages and 16G pages,
		 * just reuse original map address as final map address.
		 */
		if ((hugepage_sz == RTE_PGSIZE_1G)
			|| (hugepage_sz == RTE_PGSIZE_16G)) {
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			i++;
			continue;
		}
#endif

		/* reserve a virtual area for next contiguous
		 * physical block: count the number of
		 * contiguous physical pages. */
		for (j = i+1; j < hpi->num_pages[0] ; j++) {
#ifdef RTE_ARCH_PPC_64
			/* The physical addresses are sorted in descending
			 * order on PPC64 */
			if (hugepg_tbl[j].physaddr !=
				hugepg_tbl[j-1].physaddr - hugepage_sz)
				break;
#else
			if (hugepg_tbl[j].physaddr !=
				hugepg_tbl[j-1].physaddr + hugepage_sz)
				break;
#endif
		}
		num_pages = j - i;
		vma_len = num_pages * hugepage_sz;

		socket = hugepg_tbl[i].socket_id;

		/* get the biggest virtual memory area up to
		 * vma_len. If it fails, vma_addr is NULL, so
		 * let the kernel provide the address. */
		vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);

		/* If we can't find a big enough virtual area, work out how many pages
		 * we are going to get */
		if (vma_addr == NULL)
			j = i + 1;
		else if (vma_len != num_pages * hugepage_sz) {
			num_pages = vma_len / hugepage_sz;
			j = i + num_pages;

		}

		hugepg_tbl[page_idx].file_id = page_idx;
		eal_get_hugefile_path(filepath,
				sizeof(filepath),
				hpi->hugedir,
				hugepg_tbl[page_idx].file_id);

		/* try to create hugepage file */
		fd = open(filepath, O_CREAT | O_RDWR, 0755);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
			return -1;
		}

		total_size = 0;
		for (;i < j; i++) {

			/* unmap current segment */
			if (total_size > 0)
				munmap(vma_addr, total_size);

			/* unmap original page */
			munmap(hugepg_tbl[i].orig_va, hugepage_sz);
			unlink(hugepg_tbl[i].filepath);

			total_size += hugepage_sz;

			old_addr = vma_addr;

			/* map new, bigger segment */
			vma_addr = mmap(vma_addr, total_size,
					PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

			if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
				RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
				close(fd);
				return -1;
			}

			/* touch the page. this is needed because kernel postpones mapping
			 * creation until the first page fault. with this, we pin down
			 * the page and it is marked as used and gets into process' pagemap.
			 */
			for (offset = 0; offset < total_size; offset += hugepage_sz)
				*((volatile uint8_t*) RTE_PTR_ADD(vma_addr, offset));
		}

		/* set shared flock on the file. */
		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
				__func__, strerror(errno));
			close(fd);
			return -1;
		}

		snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
				filepath);

		physaddr = rte_mem_virt2phy(vma_addr);

		if (physaddr == RTE_BAD_PHYS_ADDR)
			return -1;

		hugepg_tbl[page_idx].final_va = vma_addr;

		hugepg_tbl[page_idx].physaddr = physaddr;

		hugepg_tbl[page_idx].repeated = num_pages;

		hugepg_tbl[page_idx].socket_id = socket;

		close(fd);

		/* verify the memory segment - that is, check that every VA corresponds
		 * to the physical address we expect to see
		 */
		for (offset = 0; offset < vma_len; offset += hugepage_sz) {
			uint64_t expected_physaddr;

			expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
			page_addr = RTE_PTR_ADD(vma_addr, offset);
			physaddr = rte_mem_virt2phy(page_addr);

			if (physaddr != expected_physaddr) {
				RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
						"at %p (offset 0x%" PRIx64 ": 0x%" PRIx64
						" (expected 0x%" PRIx64 ")\n",
						page_addr, offset, physaddr, expected_physaddr);
				return -1;
			}
		}

		/* zero out the whole segment */
		memset(hugepg_tbl[page_idx].final_va, 0, total_size);

		page_idx++;
	}

	/* zero out the rest */
	memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
	return page_idx;
}
Example #12
0
/* Returns a pointer to the first signature in the specified bucket. */
static inline hash_sig_t *
get_sig_tbl_bucket(const struct rte_hash *h, uint32_t bucket_index)
{
	return RTE_PTR_ADD(h->sig_tbl, (bucket_index *
					h->sig_tbl_bucket_size));
}
Example #13
0
/* Returns a pointer to a key at a specific position in a specified bucket. */
static inline void *
get_key_from_bucket(const struct rte_hash *h, uint8_t *bkt, uint32_t pos)
{
	return RTE_PTR_ADD(bkt, pos * h->key_tbl_key_size);
}
Example #14
0
/* Returns a pointer to the first key in the specified bucket. */
static inline uint8_t *
get_key_tbl_bucket(const struct rte_hash *h, uint32_t bucket_index)
{
	return RTE_PTR_ADD(h->key_tbl, (bucket_index * h->bucket_entries *
					h->key_tbl_key_size));
}
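Examples #12-#14 are all flat-array indexing: a bucket's data starts at bucket_index times the bucket stride, and an entry within it at pos times the key size. A self-contained sketch of the key-table arithmetic, with hypothetical geometry in place of the rte_hash fields:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RTE_PTR_ADD(ptr, x) ((void *)((uintptr_t)(ptr) + (x)))  /* stand-in */

int main(void)
{
	/* hypothetical geometry, standing in for h->bucket_entries and
	 * h->key_tbl_key_size */
	const uint32_t bucket_entries = 4;
	const uint32_t key_size = 16;
	uint8_t *key_tbl = calloc(8 * bucket_entries, key_size); /* 8 buckets */

	if (key_tbl == NULL)
		return 1;

	/* first key of bucket 3, then entry 2 within that bucket */
	uint8_t *bkt = RTE_PTR_ADD(key_tbl, 3 * bucket_entries * key_size);
	void *key = RTE_PTR_ADD(bkt, 2 * key_size);

	/* prints 224 = (3 * 4 + 2) * 16 */
	printf("key offset = %zu\n", (size_t)((uint8_t *)key - key_tbl));
	free(key_tbl);
	return 0;
}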
Example #15
0
/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
	int fd, retval;
	uint64_t page, physaddr;
	unsigned long virt_pfn;
	int page_size;
	off_t offset;

	/* when using dom0, /proc/self/pagemap always returns 0, so look the
	 * address up in DPDK memory by browsing the memsegs */
	if (rte_xen_dom0_supported()) {
		struct rte_mem_config *mcfg;
		struct rte_memseg *memseg;
		unsigned i;

		mcfg = rte_eal_get_configuration()->mem_config;
		for (i = 0; i < RTE_MAX_MEMSEG; i++) {
			memseg = &mcfg->memseg[i];
			if (memseg->addr == NULL)
				break;
			if (virtaddr > memseg->addr &&
					virtaddr < RTE_PTR_ADD(memseg->addr,
						memseg->len)) {
				return memseg->phys_addr +
					RTE_PTR_DIFF(virtaddr, memseg->addr);
			}
		}

		return RTE_BAD_PHYS_ADDR;
	}

	/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
	if (!proc_pagemap_readable)
		return RTE_BAD_PHYS_ADDR;

	/* standard page size */
	page_size = getpagesize();

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0) {
		RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
			__func__, strerror(errno));
		return RTE_BAD_PHYS_ADDR;
	}

	virt_pfn = (unsigned long)virtaddr / page_size;
	offset = sizeof(uint64_t) * virt_pfn;
	if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
		RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
				__func__, strerror(errno));
		close(fd);
		return RTE_BAD_PHYS_ADDR;
	}

	retval = read(fd, &page, PFN_MASK_SIZE);
	close(fd);
	if (retval < 0) {
		RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
				__func__, strerror(errno));
		return RTE_BAD_PHYS_ADDR;
	} else if (retval != PFN_MASK_SIZE) {
		RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap "
				"but expected %d:\n",
				__func__, retval, PFN_MASK_SIZE);
		return RTE_BAD_PHYS_ADDR;
	}

	/*
	 * the pfn (page frame number) are bits 0-54 (see
	 * pagemap.txt in linux Documentation)
	 */
	physaddr = ((page & 0x7fffffffffffffULL) * page_size)
		+ ((unsigned long)virtaddr % page_size);

	return physaddr;
}
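The final computation in Example #15 is worth spelling out: bits 0-54 of a pagemap entry hold the page frame number, and the physical address is that PFN scaled by the page size plus the offset of the virtual address within its page. A sketch with hypothetical values:
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical pagemap entry: "page present" flag plus PFN 0x12345 */
	uint64_t entry = (1ULL << 63) | 0x12345;
	uint64_t page_size = 4096;
	uint64_t virtaddr = 0x7f0000001234ULL;

	/* bits 0-54 of the entry hold the page frame number */
	uint64_t pfn = entry & 0x7fffffffffffffULL;
	uint64_t physaddr = pfn * page_size + (virtaddr % page_size);

	/* prints 0x12345234 */
	printf("physaddr = 0x%llx\n", (unsigned long long)physaddr);
	return 0;
}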
Example #16
0
static struct _mempool_gntalloc_info
_create_mempool(const char *name, unsigned elt_num, unsigned elt_size,
		   unsigned cache_size, unsigned private_data_size,
		   rte_mempool_ctor_t *mp_init, void *mp_init_arg,
		   rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
		   int socket_id, unsigned flags)
{
	struct _mempool_gntalloc_info mgi;
	struct rte_mempool *mp = NULL;
	struct rte_mempool_objsz  objsz;
	uint32_t pg_num, rpg_num, pg_shift, pg_sz;
	char *va, *orig_va, *uv; /* uv: address from which unused pages can be freed */
	ssize_t sz, usz; /* usz: unused size */
	/*
	 * for each page allocated through the xen_gntalloc driver:
	 * gref_arr: stores grant references,
	 * pa_arr: stores physical addresses,
	 * gnt_arr: stores all metadata
	 */
	uint32_t *gref_arr = NULL;
	phys_addr_t *pa_arr = NULL;
	struct _gntarr *gnt_arr = NULL;
	/* start index of the grant references, used for dealloc */
	uint64_t start_index;
	uint32_t i, j;
	int rv = 0;
	struct ioctl_gntalloc_dealloc_gref arg;

	mgi.mp = NULL;
	va = orig_va = uv = NULL;
	pg_num = rpg_num = 0;
	sz = 0;

	pg_sz = getpagesize();
	if (rte_is_power_of_2(pg_sz) == 0) {
		goto out;
	}
	pg_shift = rte_bsf32(pg_sz);

	rte_mempool_calc_obj_size(elt_size, flags, &objsz);
	sz = rte_mempool_xmem_size(elt_num, objsz.total_size, pg_shift);
	pg_num = sz >> pg_shift;

	pa_arr = calloc(pg_num, sizeof(pa_arr[0]));
	gref_arr = calloc(pg_num, sizeof(gref_arr[0]));
	gnt_arr  = calloc(pg_num, sizeof(gnt_arr[0]));
	if ((gnt_arr == NULL) || (gref_arr == NULL) || (pa_arr == NULL))
		goto out;

	/* grant index is continuous in ascending order */
	orig_va = gntalloc(sz, gref_arr, &start_index);
	if (orig_va == NULL)
		goto out;

	get_phys_map(orig_va, pa_arr, pg_num, pg_sz);
	for (i = 0; i < pg_num; i++) {
		gnt_arr[i].index = start_index + i * pg_sz;
		gnt_arr[i].gref = gref_arr[i];
		gnt_arr[i].pa = pa_arr[i];
		gnt_arr[i].va  = RTE_PTR_ADD(orig_va, i * pg_sz);
	}
	qsort(gnt_arr, pg_num, sizeof(struct _gntarr), compare);

	va = get_xen_virtual(sz, pg_sz);
	if (va == NULL) {
		goto out;
	}

	/*
	 * map one by one, as the indexes aren't contiguous any more.
	 * This creates pg_num VMAs; doesn't Linux have a limit on this?
	 */
	for (i = 0; i < pg_num; i++) {
		/* update gref_arr and pa_arr after sort */
		gref_arr[i] = gnt_arr[i].gref;
		pa_arr[i]   = gnt_arr[i].pa;
		gnt_arr[i].va = mmap(va + i * pg_sz, pg_sz, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, gntalloc_fd, gnt_arr[i].index);
		if ((gnt_arr[i].va == MAP_FAILED) || (gnt_arr[i].va != (va + i * pg_sz))) {
			RTE_LOG(ERR, PMD, "failed to map %d pages\n", i);
			goto mmap_failed;
		}
	}

	/*
	 * Check that the allocated size is big enough to hold elt_num
	 * objects and calculate how many bytes are actually required.
	 */
	usz = rte_mempool_xmem_usage(va, elt_num, objsz.total_size, pa_arr, pg_num, pg_shift);
	if (usz < 0) {
		mp = NULL;
		i = pg_num;
		goto mmap_failed;
	} else {
		/* unmap unused pages if any */
		uv = RTE_PTR_ADD(va, usz);
		if ((usz = va + sz - uv) > 0) {

			RTE_LOG(ERR, PMD,
				"%s(%s): unmap unused %zu of %zu "
				"mmaped bytes @%p orig:%p\n",
				__func__, name, usz, sz, uv, va);
			munmap(uv, usz);
			i = (sz - usz) / pg_sz;
			for (; i < pg_num; i++) {
				arg.count = 1;
				arg.index = gnt_arr[i].index;
				rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg);
				if (rv) {
					/* shouldn't fail here */
					RTE_LOG(ERR, PMD, "va=%p pa=%"PRIu64"x index=%"PRIu64" %s\n",
						gnt_arr[i].va,
						gnt_arr[i].pa,
						arg.index, strerror(errno));
					rte_panic("gntdealloc failed when freeing pages\n");
				}
			}

			rpg_num = (sz - usz) >> pg_shift;
		} else
Example #17
0
/* map the PCI resource of a PCI device in virtual memory */
int
pci_uio_map_resource(struct rte_pci_device *dev)
{
	int i, map_idx;
	char dirname[PATH_MAX];
	char cfgname[PATH_MAX];
	char devname[PATH_MAX]; /* contains the /dev/uioX */
	void *mapaddr;
	int uio_num;
	uint64_t phaddr;
	struct rte_pci_addr *loc = &dev->addr;
	struct mapped_pci_resource *uio_res;
	struct mapped_pci_res_list *uio_res_list = RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
	struct pci_map *maps;

	dev->intr_handle.fd = -1;
	dev->intr_handle.uio_cfg_fd = -1;
	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;

	/* secondary processes - use already recorded details */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return pci_uio_map_secondary(dev);

	/* find uio resource */
	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname));
	if (uio_num < 0) {
		RTE_LOG(WARNING, EAL, "  "PCI_PRI_FMT" not managed by UIO driver, "
				"skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
		return 1;
	}
	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);

	/* save fd if in primary process */
	dev->intr_handle.fd = open(devname, O_RDWR);
	if (dev->intr_handle.fd < 0) {
		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
			devname, strerror(errno));
		return -1;
	}
	dev->intr_handle.type = RTE_INTR_HANDLE_UIO;

	snprintf(cfgname, sizeof(cfgname),
			"/sys/class/uio/uio%u/device/config", uio_num);
	dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
	if (dev->intr_handle.uio_cfg_fd < 0) {
		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
			cfgname, strerror(errno));
		return -1;
	}

	/* set bus master that is not done by uio_pci_generic */
	if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
		RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
		return -1;
	}

	/* allocate the mapping details for secondary processes */
	uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
	if (uio_res == NULL) {
		RTE_LOG(ERR, EAL,
			"%s(): cannot store uio mmap details\n", __func__);
		return -1;
	}

	snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
	memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr));

	/* Map all BARs */
	maps = uio_res->maps;
	for (i = 0, map_idx = 0; i != PCI_MAX_RESOURCE; i++) {
		int fd;
		int fail = 0;

		/* skip empty BAR */
		phaddr = dev->mem_resource[i].phys_addr;
		if (phaddr == 0)
			continue;


		/* update devname for mmap  */
		snprintf(devname, sizeof(devname),
				SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d",
				loc->domain, loc->bus, loc->devid, loc->function,
				i);

		/*
		 * open resource file, to mmap it
		 */
		fd = open(devname, O_RDWR);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
					devname, strerror(errno));
			return -1;
		}

		/* try mapping somewhere close to the end of hugepages */
		if (pci_map_addr == NULL)
			pci_map_addr = pci_find_max_end_va();

		mapaddr = pci_map_resource(pci_map_addr, fd, 0,
				(size_t)dev->mem_resource[i].len, 0);
		if (mapaddr == MAP_FAILED)
			fail = 1;

		pci_map_addr = RTE_PTR_ADD(mapaddr,
				(size_t)dev->mem_resource[i].len);

		maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
		if (maps[map_idx].path == NULL)
			fail = 1;

		if (fail) {
			rte_free(uio_res);
			close(fd);
			return -1;
		}
		close(fd);

		maps[map_idx].phaddr = dev->mem_resource[i].phys_addr;
		maps[map_idx].size = dev->mem_resource[i].len;
		maps[map_idx].addr = mapaddr;
		maps[map_idx].offset = 0;
		strcpy(maps[map_idx].path, devname);
		map_idx++;
		dev->mem_resource[i].addr = mapaddr;
	}

	uio_res->nb_maps = map_idx;

	TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);

	return 0;
}
Example #18
0
static int
test_memzone_reserve_max_aligned(void)
{
	const struct rte_memzone *mz;
	const struct rte_config *config;
	const struct rte_memseg *ms;
	int memseg_idx = 0;
	int memzone_idx = 0;
	uintptr_t addr_offset;
	size_t len = 0;
	void* last_addr;
	size_t maxlen = 0;

	/* random alignment */
	rte_srand((unsigned)rte_rdtsc());
	const unsigned align = 1 << ((rte_rand() % 8) + 5); /* from 128 up to 4k alignment */

	/* get pointer to global configuration */
	config = rte_eal_get_configuration();

	ms = rte_eal_get_physmem_layout();

	addr_offset = 0;

	for (memseg_idx = 0; memseg_idx < RTE_MAX_MEMSEG; memseg_idx++){

		/* ignore smaller memsegs as they can only get smaller */
		if (ms[memseg_idx].len < maxlen)
			continue;

		/* align everything */
		last_addr = RTE_PTR_ALIGN_CEIL(ms[memseg_idx].addr, RTE_CACHE_LINE_SIZE);
		len = ms[memseg_idx].len - RTE_PTR_DIFF(last_addr, ms[memseg_idx].addr);
		len &= ~((size_t) RTE_CACHE_LINE_MASK);

		/* cycle through all memzones */
		for (memzone_idx = 0; memzone_idx < RTE_MAX_MEMZONE; memzone_idx++) {

			/* stop when reaching last allocated memzone */
			if (config->mem_config->memzone[memzone_idx].addr == NULL)
				break;

			/* check if the memzone is in our memseg and subtract length */
			if ((config->mem_config->memzone[memzone_idx].addr >=
					ms[memseg_idx].addr) &&
					(config->mem_config->memzone[memzone_idx].addr <
					(RTE_PTR_ADD(ms[memseg_idx].addr, ms[memseg_idx].len)))) {
				/* since the zones can now be aligned and occasionally skip
				 * some space, we should calculate the length based on
				 * reported length and start addresses difference.
				 */
				len -= (uintptr_t) RTE_PTR_SUB(
						config->mem_config->memzone[memzone_idx].addr,
						(uintptr_t) last_addr);
				len -= config->mem_config->memzone[memzone_idx].len;
				last_addr =
						RTE_PTR_ADD(config->mem_config->memzone[memzone_idx].addr,
						(size_t) config->mem_config->memzone[memzone_idx].len);
			}
		}

		/* make sure we get the alignment offset */
		if (len > maxlen) {
			addr_offset = RTE_PTR_ALIGN_CEIL((uintptr_t) last_addr, align) - (uintptr_t) last_addr;
			maxlen = len;
		}
	}

	if (maxlen == 0 || maxlen == addr_offset) {
		printf("There is no space left for biggest %u-aligned memzone!\n", align);
		return 0;
	}

	maxlen -= addr_offset;

	mz = rte_memzone_reserve_aligned("max_zone_aligned", 0,
			SOCKET_ID_ANY, 0, align);
	if (mz == NULL){
		printf("Failed to reserve a big chunk of memory\n");
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);
		return -1;
	}

	if (mz->len != maxlen) {
		printf("Memzone reserve with 0 size and alignment %u did not return"
				" bigest block\n", align);
		printf("Expected size = %zu, actual size = %zu\n",
				maxlen, mz->len);
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);

		return -1;
	}
	return 0;
}
Example #19
0
static int
test_memzone_reserve_max(void)
{
	const struct rte_memzone *mz;
	const struct rte_config *config;
	const struct rte_memseg *ms;
	int memseg_idx = 0;
	int memzone_idx = 0;
	size_t len = 0;
	void* last_addr;
	size_t maxlen = 0;

	/* get pointer to global configuration */
	config = rte_eal_get_configuration();

	ms = rte_eal_get_physmem_layout();

	for (memseg_idx = 0; memseg_idx < RTE_MAX_MEMSEG; memseg_idx++){
		/* ignore smaller memsegs as they can only get smaller */
		if (ms[memseg_idx].len < maxlen)
			continue;

		/* align everything */
		last_addr = RTE_PTR_ALIGN_CEIL(ms[memseg_idx].addr, RTE_CACHE_LINE_SIZE);
		len = ms[memseg_idx].len - RTE_PTR_DIFF(last_addr, ms[memseg_idx].addr);
		len &= ~((size_t) RTE_CACHE_LINE_MASK);

		/* cycle through all memzones */
		for (memzone_idx = 0; memzone_idx < RTE_MAX_MEMZONE; memzone_idx++) {

			/* stop when reaching last allocated memzone */
			if (config->mem_config->memzone[memzone_idx].addr == NULL)
				break;

			/* check if the memzone is in our memseg and subtract length */
			if ((config->mem_config->memzone[memzone_idx].addr >=
			     ms[memseg_idx].addr) &&
			    (config->mem_config->memzone[memzone_idx].addr <
			     (RTE_PTR_ADD(ms[memseg_idx].addr, ms[memseg_idx].len)))) {
				/* since the zones can now be aligned and occasionally skip
				 * some space, we should calculate the length based on
				 * reported length and start addresses difference. Addresses
				 * are allocated sequentially so we don't need to worry about
				 * them being in the right order.
				 */
				len -= RTE_PTR_DIFF(
						    config->mem_config->memzone[memzone_idx].addr,
						    last_addr);
				len -= config->mem_config->memzone[memzone_idx].len;
				last_addr = RTE_PTR_ADD(config->mem_config->memzone[memzone_idx].addr,
							(size_t) config->mem_config->memzone[memzone_idx].len);
			}
		}

		/* we don't need to calculate offset here since length
		 * is always cache-aligned */
		if (len > maxlen)
			maxlen = len;
	}

	if (maxlen == 0) {
		printf("There is no space left!\n");
		return 0;
	}

	mz = rte_memzone_reserve("max_zone", 0, SOCKET_ID_ANY, 0);
	if (mz == NULL){
		printf("Failed to reserve a big chunk of memory\n");
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);
		return -1;
	}

	if (mz->len != maxlen) {
		printf("Memzone reserve with 0 size did not return bigest block\n");
		printf("Expected size = %zu, actual size = %zu\n",
		       maxlen, mz->len);
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);

		return -1;
	}
	return 0;
}
Example #20
0
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
		size_t page_sz, int flags, int mmap_flags)
{
	bool addr_is_hint, allow_shrink, unmap, no_align;
	uint64_t map_sz;
	void *mapped_addr, *aligned_addr;

	if (system_page_sz == 0)
		system_page_sz = sysconf(_SC_PAGESIZE);

	mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;

	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);

	addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
	allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
	unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;

	if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
			rte_eal_process_type() == RTE_PROC_PRIMARY)
		next_baseaddr = (void *) internal_config.base_virtaddr;

	if (requested_addr == NULL && next_baseaddr != NULL) {
		requested_addr = next_baseaddr;
		requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
		addr_is_hint = true;
	}

	/* we don't need alignment of resulting pointer in the following cases:
	 *
	 * 1. page size is equal to the system page size
	 * 2. we have a requested address, and it is page-aligned, and we will
	 *    be discarding the address if we get a different one.
	 *
	 * for all other cases, alignment is potentially necessary.
	 */
	no_align = (requested_addr != NULL &&
		requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
		!addr_is_hint) ||
		page_sz == system_page_sz;

	do {
		map_sz = no_align ? *size : *size + page_sz;
		if (map_sz > SIZE_MAX) {
			RTE_LOG(ERR, EAL, "Map size too big\n");
			rte_errno = E2BIG;
			return NULL;
		}

		mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ,
				mmap_flags, -1, 0);
		if (mapped_addr == MAP_FAILED && allow_shrink)
			*size -= page_sz;
	} while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0);

	/* align resulting address - if map failed, we will ignore the value
	 * anyway, so no need to add additional checks.
	 */
	aligned_addr = no_align ? mapped_addr :
			RTE_PTR_ALIGN(mapped_addr, page_sz);

	if (*size == 0) {
		RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
			strerror(errno));
		rte_errno = errno;
		return NULL;
	} else if (mapped_addr == MAP_FAILED) {
		RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
			strerror(errno));
		/* pass errno up the call chain */
		rte_errno = errno;
		return NULL;
	} else if (requested_addr != NULL && !addr_is_hint &&
			aligned_addr != requested_addr) {
		RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
			requested_addr, aligned_addr);
		munmap(mapped_addr, map_sz);
		rte_errno = EADDRNOTAVAIL;
		return NULL;
	} else if (requested_addr != NULL && addr_is_hint &&
			aligned_addr != requested_addr) {
		RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
			requested_addr, aligned_addr);
		RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory into secondary processes\n");
	} else if (next_baseaddr != NULL) {
		next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
	}

	RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
		aligned_addr, *size);

	if (unmap) {
		munmap(mapped_addr, map_sz);
	} else if (!no_align) {
		void *map_end, *aligned_end;
		size_t before_len, after_len;

		/* when we reserve space with alignment, we add alignment to
		 * mapping size. On 32-bit, if 1GB alignment was requested, this
		 * would waste 1GB of address space, which is a luxury we cannot
		 * afford. so, if alignment was performed, check if any unneeded
		 * address space can be unmapped back.
		 */

		map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
		aligned_end = RTE_PTR_ADD(aligned_addr, *size);

		/* unmap space before aligned mmap address */
		before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
		if (before_len > 0)
			munmap(mapped_addr, before_len);

		/* unmap space after aligned end mmap address */
		after_len = RTE_PTR_DIFF(map_end, aligned_end);
		if (after_len > 0)
			munmap(aligned_end, after_len);
	}

	return aligned_addr;
}
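When alignment is needed, eal_get_virtual_area() maps size + page_sz bytes, aligns inside the mapping, and trims the slack on both sides. The trim arithmetic can be reproduced in isolation; the sketch below uses hypothetical addresses and a simplified align-up macro:
#include <stdint.h>
#include <stdio.h>

/* simplified stand-in for RTE_PTR_ALIGN (align up to a power-of-two) */
#define PTR_ALIGN_CEIL(p, a) \
	(((uintptr_t)(p) + ((uintptr_t)(a) - 1)) & ~((uintptr_t)(a) - 1))

int main(void)
{
	uintptr_t mapped_addr = 0x10001000;   /* what mmap() happened to return */
	size_t size = 4 * 1024 * 1024;        /* requested area */
	size_t page_sz = 2 * 1024 * 1024;     /* target alignment */
	size_t map_sz = size + page_sz;       /* extra page for aligning inside */

	uintptr_t aligned_addr = PTR_ALIGN_CEIL(mapped_addr, page_sz);
	uintptr_t map_end = mapped_addr + map_sz;
	uintptr_t aligned_end = aligned_addr + size;

	/* slack trimmed from either side, as in the !no_align branch above;
	 * prints before_len = 0x1ff000, after_len = 0x1000 */
	printf("before_len = 0x%zx, after_len = 0x%zx\n",
	       (size_t)(aligned_addr - mapped_addr),
	       (size_t)(map_end - aligned_end));
	return 0;
}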
Example #21
0
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
		int socket_id, unsigned flags, unsigned align, unsigned bound)
{
	struct rte_mem_config *mcfg;
	unsigned i = 0;
	int memseg_idx = -1;
	uint64_t addr_offset, seg_offset = 0;
	size_t requested_len;
	size_t memseg_len = 0;
	phys_addr_t memseg_physaddr;
	void *memseg_addr;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* no more room in config */
	if (mcfg->memzone_idx >= RTE_MAX_MEMZONE) {
		RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
		rte_errno = ENOSPC;
		return NULL;
	}

	/* zone already exist */
	if ((memzone_lookup_thread_unsafe(name)) != NULL) {
		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
			__func__, name);
		rte_errno = EEXIST;
		return NULL;
	}

	/* if alignment is not a power of two */
	if (align && !rte_is_power_of_2(align)) {
		RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
				align);
		rte_errno = EINVAL;
		return NULL;
	}

	/* alignment less than cache size is not allowed */
	if (align < RTE_CACHE_LINE_SIZE)
		align = RTE_CACHE_LINE_SIZE;


	/* align length on cache boundary. Check for overflow before doing so */
	if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
		rte_errno = EINVAL; /* requested size too big */
		return NULL;
	}

	len += RTE_CACHE_LINE_MASK;
	len &= ~((size_t) RTE_CACHE_LINE_MASK);

	/* save minimal requested length */
	requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE,  len);

	/* check that boundary condition is valid */
	if (bound != 0 &&
			(requested_len > bound || !rte_is_power_of_2(bound))) {
		rte_errno = EINVAL;
		return NULL;
	}

	/* find the smallest segment matching requirements */
	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
		/* last segment */
		if (free_memseg[i].addr == NULL)
			break;

		/* empty segment, skip it */
		if (free_memseg[i].len == 0)
			continue;

		/* bad socket ID */
		if (socket_id != SOCKET_ID_ANY &&
		    free_memseg[i].socket_id != SOCKET_ID_ANY &&
		    socket_id != free_memseg[i].socket_id)
			continue;

		/*
		 * calculate offset to closest alignment that
		 * meets boundary conditions.
		 */
		addr_offset = align_phys_boundary(free_memseg + i,
			requested_len, align, bound);

		/* check len */
		if ((requested_len + addr_offset) > free_memseg[i].len)
			continue;

		/* check flags for hugepage sizes */
		if ((flags & RTE_MEMZONE_2MB) &&
				free_memseg[i].hugepage_sz == RTE_PGSIZE_1G)
			continue;
		if ((flags & RTE_MEMZONE_1GB) &&
				free_memseg[i].hugepage_sz == RTE_PGSIZE_2M)
			continue;
		if ((flags & RTE_MEMZONE_16MB) &&
				free_memseg[i].hugepage_sz == RTE_PGSIZE_16G)
			continue;
		if ((flags & RTE_MEMZONE_16GB) &&
				free_memseg[i].hugepage_sz == RTE_PGSIZE_16M)
			continue;

		/* this segment is the best until now */
		if (memseg_idx == -1) {
			memseg_idx = i;
			memseg_len = free_memseg[i].len;
			seg_offset = addr_offset;
		}
		/* find the biggest contiguous zone */
		else if (len == 0) {
			if (free_memseg[i].len > memseg_len) {
				memseg_idx = i;
				memseg_len = free_memseg[i].len;
				seg_offset = addr_offset;
			}
		}
		/*
		 * find the smallest (we already checked that the current
		 * zone length is > len)
		 */
		else if (free_memseg[i].len + align < memseg_len ||
				(free_memseg[i].len <= memseg_len + align &&
				addr_offset < seg_offset)) {
			memseg_idx = i;
			memseg_len = free_memseg[i].len;
			seg_offset = addr_offset;
		}
	}

	/* no segment found */
	if (memseg_idx == -1) {
		/*
		 * If RTE_MEMZONE_SIZE_HINT_ONLY flag is specified,
		 * try allocating again without the page-size flags; otherwise fail.
		 */
		if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY)  &&
		    ((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB)
		|| (flags & RTE_MEMZONE_16MB) || (flags & RTE_MEMZONE_16GB)))
			return memzone_reserve_aligned_thread_unsafe(name,
				len, socket_id, 0, align, bound);

		rte_errno = ENOMEM;
		return NULL;
	}

	/* save aligned physical and virtual addresses */
	memseg_physaddr = free_memseg[memseg_idx].phys_addr + seg_offset;
	memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr,
			(uintptr_t) seg_offset);

	/* if we are looking for a biggest memzone */
	if (len == 0) {
		if (bound == 0)
			requested_len = memseg_len - seg_offset;
		else
			requested_len = RTE_ALIGN_CEIL(memseg_physaddr + 1,
				bound) - memseg_physaddr;
	}

	/* set length to correct value */
	len = (size_t)seg_offset + requested_len;

	/* update our internal state */
	free_memseg[memseg_idx].len -= len;
	free_memseg[memseg_idx].phys_addr += len;
	free_memseg[memseg_idx].addr =
		(char *)free_memseg[memseg_idx].addr + len;

	/* fill the zone in config */
	struct rte_memzone *mz = &mcfg->memzone[mcfg->memzone_idx++];
	snprintf(mz->name, sizeof(mz->name), "%s", name);
	mz->phys_addr = memseg_physaddr;
	mz->addr = memseg_addr;
	mz->len = requested_len;
	mz->hugepage_sz = free_memseg[memseg_idx].hugepage_sz;
	mz->socket_id = free_memseg[memseg_idx].socket_id;
	mz->flags = 0;
	mz->memseg_id = memseg_idx;

	return mz;
}
Example #22
0
int
grant_node_create(uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, char *val_str, size_t str_size)
{
	uint64_t start_index;
	int pg_size;
	uint32_t pg_shift;
	void *ptr = NULL;
	uint32_t count, entries_per_pg;
	uint32_t i, j = 0, k = 0;
	uint32_t *gref_tmp;
	int first = 1;
	char tmp_str[PATH_MAX] = {0};
	int rv = -1;

	pg_size = getpagesize();
	if (rte_is_power_of_2(pg_size) == 0) {
		return -1;
	}
	pg_shift = rte_bsf32(pg_size);
	if (pg_size % sizeof(struct grant_node_item)) {
		RTE_LOG(ERR, PMD, "pg_size isn't a multiple of grant node item\n");
		return -1;
	}

	entries_per_pg = pg_size / sizeof(struct grant_node_item);
	count = (pg_num + entries_per_pg - 1) / entries_per_pg;
	gref_tmp = malloc(count * sizeof(uint32_t));
	if (gref_tmp == NULL)
		return -1;
	ptr = gntalloc(pg_size * count, gref_tmp, &start_index);
	if (ptr == NULL) {
		RTE_LOG(ERR, PMD, "%s: gntalloc error of %d pages\n", __func__, count);
		free(gref_tmp);
		return -1;
	}

	while (j < pg_num) {
		if (first) {
			rv = snprintf(val_str, str_size, "%u", gref_tmp[k]);
			first = 0;
		} else {
			snprintf(tmp_str, PATH_MAX, "%s", val_str);
			rv = snprintf(val_str, str_size, "%s,%u", tmp_str, gref_tmp[k]);
		}
		k++;
		if (rv == -1)
			break;

		for (i = 0; i < entries_per_pg && j < pg_num ; i++) {
			((struct grant_node_item *)ptr)->gref = gref_arr[j];
			((struct grant_node_item *)ptr)->pfn =  pa_arr[j] >> pg_shift;
			ptr = RTE_PTR_ADD(ptr, sizeof(struct grant_node_item));
			j++;
		}
	}
	if (rv == -1) {
		gntfree(ptr, pg_size * count, start_index);
	} else
		rv = 0;
	free(gref_tmp);
	return rv;
}
Example #23
0
/*
 * This creates the memory mappings in the secondary process to match that of
 * the server process. It goes through each memory segment in the DPDK runtime
 * configuration and finds the hugepages which form that segment, mapping them
 * in order to form a contiguous block in the virtual memory space
 */
static int
rte_eal_hugepage_attach(void)
{
	const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	const struct hugepage *hp = NULL;
	unsigned num_hp = 0;
	unsigned i, s = 0; /* s used to track the segment number */
	off_t size;
	int fd, fd_zero = -1, fd_hugepage = -1;

	if (aslr_enabled() > 0) {
		RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
				"(ASLR) is enabled in the kernel.\n");
		RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory "
				"into secondary processes\n");
	}

	fd_zero = open("/dev/zero", O_RDONLY);
	if (fd_zero < 0) {
		RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
		goto error;
	}
	fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY);
	if (fd_hugepage < 0) {
		RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
		goto error;
	}

	size = getFileSize(fd_hugepage);
	hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
	if (hp == MAP_FAILED) {
		RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
		goto error;
	}

	num_hp = size / sizeof(struct hugepage);
	RTE_LOG(DEBUG, EAL, "Analysing %u hugepages\n", num_hp);

	while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){
		void *addr, *base_addr;
		uintptr_t offset = 0;

		/* fd_zero is mmapped to get a contiguous block of virtual
		 * addresses of the appropriate size -
		 * use mmap to attempt to get an identical address to the server's.
		 */
		base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
				PROT_READ, MAP_PRIVATE, fd_zero, 0);
		if (base_addr == MAP_FAILED || base_addr != mcfg->memseg[s].addr) {
			RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
				"in /dev/zero to requested address [%p]\n",
				(unsigned long long)mcfg->memseg[s].len,
				mcfg->memseg[s].addr);
			if (aslr_enabled() > 0)
				RTE_LOG(ERR, EAL, "It is recommended to disable ASLR in the kernel "
						"and retry running both primary and secondary processes\n");
			goto error;
		}
		/* free memory so we can map the hugepages into the space */
		munmap(base_addr, mcfg->memseg[s].len);

		/* find the hugepages for this segment and map them
		 * we don't need to worry about order, as the server sorted the
		 * entries before it did the second mmap of them */
		for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){
			if (hp[i].memseg_id == (int)s){
				fd = open(hp[i].filepath, O_RDWR);
				if (fd < 0) {
					RTE_LOG(ERR, EAL, "Could not open %s\n",
						hp[i].filepath);
					goto error;
				}
				addr = mmap(RTE_PTR_ADD(base_addr, offset),
						hp[i].size, PROT_READ | PROT_WRITE,
						MAP_SHARED | MAP_FIXED, fd, 0);
				close(fd); /* close file both on success and on failure */
				if (addr == MAP_FAILED) {
					RTE_LOG(ERR, EAL, "Could not mmap %s\n",
						hp[i].filepath);
					goto error;
				}
				offset += hp[i].size;
			}
		}
		RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,
				(unsigned long long)mcfg->memseg[s].len);
		s++;
	}
	close(fd_zero);
	close(fd_hugepage);
	return 0;

error:
	if (fd_zero >= 0)
		close(fd_zero);
	if (fd_hugepage >= 0)
		close(fd_hugepage);
	return -1;
}
Example #24
0
int
rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name)
{
	const struct memseg_cache_entry * ms_cache, *entry;
	struct ivshmem_config * config;
	char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
	char cfg_file_path[PATH_MAX];
	unsigned remaining_len, tmplen, iter;
	uint64_t shared_mem_size, zero_size, total_size;

	if (buffer == NULL || name == NULL)
		return -1;

	config = get_config_by_name(name);

	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
		return -1;
	}

	rte_spinlock_lock(&config->sl);

	/* prepare metadata file path */
	snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT,
			config->metadata->name);

	ms_cache = config->memseg_cache;

	cmdline_ptr = cmdline;
	remaining_len = sizeof(cmdline);

	shared_mem_size = 0;
	iter = 0;

	while ((iter < RTE_DIM(config->metadata->entry)) && (ms_cache[iter].len != 0)) {

		entry = &ms_cache[iter];

		/* Offset and sizes within the current pathname */
		tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
				entry->filepath, entry->offset, entry->len);

		shared_mem_size += entry->len;

		cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
		remaining_len -= tmplen;

		if (remaining_len == 0) {
			RTE_LOG(ERR, EAL, "Command line too long!\n");
			rte_spinlock_unlock(&config->sl);
			return -1;
		}

		iter++;
	}

	total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED);
	zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED;

	/* add /dev/zero to command-line to fill the space */
	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
			"/dev/zero",
			(uint64_t)0x0,
			zero_size);

	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
	remaining_len -= tmplen;

	if (remaining_len == 0) {
		RTE_LOG(ERR, EAL, "Command line too long!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}

	/* add metadata file to the end of command-line */
	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
			cfg_file_path,
			(uint64_t)0x0,
			METADATA_SIZE_ALIGNED);

	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
	remaining_len -= tmplen;

	if (remaining_len == 0) {
		RTE_LOG(ERR, EAL, "Command line too long!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}

	/* if current length of the command line is bigger than the buffer supplied
	 * by the user, or if command-line is bigger than what IVSHMEM accepts */
	if ((sizeof(cmdline) - remaining_len) > size) {
		RTE_LOG(ERR, EAL, "Buffer is too short!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}
	/* complete the command-line */
	snprintf(buffer, size,
			IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
			total_size >> 20,
			cmdline);

	rte_spinlock_unlock(&config->sl);

	return 0;
}
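Example #24 builds the command line by appending formatted chunks into one buffer, advancing the write pointer by snprintf()'s return value and shrinking the remaining length. A minimal standalone sketch of that pattern (with an explicit truncation check added, and made-up format strings rather than the IVSHMEM ones):
#include <stdio.h>

int main(void)
{
	char cmdline[64];
	char *ptr = cmdline;
	size_t remaining = sizeof(cmdline);
	const char *files[] = { "/tmp/a", "/tmp/b" };

	for (int i = 0; i < 2; i++) {
		int n = snprintf(ptr, remaining, " -mem-path %s", files[i]);

		if (n < 0 || (size_t)n >= remaining)
			return 1;               /* command line too long */
		ptr += n;                       /* advance past what was written */
		remaining -= (size_t)n;
	}
	printf("%s\n", cmdline);  /* " -mem-path /tmp/a -mem-path /tmp/b" */
	return 0;
}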
Example #25
0
/*
 * This function maps the grant node of a vring or mbuf pool to a contiguous
 * virtual address space, and returns the mapped address, pfn array, and index array.
 * @param gntnode
 *  Pointer to grant node
 * @param domid
 *  Guest domain id
 * @param ppfn
 *  Pointer to pfn array, caller should free this array
 * @param pgs
 *  Pointer to number of pages
 * @param ppindex
 *  Pointer to index array, used to release grefs when to free this node
 * @return
 *  Pointer to mapped virtual address, NULL on failure
 */
static void *
map_gntnode(struct xen_gntnode *gntnode, int domid, uint32_t **ppfn, uint32_t *pgs, uint64_t **ppindex)
{
	struct xen_gnt *gnt;
	uint32_t i, j;
	size_t total_pages = 0;
	void *addr;
	uint32_t *pfn;
	uint64_t *pindex;
	uint32_t pfn_num = 0;
	int pg_sz;

	if (gntnode == NULL)
		return NULL;

	pg_sz = getpagesize();
	for (i = 0; i < gntnode->gnt_num; i++) {
		gnt = gntnode->gnt_info + i;
		total_pages += cal_pagenum(gnt);
	}
	if ((addr = get_xen_virtual(total_pages * pg_sz, pg_sz)) == NULL) {
		RTE_LOG(ERR, XENHOST, "  %s: failed get_xen_virtual\n", __func__);
		return NULL;
	}
	pfn = calloc(total_pages, (size_t)sizeof(uint32_t));
	pindex = calloc(total_pages, (size_t)sizeof(uint64_t));
	if (pfn == NULL || pindex == NULL) {
		free_xen_virtual(addr, total_pages * pg_sz, pg_sz);
		free(pfn);
		free(pindex);
		return NULL;
	}

	RTE_LOG(INFO, XENHOST, "    %s: total pages:%zu, map to [%p, %p]\n", __func__, total_pages, addr, RTE_PTR_ADD(addr, total_pages * pg_sz - 1));
	for (i = 0; i < gntnode->gnt_num; i++) {
		gnt = gntnode->gnt_info + i;
		for (j = 0; j < (PAGE_PFNNUM) / 2; j++) {
			if ((gnt->gref_pfn[j * 2].gref) <= 0)
				goto _end;
			/*alternative: batch map, or through libxc*/
			if (xen_grant_mmap(RTE_PTR_ADD(addr, pfn_num * pg_sz),
					domid,
					gnt->gref_pfn[j * 2].gref,
					&pindex[pfn_num]) == NULL) {
				goto mmap_failed;
			}
			pfn[pfn_num] = gnt->gref_pfn[j * 2 + 1].pfn_num;
			pfn_num++;
		}
	}

mmap_failed:
	if (pfn_num)
		munmap(addr, pfn_num * pg_sz);
	for (i = 0; i < pfn_num; i++) {
		xen_unmap_grant_ref(pindex[i]);
	}
	free(pindex);
	free(pfn);
	return NULL;

_end:
	if (ppindex)
		*ppindex = pindex;
	else
		free(pindex);
	if (ppfn)
		*ppfn = pfn;
	else
		free(pfn);
	if (pgs)
		*pgs = total_pages;

	return addr;
}