Example #1
/*
 * Remaps all hugepages into single file segments
 */
static int
remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
	int fd;
	unsigned i = 0, j, num_pages, page_idx = 0;
	void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
	size_t vma_len = 0;
	size_t hugepage_sz = hpi->hugepage_sz;
	size_t total_size, offset;
	char filepath[MAX_HUGEPAGE_PATH];
	phys_addr_t physaddr;
	int socket;

	while (i < hpi->num_pages[0]) {

#ifndef RTE_ARCH_64
		/* for 32-bit systems, don't remap 1G pages and 16G pages,
		 * just reuse original map address as final map address.
		 */
		if ((hugepage_sz == RTE_PGSIZE_1G)
			|| (hugepage_sz == RTE_PGSIZE_16G)) {
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			i++;
			continue;
		}
#endif

		/* reserve a virtual area for next contiguous
		 * physical block: count the number of
		 * contiguous physical pages. */
		for (j = i+1; j < hpi->num_pages[0] ; j++) {
#ifdef RTE_ARCH_PPC_64
			/* The physical addresses are sorted in descending
			 * order on PPC64 */
			if (hugepg_tbl[j].physaddr !=
				hugepg_tbl[j-1].physaddr - hugepage_sz)
				break;
#else
			if (hugepg_tbl[j].physaddr !=
				hugepg_tbl[j-1].physaddr + hugepage_sz)
				break;
#endif
		}
		num_pages = j - i;
		vma_len = num_pages * hugepage_sz;

		socket = hugepg_tbl[i].socket_id;

		/* get the biggest virtual memory area up to
		 * vma_len. If it fails, vma_addr is NULL, so
		 * let the kernel provide the address. */
		vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);

		/* If we can't find a big enough virtual area, work out how many pages
		 * we are going to get */
		if (vma_addr == NULL)
			j = i + 1;
		else if (vma_len != num_pages * hugepage_sz) {
			num_pages = vma_len / hugepage_sz;
			j = i + num_pages;
		}

		hugepg_tbl[page_idx].file_id = page_idx;
		eal_get_hugefile_path(filepath,
				sizeof(filepath),
				hpi->hugedir,
				hugepg_tbl[page_idx].file_id);

		/* try to create hugepage file */
		fd = open(filepath, O_CREAT | O_RDWR, 0755);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
			return -1;
		}

		total_size = 0;
		for (;i < j; i++) {

			/* unmap current segment */
			if (total_size > 0)
				munmap(vma_addr, total_size);

			/* unmap original page */
			munmap(hugepg_tbl[i].orig_va, hugepage_sz);
			unlink(hugepg_tbl[i].filepath);

			total_size += hugepage_sz;

			old_addr = vma_addr;

			/* map new, bigger segment */
			vma_addr = mmap(vma_addr, total_size,
					PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

			if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
				RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
				close(fd);
				return -1;
			}

			/* touch the page. this is needed because the kernel postpones
			 * mapping creation until the first page fault; touching pins the
			 * page down, marks it as used and gets it into the process's
			 * pagemap.
			 */
			for (offset = 0; offset < total_size; offset += hugepage_sz)
				*((volatile uint8_t*) RTE_PTR_ADD(vma_addr, offset));
		}

		/* set shared flock on the file. */
		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
				__func__, strerror(errno));
			close(fd);
			return -1;
		}

		snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
				filepath);

		physaddr = rte_mem_virt2phy(vma_addr);

		if (physaddr == RTE_BAD_PHYS_ADDR) {
			close(fd);	/* avoid leaking the descriptor on this error path */
			return -1;
		}

		hugepg_tbl[page_idx].final_va = vma_addr;
		hugepg_tbl[page_idx].physaddr = physaddr;
		hugepg_tbl[page_idx].repeated = num_pages;
		hugepg_tbl[page_idx].socket_id = socket;

		close(fd);

		/* verify the memory segment - that is, check that every VA corresponds
		 * to the physical address we expect to see
		 */
		for (offset = 0; offset < vma_len; offset += hugepage_sz) {
			uint64_t expected_physaddr;

			expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
			page_addr = RTE_PTR_ADD(vma_addr, offset);
			physaddr = rte_mem_virt2phy(page_addr);

			if (physaddr != expected_physaddr) {
				RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
						"at %p (offset 0x%" PRIx64 "): 0x%" PRIx64
						" (expected 0x%" PRIx64 ")\n",
						page_addr, (uint64_t)offset, physaddr, expected_physaddr);
				return -1;
			}
		}

		/* zero out the whole segment */
		memset(hugepg_tbl[page_idx].final_va, 0, total_size);

		page_idx++;
	}

	/* zero out the rest */
	memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
	return page_idx;
}
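
Both remap_all_hugepages() and the map_all_hugepages() variants below lean on get_virtual_area() to reserve a run of address space before hugepage files are mapped over it. The helper below is only a minimal sketch of that idea, not the actual EAL implementation (the real one also honours a --base-virtaddr hint, aligns the result, and has fallbacks for older kernels); it assumes <stddef.h> and <sys/mman.h>.

/* Sketch only: reserve the largest anonymous mapping up to *size,
 * shrinking one hugepage at a time, then release it so the caller can
 * mmap() hugepage files at the returned address. */
static void *
get_virtual_area(size_t *size, size_t hugepage_sz)
{
	void *addr = MAP_FAILED;

	while (*size > 0) {
		addr = mmap(NULL, *size, PROT_READ,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr != MAP_FAILED)
			break;
		*size -= hugepage_sz;	/* could not reserve; try a smaller area */
	}
	if (addr == MAP_FAILED || *size == 0)
		return NULL;

	/* give the reservation back; the hugepage mmap() reuses the address */
	munmap(addr, *size);
	return addr;
}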
Example #2
/*
 * Mmap all hugepages of the hugepage table: it first opens a file in
 * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
 * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
 * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
 * map contiguous physical blocks in contiguous virtual blocks.
 */
static int
map_all_hugepages(struct hugepage *hugepg_tbl,
		struct hugepage_info *hpi, int orig)
{
	int fd;
	unsigned i;
	void *virtaddr;
	void *vma_addr = NULL;
	uint64_t vma_len = 0;

	for (i = 0; i < hpi->num_pages[0]; i++) {
		uint64_t hugepage_sz = hpi->hugepage_sz;

		if (orig) {
			hugepg_tbl[i].file_id = i;
			hugepg_tbl[i].size = hugepage_sz;
			eal_get_hugefile_path(hugepg_tbl[i].filepath,
					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
					hugepg_tbl[i].file_id);
			hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
		}
#ifndef RTE_ARCH_X86_64
		/* for 32-bit systems, don't remap 1G pages, just reuse original
		 * map address as final map address.
		 */
		else if (hugepage_sz == RTE_PGSIZE_1G){
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			continue;
		}
#endif
		else if (vma_len == 0) {
			unsigned j, num_pages;

			/* reserve a virtual area for next contiguous
			 * physical block: count the number of
			 * contiguous physical pages. */
			for (j = i+1; j < hpi->num_pages[0] ; j++) {
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr + hugepage_sz)
					break;
			}
			num_pages = j - i;
			vma_len = num_pages * hugepage_sz;

			/* get the biggest virtual memory area up to
			 * vma_len. If it fails, vma_addr is NULL, so
			 * let the kernel provide the address. */
			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
			if (vma_addr == NULL)
				vma_len = hugepage_sz;
		}

		/* try to create hugepage file */
		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}

		virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
				MAP_SHARED, fd, 0);
		if (virtaddr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__,
					strerror(errno));
			close(fd);
			return -1;
		}

		if (orig) {
			hugepg_tbl[i].orig_va = virtaddr;
			memset(virtaddr, 0, hugepage_sz);
		}
		else {
			hugepg_tbl[i].final_va = virtaddr;
		}

		/* close the file descriptor, files will be locked later */
		close(fd);

		vma_addr = (char *)vma_addr + hugepage_sz;
		vma_len -= hugepage_sz;
	}
	return 0;
}
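
The orig flag implements the two-pass scheme described in the header comment: pass one maps pages anywhere and records orig_va, the table is then sorted by physical address, and pass two lays physically contiguous runs out in contiguous virtual memory. The fragment below is an illustrative condensation of that calling sequence; find_physaddrs() and cmp_physaddr() are assumed helpers standing in for the surrounding EAL code, and <stdlib.h> is assumed for qsort().

/* assumed helpers; the real EAL has equivalents playing these roles */
static int find_physaddrs(struct hugepage *tbl, struct hugepage_info *hpi);
static int cmp_physaddr(const void *a, const void *b);

/* illustration only: condensed two-pass mapping flow */
static int
map_hugepages_two_pass(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
{
	/* pass 1: map every page wherever the kernel likes; orig_va is kept */
	if (map_all_hugepages(hugepg_tbl, hpi, 1) < 0)
		return -1;

	/* learn each page's physical address, then sort the table by it so
	 * physically contiguous pages end up in adjacent entries */
	if (find_physaddrs(hugepg_tbl, hpi) < 0)
		return -1;
	qsort(hugepg_tbl, hpi->num_pages[0],
			sizeof(hugepg_tbl[0]), cmp_physaddr);

	/* pass 2: remap contiguous physical runs into contiguous virtual
	 * ranges; the result lands in final_va */
	return map_all_hugepages(hugepg_tbl, hpi, 0);
}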
Example #3
/*
 * Mmap all hugepages of the hugepage table: it first opens a file in
 * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
 * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
 * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
 * map contiguous physical blocks in contiguous virtual blocks.
 */
static int
map_all_hugepages(struct hugepage_file *hugepg_tbl,
		struct hugepage_info *hpi, int orig)
{
	int fd;
	unsigned i;
	void *virtaddr;
	void *vma_addr = NULL;
	size_t vma_len = 0;

#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
	RTE_SET_USED(vma_len);
#endif

	for (i = 0; i < hpi->num_pages[0]; i++) {
		uint64_t hugepage_sz = hpi->hugepage_sz;

		if (orig) {
			hugepg_tbl[i].file_id = i;
			hugepg_tbl[i].size = hugepage_sz;
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
			eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
					hugepg_tbl[i].file_id);
#else
			eal_get_hugefile_path(hugepg_tbl[i].filepath,
					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
					hugepg_tbl[i].file_id);
#endif
			hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
		}
#ifndef RTE_ARCH_64
		/* for 32-bit systems, don't remap 1G and 16G pages, just reuse
		 * original map address as final map address.
		 */
		else if ((hugepage_sz == RTE_PGSIZE_1G)
			|| (hugepage_sz == RTE_PGSIZE_16G)) {
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			continue;
		}
#endif

#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
		else if (vma_len == 0) {
			unsigned j, num_pages;

			/* reserve a virtual area for next contiguous
			 * physical block: count the number of
			 * contiguous physical pages. */
			for (j = i+1; j < hpi->num_pages[0] ; j++) {
#ifdef RTE_ARCH_PPC_64
				/* The physical addresses are sorted in
				 * descending order on PPC64 */
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr - hugepage_sz)
					break;
#else
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr + hugepage_sz)
					break;
#endif
			}
			num_pages = j - i;
			vma_len = num_pages * hugepage_sz;

			/* get the biggest virtual memory area up to
			 * vma_len. If it fails, vma_addr is NULL, so
			 * let the kernel provide the address. */
			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
			if (vma_addr == NULL)
				vma_len = hugepage_sz;
		}
#endif

		/* try to create hugepage file */
		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}

		virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
				MAP_SHARED, fd, 0);
		if (virtaddr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__,
					strerror(errno));
			close(fd);
			return -1;
		}

		if (orig) {
			hugepg_tbl[i].orig_va = virtaddr;
			memset(virtaddr, 0, hugepage_sz);
		}
		else {
			hugepg_tbl[i].final_va = virtaddr;
		}

		/* set shared flock on the file. */
		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
				__func__, strerror(errno));
			close(fd);
			return -1;
		}

		close(fd);

		vma_addr = (char *)vma_addr + hugepage_sz;
		vma_len -= hugepage_sz;
	}
	return 0;
}
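
Compared with Example #2, this variant takes a shared flock on each hugepage file so that other processes can tell the file is still mapped. As a hedged illustration of the other half of that contract, a cleanup routine could try to take an exclusive lock and only unlink files that nobody holds; the helper below is hypothetical, not part of the EAL API, and assumes <fcntl.h>, <sys/file.h> and <unistd.h>.

/* Hypothetical helper: returns 1 if no process holds a shared lock on the
 * hugepage file, 0 if it is still in use, -1 if it cannot be opened. */
static int
hugefile_is_unused(const char *path)
{
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;
	if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
		/* a mapper still holds LOCK_SH; leave the file alone */
		close(fd);
		return 0;
	}
	flock(fd, LOCK_UN);
	close(fd);
	return 1;	/* nobody is using it; safe to unlink */
}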
Example #4
/*
 * Mmap all hugepages of the hugepage table: it first opens a file in
 * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
 * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
 * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
 * map contiguous physical blocks in contiguous virtual blocks.
 */
static unsigned
map_all_hugepages(struct hugepage_file *hugepg_tbl,
		struct hugepage_info *hpi, int orig)
{
	int fd;
	unsigned i;
	void *virtaddr;
	void *vma_addr = NULL;
	size_t vma_len = 0;

	for (i = 0; i < hpi->num_pages[0]; i++) {
		uint64_t hugepage_sz = hpi->hugepage_sz;

		if (orig) {
			hugepg_tbl[i].file_id = i;
			hugepg_tbl[i].size = hugepage_sz;
			eal_get_hugefile_path(hugepg_tbl[i].filepath,
					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
					hugepg_tbl[i].file_id);
			hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
		}
#ifndef RTE_ARCH_64
		/* for 32-bit systems, don't remap 1G and 16G pages, just reuse
		 * original map address as final map address.
		 */
		else if ((hugepage_sz == RTE_PGSIZE_1G)
			|| (hugepage_sz == RTE_PGSIZE_16G)) {
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			continue;
		}
#endif
		else if (vma_len == 0) {
			unsigned j, num_pages;

			/* reserve a virtual area for next contiguous
			 * physical block: count the number of
			 * contiguous physical pages. */
			for (j = i+1; j < hpi->num_pages[0] ; j++) {
#ifdef RTE_ARCH_PPC_64
				/* The physical addresses are sorted in
				 * descending order on PPC64 */
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr - hugepage_sz)
					break;
#else
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr + hugepage_sz)
					break;
#endif
			}
			num_pages = j - i;
			vma_len = num_pages * hugepage_sz;

			/* get the biggest virtual memory area up to
			 * vma_len. If it fails, vma_addr is NULL, so
			 * let the kernel provide the address. */
			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
			if (vma_addr == NULL)
				vma_len = hugepage_sz;
		}

		/* try to create hugepage file */
		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0600);
		if (fd < 0) {
			RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
					strerror(errno));
			return i;
		}

		/* map the segment, and populate page tables,
		 * the kernel fills this segment with zeros */
		virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_POPULATE, fd, 0);
		if (virtaddr == MAP_FAILED) {
			RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__,
					strerror(errno));
			close(fd);
			return i;
		}

		if (orig) {
			hugepg_tbl[i].orig_va = virtaddr;
		}
		else {
			hugepg_tbl[i].final_va = virtaddr;
		}

		if (orig) {
			/* In Linux, hugetlb limitations such as cgroups are
			 * enforced at fault time instead of at mmap() time,
			 * even with MAP_POPULATE, and the kernel reports the
			 * failure with SIGBUS. To avoid being killed, save
			 * the stack environment here; if SIGBUS happens, we
			 * jump back to this point (see the sigsetjmp sketch
			 * after this function).
			 */
			if (huge_wrap_sigsetjmp()) {
				RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more "
					"hugepages of size %u MB\n",
					(unsigned)(hugepage_sz / 0x100000));
				munmap(virtaddr, hugepage_sz);
				close(fd);
				unlink(hugepg_tbl[i].filepath);
				return i;
			}
			*(int *)virtaddr = 0;
		}


		/* set shared flock on the file. */
		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
			RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n",
				__func__, strerror(errno));
			close(fd);
			return i;
		}

		close(fd);

		vma_addr = (char *)vma_addr + hugepage_sz;
		vma_len -= hugepage_sz;
	}

	return i;
}
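
The huge_wrap_sigsetjmp() call above is what turns the SIGBUS raised by a failed hugetlb fault into an orderly early return instead of a crash. Below is a minimal sketch of such a trampoline, assuming a handler installed with sigaction() around the mapping loop; the real EAL code differs in detail (it restores the previous handler and wraps sigsetjmp() in a function so that no clobber-prone locals share its stack frame).

#include <setjmp.h>
#include <signal.h>

/* sketch of the SIGBUS trampoline assumed by the loop above */
static sigjmp_buf huge_jmpenv;

static void
huge_sigbus_handler(int signo)
{
	(void)signo;
	siglongjmp(huge_jmpenv, 1);	/* jump back into map_all_hugepages() */
}

/* returns 0 when saving the context, non-zero when re-entered via SIGBUS */
static int
huge_wrap_sigsetjmp(void)
{
	return sigsetjmp(huge_jmpenv, 1);
}

/* install/remove the handler around the mapping loop, e.g.:
 *	struct sigaction sa = { .sa_handler = huge_sigbus_handler };
 *	sigaction(SIGBUS, &sa, &old_sa);
 *	... map pages ...
 *	sigaction(SIGBUS, &old_sa, NULL);
 */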