/*
 * Remaps all hugepages into single file segments
 */
static int
remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
	int fd;
	unsigned i = 0, j, num_pages, page_idx = 0;
	void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
	size_t vma_len = 0;
	size_t hugepage_sz = hpi->hugepage_sz;
	size_t total_size, offset;
	char filepath[MAX_HUGEPAGE_PATH];
	phys_addr_t physaddr;
	int socket;

	while (i < hpi->num_pages[0]) {

#ifndef RTE_ARCH_64
		/* for 32-bit systems, don't remap 1G pages and 16G pages,
		 * just reuse original map address as final map address.
		 */
		if ((hugepage_sz == RTE_PGSIZE_1G)
			|| (hugepage_sz == RTE_PGSIZE_16G)) {
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			i++;
			continue;
		}
#endif

		/* reserve a virtual area for next contiguous
		 * physical block: count the number of
		 * contiguous physical pages. */
		for (j = i+1; j < hpi->num_pages[0]; j++) {
#ifdef RTE_ARCH_PPC_64
			/* The physical addresses are sorted in descending
			 * order on PPC64 */
			if (hugepg_tbl[j].physaddr !=
				hugepg_tbl[j-1].physaddr - hugepage_sz)
				break;
#else
			if (hugepg_tbl[j].physaddr !=
				hugepg_tbl[j-1].physaddr + hugepage_sz)
				break;
#endif
		}
		num_pages = j - i;
		vma_len = num_pages * hugepage_sz;

		socket = hugepg_tbl[i].socket_id;

		/* get the biggest virtual memory area up to
		 * vma_len. If it fails, vma_addr is NULL, so
		 * let the kernel provide the address. */
		vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);

		/* If we can't find a big enough virtual area, work out how many
		 * pages we are going to get */
		if (vma_addr == NULL)
			j = i + 1;
		else if (vma_len != num_pages * hugepage_sz) {
			num_pages = vma_len / hugepage_sz;
			j = i + num_pages;
		}

		hugepg_tbl[page_idx].file_id = page_idx;
		eal_get_hugefile_path(filepath,
				sizeof(filepath),
				hpi->hugedir,
				hugepg_tbl[page_idx].file_id);

		/* try to create hugepage file */
		fd = open(filepath, O_CREAT | O_RDWR, 0755);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n",
					__func__, strerror(errno));
			return -1;
		}

		total_size = 0;
		for (; i < j; i++) {

			/* unmap current segment */
			if (total_size > 0)
				munmap(vma_addr, total_size);

			/* unmap original page */
			munmap(hugepg_tbl[i].orig_va, hugepage_sz);
			unlink(hugepg_tbl[i].filepath);

			total_size += hugepage_sz;

			old_addr = vma_addr;

			/* map new, bigger segment */
			vma_addr = mmap(vma_addr, total_size,
					PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

			if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
				RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n",
						__func__, strerror(errno));
				close(fd);
				return -1;
			}

			/* touch the page. this is needed because kernel postpones mapping
			 * creation until the first page fault. with this, we pin down
			 * the page and it is marked as used and gets into process' pagemap. */
			for (offset = 0; offset < total_size; offset += hugepage_sz)
				*((volatile uint8_t *) RTE_PTR_ADD(vma_addr, offset));
		}

		/* set shared flock on the file. */
		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
					__func__, strerror(errno));
			close(fd);
			return -1;
		}

		snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
				filepath);

		physaddr = rte_mem_virt2phy(vma_addr);

		if (physaddr == RTE_BAD_PHYS_ADDR)
			return -1;

		hugepg_tbl[page_idx].final_va = vma_addr;

		hugepg_tbl[page_idx].physaddr = physaddr;

		hugepg_tbl[page_idx].repeated = num_pages;

		hugepg_tbl[page_idx].socket_id = socket;

		close(fd);

		/* verify the memory segment - that is, check that every VA corresponds
		 * to the physical address we expect to see */
		for (offset = 0; offset < vma_len; offset += hugepage_sz) {
			uint64_t expected_physaddr;

			expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
			page_addr = RTE_PTR_ADD(vma_addr, offset);
			physaddr = rte_mem_virt2phy(page_addr);

			if (physaddr != expected_physaddr) {
				RTE_LOG(ERR, EAL,
						"Segment sanity check failed: wrong physaddr "
						"at %p (offset 0x%" PRIx64
						": 0x%" PRIx64 " (expected 0x%" PRIx64 ")\n",
						page_addr, offset, physaddr,
						expected_physaddr);
				return -1;
			}
		}

		/* zero out the whole segment */
		memset(hugepg_tbl[page_idx].final_va, 0, total_size);

		page_idx++;
	}

	/* zero out the rest */
	memset(&hugepg_tbl[page_idx], 0,
			(hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));

	return page_idx;
}
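/*
 * Illustrative sketch (not the DPDK implementation): remap_all_hugepages()
 * above relies on rte_mem_virt2phy() to translate a virtual address into a
 * physical address for the segment sanity check. On Linux this is normally
 * done by reading /proc/self/pagemap; the stand-alone helper below shows the
 * idea. The name virt2phys_sketch() and the BAD_PHYS_ADDR constant are made
 * up for this example, error handling is simplified, and note that recent
 * kernels return zeroed page frame numbers to unprivileged readers.
 */
#include <stdint.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>

#define BAD_PHYS_ADDR ((uint64_t)-1)

static uint64_t
virt2phys_sketch(const void *virtaddr)
{
	int fd;
	uint64_t entry;
	uint64_t page_size = (uint64_t)sysconf(_SC_PAGESIZE);
	off_t offset = (off_t)(((uintptr_t)virtaddr / page_size) * sizeof(uint64_t));

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0)
		return BAD_PHYS_ADDR;

	/* each pagemap entry is a 64-bit word indexed by virtual page number */
	if (pread(fd, &entry, sizeof(entry), offset) != (ssize_t)sizeof(entry)) {
		close(fd);
		return BAD_PHYS_ADDR;
	}
	close(fd);

	/* bit 63: page present; bits 0-54: page frame number */
	if ((entry & (1ULL << 63)) == 0)
		return BAD_PHYS_ADDR;

	return (entry & ((1ULL << 55) - 1)) * page_size
		+ ((uintptr_t)virtaddr % page_size);
}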
/*
 * Mmap all hugepages of hugepage table: it first opens a file in
 * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
 * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
 * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
 * map contiguous physical blocks in contiguous virtual blocks.
 */
static int
map_all_hugepages(struct hugepage *hugepg_tbl,
		struct hugepage_info *hpi, int orig)
{
	int fd;
	unsigned i;
	void *virtaddr;
	void *vma_addr = NULL;
	uint64_t vma_len = 0;

	for (i = 0; i < hpi->num_pages[0]; i++) {
		uint64_t hugepage_sz = hpi->hugepage_sz;

		if (orig) {
			hugepg_tbl[i].file_id = i;
			hugepg_tbl[i].size = hugepage_sz;
			eal_get_hugefile_path(hugepg_tbl[i].filepath,
					sizeof(hugepg_tbl[i].filepath),
					hpi->hugedir,
					hugepg_tbl[i].file_id);
			hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
		}
#ifndef RTE_ARCH_X86_64
		/* for 32-bit systems, don't remap 1G pages, just reuse original
		 * map address as final map address.
		 */
		else if (hugepage_sz == RTE_PGSIZE_1G) {
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			continue;
		}
#endif
		else if (vma_len == 0) {
			unsigned j, num_pages;

			/* reserve a virtual area for next contiguous
			 * physical block: count the number of
			 * contiguous physical pages. */
			for (j = i+1; j < hpi->num_pages[0]; j++) {
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr + hugepage_sz)
					break;
			}
			num_pages = j - i;
			vma_len = num_pages * hugepage_sz;

			/* get the biggest virtual memory area up to
			 * vma_len. If it fails, vma_addr is NULL, so
			 * let the kernel provide the address. */
			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
			if (vma_addr == NULL)
				vma_len = hugepage_sz;
		}

		/* try to create hugepage file */
		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}

		virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
				MAP_SHARED, fd, 0);
		if (virtaddr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__,
					strerror(errno));
			close(fd);
			return -1;
		}

		if (orig) {
			hugepg_tbl[i].orig_va = virtaddr;
			memset(virtaddr, 0, hugepage_sz);
		}
		else {
			hugepg_tbl[i].final_va = virtaddr;
		}

		/* close the file descriptor, files will be locked later */
		close(fd);

		vma_addr = (char *)vma_addr + hugepage_sz;
		vma_len -= hugepage_sz;
	}
	return 0;
}
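/*
 * Illustrative sketch (an assumption, not the EAL's actual get_virtual_area()):
 * the versions of map_all_hugepages() here ask get_virtual_area() for a large
 * virtual address window in which physically contiguous pages can be mapped
 * back-to-back. One way to obtain such a window is to reserve an anonymous
 * mapping slightly larger than requested, release it, and hand back a
 * hugepage-aligned address inside it. The helper name reserve_virtual_area()
 * and the shrink-and-retry policy are assumptions for this example; note the
 * small race window between munmap() and the caller's later mmap().
 */
#include <stdint.h>
#include <stddef.h>
#include <sys/mman.h>

static void *
reserve_virtual_area(size_t *size, size_t hugepage_sz)
{
	void *addr = MAP_FAILED;
	uintptr_t aligned;

	/* try progressively smaller areas until one can be reserved */
	while (*size > 0) {
		addr = mmap(NULL, *size + hugepage_sz, PROT_READ,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr != MAP_FAILED)
			break;
		*size -= hugepage_sz;
	}
	if (*size == 0)
		return NULL;

	/* release the reservation; the caller re-maps hugepages into this
	 * (now free) window at the aligned address returned below */
	munmap(addr, *size + hugepage_sz);

	/* round up to a hugepage boundary (hugepage_sz is a power of two) */
	aligned = ((uintptr_t)addr + hugepage_sz - 1) & ~((uintptr_t)hugepage_sz - 1);
	return (void *)aligned;
}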
/*
 * Mmap all hugepages of hugepage table: it first opens a file in
 * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
 * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
 * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
 * map contiguous physical blocks in contiguous virtual blocks.
 */
static int
map_all_hugepages(struct hugepage_file *hugepg_tbl,
		struct hugepage_info *hpi, int orig)
{
	int fd;
	unsigned i;
	void *virtaddr;
	void *vma_addr = NULL;
	size_t vma_len = 0;

#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
	RTE_SET_USED(vma_len);
#endif

	for (i = 0; i < hpi->num_pages[0]; i++) {
		uint64_t hugepage_sz = hpi->hugepage_sz;

		if (orig) {
			hugepg_tbl[i].file_id = i;
			hugepg_tbl[i].size = hugepage_sz;
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
			eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
					sizeof(hugepg_tbl[i].filepath),
					hpi->hugedir,
					hugepg_tbl[i].file_id);
#else
			eal_get_hugefile_path(hugepg_tbl[i].filepath,
					sizeof(hugepg_tbl[i].filepath),
					hpi->hugedir,
					hugepg_tbl[i].file_id);
#endif
			hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
		}
#ifndef RTE_ARCH_64
		/* for 32-bit systems, don't remap 1G and 16G pages, just reuse
		 * original map address as final map address.
		 */
		else if ((hugepage_sz == RTE_PGSIZE_1G)
			|| (hugepage_sz == RTE_PGSIZE_16G)) {
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			continue;
		}
#endif

#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
		else if (vma_len == 0) {
			unsigned j, num_pages;

			/* reserve a virtual area for next contiguous
			 * physical block: count the number of
			 * contiguous physical pages. */
			for (j = i+1; j < hpi->num_pages[0]; j++) {
#ifdef RTE_ARCH_PPC_64
				/* The physical addresses are sorted in
				 * descending order on PPC64 */
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr - hugepage_sz)
					break;
#else
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr + hugepage_sz)
					break;
#endif
			}
			num_pages = j - i;
			vma_len = num_pages * hugepage_sz;

			/* get the biggest virtual memory area up to
			 * vma_len. If it fails, vma_addr is NULL, so
			 * let the kernel provide the address. */
			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
			if (vma_addr == NULL)
				vma_len = hugepage_sz;
		}
#endif

		/* try to create hugepage file */
		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}

		virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
				MAP_SHARED, fd, 0);
		if (virtaddr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__,
					strerror(errno));
			close(fd);
			return -1;
		}

		if (orig) {
			hugepg_tbl[i].orig_va = virtaddr;
			memset(virtaddr, 0, hugepage_sz);
		}
		else {
			hugepg_tbl[i].final_va = virtaddr;
		}

		/* set shared flock on the file. */
		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
				__func__, strerror(errno));
			close(fd);
			return -1;
		}

		close(fd);

		vma_addr = (char *)vma_addr + hugepage_sz;
		vma_len -= hugepage_sz;
	}

	return 0;
}
/*
 * Mmap all hugepages of hugepage table: it first opens a file in
 * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
 * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
 * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
 * map contiguous physical blocks in contiguous virtual blocks.
 */
static unsigned
map_all_hugepages(struct hugepage_file *hugepg_tbl,
		struct hugepage_info *hpi, int orig)
{
	int fd;
	unsigned i;
	void *virtaddr;
	void *vma_addr = NULL;
	size_t vma_len = 0;

	for (i = 0; i < hpi->num_pages[0]; i++) {
		uint64_t hugepage_sz = hpi->hugepage_sz;

		if (orig) {
			hugepg_tbl[i].file_id = i;
			hugepg_tbl[i].size = hugepage_sz;
			eal_get_hugefile_path(hugepg_tbl[i].filepath,
					sizeof(hugepg_tbl[i].filepath),
					hpi->hugedir,
					hugepg_tbl[i].file_id);
			hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
		}
#ifndef RTE_ARCH_64
		/* for 32-bit systems, don't remap 1G and 16G pages, just reuse
		 * original map address as final map address.
		 */
		else if ((hugepage_sz == RTE_PGSIZE_1G)
			|| (hugepage_sz == RTE_PGSIZE_16G)) {
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			continue;
		}
#endif
		else if (vma_len == 0) {
			unsigned j, num_pages;

			/* reserve a virtual area for next contiguous
			 * physical block: count the number of
			 * contiguous physical pages. */
			for (j = i+1; j < hpi->num_pages[0]; j++) {
#ifdef RTE_ARCH_PPC_64
				/* The physical addresses are sorted in
				 * descending order on PPC64 */
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr - hugepage_sz)
					break;
#else
				if (hugepg_tbl[j].physaddr !=
				    hugepg_tbl[j-1].physaddr + hugepage_sz)
					break;
#endif
			}
			num_pages = j - i;
			vma_len = num_pages * hugepage_sz;

			/* get the biggest virtual memory area up to
			 * vma_len. If it fails, vma_addr is NULL, so
			 * let the kernel provide the address. */
			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
			if (vma_addr == NULL)
				vma_len = hugepage_sz;
		}

		/* try to create hugepage file */
		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0600);
		if (fd < 0) {
			RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
					strerror(errno));
			return i;
		}

		/* map the segment, and populate page tables,
		 * the kernel fills this segment with zeros */
		virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_POPULATE, fd, 0);
		if (virtaddr == MAP_FAILED) {
			RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__,
					strerror(errno));
			close(fd);
			return i;
		}

		if (orig) {
			hugepg_tbl[i].orig_va = virtaddr;
		}
		else {
			hugepg_tbl[i].final_va = virtaddr;
		}

		if (orig) {
			/* In linux, hugetlb limitations, like cgroup, are
			 * enforced at fault time instead of mmap(), even
			 * with the option of MAP_POPULATE. The kernel will
			 * send a SIGBUS signal. To avoid being killed, save
			 * the stack environment here; if SIGBUS happens, we
			 * can jump back here.
			 */
			if (huge_wrap_sigsetjmp()) {
				RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more "
					"hugepages of size %u MB\n",
					(unsigned)(hugepage_sz / 0x100000));
				munmap(virtaddr, hugepage_sz);
				close(fd);
				unlink(hugepg_tbl[i].filepath);
				return i;
			}
			*(int *)virtaddr = 0;
		}

		/* set shared flock on the file. */
		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
			RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n",
				__func__, strerror(errno));
			close(fd);
			return i;
		}

		close(fd);

		vma_addr = (char *)vma_addr + hugepage_sz;
		vma_len -= hugepage_sz;
	}

	return i;
}
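/*
 * Illustrative sketch only: huge_wrap_sigsetjmp(), used in the last version
 * above, is not shown in this excerpt. The usual guard for the SIGBUS that
 * the kernel raises when a hugepage cannot actually be faulted in (e.g.
 * cgroup or hugetlb accounting limits, even with MAP_POPULATE) is a
 * sigsetjmp()/siglongjmp() pair around the first write to the page. The
 * handler installation below is an assumption made for this example and may
 * differ from how the EAL actually wires it up.
 */
#include <setjmp.h>
#include <signal.h>
#include <string.h>

static sigjmp_buf huge_jmpenv;

static void
huge_sigbus_handler(int signo)
{
	(void)signo;
	/* jump back to the sigsetjmp() point; the faulting write is abandoned */
	siglongjmp(huge_jmpenv, 1);
}

/* returns 0 on the initial call, non-zero when re-entered via SIGBUS */
static int
huge_wrap_sigsetjmp(void)
{
	return sigsetjmp(huge_jmpenv, 1);
}

/* install the SIGBUS handler once before the mapping loop runs */
static int
huge_register_sigbus(void)
{
	struct sigaction action;

	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_handler = huge_sigbus_handler;
	return sigaction(SIGBUS, &action, NULL);
}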