static int
rte_eal_contigmem_attach(void)
{
    const struct hugepage_info *hpi = MAP_FAILED;
    int fd_hugepage_info, fd_hugepage = -1;
    unsigned i = 0;
    struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;

    /* Obtain a file descriptor for hugepage_info */
    fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY);
    if (fd_hugepage_info < 0) {
        RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
        return -1;
    }

    /* Map the shared hugepage_info into the process address space */
    hpi = mmap(NULL, sizeof(struct hugepage_info), PROT_READ,
            MAP_PRIVATE, fd_hugepage_info, 0);
    if (hpi == MAP_FAILED) { /* mmap reports failure as MAP_FAILED, not NULL */
        RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
        goto error;
    }

    /* Obtain a file descriptor for contiguous memory */
    fd_hugepage = open(hpi->hugedir, O_RDWR);
    if (fd_hugepage < 0) {
        RTE_LOG(ERR, EAL, "Could not open %s\n", hpi->hugedir);
        goto error;
    }

    /* Map the contiguous memory into each memory segment */
    for (i = 0; i < hpi->num_pages[0]; i++) {
        void *addr;
        struct rte_memseg *seg = &mcfg->memseg[i];

        addr = mmap(seg->addr, hpi->hugepage_sz, PROT_READ|PROT_WRITE,
                MAP_SHARED|MAP_FIXED, fd_hugepage,
                i * PAGE_SIZE);
        if (addr == MAP_FAILED || addr != seg->addr) {
            RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
                    i, hpi->hugedir);
            goto error;
        }
    }

    /* hugepage_info is no longer required */
    munmap((void *)(uintptr_t)hpi, sizeof(struct hugepage_info));
    close(fd_hugepage_info);
    close(fd_hugepage);
    return 0;

error:
    /* also release the hugepage_info mapping if it was established */
    if (hpi != MAP_FAILED)
        munmap((void *)(uintptr_t)hpi, sizeof(struct hugepage_info));
    if (fd_hugepage_info >= 0)
        close(fd_hugepage_info);
    if (fd_hugepage >= 0)
        close(fd_hugepage);
    return -1;
}
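/*
 * For illustration only - not part of the EAL. A minimal sketch of the
 * offset convention the attach loop above relies on: buffer i of the
 * contigmem device appears at file offset i * PAGE_SIZE, even though the
 * mapped length is the (larger) hugepage size. The helper name
 * contigmem_map_buffer() is hypothetical; unlike the loop above it does not
 * use MAP_FIXED, so the resulting address is only useful for local
 * inspection, not for sharing pointers with the primary process.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>

static void *
contigmem_map_buffer(int fd, unsigned idx, size_t buf_sz, size_t page_sz)
{
    /* select buffer idx via the file offset; map the full buffer length */
    void *addr = mmap(NULL, buf_sz, PROT_READ | PROT_WRITE,
            MAP_SHARED, fd, (off_t)idx * (off_t)page_sz);
    if (addr == MAP_FAILED) {
        perror("mmap");
        return NULL;
    }
    return addr;
}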
/*
 * This creates the memory mappings in the secondary process to match those
 * of the primary (server) process. It goes through each memory segment in
 * the DPDK runtime configuration, finds the hugepages which form that
 * segment, and maps them in order to form a contiguous block in the virtual
 * memory space.
 */
static int
rte_eal_hugepage_attach(void)
{
    const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
    const struct hugepage *hp = NULL;
    unsigned num_hp = 0;
    unsigned i, s = 0; /* s used to track the segment number */
    off_t size;
    int fd, fd_zero = -1, fd_hugepage = -1;

    if (aslr_enabled() > 0) {
        RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
                "(ASLR) is enabled in the kernel.\n");
        RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory "
                "into secondary processes\n");
    }

    fd_zero = open("/dev/zero", O_RDONLY);
    if (fd_zero < 0) {
        RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
        goto error;
    }
    fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY);
    if (fd_hugepage < 0) {
        RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
        goto error;
    }

    size = getFileSize(fd_hugepage);
    hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
    if (hp == MAP_FAILED) { /* mmap reports failure as MAP_FAILED, not NULL */
        RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
        goto error;
    }

    num_hp = size / sizeof(struct hugepage);
    RTE_LOG(DEBUG, EAL, "Analysing %u hugepages\n", num_hp);

    while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0) {
        void *addr, *base_addr;
        uintptr_t offset = 0;

        /*
         * fd_zero is mmapped to reserve a contiguous block of virtual
         * addresses of the appropriate size - use mmap to attempt to get
         * an address identical to the one the primary process got.
         */
        base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
                PROT_READ, MAP_PRIVATE, fd_zero, 0);
        if (base_addr == MAP_FAILED || base_addr != mcfg->memseg[s].addr) {
            RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
                    "in /dev/zero to requested address [%p]\n",
                    (unsigned long long)mcfg->memseg[s].len,
                    mcfg->memseg[s].addr);
            if (aslr_enabled() > 0)
                RTE_LOG(ERR, EAL, "It is recommended to disable ASLR in the "
                        "kernel and retry running both primary and "
                        "secondary processes\n");
            goto error;
        }
        /* free the reservation so we can map the hugepages into the space */
        munmap(base_addr, mcfg->memseg[s].len);

        /*
         * find the hugepages for this segment and map them.
         * we don't need to worry about order, as the primary process sorted
         * the entries before it did the second mmap of them.
         */
        for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++) {
            if (hp[i].memseg_id == (int)s) {
                fd = open(hp[i].filepath, O_RDWR);
                if (fd < 0) {
                    RTE_LOG(ERR, EAL, "Could not open %s\n", hp[i].filepath);
                    goto error;
                }
                addr = mmap(RTE_PTR_ADD(base_addr, offset),
                        hp[i].size, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_FIXED, fd, 0);
                close(fd); /* close file both on success and on failure */
                if (addr == MAP_FAILED) {
                    RTE_LOG(ERR, EAL, "Could not mmap %s\n", hp[i].filepath);
                    goto error;
                }
                offset += hp[i].size;
            }
        }
        RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,
                (unsigned long long)mcfg->memseg[s].len);
        s++;
    }

    /* the hugepage table is no longer needed once the mappings are made */
    munmap((void *)(uintptr_t)hp, size);
    close(fd_zero);
    close(fd_hugepage);
    return 0;

error:
    if (hp != NULL && hp != MAP_FAILED)
        munmap((void *)(uintptr_t)hp, size);
    if (fd_zero >= 0)
        close(fd_zero);
    if (fd_hugepage >= 0)
        close(fd_hugepage);
    return -1;
}
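/*
 * For illustration only - not part of the listing above. Plausible
 * implementations of the two helpers the attach code relies on; the real
 * versions may differ in detail.
 *
 * getFileSize(): assumed to fstat the open descriptor, returning 0 on
 * error so that the subsequent size/mmap calculations fail cleanly.
 *
 * aslr_enabled(): on Linux, /proc/sys/kernel/randomize_va_space holds '0'
 * (ASLR off) or '1'/'2' (ASLR on); a negative return signals that the
 * file could not be read.
 */
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

static off_t
getFileSize(int fd)
{
    struct stat st;
    if (fstat(fd, &st) < 0)
        return 0;
    return st.st_size;
}

static int
aslr_enabled(void)
{
    char c;
    int fd = open("/proc/sys/kernel/randomize_va_space", O_RDONLY);
    if (fd < 0)
        return -1;
    if (read(fd, &c, 1) != 1) {
        close(fd);
        return -1;
    }
    close(fd);
    return (c == '0') ? 0 : 1;
}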
/*
 * Prepare physical memory mapping: fill the configuration structure with
 * this information; return 0 on success.
 *  1. map N huge pages in separate files in hugetlbfs
 *  2. find associated physical addr
 *  3. find associated NUMA socket ID
 *  4. sort all huge pages by physical address
 *  5. remap these N huge pages in the correct order
 *  6. unmap the first mapping
 *  7. fill memsegs in configuration with contiguous zones
 */
static int
rte_eal_hugepage_init(void)
{
    struct rte_mem_config *mcfg;
    struct hugepage *hugepage, *tmp_hp = NULL;
    struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
    uint64_t memory[RTE_MAX_NUMA_NODES];
    unsigned hp_offset;
    int i, j, new_memseg;
    int nrpages, total_pages = 0;
    void *addr;

    memset(used_hp, 0, sizeof(used_hp));

    /* get pointer to global configuration */
    mcfg = rte_eal_get_configuration()->mem_config;

    /* for debug purposes, hugetlbfs can be disabled */
    if (internal_config.no_hugetlbfs) {
        addr = malloc(internal_config.memory);
        if (addr == NULL) /* malloc can fail; don't install a NULL segment */
            return -1;
        mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;
        mcfg->memseg[0].addr = addr;
        mcfg->memseg[0].len = internal_config.memory;
        mcfg->memseg[0].socket_id = 0;
        return 0;
    }

    /*
     * calculate the total number of hugepages available. at this point we
     * haven't yet started sorting them, so they are all on socket 0.
     */
    for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
        /* meanwhile, also initialize the hugepage sizes in used_hp */
        used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz;
        total_pages += internal_config.hugepage_info[i].num_pages[0];
    }

    /*
     * allocate a memory area for the hugepage table.
     * this isn't shared memory yet. since we need some processing done on
     * these pages, shared memory will be created at a later stage.
     */
    tmp_hp = malloc(total_pages * sizeof(struct hugepage));
    if (tmp_hp == NULL)
        goto fail;

    memset(tmp_hp, 0, total_pages * sizeof(struct hugepage));

    hp_offset = 0; /* where we start the current page size entries */

    /* map all hugepages and sort them */
    for (i = 0; i < (int)internal_config.num_hugepage_sizes; i++) {
        struct hugepage_info *hpi;

        /*
         * we don't yet mark hugepages as used at this stage, so we just
         * map all hugepages available to the system; all hugepages are
         * still located on socket 0.
         */
        hpi = &internal_config.hugepage_info[i];

        if (hpi->num_pages[0] == 0) /* num_pages is an array; check socket 0 */
            continue;

        /* map all hugepages available */
        if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0) {
            RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n",
                    (unsigned)(hpi->hugepage_sz / 0x100000));
            goto fail;
        }

        /* find physical addresses and sockets for each hugepage */
        if (find_physaddr(&tmp_hp[hp_offset], hpi) < 0) {
            RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
                    (unsigned)(hpi->hugepage_sz / 0x100000));
            goto fail;
        }

        if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0) {
            RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
                    (unsigned)(hpi->hugepage_sz / 0x100000));
            goto fail;
        }

        if (sort_by_physaddr(&tmp_hp[hp_offset], hpi) < 0)
            goto fail;

        /* remap all hugepages */
        if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0) {
            RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
                    (unsigned)(hpi->hugepage_sz / 0x100000));
            goto fail;
        }

        /* unmap the original mappings */
        if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0)
            goto fail;

        /* we have processed a number of hugepages of this size; inc offset */
        hp_offset += hpi->num_pages[0];
    }

    /* clean out the numbers of pages */
    for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++)
        for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
            internal_config.hugepage_info[i].num_pages[j] = 0;

    /* get hugepages for each socket */
    for (i = 0; i < total_pages; i++) {
        int socket = tmp_hp[i].socket_id;

        /* find a hugepage info with the right size and increment num_pages */
        for (j = 0; j < (int) internal_config.num_hugepage_sizes; j++) {
            if (tmp_hp[i].size ==
                    internal_config.hugepage_info[j].hugepage_sz) {
                internal_config.hugepage_info[j].num_pages[socket]++;
            }
        }
    }

    /* make a copy of socket_mem, needed for the number-of-pages calculation */
    for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
        memory[i] = internal_config.socket_mem[i];

    /* calculate the final number of pages */
    nrpages = calc_num_pages_per_socket(memory,
            internal_config.hugepage_info, used_hp,
            internal_config.num_hugepage_sizes);

    /* error if not enough memory available */
    if (nrpages < 0)
        goto fail;

    /* reporting in! */
    for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
        for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
            if (used_hp[i].num_pages[j] > 0) {
                RTE_LOG(INFO, EAL,
                        "Requesting %u pages of size %uMB from socket %i\n",
                        used_hp[i].num_pages[j],
                        (unsigned)(used_hp[i].hugepage_sz / 0x100000), j);
            }
        }
    }

    /* create shared memory */
    hugepage = create_shared_memory(eal_hugepage_info_path(),
            nrpages * sizeof(struct hugepage));
    if (hugepage == NULL) {
        RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
        goto fail;
    }

    /*
     * unmap pages that we won't need (looks at used_hp).
     * also, sets final_va to NULL on pages that were unmapped.
     */
    if (unmap_unneeded_hugepages(tmp_hp, used_hp,
            internal_config.num_hugepage_sizes) < 0) {
        RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n");
        goto fail;
    }

    /*
     * copy the entries from the malloc'd hugepage table to the actual
     * shared memory. only hugepages with a non-NULL final_va are copied.
     * has overflow protection.
     */
    if (copy_hugepages_to_shared_mem(hugepage, nrpages,
            tmp_hp, total_pages) < 0) {
        RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
        goto fail;
    }

    /* free the temporary hugepage table */
    free(tmp_hp);
    tmp_hp = NULL;

    memset(mcfg->memseg, 0, sizeof(mcfg->memseg));
    j = -1;
    for (i = 0; i < nrpages; i++) {
        new_memseg = 0;

        /* if this is a new section, create a new memseg */
        if (i == 0)
            new_memseg = 1;
        else if (hugepage[i].socket_id != hugepage[i-1].socket_id)
            new_memseg = 1;
        else if (hugepage[i].size != hugepage[i-1].size)
            new_memseg = 1;
        else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) !=
                hugepage[i].size)
            new_memseg = 1;
        else if (((unsigned long)hugepage[i].final_va -
                (unsigned long)hugepage[i-1].final_va) != hugepage[i].size)
            new_memseg = 1;

        if (new_memseg) {
            j += 1;
            if (j == RTE_MAX_MEMSEG)
                break;

            mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
            mcfg->memseg[j].addr = hugepage[i].final_va;
            mcfg->memseg[j].len = hugepage[i].size;
            mcfg->memseg[j].socket_id = hugepage[i].socket_id;
            mcfg->memseg[j].hugepage_sz = hugepage[i].size;
        }
        /* continuation of the previous memseg */
        else {
            mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
        }
        hugepage[i].memseg_id = j;
    }

    return 0;

fail:
    if (tmp_hp)
        free(tmp_hp);
    return -1;
}
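/*
 * For illustration only - the memseg coalescing rule at the end of
 * rte_eal_hugepage_init() can be exercised in isolation. struct hp_ex is a
 * trimmed-down, hypothetical stand-in for struct hugepage, and the three
 * pages below are made-up test data: the first two are physically and
 * virtually contiguous 2MB pages, while the third sits after a physical
 * gap and therefore must start a new memseg.
 */
#include <stdio.h>
#include <stdint.h>

struct hp_ex {
    uint64_t physaddr;
    void    *final_va;
    uint64_t size;
    int      socket_id;
};

/* returns 1 when page i must start a new memseg, mirroring the checks above */
static int
starts_new_memseg(const struct hp_ex *hp, int i)
{
    if (i == 0)
        return 1;
    if (hp[i].socket_id != hp[i-1].socket_id)
        return 1;
    if (hp[i].size != hp[i-1].size)
        return 1;
    if (hp[i].physaddr - hp[i-1].physaddr != hp[i].size)
        return 1;
    if ((uintptr_t)hp[i].final_va - (uintptr_t)hp[i-1].final_va != hp[i].size)
        return 1;
    return 0;
}

int
main(void)
{
    /* two contiguous 2MB pages, then a page beyond a physical gap */
    struct hp_ex hp[3] = {
        { 0x40000000, (void *)(uintptr_t)0x7f0000000000ULL, 0x200000, 0 },
        { 0x40200000, (void *)(uintptr_t)0x7f0000200000ULL, 0x200000, 0 },
        { 0x80000000, (void *)(uintptr_t)0x7f0000400000ULL, 0x200000, 0 },
    };
    /* prints: page 0 new memseg, page 1 continues, page 2 new memseg */
    for (int i = 0; i < 3; i++)
        printf("page %d: %s\n", i,
                starts_new_memseg(hp, i) ? "new memseg" : "continues");
    return 0;
}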