/* this is really a sanity check */
static int
test_macros(int __rte_unused unused_parm)
{
/* Parenthesize the whole expansion: without the parentheses, PTR_DIFF
 * used inside a larger expression (e.g. RTE_PTR_ADD(SMALLER, PTR_DIFF))
 * would only group correctly by accident of operator precedence.
 */
#define SMALLER 0x1000U
#define BIGGER 0x2000U
#define PTR_DIFF (BIGGER - SMALLER)
/* do { } while (0) makes the macro a single statement, so it is safe
 * after an un-braced if/else (the bare {..} form breaks with a
 * trailing ';' in an if/else chain).
 */
#define FAIL_MACRO(x) do { \
	printf(#x "() test failed!\n"); \
	return -1; \
} while (0)

	uintptr_t unused = 0;

	RTE_SET_USED(unused);

	/* exercise the pointer-arithmetic helpers with known constants */
	if ((uintptr_t)RTE_PTR_ADD(SMALLER, PTR_DIFF) != BIGGER)
		FAIL_MACRO(RTE_PTR_ADD);
	if ((uintptr_t)RTE_PTR_SUB(BIGGER, PTR_DIFF) != SMALLER)
		FAIL_MACRO(RTE_PTR_SUB);
	if (RTE_PTR_DIFF(BIGGER, SMALLER) != PTR_DIFF)
		FAIL_MACRO(RTE_PTR_DIFF);
	if (RTE_MAX(SMALLER, BIGGER) != BIGGER)
		FAIL_MACRO(RTE_MAX);
	if (RTE_MIN(SMALLER, BIGGER) != SMALLER)
		FAIL_MACRO(RTE_MIN);

	if (strncmp(RTE_STR(test), "test", sizeof("test")))
		FAIL_MACRO(RTE_STR);

	return 0;
}
/*
 * Look up the memseg backing 'addr' inside the given memseg list.
 * Returns NULL when no list is supplied or when the address lies
 * outside the list's contiguous VA window.
 */
static struct rte_memseg *
virt2memseg(const void *addr, const struct rte_memseg_list *msl)
{
	void *va_start, *va_end;
	int seg_idx;

	if (msl == NULL)
		return NULL;

	/* a memseg list was specified, check if it's the right one */
	va_start = msl->base_va;
	va_end = RTE_PTR_ADD(va_start,
			(size_t)msl->page_sz * msl->memseg_arr.len);

	if (addr < va_start || addr >= va_end)
		return NULL;

	/* segments are page-sized, so offset / page size is the slot */
	seg_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;

	return rte_fbarray_get(&msl->memseg_arr, seg_idx);
}
/* fills hugepage cache entry for a given start virt_addr */
/*
 * Scan /proc/self/maps for the mapping containing virt_addr; on a match,
 * copy the backing file path into e->filepath and record the offset of
 * virt_addr within that mapping in e->offset.
 *
 * Returns 0 on success, -1 on failure (maps unreadable or no file-backed
 * mapping found for the address).
 */
static int
get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e)
{
	uint64_t start_addr, end_addr;
	char *start,*path_end;
	/* PATH_MAX*2: a maps line holds address range, perms, offset, dev,
	 * inode AND the path, so leave generous headroom */
	char buf[PATH_MAX*2];
	FILE *f;

	start = NULL;
	path_end = NULL;
	start_addr = 0;

	memset(e->filepath, 0, sizeof(e->filepath));

	/* open /proc/self/maps */
	f = fopen("/proc/self/maps", "r");
	if (f == NULL) {
		RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n");
		return -1;
	}

	/* parse maps */
	while (fgets(buf, sizeof(buf), f) != NULL) {
		/* get endptr to end of start addr */
		start = buf;

		/* NOTE(review): GET_PAGEMAP_ADDR is defined elsewhere — it
		 * presumably parses a hex address up to the given delimiter
		 * and advances 'start' past it; confirm its failure path
		 * (it is handed an error message) before modifying this loop.
		 */
		GET_PAGEMAP_ADDR(start,start_addr,'-',
				"Cannot find start address in maps!\n");

		/* if start address is bigger than our address, skip */
		if (start_addr > virt_addr)
			continue;

		GET_PAGEMAP_ADDR(start,end_addr,' ',
				"Cannot find end address in maps!\n");

		/* if end address is less than our address, skip */
		if (end_addr <= virt_addr)
			continue;

		/* find where the path starts (first '/' after the fields) */
		start = strstr(start, "/");

		/* anonymous mapping (no path) — not our hugefile, keep going */
		if (start == NULL)
			continue;

		/* at this point, we know that this is our map.
		 * now let's find the file */
		path_end = strstr(start, "\n");
		break;
	}

	/* loop ended without locating a path — either EOF or a matching
	 * line with no trailing newline was never found */
	if (path_end == NULL) {
		RTE_LOG(ERR, EAL, "Hugefile path not found!\n");
		goto error;
	}

	/* calculate offset and copy the file path */
	/* size arg = path length + 1 so snprintf NUL-terminates exactly at
	 * the newline position */
	snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1,
			"%s", start);

	e->offset = virt_addr - start_addr;

	fclose(f);

	return 0;
error:
	fclose(f);
	return -1;
}
static int test_memzone_reserve_max_aligned(void) { const struct rte_memzone *mz; const struct rte_config *config; const struct rte_memseg *ms; int memseg_idx = 0; int memzone_idx = 0; uintptr_t addr_offset; size_t len = 0; void* last_addr; size_t maxlen = 0; /* random alignment */ rte_srand((unsigned)rte_rdtsc()); const unsigned align = 1 << ((rte_rand() % 8) + 5); /* from 128 up to 4k alignment */ /* get pointer to global configuration */ config = rte_eal_get_configuration(); ms = rte_eal_get_physmem_layout(); addr_offset = 0; for (memseg_idx = 0; memseg_idx < RTE_MAX_MEMSEG; memseg_idx++){ /* ignore smaller memsegs as they can only get smaller */ if (ms[memseg_idx].len < maxlen) continue; /* align everything */ last_addr = RTE_PTR_ALIGN_CEIL(ms[memseg_idx].addr, RTE_CACHE_LINE_SIZE); len = ms[memseg_idx].len - RTE_PTR_DIFF(last_addr, ms[memseg_idx].addr); len &= ~((size_t) RTE_CACHE_LINE_MASK); /* cycle through all memzones */ for (memzone_idx = 0; memzone_idx < RTE_MAX_MEMZONE; memzone_idx++) { /* stop when reaching last allocated memzone */ if (config->mem_config->memzone[memzone_idx].addr == NULL) break; /* check if the memzone is in our memseg and subtract length */ if ((config->mem_config->memzone[memzone_idx].addr >= ms[memseg_idx].addr) && (config->mem_config->memzone[memzone_idx].addr < (RTE_PTR_ADD(ms[memseg_idx].addr, ms[memseg_idx].len)))) { /* since the zones can now be aligned and occasionally skip * some space, we should calculate the length based on * reported length and start addresses difference. 
*/ len -= (uintptr_t) RTE_PTR_SUB( config->mem_config->memzone[memzone_idx].addr, (uintptr_t) last_addr); len -= config->mem_config->memzone[memzone_idx].len; last_addr = RTE_PTR_ADD(config->mem_config->memzone[memzone_idx].addr, (size_t) config->mem_config->memzone[memzone_idx].len); } } /* make sure we get the alignment offset */ if (len > maxlen) { addr_offset = RTE_PTR_ALIGN_CEIL((uintptr_t) last_addr, align) - (uintptr_t) last_addr; maxlen = len; } } if (maxlen == 0 || maxlen == addr_offset) { printf("There is no space left for biggest %u-aligned memzone!\n", align); return 0; } maxlen -= addr_offset; mz = rte_memzone_reserve_aligned("max_zone_aligned", 0, SOCKET_ID_ANY, 0, align); if (mz == NULL){ printf("Failed to reserve a big chunk of memory\n"); rte_dump_physmem_layout(stdout); rte_memzone_dump(stdout); return -1; } if (mz->len != maxlen) { printf("Memzone reserve with 0 size and alignment %u did not return" " bigest block\n", align); printf("Expected size = %zu, actual size = %zu\n", maxlen, mz->len); rte_dump_physmem_layout(stdout); rte_memzone_dump(stdout); return -1; } return 0; }
/*
 * Compute the largest cache-aligned free block left in any memseg (by
 * subtracting every existing memzone that falls inside it), then verify
 * that rte_memzone_reserve() with size 0 hands back exactly that much.
 * Returns 0 on success or when no space is left, -1 on test failure.
 */
static int
test_memzone_reserve_max(void)
{
	const struct rte_memzone *mz;
	const struct rte_config *config;
	const struct rte_memseg *ms;
	int memseg_idx = 0;
	int memzone_idx = 0;
	size_t len = 0;
	void* last_addr;
	size_t maxlen = 0;

	/* get pointer to global configuration */
	config = rte_eal_get_configuration();

	ms = rte_eal_get_physmem_layout();

	for (memseg_idx = 0; memseg_idx < RTE_MAX_MEMSEG; memseg_idx++){
		/* ignore smaller memsegs as they can only get smaller */
		if (ms[memseg_idx].len < maxlen)
			continue;

		/* align everything */
		last_addr = RTE_PTR_ALIGN_CEIL(ms[memseg_idx].addr,
				RTE_CACHE_LINE_SIZE);
		/* usable length = segment length minus the bytes lost to the
		 * alignment bump, rounded down to a cache-line multiple */
		len = ms[memseg_idx].len -
				RTE_PTR_DIFF(last_addr, ms[memseg_idx].addr);
		len &= ~((size_t) RTE_CACHE_LINE_MASK);

		/* cycle through all memzones */
		for (memzone_idx = 0; memzone_idx < RTE_MAX_MEMZONE;
				memzone_idx++) {

			/* stop when reaching last allocated memzone */
			if (config->mem_config->memzone[memzone_idx].addr == NULL)
				break;

			/* check if the memzone is in our memseg and subtract length */
			if ((config->mem_config->memzone[memzone_idx].addr >=
					ms[memseg_idx].addr) &&
					(config->mem_config->memzone[memzone_idx].addr <
					(RTE_PTR_ADD(ms[memseg_idx].addr,
					ms[memseg_idx].len)))) {
				/* since the zones can now be aligned and occasionally skip
				 * some space, we should calculate the length based on
				 * reported length and start addresses difference. Addresses
				 * are allocated sequentially so we don't need to worry about
				 * them being in the right order.
				 */
				len -= RTE_PTR_DIFF(
					config->mem_config->memzone[memzone_idx].addr,
					last_addr);
				len -= config->mem_config->memzone[memzone_idx].len;
				/* advance past the end of this memzone */
				last_addr =
					RTE_PTR_ADD(config->mem_config->memzone[memzone_idx].addr,
					(size_t) config->mem_config->memzone[memzone_idx].len);
			}
		}

		/* we don't need to calculate offset here since length
		 * is always cache-aligned */
		if (len > maxlen)
			maxlen = len;
	}

	if (maxlen == 0) {
		printf("There is no space left!\n");
		return 0;
	}

	mz = rte_memzone_reserve("max_zone", 0, SOCKET_ID_ANY, 0);
	if (mz == NULL){
		printf("Failed to reserve a big chunk of memory\n");
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);
		return -1;
	}

	if (mz->len != maxlen) {
		printf("Memzone reserve with 0 size did not return bigest block\n");
		printf("Expected size = %zu, actual size = %zu\n",
				maxlen, mz->len);
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);
		return -1;
	}
	return 0;
}
/*
 * Reserve a region of virtual address space of *size bytes, aligned to
 * page_sz, via an anonymous private mmap.
 *
 * requested_addr: preferred address, or NULL to let the kernel (or the
 *                 internal base_virtaddr policy) choose.
 * size:           in/out — may be shrunk page-by-page when
 *                 EAL_VIRTUAL_AREA_ALLOW_SHRINK is set and mmap fails.
 * flags:          EAL_VIRTUAL_AREA_* behavior flags (hint / shrink / unmap).
 * mmap_flags:     extra flags OR'd into MAP_PRIVATE | MAP_ANONYMOUS.
 *
 * Returns the aligned address on success, NULL on failure (rte_errno set).
 */
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
		size_t page_sz, int flags, int mmap_flags)
{
	bool addr_is_hint, allow_shrink, unmap, no_align;
	uint64_t map_sz;
	void *mapped_addr, *aligned_addr;

	/* lazily cache the system page size */
	if (system_page_sz == 0)
		system_page_sz = sysconf(_SC_PAGESIZE);

	mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;

	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);

	addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
	allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
	unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;

	/* primary process may carry a user-configured base VA to start from */
	if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
			rte_eal_process_type() == RTE_PROC_PRIMARY)
		next_baseaddr = (void *) internal_config.base_virtaddr;

	if (requested_addr == NULL && next_baseaddr != NULL) {
		requested_addr = next_baseaddr;
		requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
		addr_is_hint = true;
	}

	/* we don't need alignment of resulting pointer in the following cases:
	 *
	 * 1. page size is equal to system size
	 * 2. we have a requested address, and it is page-aligned, and we will
	 *    be discarding the address if we get a different one.
	 *
	 * for all other cases, alignment is potentially necessary.
	 */
	no_align = (requested_addr != NULL &&
		requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
		!addr_is_hint) ||
		page_sz == system_page_sz;

	do {
		/* over-allocate by one page so we can align inside the map
		 * when alignment is needed */
		map_sz = no_align ? *size : *size + page_sz;
		if (map_sz > SIZE_MAX) {
			/* only reachable on 32-bit (size_t narrower than
			 * uint64_t map_sz) */
			RTE_LOG(ERR, EAL, "Map size too big\n");
			rte_errno = E2BIG;
			return NULL;
		}

		mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ,
				mmap_flags, -1, 0);
		if (mapped_addr == MAP_FAILED && allow_shrink)
			*size -= page_sz;
	} while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0);

	/* align resulting address - if map failed, we will ignore the value
	 * anyway, so no need to add additional checks.
	 */
	aligned_addr = no_align ? mapped_addr :
			RTE_PTR_ALIGN(mapped_addr, page_sz);

	if (*size == 0) {
		/* shrink loop exhausted the requested size */
		RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
			strerror(errno));
		rte_errno = errno;
		return NULL;
	} else if (mapped_addr == MAP_FAILED) {
		RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
			strerror(errno));
		/* pass errno up the call chain */
		rte_errno = errno;
		return NULL;
	} else if (requested_addr != NULL && !addr_is_hint &&
			aligned_addr != requested_addr) {
		/* exact address was mandatory — give the mapping back */
		RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
			requested_addr, aligned_addr);
		munmap(mapped_addr, map_sz);
		rte_errno = EADDRNOTAVAIL;
		return NULL;
	} else if (requested_addr != NULL && addr_is_hint &&
			aligned_addr != requested_addr) {
		RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
			requested_addr, aligned_addr);
		RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory into secondary processes\n");
	} else if (next_baseaddr != NULL) {
		/* advance the rolling base so the next reservation starts
		 * after this one */
		next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
	}

	RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
		aligned_addr, *size);

	if (unmap) {
		/* caller only wanted to discover a usable address range */
		munmap(mapped_addr, map_sz);
	} else if (!no_align) {
		void *map_end, *aligned_end;
		size_t before_len, after_len;

		/* when we reserve space with alignment, we add alignment to
		 * mapping size. On 32-bit, if 1GB alignment was requested, this
		 * would waste 1GB of address space, which is a luxury we cannot
		 * afford. so, if alignment was performed, check if any unneeded
		 * address space can be unmapped back.
		 */

		map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
		aligned_end = RTE_PTR_ADD(aligned_addr, *size);

		/* unmap space before aligned mmap address */
		before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
		if (before_len > 0)
			munmap(mapped_addr, before_len);

		/* unmap space after aligned end mmap address */
		after_len = RTE_PTR_DIFF(map_end, aligned_end);
		if (after_len > 0)
			munmap(aligned_end, after_len);
	}

	return aligned_addr;
}
/* * Get physical address of any mapped virtual address in the current process. */ phys_addr_t rte_mem_virt2phy(const void *virtaddr) { int fd, retval; uint64_t page, physaddr; unsigned long virt_pfn; int page_size; off_t offset; /* when using dom0, /proc/self/pagemap always returns 0, check in * dpdk memory by browsing the memsegs */ if (rte_xen_dom0_supported()) { struct rte_mem_config *mcfg; struct rte_memseg *memseg; unsigned i; mcfg = rte_eal_get_configuration()->mem_config; for (i = 0; i < RTE_MAX_MEMSEG; i++) { memseg = &mcfg->memseg[i]; if (memseg->addr == NULL) break; if (virtaddr > memseg->addr && virtaddr < RTE_PTR_ADD(memseg->addr, memseg->len)) { return memseg->phys_addr + RTE_PTR_DIFF(virtaddr, memseg->addr); } } return RTE_BAD_PHYS_ADDR; } /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ if (!proc_pagemap_readable) return RTE_BAD_PHYS_ADDR; /* standard page size */ page_size = getpagesize(); fd = open("/proc/self/pagemap", O_RDONLY); if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", __func__, strerror(errno)); return RTE_BAD_PHYS_ADDR; } virt_pfn = (unsigned long)virtaddr / page_size; offset = sizeof(uint64_t) * virt_pfn; if (lseek(fd, offset, SEEK_SET) == (off_t) -1) { RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", __func__, strerror(errno)); close(fd); return RTE_BAD_PHYS_ADDR; } retval = read(fd, &page, PFN_MASK_SIZE); close(fd); if (retval < 0) { RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", __func__, strerror(errno)); return RTE_BAD_PHYS_ADDR; } else if (retval != PFN_MASK_SIZE) { RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap " "but expected %d:\n", __func__, retval, PFN_MASK_SIZE); return RTE_BAD_PHYS_ADDR; } /* * the pfn (page frame number) are bits 0-54 (see * pagemap.txt in linux Documentation) */ physaddr = ((page & 0x7fffffffffffffULL) * page_size) + ((unsigned long)virtaddr % page_size); return physaddr; }