/*
 * Join two struct malloc_elem together. elem1 and elem2 must
 * be contiguous in memory.
 */
static inline void
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
{
	struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size);
	elem1->size += elem2->size;
	next->prev = elem1;
}
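/*
 * The helpers in this collection lean on DPDK's byte-wise pointer-arithmetic
 * macros from <rte_common.h>. For readers experimenting outside DPDK, a close
 * approximation of those definitions (an illustrative sketch, not the
 * canonical headers) is:
 */
#include <stdint.h>

/* add a byte offset to a pointer, regardless of its pointed-to type */
#define RTE_PTR_ADD(ptr, x)	((void *)((uintptr_t)(ptr) + (x)))
/* subtract a byte offset from a pointer */
#define RTE_PTR_SUB(ptr, x)	((void *)((uintptr_t)(ptr) - (x)))
/* byte distance between two pointers; ptr1 must not be below ptr2 */
#define RTE_PTR_DIFF(ptr1, ptr2)	((uintptr_t)(ptr1) - (uintptr_t)(ptr2))
/* round a pointer down, or up, to a power-of-two boundary */
#define RTE_PTR_ALIGN_FLOOR(ptr, align) \
	((typeof(ptr))((uintptr_t)(ptr) & ~((uintptr_t)(align) - 1)))
#define RTE_PTR_ALIGN_CEIL(ptr, align) \
	RTE_PTR_ALIGN_FLOOR((typeof(ptr))RTE_PTR_ADD(ptr, (align) - 1), align)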
int
inbound_sa_check(struct sa_ctx *sa_ctx, struct rte_mbuf *m, uint32_t sa_idx)
{
	struct ipsec_mbuf_metadata *priv;

	priv = RTE_PTR_ADD(m, sizeof(struct rte_mbuf));

	return (sa_ctx->sa[sa_idx].spi == priv->sa->spi);
}
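/*
 * The RTE_PTR_ADD above works because, in this application's mempool setup,
 * the per-packet private area is assumed to sit directly after the fixed-size
 * mbuf header in the same buffer (layout sketch, sizes not to scale):
 *
 *   +-----------------+--------------------------+----------+---------
 *   | struct rte_mbuf | private area (metadata)  | headroom | data ...
 *   +-----------------+--------------------------+----------+---------
 *   ^ m               ^ RTE_PTR_ADD(m, sizeof(struct rte_mbuf))
 *
 * The private area size is fixed at mempool creation time and must be at
 * least sizeof(struct ipsec_mbuf_metadata) for this cast to be valid.
 */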
/*
 * Split an existing element into two smaller elements at the given
 * split_pt parameter.
 */
static void
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
{
	struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
	const size_t new_elem_size = elem->size - old_elem_size;

	malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
	split_pt->prev = elem;
	next_elem->prev = split_pt;
	elem->size = old_elem_size;
	set_trailer(elem);
}
/*
 * Attempt to resize a malloc_elem by expanding into any free space
 * immediately after it in memory.
 */
int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
	const size_t new_size = size + MALLOC_ELEM_OVERHEAD;

	/* if we request a smaller size, then always return ok */
	const size_t current_size = elem->size - elem->pad;
	if (current_size >= new_size)
		return 0;

	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
	rte_spinlock_lock(&elem->heap->lock);
	if (next->state != ELEM_FREE)
		goto err_return;
	if (current_size + next->size < new_size)
		goto err_return;

	/* we now know the element fits, so remove from free list,
	 * join the two
	 */
	elem_free_list_remove(next);
	join_elem(elem, next);

	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
		/* now we have a big block together. Let's cut it down a bit,
		 * by splitting */
		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
		split_elem(elem, split_pt);
		malloc_elem_free_list_insert(split_pt);
	}
	rte_spinlock_unlock(&elem->heap->lock);
	return 0;

err_return:
	rte_spinlock_unlock(&elem->heap->lock);
	return -1;
}
/*
 * Reserve a block of data in an existing malloc_elem. If the malloc_elem
 * is much larger than the data block requested, we split the element in two.
 * This function is only called from malloc_heap_alloc, so parameter checking
 * is not done here, as it has already been done there.
 */
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound)
{
	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
	const size_t trailer_size = elem->size - old_elem_size - size -
		MALLOC_ELEM_OVERHEAD;

	elem_free_list_remove(elem);

	if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* split it, too much free space after elem */
		struct malloc_elem *new_free_elem =
				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);

		split_elem(elem, new_free_elem);
		malloc_elem_free_list_insert(new_free_elem);
	}

	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* don't split it, pad the element instead */
		elem->state = ELEM_BUSY;
		elem->pad = old_elem_size;

		/* put a dummy header in padding, to point to real element header */
		if (elem->pad > 0) { /* pad will be at least 64 bytes, as everything
				      * is cache-line aligned */
			new_elem->pad = elem->pad;
			new_elem->state = ELEM_PAD;
			new_elem->size = elem->size - elem->pad;
			set_header(new_elem);
		}

		return new_elem;
	}

	/* we are going to split the element in two. The original element
	 * remains free, and the new element is the one allocated.
	 * Re-insert original element, in case its new size makes it
	 * belong on a different list.
	 */
	split_elem(elem, new_elem);
	new_elem->state = ELEM_BUSY;
	malloc_elem_free_list_insert(elem);

	return new_elem;
}
/*
 * Expand the heap with a memseg.
 * This reserves the zone and sets a dummy malloc_elem header at the end
 * to prevent overflow. The rest of the zone is added to the free list as a
 * single large free block.
 */
static void
malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
{
	/* allocate the memory block headers, one at end, one at start */
	struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
	struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
			ms->len - MALLOC_ELEM_OVERHEAD);
	end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
	const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;

	malloc_elem_init(start_elem, heap, ms, elem_size);
	malloc_elem_mkend(end_elem, start_elem);
	malloc_elem_free_list_insert(start_elem);

	heap->total_size += elem_size;
}
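/*
 * Resulting segment layout (sketch): the start element initially covers the
 * entire usable range (header plus free data, elem_size bytes in total),
 * while the zero-sized sentinel written by malloc_elem_mkend() guarantees
 * that no allocation can run past the end of the segment:
 *
 *   ms->addr                            cache-line-aligned end
 *   +-----------------------------------+---------------------+
 *   | start_elem: header + free space   | end_elem (sentinel) |
 *   +-----------------------------------+---------------------+
 */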
/*
 * Reserve an extra memory zone and make it available for use by a particular
 * heap. This reserves the zone and sets a dummy malloc_elem header at the end
 * to prevent overflow. The rest of the zone is added to the free list as a
 * single large free block.
 */
static int
malloc_heap_add_memzone(struct malloc_heap *heap, size_t size, unsigned align)
{
	const unsigned mz_flags = 0;
	const size_t block_size = get_malloc_memzone_size();
	/* ensure the data we want to allocate will fit in the memzone */
	const size_t min_size = size + align + MALLOC_ELEM_OVERHEAD * 2;
	const struct rte_memzone *mz = NULL;
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	unsigned numa_socket = heap - mcfg->malloc_heaps;

	size_t mz_size = min_size;
	if (mz_size < block_size)
		mz_size = block_size;

	char mz_name[RTE_MEMZONE_NAMESIZE];
	snprintf(mz_name, sizeof(mz_name), "MALLOC_S%u_HEAP_%u",
			numa_socket, heap->mz_count++);

	/* try getting a block. if we fail and we don't need as big a block
	 * as given in the config, we can shrink our request and try again
	 */
	do {
		mz = rte_memzone_reserve(mz_name, mz_size, numa_socket,
				mz_flags);
		if (mz == NULL)
			mz_size /= 2;
	} while (mz == NULL && mz_size > min_size);
	if (mz == NULL)
		return -1;

	/* allocate the memory block headers, one at end, one at start */
	struct malloc_elem *start_elem = (struct malloc_elem *)mz->addr;
	struct malloc_elem *end_elem = RTE_PTR_ADD(mz->addr,
			mz_size - MALLOC_ELEM_OVERHEAD);
	end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);

	const unsigned elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
	malloc_elem_init(start_elem, heap, mz, elem_size);
	malloc_elem_mkend(end_elem, start_elem);
	malloc_elem_free_list_insert(start_elem);

	/* increase heap total size by size of new memzone */
	heap->total_size += mz_size - MALLOC_ELEM_OVERHEAD;
	return 0;
}
static struct rte_memseg *
virt2memseg(const void *addr, const struct rte_memseg_list *msl)
{
	const struct rte_fbarray *arr;
	void *start, *end;
	int ms_idx;

	if (msl == NULL)
		return NULL;

	/* a memseg list was specified, check if it's the right one */
	start = msl->base_va;
	end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);

	if (addr < start || addr >= end)
		return NULL;

	/* now, calculate index */
	arr = &msl->memseg_arr;
	ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;
	return rte_fbarray_get(arr, ms_idx);
}
static struct rte_memseg_list *
virt2memseg_list(const void *addr)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *msl;
	int msl_idx;

	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
		void *start, *end;
		msl = &mcfg->memsegs[msl_idx];

		start = msl->base_va;
		end = RTE_PTR_ADD(start,
				(size_t)msl->page_sz * msl->memseg_arr.len);
		if (addr >= start && addr < end)
			break;
	}
	/* if we didn't find our memseg list */
	if (msl_idx == RTE_MAX_MEMSEG_LISTS)
		return NULL;
	return msl;
}
/*
 * Unmaps hugepages that are not going to be used. Since we originally allocate
 * ALL hugepages (not just those we need), additional unmapping needs to be
 * done.
 */
static int
unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
		struct hugepage_info *hpi,
		unsigned num_hp_info)
{
	unsigned socket, size;
	int page, nrpages = 0;

	/* get total number of hugepages */
	for (size = 0; size < num_hp_info; size++)
		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
			nrpages +=
			internal_config.hugepage_info[size].num_pages[socket];

	for (size = 0; size < num_hp_info; size++) {
		for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
			unsigned pages_found = 0;

			/* traverse until we have unmapped all the unused pages */
			for (page = 0; page < nrpages; page++) {
				struct hugepage_file *hp = &hugepg_tbl[page];

#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
				/* if this page was already cleared */
				if (hp->final_va == NULL)
					continue;
#endif

				/* find a page that matches the criteria */
				if ((hp->size == hpi[size].hugepage_sz) &&
						(hp->socket_id == (int)socket)) {

					/* if we skipped enough pages, unmap the rest */
					if (pages_found == hpi[size].num_pages[socket]) {
						uint64_t unmap_len;

#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
						unmap_len = hp->size * hp->repeated;
#else
						unmap_len = hp->size;
#endif

						/* get start addr and len of the remaining segment */
						munmap(hp->final_va, (size_t)unmap_len);

						hp->final_va = NULL;
						if (unlink(hp->filepath) == -1) {
							RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
									__func__, hp->filepath, strerror(errno));
							return -1;
						}
					}
#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
					/* else, check how much do we need to map */
					else {
						int nr_pg_left =
							hpi[size].num_pages[socket] - pages_found;

						/* if we need enough memory to fit into the segment */
						if (hp->repeated <= nr_pg_left) {
							pages_found += hp->repeated;
						}
						/* truncate the segment */
						else {
							uint64_t final_size = nr_pg_left * hp->size;
							uint64_t seg_size = hp->repeated * hp->size;

							void *unmap_va = RTE_PTR_ADD(hp->final_va,
									final_size);
							int fd;

							munmap(unmap_va, seg_size - final_size);

							fd = open(hp->filepath, O_RDWR);
							if (fd < 0) {
								RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
										hp->filepath, strerror(errno));
								return -1;
							}
							if (ftruncate(fd, final_size) < 0) {
								RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n",
										hp->filepath, strerror(errno));
								return -1;
							}
							close(fd);

							pages_found += nr_pg_left;
							hp->repeated = nr_pg_left;
						}
					}
#else
					/* else, lock the page and skip */
					else
						pages_found++;
#endif

				} /* match page */
			} /* foreach page */
		} /* foreach socket */
	} /* foreach pagesize */

	return 0;
}
/*
 * Remaps all hugepages into single file segments.
 */
static int
remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
	int fd;
	unsigned i = 0, j, num_pages, page_idx = 0;
	void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
	size_t vma_len = 0;
	size_t hugepage_sz = hpi->hugepage_sz;
	size_t total_size, offset;
	char filepath[MAX_HUGEPAGE_PATH];
	phys_addr_t physaddr;
	int socket;

	while (i < hpi->num_pages[0]) {

#ifndef RTE_ARCH_64
		/* for 32-bit systems, don't remap 1G pages and 16G pages,
		 * just reuse original map address as final map address.
		 */
		if ((hugepage_sz == RTE_PGSIZE_1G)
				|| (hugepage_sz == RTE_PGSIZE_16G)) {
			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
			hugepg_tbl[i].orig_va = NULL;
			i++;
			continue;
		}
#endif

		/* reserve a virtual area for next contiguous
		 * physical block: count the number of
		 * contiguous physical pages. */
		for (j = i+1; j < hpi->num_pages[0]; j++) {
#ifdef RTE_ARCH_PPC_64
			/* The physical addresses are sorted in descending
			 * order on PPC64 */
			if (hugepg_tbl[j].physaddr !=
					hugepg_tbl[j-1].physaddr - hugepage_sz)
				break;
#else
			if (hugepg_tbl[j].physaddr !=
					hugepg_tbl[j-1].physaddr + hugepage_sz)
				break;
#endif
		}
		num_pages = j - i;
		vma_len = num_pages * hugepage_sz;

		socket = hugepg_tbl[i].socket_id;

		/* get the biggest virtual memory area up to
		 * vma_len. If it fails, vma_addr is NULL, so
		 * let the kernel provide the address. */
		vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);

		/* If we can't find a big enough virtual area, work out how
		 * many pages we are going to get */
		if (vma_addr == NULL)
			j = i + 1;
		else if (vma_len != num_pages * hugepage_sz) {
			num_pages = vma_len / hugepage_sz;
			j = i + num_pages;
		}

		hugepg_tbl[page_idx].file_id = page_idx;
		eal_get_hugefile_path(filepath, sizeof(filepath),
				hpi->hugedir,
				hugepg_tbl[page_idx].file_id);

		/* try to create hugepage file */
		fd = open(filepath, O_CREAT | O_RDWR, 0755);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n",
					__func__, strerror(errno));
			return -1;
		}

		total_size = 0;
		for (; i < j; i++) {

			/* unmap current segment */
			if (total_size > 0)
				munmap(vma_addr, total_size);

			/* unmap original page */
			munmap(hugepg_tbl[i].orig_va, hugepage_sz);
			unlink(hugepg_tbl[i].filepath);

			total_size += hugepage_sz;

			old_addr = vma_addr;

			/* map new, bigger segment */
			vma_addr = mmap(vma_addr, total_size,
					PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

			if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
				RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n",
						__func__, strerror(errno));
				close(fd);
				return -1;
			}

			/* touch the page. this is needed because kernel postpones mapping
			 * creation until the first page fault. with this, we pin down
			 * the page and it is marked as used and gets into process'
			 * pagemap. */
			for (offset = 0; offset < total_size;
					offset += hugepage_sz)
				*((volatile uint8_t *)RTE_PTR_ADD(vma_addr, offset));
		}

		/* set shared flock on the file. */
		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
			RTE_LOG(ERR, EAL, "%s(): Locking file failed: %s\n",
					__func__, strerror(errno));
			close(fd);
			return -1;
		}

		snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH,
				"%s", filepath);

		physaddr = rte_mem_virt2phy(vma_addr);

		if (physaddr == RTE_BAD_PHYS_ADDR)
			return -1;

		hugepg_tbl[page_idx].final_va = vma_addr;
		hugepg_tbl[page_idx].physaddr = physaddr;
		hugepg_tbl[page_idx].repeated = num_pages;
		hugepg_tbl[page_idx].socket_id = socket;

		close(fd);

		/* verify the memory segment - that is, check that every VA
		 * corresponds to the physical address we expect to see */
		for (offset = 0; offset < vma_len; offset += hugepage_sz) {
			uint64_t expected_physaddr;

			expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
			page_addr = RTE_PTR_ADD(vma_addr, offset);
			physaddr = rte_mem_virt2phy(page_addr);

			if (physaddr != expected_physaddr) {
				RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
						"at %p (offset 0x%" PRIx64
						": 0x%" PRIx64 " (expected 0x%" PRIx64 ")\n",
						page_addr, offset, physaddr,
						expected_physaddr);
				return -1;
			}
		}

		/* zero out the whole segment */
		memset(hugepg_tbl[page_idx].final_va, 0, total_size);

		page_idx++;
	}

	/* zero out the rest */
	memset(&hugepg_tbl[page_idx], 0,
			(hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));

	return page_idx;
}
/* Returns a pointer to the first signature in specified bucket. */
static inline hash_sig_t *
get_sig_tbl_bucket(const struct rte_hash *h, uint32_t bucket_index)
{
	return RTE_PTR_ADD(h->sig_tbl,
			(bucket_index * h->sig_tbl_bucket_size));
}
/* Returns a pointer to a key at a specific position in a specified bucket. */
static inline void *
get_key_from_bucket(const struct rte_hash *h, uint8_t *bkt, uint32_t pos)
{
	return RTE_PTR_ADD(bkt, pos * h->key_tbl_key_size);
}
/* Returns a pointer to the first key in specified bucket. */
static inline uint8_t *
get_key_tbl_bucket(const struct rte_hash *h, uint32_t bucket_index)
{
	return RTE_PTR_ADD(h->key_tbl,
			(bucket_index * h->bucket_entries * h->key_tbl_key_size));
}
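/*
 * All three accessors treat the signature and key tables as flat byte
 * arrays, so an entry's address is simply base + index * stride. A
 * self-contained illustration of the same addressing scheme (the geometry
 * values below are hypothetical, not the rte_hash internals):
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t bucket_entries = 4;	/* keys per bucket */
	const uint32_t key_size = 16;		/* bytes per stored key */
	uint8_t key_tbl[2 * 4 * 16] = {0};	/* two buckets' worth of keys */
	uint32_t bucket_index = 1, pos = 2;

	/* same arithmetic as get_key_tbl_bucket()/get_key_from_bucket() */
	uint8_t *bkt = key_tbl + bucket_index * bucket_entries * key_size;
	uint8_t *key = bkt + pos * key_size;

	printf("bucket at offset %td, key at offset %td\n",
			bkt - key_tbl, key - key_tbl);	/* prints 64 and 96 */
	return 0;
}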
/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
	int fd, retval;
	uint64_t page, physaddr;
	unsigned long virt_pfn;
	int page_size;
	off_t offset;

	/* when using dom0, /proc/self/pagemap always returns 0, check in
	 * dpdk memory by browsing the memsegs */
	if (rte_xen_dom0_supported()) {
		struct rte_mem_config *mcfg;
		struct rte_memseg *memseg;
		unsigned i;

		mcfg = rte_eal_get_configuration()->mem_config;
		for (i = 0; i < RTE_MAX_MEMSEG; i++) {
			memseg = &mcfg->memseg[i];
			if (memseg->addr == NULL)
				break;
			if (virtaddr > memseg->addr &&
					virtaddr < RTE_PTR_ADD(memseg->addr,
						memseg->len)) {
				return memseg->phys_addr +
					RTE_PTR_DIFF(virtaddr, memseg->addr);
			}
		}

		return RTE_BAD_PHYS_ADDR;
	}

	/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
	if (!proc_pagemap_readable)
		return RTE_BAD_PHYS_ADDR;

	/* standard page size */
	page_size = getpagesize();

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0) {
		RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
				__func__, strerror(errno));
		return RTE_BAD_PHYS_ADDR;
	}

	virt_pfn = (unsigned long)virtaddr / page_size;
	offset = sizeof(uint64_t) * virt_pfn;
	if (lseek(fd, offset, SEEK_SET) == (off_t)-1) {
		RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
				__func__, strerror(errno));
		close(fd);
		return RTE_BAD_PHYS_ADDR;
	}

	retval = read(fd, &page, PFN_MASK_SIZE);
	close(fd);
	if (retval < 0) {
		RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
				__func__, strerror(errno));
		return RTE_BAD_PHYS_ADDR;
	} else if (retval != PFN_MASK_SIZE) {
		RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap "
				"but expected %d:\n",
				__func__, retval, PFN_MASK_SIZE);
		return RTE_BAD_PHYS_ADDR;
	}

	/*
	 * the pfn (page frame number) are bits 0-54 (see
	 * pagemap.txt in linux Documentation)
	 */
	physaddr = ((page & 0x7fffffffffffffULL) * page_size)
		+ ((unsigned long)virtaddr % page_size);

	return physaddr;
}
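/*
 * The pagemap decoding above follows the kernel's documented entry format:
 * each 8-byte entry holds the PFN in bits 0-54, with flag bits above. A
 * minimal standalone sketch using the same mask (no DPDK dependencies; on
 * modern kernels it must run as root, since unprivileged reads report a
 * PFN of 0):
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* translate a virtual address of the calling process to a physical one */
static uint64_t
virt2phys(const void *va)
{
	int page_size = getpagesize();
	uint64_t entry, pfn;
	off_t off = (uintptr_t)va / page_size * sizeof(uint64_t);
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0)
		return UINT64_MAX;
	if (pread(fd, &entry, sizeof(entry), off) != sizeof(entry)) {
		close(fd);
		return UINT64_MAX;
	}
	close(fd);

	pfn = entry & 0x7fffffffffffffULL;	/* bits 0-54: page frame number */
	if (pfn == 0)				/* not present, or no permission */
		return UINT64_MAX;
	return pfn * page_size + (uintptr_t)va % page_size;
}

int main(void)
{
	int x = 42; /* any mapped address will do */
	printf("phys of &x: 0x%llx\n", (unsigned long long)virt2phys(&x));
	return 0;
}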
static struct _mempool_gntalloc_info
_create_mempool(const char *name, unsigned elt_num, unsigned elt_size,
		unsigned cache_size, unsigned private_data_size,
		rte_mempool_ctor_t *mp_init, void *mp_init_arg,
		rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
		int socket_id, unsigned flags)
{
	struct _mempool_gntalloc_info mgi;
	struct rte_mempool *mp = NULL;
	struct rte_mempool_objsz objsz;
	uint32_t pg_num, rpg_num, pg_shift, pg_sz;
	char *va, *orig_va, *uv; /* uv: from which, the pages could be freed */
	ssize_t sz, usz; /* usz: unused size */
	/*
	 * for each page allocated through the xen_gntalloc driver:
	 * gref_arr: stores grant references,
	 * pa_arr:   stores physical addresses,
	 * gnt_arr:  stores all metadata.
	 */
	uint32_t *gref_arr = NULL;
	phys_addr_t *pa_arr = NULL;
	struct _gntarr *gnt_arr = NULL;
	/* start index of the grant references, used for dealloc */
	uint64_t start_index;
	uint32_t i, j;
	int rv = 0;
	struct ioctl_gntalloc_dealloc_gref arg;

	mgi.mp = NULL;
	va = orig_va = uv = NULL;
	pg_num = rpg_num = 0;
	sz = 0;

	pg_sz = getpagesize();
	if (rte_is_power_of_2(pg_sz) == 0) {
		goto out;
	}
	pg_shift = rte_bsf32(pg_sz);

	rte_mempool_calc_obj_size(elt_size, flags, &objsz);
	sz = rte_mempool_xmem_size(elt_num, objsz.total_size, pg_shift);
	pg_num = sz >> pg_shift;

	pa_arr = calloc(pg_num, sizeof(pa_arr[0]));
	gref_arr = calloc(pg_num, sizeof(gref_arr[0]));
	gnt_arr = calloc(pg_num, sizeof(gnt_arr[0]));
	if ((gnt_arr == NULL) || (gref_arr == NULL) || (pa_arr == NULL))
		goto out;

	/* grant index is continuous in ascending order */
	orig_va = gntalloc(sz, gref_arr, &start_index);
	if (orig_va == NULL)
		goto out;

	get_phys_map(orig_va, pa_arr, pg_num, pg_sz);
	for (i = 0; i < pg_num; i++) {
		gnt_arr[i].index = start_index + i * pg_sz;
		gnt_arr[i].gref = gref_arr[i];
		gnt_arr[i].pa = pa_arr[i];
		gnt_arr[i].va = RTE_PTR_ADD(orig_va, i * pg_sz);
	}
	qsort(gnt_arr, pg_num, sizeof(struct _gntarr), compare);

	va = get_xen_virtual(sz, pg_sz);
	if (va == NULL) {
		goto out;
	}

	/*
	 * map one by one, as the index isn't continuous now.
	 * this creates pg_num VMAs -- doesn't Linux have a limitation
	 * on this?
	 */
	for (i = 0; i < pg_num; i++) {
		/* update gref_arr and pa_arr after sort */
		gref_arr[i] = gnt_arr[i].gref;
		pa_arr[i] = gnt_arr[i].pa;
		gnt_arr[i].va = mmap(va + i * pg_sz, pg_sz,
				PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_FIXED,
				gntalloc_fd, gnt_arr[i].index);
		if ((gnt_arr[i].va == MAP_FAILED) ||
				(gnt_arr[i].va != (va + i * pg_sz))) {
			RTE_LOG(ERR, PMD, "failed to map %d pages\n", i);
			goto mmap_failed;
		}
	}

	/*
	 * Check that the allocated size is big enough to hold elt_num
	 * objects and calculate how many bytes are actually required.
	 */
	usz = rte_mempool_xmem_usage(va, elt_num, objsz.total_size, pa_arr,
			pg_num, pg_shift);
	if (usz < 0) {
		mp = NULL;
		i = pg_num;
		goto mmap_failed;
	} else {
		/* unmap unused pages if any */
		uv = RTE_PTR_ADD(va, usz);
		if ((usz = va + sz - uv) > 0) {

			RTE_LOG(ERR, PMD,
				"%s(%s): unmap unused %zu of %zu "
				"mmaped bytes @%p orig:%p\n",
				__func__, name, usz, sz, uv, va);
			munmap(uv, usz);
			i = (sz - usz) / pg_sz;
			for (; i < pg_num; i++) {
				arg.count = 1;
				arg.index = gnt_arr[i].index;
				rv = ioctl(gntalloc_fd,
						IOCTL_GNTALLOC_DEALLOC_GREF,
						&arg);
				if (rv) {
					/* shouldn't fail here */
					RTE_LOG(ERR, PMD, "va=%p pa=%"PRIu64"x index=%"PRIu64" %s\n",
						gnt_arr[i].va,
						gnt_arr[i].pa,
						arg.index, strerror(errno));
					rte_panic("gntdealloc failed when freeing pages\n");
				}
			}

			rpg_num = (sz - usz) >> pg_shift;
		} else
/* map the PCI resource of a PCI device in virtual memory */
int
pci_uio_map_resource(struct rte_pci_device *dev)
{
	int i, map_idx;
	char dirname[PATH_MAX];
	char cfgname[PATH_MAX];
	char devname[PATH_MAX]; /* contains the /dev/uioX */
	void *mapaddr;
	int uio_num;
	uint64_t phaddr;
	struct rte_pci_addr *loc = &dev->addr;
	struct mapped_pci_resource *uio_res;
	struct mapped_pci_res_list *uio_res_list =
			RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
	struct pci_map *maps;

	dev->intr_handle.fd = -1;
	dev->intr_handle.uio_cfg_fd = -1;
	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;

	/* secondary processes - use already recorded details */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return pci_uio_map_secondary(dev);

	/* find uio resource */
	uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname));
	if (uio_num < 0) {
		RTE_LOG(WARNING, EAL, "  "PCI_PRI_FMT" not managed by UIO driver, "
				"skipping\n", loc->domain, loc->bus, loc->devid,
				loc->function);
		return 1;
	}
	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);

	/* save fd if in primary process */
	dev->intr_handle.fd = open(devname, O_RDWR);
	if (dev->intr_handle.fd < 0) {
		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
			devname, strerror(errno));
		return -1;
	}
	dev->intr_handle.type = RTE_INTR_HANDLE_UIO;

	snprintf(cfgname, sizeof(cfgname),
			"/sys/class/uio/uio%u/device/config", uio_num);
	dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
	if (dev->intr_handle.uio_cfg_fd < 0) {
		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
			cfgname, strerror(errno));
		return -1;
	}

	/* set bus master that is not done by uio_pci_generic */
	if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
		RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
		return -1;
	}

	/* allocate the mapping details for secondary processes */
	uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
	if (uio_res == NULL) {
		RTE_LOG(ERR, EAL,
			"%s(): cannot store uio mmap details\n", __func__);
		return -1;
	}

	snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
	memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr));

	/* Map all BARs */
	maps = uio_res->maps;
	for (i = 0, map_idx = 0; i != PCI_MAX_RESOURCE; i++) {
		int fd;
		int fail = 0;

		/* skip empty BAR */
		phaddr = dev->mem_resource[i].phys_addr;
		if (phaddr == 0)
			continue;

		/* update devname for mmap */
		snprintf(devname, sizeof(devname),
				SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d",
				loc->domain, loc->bus, loc->devid,
				loc->function, i);

		/* open resource file, to mmap it */
		fd = open(devname, O_RDWR);
		if (fd < 0) {
			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
					devname, strerror(errno));
			return -1;
		}

		/* try mapping somewhere close to the end of hugepages */
		if (pci_map_addr == NULL)
			pci_map_addr = pci_find_max_end_va();

		mapaddr = pci_map_resource(pci_map_addr, fd, 0,
				(size_t)dev->mem_resource[i].len, 0);
		if (mapaddr == MAP_FAILED)
			fail = 1;

		pci_map_addr = RTE_PTR_ADD(mapaddr,
				(size_t)dev->mem_resource[i].len);

		maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
		if (maps[map_idx].path == NULL)
			fail = 1;

		if (fail) {
			rte_free(uio_res);
			close(fd);
			return -1;
		}
		close(fd);

		maps[map_idx].phaddr = dev->mem_resource[i].phys_addr;
		maps[map_idx].size = dev->mem_resource[i].len;
		maps[map_idx].addr = mapaddr;
		maps[map_idx].offset = 0;
		strcpy(maps[map_idx].path, devname);
		map_idx++;
		dev->mem_resource[i].addr = mapaddr;
	}

	uio_res->nb_maps = map_idx;

	TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);

	return 0;
}
static int
test_memzone_reserve_max_aligned(void)
{
	const struct rte_memzone *mz;
	const struct rte_config *config;
	const struct rte_memseg *ms;
	int memseg_idx = 0;
	int memzone_idx = 0;
	uintptr_t addr_offset;
	size_t len = 0;
	void *last_addr;
	size_t maxlen = 0;

	/* random alignment */
	rte_srand((unsigned)rte_rdtsc());
	const unsigned align = 1 << ((rte_rand() % 8) + 5); /* from 128 up to 4k alignment */

	/* get pointer to global configuration */
	config = rte_eal_get_configuration();

	ms = rte_eal_get_physmem_layout();

	addr_offset = 0;

	for (memseg_idx = 0; memseg_idx < RTE_MAX_MEMSEG; memseg_idx++) {

		/* ignore smaller memsegs as they can only get smaller */
		if (ms[memseg_idx].len < maxlen)
			continue;

		/* align everything */
		last_addr = RTE_PTR_ALIGN_CEIL(ms[memseg_idx].addr,
				RTE_CACHE_LINE_SIZE);
		len = ms[memseg_idx].len -
				RTE_PTR_DIFF(last_addr, ms[memseg_idx].addr);
		len &= ~((size_t)RTE_CACHE_LINE_MASK);

		/* cycle through all memzones */
		for (memzone_idx = 0; memzone_idx < RTE_MAX_MEMZONE; memzone_idx++) {

			/* stop when reaching last allocated memzone */
			if (config->mem_config->memzone[memzone_idx].addr == NULL)
				break;

			/* check if the memzone is in our memseg and subtract length */
			if ((config->mem_config->memzone[memzone_idx].addr >=
			     ms[memseg_idx].addr) &&
			    (config->mem_config->memzone[memzone_idx].addr <
			     (RTE_PTR_ADD(ms[memseg_idx].addr, ms[memseg_idx].len)))) {
				/* since the zones can now be aligned and occasionally skip
				 * some space, we should calculate the length based on
				 * reported length and start addresses difference.
				 */
				len -= (uintptr_t) RTE_PTR_SUB(
						config->mem_config->memzone[memzone_idx].addr,
						(uintptr_t) last_addr);
				len -= config->mem_config->memzone[memzone_idx].len;
				last_addr = RTE_PTR_ADD(
						config->mem_config->memzone[memzone_idx].addr,
						(size_t)config->mem_config->memzone[memzone_idx].len);
			}
		}

		/* make sure we get the alignment offset */
		if (len > maxlen) {
			addr_offset = RTE_PTR_ALIGN_CEIL((uintptr_t)last_addr,
					align) - (uintptr_t)last_addr;
			maxlen = len;
		}
	}

	if (maxlen == 0 || maxlen == addr_offset) {
		printf("There is no space left for biggest %u-aligned memzone!\n",
				align);
		return 0;
	}

	maxlen -= addr_offset;

	mz = rte_memzone_reserve_aligned("max_zone_aligned", 0,
			SOCKET_ID_ANY, 0, align);
	if (mz == NULL) {
		printf("Failed to reserve a big chunk of memory\n");
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);
		return -1;
	}

	if (mz->len != maxlen) {
		printf("Memzone reserve with 0 size and alignment %u did not return"
				" biggest block\n", align);
		printf("Expected size = %zu, actual size = %zu\n",
				maxlen, mz->len);
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);
		return -1;
	}
	return 0;
}
static int
test_memzone_reserve_max(void)
{
	const struct rte_memzone *mz;
	const struct rte_config *config;
	const struct rte_memseg *ms;
	int memseg_idx = 0;
	int memzone_idx = 0;
	size_t len = 0;
	void *last_addr;
	size_t maxlen = 0;

	/* get pointer to global configuration */
	config = rte_eal_get_configuration();

	ms = rte_eal_get_physmem_layout();

	for (memseg_idx = 0; memseg_idx < RTE_MAX_MEMSEG; memseg_idx++) {
		/* ignore smaller memsegs as they can only get smaller */
		if (ms[memseg_idx].len < maxlen)
			continue;

		/* align everything */
		last_addr = RTE_PTR_ALIGN_CEIL(ms[memseg_idx].addr,
				RTE_CACHE_LINE_SIZE);
		len = ms[memseg_idx].len -
				RTE_PTR_DIFF(last_addr, ms[memseg_idx].addr);
		len &= ~((size_t)RTE_CACHE_LINE_MASK);

		/* cycle through all memzones */
		for (memzone_idx = 0; memzone_idx < RTE_MAX_MEMZONE; memzone_idx++) {

			/* stop when reaching last allocated memzone */
			if (config->mem_config->memzone[memzone_idx].addr == NULL)
				break;

			/* check if the memzone is in our memseg and subtract length */
			if ((config->mem_config->memzone[memzone_idx].addr >=
			     ms[memseg_idx].addr) &&
			    (config->mem_config->memzone[memzone_idx].addr <
			     (RTE_PTR_ADD(ms[memseg_idx].addr, ms[memseg_idx].len)))) {
				/* since the zones can now be aligned and occasionally skip
				 * some space, we should calculate the length based on
				 * reported length and start addresses difference. Addresses
				 * are allocated sequentially so we don't need to worry about
				 * them being in the right order.
				 */
				len -= RTE_PTR_DIFF(
						config->mem_config->memzone[memzone_idx].addr,
						last_addr);
				len -= config->mem_config->memzone[memzone_idx].len;
				last_addr = RTE_PTR_ADD(
						config->mem_config->memzone[memzone_idx].addr,
						(size_t)config->mem_config->memzone[memzone_idx].len);
			}
		}

		/* we don't need to calculate offset here since length
		 * is always cache-aligned */
		if (len > maxlen)
			maxlen = len;
	}

	if (maxlen == 0) {
		printf("There is no space left!\n");
		return 0;
	}

	mz = rte_memzone_reserve("max_zone", 0, SOCKET_ID_ANY, 0);
	if (mz == NULL) {
		printf("Failed to reserve a big chunk of memory\n");
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);
		return -1;
	}

	if (mz->len != maxlen) {
		printf("Memzone reserve with 0 size did not return biggest block\n");
		printf("Expected size = %zu, actual size = %zu\n",
				maxlen, mz->len);
		rte_dump_physmem_layout(stdout);
		rte_memzone_dump(stdout);
		return -1;
	}
	return 0;
}
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
		size_t page_sz, int flags, int mmap_flags)
{
	bool addr_is_hint, allow_shrink, unmap, no_align;
	uint64_t map_sz;
	void *mapped_addr, *aligned_addr;

	if (system_page_sz == 0)
		system_page_sz = sysconf(_SC_PAGESIZE);

	mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;

	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);

	addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
	allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
	unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;

	if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
			rte_eal_process_type() == RTE_PROC_PRIMARY)
		next_baseaddr = (void *) internal_config.base_virtaddr;

	if (requested_addr == NULL && next_baseaddr != NULL) {
		requested_addr = next_baseaddr;
		requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
		addr_is_hint = true;
	}

	/* we don't need alignment of resulting pointer in the following cases:
	 *
	 * 1. page size is equal to system size
	 * 2. we have a requested address, and it is page-aligned, and we will
	 *    be discarding the address if we get a different one.
	 *
	 * for all other cases, alignment is potentially necessary.
	 */
	no_align = (requested_addr != NULL &&
		requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
		!addr_is_hint) ||
		page_sz == system_page_sz;

	do {
		map_sz = no_align ? *size : *size + page_sz;
		if (map_sz > SIZE_MAX) {
			RTE_LOG(ERR, EAL, "Map size too big\n");
			rte_errno = E2BIG;
			return NULL;
		}

		mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ,
				mmap_flags, -1, 0);
		if (mapped_addr == MAP_FAILED && allow_shrink)
			*size -= page_sz;
	} while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0);

	/* align resulting address - if map failed, we will ignore the value
	 * anyway, so no need to add additional checks.
	 */
	aligned_addr = no_align ? mapped_addr :
			RTE_PTR_ALIGN(mapped_addr, page_sz);

	if (*size == 0) {
		RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
			strerror(errno));
		rte_errno = errno;
		return NULL;
	} else if (mapped_addr == MAP_FAILED) {
		RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
			strerror(errno));
		/* pass errno up the call chain */
		rte_errno = errno;
		return NULL;
	} else if (requested_addr != NULL && !addr_is_hint &&
			aligned_addr != requested_addr) {
		RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
			requested_addr, aligned_addr);
		munmap(mapped_addr, map_sz);
		rte_errno = EADDRNOTAVAIL;
		return NULL;
	} else if (requested_addr != NULL && addr_is_hint &&
			aligned_addr != requested_addr) {
		RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
			requested_addr, aligned_addr);
		RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory into secondary processes\n");
	} else if (next_baseaddr != NULL) {
		next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
	}

	RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
		aligned_addr, *size);

	if (unmap) {
		munmap(mapped_addr, map_sz);
	} else if (!no_align) {
		void *map_end, *aligned_end;
		size_t before_len, after_len;

		/* when we reserve space with alignment, we add alignment to
		 * mapping size. On 32-bit, if 1GB alignment was requested, this
		 * would waste 1GB of address space, which is a luxury we cannot
		 * afford. so, if alignment was performed, check if any unneeded
		 * address space can be unmapped back.
		 */
		map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
		aligned_end = RTE_PTR_ADD(aligned_addr, *size);

		/* unmap space before aligned mmap address */
		before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
		if (before_len > 0)
			munmap(mapped_addr, before_len);

		/* unmap space after aligned end mmap address */
		after_len = RTE_PTR_DIFF(map_end, aligned_end);
		if (after_len > 0)
			munmap(aligned_end, after_len);
	}

	return aligned_addr;
}
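/*
 * The trimming step above is the classic way to obtain an aligned anonymous
 * mapping without permanently wasting address space: over-reserve by the
 * alignment, then unmap the slack on both sides. The same trick in plain
 * POSIX, independent of the EAL (a sketch; align must be a power of two no
 * smaller than the system page size):
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

static void *
mmap_aligned(size_t size, size_t align)
{
	/* over-allocate so that an aligned sub-range is guaranteed to exist */
	size_t map_sz = size + align;
	uint8_t *base = mmap(NULL, map_sz, PROT_NONE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED)
		return NULL;

	uint8_t *aligned = (uint8_t *)(((uintptr_t)base + align - 1) &
			~((uintptr_t)align - 1));

	/* give back the slack before and after the aligned range */
	if (aligned > base)
		munmap(base, aligned - base);
	if (aligned + size < base + map_sz)
		munmap(aligned + size, (base + map_sz) - (aligned + size));
	return aligned;
}

int main(void)
{
	void *p = mmap_aligned(1 << 20, 1 << 21); /* 1 MB area, 2 MB aligned */
	printf("aligned mapping at %p\n", p);
	return 0;
}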
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
		int socket_id, unsigned flags, unsigned align, unsigned bound)
{
	struct rte_mem_config *mcfg;
	unsigned i = 0;
	int memseg_idx = -1;
	uint64_t addr_offset, seg_offset = 0;
	size_t requested_len;
	size_t memseg_len = 0;
	phys_addr_t memseg_physaddr;
	void *memseg_addr;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* no more room in config */
	if (mcfg->memzone_idx >= RTE_MAX_MEMZONE) {
		RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
		rte_errno = ENOSPC;
		return NULL;
	}

	/* zone already exists */
	if ((memzone_lookup_thread_unsafe(name)) != NULL) {
		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
			__func__, name);
		rte_errno = EEXIST;
		return NULL;
	}

	/* if alignment is not a power of two */
	if (align && !rte_is_power_of_2(align)) {
		RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
				align);
		rte_errno = EINVAL;
		return NULL;
	}

	/* alignment less than cache size is not allowed */
	if (align < RTE_CACHE_LINE_SIZE)
		align = RTE_CACHE_LINE_SIZE;

	/* align length on cache boundary. Check for overflow before doing so */
	if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
		rte_errno = EINVAL; /* requested size too big */
		return NULL;
	}
	len += RTE_CACHE_LINE_MASK;
	len &= ~((size_t) RTE_CACHE_LINE_MASK);

	/* save minimal requested length */
	requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);

	/* check that boundary condition is valid */
	if (bound != 0 &&
			(requested_len > bound || !rte_is_power_of_2(bound))) {
		rte_errno = EINVAL;
		return NULL;
	}

	/* find the smallest segment matching requirements */
	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
		/* last segment */
		if (free_memseg[i].addr == NULL)
			break;

		/* empty segment, skip it */
		if (free_memseg[i].len == 0)
			continue;

		/* bad socket ID */
		if (socket_id != SOCKET_ID_ANY &&
		    free_memseg[i].socket_id != SOCKET_ID_ANY &&
		    socket_id != free_memseg[i].socket_id)
			continue;

		/*
		 * calculate offset to closest alignment that
		 * meets boundary conditions.
		 */
		addr_offset = align_phys_boundary(free_memseg + i,
			requested_len, align, bound);

		/* check len */
		if ((requested_len + addr_offset) > free_memseg[i].len)
			continue;

		/* check flags for hugepage sizes */
		if ((flags & RTE_MEMZONE_2MB) &&
				free_memseg[i].hugepage_sz == RTE_PGSIZE_1G)
			continue;
		if ((flags & RTE_MEMZONE_1GB) &&
				free_memseg[i].hugepage_sz == RTE_PGSIZE_2M)
			continue;
		if ((flags & RTE_MEMZONE_16MB) &&
				free_memseg[i].hugepage_sz == RTE_PGSIZE_16G)
			continue;
		if ((flags & RTE_MEMZONE_16GB) &&
				free_memseg[i].hugepage_sz == RTE_PGSIZE_16M)
			continue;

		/* this segment is the best until now */
		if (memseg_idx == -1) {
			memseg_idx = i;
			memseg_len = free_memseg[i].len;
			seg_offset = addr_offset;
		}
		/* find the biggest contiguous zone */
		else if (len == 0) {
			if (free_memseg[i].len > memseg_len) {
				memseg_idx = i;
				memseg_len = free_memseg[i].len;
				seg_offset = addr_offset;
			}
		}
		/*
		 * find the smallest (we already checked that the current
		 * zone length is > len)
		 */
		else if (free_memseg[i].len + align < memseg_len ||
				(free_memseg[i].len <= memseg_len + align &&
				addr_offset < seg_offset)) {
			memseg_idx = i;
			memseg_len = free_memseg[i].len;
			seg_offset = addr_offset;
		}
	}

	/* no segment found */
	if (memseg_idx == -1) {
		/*
		 * If the RTE_MEMZONE_SIZE_HINT_ONLY flag is specified,
		 * try allocating again without the size parameter,
		 * otherwise fail.
		 */
		if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) &&
		    ((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB) ||
		     (flags & RTE_MEMZONE_16MB) || (flags & RTE_MEMZONE_16GB)))
			return memzone_reserve_aligned_thread_unsafe(name,
				len, socket_id, 0, align, bound);

		rte_errno = ENOMEM;
		return NULL;
	}

	/* save aligned physical and virtual addresses */
	memseg_physaddr = free_memseg[memseg_idx].phys_addr + seg_offset;
	memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr,
			(uintptr_t) seg_offset);

	/* if we are looking for a biggest memzone */
	if (len == 0) {
		if (bound == 0)
			requested_len = memseg_len - seg_offset;
		else
			requested_len = RTE_ALIGN_CEIL(memseg_physaddr + 1,
				bound) - memseg_physaddr;
	}

	/* set length to correct value */
	len = (size_t)seg_offset + requested_len;

	/* update our internal state */
	free_memseg[memseg_idx].len -= len;
	free_memseg[memseg_idx].phys_addr += len;
	free_memseg[memseg_idx].addr =
		(char *)free_memseg[memseg_idx].addr + len;

	/* fill the zone in config */
	struct rte_memzone *mz = &mcfg->memzone[mcfg->memzone_idx++];
	snprintf(mz->name, sizeof(mz->name), "%s", name);
	mz->phys_addr = memseg_physaddr;
	mz->addr = memseg_addr;
	mz->len = requested_len;
	mz->hugepage_sz = free_memseg[memseg_idx].hugepage_sz;
	mz->socket_id = free_memseg[memseg_idx].socket_id;
	mz->flags = 0;
	mz->memseg_id = memseg_idx;

	return mz;
}
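/*
 * For reference, a caller might exercise the fallback path above like this
 * (hypothetical zone name; with RTE_MEMZONE_SIZE_HINT_ONLY set, the allocator
 * retries without the page-size constraint instead of failing with ENOMEM):
 */
const struct rte_memzone *mz = rte_memzone_reserve_aligned(
		"demo_zone",		/* hypothetical name */
		1 << 20,		/* 1 MB */
		SOCKET_ID_ANY,
		RTE_MEMZONE_2MB | RTE_MEMZONE_SIZE_HINT_ONLY,
		RTE_CACHE_LINE_SIZE);
if (mz == NULL)
	printf("reservation failed: %s\n", rte_strerror(rte_errno));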
int
grant_node_create(uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr,
		char *val_str, size_t str_size)
{
	uint64_t start_index;
	int pg_size;
	uint32_t pg_shift;
	void *ptr = NULL;
	uint32_t count, entries_per_pg;
	uint32_t i, j = 0, k = 0;
	uint32_t *gref_tmp;
	int first = 1;
	char tmp_str[PATH_MAX] = {0};
	int rv = -1;

	pg_size = getpagesize();
	if (rte_is_power_of_2(pg_size) == 0) {
		return -1;
	}
	pg_shift = rte_bsf32(pg_size);
	if (pg_size % sizeof(struct grant_node_item)) {
		RTE_LOG(ERR, PMD, "pg_size isn't a multiple of grant node item\n");
		return -1;
	}

	entries_per_pg = pg_size / sizeof(struct grant_node_item);
	count = (pg_num + entries_per_pg - 1) / entries_per_pg;
	gref_tmp = malloc(count * sizeof(uint32_t));
	if (gref_tmp == NULL)
		return -1;
	ptr = gntalloc(pg_size * count, gref_tmp, &start_index);
	if (ptr == NULL) {
		RTE_LOG(ERR, PMD, "%s: gntalloc error of %d pages\n",
				__func__, count);
		free(gref_tmp);
		return -1;
	}

	while (j < pg_num) {
		if (first) {
			rv = snprintf(val_str, str_size, "%u", gref_tmp[k]);
			first = 0;
		} else {
			snprintf(tmp_str, PATH_MAX, "%s", val_str);
			rv = snprintf(val_str, str_size, "%s,%u",
					tmp_str, gref_tmp[k]);
		}
		k++;
		if (rv == -1)
			break;

		for (i = 0; i < entries_per_pg && j < pg_num; i++) {
			((struct grant_node_item *)ptr)->gref = gref_arr[j];
			((struct grant_node_item *)ptr)->pfn =
					pa_arr[j] >> pg_shift;
			ptr = RTE_PTR_ADD(ptr, sizeof(struct grant_node_item));
			j++;
		}
	}
	if (rv == -1) {
		gntfree(ptr, pg_size * count, start_index);
	} else
		rv = 0;

	free(gref_tmp);
	return rv;
}
/*
 * This creates the memory mappings in the secondary process to match that of
 * the server process. It goes through each memory segment in the DPDK runtime
 * configuration and finds the hugepages which form that segment, mapping them
 * in order to form a contiguous block in the virtual memory space.
 */
static int
rte_eal_hugepage_attach(void)
{
	const struct rte_mem_config *mcfg =
			rte_eal_get_configuration()->mem_config;
	const struct hugepage *hp = NULL;
	unsigned num_hp = 0;
	unsigned i, s = 0; /* s used to track the segment number */
	off_t size;
	int fd, fd_zero = -1, fd_hugepage = -1;

	if (aslr_enabled() > 0) {
		RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
				"(ASLR) is enabled in the kernel.\n");
		RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory "
				"into secondary processes\n");
	}

	fd_zero = open("/dev/zero", O_RDONLY);
	if (fd_zero < 0) {
		RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
		goto error;
	}
	fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY);
	if (fd_hugepage < 0) {
		RTE_LOG(ERR, EAL, "Could not open %s\n",
				eal_hugepage_info_path());
		goto error;
	}

	size = getFileSize(fd_hugepage);
	hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
	if (hp == NULL) {
		RTE_LOG(ERR, EAL, "Could not mmap %s\n",
				eal_hugepage_info_path());
		goto error;
	}

	num_hp = size / sizeof(struct hugepage);
	RTE_LOG(DEBUG, EAL, "Analysing %u hugepages\n", num_hp);

	while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0) {
		void *addr, *base_addr;
		uintptr_t offset = 0;

		/* fd_zero is mmapped to get a contiguous block of virtual
		 * addresses of the appropriate memseg size.
		 * use mmap to attempt to get an identical address as server.
		 */
		base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
				PROT_READ, MAP_PRIVATE, fd_zero, 0);
		if (base_addr == MAP_FAILED ||
		    base_addr != mcfg->memseg[s].addr) {
			RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
				"in /dev/zero to requested address [%p]\n",
				(unsigned long long)mcfg->memseg[s].len,
				mcfg->memseg[s].addr);
			if (aslr_enabled() > 0)
				RTE_LOG(ERR, EAL, "It is recommended to disable ASLR in the kernel "
					"and retry running both primary and secondary processes\n");
			goto error;
		}
		/* free memory so we can map the hugepages into the space */
		munmap(base_addr, mcfg->memseg[s].len);

		/* find the hugepages for this segment and map them
		 * we don't need to worry about order, as the server sorted the
		 * entries before it did the second mmap of them */
		for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++) {
			if (hp[i].memseg_id == (int)s) {
				fd = open(hp[i].filepath, O_RDWR);
				if (fd < 0) {
					RTE_LOG(ERR, EAL, "Could not open %s\n",
						hp[i].filepath);
					goto error;
				}
				addr = mmap(RTE_PTR_ADD(base_addr, offset),
						hp[i].size,
						PROT_READ | PROT_WRITE,
						MAP_SHARED | MAP_FIXED, fd, 0);
				close(fd); /* close file both on success and on failure */
				if (addr == MAP_FAILED) {
					RTE_LOG(ERR, EAL, "Could not mmap %s\n",
						hp[i].filepath);
					goto error;
				}
				offset += hp[i].size;
			}
		}
		RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,
				(unsigned long long)mcfg->memseg[s].len);
		s++;
	}
	close(fd_zero);
	close(fd_hugepage);
	return 0;

error:
	if (fd_zero >= 0)
		close(fd_zero);
	if (fd_hugepage >= 0)
		close(fd_hugepage);
	return -1;
}
int
rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size,
		const char *name)
{
	const struct memseg_cache_entry *ms_cache, *entry;
	struct ivshmem_config *config;
	char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
	char cfg_file_path[PATH_MAX];
	unsigned remaining_len, tmplen, iter;
	uint64_t shared_mem_size, zero_size, total_size;

	if (buffer == NULL || name == NULL)
		return -1;

	config = get_config_by_name(name);

	if (config == NULL) {
		RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
		return -1;
	}

	rte_spinlock_lock(&config->sl);

	/* prepare metadata file path */
	snprintf(cfg_file_path, sizeof(cfg_file_path),
			IVSHMEM_CONFIG_FILE_FMT, config->metadata->name);

	ms_cache = config->memseg_cache;

	cmdline_ptr = cmdline;
	remaining_len = sizeof(cmdline);

	shared_mem_size = 0;
	iter = 0;

	while ((ms_cache[iter].len != 0) &&
			(iter < RTE_DIM(config->metadata->entry))) {

		entry = &ms_cache[iter];

		/* Offset and sizes within the current pathname */
		tmplen = snprintf(cmdline_ptr, remaining_len,
				IVSHMEM_QEMU_CMD_FD_FMT,
				entry->filepath, entry->offset, entry->len);

		shared_mem_size += entry->len;

		cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
		remaining_len -= tmplen;

		if (remaining_len == 0) {
			RTE_LOG(ERR, EAL, "Command line too long!\n");
			rte_spinlock_unlock(&config->sl);
			return -1;
		}

		iter++;
	}

	total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED);
	zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED;

	/* add /dev/zero to command-line to fill the space */
	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
			"/dev/zero",
			(uint64_t)0x0,
			zero_size);

	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
	remaining_len -= tmplen;

	if (remaining_len == 0) {
		RTE_LOG(ERR, EAL, "Command line too long!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}

	/* add metadata file to the end of command-line */
	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
			cfg_file_path,
			(uint64_t)0x0,
			METADATA_SIZE_ALIGNED);

	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
	remaining_len -= tmplen;

	if (remaining_len == 0) {
		RTE_LOG(ERR, EAL, "Command line too long!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}

	/* if current length of the command line is bigger than the buffer
	 * supplied by the user, or if command-line is bigger than what
	 * IVSHMEM accepts */
	if ((sizeof(cmdline) - remaining_len) > size) {
		RTE_LOG(ERR, EAL, "Buffer is too short!\n");
		rte_spinlock_unlock(&config->sl);
		return -1;
	}
	/* complete the command-line */
	snprintf(buffer, size, IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
			total_size >> 20,
			cmdline);

	rte_spinlock_unlock(&config->sl);

	return 0;
}
/*
 * This function maps the grant node of a vring or mbuf pool into a contiguous
 * virtual address space, and returns the mapped address, pfn array and index
 * array.
 * @param gntnode
 *  Pointer to grant node
 * @param domid
 *  Guest domain id
 * @param ppfn
 *  Pointer to pfn array; caller should free this array
 * @param pgs
 *  Pointer to number of pages
 * @param ppindex
 *  Pointer to index array, used to release grefs when freeing this node
 * @return
 *  Pointer to mapped virtual address, NULL on failure
 */
static void *
map_gntnode(struct xen_gntnode *gntnode, int domid, uint32_t **ppfn,
		uint32_t *pgs, uint64_t **ppindex)
{
	struct xen_gnt *gnt;
	uint32_t i, j;
	size_t total_pages = 0;
	void *addr;
	uint32_t *pfn;
	uint64_t *pindex;
	uint32_t pfn_num = 0;
	int pg_sz;

	if (gntnode == NULL)
		return NULL;

	pg_sz = getpagesize();
	for (i = 0; i < gntnode->gnt_num; i++) {
		gnt = gntnode->gnt_info + i;
		total_pages += cal_pagenum(gnt);
	}
	if ((addr = get_xen_virtual(total_pages * pg_sz, pg_sz)) == NULL) {
		RTE_LOG(ERR, XENHOST, "  %s: failed get_xen_virtual\n",
				__func__);
		return NULL;
	}
	pfn = calloc(total_pages, (size_t)sizeof(uint32_t));
	pindex = calloc(total_pages, (size_t)sizeof(uint64_t));
	if (pfn == NULL || pindex == NULL) {
		free_xen_virtual(addr, total_pages * pg_sz, pg_sz);
		free(pfn);
		free(pindex);
		return NULL;
	}

	RTE_LOG(INFO, XENHOST, "    %s: total pages:%zu, map to [%p, %p]\n",
		__func__, total_pages, addr,
		RTE_PTR_ADD(addr, total_pages * pg_sz - 1));
	for (i = 0; i < gntnode->gnt_num; i++) {
		gnt = gntnode->gnt_info + i;
		for (j = 0; j < (PAGE_PFNNUM) / 2; j++) {
			if ((gnt->gref_pfn[j * 2].gref) <= 0)
				goto _end;
			/* alternative: batch map, or through libxc */
			if (xen_grant_mmap(RTE_PTR_ADD(addr, pfn_num * pg_sz),
					domid,
					gnt->gref_pfn[j * 2].gref,
					&pindex[pfn_num]) == NULL) {
				goto mmap_failed;
			}
			pfn[pfn_num] = gnt->gref_pfn[j * 2 + 1].pfn_num;
			pfn_num++;
		}
	}

mmap_failed:
	if (pfn_num)
		munmap(addr, pfn_num * pg_sz);
	for (i = 0; i < pfn_num; i++) {
		xen_unmap_grant_ref(pindex[i]);
	}
	free(pindex);
	free(pfn);
	return NULL;

_end:
	if (ppindex)
		*ppindex = pindex;
	else
		free(pindex);
	if (ppfn)
		*ppfn = pfn;
	else
		free(pfn);
	if (pgs)
		*pgs = total_pages;

	return addr;
}