void cmd_mem_layout(void)
{
    const struct rte_memseg *memseg = rte_eal_get_physmem_layout();

    for (uint32_t i = 0; i < RTE_MAX_MEMSEG; i++) {
        if (memseg[i].addr == NULL)
            break;

        const char *sz_str;
        switch (memseg[i].hugepage_sz >> 20) {
        case 2:
            sz_str = "2MB";
            break;
        case 1024:
            sz_str = "1GB";
            break;
        default:
            sz_str = "??";
        }

        plog_info("Segment %u: [%#lx-%#lx] at %p using %zu pages of %s\n",
                  i,
                  memseg[i].phys_addr,
                  memseg[i].phys_addr + memseg[i].len,
                  memseg[i].addr,
                  memseg[i].len/memseg[i].hugepage_sz,
                  sz_str);
    }
}
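/*
 * A minimal companion sketch, not part of the snippet above: the same
 * legacy rte_memseg walk can be reused to total the mapped hugepage
 * memory, which is what rte_eal_get_physmem_size() reports. The helper
 * name is illustrative.
 */
static uint64_t
mem_layout_total_bytes(void)
{
    const struct rte_memseg *memseg = rte_eal_get_physmem_layout();
    uint64_t total = 0;

    for (uint32_t i = 0; i < RTE_MAX_MEMSEG; i++) {
        if (memseg[i].addr == NULL)
            break;
        total += memseg[i].len;
    }
    return total;
}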
/*
 * Init the memzone subsystem
 */
int
rte_eal_memzone_init(void)
{
    struct rte_mem_config *mcfg;
    const struct rte_memseg *memseg;

    /* get pointer to global configuration */
    mcfg = rte_eal_get_configuration()->mem_config;

    /* secondary processes don't need to initialise anything */
    if (rte_eal_process_type() == RTE_PROC_SECONDARY)
        return 0;

    memseg = rte_eal_get_physmem_layout();
    if (memseg == NULL) {
        RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
        return -1;
    }

    rte_rwlock_write_lock(&mcfg->mlock);

    /* delete all zones */
    mcfg->memzone_cnt = 0;
    memset(mcfg->memzone, 0, sizeof(mcfg->memzone));

    rte_rwlock_write_unlock(&mcfg->mlock);

    return rte_eal_malloc_heap_init();
}
int
vfio_type1_dma_map(int vfio_container_fd)
{
    const struct rte_memseg *ms = rte_eal_get_physmem_layout();
    int i, ret;

    /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
    for (i = 0; i < RTE_MAX_MEMSEG; i++) {
        struct vfio_iommu_type1_dma_map dma_map;

        if (ms[i].addr == NULL)
            break;

        memset(&dma_map, 0, sizeof(dma_map));
        dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
        dma_map.vaddr = ms[i].addr_64;
        dma_map.size = ms[i].len;
        dma_map.iova = ms[i].phys_addr;
        dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;

        ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
        if (ret) {
            RTE_LOG(ERR, EAL, "  cannot set up DMA remapping, "
                    "error %i (%s)\n", errno, strerror(errno));
            return -1;
        }
    }

    return 0;
}
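/*
 * Hedged usage sketch (not from the EAL source above): how a type1 VFIO
 * container might be opened and probed before vfio_type1_dma_map() is
 * called. The ioctls are standard linux/vfio.h definitions; attaching a
 * device group and the VFIO_SET_IOMMU step are omitted here, and both
 * must happen before any DMA mapping succeeds.
 */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int
open_type1_container(void)
{
    int fd = open("/dev/vfio/vfio", O_RDWR);

    if (fd < 0)
        return -1;
    /* sanity-check API version and type1 IOMMU support */
    if (ioctl(fd, VFIO_GET_API_VERSION) != VFIO_API_VERSION ||
        ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) <= 0) {
        close(fd);
        return -1;
    }
    return fd;
}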
/**
 * Register mempool as a memory region.
 *
 * @param pd
 *   Pointer to protection domain.
 * @param mp
 *   Pointer to memory pool.
 *
 * @return
 *   Memory region pointer, NULL in case of error.
 */
struct ibv_mr *
mlx5_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp)
{
    const struct rte_memseg *ms = rte_eal_get_physmem_layout();
    uintptr_t start = mp->elt_va_start;
    uintptr_t end = mp->elt_va_end;
    unsigned int i;

    DEBUG("mempool %p area start=%p end=%p size=%zu",
          (const void *)mp, (void *)start, (void *)end,
          (size_t)(end - start));
    /* Round start and end to page boundary if found in memory segments. */
    for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
        uintptr_t addr = (uintptr_t)ms[i].addr;
        size_t len = ms[i].len;
        unsigned int align = ms[i].hugepage_sz;

        if ((start > addr) && (start < addr + len))
            start = RTE_ALIGN_FLOOR(start, align);
        if ((end > addr) && (end < addr + len))
            end = RTE_ALIGN_CEIL(end, align);
    }
    DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
          (const void *)mp, (void *)start, (void *)end,
          (size_t)(end - start));
    return ibv_reg_mr(pd,
                      (void *)start,
                      end - start,
                      IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
}
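/*
 * Worked illustration of the alignment macros used above (addresses are
 * examples only). With a 2 MB hugepage (align == 0x200000):
 *   RTE_ALIGN_FLOOR(0x7f0000201000, 0x200000) == 0x7f0000200000
 *   RTE_ALIGN_CEIL(0x7f0000201000, 0x200000)  == 0x7f0000400000
 * i.e. the mempool bounds are widened outward to full hugepage
 * boundaries so the resulting MR covers whole pages.
 */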
int
test_memory(void)
{
    uint64_t s;
    unsigned i;
    size_t j;
    const struct rte_memseg *mem;

    /*
     * dump the mapped memory: the python-expect script checks
     * that at least one line is dumped
     */
    printf("Dump memory layout\n");
    rte_dump_physmem_layout();

    /* check that memory size is != 0 */
    s = rte_eal_get_physmem_size();
    if (s == 0) {
        printf("No memory detected\n");
        return -1;
    }

    /* try to read memory (should not segfault) */
    mem = rte_eal_get_physmem_layout();
    for (i = 0; i < RTE_MAX_MEMSEG && mem[i].addr != NULL; i++) {
        /* check memory; j must be wide enough for a full segment */
        for (j = 0; j < mem[i].len; j++) {
            *((volatile uint8_t *) mem[i].addr + j);
        }
    }

    return 0;
}
static const struct rte_memzone *
queue_dma_zone_reserve(const char *queue_name, uint32_t queue_size,
                       int socket_id)
{
    const struct rte_memzone *mz;
    unsigned memzone_flags = 0;
    const struct rte_memseg *ms;

    PMD_INIT_FUNC_TRACE();

    mz = rte_memzone_lookup(queue_name);
    if (mz != NULL) {
        if (((size_t)queue_size <= mz->len) &&
            ((socket_id == SOCKET_ID_ANY) ||
             (socket_id == mz->socket_id))) {
            PMD_DRV_LOG(DEBUG, "re-use memzone already "
                        "allocated for %s", queue_name);
            return mz;
        }
        PMD_DRV_LOG(ERR, "Incompatible memzone already "
                    "allocated %s, size %u, socket %d. "
                    "Requested size %u, socket %d",
                    queue_name, (uint32_t)mz->len,
                    mz->socket_id, queue_size, socket_id);
        return NULL;
    }

    PMD_DRV_LOG(DEBUG, "Allocate memzone for %s, size %u on socket %d",
                queue_name, queue_size, socket_id);

    ms = rte_eal_get_physmem_layout();
    switch (ms[0].hugepage_sz) {
    case (RTE_PGSIZE_2M):
        memzone_flags = RTE_MEMZONE_2MB;
        break;
    case (RTE_PGSIZE_1G):
        memzone_flags = RTE_MEMZONE_1GB;
        break;
    case (RTE_PGSIZE_16M):
        memzone_flags = RTE_MEMZONE_16MB;
        break;
    case (RTE_PGSIZE_16G):
        memzone_flags = RTE_MEMZONE_16GB;
        break;
    default:
        memzone_flags = RTE_MEMZONE_SIZE_HINT_ONLY;
    }

#ifdef RTE_LIBRTE_XEN_DOM0
    return rte_memzone_reserve_bounded(queue_name, queue_size,
                                       socket_id, 0,
                                       RTE_CACHE_LINE_SIZE,
                                       RTE_PGSIZE_2M);
#else
    return rte_memzone_reserve_aligned(queue_name, queue_size,
                                       socket_id, memzone_flags,
                                       queue_size);
#endif
}
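/*
 * Hypothetical call-site sketch (queue name and size are illustrative,
 * not taken from the driver above): reserve a 4 KB DMA-able region on
 * the caller's socket and zero it before handing it to hardware.
 */
static const struct rte_memzone *
example_alloc_queue_mem(void)
{
    const struct rte_memzone *mz;

    mz = queue_dma_zone_reserve("example_qp_tx_0", 4096, rte_socket_id());
    if (mz == NULL)
        return NULL;
    memset(mz->addr, 0, 4096);
    return mz;
}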
/*
 * Init the memzone subsystem
 */
int
rte_eal_memzone_init(void)
{
    struct rte_mem_config *mcfg;
    const struct rte_memseg *memseg;
    unsigned i = 0;

    /* get pointer to global configuration */
    mcfg = rte_eal_get_configuration()->mem_config;

    /* mirror the runtime memsegs from config */
    free_memseg = mcfg->free_memseg;

    /* secondary processes don't need to initialise anything */
    if (rte_eal_process_type() == RTE_PROC_SECONDARY)
        return 0;

    memseg = rte_eal_get_physmem_layout();
    if (memseg == NULL) {
        RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
        return -1;
    }

    rte_rwlock_write_lock(&mcfg->mlock);

    /* fill in uninitialized free_memsegs */
    for (i = 0; i < RTE_MAX_MEMSEG; i++) {
        if (memseg[i].addr == NULL)
            break;
        if (free_memseg[i].addr != NULL)
            continue;
        memcpy(&free_memseg[i], &memseg[i], sizeof(struct rte_memseg));
    }

    /* make all zones cache-aligned */
    for (i = 0; i < RTE_MAX_MEMSEG; i++) {
        if (free_memseg[i].addr == NULL)
            break;
        if (memseg_sanitize(&free_memseg[i]) < 0) {
            RTE_LOG(ERR, EAL, "%s(): Sanity check failed\n", __func__);
            rte_rwlock_write_unlock(&mcfg->mlock);
            return -1;
        }
    }

    /* delete all zones */
    mcfg->memzone_idx = 0;
    memset(mcfg->memzone, 0, sizeof(mcfg->memzone));

    rte_rwlock_write_unlock(&mcfg->mlock);

    return 0;
}
uint8_t
number_of_sockets(void)
{
    int sockets = 0;
    int i;
    const struct rte_memseg *ms = rte_eal_get_physmem_layout();

    for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) {
        if (sockets < ms[i].socket_id)
            sockets = ms[i].socket_id;
    }

    /* Number of sockets = maximum socket_id + 1 */
    return ++sockets;
}
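/*
 * Hedged usage sketch (pool parameters are placeholders): one mbuf pool
 * per detected socket, so lcores can allocate NUMA-local buffers. Note
 * that number_of_sockets() only sees sockets that actually have
 * hugepage memory mapped.
 */
static int
example_create_per_socket_pools(struct rte_mempool *pools[])
{
    uint8_t n = number_of_sockets();

    for (uint8_t s = 0; s < n; s++) {
        char name[RTE_MEMPOOL_NAMESIZE];

        snprintf(name, sizeof(name), "mbuf_pool_s%u", s);
        pools[s] = rte_pktmbuf_pool_create(name, 8192, 256, 0,
                                           RTE_MBUF_DEFAULT_BUF_SIZE, s);
        if (pools[s] == NULL)
            return -1;
    }
    return 0;
}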
static void
eal_check_mem_on_local_socket(void)
{
    const struct rte_memseg *ms;
    int i, socket_id;

    socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);

    ms = rte_eal_get_physmem_layout();

    for (i = 0; i < RTE_MAX_MEMSEG; i++)
        if (ms[i].socket_id == socket_id &&
            ms[i].len > 0)
            return;

    RTE_LOG(WARNING, EAL,
            "WARNING: Master core has no memory on local socket!\n");
}
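/*
 * Hedged operational note: this warning typically fires when the EAL
 * was given no hugepage memory on the master lcore's NUMA node, e.g.
 * "--socket-mem=0,1024" while the master lcore runs on socket 0.
 * Execution still proceeds, but every access crosses the interconnect.
 */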
static int
test_memzone_reserve_max_aligned(void)
{
    const struct rte_memzone *mz;
    const struct rte_config *config;
    const struct rte_memseg *ms;
    int memseg_idx = 0;
    int memzone_idx = 0;
    uintptr_t addr_offset;
    size_t len = 0;
    void *last_addr;
    size_t maxlen = 0;

    /* random alignment, from 32 up to 4k */
    rte_srand((unsigned)rte_rdtsc());
    const unsigned align = 1 << ((rte_rand() % 8) + 5);

    /* get pointer to global configuration */
    config = rte_eal_get_configuration();

    ms = rte_eal_get_physmem_layout();

    addr_offset = 0;

    for (memseg_idx = 0; memseg_idx < RTE_MAX_MEMSEG; memseg_idx++) {

        /* ignore smaller memsegs as they can only get smaller */
        if (ms[memseg_idx].len < maxlen)
            continue;

        /* align everything */
        last_addr = RTE_PTR_ALIGN_CEIL(ms[memseg_idx].addr,
                                       RTE_CACHE_LINE_SIZE);
        len = ms[memseg_idx].len -
              RTE_PTR_DIFF(last_addr, ms[memseg_idx].addr);
        len &= ~((size_t)RTE_CACHE_LINE_MASK);

        /* cycle through all memzones */
        for (memzone_idx = 0; memzone_idx < RTE_MAX_MEMZONE;
             memzone_idx++) {

            /* stop when reaching last allocated memzone */
            if (config->mem_config->memzone[memzone_idx].addr == NULL)
                break;

            /* check if the memzone is in our memseg and subtract length */
            if ((config->mem_config->memzone[memzone_idx].addr >=
                 ms[memseg_idx].addr) &&
                (config->mem_config->memzone[memzone_idx].addr <
                 (RTE_PTR_ADD(ms[memseg_idx].addr,
                              ms[memseg_idx].len)))) {
                /* since the zones can now be aligned and occasionally skip
                 * some space, we should calculate the length based on
                 * reported length and start addresses difference.
                 */
                len -= (uintptr_t)RTE_PTR_SUB(
                        config->mem_config->memzone[memzone_idx].addr,
                        (uintptr_t)last_addr);
                len -= config->mem_config->memzone[memzone_idx].len;
                last_addr = RTE_PTR_ADD(
                        config->mem_config->memzone[memzone_idx].addr,
                        (size_t)config->mem_config->memzone[memzone_idx].len);
            }
        }

        /* make sure we get the alignment offset */
        if (len > maxlen) {
            addr_offset = RTE_PTR_ALIGN_CEIL((uintptr_t)last_addr, align) -
                          (uintptr_t)last_addr;
            maxlen = len;
        }
    }

    if (maxlen == 0 || maxlen == addr_offset) {
        printf("There is no space left for biggest %u-aligned memzone!\n",
               align);
        return 0;
    }

    maxlen -= addr_offset;

    mz = rte_memzone_reserve_aligned("max_zone_aligned", 0,
                                     SOCKET_ID_ANY, 0, align);
    if (mz == NULL) {
        printf("Failed to reserve a big chunk of memory\n");
        rte_dump_physmem_layout(stdout);
        rte_memzone_dump(stdout);
        return -1;
    }

    if (mz->len != maxlen) {
        printf("Memzone reserve with 0 size and alignment %u did not return"
               " biggest block\n", align);
        printf("Expected size = %zu, actual size = %zu\n",
               maxlen, mz->len);
        rte_dump_physmem_layout(stdout);
        rte_memzone_dump(stdout);
        return -1;
    }

    return 0;
}
static int
test_memzone_reserve_max(void)
{
    const struct rte_memzone *mz;
    const struct rte_config *config;
    const struct rte_memseg *ms;
    int memseg_idx = 0;
    int memzone_idx = 0;
    size_t len = 0;
    void *last_addr;
    size_t maxlen = 0;

    /* get pointer to global configuration */
    config = rte_eal_get_configuration();

    ms = rte_eal_get_physmem_layout();

    for (memseg_idx = 0; memseg_idx < RTE_MAX_MEMSEG; memseg_idx++) {

        /* ignore smaller memsegs as they can only get smaller */
        if (ms[memseg_idx].len < maxlen)
            continue;

        /* align everything */
        last_addr = RTE_PTR_ALIGN_CEIL(ms[memseg_idx].addr,
                                       RTE_CACHE_LINE_SIZE);
        len = ms[memseg_idx].len -
              RTE_PTR_DIFF(last_addr, ms[memseg_idx].addr);
        len &= ~((size_t)RTE_CACHE_LINE_MASK);

        /* cycle through all memzones */
        for (memzone_idx = 0; memzone_idx < RTE_MAX_MEMZONE;
             memzone_idx++) {

            /* stop when reaching last allocated memzone */
            if (config->mem_config->memzone[memzone_idx].addr == NULL)
                break;

            /* check if the memzone is in our memseg and subtract length */
            if ((config->mem_config->memzone[memzone_idx].addr >=
                 ms[memseg_idx].addr) &&
                (config->mem_config->memzone[memzone_idx].addr <
                 (RTE_PTR_ADD(ms[memseg_idx].addr,
                              ms[memseg_idx].len)))) {
                /* since the zones can now be aligned and occasionally skip
                 * some space, we should calculate the length based on
                 * reported length and start addresses difference. Addresses
                 * are allocated sequentially so we don't need to worry about
                 * them being in the right order.
                 */
                len -= RTE_PTR_DIFF(
                        config->mem_config->memzone[memzone_idx].addr,
                        last_addr);
                len -= config->mem_config->memzone[memzone_idx].len;
                last_addr = RTE_PTR_ADD(
                        config->mem_config->memzone[memzone_idx].addr,
                        (size_t)config->mem_config->memzone[memzone_idx].len);
            }
        }

        /* we don't need to calculate offset here since length
         * is always cache-aligned */
        if (len > maxlen)
            maxlen = len;
    }

    if (maxlen == 0) {
        printf("There is no space left!\n");
        return 0;
    }

    mz = rte_memzone_reserve("max_zone", 0, SOCKET_ID_ANY, 0);
    if (mz == NULL) {
        printf("Failed to reserve a big chunk of memory\n");
        rte_dump_physmem_layout(stdout);
        rte_memzone_dump(stdout);
        return -1;
    }

    if (mz->len != maxlen) {
        printf("Memzone reserve with 0 size did not return biggest block\n");
        printf("Expected size = %zu, actual size = %zu\n",
               maxlen, mz->len);
        rte_dump_physmem_layout(stdout);
        rte_memzone_dump(stdout);
        return -1;
    }

    return 0;
}
static int
test_memzone_reserve_flags(void)
{
    const struct rte_memzone *mz;
    const struct rte_memseg *ms;
    int hugepage_2MB_avail = 0;
    int hugepage_1GB_avail = 0;
    int hugepage_16MB_avail = 0;
    int hugepage_16GB_avail = 0;
    const size_t size = 100;
    int i = 0;

    ms = rte_eal_get_physmem_layout();
    for (i = 0; i < RTE_MAX_MEMSEG; i++) {
        if (ms[i].hugepage_sz == RTE_PGSIZE_2M)
            hugepage_2MB_avail = 1;
        if (ms[i].hugepage_sz == RTE_PGSIZE_1G)
            hugepage_1GB_avail = 1;
        if (ms[i].hugepage_sz == RTE_PGSIZE_16M)
            hugepage_16MB_avail = 1;
        if (ms[i].hugepage_sz == RTE_PGSIZE_16G)
            hugepage_16GB_avail = 1;
    }

    /* Display the availability of 2MB, 1GB, 16MB and 16GB pages */
    if (hugepage_2MB_avail)
        printf("2MB Huge pages available\n");
    if (hugepage_1GB_avail)
        printf("1GB Huge pages available\n");
    if (hugepage_16MB_avail)
        printf("16MB Huge pages available\n");
    if (hugepage_16GB_avail)
        printf("16GB Huge pages available\n");

    /*
     * If 2MB pages are available, check that a small memzone is correctly
     * reserved from 2MB huge pages when requested via the RTE_MEMZONE_2MB
     * flag. Also check that the RTE_MEMZONE_SIZE_HINT_ONLY flag only
     * defaults to an available page size (i.e. 1GB) when 2MB pages are
     * unavailable.
     */
    if (hugepage_2MB_avail) {
        mz = rte_memzone_reserve("flag_zone_2M", size, SOCKET_ID_ANY,
                                 RTE_MEMZONE_2MB);
        if (mz == NULL) {
            printf("MEMZONE FLAG 2MB\n");
            return -1;
        }
        if (mz->hugepage_sz != RTE_PGSIZE_2M) {
            printf("hugepage_sz not equal 2M\n");
            return -1;
        }

        mz = rte_memzone_reserve("flag_zone_2M_HINT", size, SOCKET_ID_ANY,
                                 RTE_MEMZONE_2MB|RTE_MEMZONE_SIZE_HINT_ONLY);
        if (mz == NULL) {
            printf("MEMZONE FLAG 2MB & HINT\n");
            return -1;
        }
        if (mz->hugepage_sz != RTE_PGSIZE_2M) {
            printf("hugepage_sz not equal 2M\n");
            return -1;
        }

        /* Check that if 1GB huge pages are unavailable, the reservation
         * fails unless the HINT flag is given */
        if (!hugepage_1GB_avail) {
            mz = rte_memzone_reserve("flag_zone_1G_HINT", size,
                                     SOCKET_ID_ANY,
                                     RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY);
            if (mz == NULL) {
                printf("MEMZONE FLAG 1GB & HINT\n");
                return -1;
            }
            if (mz->hugepage_sz != RTE_PGSIZE_2M) {
                printf("hugepage_sz not equal 2M\n");
                return -1;
            }

            mz = rte_memzone_reserve("flag_zone_1G", size, SOCKET_ID_ANY,
                                     RTE_MEMZONE_1GB);
            if (mz != NULL) {
                printf("MEMZONE FLAG 1GB\n");
                return -1;
            }
        }
    }

    /* As with the 2MB tests above, but for 1GB huge page requests */
    if (hugepage_1GB_avail) {
        mz = rte_memzone_reserve("flag_zone_1G", size, SOCKET_ID_ANY,
                                 RTE_MEMZONE_1GB);
        if (mz == NULL) {
            printf("MEMZONE FLAG 1GB\n");
            return -1;
        }
        if (mz->hugepage_sz != RTE_PGSIZE_1G) {
            printf("hugepage_sz not equal 1G\n");
            return -1;
        }

        mz = rte_memzone_reserve("flag_zone_1G_HINT", size, SOCKET_ID_ANY,
                                 RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY);
        if (mz == NULL) {
            printf("MEMZONE FLAG 1GB & HINT\n");
            return -1;
        }
        if (mz->hugepage_sz != RTE_PGSIZE_1G) {
            printf("hugepage_sz not equal 1G\n");
            return -1;
        }

        /* Check that if 2MB huge pages are unavailable, the reservation
         * fails unless the HINT flag is given */
        if (!hugepage_2MB_avail) {
            mz = rte_memzone_reserve("flag_zone_2M_HINT", size,
                                     SOCKET_ID_ANY,
                                     RTE_MEMZONE_2MB|RTE_MEMZONE_SIZE_HINT_ONLY);
            if (mz == NULL) {
                printf("MEMZONE FLAG 2MB & HINT\n");
                return -1;
            }
            if (mz->hugepage_sz != RTE_PGSIZE_1G) {
                printf("hugepage_sz not equal 1G\n");
                return -1;
            }

            mz = rte_memzone_reserve("flag_zone_2M", size, SOCKET_ID_ANY,
                                     RTE_MEMZONE_2MB);
            if (mz != NULL) {
                printf("MEMZONE FLAG 2MB\n");
                return -1;
            }
        }

        if (hugepage_2MB_avail && hugepage_1GB_avail) {
            mz = rte_memzone_reserve("flag_zone_2M_HINT", size,
                                     SOCKET_ID_ANY,
                                     RTE_MEMZONE_2MB|RTE_MEMZONE_1GB);
            if (mz != NULL) {
                printf("BOTH SIZES SET\n");
                return -1;
            }
        }
    }

    /*
     * This option is for IBM Power. If 16MB pages are available, check
     * that a small memzone is correctly reserved from 16MB huge pages
     * when requested via the RTE_MEMZONE_16MB flag. Also check that the
     * RTE_MEMZONE_SIZE_HINT_ONLY flag only defaults to an available
     * page size (i.e. 16GB) when 16MB pages are unavailable.
     */
    if (hugepage_16MB_avail) {
        mz = rte_memzone_reserve("flag_zone_16M", size, SOCKET_ID_ANY,
                                 RTE_MEMZONE_16MB);
        if (mz == NULL) {
            printf("MEMZONE FLAG 16MB\n");
            return -1;
        }
        if (mz->hugepage_sz != RTE_PGSIZE_16M) {
            printf("hugepage_sz not equal 16M\n");
            return -1;
        }

        mz = rte_memzone_reserve("flag_zone_16M_HINT", size,
                                 SOCKET_ID_ANY,
                                 RTE_MEMZONE_16MB|RTE_MEMZONE_SIZE_HINT_ONLY);
        if (mz == NULL) {
            printf("MEMZONE FLAG 16MB & HINT\n");
            return -1;
        }
        if (mz->hugepage_sz != RTE_PGSIZE_16M) {
            printf("hugepage_sz not equal 16M\n");
            return -1;
        }

        /* Check that if 16GB huge pages are unavailable, the reservation
         * fails unless the HINT flag is given */
        if (!hugepage_16GB_avail) {
            mz = rte_memzone_reserve("flag_zone_16G_HINT", size,
                                     SOCKET_ID_ANY,
                                     RTE_MEMZONE_16GB|RTE_MEMZONE_SIZE_HINT_ONLY);
            if (mz == NULL) {
                printf("MEMZONE FLAG 16GB & HINT\n");
                return -1;
            }
            if (mz->hugepage_sz != RTE_PGSIZE_16M) {
                printf("hugepage_sz not equal 16M\n");
                return -1;
            }

            mz = rte_memzone_reserve("flag_zone_16G", size,
                                     SOCKET_ID_ANY, RTE_MEMZONE_16GB);
            if (mz != NULL) {
                printf("MEMZONE FLAG 16GB\n");
                return -1;
            }
        }
    }

    /* As with the 16MB tests above, but for 16GB huge page requests */
    if (hugepage_16GB_avail) {
        mz = rte_memzone_reserve("flag_zone_16G", size, SOCKET_ID_ANY,
                                 RTE_MEMZONE_16GB);
        if (mz == NULL) {
            printf("MEMZONE FLAG 16GB\n");
            return -1;
        }
        if (mz->hugepage_sz != RTE_PGSIZE_16G) {
            printf("hugepage_sz not equal 16G\n");
            return -1;
        }

        mz = rte_memzone_reserve("flag_zone_16G_HINT", size,
                                 SOCKET_ID_ANY,
                                 RTE_MEMZONE_16GB|RTE_MEMZONE_SIZE_HINT_ONLY);
        if (mz == NULL) {
            printf("MEMZONE FLAG 16GB & HINT\n");
            return -1;
        }
        if (mz->hugepage_sz != RTE_PGSIZE_16G) {
            printf("hugepage_sz not equal 16G\n");
            return -1;
        }

        /* Check that if 16MB huge pages are unavailable, the reservation
         * fails unless the HINT flag is given */
        if (!hugepage_16MB_avail) {
            mz = rte_memzone_reserve("flag_zone_16M_HINT", size,
                                     SOCKET_ID_ANY,
                                     RTE_MEMZONE_16MB|RTE_MEMZONE_SIZE_HINT_ONLY);
            if (mz == NULL) {
                printf("MEMZONE FLAG 16MB & HINT\n");
                return -1;
            }
            if (mz->hugepage_sz != RTE_PGSIZE_16G) {
                printf("hugepage_sz not equal 16G\n");
                return -1;
            }

            mz = rte_memzone_reserve("flag_zone_16M", size,
                                     SOCKET_ID_ANY, RTE_MEMZONE_16MB);
            if (mz != NULL) {
                printf("MEMZONE FLAG 16MB\n");
                return -1;
            }
        }

        if (hugepage_16MB_avail && hugepage_16GB_avail) {
            mz = rte_memzone_reserve("flag_zone_16M_HINT", size,
                                     SOCKET_ID_ANY,
                                     RTE_MEMZONE_16MB|RTE_MEMZONE_16GB);
            if (mz != NULL) {
                printf("BOTH SIZES SET\n");
                return -1;
            }
        }
    }

    return 0;
}
static int
vfio_spapr_dma_map(int vfio_container_fd)
{
    const struct rte_memseg *ms = rte_eal_get_physmem_layout();
    int i, ret;

    struct vfio_iommu_spapr_register_memory reg = {
        .argsz = sizeof(reg),
        .flags = 0
    };

    struct vfio_iommu_spapr_tce_info info = {
        .argsz = sizeof(info),
    };

    struct vfio_iommu_spapr_tce_create create = {
        .argsz = sizeof(create),
    };

    struct vfio_iommu_spapr_tce_remove remove = {
        .argsz = sizeof(remove),
    };

    /* query spapr iommu info */
    ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
    if (ret) {
        RTE_LOG(ERR, EAL, "  cannot get iommu info, "
                "error %i (%s)\n", errno, strerror(errno));
        return -1;
    }

    /* remove default DMA of 32 bit window */
    remove.start_addr = info.dma32_window_start;
    ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
    if (ret) {
        RTE_LOG(ERR, EAL, "  cannot remove default DMA window, "
                "error %i (%s)\n", errno, strerror(errno));
        return -1;
    }

    /* create DMA window from 0 to max(phys_addr + len) */
    for (i = 0; i < RTE_MAX_MEMSEG; i++) {
        if (ms[i].addr == NULL)
            break;

        create.window_size = RTE_MAX(create.window_size,
                                     ms[i].iova + ms[i].len);
    }

    /* sPAPR requires window size to be a power of 2 */
    create.window_size = rte_align64pow2(create.window_size);
    create.page_shift = __builtin_ctzll(ms->hugepage_sz);
    create.levels = 1;

    ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
    if (ret) {
        RTE_LOG(ERR, EAL, "  cannot create new DMA window, "
                "error %i (%s)\n", errno, strerror(errno));
        return -1;
    }

    if (create.start_addr != 0) {
        RTE_LOG(ERR, EAL, "  DMA window start address != 0\n");
        return -1;
    }

    /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
    for (i = 0; i < RTE_MAX_MEMSEG; i++) {
        struct vfio_iommu_type1_dma_map dma_map;

        if (ms[i].addr == NULL)
            break;

        reg.vaddr = (uintptr_t) ms[i].addr;
        reg.size = ms[i].len;
        ret = ioctl(vfio_container_fd,
                    VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
        if (ret) {
            RTE_LOG(ERR, EAL, "  cannot register vaddr for IOMMU, "
                    "error %i (%s)\n", errno, strerror(errno));
            return -1;
        }

        memset(&dma_map, 0, sizeof(dma_map));
        dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
        dma_map.vaddr = ms[i].addr_64;
        dma_map.size = ms[i].len;
        dma_map.iova = ms[i].iova;
        dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
                        VFIO_DMA_MAP_FLAG_WRITE;

        ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
        if (ret) {
            RTE_LOG(ERR, EAL, "  cannot set up DMA remapping, "
                    "error %i (%s)\n", errno, strerror(errno));
            return -1;
        }
    }

    return 0;
}

static int
vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
{
    /* No-IOMMU mode does not need DMA mapping */
    return 0;
}

int
rte_vfio_noiommu_is_enabled(void)
{
    int fd, ret, cnt;
    char c;

    ret = -1;
    fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
    if (fd < 0)
        return -1;

    cnt = read(fd, &c, 1);
    /* only trust c if the read actually returned a byte */
    if (cnt == 1 && c == 'Y')
        ret = 1;

    close(fd);
    return ret;
}
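/*
 * Worked example for the sPAPR window sizing above (values are
 * illustrative): rte_align64pow2() rounds up to the next power of two,
 * so a highest mapped address of 6 GiB yields an 8 GiB DMA window:
 *   rte_align64pow2(0x180000000ULL) == 0x200000000ULL
 * A value that is already a power of two is returned unchanged.
 */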