// Create a backup copy of the indicated MemoryRegion.
// Generally used in preparation for splitting a MemoryRegion.
static uint8_t *copy_region(struct uc_struct *uc, MemoryRegion *mr)
{
    uint8_t *block = (uint8_t *)malloc(int128_get64(mr->size));
    if (block != NULL) {
        uc_err err = uc_mem_read(uc, mr->addr, block, int128_get64(mr->size));
        if (err != UC_ERR_OK) {
            free(block);
            block = NULL;
        }
    }

    return block;
}
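/*
 * Minimal usage sketch (illustrative, not part of the original code): the
 * backup returned by copy_region() outlives the region itself, so a caller
 * can unmap, remap, and replay the old contents with uc_mem_write().  The
 * function name and new_perms parameter below are hypothetical.
 */
static bool remap_with_backup_example(struct uc_struct *uc, MemoryRegion *mr,
                                      uint32_t new_perms)
{
    uint64_t addr = mr->addr;
    size_t size = (size_t)int128_get64(mr->size);
    uint8_t *backup = copy_region(uc, mr);
    bool ok = false;

    if (backup == NULL) {
        return false;
    }

    // Replace the mapping, then restore the saved bytes.
    if (uc_mem_unmap(uc, addr, size) == UC_ERR_OK &&
        uc_mem_map(uc, addr, size, new_perms) == UC_ERR_OK &&
        uc_mem_write(uc, addr, backup, size) == UC_ERR_OK) {
        ok = true;
    }

    free(backup);
    return ok;
}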
/**
 * Add a MemoryRegionSection to the new regions list
 */
static void hostmem_append_new_region(HostMem *hostmem,
                                      MemoryRegionSection *section)
{
    void *ram_ptr = memory_region_get_ram_ptr(section->mr);
    size_t num = hostmem->num_new_regions;
    size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]);

    hostmem->new_regions = g_realloc(hostmem->new_regions, new_size);
    hostmem->new_regions[num] = (HostMemRegion){
        .host_addr = ram_ptr + section->offset_within_region,
        .guest_addr = section->offset_within_address_space,
        .size = int128_get64(section->size),
        .readonly = section->readonly,
    };
    hostmem->num_new_regions++;
}

static void hostmem_listener_append_region(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    HostMem *hostmem = container_of(listener, HostMem, listener);

    /* Ignore non-RAM regions, we may not be able to map them */
    if (!memory_region_is_ram(section->mr)) {
        return;
    }

    /* Ignore regions with dirty logging, we cannot mark them dirty */
    if (memory_region_is_logging(section->mr)) {
        return;
    }

    hostmem_append_new_region(hostmem, section);
}
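/*
 * Registration sketch (an assumption, not from the original source): a
 * MemoryListener only takes effect once registered against an address
 * space, after which region_add is replayed for existing regions and
 * fired for future topology changes.  The .priority value is an
 * illustrative choice.
 */
static void hostmem_init_example(HostMem *hostmem)
{
    hostmem->listener = (MemoryListener){
        .region_add = hostmem_listener_append_region,
        .priority = 10,
    };

    memory_listener_register(&hostmem->listener, &address_space_memory);
}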
/* vring_map can be coupled with vring_unmap or (if you still have the
 * value returned in *mr) memory_region_unref.
 */
static void *vring_map(MemoryRegion **mr, hwaddr phys, hwaddr len,
                       bool is_write)
{
    MemoryRegionSection section = memory_region_find(get_system_memory(),
                                                     phys, len);
    if (!section.mr || int128_get64(section.size) < len) {
        goto out;
    }
    if (is_write && section.readonly) {
        goto out;
    }
    if (!memory_region_is_ram(section.mr)) {
        goto out;
    }

    /* Ignore regions with dirty logging, we cannot mark them dirty */
    if (memory_region_is_logging(section.mr)) {
        goto out;
    }

    *mr = section.mr;
    return memory_region_get_ram_ptr(section.mr) + section.offset_within_region;

out:
    memory_region_unref(section.mr);
    *mr = NULL;
    return NULL;
}
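/*
 * Caller sketch (an assumption, not from the original source): on success
 * the MemoryRegion reference taken by memory_region_find() stays alive
 * through *mr, so the caller must drop it with memory_region_unref() once
 * the host pointer is no longer in use.  desc_addr and desc_len are
 * illustrative names.
 */
static void vring_map_example(hwaddr desc_addr, hwaddr desc_len)
{
    MemoryRegion *mr = NULL;
    void *ptr = vring_map(&mr, desc_addr, desc_len, false);

    if (ptr == NULL) {
        return; /* not RAM, too short, or read-only while writing */
    }

    /* ... access the vring through ptr ... */

    memory_region_unref(mr);
}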
static void whpx_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uint64_t host_va;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
    delta &= ~qemu_real_host_page_mask;
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    size &= qemu_real_host_page_mask;
    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
        return;
    }

    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
            + section->offset_within_region + delta;

    whpx_update_mapping(start_pa, size, (void *)host_va, add,
                        memory_region_is_rom(mr), mr->name);
}
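/*
 * Worked example (illustrative numbers): with a 4 KiB host page size,
 * qemu_real_host_page_mask == ~0xfffull, so for an unaligned section at
 * start_pa = 0x12340a00 with size = 0x3000:
 *
 *   delta = 0x1000 - (0x12340a00 & 0xfff) = 0x1000 - 0xa00 = 0x600
 *   delta &= 0xfff                        -> 0x600
 *   start_pa -> 0x12341000, size -> 0x2a00, size &= ~0xfff -> 0x2000
 *
 * The second masking matters for the already-aligned case: there
 * (start_pa & 0xfff) == 0 gives delta = 0x1000, which the mask folds back
 * to 0 so no bytes are skipped.
 */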
static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
{
    if (n->cmbsz && addr >= n->ctrl_mem.addr &&
        addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) {
        memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size);
    } else {
        pci_dma_read(&n->parent_obj, addr, buf, size);
    }
}
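/*
 * Illustrative check (assumed values): with the controller memory buffer
 * at ctrl_mem.addr = 0xfe000000 and ctrl_mem.size = 0x800000, a read at
 * addr = 0xfe001000 falls inside the CMB and is served directly from
 * n->cmbuf at offset 0x1000, while a read at addr = 0x1000 falls outside
 * and goes through pci_dma_read().
 */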
static void whpx_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}
/* vring_map can be coupled with vring_unmap or (if you still have the
 * value returned in *mr) memory_region_unref.
 * Returns NULL on failure.
 * Callers that can handle a partial mapping must supply mapped_len pointer to
 * get the actual length mapped.
 * Passing mapped_len == NULL requires either a full mapping or a failure.
 */
static void *vring_map(MemoryRegion **mr, hwaddr phys, hwaddr len,
                       hwaddr *mapped_len, bool is_write)
{
    MemoryRegionSection section = memory_region_find(get_system_memory(),
                                                     phys, len);
    uint64_t size;

    if (!section.mr) {
        goto out;
    }

    size = int128_get64(section.size);
    assert(size);

    /* Passing mapped_len == NULL requires either a full mapping or a failure. */
    if (!mapped_len && size < len) {
        goto out;
    }
    if (is_write && section.readonly) {
        goto out;
    }
    if (!memory_region_is_ram(section.mr)) {
        goto out;
    }

    /* Ignore regions with dirty logging, we cannot mark them dirty */
    if (memory_region_get_dirty_log_mask(section.mr)) {
        goto out;
    }

    if (mapped_len) {
        *mapped_len = MIN(size, len);
    }
    *mr = section.mr;
    return memory_region_get_ram_ptr(section.mr) + section.offset_within_region;

out:
    memory_region_unref(section.mr);
    *mr = NULL;
    return NULL;
}
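/*
 * Partial-mapping sketch (an assumption, not from the original source): a
 * caller that can cope with a shorter mapping passes mapped_len and loops,
 * advancing by however many bytes each call actually covered.
 */
static void vring_map_partial_example(hwaddr phys, hwaddr len)
{
    while (len) {
        MemoryRegion *mr = NULL;
        hwaddr mapped_len = 0;
        void *ptr = vring_map(&mr, phys, len, &mapped_len, false);

        if (ptr == NULL) {
            return;
        }

        /* ... use ptr for mapped_len bytes ... */

        memory_region_unref(mr);
        phys += mapped_len;
        len -= mapped_len;
    }
}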
static void vfio_prereg_listener_region_add(MemoryListener *listener,
                                            MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer,
                                            prereg_listener);
    const hwaddr gpa = section->offset_within_address_space;
    hwaddr end;
    int ret;
    hwaddr page_mask = qemu_real_host_page_mask;
    struct vfio_iommu_spapr_register_memory reg = {
        .argsz = sizeof(reg),
        .flags = 0,
    };

    if (vfio_prereg_listener_skipped_section(section)) {
        trace_vfio_prereg_listener_region_add_skip(
                section->offset_within_address_space,
                section->offset_within_address_space +
                int128_get64(int128_sub(section->size, int128_one())));
        return;
    }

    if (unlikely((section->offset_within_address_space & ~page_mask) ||
                 (section->offset_within_region & ~page_mask) ||
                 (int128_get64(section->size) & ~page_mask))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    end = section->offset_within_address_space + int128_get64(section->size);
    if (gpa >= end) {
        return;
    }

    memory_region_ref(section->mr);

    reg.vaddr = (uintptr_t) vfio_prereg_gpa_to_vaddr(section, gpa);
    reg.size = end - gpa;

    ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
    trace_vfio_prereg_register(reg.vaddr, reg.size, ret ? -errno : 0);
    if (ret) {
        /*
         * On the initfn path, store the first error in the container so we
         * can gracefully fail.  Runtime, there's not much we can do other
         * than throw a hardware error.
         */
        if (!container->initialized) {
            if (!container->error) {
                container->error = ret;
            }
        } else {
            hw_error("vfio: Memory registering failed, unable to continue");
        }
    }
}

static void vfio_prereg_listener_region_del(MemoryListener *listener,
                                            MemoryRegionSection *section)
{
    VFIOContainer *container = container_of(listener, VFIOContainer,
                                            prereg_listener);
    const hwaddr gpa = section->offset_within_address_space;
    hwaddr end;
    int ret;
    hwaddr page_mask = qemu_real_host_page_mask;
    struct vfio_iommu_spapr_register_memory reg = {
        .argsz = sizeof(reg),
        .flags = 0,
    };

    if (vfio_prereg_listener_skipped_section(section)) {
        trace_vfio_prereg_listener_region_del_skip(
                section->offset_within_address_space,
                section->offset_within_address_space +
                int128_get64(int128_sub(section->size, int128_one())));
        return;
    }

    if (unlikely((section->offset_within_address_space & ~page_mask) ||
                 (section->offset_within_region & ~page_mask) ||
                 (int128_get64(section->size) & ~page_mask))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    end = section->offset_within_address_space + int128_get64(section->size);
    if (gpa >= end) {
        return;
    }

    reg.vaddr = (uintptr_t) vfio_prereg_gpa_to_vaddr(section, gpa);
    reg.size = end - gpa;

    ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
    trace_vfio_prereg_unregister(reg.vaddr, reg.size, ret ? -errno : 0);
}

const MemoryListener vfio_prereg_listener = {
    .region_add = vfio_prereg_listener_region_add,
    .region_del = vfio_prereg_listener_region_del,
};

int vfio_spapr_create_window(VFIOContainer *container,
                             MemoryRegionSection *section,
                             hwaddr *pgsize)
{
    int ret;
    unsigned pagesize = memory_region_iommu_get_min_page_size(section->mr);
    unsigned entries, pages;
    struct vfio_iommu_spapr_tce_create create = { .argsz = sizeof(create) };

    /*
     * FIXME: For VFIO iommu types which have KVM acceleration to
     * avoid bouncing all map/unmaps through qemu this way, this
     * would be the right place to wire that up (tell the KVM
     * device emulation the VFIO iommu handles to use).
     */
    create.window_size = int128_get64(section->size);
    create.page_shift = ctz64(pagesize);
    /*
     * SPAPR host supports multilevel TCE tables, there is some
     * heuristic to decide how many levels we want for our table:
     * 0..64 = 1; 65..4096 = 2; 4097..262144 = 3; 262145.. = 4
     */
    entries = create.window_size >> create.page_shift;
    pages = MAX((entries * sizeof(uint64_t)) / getpagesize(), 1);
    pages = MAX(pow2ceil(pages) - 1, 1); /* Round up */
    create.levels = ctz64(pages) / 6 + 1;

    ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
    if (ret) {
        error_report("Failed to create a window, ret = %d (%m)", ret);
        return -errno;
    }

    if (create.start_addr != section->offset_within_address_space) {
        vfio_spapr_remove_window(container, create.start_addr);

        error_report("Host doesn't support DMA window at %"HWADDR_PRIx
                     ", must be %"PRIx64,
                     section->offset_within_address_space,
                     (uint64_t)create.start_addr);
        return -EINVAL;
    }
    trace_vfio_spapr_create_window(create.page_shift,
                                   create.window_size,
                                   create.start_addr);
    *pgsize = pagesize;

    return 0;
}

int vfio_spapr_remove_window(VFIOContainer *container,
                             hwaddr offset_within_address_space)
{
    struct vfio_iommu_spapr_tce_remove remove = {
        .argsz = sizeof(remove),
        .start_addr = offset_within_address_space,
    };
    int ret;

    ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
    if (ret) {
        error_report("Failed to remove window at %"PRIx64,
                     (uint64_t)remove.start_addr);
        return -errno;
    }

    trace_vfio_spapr_remove_window(offset_within_address_space);

    return 0;
}
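/*
 * Worked example (illustrative numbers): for a 2 GiB window with 64 KiB
 * IOMMU pages on a 64 KiB host page size:
 *
 *   create.page_shift = 16
 *   entries = 0x80000000 >> 16       = 32768 TCEs
 *   pages   = 32768 * 8 / 65536      = 4 pages of TCE table
 *
 * so the TCE table itself fits in a handful of host pages and a single
 * level suffices; much larger windows push `levels` up toward the
 * kernel's limit.
 */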
static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
                             uint64_t prp2, uint32_t len, NvmeCtrl *n)
{
    hwaddr trans_len = n->page_size - (prp1 % n->page_size);
    trans_len = MIN(len, trans_len);
    int num_prps = (len >> n->page_bits) + 1;

    if (unlikely(!prp1)) {
        trace_nvme_err_invalid_prp();
        return NVME_INVALID_FIELD | NVME_DNR;
    } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
        qsg->nsg = 0;
        qemu_iovec_init(iov, num_prps);
        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr],
                       trans_len);
    } else {
        pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
        qemu_sglist_add(qsg, prp1, trans_len);
    }
    len -= trans_len;
    if (len) {
        if (unlikely(!prp2)) {
            trace_nvme_err_invalid_prp2_missing();
            goto unmap;
        }
        if (len > n->page_size) {
            uint64_t prp_list[n->max_prp_ents];
            uint32_t nents, prp_trans;
            int i = 0;

            nents = (len + n->page_size - 1) >> n->page_bits;
            prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
            nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
            while (len != 0) {
                uint64_t prp_ent = le64_to_cpu(prp_list[i]);

                if (i == n->max_prp_ents - 1 && len > n->page_size) {
                    /* The last entry chains to the next PRP list page */
                    if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
                        trace_nvme_err_invalid_prplist_ent(prp_ent);
                        goto unmap;
                    }

                    i = 0;
                    nents = (len + n->page_size - 1) >> n->page_bits;
                    prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
                    nvme_addr_read(n, prp_ent, (void *)prp_list, prp_trans);
                    prp_ent = le64_to_cpu(prp_list[i]);
                }

                if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
                    trace_nvme_err_invalid_prplist_ent(prp_ent);
                    goto unmap;
                }

                trans_len = MIN(len, n->page_size);
                if (qsg->nsg) {
                    qemu_sglist_add(qsg, prp_ent, trans_len);
                } else {
                    qemu_iovec_add(iov,
                                   (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr],
                                   trans_len);
                }
                len -= trans_len;
                i++;
            }
        } else {
            if (unlikely(prp2 & (n->page_size - 1))) {
                trace_nvme_err_invalid_prp2_align(prp2);
                goto unmap;
            }
            if (qsg->nsg) {
                qemu_sglist_add(qsg, prp2, len);
            } else {
                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr],
                               trans_len);
            }
        }
    }
    return NVME_SUCCESS;

unmap:
    qemu_sglist_destroy(qsg);
    return NVME_INVALID_FIELD | NVME_DNR;
}
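/*
 * PRP walk example (illustrative numbers): with a 4 KiB page size, a
 * 10 KiB (0x2800 byte) transfer starting at prp1 = 0x10000200 covers:
 *
 *   prp1: 0x10000200..0x10000fff  (trans_len = 0xe00, the partial page)
 *   remaining 0x1a00 bytes > page_size, so prp2 points at a PRP list
 *   whose entries map the second and third pages, each page-aligned.
 *
 * Entries that are zero or not page-aligned abort the walk via `unmap`.
 */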
void framebuffer_update_display(
    DisplaySurface *ds,
    MemoryRegion *address_space,
    hwaddr base,
    int cols, /* Width in pixels.  */
    int rows, /* Height in pixels.  */
    int src_width, /* Length of source line, in bytes.  */
    int dest_row_pitch, /* Bytes between adjacent horizontal output pixels.  */
    int dest_col_pitch, /* Bytes between adjacent vertical output pixels.  */
    int invalidate, /* nonzero to redraw the whole image.  */
    drawfn fn,
    void *opaque,
    int *first_row, /* Input and output.  */
    int *last_row /* Output only */)
{
    hwaddr src_len;
    uint8_t *dest;
    uint8_t *src;
    uint8_t *src_base;
    int first, last = 0;
    int dirty;
    int i;
    ram_addr_t addr;
    MemoryRegionSection mem_section;
    MemoryRegion *mem;

    i = *first_row;
    *first_row = -1;
    src_len = src_width * rows;

    mem_section = memory_region_find(address_space, base, src_len);
    mem = mem_section.mr;
    if (int128_get64(mem_section.size) != src_len ||
            !memory_region_is_ram(mem_section.mr)) {
        goto out;
    }
    assert(mem);
    assert(mem_section.offset_within_address_space == base);

    memory_region_sync_dirty_bitmap(mem);
    if (!memory_region_is_logging(mem, DIRTY_MEMORY_VGA)) {
        invalidate = true;
    }

    src_base = cpu_physical_memory_map(base, &src_len, 0);
    /* If we can't map the framebuffer then bail.  We could try harder,
       but it's not really worth it as dirty flag tracking will probably
       already have failed above.  */
    if (!src_base) {
        goto out;
    }
    if (src_len != src_width * rows) {
        cpu_physical_memory_unmap(src_base, src_len, 0, 0);
        goto out;
    }
    src = src_base;
    dest = surface_data(ds);
    if (dest_col_pitch < 0) {
        dest -= dest_col_pitch * (cols - 1);
    }
    if (dest_row_pitch < 0) {
        dest -= dest_row_pitch * (rows - 1);
    }
    first = -1;
    addr = mem_section.offset_within_region;

    addr += i * src_width;
    src += i * src_width;
    dest += i * dest_row_pitch;

    for (; i < rows; i++) {
        dirty = memory_region_get_dirty(mem, addr, src_width,
                                        DIRTY_MEMORY_VGA);
        if (dirty || invalidate) {
            fn(opaque, dest, src, cols, dest_col_pitch);
            if (first == -1) {
                first = i;
            }
            last = i;
        }
        addr += src_width;
        src += src_width;
        dest += dest_row_pitch;
    }
    cpu_physical_memory_unmap(src_base, src_len, 0, 0);
    if (first < 0) {
        goto out;
    }
    memory_region_reset_dirty(mem, mem_section.offset_within_region, src_len,
                              DIRTY_MEMORY_VGA);
    *first_row = first;
    *last_row = last;
out:
    memory_region_unref(mem);
}
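/*
 * Caller sketch (illustrative; ExampleFbState and example_draw_line are
 * hypothetical): a display device's periodic update hook typically drives
 * framebuffer_update_display() and then flushes only the dirtied row span
 * to the console.
 */
typedef struct ExampleFbState {
    QemuConsole *con;
    hwaddr fb_base;
    int cols, rows;
    int invalidate;
} ExampleFbState;

static void example_draw_line(void *opaque, uint8_t *dst, const uint8_t *src,
                              int width, int deststep);

static void example_fb_update(void *opaque)
{
    ExampleFbState *s = opaque;
    DisplaySurface *surface = qemu_console_surface(s->con);
    int first = 0;      /* start scanning at the first row */
    int last = 0;

    framebuffer_update_display(surface, get_system_memory(), s->fb_base,
                               s->cols, s->rows,
                               s->cols * 4,  /* src bytes per line (32bpp) */
                               s->cols * 4,  /* dest row pitch */
                               4,            /* dest col pitch */
                               s->invalidate,
                               example_draw_line, s,
                               &first, &last);
    if (first >= 0) {
        /* Only the rows actually redrawn need to reach the display */
        dpy_gfx_update(s->con, 0, first, s->cols, last - first + 1);
    }
    s->invalidate = 0;
}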
// TODO: investigate whether qemu region manipulation functions already offer
// this capability
static bool split_region(struct uc_struct *uc, MemoryRegion *mr,
                         uint64_t address, size_t size, bool do_delete)
{
    uint8_t *backup;
    uint32_t perms;
    uint64_t begin, end, chunk_end;
    size_t l_size, m_size, r_size;

    chunk_end = address + size;

    // If this region is fully contained within [address, address + size),
    // there is no work to do.
    if (address <= mr->addr && chunk_end >= mr->end) {
        return true;
    }

    if (size == 0) {
        // trivial case
        return true;
    }

    if (address >= mr->end || chunk_end <= mr->addr) {
        // impossible case: no overlap at all
        return false;
    }

    backup = copy_region(uc, mr);
    if (backup == NULL) {
        return false;
    }

    // Save the essential information required for the split before mr
    // gets deleted.
    perms = mr->perms;
    begin = mr->addr;
    end = mr->end;

    // Unmap this region first, then split it.
    if (uc_mem_unmap(uc, mr->addr, int128_get64(mr->size)) != UC_ERR_OK) {
        goto error;
    }

    /* overlapping cases
     *               |------mr------|
     * case 1    |---size--|
     * case 2           |--size--|
     * case 3                  |---size--|
     */

    // Clamp the chunk to the region boundaries.
    if (address < begin) {
        address = begin;
    }
    if (chunk_end > end) {
        chunk_end = end;
    }

    // Compute sub-region sizes.
    l_size = (size_t)(address - begin);
    r_size = (size_t)(end - chunk_end);
    m_size = (size_t)(chunk_end - address);

    // If any of the operations below fails, things are too far gone to
    // recover.  We could try to remap the original region, but a smaller
    // allocation just failed, so there is no guarantee that we could
    // recover the original allocation at this point.
    if (l_size > 0) {
        if (uc_mem_map(uc, begin, l_size, perms) != UC_ERR_OK) {
            goto error;
        }
        if (uc_mem_write(uc, begin, backup, l_size) != UC_ERR_OK) {
            goto error;
        }
    }

    if (m_size > 0 && !do_delete) {
        if (uc_mem_map(uc, address, m_size, perms) != UC_ERR_OK) {
            goto error;
        }
        if (uc_mem_write(uc, address, backup + l_size, m_size) != UC_ERR_OK) {
            goto error;
        }
    }

    if (r_size > 0) {
        if (uc_mem_map(uc, chunk_end, r_size, perms) != UC_ERR_OK) {
            goto error;
        }
        if (uc_mem_write(uc, chunk_end, backup + l_size + m_size,
                         r_size) != UC_ERR_OK) {
            goto error;
        }
    }

    // The backup is no longer needed once all pieces are mapped again;
    // freeing it here avoids leaking it on the success path.
    free(backup);
    return true;

error:
    free(backup);
    return false;
}
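/*
 * Worked example (illustrative numbers): splitting a region mapped at
 * [0x1000, 0x4000) with address = 0x1800, size = 0x800 gives
 * chunk_end = 0x2000 and
 *
 *   l_size = 0x1800 - 0x1000 = 0x800   (left piece, kept)
 *   m_size = 0x2000 - 0x1800 = 0x800   (middle piece, kept or deleted)
 *   r_size = 0x4000 - 0x2000 = 0x2000  (right piece, kept)
 *
 * so an unmap or permission change on a sub-range ends up remapping up to
 * three smaller regions backed by the saved copy.
 */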