int
au_himem_map(void *cookie, bus_addr_t addr, bus_size_t size, int flags,
    bus_space_handle_t *bshp, int acct)
{
	au_himem_cookie_t	*c = (au_himem_cookie_t *)cookie;
	int			err;
	paddr_t			pa;
	vaddr_t			va;
	vsize_t			realsz;
	int			s;

	/* make sure we can map this bus address */
	if (addr < c->c_start || (addr + size) > c->c_end) {
		return EINVAL;
	}

	/* physical address, page aligned */
	pa = TRUNC_PAGE(c->c_physoff + addr);

	/*
	 * we are only going to work with whole pages.  the
	 * calculation is the offset into the first page, plus the
	 * intended size, rounded up to a whole number of pages.
	 */
	realsz = ROUND_PAGE((addr % PAGE_SIZE) + size);

	va = uvm_km_alloc(kernel_map,
	    realsz, PAGE_SIZE, UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
	if (va == 0) {
		return ENOMEM;
	}

	/* virtual address in handle (offset appropriately) */
	*bshp = va + (addr % PAGE_SIZE);

	/* map the pages in the kernel pmap */
	s = splhigh();
	while (realsz) {
		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE);
		pa += PAGE_SIZE;
		va += PAGE_SIZE;
		realsz -= PAGE_SIZE;
	}
	pmap_update(pmap_kernel());
	splx(s);

	/* record our allocated range of bus addresses */
	if (acct && c->c_extent != NULL) {
		err = extent_alloc_region(c->c_extent, addr, size, EX_NOWAIT);
		if (err) {
			au_himem_unmap(cookie, *bshp, size, 0);
			return err;
		}
	}

	return 0;
}
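/*
 * The TRUNC_PAGE()/ROUND_PAGE() helpers used throughout this section are not
 * shown here.  A minimal sketch of the usual definitions, assuming PAGE_SIZE
 * is a power of two; the actual definitions in each source tree may differ,
 * so the sketch is guarded and intended only as illustration.
 */
#ifndef TRUNC_PAGE
#define TRUNC_PAGE(x)	((x) & ~(PAGE_SIZE - 1))			/* round down to a page boundary */
#define ROUND_PAGE(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))	/* round up to the next page boundary */
#endif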
pmap_t* pmap_create() {
	pmap_t *pmap = (pmap_t*)kheap_alloc(sizeof(pmap_t));
	memset(pmap, 0, sizeof(pmap_t));

	// Create pgd
	// TODO: This will not work! We need to allocate 16 KiB of contiguous
	// memory aligned to a 16 KiB address boundary
	pmap->pgd = (pgd_t*)kheap_alloc(sizeof(pgd_t));
	memset(pmap->pgd, 0, sizeof(pgd_t));

	// Get the physical address of the pgd
	pmap->pgd_pa = TRUNC_PAGE(KERNEL_PGTS_BASE[PGD_GET_INDEX((vaddr_t)pmap->pgd) -
	    KERNEL_PGD_PGT_INDEX_BASE].pte[PGT_GET_INDEX((vaddr_t)pmap->pgd)]);

	pmap_reference(pmap);

	return pmap;
}
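/*
 * A minimal sketch of one way to address the TODO above: over-allocate from
 * the existing kernel heap and round the pointer up to the next 16 KiB
 * boundary.  pgd_alloc_aligned() is a hypothetical helper, not part of this
 * code base; it leaks the alignment padding and only fixes the *alignment*,
 * so it still assumes kheap_alloc() returns physically contiguous memory.
 */
#define PGD_ALIGN	(16 * 1024)

static pgd_t* pgd_alloc_aligned(void) {
	// Over-allocate so a correctly aligned pgd_t is guaranteed to fit
	// somewhere inside the block, then round the pointer up to the boundary.
	uintptr_t raw = (uintptr_t)kheap_alloc(sizeof(pgd_t) + PGD_ALIGN - 1);
	uintptr_t aligned = (raw + PGD_ALIGN - 1) & ~((uintptr_t)PGD_ALIGN - 1);
	return (pgd_t*)aligned;
}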
void
ept_invalidate_addr(paddr_t gpaddr)
{
	/* See Vol3B 24.3.3 */
	if (TRUNC_PAGE(gpaddr) == kvtophys(dma_test_page)) {
		kprintf("ept_invalidate_addr>gpaddr = "PFMT"\n", gpaddr);
	}
#ifdef HYP_PAE
	notify_all((nb_func_t)__ept_invalidate_addr,
		   (nb_arg_t)bits(gpaddr, 31, 0),
		   (nb_arg_t)bits(gpaddr, 63, 32));
#else
	notify_all((nb_func_t)__ept_invalidate_addr,
		   (nb_arg_t)gpaddr,
		   (nb_arg_t)0);
#endif
}
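/*
 * bits() above splits a 64-bit guest physical address into two 32-bit
 * notification arguments under HYP_PAE.  Its definition is not shown in this
 * section; a minimal sketch, assuming inclusive bit positions with hi >= lo
 * and hi - lo < 64 (the real helper may be a macro with a different name or
 * signature):
 */
static inline u32 bits_sketch(u64 val, unsigned int hi, unsigned int lo) {
	/* shift the field down to bit 0, then mask off everything above it */
	return (u32)((val >> lo) & ((1ULL << (hi - lo + 1)) - 1));
}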
void pmap_init() {
	// Set the end of the kernel's virtual and physical address space
	kernel_vend = ROUND_PAGE((vaddr_t)(PGTPHYSICALSTARTADDR - MEMBASEADDR + KVIRTUALBASEADDR) +
	    sizeof(pgt_t) * (vaddr_t)(NUMPAGETABLES));
	kernel_pend = ROUND_PAGE((paddr_t)(PGTPHYSICALSTARTADDR) +
	    sizeof(pgt_t) * (paddr_t)(NUMPAGETABLES));

	// Initialize the kernel pmap
	_pmap_kernel_init();

	// Initialize pmm
	pmm_init();

	// Reserve the pages used by the kernel
	for(uint32_t i = 0, n_tot_entries = (uint32_t)(NUMPAGETABLES) * PGTNENTRIES,
	    *pte = (uint32_t*)KERNEL_PGTS_BASE; i < n_tot_entries; i++) {
		if(pte[i] & PTE_PAGE_BIT) {
			// Count the resident and wired pages for the kernel (will be the same)
			kernel_pmap.pmap_stats.wired_count++;
			kernel_pmap.pmap_stats.resident_count++;
			pmm_reserve(TRUNC_PAGE(pte[i]));
		}
	}
}
static void
#ifdef HYP_PAE
__ept_invalidate_addr(vm_t *v, u32 gpaddr0, u32 gpaddr1)
{
	paddr_t gpaddr = ((paddr_t)gpaddr1 << 32) | gpaddr0;
#else
__ept_invalidate_addr(vm_t *v, paddr_t gpaddr)
{
#endif
	/* invept currently has no option to invalidate a particular
	 * page, so gpaddr is ignored */
	u64 eptp = vmread64(VMCE_EPT_PTR);
	un err = invept(INVEPT_TYPE_SINGLE, eptp);
	if (err) {
		kprintf("__ept_invalidate_addr>ERROR eptp 0x%llx\n", eptp);
		return;
	}
	if (TRUNC_PAGE(gpaddr) == kvtophys(dma_test_page)) {
		kprintf("__ept_invalidate_addr>succeeded gpaddr = "PFMT"\n",
			gpaddr);
	}
}
void
au_himem_unmap(void *cookie, bus_space_handle_t bsh, bus_size_t size, int acct)
{
	au_himem_cookie_t	*c = (au_himem_cookie_t *)cookie;
	vaddr_t			va;
	vsize_t			realsz;
	paddr_t			pa;
	int			s;

	va = (vaddr_t)TRUNC_PAGE(bsh);
	realsz = (vsize_t)ROUND_PAGE((bsh % PAGE_SIZE) + size);

	s = splhigh();

	/* make sure that any pending writes are flushed */
	wbflush();

	/*
	 * we have to get the bus address, so that we can free it in the
	 * extent manager.  this is the unfortunate thing about using
	 * virtual memory instead of just a 1:1 mapping scheme.
	 */
	if (pmap_extract(pmap_kernel(), va, &pa) == false)
		panic("au_himem_unmap: virtual address invalid!");

	/* now remove it from the pmap */
	pmap_kremove(va, realsz);
	pmap_update(pmap_kernel());
	splx(s);

	/* finally we can release both virtual and bus address ranges */
	uvm_km_free(kernel_map, va, realsz, UVM_KMF_VAONLY);
	if (acct) {
		bus_addr_t	addr;

		addr = ((pa - c->c_physoff) + (bsh % PAGE_SIZE));
		extent_free(c->c_extent, addr, size, EX_NOWAIT);
	}
}
void map_segments (long fd, Elf32_Phdr *segs[2], Elf32_Half type, dso *so) {
	/* Adjust text segment addresses to page size */
	Elf32_Off text_offset = TRUNC_PAGE(segs[0]->p_offset);
	Elf32_Addr text_vaddr = TRUNC_PAGE(segs[0]->p_vaddr);
	Elf32_Addr text_vlimit = ROUND_PAGE(segs[1]->p_vaddr + segs[1]->p_memsz);
	unsigned long mapsize = text_vlimit - text_vaddr;

	/* Executable has to be loaded at constant address */
	void *base_addr = 0;
	if (type == ET_EXEC) {
		base_addr = (void *)text_vaddr;
	}

	/* TODO: what if base address lies in already mapped area? E.g. where the
	   loader resides? */

	/* Map text segment into memory */
	char *mapbase = sl_mmap(base_addr, mapsize, convert_prot(segs[0]->p_flags),
				MAP_PRIVATE, fd, text_offset);
	if ((long)mapbase == -1) {
		sl_close(fd);
		sl_printf("Error map_segments: mapping of text segment failed.\n");
		sl_exit(1);
	}

	/* Adjust data segment addresses to page size */
	Elf32_Off data_offset = TRUNC_PAGE(segs[1]->p_offset);
	Elf32_Addr data_vaddr = TRUNC_PAGE(segs[1]->p_vaddr);
	Elf32_Addr data_vlimit = ROUND_PAGE(segs[1]->p_vaddr + segs[1]->p_filesz);
	void *data_addr = mapbase + (data_vaddr - text_vaddr);
	long data_prot = convert_prot(segs[1]->p_flags);

	/* Map data segment into memory */
	if ((long)sl_mmap(data_addr, data_vlimit - data_vaddr, data_prot,
			  MAP_PRIVATE | MAP_FIXED, fd, data_offset) == -1) {
		sl_close(fd);
		sl_printf("Error map_segments: mapping of data segment failed.\n");
		sl_exit(1);
	}

	/* Clear BSS part */
	Elf32_Addr clear_vaddr = segs[1]->p_vaddr + segs[1]->p_filesz;
	void *clear_addr = mapbase + (clear_vaddr - text_vaddr);
	void *clear_page = mapbase + (TRUNC_PAGE(clear_vaddr) - text_vaddr);
	unsigned long nclear = data_vlimit - clear_vaddr;

	if (nclear > 0) {
		/* Make sure the end of the segment is writable */
		if ((data_prot & PROT_WRITE) == 0
		    && sl_mprotect(clear_page, PAGE_SIZE, data_prot|PROT_WRITE) == -1) {
			sl_printf("Error map_segments: mprotect on data segment failed.\n");
			sl_exit(1);
		}

		sl_memset(clear_addr, 0, nclear);

		/* Reset the data protection */
		if ((data_prot & PROT_WRITE) == 0) {
			sl_mprotect(clear_page, PAGE_SIZE, data_prot);
		}
	}

	/* Allocate remaining part of bss section */
	Elf32_Addr bss_vaddr = data_vlimit;
	Elf32_Addr bss_vlimit = ROUND_PAGE(segs[1]->p_vaddr + segs[1]->p_memsz);
	void *bss_addr = mapbase + (bss_vaddr - text_vaddr);
	if (bss_vlimit > bss_vaddr) {
		if ((long)sl_mmap(bss_addr, bss_vlimit - bss_vaddr, data_prot,
				  MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) == -1) {
			sl_printf("Error map_segments: mmap of bss segment failed.\n");
			sl_exit(1);
		}
	}

	/* Save important information */
	so->base_addr = (type == ET_EXEC) ? 0 : mapbase;
	so->text_addr = mapbase;
	so->text_size = mapsize;
	so->data_addr = data_addr;
	so->data_size = data_vlimit - data_vaddr;
	so->bss_addr = bss_addr;
	so->bss_size = bss_vlimit - bss_vaddr;
	so->end_addr = bss_addr + so->bss_size;
	so->text_prot = convert_prot(segs[0]->p_flags);
	so->data_prot = data_prot;
	so->bss_prot = data_prot;
}
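/*
 * convert_prot() is used above but not shown in this section.  A minimal
 * sketch of what such a helper typically does, assuming it simply translates
 * ELF segment flag bits (PF_R/PF_W/PF_X) into mmap protection bits
 * (PROT_READ/PROT_WRITE/PROT_EXEC); the real implementation may differ.
 */
static long convert_prot_sketch(Elf32_Word p_flags) {
	long prot = 0;
	if (p_flags & PF_R)
		prot |= PROT_READ;	/* segment is readable */
	if (p_flags & PF_W)
		prot |= PROT_WRITE;	/* segment is writable */
	if (p_flags & PF_X)
		prot |= PROT_EXEC;	/* segment is executable */
	return prot;
}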
/* Verifies the kernel-provided PT_LOAD program header entries and performs the
 * segment mappings only if required.  As the kernel has already mapped the
 * PT_LOAD segments, our RTLD should not map them again. */
void map_segments_RTLD (long fd, Elf32_Phdr *segs[2], Elf32_Half type, dso *so,
			Elf32_Phdr *segs_auxv[2]) {
	/* TODO: improve error handling ;) */
	if(segs[0]->p_offset != segs_auxv[0]->p_offset) {
		sl_printf("map_segments_RTLD: difference in program headers found!\n");
		sl_exit(1);
	}
	if(segs[0]->p_vaddr != segs_auxv[0]->p_vaddr) {
		sl_printf("map_segments_RTLD: difference in program headers found!\n");
		sl_exit(1);
	}
	if(segs[0]->p_memsz != segs_auxv[0]->p_memsz) {
		sl_printf("map_segments_RTLD: difference in program headers found!\n");
		sl_exit(1);
	}
	if(segs[1]->p_offset != segs_auxv[1]->p_offset) {
		sl_printf("map_segments_RTLD: difference in program headers found!\n");
		sl_exit(1);
	}
	if(segs[1]->p_vaddr != segs_auxv[1]->p_vaddr) {
		sl_printf("map_segments_RTLD: difference in program headers found!\n");
		sl_exit(1);
	}
	if(segs[1]->p_memsz != segs_auxv[1]->p_memsz) {
		sl_printf("map_segments_RTLD: difference in program headers found!\n");
		sl_exit(1);
	}

	/* Adjust text segment addresses to page size */
	//Elf32_Off text_offset = TRUNC_PAGE(segs[0]->p_offset);
	Elf32_Addr text_vaddr = TRUNC_PAGE(segs[0]->p_vaddr);
	Elf32_Addr text_vlimit = ROUND_PAGE(segs[1]->p_vaddr + segs[1]->p_memsz);
	unsigned long mapsize = text_vlimit - text_vaddr;

	/* Executable has to be loaded at constant address */
	void *base_addr = 0;
	if (type == ET_EXEC) {
		base_addr = (void *)text_vaddr;
	} else {
		sl_printf("map_segments_RTLD: object type is not ET_EXEC!\n");
		sl_exit(1);
	}

	/* TODO: what if base address lies in already mapped area? E.g. where the
	   loader resides? */

	/* Text segment already mapped by the kernel */
	char *mapbase = base_addr;

	/* Adjust data segment addresses to page size */
	//Elf32_Off data_offset = TRUNC_PAGE(segs[1]->p_offset);
	Elf32_Addr data_vaddr = TRUNC_PAGE(segs[1]->p_vaddr);
	Elf32_Addr data_vlimit = ROUND_PAGE(segs[1]->p_vaddr + segs[1]->p_filesz);
	void *data_addr = mapbase + (data_vaddr - text_vaddr);
	//long data_prot = convert_prot(segs[1]->p_flags);

	/* Clear BSS part */
	//Elf32_Addr clear_vaddr = segs[1]->p_vaddr + segs[1]->p_filesz;
	//void *clear_addr = mapbase + (clear_vaddr - text_vaddr);
	//void *clear_page = mapbase + (TRUNC_PAGE(clear_vaddr) - text_vaddr);
	//unsigned long nclear = data_vlimit - clear_vaddr;

	/* Allocate remaining part of bss section */
	Elf32_Addr bss_vaddr = data_vlimit;
	Elf32_Addr bss_vlimit = ROUND_PAGE(segs[1]->p_vaddr + segs[1]->p_memsz);
	void *bss_addr = mapbase + (bss_vaddr - text_vaddr);

	/* Save important information */
	so->base_addr = (type == ET_EXEC) ? 0 : mapbase;
	so->text_addr = mapbase;
	so->text_size = mapsize;
	so->data_addr = data_addr;
	so->data_size = data_vlimit - data_vaddr;
	so->bss_addr = bss_addr;
	so->bss_size = bss_vlimit - bss_vaddr;
	so->end_addr = bss_addr + so->bss_size;
	so->text_prot = convert_prot(segs[0]->p_flags);
	so->data_prot = convert_prot(segs[1]->p_flags);
	so->bss_prot = convert_prot(segs[1]->p_flags);
}
bool
vm_exit_ept(registers_t *regs)
{
	u64 gpaddr = vmread64(VMCE_GUEST_PHYS_ADDR);
	un xq = vmread(VMCE_EXIT_QUALIFICATION);
	un gladdr = vmread(VMCE_GUEST_LINEAR_ADDR);
	bool dump = false;

	if (TRUNC_PAGE(gpaddr) == kvtophys(dma_test_page)) {
		dump = true;
	}

	epte_t *p = epte_ptr_get(gpaddr, false);
	if ((p == NULL) || (!bit_test(*p, EPT_VALID))) {
		u8 mt = mtrr_type(gpaddr);
		if (mt != MT_UC) {
			kprintf("vm_exit_ept>attempted access "
				"to unmapped, non IO page 0x%llx, MT %d\n",
				gpaddr, mt);
			goto protection_violation;
		}
		/* This is a MMIO page that hasn't yet
		 * been set up. */
		epte_t epte = mkepte(gpaddr, EPT_PERM_RW | (mt << EPT_MT_SHIFT));
		if (p == NULL) {
			p = epte_ptr_get(gpaddr, true);
		}
		if (p == NULL) {
			kprintf("vm_exit_ept>page_table alloc failed\n");
			vmx_clear_exec_cpu2(VMEXEC_CPU2_ENABLE_EPT);
			return false;
		}
		epte_t old_epte = *p;
		epte_t result = atomic_cx(p, old_epte, epte);
		if (result == old_epte && result != epte) {
			/* Update succeeded, so flush needed */
			iommu_flush_cache((un)p, sizeof(epte_t));
		}
		return true;
	}

	epte_t old_epte = *p;
	assert(bit_test(old_epte, EPT_VALID));

	if (bit_test(old_epte, EPT_HYP)) {
		kprintf("vm_exit_ept>attempted access "
			"to hyp page 0x%llx\n", gpaddr);
		goto protection_violation;
	}
	if (!bit_test(old_epte, EPT_GUEST)) {
		kprintf("vm_exit_ept>attempted access "
			"to non-guest page 0x%llx\n", gpaddr);
		goto protection_violation;
	}

	if (vm_nx_is_enabled()) {
		return vm_exit_ept_nx(regs, gpaddr, p, xq);
	}

	if (bit_test(xq, EPT_XQ_ACCESS_EXECUTE) &&
	    !bit_test(xq, EPT_XQ_PERM_EXECUTE)) {
		epte_t epte = *p;
		un prot = epte & EPT_PERM_ALL;
		bit_set(prot, EPT_X);
		if (vm_nx_is_enabled()) {
			/* Not yet.  Need a hook to set W again when no longer
			 * executable.  Catching write fault won't work if the
			 * first write is a DMA write */
			bit_clear(prot, EPT_W);
		}
		ret_t ret = vm_protect_page(gpaddr, prot, VMPF_FLUSH);
		if (ret) {
			kprintf("vm_exit_ept>vm_protect_page(0x%llx, 0x%lx) "
				"returned %ld\n", gpaddr, prot, ret);
			return false;
		}
		return true;
	} else if (bit_test(xq, EPT_XQ_ACCESS_WRITE) &&
		   !bit_test(xq, EPT_XQ_PERM_WRITE)) {
#ifdef NOTDEF
		epte_t epte = *p;
		static un count = 0;
		un n;
		if (((n = atomic_inc(&count)) < 5) || (n % 100 == 0)) {
			kprintf("vm_exit_ept>write attempt %ld "
				"but no write permission\n", n);
			kprintf("vm_exit_ept>epte = 0x%llx\n", epte);
			dump = true;
		}
#endif
		ret_t ret = vm_protect_page(gpaddr, EPT_PERM_RW, VMPF_FLUSH);
		if (ret) {
			kprintf("vm_exit_ept>vm_protect_page(0x%llx, 0x%lx) "
				"returned %ld\n", gpaddr, (un)EPT_PERM_RW, ret);
			return false;
		}
		return true;
	}

protection_violation:
#ifdef NOTDEF
	vmx_clear_exec_cpu2(VMEXEC_CPU2_ENABLE_EPT);
#else
	vm_entry_inject_exception(VEC_GP, 0);
#endif
	dump = true;

	if (dump) {
		kprintf("vm_exit_ept>access type %s%s%s\n",
			bit_test(xq, EPT_XQ_ACCESS_READ) ? "R" : "",
			bit_test(xq, EPT_XQ_ACCESS_WRITE) ? "W" : "",
			bit_test(xq, EPT_XQ_ACCESS_EXECUTE) ? "X" : "");
		kprintf("vm_exit_ept>permission %s%s%s\n",
			bit_test(xq, EPT_XQ_PERM_READ) ? "R" : "",
			bit_test(xq, EPT_XQ_PERM_WRITE) ? "W" : "",
			bit_test(xq, EPT_XQ_PERM_EXECUTE) ? "X" : "");
		if (bit_test(xq, EPT_XQ_GUEST_LADDR_VALID)) {
			kprintf("vm_exit_ept>guest linear address 0x%lx\n",
				gladdr);
			if (bit_test(xq, EPT_XQ_NOT_PT_ACCESS)) {
				kprintf("vm_exit_ept>"
					"access to guest physical address\n");
			} else {
				kprintf("vm_exit_ept>access to page table\n");
			}
		}
		kprintf("vm_exit_ept>guest physical address 0x%llx\n", gpaddr);
	}
	return true;
}