__forceinline void SetPML4(__in PAGE_TABLE_ENTRY& pte)
{
    PAGE_TABLE_ENTRY* _pte = PML4();
    if (_pte)
        *_pte = pte;
}
CMMU(__in const void* address)
    : m_va(*reinterpret_cast<const VIRTUAL_ADDRESS*>(&address)),
      m_pml4(readcr3() + m_va.Selector.PML4Selector * sizeof(void*), sizeof(PAGE_TABLE_ENTRY)),
      m_pdp(GetNextTable(PML4(), m_va.Selector.PDPSelector), sizeof(PAGE_TABLE_ENTRY)),
      m_pt(GetNextTable(PDP(), m_va.Selector.PTSelector), sizeof(PAGE_TABLE_ENTRY)),
      m_pte(GetNextTable(PT(), m_va.Selector.PTESelector), sizeof(PAGE_TABLE_ENTRY))
{
}
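The constructor above reads four 9-bit selector fields from a VIRTUAL_ADDRESS union whose definition is not shown. A minimal sketch of a layout consistent with that usage (an assumption, not the original definition; 64-bit bitfields rely on compiler support, which MSVC/GCC/Clang all provide):

#include <stdint.h>

typedef union _VIRTUAL_ADDRESS {
    const void *Pointer;
    struct {
        uint64_t Offset       : 12; /* byte offset within the 4 KiB page  */
        uint64_t PTESelector  : 9;  /* index into the page table          */
        uint64_t PTSelector   : 9;  /* index into the page directory      */
        uint64_t PDPSelector  : 9;  /* index into the PDPT                */
        uint64_t PML4Selector : 9;  /* index into the PML4                */
        uint64_t SignExtend   : 16; /* canonical sign extension of bit 47 */
    } Selector;
} VIRTUAL_ADDRESS;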
int setupt_proc_vm(ProcStruct* NewProc)
{
    PageStruct* pa = allocate_page();
    if (!pa)
        return 0;
    NewProc->pml4e = (uint64_t*)KADDR(pageToPhysicalAddress(pa));
    //printf("userpml4=%p", NewProc->pml4e);
    NewProc->cr3 = (physaddr_t*)PADDR(NewProc->pml4e);
    /* CHANGE THIS LATER. Set entries at indexes less than UTOP to 0. */
    NewProc->pml4e[PML4(PHYSBASE)] = boot_pml4e[PML4(PHYSBASE)];
    NewProc->pml4e[PML4(VIDEO_START)] = boot_pml4e[PML4(VIDEO_START)];
    return 1;
}
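Several of these snippets convert between kernel-virtual and physical addresses with KADDR/PADDR. A plausible pair of definitions under a direct-map design, assuming the kernel maps all physical memory starting at PHYSBASE (an assumption about this codebase, not its actual header):

#define KADDR(pa) ((uint64_t)(pa) + PHYSBASE) /* physical -> kernel virtual */
#define PADDR(va) ((uint64_t)(va) - PHYSBASE) /* kernel virtual -> physical */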
pte_t *
pml4e_walk(pml4e_t *pml4e, const void *va, int create)
{
    struct Page *newPage = NULL;

    //if(!create) cprintf("va = %0x, pml4e[PML4(va)] = %0x\n", va, pml4e[PML4(va)]);
    if (!pml4e[PML4(va)]) {
        if (!create)
            return NULL;
        newPage = page_alloc(0);
        if (newPage == NULL)
            return NULL;
        newPage->pp_ref++;
        pml4e[PML4(va)] = page2pa(newPage) | PTE_U | PTE_W | PTE_P;
        memset(page2kva(newPage), 0x00, PGSIZE);
    }
    pdpe_t *pdpe = (pdpe_t *)KADDR(PTE_ADDR(pml4e[PML4(va)]));
    pte_t *result = pdpe_walk(pdpe, va, create);
    if (!result && newPage) {
        /* the lower-level walk failed: roll back the entry we created */
        pml4e[PML4(va)] = 0;
        newPage->pp_ref = 0;
        page_free(newPage);
    }
    if (result)
        return result + PTX(va);
    return result;
}
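The JOS-style walks here index each level with PML4()/PDPE()/PDX()/PTX(). On x86-64 every table has 512 entries, so each index is a 9-bit slice of the virtual address. A reconstruction consistent with that usage (note that the pagetable_init/walk_pagetable snippets further down reuse some of these names for table *addresses* instead, so these definitions apply only to the walk-style snippets):

#define PML4(va) ((((uintptr_t)(va)) >> 39) & 0x1FF) /* bits 47..39 */
#define PDPE(va) ((((uintptr_t)(va)) >> 30) & 0x1FF) /* bits 38..30 */
#define PDX(va)  ((((uintptr_t)(va)) >> 21) & 0x1FF) /* bits 29..21 */
#define PTX(va)  ((((uintptr_t)(va)) >> 12) & 0x1FF) /* bits 20..12 */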
__checkReturn bool GetPML4(__out PAGE_TABLE_ENTRY& pte)
{
    PAGE_TABLE_ENTRY* _pte = PML4();
    if (_pte) {
        pte = *_pte;
        return true;
    }
    return false;
}
void pagetable_init(uint64_t max_addr, uint32_t kernel_end)
{
    uint32_t i;

    page_table_area = kernel_end;
    memset((uint8_t *)page_table_area, 0, PT_NUM_PAGES(max_addr) * PAGE_SIZE);
    printk("page_table_area: 0x%lx\n", page_table_area);
    printk("page_table_end: 0x%lx\n",
           page_table_area + PT_NUM_PAGES(max_addr) * PAGE_SIZE);

    /* direct map all but the zero page in the page tables */
    for (i = 1; i < NUM_PTES(max_addr); i++) {
        struct dw *pt = (struct dw *)PT(i);
        pt->lo = PTE(i) | ENTRY_RW | ENTRY_PRESENT;
    }

    /* set up the page directories */
    for (i = 0; i < NUM_PTPGS(max_addr); i++) {
        struct dw *pd = (struct dw *)PD(i);
        pd->lo = PDE(i) | ENTRY_RW | ENTRY_PRESENT;
    }

    /* set up the pdp's */
    for (i = 0; i < NUM_PDPGS(max_addr); i++) {
        struct dw *pdp = (struct dw *)PDP(i);
        pdp->lo = PDPE(i) | ENTRY_RW | ENTRY_PRESENT;
    }

    /* set up the pml4 */
    for (i = 0; i < NUM_PDPPGS(max_addr); i++) {
        struct dw *pml4 = (struct dw *)PML4(i);
        pml4->lo = PML4E(i) | ENTRY_RW | ENTRY_PRESENT;
    }

    walk_pagetable(max_addr);
    to64_prep_paging(PML4(0));
    //return PML4(0);
}
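pagetable_init sizes its scratch area with macros that are not shown. A sketch of definitions consistent with the loops above (assumptions, not the project's actual header): each 4 KiB table page holds 512 eight-byte entries, so each level needs 1/512th as many pages as the level below it.

#define ENTRIES_PER_PG     512
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

#define NUM_PTES(max)   ((max) / PAGE_SIZE)                          /* 4 KiB mappings */
#define NUM_PTPGS(max)  DIV_ROUND_UP(NUM_PTES(max), ENTRIES_PER_PG)  /* PT pages       */
#define NUM_PDPGS(max)  DIV_ROUND_UP(NUM_PTPGS(max), ENTRIES_PER_PG) /* PD pages       */
#define NUM_PDPPGS(max) DIV_ROUND_UP(NUM_PDPGS(max), ENTRIES_PER_PG) /* PDPT pages     */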
static physaddr_t
check_va2pa(pml4e_t *pml4e, uintptr_t va)
{
    pte_t *pte;
    pdpe_t *pdpe;
    pde_t *pde;

    pml4e = &pml4e[PML4(va)];
    if (!(*pml4e & PTE_P))
        return ~0;
    pdpe = (pdpe_t *)KADDR(PTE_ADDR(*pml4e));
    if (!(pdpe[PDPE(va)] & PTE_P))
        return ~0;
    pde = (pde_t *)KADDR(PTE_ADDR(pdpe[PDPE(va)]));
    pde = &pde[PDX(va)];
    if (!(*pde & PTE_P))
        return ~0;
    pte = (pte_t *)KADDR(PTE_ADDR(*pde));
    if (!(pte[PTX(va)] & PTE_P))
        return ~0;
    return PTE_ADDR(pte[PTX(va)]);
}
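As a sanity check on the index arithmetic these walks rely on: the address 2^39 + 2^30 + 2^21 + 2^12 selects entry 1 at every level with page offset 0. A self-contained check using the bit slices directly:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t va = 0x8040201000ULL;      /* 2^39 + 2^30 + 2^21 + 2^12 */

    assert(((va >> 39) & 0x1FF) == 1);  /* PML4 index  */
    assert(((va >> 30) & 0x1FF) == 1);  /* PDPT index  */
    assert(((va >> 21) & 0x1FF) == 1);  /* PD index    */
    assert(((va >> 12) & 0x1FF) == 1);  /* PT index    */
    assert((va & 0xFFF) == 0);          /* page offset */
    return 0;
}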
int main(int argc, char **argv)
{
    int vmmflags = VMM_VMCALL_PRINTF;
    uint64_t entry = 0;
    int ret;
    struct vm_trapframe *vm_tf;
    int c;
    int option_index;
    static struct option long_options[] = {
        {"debug",         no_argument,       0, 'd'},
        {"vmmflags",      required_argument, 0, 'v'},
        {"memsize",       required_argument, 0, 'm'},
        {"memstart",      required_argument, 0, 'M'},
        {"stack",         required_argument, 0, 'S'},
        {"cmdline_extra", required_argument, 0, 'c'},
        {"greedy",        no_argument,       0, 'g'},
        {"scp",           no_argument,       0, 's'},
        {"help",          no_argument,       0, 'h'},
        {0, 0, 0, 0}
    };

    fprintf(stderr, "%p %p %p %p\n", PGSIZE, PGSHIFT, PML1_SHIFT,
            PML1_PTE_REACH);

    if ((uintptr_t)__procinfo.program_end >= MinMemory) {
        fprintf(stderr,
                "Panic: vmrunkernel binary extends into guest memory\n");
        exit(1);
    }

    while ((c = getopt_long(argc, argv, "dv:m:M:S:gsh", long_options,
                            &option_index)) != -1) {
        switch (c) {
        case 'd':
            debug++;
            break;
        case 'v':
            vmmflags = strtoull(optarg, 0, 0);
            break;
        case 'm':
            memsize = strtoull(optarg, 0, 0);
            break;
        case 'M':
            memstart = strtoull(optarg, 0, 0);
            break;
        case 'S':
            stack = strtoull(optarg, 0, 0);
            break;
        case 'g':	/* greedy */
            parlib_never_yield = TRUE;
            break;
        case 's':	/* scp */
            parlib_wants_to_be_mcp = FALSE;
            break;
        case 'h':
        default:
            /* Sadly, the getopt_long struct does not have a pointer
             * to help text. */
            for (int i = 0;
                 i < sizeof(long_options) / sizeof(long_options[0]) - 1;
                 i++) {
                struct option *l = &long_options[i];
                fprintf(stderr, "%s or %c%s\n", l->name, l->val,
                        l->has_arg ? " <arg>" : "");
            }
            exit(0);
        }
    }
    argc -= optind;
    argv += optind;
    if (argc < 1) {
        fprintf(stderr, "Usage: %s vmimage [-n (no vmcall printf)]\n",
                argv[0]);
        exit(1);
    }
    if ((uintptr_t)(memstart + memsize) >= (uintptr_t)BRK_START) {
        fprintf(stderr,
                "memstart 0x%lx memsize 0x%lx -> 0x%lx is too large; overlaps BRK_START at %p\n",
                memstart, memsize, memstart + memsize, BRK_START);
        exit(1);
    }
    ram = mmap((void *)memstart, memsize, PROT_READ | PROT_WRITE | PROT_EXEC,
               MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
    if (ram != (void *)memstart) {
        fprintf(stderr, "Could not mmap 0x%lx bytes at 0x%lx\n",
                memsize, memstart);
        exit(1);
    }

    entry = load_kernel(argv[0]);
    if (entry == 0) {
        fprintf(stderr, "Unable to load kernel %s\n", argv[0]);
        exit(1);
    }

    vm->nr_gpcs = 1;
    vm->gpcis = &gpci;
    ret = vmm_init(vm, vmmflags);
    if (ret) {
        fprintf(stderr, "vmm_init failed: %r\n");
        exit(1);
    }

    /* Allocate 3 pages for page table pages: a page of 512 GiB
     * PTEs with only one entry filled to point to a page of 1 GiB
     * PTEs; a page of 1 GiB PTEs with only one entry filled to
     * point to a page of 2 MiB PTEs; and a page of 2 MiB PTEs,
     * all of which may be filled. For now, we don't handle
     * starting addresses not aligned on 512 GiB boundaries or
     * sizes > GiB. */
    ret = posix_memalign((void **)&p512, PGSIZE, 3 * PGSIZE);
    if (ret) {
        perror("ptp alloc");
        exit(1);
    }

    /* Set up a 1:1 ("identity") page mapping from guest virtual
     * to guest physical using the (host virtual)
     * `kerneladdress`. This mapping may be used for only a short
     * time, until the guest sets up its own page tables. Be aware
     * that the values stored in the table are physical addresses.
     * This is subtle and mistakes are easily disguised due to the
     * identity mapping, so take care when manipulating these
     * mappings. */
    p1 = &p512[NPTENTRIES];
    p2m = &p512[2 * NPTENTRIES];

    fprintf(stderr, "Map %p for %zu bytes\n", memstart, memsize);
    /* TODO: fix this nested loop so it's correct for more than
     * one GiB. */
    for (uintptr_t p4 = memstart; p4 < memstart + memsize;
         p4 += PML4_PTE_REACH) {
        p512[PML4(p4)] = (uint64_t)p1 | PTE_KERN_RW;
        for (uintptr_t p3 = p4; p3 < memstart + memsize;
             p3 += PML3_PTE_REACH) {
            p1[PML3(p3)] = (uint64_t)p2m | PTE_KERN_RW;
            for (uintptr_t p2 = p3; p2 < memstart + memsize;
                 p2 += PML2_PTE_REACH) {
                p2m[PML2(p2)] = (uint64_t)p2 | PTE_KERN_RW | PTE_PS;
            }
        }
    }

    fprintf(stderr, "p512 %p p512[0] is 0x%lx p1 %p p1[0] is 0x%lx\n",
            p512, p512[0], p1, p1[0]);

    vm_tf = gth_to_vmtf(vm->gths[0]);
    vm_tf->tf_cr3 = (uint64_t)p512;
    vm_tf->tf_rip = entry;
    vm_tf->tf_rsp = stack;
    vm_tf->tf_rsi = (uint64_t)0;
    start_guest_thread(vm->gths[0]);

    uthread_sleep_forever();
    return 0;
}
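The identity-map loop steps by PMLx_PTE_REACH, the span of guest-physical address covered by a single entry at each level. The values follow directly from the 9-bits-per-level split; a sketch (the Akaros names are from the code above, and the project derives them from PMLx_SHIFT constants rather than writing them literally):

#define PML1_PTE_REACH (1ULL << 12) /* one PT entry maps 4 KiB                     */
#define PML2_PTE_REACH (1ULL << 21) /* one PD entry maps 2 MiB (with PTE_PS set)   */
#define PML3_PTE_REACH (1ULL << 30) /* one PDPT entry maps 1 GiB                   */
#define PML4_PTE_REACH (1ULL << 39) /* one PML4 entry maps 512 GiB                 */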
// check page_insert, page_remove, &c
static void
page_check(void)
{
    struct Page *pp0, *pp1, *pp2, *pp3, *pp4, *pp5;
    struct Page *fl;
    pte_t *ptep, *ptep1;
    pdpe_t *pdpe;
    pde_t *pde;
    void *va;
    int i;
    uintptr_t mm1, mm2;

    pp0 = pp1 = pp2 = pp3 = pp4 = pp5 = 0;
    assert(pp0 = page_alloc(0));
    assert(pp1 = page_alloc(0));
    assert(pp2 = page_alloc(0));
    assert(pp3 = page_alloc(0));
    assert(pp4 = page_alloc(0));
    assert(pp5 = page_alloc(0));
    assert(pp0);
    assert(pp1 && pp1 != pp0);
    assert(pp2 && pp2 != pp1 && pp2 != pp0);
    assert(pp3 && pp3 != pp2 && pp3 != pp1 && pp3 != pp0);
    assert(pp4 && pp4 != pp3 && pp4 != pp2 && pp4 != pp1 && pp4 != pp0);
    assert(pp5 && pp5 != pp4 && pp5 != pp3 && pp5 != pp2 && pp5 != pp1 &&
           pp5 != pp0);

    // temporarily steal the rest of the free pages
    fl = page_free_list;
    page_free_list = NULL;

    // should be no free memory
    assert(!page_alloc(0));

    // there is no page allocated at address 0
    assert(page_lookup(boot_pml4e, (void *)0x0, &ptep) == NULL);

    // there is no free memory, so we can't allocate a page table
    assert(page_insert(boot_pml4e, pp1, 0x0, 0) < 0);

    // free pp0 and try again: pp0 should be used for page table
    page_free(pp0);
    assert(page_insert(boot_pml4e, pp1, 0x0, 0) < 0);
    page_free(pp2);
    page_free(pp3);
    //cprintf("pp1 ref count = %d\n", pp1->pp_ref);
    //cprintf("pp0 ref count = %d\n", pp0->pp_ref);
    //cprintf("pp2 ref count = %d\n", pp2->pp_ref);
    assert(page_insert(boot_pml4e, pp1, 0x0, 0) == 0);
    assert((PTE_ADDR(boot_pml4e[0]) == page2pa(pp0) ||
            PTE_ADDR(boot_pml4e[0]) == page2pa(pp2) ||
            PTE_ADDR(boot_pml4e[0]) == page2pa(pp3)));
    assert(check_va2pa(boot_pml4e, 0x0) == page2pa(pp1));
    assert(pp1->pp_ref == 1);
    assert(pp0->pp_ref == 1);
    assert(pp2->pp_ref == 1);

    // should be able to map pp3 at PGSIZE because pp0 is already
    // allocated as a page table
    assert(page_insert(boot_pml4e, pp3, (void *)PGSIZE, 0) == 0);
    assert(check_va2pa(boot_pml4e, PGSIZE) == page2pa(pp3));
    assert(pp3->pp_ref == 2);

    // should be no free memory
    assert(!page_alloc(0));

    // should be able to map pp3 at PGSIZE because it's already there
    assert(page_insert(boot_pml4e, pp3, (void *)PGSIZE, 0) == 0);
    assert(check_va2pa(boot_pml4e, PGSIZE) == page2pa(pp3));
    assert(pp3->pp_ref == 2);

    // pp3 should NOT be on the free list
    // could happen if ref counts are handled sloppily in page_insert
    assert(!page_alloc(0));

    // check that pml4e_walk returns a pointer to the pte
    pdpe = KADDR(PTE_ADDR(boot_pml4e[PML4(PGSIZE)]));
    pde = KADDR(PTE_ADDR(pdpe[PDPE(PGSIZE)]));
    ptep = KADDR(PTE_ADDR(pde[PDX(PGSIZE)]));
    assert(pml4e_walk(boot_pml4e, (void *)PGSIZE, 0) == ptep + PTX(PGSIZE));

    // should be able to change permissions too.
    assert(page_insert(boot_pml4e, pp3, (void *)PGSIZE, PTE_U) == 0);
    assert(check_va2pa(boot_pml4e, PGSIZE) == page2pa(pp3));
    assert(pp3->pp_ref == 2);
    assert(*pml4e_walk(boot_pml4e, (void *)PGSIZE, 0) & PTE_U);
    assert(boot_pml4e[0] & PTE_U);

    // should not be able to map at PTSIZE because we'd need a free
    // page for a page table
    assert(page_insert(boot_pml4e, pp0, (void *)PTSIZE, 0) < 0);

    // insert pp1 at PGSIZE (replacing pp3)
    assert(page_insert(boot_pml4e, pp1, (void *)PGSIZE, 0) == 0);
    assert(!(*pml4e_walk(boot_pml4e, (void *)PGSIZE, 0) & PTE_U));

    // should have pp1 at both 0 and PGSIZE
    assert(check_va2pa(boot_pml4e, 0) == page2pa(pp1));
    assert(check_va2pa(boot_pml4e, PGSIZE) == page2pa(pp1));
    // ... and ref counts should reflect this
    assert(pp1->pp_ref == 2);
    assert(pp3->pp_ref == 1);

    // unmapping pp1 at 0 should keep pp1 at PGSIZE
    page_remove(boot_pml4e, 0x0);
    assert(check_va2pa(boot_pml4e, 0x0) == ~0);
    assert(check_va2pa(boot_pml4e, PGSIZE) == page2pa(pp1));
    assert(pp1->pp_ref == 1);
    assert(pp3->pp_ref == 1);

    // Test re-inserting pp1 at PGSIZE.
    // Thanks to Varun Agrawal for suggesting this test case.
    assert(page_insert(boot_pml4e, pp1, (void *)PGSIZE, 0) == 0);
    assert(pp1->pp_ref);
    assert(pp1->pp_link == NULL);

    // unmapping pp1 at PGSIZE should free it
    page_remove(boot_pml4e, (void *)PGSIZE);
    assert(check_va2pa(boot_pml4e, 0x0) == ~0);
    assert(check_va2pa(boot_pml4e, PGSIZE) == ~0);
    assert(pp1->pp_ref == 0);
    assert(pp3->pp_ref == 1);

#if 0
    // should be able to page_insert to change a page
    // and see the new data immediately.
    memset(page2kva(pp1), 1, PGSIZE);
    memset(page2kva(pp2), 2, PGSIZE);
    page_insert(boot_pgdir, pp1, 0x0, 0);
    assert(pp1->pp_ref == 1);
    assert(*(int *)0 == 0x01010101);
    page_insert(boot_pgdir, pp2, 0x0, 0);
    assert(*(int *)0 == 0x02020202);
    assert(pp2->pp_ref == 1);
    assert(pp1->pp_ref == 0);
    page_remove(boot_pgdir, 0x0);
    assert(pp2->pp_ref == 0);
#endif

    // forcibly take pp3 back
    assert(PTE_ADDR(boot_pml4e[0]) == page2pa(pp3));
    boot_pml4e[0] = 0;
    assert(pp3->pp_ref == 1);
    page_decref(pp3);

    // check pointer arithmetic in pml4e_walk
    page_decref(pp0);
    page_decref(pp2);
    va = (void *)(PGSIZE * 100);
    ptep = pml4e_walk(boot_pml4e, va, 1);
    pdpe = KADDR(PTE_ADDR(boot_pml4e[PML4(va)]));
    pde = KADDR(PTE_ADDR(pdpe[PDPE(va)]));
    ptep1 = KADDR(PTE_ADDR(pde[PDX(va)]));
    assert(ptep == ptep1 + PTX(va));

    // check that new page tables get cleared
    page_decref(pp4);
    memset(page2kva(pp4), 0xFF, PGSIZE);
    pml4e_walk(boot_pml4e, 0x0, 1);
    pdpe = KADDR(PTE_ADDR(boot_pml4e[0]));
    pde = KADDR(PTE_ADDR(pdpe[0]));
    ptep = KADDR(PTE_ADDR(pde[0]));
    for (i = 0; i < NPTENTRIES; i++)
        assert((ptep[i] & PTE_P) == 0);
    boot_pml4e[0] = 0;

    // give free list back
    page_free_list = fl;

    // free the pages we took
    page_decref(pp0);
    page_decref(pp1);
    page_decref(pp2);

    // test mmio_map_region
    mm1 = (uintptr_t)mmio_map_region(0, 4097);
    mm2 = (uintptr_t)mmio_map_region(0, 4096);
    // check that they're in the right region; 4097 bytes round up to
    // two pages (8192 bytes)
    assert(mm1 >= MMIOBASE && mm1 + 8192 < MMIOLIM);
    assert(mm2 >= MMIOBASE && mm2 + 8192 < MMIOLIM);
    // check that they're page-aligned
    assert(mm1 % PGSIZE == 0 && mm2 % PGSIZE == 0);
    // check that they don't overlap
    assert(mm1 + 8192 <= mm2);
    // check page mappings
    assert(check_va2pa(boot_pml4e, mm1) == 0);
    assert(check_va2pa(boot_pml4e, mm1 + PGSIZE) == PGSIZE);
    assert(check_va2pa(boot_pml4e, mm2) == 0);
    assert(check_va2pa(boot_pml4e, mm2 + PGSIZE) == ~0);
    // check permissions
    assert(*pml4e_walk(boot_pml4e, (void *)mm1, 0) & (PTE_W | PTE_PWT | PTE_PCD));
    assert(!(*pml4e_walk(boot_pml4e, (void *)mm1, 0) & PTE_U));
    // clear the mappings
    *pml4e_walk(boot_pml4e, (void *)mm1, 0) = 0;
    *pml4e_walk(boot_pml4e, (void *)mm1 + PGSIZE, 0) = 0;
    *pml4e_walk(boot_pml4e, (void *)mm2, 0) = 0;

    cprintf("check_page() succeeded!\n");
}
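The re-insert test above pins down the trickiest part of page_insert's contract: inserting a page at a VA where it is already mapped must not free it. A minimal sketch of the JOS-style contract (not the graded solution; E_NO_MEM is the lab's error constant) that passes that test by bumping the ref count before removing any old mapping:

int page_insert(pml4e_t *pml4e, struct Page *pp, void *va, int perm)
{
    pte_t *pte = pml4e_walk(pml4e, va, 1);  /* create intermediate tables */

    if (!pte)
        return -E_NO_MEM;
    /* Increment before page_remove: if pp is already mapped at va,
     * removing the old mapping would otherwise drop pp_ref to 0 and
     * free the page we are about to map. */
    pp->pp_ref++;
    if (*pte & PTE_P)
        page_remove(pml4e, va);  /* also invalidates the TLB entry */
    *pte = page2pa(pp) | perm | PTE_P;
    return 0;
}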
uint16_t map_vm_pm(pml4e_t* pml4e, uint64_t va, uint64_t pa, uint64_t size,
                   uint16_t perm)
{
    uint64_t *pdpe, *pde, *pte;

    for (uint64_t i = 0; i < size; i += PGSIZE) {
        /* extract the upper 9 bits of the VA to index the pml4,
         * allocating a PDPT page if the entry is not present */
        if ((pml4e[PML4(va + i)] & (uint64_t)PTE_P) == 0) {
            pdpe = pageToPhysicalAddress(allocate_page());
            if (physicalAddressToPage(pdpe)) {
                physicalAddressToPage(pdpe)->ref_count++;
                pml4e[PML4(va + i)] =
                    ((uint64_t)pdpe & ~0xFFF) | (perm | PTE_P);
            } else {
                printf("Failed in PML4E:%p", pdpe);
                return -1;
            }
        }
        pdpe = (uint64_t *)(KADDR(pml4e[PML4(va + i)]) & ~0xFFF);

        if ((pdpe[PDPE(va + i)] & (uint64_t)PTE_P) == 0) {
            pde = pageToPhysicalAddress(allocate_page());
            if (pde) {
                physicalAddressToPage(pde)->ref_count++;
                pdpe[PDPE(va + i)] =
                    ((uint64_t)pde & ~0xFFF) | (perm | PTE_P);
            } else {
                printf("Failed in PDPE:%p", pde);
                return -1;
            }
        }
        pde = (uint64_t *)(KADDR(pdpe[PDPE(va + i)]) & ~0xFFF);

        if ((pde[PDX(va + i)] & (uint64_t)PTE_P) == 0) {
            pte = pageToPhysicalAddress(allocate_page());
            if (pte) {
                physicalAddressToPage(pte)->ref_count++;
                pde[PDX(va + i)] =
                    ((uint64_t)pte & ~0xFFF) | (perm | PTE_P);
            } else {
                printf("Failed in PDE:%p", pte);
                return -1;
            }
        }
        pte = (uint64_t *)(KADDR(pde[PDX(va + i)]) & ~0xFFF);

        pte[PTX(va + i)] = ((pa + i) & ~0xFFF) | (perm | PTE_P);
        tlb_invalidate(pml4e, (void *)(va + i));
    }
    //printf("Mapped Region:%p-%p to %p-%p\n",
    //       ROUNDDOWN(va, PGSIZE), ROUNDDOWN(va + size, PGSIZE),
    //       ROUNDDOWN(pa, PGSIZE), ROUNDDOWN(pa + size, PGSIZE));
    return 0;
}
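A hypothetical call site for map_vm_pm(): the helper below, the buffer arguments, and the JOS-style PTE_U/PTE_W flag names are assumptions for illustration, not part of the original code. Note that map_vm_pm returns a uint16_t, so its -1 failure value arrives as 0xFFFF; callers should test for nonzero rather than for -1.

/* Map a 16 KiB buffer, writable and user-accessible, into a
 * process's address space. Returns 0 on success, nonzero on
 * allocation failure. */
static uint16_t map_user_buffer(pml4e_t *pml4e, uint64_t buf_va,
                                uint64_t buf_pa)
{
    return map_vm_pm(pml4e, buf_va, buf_pa, 4 * PGSIZE, PTE_U | PTE_W);
}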
static void walk_pagetable(uint64_t max_addr)
{
    uint32_t pml4 = PML4(0);
    int dbg = 0;

    if (dbg) {
        printk("pml4pgs: %d\n", NUM_PML4PGS(max_addr));
        printk("pdppgs: %d\n", NUM_PDPPGS(max_addr));
        printk("pdpgs: %d\n", NUM_PDPGS(max_addr));
        printk("ptpgs: %d\n", NUM_PTPGS(max_addr));
        printk("pml4(0) is at 0x%lx\n", PML4(0));
        printk("pml4e(0) is 0x%llx ", PT_ADDR(*(uint64_t *)pml4));
        printk("should be 0x%llx\n", PML4E(0));
    }
    assert(PT_ADDR(*(uint64_t *)pml4) == PML4E(0));

    uint64_t *pdp, *pd, *pt;
    size_t i, j, k;

    for (i = 0; i < NUM_ENTRIES; i++) {
        pdp = (uint64_t *)(PT_ADDR(*(uint64_t *)pml4)) + i;
        if (!(*pdp & 0x1))
            continue;
        if (dbg) {
            printk("PDP(%d) is at ", i);
            printk("0x%llx ", pdp);
            printk("should be 0x%llx\n", PDP(i));
        }
        assert((uint32_t)pdp == PDP(i));
        if (dbg) {
            printk("PDPE(%d) is at ", i);
            printk("0x%llx ", PT_ADDR(*pdp));
            printk("should be 0x%llx\n", PDPE(i));
        }
        assert(PT_ADDR(*pdp) == PDPE(i));

        for (j = 0; j < NUM_ENTRIES; j++) {
            pd = ((uint64_t *)PT_ADDR(*pdp)) + j;
            if (!(*pd & 0x1))
                continue;
            size_t jdx = i * NUM_ENTRIES + j;
            if (dbg) {
                printk("PD(%d) is at ", jdx);
                printk("0x%llx ", pd);
                printk("should be 0x%llx\n", PD(jdx));
            }
            assert((uint32_t)pd == PD(jdx));
            if (dbg) {
                printk("PDE(%d) is at ", jdx);
                printk("0x%llx ", PT_ADDR(*pd));
                printk("should be 0x%llx\n", PDE(jdx));
            }
            assert(PT_ADDR(*pd) == PDE(jdx));

            for (k = 0; k < NUM_ENTRIES; k++) {
                pt = ((uint64_t *)PT_ADDR(*pd)) + k;
                if (!(*pt & 0x1))
                    continue;
                size_t idx = jdx * NUM_ENTRIES + k;
                if (dbg) {
                    printk("PT(%d) is at ", idx);
                    printk("0x%llx ", pt);
                    printk("should be 0x%llx\n", PT(idx));
                }
                assert((uint32_t)pt == PT(idx));
                if (dbg) {
                    printk("PTE(%d) is at ", idx);
                    printk("0x%llx ", PT_ADDR(*pt));
                    printk("should be 0x%llx\n", PTE(idx));
                }
                assert(PT_ADDR(*pt) == PTE(idx));
            }
        }
    }
}
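walk_pagetable masks entries with PT_ADDR and tests bit 0 for presence. A plausible set of definitions consistent with those checks and with the ENTRY_* flags used in pagetable_init (assumed; the project's header isn't shown). Real x86-64 entries also carry flags in bits 52..62 and the NX bit in 63, which this bootstrap code does not use, so a bare low-12-bit mask suffices here:

#define ENTRY_PRESENT 0x1ULL            /* bit 0: entry is valid      */
#define ENTRY_RW      0x2ULL            /* bit 1: writable            */
#define PT_ADDR(e)    ((e) & ~0xFFFULL) /* strip the low 12 flag bits */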