int vtd_init(void)
{
	const struct acpi_dmar_table *dmar;
	const struct acpi_dmar_drhd *drhd;
	unsigned int pt_levels, num_did;
	void *reg_base = NULL;
	unsigned long offset;
	unsigned long caps;
	int err;

	dmar = (struct acpi_dmar_table *)acpi_find_table("DMAR", NULL);
	if (!dmar)
//		return -ENODEV;
	{
		printk("WARNING: No VT-d support found!\n");
		return 0;
	}

	if (sizeof(struct acpi_dmar_table) + sizeof(struct acpi_dmar_drhd) >
	    dmar->header.length)
		return -EIO;

	drhd = (struct acpi_dmar_drhd *)dmar->remap_structs;
	if (drhd->header.type != ACPI_DMAR_DRHD)
		return -EIO;

	offset = (void *)dmar->remap_structs - (void *)dmar;
	do {
		if (drhd->header.length < sizeof(struct acpi_dmar_drhd) ||
		    offset + drhd->header.length > dmar->header.length)
			return -EIO;

		/* TODO: support multiple segments */
		if (drhd->segment != 0)
			return -EIO;

		printk("Found DMAR @%p\n", drhd->register_base_addr);

		reg_base = page_alloc(&remap_pool, 1);
		if (!reg_base)
			return -ENOMEM;

		if (dmar_units == 0)
			dmar_reg_base = reg_base;
		else if (reg_base != dmar_reg_base + dmar_units * PAGE_SIZE)
			return -ENOMEM;

		err = page_map_create(hv_page_table, drhd->register_base_addr,
				      PAGE_SIZE, (unsigned long)reg_base,
				      PAGE_DEFAULT_FLAGS | PAGE_FLAG_UNCACHED,
				      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS,
				      PAGE_MAP_NON_COHERENT);
		if (err)
			return err;

		caps = mmio_read64(reg_base + VTD_CAP_REG);
		if (caps & VTD_CAP_SAGAW39)
			pt_levels = 3;
		else if (caps & VTD_CAP_SAGAW48)
			pt_levels = 4;
		else
			return -EIO;

		if (dmar_pt_levels > 0 && dmar_pt_levels != pt_levels)
			return -EIO;
		dmar_pt_levels = pt_levels;

		if (caps & VTD_CAP_CM)
			return -EIO;

		/* We only support IOTLB registers within the first page. */
		if (vtd_iotlb_reg_base(reg_base) >= reg_base + PAGE_SIZE)
			return -EIO;

		if (mmio_read32(reg_base + VTD_GSTS_REG) & VTD_GSTS_TES)
			return -EBUSY;

		num_did = 1 << (4 + (caps & VTD_CAP_NUM_DID_MASK) * 2);
		if (num_did < dmar_num_did)
			dmar_num_did = num_did;

		dmar_units++;

		offset += drhd->header.length;
		drhd = (struct acpi_dmar_drhd *)
			(((void *)drhd) + drhd->header.length);
	} while (offset < dmar->header.length &&
		 drhd->header.type == ACPI_DMAR_DRHD);

	return 0;
}
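/*
 * Illustrative sketch (not taken from the code above) of the ACPI DMAR/DRHD
 * layout that vtd_init() walks. Field names and widths follow the VT-d
 * specification's DMA Remapping Reporting structure; the real definitions
 * live in the project's ACPI headers and may differ in naming or padding.
 */
struct acpi_dmar_remap_header {
	u16 type;			/* ACPI_DMAR_DRHD, RMRR, ATSR, ... */
	u16 length;			/* length of this remapping structure */
};

struct acpi_dmar_drhd {
	struct acpi_dmar_remap_header header;
	u8 flags;
	u8 reserved;
	u16 segment;			/* PCI segment number */
	u64 register_base_addr;		/* physical base of the unit's registers */
	/* device scope entries follow */
} __attribute__((packed));

struct acpi_dmar_table {
	struct acpi_table_header header;	/* signature "DMAR", length, ... */
	u8 host_address_width;
	u8 flags;
	u8 reserved[10];
	u8 remap_structs[];			/* DRHD/RMRR/... structures */
} __attribute__((packed));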
// check page_insert, page_remove, &c static void page_check(void) { struct Page *pp0, *pp1, *pp2,*pp3,*pp4,*pp5; struct Page * fl; pte_t *ptep, *ptep1; pdpe_t *pdpe; pde_t *pde; void *va; int i; uintptr_t mm1, mm2; pp0 = pp1 = pp2 = pp3 = pp4 = pp5 =0; assert(pp0 = page_alloc(0)); assert(pp1 = page_alloc(0)); assert(pp2 = page_alloc(0)); assert(pp3 = page_alloc(0)); assert(pp4 = page_alloc(0)); assert(pp5 = page_alloc(0)); assert(pp0); assert(pp1 && pp1 != pp0); assert(pp2 && pp2 != pp1 && pp2 != pp0); assert(pp3 && pp3 != pp2 && pp3 != pp1 && pp3 != pp0); assert(pp4 && pp4 != pp3 && pp4 != pp2 && pp4 != pp1 && pp4 != pp0); assert(pp5 && pp5 != pp4 && pp5 != pp3 && pp5 != pp2 && pp5 != pp1 && pp5 != pp0); // temporarily steal the rest of the free pages fl = page_free_list; page_free_list = NULL; // should be no free memory assert(!page_alloc(0)); // there is no page allocated at address 0 assert(page_lookup(boot_pml4e, (void *) 0x0, &ptep) == NULL); // there is no free memory, so we can't allocate a page table assert(page_insert(boot_pml4e, pp1, 0x0, 0) < 0); // free pp0 and try again: pp0 should be used for page table page_free(pp0); assert(page_insert(boot_pml4e, pp1, 0x0, 0) < 0); page_free(pp2); page_free(pp3); //cprintf("pp1 ref count = %d\n",pp1->pp_ref); //cprintf("pp0 ref count = %d\n",pp0->pp_ref); //cprintf("pp2 ref count = %d\n",pp2->pp_ref); assert(page_insert(boot_pml4e, pp1, 0x0, 0) == 0); assert((PTE_ADDR(boot_pml4e[0]) == page2pa(pp0) || PTE_ADDR(boot_pml4e[0]) == page2pa(pp2) || PTE_ADDR(boot_pml4e[0]) == page2pa(pp3) )); assert(check_va2pa(boot_pml4e, 0x0) == page2pa(pp1)); assert(pp1->pp_ref == 1); assert(pp0->pp_ref == 1); assert(pp2->pp_ref == 1); //should be able to map pp3 at PGSIZE because pp0 is already allocated for page table assert(page_insert(boot_pml4e, pp3, (void*) PGSIZE, 0) == 0); assert(check_va2pa(boot_pml4e, PGSIZE) == page2pa(pp3)); assert(pp3->pp_ref == 2); // should be no free memory assert(!page_alloc(0)); // should be able to map pp3 at PGSIZE because it's already there assert(page_insert(boot_pml4e, pp3, (void*) PGSIZE, 0) == 0); assert(check_va2pa(boot_pml4e, PGSIZE) == page2pa(pp3)); assert(pp3->pp_ref == 2); // pp3 should NOT be on the free list // could happen in ref counts are handled sloppily in page_insert assert(!page_alloc(0)); // check that pgdir_walk returns a pointer to the pte pdpe = KADDR(PTE_ADDR(boot_pml4e[PML4(PGSIZE)])); pde = KADDR(PTE_ADDR(pdpe[PDPE(PGSIZE)])); ptep = KADDR(PTE_ADDR(pde[PDX(PGSIZE)])); assert(pml4e_walk(boot_pml4e, (void*)PGSIZE, 0) == ptep+PTX(PGSIZE)); // should be able to change permissions too. assert(page_insert(boot_pml4e, pp3, (void*) PGSIZE, PTE_U) == 0); assert(check_va2pa(boot_pml4e, PGSIZE) == page2pa(pp3)); assert(pp3->pp_ref == 2); assert(*pml4e_walk(boot_pml4e, (void*) PGSIZE, 0) & PTE_U); assert(boot_pml4e[0] & PTE_U); // should not be able to map at PTSIZE because need free page for page table assert(page_insert(boot_pml4e, pp0, (void*) PTSIZE, 0) < 0); // insert pp1 at PGSIZE (replacing pp3) assert(page_insert(boot_pml4e, pp1, (void*) PGSIZE, 0) == 0); assert(!(*pml4e_walk(boot_pml4e, (void*) PGSIZE, 0) & PTE_U)); // should have pp1 at both 0 and PGSIZE assert(check_va2pa(boot_pml4e, 0) == page2pa(pp1)); assert(check_va2pa(boot_pml4e, PGSIZE) == page2pa(pp1)); // ... 
// and ref counts should reflect this
	assert(pp1->pp_ref == 2);
	assert(pp3->pp_ref == 1);

	// unmapping pp1 at 0 should keep pp1 at PGSIZE
	page_remove(boot_pml4e, 0x0);
	assert(check_va2pa(boot_pml4e, 0x0) == ~0);
	assert(check_va2pa(boot_pml4e, PGSIZE) == page2pa(pp1));
	assert(pp1->pp_ref == 1);
	assert(pp3->pp_ref == 1);

	// Test re-inserting pp1 at PGSIZE.
	// Thanks to Varun Agrawal for suggesting this test case.
	assert(page_insert(boot_pml4e, pp1, (void*) PGSIZE, 0) == 0);
	assert(pp1->pp_ref);
	assert(pp1->pp_link == NULL);

	// unmapping pp1 at PGSIZE should free it
	page_remove(boot_pml4e, (void*) PGSIZE);
	assert(check_va2pa(boot_pml4e, 0x0) == ~0);
	assert(check_va2pa(boot_pml4e, PGSIZE) == ~0);
	assert(pp1->pp_ref == 0);
	assert(pp3->pp_ref == 1);

#if 0
	// should be able to page_insert to change a page
	// and see the new data immediately.
	memset(page2kva(pp1), 1, PGSIZE);
	memset(page2kva(pp2), 2, PGSIZE);
	page_insert(boot_pgdir, pp1, 0x0, 0);
	assert(pp1->pp_ref == 1);
	assert(*(int*)0 == 0x01010101);
	page_insert(boot_pgdir, pp2, 0x0, 0);
	assert(*(int*)0 == 0x02020202);
	assert(pp2->pp_ref == 1);
	assert(pp1->pp_ref == 0);
	page_remove(boot_pgdir, 0x0);
	assert(pp2->pp_ref == 0);
#endif

	// forcibly take pp3 back
	assert(PTE_ADDR(boot_pml4e[0]) == page2pa(pp3));
	boot_pml4e[0] = 0;
	assert(pp3->pp_ref == 1);
	page_decref(pp3);

	// check pointer arithmetic in pml4e_walk
	page_decref(pp0);
	page_decref(pp2);
	va = (void*)(PGSIZE * 100);
	ptep = pml4e_walk(boot_pml4e, va, 1);
	pdpe = KADDR(PTE_ADDR(boot_pml4e[PML4(va)]));
	pde = KADDR(PTE_ADDR(pdpe[PDPE(va)]));
	ptep1 = KADDR(PTE_ADDR(pde[PDX(va)]));
	assert(ptep == ptep1 + PTX(va));

	// check that new page tables get cleared
	page_decref(pp4);
	memset(page2kva(pp4), 0xFF, PGSIZE);
	pml4e_walk(boot_pml4e, 0x0, 1);
	pdpe = KADDR(PTE_ADDR(boot_pml4e[0]));
	pde = KADDR(PTE_ADDR(pdpe[0]));
	ptep = KADDR(PTE_ADDR(pde[0]));
	for(i=0; i<NPTENTRIES; i++)
		assert((ptep[i] & PTE_P) == 0);
	boot_pml4e[0] = 0;

	// give free list back
	page_free_list = fl;

	// free the pages we took
	page_decref(pp0);
	page_decref(pp1);
	page_decref(pp2);

	// test mmio_map_region
	mm1 = (uintptr_t) mmio_map_region(0, 4097);
	mm2 = (uintptr_t) mmio_map_region(0, 4096);
	// check that they're in the right region
	assert(mm1 >= MMIOBASE && mm1 + 8096 < MMIOLIM);
	assert(mm2 >= MMIOBASE && mm2 + 8096 < MMIOLIM);
	// check that they're page-aligned
	assert(mm1 % PGSIZE == 0 && mm2 % PGSIZE == 0);
	// check that they don't overlap
	assert(mm1 + 8096 <= mm2);
	// check page mappings
	assert(check_va2pa(boot_pml4e, mm1) == 0);
	assert(check_va2pa(boot_pml4e, mm1+PGSIZE) == PGSIZE);
	assert(check_va2pa(boot_pml4e, mm2) == 0);
	assert(check_va2pa(boot_pml4e, mm2+PGSIZE) == ~0);
	// check permissions
	assert(*pml4e_walk(boot_pml4e, (void*) mm1, 0) & (PTE_W|PTE_PWT|PTE_PCD));
	assert(!(*pml4e_walk(boot_pml4e, (void*) mm1, 0) & PTE_U));
	// clear the mappings
	*pml4e_walk(boot_pml4e, (void*) mm1, 0) = 0;
	*pml4e_walk(boot_pml4e, (void*) mm1 + PGSIZE, 0) = 0;
	*pml4e_walk(boot_pml4e, (void*) mm2, 0) = 0;

	cprintf("check_page() succeeded!\n");
}
// // Check the physical page allocator (page_alloc(),page_free(), // and page_init()). // static void check_page_alloc(void) { struct Page *pp, *pp0, *pp1, *pp2; int nfree; struct Page *fl; char *c; int i; struct Page *p; // if there's a page that shouldn't be on // the free list, try to make sure it // eventually causes trouble. for (pp0 = page_free_list, nfree = 0; pp0; pp0 = pp0->pp_link) { memset(page2kva(pp0), 0x97, PGSIZE); } for (pp0 = page_free_list, nfree = 0; pp0; pp0 = pp0->pp_link) { // check that we didn't corrupt the free list itself assert(pp0 >= pages); assert(pp0 < pages + npages); // check a few pages that shouldn't be on the free list assert(page2pa(pp0) != 0); assert(page2pa(pp0) != IOPHYSMEM); assert(page2pa(pp0) != EXTPHYSMEM - PGSIZE); assert(page2pa(pp0) != EXTPHYSMEM); } // should be able to allocate three pages pp0 = pp1 = pp2 = 0; assert((pp0 = page_alloc(0))); assert((pp1 = page_alloc(0))); assert((pp2 = page_alloc(0))); assert(pp0); assert(pp1 && pp1 != pp0); assert(pp2 && pp2 != pp1 && pp2 != pp0); assert(page2pa(pp0) < npages*PGSIZE); assert(page2pa(pp1) < npages*PGSIZE); assert(page2pa(pp2) < npages*PGSIZE); // temporarily steal the rest of the free pages fl = page_free_list; page_free_list = 0; // should be no free memory assert(!page_alloc(0)); // free and re-allocate? page_free(pp0); page_free(pp1); page_free(pp2); pp0 = pp1 = pp2 = 0; assert((pp0 = page_alloc(0))); assert((pp1 = page_alloc(0))); assert((pp2 = page_alloc(0))); assert(pp0); assert(pp1 && pp1 != pp0); assert(pp2 && pp2 != pp1 && pp2 != pp0); assert(!page_alloc(0)); // test flags memset(page2kva(pp0), 1, PGSIZE); page_free(pp0); assert((pp = page_alloc(ALLOC_ZERO))); assert(pp && pp0 == pp); c = page2kva(pp); for (i = 0; i < PGSIZE; i++) assert(c[i] == 0); // give free list back page_free_list = fl; // free the pages we took page_free(pp0); page_free(pp1); page_free(pp2); cprintf("check_page_alloc() succeeded!\n"); }
ret_t ept_construct(void) { kprintf("ept_construct>ENTER\n"); __cpu_supports_ept = vmx_exec_cpu2_supported(VMEXEC_CPU2_ENABLE_EPT); __cpu_supports_vpid = vmx_exec_cpu2_supported(VMEXEC_CPU2_ENABLE_VPID); if (!cpu_supports_ept()) { kprintf("ept_construct>EPT is not supported by this CPU\n"); if (bp->bp_flags & BPF_NO_EPT) { return 0; } return -ENXIO; } u64 msr = get_MSR(MSR_IA32_VMX_EPT_VPID_CAP); if (bit_test(msr, 0)) { kprintf("ept_construct>Execute-only translation is supported\n"); } if (bit_test(msr, 32)) { kprintf("ept_construct>INVVPID is supported\n"); } if (bit_test(msr, 40)) { kprintf("ept_construct>ADDRESS INVVPID is supported\n"); } if (bit_test(msr, 41)) { kprintf("ept_construct>SINGLE INVVPID is supported\n"); } if (bit_test(msr, 42)) { kprintf("ept_construct>ALL INVVPID is supported\n"); } if (bit_test(msr, 43)) { kprintf("ept_construct>SINGLE notGLOBALS INVVPID is supported\n"); } assert(ept_root == NULL); ept_root = (epte_t *)page_alloc(); if (ept_root == NULL) { return -ENOMEM; } un max_pfn = bp->num_physpages; for (un pfn = 0; pfn < max_pfn; pfn++) { if ((pfn & 0xffff) == 0) { kprintf("ept_construct>pfn = 0x%lx\n", pfn); } un gpaddr = pfn << VM_PAGE_SHIFT; ret_t ret = ept_construct_page(gpaddr); if (ret) { return ret; } } kprintf("ept_construct>calling ept_construct_page(0x%lx)\n", get_apic_base_phys()); ret_t ret = ept_construct_page(get_apic_base_phys()); if (ret) { return ret; } kprintf("ept_construct>EXIT\n"); return 0; }
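/*
 * Minimal sketch of the bit_test() helper assumed by ept_construct() above:
 * it reports whether bit n of a 64-bit capability word (here, the
 * IA32_VMX_EPT_VPID_CAP MSR) is set. The real helper in this codebase may
 * be a macro or use different types; this is only an assumption about its
 * behavior.
 */
static inline int bit_test(u64 value, unsigned int n)
{
	return (value >> n) & 1;
}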
/* Helper function for the ELF loader. Maps the specified segment * of the program header from the given file in to the given address * space with the given memory offset (in pages). On success returns 0, otherwise * returns a negative error code for the ELF loader to return. * Note that since any error returned by this function should * cause the ELF loader to give up, it is acceptable for the * address space to be modified after returning an error. * Note that memoff can be negative */ static int _elf32_map_segment(vmmap_t *map, vnode_t *file, int32_t memoff, const Elf32_Phdr *segment) { uintptr_t addr; if (memoff < 0) { KASSERT(ADDR_TO_PN(segment->p_vaddr) > (uint32_t) -memoff); addr = (uintptr_t)segment->p_vaddr - (uintptr_t)PN_TO_ADDR(-memoff); } else { addr = (uintptr_t)segment->p_vaddr + (uintptr_t)PN_TO_ADDR(memoff); } uint32_t off = segment->p_offset; uint32_t memsz = segment->p_memsz; uint32_t filesz = segment->p_filesz; dbg(DBG_ELF, "Mapping program segment: type %#x, offset %#08x," " vaddr %#08x, filesz %#x, memsz %#x, flags %#x, align %#x\n", segment->p_type, segment->p_offset, segment->p_vaddr, segment->p_filesz, segment->p_memsz, segment->p_flags, segment->p_align); /* check for bad data in the segment header */ if (PAGE_SIZE != segment->p_align) { dbg(DBG_ELF, "ERROR: segment does not have correct alignment\n"); return -ENOEXEC; } else if (filesz > memsz) { dbg(DBG_ELF, "ERROR: segment file size is greater than memory size\n"); return -ENOEXEC; } else if (PAGE_OFFSET(addr) != PAGE_OFFSET(off)) { dbg(DBG_ELF, "ERROR: segment address and offset are not aligned correctly\n"); return -ENOEXEC; } int perms = 0; if (PF_R & segment->p_flags) { perms |= PROT_READ; } if (PF_W & segment->p_flags) { perms |= PROT_WRITE; } if (PF_X & segment->p_flags) { perms |= PROT_EXEC; } if (0 < filesz) { /* something needs to be mapped from the file */ /* start from the starting address and include enough pages to * map all filesz bytes of the file */ uint32_t lopage = ADDR_TO_PN(addr); uint32_t npages = ADDR_TO_PN(addr + filesz - 1) - lopage + 1; off_t fileoff = (off_t)PAGE_ALIGN_DOWN(off); int ret; if (!vmmap_is_range_empty(map, lopage, npages)) { dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n"); return -ENOEXEC; } else if (0 > (ret = vmmap_map(map, file, lopage, npages, perms, MAP_PRIVATE | MAP_FIXED, fileoff, 0, NULL))) { return ret; } } if (memsz > filesz) { /* there is left over memory in the segment which must * be initialized to 0 (anonymously mapped) */ uint32_t lopage = ADDR_TO_PN(addr + filesz); uint32_t npages = ADDR_TO_PN(PAGE_ALIGN_UP(addr + memsz)) - lopage; int ret; if (npages > 1 && !vmmap_is_range_empty(map, lopage + 1, npages - 1)) { dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n"); return -ENOEXEC; } else if (0 > (ret = vmmap_map(map, NULL, lopage, npages, perms, MAP_PRIVATE | MAP_FIXED, 0, 0, NULL))) { return ret; } else if (!PAGE_ALIGNED(addr + filesz) && filesz > 0) { /* In this case, we have accidentally zeroed too much of memory, as * we zeroed all memory in the page containing addr + filesz. * However, the remaining part of the data is not a full page, so we * should not just map in another page (as there could be garbage * after addr+filesz). For instance, consider the data-bss boundary * (c.f. Intel x86 ELF supplement pp. 82). * To fix this, we need to read in the contents of the file manually * and put them at that user space addr in the anon map we just * added. 
*/ void *buf; if (NULL == (buf = page_alloc())) return -ENOMEM; if (!(0 > (ret = file->vn_ops->read(file, (off_t) PAGE_ALIGN_DOWN(off + filesz), buf, PAGE_OFFSET(addr + filesz))))) { ret = vmmap_write(map, PAGE_ALIGN_DOWN(addr + filesz), buf, PAGE_OFFSET(addr + filesz)); } page_free(buf); return ret; } } return 0; }
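/*
 * Worked example of the data/bss boundary case handled at the end of
 * _elf32_map_segment() above (the numbers are hypothetical): suppose
 * p_vaddr = 0x0804a000, p_offset = 0x1000, p_filesz = 0x614, p_memsz =
 * 0x1a20, and PAGE_SIZE = 0x1000. The file-backed mapping covers one page
 * (0x0804a000-0x0804afff), of which only the first 0x614 bytes come from
 * the file. The anonymous mapping starts at ADDR_TO_PN(addr + filesz),
 * which is that same page, and spans two pages up to PAGE_ALIGN_UP(addr +
 * memsz) = 0x0804c000; mapping it MAP_FIXED replaces the file-backed page
 * and zeroes the 0x614 file bytes. The tail of the function therefore
 * re-reads PAGE_OFFSET(addr + filesz) = 0x614 bytes from file offset
 * PAGE_ALIGN_DOWN(off + filesz) = 0x1000 and writes them back at
 * PAGE_ALIGN_DOWN(addr + filesz) = 0x0804a000 with vmmap_write().
 */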
// check page_insert, page_remove, &c static void check_page(void) { struct PageInfo *pp, *pp0, *pp1, *pp2; struct PageInfo *fl; pte_t *ptep, *ptep1; void *va; int i; extern pde_t entry_pgdir[]; // should be able to allocate three pages pp0 = pp1 = pp2 = 0; assert((pp0 = page_alloc(0))); assert((pp1 = page_alloc(0))); assert((pp2 = page_alloc(0))); assert(pp0); assert(pp1 && pp1 != pp0); assert(pp2 && pp2 != pp1 && pp2 != pp0); // temporarily steal the rest of the free pages fl = page_free_list; page_free_list = 0; // should be no free memory assert(!page_alloc(0)); // there is no page allocated at address 0 assert(page_lookup(kern_pgdir, (void*)0x0, &ptep) == NULL); // there is no free memory, so we can't allocate a page table assert(page_insert(kern_pgdir, pp1, 0x0, PTE_W) < 0); // free pp0 and try again: pp0 should be used for page table page_free(pp0); assert(page_insert(kern_pgdir, pp1, 0x0, PTE_W) == 0); assert(PTE_ADDR(kern_pgdir[0]) == page2pa(pp0)); assert(check_va2pa(kern_pgdir, 0x0) == page2pa(pp1)); assert(pp1->pp_ref == 1); assert(pp0->pp_ref == 1); // should be able to map pp2 at PGSIZE because pp0 is already allocated for page table assert(page_insert(kern_pgdir, pp2, (void*)PGSIZE, PTE_W) == 0); assert(check_va2pa(kern_pgdir, PGSIZE) == page2pa(pp2)); assert(pp2->pp_ref == 1); // should be no free memory assert(!page_alloc(0)); // should be able to map pp2 at PGSIZE because it's already there assert(page_insert(kern_pgdir, pp2, (void*)PGSIZE, PTE_W) == 0); assert(check_va2pa(kern_pgdir, PGSIZE) == page2pa(pp2)); assert(pp2->pp_ref == 1); // pp2 should NOT be on the free list // could happen in ref counts are handled sloppily in page_insert assert(!page_alloc(0)); // check that pgdir_walk returns a pointer to the pte ptep = (pte_t*)KADDR(PTE_ADDR(kern_pgdir[PDX(PGSIZE)])); assert(pgdir_walk(kern_pgdir, (void*)PGSIZE, 0) == ptep+PTX(PGSIZE)); // should be able to change permissions too. assert(page_insert(kern_pgdir, pp2, (void*)PGSIZE, PTE_W|PTE_U) == 0); assert(check_va2pa(kern_pgdir, PGSIZE) == page2pa(pp2)); assert(pp2->pp_ref == 1); assert(*pgdir_walk(kern_pgdir, (void*)PGSIZE, 0) & PTE_U); assert(kern_pgdir[0] & PTE_U); // should be able to remap with fewer permissions assert(page_insert(kern_pgdir, pp2, (void*)PGSIZE, PTE_W) == 0); assert(*pgdir_walk(kern_pgdir, (void*)PGSIZE, 0) & PTE_W); assert(!(*pgdir_walk(kern_pgdir, (void*)PGSIZE, 0) & PTE_U)); // should not be able to map at PTSIZE because need free page for page table assert(page_insert(kern_pgdir, pp0, (void*)PTSIZE, PTE_W) < 0); // insert pp1 at PGSIZE (replacing pp2) assert(page_insert(kern_pgdir, pp1, (void*)PGSIZE, PTE_W) == 0); assert(!(*pgdir_walk(kern_pgdir, (void*)PGSIZE, 0) & PTE_U)); // should have pp1 at both 0 and PGSIZE, pp2 nowhere, ... assert(check_va2pa(kern_pgdir, 0) == page2pa(pp1)); assert(check_va2pa(kern_pgdir, PGSIZE) == page2pa(pp1)); // ... 
and ref counts should reflect this assert(pp1->pp_ref == 2); assert(pp2->pp_ref == 0); // pp2 should be returned by page_alloc assert((pp = page_alloc(0)) && pp == pp2); // unmapping pp1 at 0 should keep pp1 at PGSIZE page_remove(kern_pgdir, 0x0); assert(check_va2pa(kern_pgdir, 0x0) == ~0); assert(check_va2pa(kern_pgdir, PGSIZE) == page2pa(pp1)); assert(pp1->pp_ref == 1); assert(pp2->pp_ref == 0); // test re-inserting pp1 at PGSIZE assert(page_insert(kern_pgdir, pp1, (void*)PGSIZE, 0) == 0); assert(pp1->pp_ref); assert(pp1->pp_link == NULL); // unmapping pp1 at PGSIZE should free it page_remove(kern_pgdir, (void*)PGSIZE); assert(check_va2pa(kern_pgdir, 0x0) == ~0); assert(check_va2pa(kern_pgdir, PGSIZE) == ~0); assert(pp1->pp_ref == 0); assert(pp2->pp_ref == 0); // so it should be returned by page_alloc assert((pp = page_alloc(0)) && pp == pp1); // should be no free memory assert(!page_alloc(0)); // forcibly take pp0 back assert(PTE_ADDR(kern_pgdir[0]) == page2pa(pp0)); kern_pgdir[0] = 0; assert(pp0->pp_ref == 1); pp0->pp_ref = 0; // check pointer arithmetic in pgdir_walk page_free(pp0); va = (void*)(PGSIZE * NPDENTRIES + PGSIZE); ptep = pgdir_walk(kern_pgdir, va, 1); ptep1 = (pte_t*)KADDR(PTE_ADDR(kern_pgdir[PDX(va)])); assert(ptep == ptep1 + PTX(va)); kern_pgdir[PDX(va)] = 0; pp0->pp_ref = 0; // check that new page tables get cleared memset(page2kva(pp0), 0xFF, PGSIZE); page_free(pp0); pgdir_walk(kern_pgdir, 0x0, 1); ptep = (pte_t*)page2kva(pp0); for (i = 0; i < NPTENTRIES; i++) assert((ptep[i] & PTE_P) == 0); kern_pgdir[0] = 0; pp0->pp_ref = 0; // give free list back page_free_list = fl; // free the pages we took page_free(pp0); page_free(pp1); page_free(pp2); cprintf("check_page() succeeded!\n"); }
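/*
 * For reference, a sketch of the check_va2pa() helper that the assertions
 * above rely on. This is the shape it usually takes in the JOS labs (the
 * actual helper is defined elsewhere in pmap.c): it walks the two-level
 * page table by hand and returns ~0 if va is unmapped.
 */
static physaddr_t
check_va2pa(pde_t *pgdir, uintptr_t va)
{
	pte_t *p;

	pgdir = &pgdir[PDX(va)];
	if (!(*pgdir & PTE_P))
		return ~0;
	p = (pte_t *) KADDR(PTE_ADDR(*pgdir));
	if (!(p[PTX(va)] & PTE_P))
		return ~0;
	return PTE_ADDR(p[PTX(va)]);
}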
// // Set up the initial program binary, stack, and processor flags // for a user process. // // This function loads all loadable segments from the ELF binary image // into the environment's user memory, starting at the appropriate // virtual addresses indicated in the ELF program header. // It also clears to zero any portions of these segments // that are marked in the program header as being mapped // but not actually present in the ELF file -- i.e., the program's bss section. // // Finally, this function maps one page for the program's initial stack. // // load_elf panics if it encounters problems. // - How might load_elf fail? What might be wrong with the given input? // static void load_elf(struct Env *e, uint8_t *binary, size_t size) { struct Elf *elf = (struct Elf *) binary; // Load each program segment into environment 'e's virtual memory // at the address specified in the ELF section header. // Only load segments with ph->p_type == ELF_PROG_LOAD. // Each segment's virtual address can be found in ph->p_va // and its size in memory can be found in ph->p_memsz. // The ph->p_filesz bytes from the ELF binary, starting at // 'binary + ph->p_offset', should be copied to virtual address // ph->p_va. Any remaining memory bytes should be cleared to zero. // (The ELF header should have ph->p_filesz <= ph->p_memsz.) // Use functions from the previous lab to allocate and map pages. // // All page protection bits should be user read/write for now. // ELF segments are not necessarily page-aligned, but you can // assume for this function that no two segments will touch // the same virtual page. // // You may find a function like segment_alloc useful. // // Loading the segments is much simpler if you can move data // directly into the virtual addresses stored in the ELF binary. // So which page directory should be in force during // this function? // // All this is very similar to what our boot loader does, except the // boot loader reads the code from disk and doesn't check whether // segments are loadable. Take a look at boot/main.c to get ideas. // // You must also store the program's entry point somewhere, // to make sure that the environment starts executing at that point. // See env_run() and env_iret() below. // LAB 3: Your code here. lcr3(PADDR(e->env_pgdir)); if (elf->e_magic != ELF_MAGIC) panic("Invalid Elf Magic"); struct Proghdr *ph = (struct Proghdr *) ((uint8_t *) elf + elf->e_phoff); int ph_num = elf->e_phnum; // iterate over all program headers for (; --ph_num >= 0; ph++) if (ph->p_type == ELF_PROG_LOAD) { segment_alloc(e, ph->p_va, ph->p_memsz); // copy data from binary to address space memmove((void *)ph->p_va, binary + ph->p_offset, ph->p_filesz); } // set entry point for new env e->env_tf.tf_eip = elf->e_entry; e->env_tf.tf_esp = USTACKTOP; // Now map one page for the program's initial stack // at virtual address USTACKTOP - PGSIZE. // (What should the permissions be?) struct Page *p; if ((p = page_alloc()) == NULL || page_insert(e->env_pgdir, p, USTACKTOP-PGSIZE, PTE_U|PTE_W|PTE_P)) panic("segment_alloc: Can't allocate page"); memset(page2kva(p), 0, PGSIZE); }
static vm_map_t do_fork(vm_map_t org_map) { vm_map_t new_map; struct region *tmp, *src, *dest; int map_type; if ((new_map = vm_create()) == NULL) return NULL; /* * Copy all regions */ tmp = &new_map->head; src = &org_map->head; /* * Copy top region */ *tmp = *src; tmp->next = tmp->prev = tmp; if (src == src->next) /* Blank memory ? */ return new_map; do { ASSERT(src != NULL); ASSERT(src->next != NULL); if (src == &org_map->head) { dest = tmp; } else { /* Create new region struct */ dest = kmem_alloc(sizeof(*dest)); if (dest == NULL) return NULL; *dest = *src; /* memcpy */ dest->prev = tmp; dest->next = tmp->next; tmp->next->prev = dest; tmp->next = dest; tmp = dest; } if (src->flags == REG_FREE) { /* * Skip free region */ } else { /* Check if the region can be shared */ if (!(src->flags & REG_WRITE) && !(src->flags & REG_MAPPED)) { dest->flags |= REG_SHARED; } if (!(dest->flags & REG_SHARED)) { /* Allocate new physical page. */ dest->phys = page_alloc(src->size); if (dest->phys == 0) return NULL; /* Copy source page */ memcpy(phys_to_virt(dest->phys), phys_to_virt(src->phys), src->size); } /* Map the region to virtual address */ if (dest->flags & REG_WRITE) map_type = PG_WRITE; else map_type = PG_READ; if (mmu_map(new_map->pgd, dest->phys, dest->addr, dest->size, map_type)) return NULL; } src = src->next; } while (src != &org_map->head); /* * No error. Now, link all shared regions */ dest = &new_map->head; src = &org_map->head; do { if (dest->flags & REG_SHARED) { src->flags |= REG_SHARED; dest->sh_prev = src; dest->sh_next = src->sh_next; src->sh_next->sh_prev = dest; src->sh_next = dest; } dest = dest->next; src = src->next; } while (src != &org_map->head); return new_map; }
// // Set up the initial program binary, stack, and processor flags // for a user process. // This function is ONLY called during kernel initialization, // before running the first user-mode environment. // // This function loads all loadable segments from the ELF binary image // into the environment's user memory, starting at the appropriate // virtual addresses indicated in the ELF program header. // At the same time it clears to zero any portions of these segments // that are marked in the program header as being mapped // but not actually present in the ELF file - i.e., the program's bss section. // // All this is very similar to what our boot loader does, except the boot // loader also needs to read the code from disk. Take a look at // boot/main.c to get ideas. // // Finally, this function maps one page for the program's initial stack. // // load_icode panics if it encounters problems. // - How might load_icode fail? What might be wrong with the given input? // // static void load_icode(struct Env *e, uint8_t *binary, size_t size) { // Hints: // Load each program segment into virtual memory // at the address specified in the ELF section header. // You should only load segments with ph->p_type == ELF_PROG_LOAD. // Each segment's virtual address can be found in ph->p_va // and its size in memory can be found in ph->p_memsz. // The ph->p_filesz bytes from the ELF binary, starting at // 'binary + ph->p_offset', should be copied to virtual address // ph->p_va. Any remaining memory bytes should be cleared to zero. // (The ELF header should have ph->p_filesz <= ph->p_memsz.) /* p_filesz <= ph->memsz ,sunus*/ // Use functions from the previous lab to allocate and map pages. // // All page protection bits should be user read/write for now. // ELF segments are not necessarily page-aligned, but you can // assume for this function that no two segments will touch // the same virtual page. // // You may find a function like segment_alloc useful. // // Loading the segments is much simpler if you can move data // directly into the virtual addresses stored in the ELF binary. // So which page directory should be in force during // this function? // // You must also do something with the program's entry point, // to make sure that the environment starts executing there. // What? (See env_run() and env_pop_tf() below.) // LAB 3: Your code here. // Now map one page for the program's initial stack // at virtual address USTACKTOP - PGSIZE. // LAB 3: Your code here. // You must also do something with the program's entry point, /* TO BE CODED!*/ /* DONE, LINE 328,338 and 339 */ // to make sure that the environment starts executing there. /* DEC 10,2010 */ // What? (See env_run() and env_pop_tf() below.) /* sunus */ // LAB 3: Your code here. //DEC 09,2010 sunus struct Proghdr *ph,*eph; struct Elf *env_elf; struct Page *pstack; env_elf = (struct Elf *)binary; assert(env_elf->e_magic == ELF_MAGIC); ph = (struct Proghdr *)((uint8_t *)binary + env_elf->e_phoff); eph = ph + env_elf->e_phnum; lcr3(e->env_cr3); // we will use env_cr3 for a little while :D for( ; ph < eph ; ph++) { if(ph->p_type == ELF_PROG_LOAD) { segment_alloc(e, (void *)ph->p_va,ph->p_memsz); memmove((void *)ph->p_va, (void *)(binary + ph->p_offset), ph->p_filesz); memset(((void *)ph->p_va + ph->p_filesz), 0, (ph->p_memsz - ph->p_filesz)); // .bss matters } } lcr3(boot_cr3); // restore boot_cr3 e->env_tf.tf_eip = env_elf->e_entry; // Now map one page for the program's initial stack // at virtual address USTACKTOP - PGSIZE. 
// LAB 3: Your code here. //DEC 10,2010,sunus assert(page_alloc(&pstack) == 0); assert(page_insert(e->env_pgdir, pstack,(void *)(USTACKTOP - PGSIZE), PTE_U|PTE_W) == 0); return ; }
// // Set up the initial program binary, stack, and processor flags // for a user process. // This function is ONLY called during kernel initialization, // before running the first user-mode environment. // // This function loads all loadable segments from the ELF binary image // into the environment's user memory, starting at the appropriate // virtual addresses indicated in the ELF program header. // At the same time it clears to zero any portions of these segments // that are marked in the program header as being mapped // but not actually present in the ELF file - i.e., the program's bss section. // // All this is very similar to what our boot loader does, except the boot // loader also needs to read the code from disk. Take a look at // boot/main.c to get ideas. // // Finally, this function maps one page for the program's initial stack. // // load_icode panics if it encounters problems. // - How might load_icode fail? What might be wrong with the given input? // static void load_icode(struct Env *e, uint8_t *binary, size_t size) { // Hints: // Load each program segment into virtual memory // at the address specified in the ELF section header. // You should only load segments with ph->p_type == ELF_PROG_LOAD. // Each segment's virtual address can be found in ph->p_va // and its size in memory can be found in ph->p_memsz. // The ph->p_filesz bytes from the ELF binary, starting at // 'binary + ph->p_offset', should be copied to virtual address // ph->p_va. Any remaining memory bytes should be cleared to zero. // (The ELF header should have ph->p_filesz <= ph->p_memsz.) // Use functions from the previous lab to allocate and map pages. // // All page protection bits should be user read/write for now. // ELF segments are not necessarily page-aligned, but you can // assume for this function that no two segments will touch // the same virtual page. // // You may find a function like segment_alloc useful. // // Loading the segments is much simpler if you can move data // directly into the virtual addresses stored in the ELF binary. // So which page directory should be in force during // this function? // // Hint: // You must also do something with the program's entry point, // to make sure that the environment starts executing there. // What? (See env_run() and env_pop_tf() below.) // LAB 3: Your code here. //cprintf("Begin to load icode\n"); struct Proghdr *ph, *eph; ph = (struct Proghdr *) (binary + ((struct Elf *)binary)->e_phoff); eph = ph + ((struct Elf *)binary)->e_phnum; lcr3(e->env_cr3); while (ph < eph) { if(ph->p_type == ELF_PROG_LOAD) { segment_alloc(e, (void*)ph->p_va,ph->p_memsz); memcpy((void *)ph->p_va, binary + ph->p_offset, ph->p_filesz); memset((void *)(ph->p_va + ph->p_filesz), 0x0, ph->p_memsz - ph->p_filesz); } ph++; } //lcr3(boot_cr3); //cprintf("segment copy success\n"); // Now map one page for the program's initial stack // at virtual address USTACKTOP - PGSIZE. // LAB 3: Your code here. struct Page * user_stack; if(page_alloc(&user_stack) == -E_NO_MEM) panic("No memory to alloc for user stack"); page_insert(e->env_pgdir, user_stack, (void *)(USTACKTOP - PGSIZE),PTE_W|PTE_U|PTE_P); e->env_tf.tf_eip = ((struct Elf*)binary)->e_entry; }
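/*
 * Hedged sketch of the segment_alloc() helper used by the load_icode()
 * variants above (presumably defined elsewhere in env.c): it allocates
 * physical pages and maps them user read/write over the rounded-out region
 * [va, va + len). The rounding with ROUNDDOWN/ROUNDUP and the panic-based
 * error handling shown here are assumptions about this lab solution, not
 * its actual code.
 */
static void
segment_alloc(struct Env *e, void *va, size_t len)
{
	void *start = ROUNDDOWN(va, PGSIZE);
	void *end = ROUNDUP(va + len, PGSIZE);
	struct Page *p;

	for (; start < end; start += PGSIZE) {
		if (page_alloc(&p) != 0)
			panic("segment_alloc: out of memory");
		if (page_insert(e->env_pgdir, p, start, PTE_U | PTE_W) != 0)
			panic("segment_alloc: cannot map page");
	}
}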
static int do_attribute(vm_map_t map, void *addr, int attr) { struct region *reg; int new_flags = 0; void *old_addr, *new_addr = NULL; int map_type; addr = (void *)PAGE_TRUNC(addr); /* * Find the target region. */ reg = region_find(&map->head, addr, 1); if (reg == NULL || reg->addr != addr || (reg->flags & REG_FREE)) { return EINVAL; /* not allocated */ } /* * The attribute of the mapped region can not be changed. */ if (reg->flags & REG_MAPPED) return EINVAL; /* * Check new and old flag. */ if (reg->flags & REG_WRITE) { if (!(attr & VMA_WRITE)) new_flags = REG_READ; } else { if (attr & VMA_WRITE) new_flags = REG_READ | REG_WRITE; } if (new_flags == 0) return 0; /* same attribute */ map_type = (new_flags & REG_WRITE) ? PG_WRITE : PG_READ; /* * If it is shared region, duplicate it. */ if (reg->flags & REG_SHARED) { old_addr = reg->phys; /* Allocate new physical page. */ if ((new_addr = page_alloc(reg->size)) == 0) return ENOMEM; /* Copy source page */ memcpy(phys_to_virt(new_addr), phys_to_virt(old_addr), reg->size); /* Map new region */ if (mmu_map(map->pgd, new_addr, reg->addr, reg->size, map_type)) { page_free(new_addr, reg->size); return ENOMEM; } reg->phys = new_addr; /* Unlink from shared list */ reg->sh_prev->sh_next = reg->sh_next; reg->sh_next->sh_prev = reg->sh_prev; if (reg->sh_prev == reg->sh_next) reg->sh_prev->flags &= ~REG_SHARED; reg->sh_next = reg->sh_prev = reg; } else { if (mmu_map(map->pgd, reg->phys, reg->addr, reg->size, map_type)) return ENOMEM; } reg->flags = new_flags; return 0; }
void build_process_address_space(struct PCB* process) { uint64_t entry = 0; uint64_t e_rsp = UXSTACKTOP; struct page* page_temp; //struct vma* vma_temp; //int ab=0; //Allocate for user pml4 table and initialize it //Since its in user space, will already be mapped. if(!process->pml4) { page_temp = page_alloc(); if(page_temp->ref_count==1) printf("PANIC\n"); memset((void *)getVA(page_temp),0,PAGE_SIZE); process->cr3 = (uint64_t *)getPA(page_temp); process->pml4 = (uint64_t *)getVA(page_temp); //Copy 1 entry from kernel_pml4 to this pml4 *(process->pml4 + PML4OFF(KERNBASE)) = *(kernel_pml4 + PML4OFF(KERNBASE)); } //printf("K: %x , U: %x\n", *(kernel_pml4 + PML4OFF(KERNBASE)), *(process->pml4 + PML4OFF(KERNBASE))); //New cr3 //loadcr3((void *)process->cr3); //ELF entry = getELF(process -> fileName, process); //printf("entry = %x\n", entry); (process -> reg).rip = entry; add_heap_vma(process); add_stack_vma(process); //AADY: 30 Apr if(process->processID < 0) { process -> processID = generate_pid(); e_rsp = UXSTACKTOP - (process->processID * PAGE_SIZE); (process -> ersp) = e_rsp; process -> hasRan = 0; //Already done in pcb_alloc, a safety check here. } /* //TEMP: 26th APR //ASM to set up tss __asm__ __volatile__("movq %%rsp, %0; \ movq $0x28,%%rax; \ ltr %%ax;" : "=r" (tss.rsp0) : : "rax"); //tss.rsp0 = process->ersp; //ASM to set up tss __asm__ __volatile__("movq $0x28,%%rax; \ ltr %%ax;" : : : "rax"); //printf("tss setting: %x\n",tss.rsp0); */ //switch_to_user_space(entry); }
int vm_fault(int faulttype, vaddr_t faultaddress) { bool lock = false; // Indicates if lock was aquired in "this" function //int spl = splhigh(); // bool lock = get_coremap_lock(); // DEBUG(DB_VM,"F:%p\n",(void*) faultaddress); struct addrspace *as = curthread->t_addrspace; //We ALWAYS update TLB with writable bits ASAP. So this means a fault. if(faulttype == VM_FAULT_READONLY && as->use_permissions) { // DEBUG(DB_VM, "NOT ALLOWED\n"); //splx(spl); return EFAULT; } //Null Pointer if(faultaddress == 0x0) { //splx(spl); return EFAULT; } //Align the fault address to a page (4k) boundary. faultaddress &= PAGE_FRAME; //Make sure address is valid if(faultaddress >= 0x80000000) { //splx(spl); return EFAULT; } /*If we're trying to access a region after the end of the heap but * before the stack, that's invalid (unless load_elf is running) */ if(as->loadelf_done && faultaddress < USER_STACK_LIMIT && faultaddress > as->heap_end) { //splx(spl); return EFAULT; } //Translate.... struct page_table *pt = pgdir_walk(as,faultaddress,false); int pt_index = VA_TO_PT_INDEX(faultaddress); int pfn = PTE_TO_PFN(pt->table[pt_index]); int permissions = PTE_TO_PERMISSIONS(pt->table[pt_index]); int swapped = PTE_TO_LOCATION(pt->table[pt_index]); struct page *page = NULL; /*If the PFN is 0, we might need to dynamically allocate on the stack or the heap */ if(pfn == 0) { //Stack if(faultaddress < as->stack && faultaddress > USER_STACK_LIMIT) { as->stack -= PAGE_SIZE; lock = get_coremap_lock(); page = page_alloc(as,as->stack, PF_RW); release_coremap_lock(lock); } //Heap else if(faultaddress < as->heap_end && faultaddress >= as->heap_start) { lock = get_coremap_lock(); page = page_alloc(as,faultaddress, PF_RW); release_coremap_lock(lock); } //Static Segment(s) else if(faultaddress < as->heap_start && faultaddress >= as->static_start) { panic("code not loaded: %p",(void*) faultaddress); //TODO // page = page_alloc(as,faultaddress,PF_) } else { //splx(spl); return EFAULT; } } /*We grew the stack and/or heap dynamically. Try translating again */ pt = pgdir_walk(as,faultaddress,false); pt_index = VA_TO_PT_INDEX(faultaddress); pfn = PTE_TO_PFN(pt->table[pt_index]); permissions = PTE_TO_PERMISSIONS(pt->table[pt_index]); swapped = PTE_TO_LOCATION(pt->table[pt_index]); /* If we're swapped out, time to do some extra stuff. */ while(swapped == PTE_SWAPPING) { // Busy wait for the swap to complete, since we cannot sleep in an interrupt thread_yield(); pfn = PTE_TO_PFN(pt->table[pt_index]); permissions = PTE_TO_PERMISSIONS(pt->table[pt_index]); swapped = PTE_TO_LOCATION(pt->table[pt_index]); } // Swap completed and page is now in memory or on disk; if disk, bring it back to memory if(swapped == PTE_SWAP) { //bool lock = get_coremap_lock(); //TODO get the page back in to ram. //Does this work? // DEBUG(DB_SWAP,"PTE (vmfault)1:%p\n",(void*) pt->table[pt_index]); lock = get_coremap_lock(); page = page_alloc(as,faultaddress,permissions); /* Page now has a home in RAM. But set the swap bit to 1 so we can swap the page in*/ pt->table[pt_index] |= PTE_SWAP; // DEBUG(DB_SWAP,"PTE (vmfault)2:%p\n",(void*) pt->table[pt_index]); swapin_page(as,faultaddress,page); release_coremap_lock(lock); /* Page was swapped back in. 
Re-translate */ pt = pgdir_walk(as,faultaddress,false); pt_index = VA_TO_PT_INDEX(faultaddress); pfn = PTE_TO_PFN(pt->table[pt_index]); permissions = PTE_TO_PERMISSIONS(pt->table[pt_index]); swapped = PTE_TO_LOCATION(pt->table[pt_index]); //release_coremap_lock(lock); } // DEBUG(DB_VM, "PTERWX:%d\n",permissions); //Page is writable if permissions say so or if we're ignoring permissions. bool writable = (permissions & PF_W) || !(as->use_permissions); //This time, it shouldn't be 0. //Static Segment(s) // if(faultaddress < as->heap_start && faultaddress >= as->static_start) // { // panic("code not loaded: %p",(void*) faultaddress); // //TODO // // page = page_alloc(as,faultaddress,PF_) // } KASSERT(pfn > 0); KASSERT(pfn <= PAGE_SIZE * (int) page_count); uint32_t ehi,elo; /* Disable interrupts on this CPU while frobbing the TLB. */ lock = get_coremap_spinlock(); int spl = splhigh(); // What does it mean for the page to be NULL in this case? if(page != NULL) { // DEBUG(DB_SWAP, "Page : %p\n", &page); KASSERT(page->state != SWAPPINGOUT); page->state = DIRTY; } for (int i=0; i<NUM_TLB; i++) { tlb_read(&ehi, &elo, i); if (elo & TLBLO_VALID) { // kprintf("Index %d in use\n",i); continue; } ehi = faultaddress; elo = pfn | TLBLO_VALID; if(writable) { elo |= TLBLO_DIRTY; } // kprintf("Writing TLB Index %d\n",i); // DEBUG(DB_VM, "dumbvm: 0x%x -> 0x%x\n", faultaddress, pfn); tlb_write(ehi, elo, i); splx(spl); release_coremap_spinlock(lock); return 0; } /*If we get here, TLB was full. Kill an entry, round robin style*/ ehi = faultaddress; elo = pfn | TLBLO_VALID; if(writable) { elo |= TLBLO_DIRTY; } tlb_write(ehi,elo,tlb_offering); tlb_offering++; if(tlb_offering == NUM_TLB) { //At the end of the TLB. Start back at 0 again. tlb_offering = 0; } splx(spl); release_coremap_spinlock(lock); return 0; }
// // Set up the initial program binary, stack, and processor flags // for a user process. // This function is ONLY called during kernel initialization, // before running the first user-mode environment. // // This function loads all loadable segments from the ELF binary image // into the environment's user memory, starting at the appropriate // virtual addresses indicated in the ELF program header. // At the same time it clears to zero any portions of these segments // that are marked in the program header as being mapped // but not actually present in the ELF file - i.e., the program's bss section. // // All this is very similar to what our boot loader does, except the boot // loader also needs to read the code from disk. Take a look at // boot/main.c to get ideas. // // Finally, this function maps one page for the program's initial stack. // // load_icode panics if it encounters problems. // - How might load_icode fail? What might be wrong with the given input? // static void load_icode(struct Env *e, uint8_t *binary, size_t size) { // Hints: // Load each program segment into virtual memory // at the address specified in the ELF section header. // You should only load segments with ph->p_type == ELF_PROG_LOAD. // Each segment's virtual address can be found in ph->p_va // and its size in memory can be found in ph->p_memsz. // The ph->p_filesz bytes from the ELF binary, starting at // 'binary + ph->p_offset', should be copied to virtual address // ph->p_va. Any remaining memory bytes should be cleared to zero. // (The ELF header should have ph->p_filesz <= ph->p_memsz.) // Use functions from the previous lab to allocate and map pages. // // All page protection bits should be user read/write for now. // ELF segments are not necessarily page-aligned, but you can // assume for this function that no two segments will touch // the same virtual page. // // You may find a function like segment_alloc useful. // // Loading the segments is much simpler if you can move data // directly into the virtual addresses stored in the ELF binary. // So which page directory should be in force during // this function? // // Hint: // You must also do something with the program's entry point, // to make sure that the environment starts executing there. // What? (See env_run() and env_pop_tf() below.) // LAB 3: Your code here. struct Elf *env_elf; struct Proghdr *ph; struct Page *pg; int i; unsigned int old_cr3; env_elf = (struct Elf *)binary; old_cr3 = rcr3(); lcr3(PADDR(e->env_pgdir)); if( env_elf->e_magic != ELF_MAGIC) return; ph = (struct Proghdr*)((unsigned int)env_elf + env_elf->e_phoff); for(i=0; i < env_elf->e_phnum;i++){ if(ph->p_type == ELF_PROG_LOAD){ segment_alloc(e,(void *)ph->p_va, ph->p_memsz); memset((void *)ph->p_va, 0, ph->p_memsz); memmove((void *)ph->p_va, (void *)((unsigned int)env_elf + ph->p_offset), ph->p_filesz); } ph++; } e->env_tf.tf_eip = env_elf->e_entry; // Now map one page for the program's initial stack // at virtual address USTACKTOP - PGSIZE. // LAB 3: Your code here. if(page_alloc(&pg) != 0){ cprintf("load_icode page_alloc fail!!\n"); return; } page_insert(e->env_pgdir, pg, (void *)(USTACKTOP - PGSIZE), PTE_U | PTE_W); lcr3(old_cr3); }
static inline grow_page *grow_page_new()
{
	grow_page *res = malloc(sizeof(grow_page));

	/* Guard against a failed allocation before touching the node. */
	if (res == NULL)
		return NULL;
	res->page = page_alloc(0);
	res->next = 0;
	return res;
}
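/*
 * Hypothetical usage of grow_page_new(): extending a singly linked chain of
 * pages one node at a time. The wrapper name and the failure handling below
 * are illustrative assumptions, not part of the original code.
 */
static grow_page *grow_list_extend(grow_page *tail)
{
	grow_page *node = grow_page_new();

	if (node == NULL || node->page == 0)
		return NULL;		/* out of memory */
	if (tail != NULL)
		tail->next = node;
	return node;
}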
// check page_insert, page_remove, &c
static void
check_page(void)
{
	struct PageInfo *pp, *pp0, *pp1, *pp2;
	struct PageInfo *fl;
	pte_t *ptep, *ptep1;
	void *va;
	uintptr_t mm1, mm2;
	int i;
	extern pde_t entry_pgdir[];

	// should be able to allocate three pages
	pp0 = pp1 = pp2 = 0;
	assert((pp0 = page_alloc(0)));
	assert((pp1 = page_alloc(0)));
	assert((pp2 = page_alloc(0)));

	assert(pp0);
	assert(pp1 && pp1 != pp0);
	assert(pp2 && pp2 != pp1 && pp2 != pp0);

	// temporarily steal the rest of the free pages
	fl = page_free_list;
	page_free_list = 0;

	// should be no free memory
	assert(!page_alloc(0));

	// there is no page allocated at address 0
	assert(page_lookup(kern_pgdir, (void *) 0x0, &ptep) == NULL);

	// there is no free memory, so we can't allocate a page table
	assert(page_insert(kern_pgdir, pp1, 0x0, PTE_W) < 0);

	// free pp0 and try again: pp0 should be used for page table
	page_free(pp0);
	assert(page_insert(kern_pgdir, pp1, 0x0, PTE_W) == 0);
	assert(PTE_ADDR(kern_pgdir[0]) == page2pa(pp0));
	assert(check_va2pa(kern_pgdir, 0x0) == page2pa(pp1));
	assert(pp1->pp_ref == 1);
	assert(pp0->pp_ref == 1);

	// should be able to map pp2 at PGSIZE because pp0 is already allocated for page table
	assert(page_insert(kern_pgdir, pp2, (void*) PGSIZE, PTE_W) == 0);
	assert(check_va2pa(kern_pgdir, PGSIZE) == page2pa(pp2));
	assert(pp2->pp_ref == 1);

	// should be no free memory
	assert(!page_alloc(0));

	// should be able to map pp2 at PGSIZE because it's already there
	assert(page_insert(kern_pgdir, pp2, (void*) PGSIZE, PTE_W) == 0);
	assert(check_va2pa(kern_pgdir, PGSIZE) == page2pa(pp2));
	assert(pp2->pp_ref == 1);

	// pp2 should NOT be on the free list
	// could happen if ref counts are handled sloppily in page_insert
	//cprintf("p2: %p, free_list %p, p2 ref: %d", pp2, page_free_list, (int)pp2->pp_ref);
	assert(!page_alloc(0));

	// check that pgdir_walk returns a pointer to the pte
	// From this we can also infer what pgdir_walk does (it resolves the
	// ambiguity in "page table entry": a pointer to the page table vs. a
	// pointer to an entry inside it). Given a virtual address va,
	// kern_pgdir[PDX(va)] holds the physical address of va's second-level
	// page table; applying KADDR gives that page table's virtual address,
	// i.e. ptep. ptep + PTX(va) is then the virtual address of va's entry
	// within that page table, which is exactly what pgdir_walk must return.
	ptep = (pte_t *) KADDR(PTE_ADDR(kern_pgdir[PDX(PGSIZE)]));
	assert(pgdir_walk(kern_pgdir, (void*)PGSIZE, 0) == ptep+PTX(PGSIZE));

	// should be able to change permissions too.
	assert(page_insert(kern_pgdir, pp2, (void*) PGSIZE, PTE_W|PTE_U) == 0);
	assert(check_va2pa(kern_pgdir, PGSIZE) == page2pa(pp2));
	assert(pp2->pp_ref == 1);
	assert(*pgdir_walk(kern_pgdir, (void*) PGSIZE, 0) & PTE_U);
	assert(kern_pgdir[0] & PTE_U);

	// should be able to remap with fewer permissions
	assert(page_insert(kern_pgdir, pp2, (void*) PGSIZE, PTE_W) == 0);
	assert(*pgdir_walk(kern_pgdir, (void*) PGSIZE, 0) & PTE_W);
	assert(!(*pgdir_walk(kern_pgdir, (void*) PGSIZE, 0) & PTE_U));

	// should not be able to map at PTSIZE because need free page for page table
	assert(page_insert(kern_pgdir, pp0, (void*) PTSIZE, PTE_W) < 0);

	// insert pp1 at PGSIZE (replacing pp2)
	assert(page_insert(kern_pgdir, pp1, (void*) PGSIZE, PTE_W) == 0);
	assert(!(*pgdir_walk(kern_pgdir, (void*) PGSIZE, 0) & PTE_U));

	// should have pp1 at both 0 and PGSIZE, pp2 nowhere, ...
	assert(check_va2pa(kern_pgdir, 0) == page2pa(pp1));
	assert(check_va2pa(kern_pgdir, PGSIZE) == page2pa(pp1));
	// ...
and ref counts should reflect this assert(pp1->pp_ref == 2); assert(pp2->pp_ref == 0); // pp2 should be returned by page_alloc assert((pp = page_alloc(0)) && pp == pp2); // unmapping pp1 at 0 should keep pp1 at PGSIZE page_remove(kern_pgdir, 0x0); assert(check_va2pa(kern_pgdir, 0x0) == ~0); assert(check_va2pa(kern_pgdir, PGSIZE) == page2pa(pp1)); assert(pp1->pp_ref == 1); assert(pp2->pp_ref == 0); // unmapping pp1 at PGSIZE should free it page_remove(kern_pgdir, (void*) PGSIZE); assert(check_va2pa(kern_pgdir, 0x0) == ~0); assert(check_va2pa(kern_pgdir, PGSIZE) == ~0); assert(pp1->pp_ref == 0); assert(pp2->pp_ref == 0); // so it should be returned by page_alloc assert((pp = page_alloc(0)) && pp == pp1); // should be no free memory assert(!page_alloc(0)); // forcibly take pp0 back assert(PTE_ADDR(kern_pgdir[0]) == page2pa(pp0)); kern_pgdir[0] = 0; assert(pp0->pp_ref == 1); pp0->pp_ref = 0; // check pointer arithmetic in pgdir_walk page_free(pp0); va = (void*)(PGSIZE * NPDENTRIES + PGSIZE); ptep = pgdir_walk(kern_pgdir, va, 1); ptep1 = (pte_t *) KADDR(PTE_ADDR(kern_pgdir[PDX(va)])); assert(ptep == ptep1 + PTX(va)); kern_pgdir[PDX(va)] = 0; pp0->pp_ref = 0; // check that new page tables get cleared memset(page2kva(pp0), 0xFF, PGSIZE); page_free(pp0); pgdir_walk(kern_pgdir, 0x0, 1); ptep = (pte_t *) page2kva(pp0); for(i=0; i<NPTENTRIES; i++) assert((ptep[i] & PTE_P) == 0); kern_pgdir[0] = 0; pp0->pp_ref = 0; // give free list back page_free_list = fl; // free the pages we took page_free(pp0); page_free(pp1); page_free(pp2); // test mmio_map_region mm1 = (uintptr_t) mmio_map_region(0, 4097); mm2 = (uintptr_t) mmio_map_region(0, 4096); // check that they're in the right region assert(mm1 >= MMIOBASE && mm1 + 8096 < MMIOLIM); assert(mm2 >= MMIOBASE && mm2 + 8096 < MMIOLIM); // check that they're page-aligned assert(mm1 % PGSIZE == 0 && mm2 % PGSIZE == 0); // check that they don't overlap assert(mm1 + 8096 <= mm2); // check page mappings assert(check_va2pa(kern_pgdir, mm1) == 0); assert(check_va2pa(kern_pgdir, mm1+PGSIZE) == PGSIZE); assert(check_va2pa(kern_pgdir, mm2) == 0); assert(check_va2pa(kern_pgdir, mm2+PGSIZE) == ~0); // check permissions assert(*pgdir_walk(kern_pgdir, (void*) mm1, 0) & (PTE_W|PTE_PWT|PTE_PCD)); assert(!(*pgdir_walk(kern_pgdir, (void*) mm1, 0) & PTE_U)); // clear the mappings *pgdir_walk(kern_pgdir, (void*) mm1, 0) = 0; *pgdir_walk(kern_pgdir, (void*) mm1 + PGSIZE, 0) = 0; *pgdir_walk(kern_pgdir, (void*) mm2, 0) = 0; cprintf("check_page() succeeded!\n"); }
// // Check the physical page allocator (page_alloc(), page_free(), // and page_init()). // static void check_page_alloc(void) { struct PageInfo *pp, *pp0, *pp1, *pp2; int nfree; struct PageInfo *fl; char *c; int i; if (!pages) panic("'pages' is a null pointer!"); // check number of free pages for (pp = page_free_list, nfree = 0; pp; pp = pp->pp_link) ++nfree; // should be able to allocate three pages pp0 = pp1 = pp2 = 0; assert((pp0 = page_alloc(0))); assert((pp1 = page_alloc(0))); assert((pp2 = page_alloc(0))); assert(pp0); assert(pp1 && pp1 != pp0); assert(pp2 && pp2 != pp1 && pp2 != pp0); assert(page2pa(pp0) < npages*PGSIZE); assert(page2pa(pp1) < npages*PGSIZE); assert(page2pa(pp2) < npages*PGSIZE); // temporarily steal the rest of the free pages fl = page_free_list; page_free_list = 0; // should be no free memory assert(!page_alloc(0)); // free and re-allocate? page_free(pp0); page_free(pp1); page_free(pp2); pp0 = pp1 = pp2 = 0; assert((pp0 = page_alloc(0))); assert((pp1 = page_alloc(0))); assert((pp2 = page_alloc(0))); assert(pp0); assert(pp1 && pp1 != pp0); assert(pp2 && pp2 != pp1 && pp2 != pp0); assert(!page_alloc(0)); // test flags memset(page2kva(pp0), 1, PGSIZE); page_free(pp0); assert((pp = page_alloc(ALLOC_ZERO))); assert(pp && pp0 == pp); c = page2kva(pp); for (i = 0; i < PGSIZE; i++) assert(c[i] == 0); // give free list back page_free_list = fl; // free the pages we took page_free(pp0); page_free(pp1); page_free(pp2); // number of free pages should be the same for (pp = page_free_list; pp; pp = pp->pp_link) --nfree; assert(nfree == 0); cprintf("check_page_alloc() succeeded!\n"); }
// // Check the physical page allocator (page_alloc(), page_free(), // and page_init()). // static void check_page_alloc() { struct Page *pp, *pp0, *pp1, *pp2; int nfree; struct Page *fl; if (!pages) panic("'pages' is a null pointer!"); // Sort the free list so that pages with lower addresses // come first. (The entry_pgdir does not map all pages.) { struct Page **tp[2] = { &pp1, &pp2 }; for (pp0 = page_free_list; pp0; pp0 = pp0->pp_link) { int pagetype = PDX(page2pa(pp0)) >= 4; *tp[pagetype] = pp0; tp[pagetype] = &pp0->pp_link; } *tp[1] = 0; *tp[0] = pp2; page_free_list = pp1; } // check number of free pages for (pp = page_free_list, nfree = 0; pp; pp = pp->pp_link) ++nfree; // should be able to allocate three pages pp0 = pp1 = pp2 = 0; assert((pp0 = page_alloc())); assert((pp1 = page_alloc())); assert((pp2 = page_alloc())); assert(pp0); assert(pp1 && pp1 != pp0); assert(pp2 && pp2 != pp1 && pp2 != pp0); assert(page2pa(pp0) < npages*PGSIZE); assert(page2pa(pp1) < npages*PGSIZE); assert(page2pa(pp2) < npages*PGSIZE); // temporarily steal the rest of the free pages fl = page_free_list; page_free_list = 0; // should be no free memory assert(!page_alloc()); // free and re-allocate? page_free(pp0); page_free(pp1); page_free(pp2); pp0 = pp1 = pp2 = 0; assert((pp0 = page_alloc())); assert((pp1 = page_alloc())); assert((pp2 = page_alloc())); assert(pp0); assert(pp1 && pp1 != pp0); assert(pp2 && pp2 != pp1 && pp2 != pp0); assert(!page_alloc()); // give free list back page_free_list = fl; // free the pages we took page_free(pp0); page_free(pp1); page_free(pp2); // number of free pages should be the same for (pp = page_free_list; pp; pp = pp->pp_link) --nfree; assert(nfree == 0); cprintf("check_page_alloc() succeeded!\n"); }
/** * This is the first real C function ever called. It performs a lot of * hardware-specific initialization, then creates a pseudo-context to * execute the bootstrap function in. */ void kmain() { GDB_CALL_HOOK(boot); dbg_init(); dbgq(DBG_CORE, "Kernel binary:\n"); dbgq(DBG_CORE, " text: 0x%p-0x%p\n", &kernel_start_text, &kernel_end_text); dbgq(DBG_CORE, " data: 0x%p-0x%p\n", &kernel_start_data, &kernel_end_data); dbgq(DBG_CORE, " bss: 0x%p-0x%p\n", &kernel_start_bss, &kernel_end_bss); page_init(); pt_init(); slab_init(); pframe_init(); acpi_init(); apic_init(); pci_init(); intr_init(); gdt_init(); /* initialize slab allocators */ #ifdef __VM__ anon_init(); shadow_init(); #endif vmmap_init(); proc_init(); kthread_init(); #ifdef __DRIVERS__ bytedev_init(); blockdev_init(); #endif void *bstack = page_alloc(); pagedir_t *bpdir = pt_get(); KASSERT(NULL != bstack && "Ran out of memory while booting."); /* This little loop gives gdb a place to synch up with weenix. In the * past the weenix command started qemu was started with -S which * allowed gdb to connect and start before the boot loader ran, but * since then a bug has appeared where breakpoints fail if gdb connects * before the boot loader runs. See * * https://bugs.launchpad.net/qemu/+bug/526653 * * This loop (along with an additional command in init.gdb setting * gdb_wait to 0) sticks weenix at a known place so gdb can join a * running weenix, set gdb_wait to zero and catch the breakpoint in * bootstrap below. See Config.mk for how to set GDBWAIT correctly. * * DANGER: if GDBWAIT != 0, and gdb is not running, this loop will never * exit and weenix will not run. Make SURE the GDBWAIT is set the way * you expect. */ /*while (gdb_wait) ;*/ context_setup(&bootstrap_context, bootstrap, 0, NULL, bstack, PAGE_SIZE, bpdir); context_make_active(&bootstrap_context); panic("\nReturned to kmain()!!!\n"); }
/** @brief Page fault handler
 *
 *  Resolves copy-on-write and zero-fill-on-demand faults, and otherwise
 *  defers to the thread's registered swexn handler or kills the task.
 *
 *  @param void
 *  @return void
 */
void page_fault_handler(void)
{
	int fault_addr = get_cr2();

	mutex_lock(&cur_task->pcb_mutex);

	uint32_t align_addr = fault_addr & PGALIGN_MASK;
	uint32_t *ptep = NULL;
	Page *phy_page = NULL;

	phy_page = page_lookup(cur_task->task_pgdir, align_addr, &ptep);
	/* Only dereference the PTE pointer if the lookup actually found one. */
	uint32_t pte = (ptep != NULL) ? *ptep : 0;

	/** Catch COW page fault */
	if((ptep != NULL) && (pte & PTE_P) && (pte & PTE_COW)) {
		mutex_lock(&mtx_m.frame_mutex);
		if(phy_page->pp_ref == 1) {
			*ptep = (pte | PTE_W) & (~PTE_COW);
			mutex_unlock(&mtx_m.frame_mutex);
		}
		else {
			mutex_unlock(&mtx_m.frame_mutex);
			if(pte & PTE_RWMARK) {
				lprintf("ERROR: Cannot access TXT or ro_data area!");
				mutex_unlock(&cur_task->pcb_mutex);
				sys_vanish();
				return;
			}
			Page *new_page = page_alloc();
			if(new_page == NULL) {
				lprintf("ERROR: No pages for COW in page_fault_handler");
				mutex_unlock(&cur_task->pcb_mutex);
				sys_vanish();
				return;
			}
			uint32_t *temp_page = smemalign(PAGE_SIZE, PAGE_SIZE);
			if (temp_page == NULL) {
				lprintf("ERROR: No memory for temp_page in page_fault_handler!");
				mutex_unlock(&cur_task->pcb_mutex);
				sys_vanish();
				return;
			}
			/* Copy the physical page to a temporary page in kernel space */
			memcpy((void*)temp_page, (void*)align_addr, PAGE_SIZE);
			if(page_insert(cur_task->task_pgdir, new_page, align_addr,
				       PTE_P | PTE_U | PTE_W) < 0) {
				lprintf("ERROR: No memory for COW in page_fault_handler");
				mutex_unlock(&cur_task->pcb_mutex);
				sys_vanish();
				return;
			}
			/* Copy the content to the newly mapped physical page */
			memcpy((void*)align_addr, (void*)temp_page, PAGE_SIZE);
			/* Free the temporary physical page */
			sfree(temp_page, PAGE_SIZE);
			mutex_lock(&mtx_m.frame_mutex);
			phy_page->pp_ref--;
			mutex_unlock(&mtx_m.frame_mutex);
		}
	}
	/** Catch ZFOD */
	else if ((ptep != NULL) && (pte & PTE_P) && (pte & PTE_ZFOD)) {
		Page *pg = page_alloc();
		if(pg == NULL) {
			lprintf("ERROR: No pages for ZFOD in page_fault_handler");
			mutex_unlock(&cur_task->pcb_mutex);
			sys_vanish();
			return;
		}
		uint32_t perm = PTE_P | PTE_U | PTE_W;
		if(page_insert(cur_task->task_pgdir, pg, align_addr, perm) < 0) {
			lprintf("ERROR: No memory for ZFOD in page_fault_handler");
			mutex_unlock(&cur_task->pcb_mutex);
			sys_vanish();
			return;
		}
		bzero((void*)align_addr, PAGE_SIZE);
	}
	/* Check if an installed swexn handler can fix this up */
	else if(cur_thread->swexn_eip != NULL) {
		mutex_unlock(&cur_task->pcb_mutex);
		swexn_handler(HAS_ERROR_CODE, SWEXN_CAUSE_PAGEFAULT);
		return;
	}
	else {
		mutex_unlock(&cur_task->pcb_mutex);
		sys_vanish();
		return;
	}
	mutex_unlock(&cur_task->pcb_mutex);
	return;
}
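/*
 * Sketch (an assumption, not this kernel's actual fork path) of how a PTE
 * typically ends up with PTE_COW set so that the handler above can catch
 * the write fault: when an address space is duplicated, writable user pages
 * are mapped read-only in both parent and child and tagged PTE_COW, and the
 * frame's reference count is bumped instead of copying the frame eagerly.
 */
static void mark_cow(uint32_t *parent_ptep, uint32_t *child_ptep, Page *pg)
{
	uint32_t pte = *parent_ptep;

	pte &= ~PTE_W;		/* future writes will fault */
	pte |= PTE_COW;		/* tell the fault handler to copy on write */
	*parent_ptep = pte;
	*child_ptep = pte;
	pg->pp_ref++;		/* both address spaces now reference the frame */
}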