static void init_test()
{
	pf_count = 0;

	invlpg(&test);
	invlpg(&USER_VAR(test));
	write_pkru(0);
	set_cr0_wp(0);
}
bool elf_load_segment(page_directory_t* new_dir, unsigned char* src, elf_phdr* seg) {
	//loadable?
	if (seg->type != PT_LOAD) {
		printf_err("Tried to load non-loadable segment");
		printk_err("Tried to load non-loadable segment");
		return false;
	}

	unsigned char* src_base = src + seg->offset;
	//figure out range to map this binary to in virtual memory
	uint32_t dest_base = seg->vaddr;
	uint32_t dest_limit = dest_base + seg->memsz;
	printf("dest_base %x dest_limit %x\n", dest_base, dest_limit);

	//alloc enough mem for new task
	for (uint32_t i = dest_base, page_counter = 0; i <= dest_limit; i += PAGE_SIZE, page_counter++) {
		page_t* page = get_page(i, 1, new_dir);
		ASSERT(page, "elf_load_segment couldn't get page in new addrspace at %x\n", i);
		bool got_frame = alloc_frame(page, 0, 0);
		ASSERT(got_frame, "elf_load_segment couldn't alloc frame for page %x\n", i);

		//create buffer in current address space,
		//copy data,
		//and then map frame into new address space
		char* pagebuf = kmalloc_a(PAGE_SIZE);
		page_t* local_page = get_page((uint32_t)pagebuf, 0, page_dir_current());
		ASSERT(local_page, "couldn't get local_page!");
		int old_frame = local_page->frame;
		local_page->frame = page->frame;
		invlpg(pagebuf);

		//only seg->filesz bytes are guaranteed to be in the file!
		//_not_ memsz
		//any extra bytes between filesz and memsz must read as zero,
		//so zero the whole page first, then copy at most one page of
		//file data (the original memset/memcpy used the full segment
		//sizes, overrunning the one-page buffer)
		memset(pagebuf, 0, PAGE_SIZE);
		uint32_t file_off = page_counter * PAGE_SIZE;
		if (file_off < seg->filesz) {
			uint32_t chunk = seg->filesz - file_off;
			if (chunk > PAGE_SIZE) chunk = PAGE_SIZE;
			memcpy(pagebuf, src_base + file_off, chunk);
		}

		//the data now lives in the physical frame backing the page in
		//the new address space; restore the local mapping and free the buffer
		local_page->frame = old_frame;
		invlpg(pagebuf);
		kfree(pagebuf);
	}
	return true;
}
void lapic_map(paddr_t lapic_base)
{
	int s;
	vaddr_t va = (vaddr_t)&local_apic;

	disable_intr();
	s = lapic_tpr;

	/*
	 * Map local apic.  If we have a local apic, it's safe to assume
	 * we're on a 486 or better and can use invlpg and non-cacheable PTEs.
	 *
	 * Whap the PTE "by hand" rather than calling pmap_kenter_pa because
	 * the latter will attempt to invoke TLB shootdown code just as we
	 * might have changed the value of cpu_number().
	 */
	pmap_pte_set(va, lapic_base, PG_RW | PG_V | PG_N);
	invlpg(va);

#ifdef MULTIPROCESSOR
	cpu_init_first();
#endif

	lapic_tpr = s;
	enable_intr();
}
//
// Invalidate a TLB entry, but only if the page tables being
// edited are the ones currently in use by the processor.
//
void
tlb_invalidate(pde_t *pgdir, void *va)
{
	// Flush the entry only if we're modifying the current address space.
	// For now, there is only one address space, so always invalidate.
	invlpg(va);
}
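Every snippet in this section calls invlpg() without defining it. As a point of reference, here is a minimal sketch of what such a wrapper usually looks like on x86 with GCC-style inline assembly; the exact name, argument type, and clobber conventions vary by codebase, so treat this as illustrative rather than any particular kernel's definition.

static inline void invlpg(void *va)
{
	/* INVLPG evicts the TLB entry (if any) for the page containing va. */
	asm volatile("invlpg (%0)" : : "r" (va) : "memory");
}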
//
// Invalidate a TLB entry, but only if the page tables being
// edited are the ones currently in use by the processor.
//
void
tlb_invalidate(pde_t *pgdir, void *va)
{
	// Flush the entry only if we're modifying the current address space.
	if (!curenv || curenv->env_pgdir == pgdir)
		invlpg(va);
}
void refresh_tlb(pgd_t *pgdir, viraddr_t va)
{
	// Flush the entry only if we're modifying the current address space.
	//if (!curenv || curenv->env_pgdir == pgdir)
	invlpg((void*)va);

	// Reloading CR3 flushes every non-global TLB entry, so the
	// per-page invlpg above is subsumed by this full flush.
	int cr3 = rcr3();
	lcr3(cr3);
}
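For contrast with the per-page invlpg path, the full flush that refresh_tlb falls through to above can be sketched generically as below. This is an illustration, not code from the same project; note that a CR3 write does not flush global (PG_G) entries.

static inline void tlb_flush_all(void)
{
	unsigned long cr3;

	/* Re-writing CR3 with its current value discards all
	 * non-global TLB entries. */
	asm volatile("mov %%cr3, %0" : "=r" (cr3));
	asm volatile("mov %0, %%cr3" : : "r" (cr3) : "memory");
}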
//
// Invalidate a TLB entry, but only if the page tables being
// edited are the ones currently in use by the processor.
//
void
tlb_invalidate(pml4e_t *pml4e, void *va)
{
	// Flush the entry only if we're modifying the current address space.
	assert(pml4e != NULL);
	if (!curenv || curenv->env_pml4e == pml4e)
		invlpg(va);
}
void kmm_pgfault(struct trapframe *tf)
{
	// uint64_t err = tf->tf_err;
	uintptr_t addr = rcr2();

	if (addr >= PBASE && addr < PBASE + PSIZE) {
		pgd_t *pgd = KADDR_DIRECT(PTE_ADDR(rcr3()));
		pud_t *pud;
		pmd_t *pmd;
		pte_t *ptd;

		/* PHYSICAL ADDRESS ACCESSING */
		if (last_pgd != NULL) {
			pud = KADDR_DIRECT(PGD_ADDR(last_pgd[PGX(last_addr)]));
			pmd = KADDR_DIRECT(PUD_ADDR(pud[PUX(last_addr)]));
			ptd = KADDR_DIRECT(PMD_ADDR(pmd[PMX(last_addr)]));
			ptd[PTX(last_addr)] = 0;
			if (ptd == temp_ptd) {
				/* pmd entries are indexed with PMX elsewhere;
				 * the original used PUX here, an apparent bug */
				pmd[PMX(last_addr)] = 0;
				if (pmd == temp_pmd) {
					pud[PUX(last_addr)] = 0;
					if (pud == temp_pud)
						last_pgd[PGX(last_addr)] = 0;
				}
				if (last_pgd == pgd) {
					invlpg((void *)last_addr);
				}
			}
		}

		if (pgd[PGX(last_addr)] == 0)
			pgd[PGX(last_addr)] = PADDR_DIRECT(temp_pud) | PTE_W | PTE_P;
		pud = KADDR_DIRECT(PGD_ADDR(pgd[PGX(last_addr)]));
		if (pud[PUX(last_addr)] == 0)
			pud[PUX(last_addr)] = PADDR_DIRECT(temp_pmd) | PTE_W | PTE_P;
		pmd = KADDR_DIRECT(PUD_ADDR(pud[PUX(last_addr)]));
		if (pmd[PMX(last_addr)] == 0)
			pmd[PMX(last_addr)] = PADDR_DIRECT(temp_ptd) | PTE_W | PTE_P;
		ptd = KADDR_DIRECT(PMD_ADDR(pmd[PMX(last_addr)]));
		ptd[PTX(last_addr)] = PADDR_DIRECT(addr) | PTE_W | PTE_P;

		last_pgd = pgd;
		last_addr = addr;
		/* XXX? */
		// invlpg((void *)addr);
	}
}
//
// Invalidate a TLB entry, but only if the page tables being
// edited are the ones currently in use by the processor.
//
void
tlb_invalidate(pde_t *pgdir, void *va)
{
	// Flush the entry only if we're modifying the current address space.
	// For now, there is only one address space, so always invalidate.
	// Note: this variant also tears down the mapping itself (decref and
	// PTE clear), not just the stale TLB entry.
	pte_t *tmppte;
	struct PageInfo *tmp = page_lookup(pgdir, va, &tmppte);
	if (tmp != NULL) {
		page_decref(tmp);
		*tmppte = 0;
	}
	invlpg(va);
}
//
// Invalidate a TLB entry, but only if the page tables being
// edited are the ones currently in use by the processor.
//
void
tlb_invalidate(pde_t *pgdir, void *va)
{
	// Flush the entry only if we're modifying the current address space.
	if (!curenv || curenv->env_pgdir == pgdir)
		invlpg(va);

	/*
	// Flush the entry only if we're modifying the current address space.
	// For now, there is only one address space, so always invalidate.
	invlpg(va);	//lab3
	*/
}
void* mappage(uint32 page)
{
	int i;
	for (i = 0; i < 1024; ++i) {
		if (isavl(MTMAP, i)) {
			unavl(MTMAP, i);
			break;
		}
	}
	if (i == 1024)	// no free slot; avoid writing past TMPMAP[1023]
		return 0;
	TMPMAP[i].base = page;
	TMPMAP[i].P = 1;
	void* addr = getvmaddr(MAPINDEX, i);
	invlpg(addr);
	return addr;
}
void page_remove(pml4e_t *pml4, uintptr_t va)
{
	struct page *p;
	pte_t *pte;

	p = page_lookup(pml4, va, &pte);
	if (p == NULL)	// nothing to do
		return;
	page_decref(p);
	*pte = 0;
	invlpg((void *)va);
}
/* Unmap memory previously mapped with table_map(). */
static void
table_unmap(void *data, vm_offset_t length)
{
	vm_offset_t va, off;

	va = (vm_offset_t)data;
	off = va & PAGE_MASK;
	length = roundup(length + off, PAGE_SIZE);
	va &= ~PAGE_MASK;
	while (length > 0) {
		pmap_kremove(va);
		invlpg(va);
		va += PAGE_SIZE;
		length -= PAGE_SIZE;
	}
}
void
putmmu(uintptr va, uintptr pa, Page *)
{
	uintptr *pte, old;
	int x;

	x = splhi();
	pte = mmuwalk(m->pml4, va, 0, 1);
	if(pte == 0)
		panic("putmmu: bug: va=%#p pa=%#p", va, pa);
	old = *pte;
	*pte = pa | PTEVALID|PTEUSER;
	splx(x);
	if(old & PTEVALID)
		invlpg(va);
}
void
kdp_print_phys(int src)
{
	unsigned int *iptr;
	int i;

	*(int *) DMAP2 = 0x63 | (src & 0xfffff000);
	invlpg((uintptr_t) DADDR2);
	iptr = (unsigned int *) DADDR2;
	for (i = 0; i < 100; i++) {
		kprintf("0x%x ", *iptr++);
		if ((i % 8) == 0)
			kprintf("\n");
	}
	kprintf("\n");
	*(int *) DMAP2 = 0;
}
/*
 * Map some memory using the crashdump map.  'offset' is an offset in
 * pages into the crashdump map to use for the start of the mapping.
 */
static void *
table_map(vm_paddr_t pa, int offset, vm_offset_t length)
{
	vm_offset_t va, off;
	void *data;

	off = pa & PAGE_MASK;
	length = roundup(length + off, PAGE_SIZE);
	pa = pa & PG_FRAME;
	va = (vm_offset_t)pmap_kenter_temporary(pa, offset) +
	    (offset * PAGE_SIZE);
	data = (void *)(va + off);
	length -= PAGE_SIZE;
	while (length > 0) {
		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		length -= PAGE_SIZE;
		pmap_kenter(va, pa);
		invlpg(va);
	}
	return (data);
}
int page_insert(pml4e_t *pml4, struct page *p, uintptr_t va, unsigned perm)
{
	pte_t *pte = mmap_lookup(pml4, va, 1);
	if (pte == NULL)	// no memory
		return -1;
	// Remap the same page (possibly with changed permissions).
	// Handling this case separately avoids removing a page whose
	// refcount would momentarily drop to zero.
	if (PTE_ADDR(*pte) == page2pa(p)) {
		invlpg((void *)va);
		*pte = page2pa(p) | perm | PTE_P;
		return 0;
	}
	// delete the old mapping if one exists
	page_remove(pml4, va);
	*pte = page2pa(p) | perm | PTE_P;
	page_incref(p);
	return 0;
}
/*
 * The real IPI handler.  This should be really fast; we may run it once
 * per scheduler loop so we won't miss any IPIs.
 */
void ipi_handler()
{
#ifdef __SMP__
	if (ipi_pending == 0)
		return;

	in_ipih = 1;
	MP_SPINLOCK_GET(&ipi_spinlock);

	while (ipi_pending > 0) {
		u_int ctrl = ipiq[ipi_pending-1].ctrl;
		switch (ctrl) {
		case IPI_CTRL_HALT:
			localapic_disable();
			asm volatile("hlt");
			break;
		case IPI_CTRL_TLBFLUSH:
			{
				u_int va = *(u_int*)(ipiq[ipi_pending-1].payload);
				invlpg(va);
				/* zero the slot to ack the sender */
				*(u_int*)(ipiq[ipi_pending-1].payload) = 0;
			}
			break;
		}
		ipi_pending--;
	}

	MP_SPINLOCK_RELEASE(&ipi_spinlock);
	in_ipih = 0;
#endif /* __SMP__ */
}
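The handler above only shows the receiving side of the TLB shootdown. A hypothetical sender for the IPI_CTRL_TLBFLUSH case might look like the sketch below; the queue fields (ipiq, ipi_pending, ipi_spinlock) come from the snippet, but the helper name and payload variable are assumptions for illustration, and the actual IPI delivery is platform-specific.

static u_int shootdown_va;	/* hypothetical payload slot */

void tlb_shootdown(u_int va)
{
	MP_SPINLOCK_GET(&ipi_spinlock);
	shootdown_va = va;
	ipiq[ipi_pending].ctrl = IPI_CTRL_TLBFLUSH;
	ipiq[ipi_pending].payload = &shootdown_va;
	ipi_pending++;
	MP_SPINLOCK_RELEASE(&ipi_spinlock);
	/* ...raise the IPI on the other CPUs via the local APIC, then
	 * optionally spin until the handler zeroes shootdown_va as an ack. */
}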
/*
 * Function: sva_ghost_fault()
 *
 * Description:
 *  Handle page faults of ghost memory pages.
 *
 * Inputs:
 *  vaddr - The virtual address of the faulting ghost memory page.
 *  code  - The page fault code.
 */
void
sva_ghost_fault (uintptr_t vaddr, unsigned long code) {
  /* Old interrupt flags */
  uintptr_t rflags;

  /*
   * Disable interrupts.
   */
  rflags = sva_enter_critical();

  /* Physical address of allocated secure memory pointer */
  uintptr_t sp;

  /* The address of the PML4e page table */
  pml4e_t pml4e;

  /*
   * Get the current interrupt context; the arguments will be in it.
   */
  struct CPUState * cpup = getCPUState();
  struct SVAThread * threadp = cpup->currentThread;

  /* copy-on-write page fault */
  if ((code & PGEX_P) && (code & PGEX_W)) {
    pml4e_t * pml4e_ptr = get_pml4eVaddr (get_pagetable(), vaddr);
    if (!isPresent (pml4e_ptr))
      panic("sva_ghost_fault: cow pgfault pml4e %p does not exist\n", pml4e_ptr);
    pdpte_t * pdpte = get_pdpteVaddr (pml4e_ptr, vaddr);
    if (!isPresent (pdpte))
      panic("sva_ghost_fault: cow pgfault pdpte %p does not exist\n", pdpte);
    pde_t * pde = get_pdeVaddr (pdpte, vaddr);
    if (!isPresent (pde))
      panic("sva_ghost_fault: cow pgfault pde %p does not exist\n", pde);
    pte_t * pte = get_pteVaddr (pde, vaddr);

    uintptr_t paddr = *pte & PG_FRAME;
    page_desc_t * pgDesc = getPageDescPtr (paddr);
    if (pgDesc->type != PG_GHOST)
      panic("SVA: sva_ghost_fault: vaddr = 0x%lx paddr = 0x%lx is not a ghost memory page!\n", vaddr, paddr);

    /* If only one process maps this page, directly grant it write permission */
    if (pgDesc->count == 1) {
      *pte = (*pte) | PTE_CANWRITE;
    }
    /* Otherwise copy-on-write */
    else {
      uintptr_t vaddr_old = (uintptr_t) getVirtualSVADMAP(paddr);
      uintptr_t paddr_new = alloc_frame();
      page_desc_t * pgDesc_new = getPageDescPtr (paddr_new);
      if (pgRefCount (pgDesc_new) > 1) {
        panic ("SVA: Ghost page still in use somewhere else!\n");
      }
      if (isPTP(pgDesc_new) || isCodePG (pgDesc_new)) {
        panic ("SVA: Ghost page has wrong type!\n");
      }
      memcpy(getVirtualSVADMAP(paddr_new), (void *) vaddr_old, X86_PAGE_SIZE);
      *pte = (paddr_new & addrmask) | PTE_CANWRITE | PTE_CANUSER | PTE_PRESENT;
      invlpg(vaddr);
      getPageDescPtr (paddr_new)->type = PG_GHOST;
      getPageDescPtr (paddr_new)->count = 1;
      pgDesc->count--;
    }

    /* Re-enable interrupts before the early return (the original
     * returned here without leaving the critical section). */
    sva_exit_critical (rflags);
    return;
  }

  /*
   * Determine if this is the first secure memory allocation.
   */
  unsigned char firstSecAlloc = (threadp->secmemSize == 0);

  /*
   * Get a page of memory from the operating system.  Note that the OS
   * provides the physical address of the allocated memory.
   */
  if ((sp = alloc_frame()) != 0) {
    /* Physical address of the allocated page */
    uintptr_t paddr = (uintptr_t) sp;

    /*
     * Map the memory into a part of the address space reserved for secure
     * memory.
     */
    pml4e = mapSecurePage ((uintptr_t)vaddr, paddr);

    /*
     * If this is the first piece of secure memory that we've allocated,
     * record the address of the top-level page table that maps in the
     * secure memory region.  The context switching intrinsics will want to
     * know where this entry is so that they can quickly enable and disable
     * it on context switches.
     */
    if (firstSecAlloc) {
      threadp->secmemPML4e = pml4e;
    }
  } else {
    panic ("SVA: Kernel secure memory allocation failed!\n");
  }

  /*
   * Zero out the ghost memory contents.
   */
  memset ((void *)vaddr, 0, X86_PAGE_SIZE);

  /* Re-enable interrupts if necessary */
  sva_exit_critical (rflags);
  return;
}
/*
 * AP CPUs call this to initialize themselves.
 */
void
init_secondary(void)
{
	vm_offset_t addr;
	u_int cpuid;
	int gsel_tss;

	/* bootAP is set in start_ap() to our ID. */
	PCPU_SET(currentldt, _default_ldt);
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
#if 0
	gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
#endif
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
#if 0
	PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
#endif
	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */

	/*
	 * Signal our startup to the BSP.
	 */
	mp_naps++;

	/* Spin until the BSP releases the APs. */
	while (!aps_ready)
		ia32_pause();

	/* BSP may have changed PTD while we were waiting */
	invltlb();
	for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
		invlpg(addr);

	/* Set up FPU state on the AP. */
	npxinit();
#if 0
	/* set up SSE registers */
	enable_sse();
#endif
#if 0 && defined(PAE)
	/* Enable the PTE no-execute bit. */
	if ((amd_feature & AMDID_NX) != 0) {
		uint64_t msr;

		msr = rdmsr(MSR_EFER) | EFER_NXE;
		wrmsr(MSR_EFER, msr);
	}
#endif
#if 0
	/* A quick check from sanity claus */
	if (PCPU_GET(apic_id) != lapic_id()) {
		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
		printf("SMP: actual apic_id = %d\n", lapic_id());
		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
		panic("cpuid mismatch! boom!!");
	}
#endif

	/* Initialize curthread. */
	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
	PCPU_SET(curthread, PCPU_GET(idlethread));

	mtx_lock_spin(&ap_boot_mtx);
#if 0
	/* Init local apic for irq's */
	lapic_setup(1);
#endif
	smp_cpus++;

	cpuid = PCPU_GET(cpuid);
	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
	printf("SMP: AP CPU #%d Launched!\n", cpuid);

	/* Determine if we are a logical CPU. */
	if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
		CPU_SET(cpuid, &logical_cpus_mask);

	/* Determine if we are a hyperthread. */
	if (hyperthreading_cpus > 1 &&
	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
		CPU_SET(cpuid, &hyperthreading_cpus_mask);
#if 0
	if (bootverbose)
		lapic_dump("AP");
#endif
	if (smp_cpus == mp_ncpus) {
		/* enable IPIs, tlb shootdown, freezes etc */
		atomic_store_rel_int(&smp_started, 1);
		smp_active = 1;	/* historic */
	}

	mtx_unlock_spin(&ap_boot_mtx);

	/* Wait until all the APs are up. */
	while (smp_started == 0)
		ia32_pause();

	PCPU_SET(curthread, PCPU_GET(idlethread));

	/* Start per-CPU event timers. */
	cpu_initclocks_ap();

	/* Enter the scheduler. */
	sched_throw(NULL);

	panic("scheduler returned us to %s", __func__);
	/* NOTREACHED */
}
// Invalidate a TLB entry, but only if the page tables being
// edited are the ones currently in use by the processor.
void
tlb_invalidate(pde_t *pgdir, uintptr_t la)
{
	if (rcr3() == PADDR(pgdir)) {
		invlpg((void *)la);
	}
}
void
tlb_invalidate(pml4e_t *pml4e, void *va)
{
	invlpg(va);
}
int main(int ac, char **av)
{
	unsigned long i;
	unsigned int pkey = 0x2;
	unsigned int pkru_ad = 0x10;
	unsigned int pkru_wd = 0x20;

	if (!(cpuid_indexed(7, 0).c & (1 << X86_FEATURE_PKU))) {
		printf("PKU not enabled\n");
		return report_summary();
	}

	setup_vm();
	setup_alt_stack();
	set_intr_alt_stack(14, pf_tss);
	wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LMA);

	for (i = 0; i < USER_BASE; i += PAGE_SIZE) {
		*get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~PT_USER_MASK;
		*get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= ((unsigned long)pkey << PTE_PKEY_BIT);
		invlpg((void *)i);
	}

	for (i = USER_BASE; i < 2 * USER_BASE; i += PAGE_SIZE) {
		*get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) &= ~USER_BASE;
		*get_pte(phys_to_virt(read_cr3()), phys_to_virt(i)) |= ((unsigned long)pkey << PTE_PKEY_BIT);
		invlpg((void *)i);
	}

	write_cr4(read_cr4() | X86_CR4_PKE);
	write_cr3(read_cr3());

	init_test();
	set_cr0_wp(1);
	write_pkru(pkru_ad);
	test = 21;
	report("write to supervisor page when pkru is ad and wp == 1", pf_count == 0 && test == 21);

	init_test();
	set_cr0_wp(0);
	write_pkru(pkru_ad);
	test = 22;
	report("write to supervisor page when pkru is ad and wp == 0", pf_count == 0 && test == 22);

	init_test();
	set_cr0_wp(1);
	write_pkru(pkru_wd);
	test = 23;
	report("write to supervisor page when pkru is wd and wp == 1", pf_count == 0 && test == 23);

	init_test();
	set_cr0_wp(0);
	write_pkru(pkru_wd);
	test = 24;
	report("write to supervisor page when pkru is wd and wp == 0", pf_count == 0 && test == 24);

	init_test();
	write_pkru(pkru_wd);
	set_cr0_wp(0);
	USER_VAR(test) = 25;
	report("write to user page when pkru is wd and wp == 0", pf_count == 0 && test == 25);

	init_test();
	write_pkru(pkru_wd);
	set_cr0_wp(1);
	USER_VAR(test) = 26;
	report("write to user page when pkru is wd and wp == 1", pf_count == 1 && test == 26 && save == 25);

	init_test();
	write_pkru(pkru_ad);
	(void)USER_VAR(test);
	report("read from user page when pkru is ad", pf_count == 1 && save == 26);

	// TODO: implicit kernel access from ring 3 (e.g. int)

	return report_summary();
}
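The PKU test above relies on a write_pkru() helper. As background, updating PKRU from ring 0 is a single WRPKRU instruction, which takes the new value in EAX and requires ECX and EDX to be zero; a minimal sketch (spelled with raw opcode bytes for assemblers that predate the mnemonic) could look like this. The helper name is illustrative, not the test suite's definition.

static inline void wrpkru(unsigned int val)
{
	/* WRPKRU: EAX = new PKRU value, ECX = 0, EDX = 0 */
	asm volatile(".byte 0x0f,0x01,0xef"
		     : : "a" (val), "c" (0), "d" (0));
}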
/*
 * pg->pgszi indicates the page size in machp()->pgsz[] used for the mapping.
 * For the user, it can be either 2*MiB or 1*GiB pages.
 * For 2*MiB pages, we use three levels, not four.
 * For 1*GiB pages, we use two levels.
 */
void
mmuput(uintptr_t va, Page *pg, uint attr)
{
	Proc *up = externup();
	int lvl, user, x, pgsz;
	PTE *pte;
	Page *page, *prev;
	Mpl pl;
	uintmem pa, ppn;
	char buf[80];

	ppn = 0;
	pa = pg->pa;
	if(pa == 0)
		panic("mmuput: zero pa");
	if(DBGFLG) {
		snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#ux\n",
			machp()->machno, up, va, pa, attr);
		print("%s", buf);
	}
	assert(pg->pgszi >= 0);
	pgsz = sys->pgsz[pg->pgszi];
	if(pa & (pgsz-1))
		panic("mmuput: pa offset non zero: %#llux\n", pa);
	pa |= pteflags(attr);

	pl = splhi();
	if(DBGFLG)
		mmuptpcheck(up);
	user = (va < KZERO);
	x = PTLX(va, 3);

	pte = UINT2PTR(machp()->MMU.pml4->va);
	pte += x;
	prev = machp()->MMU.pml4;

	for(lvl = 3; lvl >= 0; lvl--) {
		if(user) {
			if(pgsz == 2*MiB && lvl == 1)	/* use 2M */
				break;
			if(pgsz == 1ull*GiB && lvl == 2)	/* use 1G */
				break;
		}
		for(page = up->MMU.mmuptp[lvl]; page != nil; page = page->next)
			if(page->prev == prev && page->daddr == x) {
				if(*pte == 0) {
					print("mmu: jmk and nemo had fun\n");
					*pte = PPN(page->pa)|PteU|PteRW|PteP;
				}
				break;
			}

		if(page == nil) {
			if(up->MMU.mmuptp[0] == nil)
				page = mmuptpalloc();
			else {
				page = up->MMU.mmuptp[0];
				up->MMU.mmuptp[0] = page->next;
			}
			page->daddr = x;
			page->next = up->MMU.mmuptp[lvl];
			up->MMU.mmuptp[lvl] = page;
			page->prev = prev;
			*pte = PPN(page->pa)|PteU|PteRW|PteP;
			if(lvl == 3 && x >= machp()->MMU.pml4->daddr)
				machp()->MMU.pml4->daddr = x+1;
		}
		x = PTLX(va, lvl-1);

		ppn = PPN(*pte);
		if(ppn == 0)
			panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte);

		pte = UINT2PTR(KADDR(ppn));
		pte += x;
		prev = page;
	}

	if(DBGFLG)
		checkpte(ppn, pte);
	*pte = pa|PteU;

	if(user)
		switch(pgsz) {
		case 2*MiB:
		case 1*GiB:
			*pte |= PtePS;
			break;
		default:
			panic("mmuput: user pages must be 2M or 1G");
		}
	splx(pl);

	if(DBGFLG) {
		snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llux\n",
			machp()->machno, up, pte, pte?*pte:~0);
		print("%s", buf);
	}

	invlpg(va);	/* only if old entry valid? */
}