/* tear down the identity map we created in assembly. ONLY do this after all the
 * APs have started up (and you know they've done so. But you must do it BEFORE
 * you create address spaces for procs, i.e. userinit() */
static void
teardownidmap(Mach *m)
{
	int i;
	uintptr_t va = 0;
	PTE *p;

	/* loop on the level 2 because we should not assume we know
	 * how many there are But stop after 1G no matter what, and
	 * report if there were that many, as that is odd.
	 */
	/* Pass 1: clear each big-page PTE of the identity map, one per
	 * BIGPGSZ, stopping at the first hole or failed walk. */
	for(i = 0; i < 512; i++, va += BIGPGSZ) {
		if (mmuwalk(UINT2PTR(m->pml4->va), va, 1, &p, nil) != 1)
			break;
		if (! *p)
			break;
		iprint("teardown: va %p, pte %p\n", (void *)va, p);
		*p = 0;
	}
	iprint("Teardown: zapped %d PML1 entries\n", i);

	/* Pass 2: drop the upper-level (levels 2 and 3) entries that
	 * covered VA 0, so nothing still points at the freed tables. */
	for(i = 2; i < 4; i++) {
		if (mmuwalk(UINT2PTR(m->pml4->va), 0, i, &p, nil) != i) {
			iprint("weird; 0 not mapped at %d\n", i);
			continue;
		}
		iprint("teardown: zap %p at level %d\n", p, i);
		if (p)
			*p = 0;
	}
}
/*
 * Discover and configure physical memory at boot: mark the legacy
 * 640KB-1MB window with the right cache attributes, scan for RAM
 * (E820 first, brute-force fallback), then publish the usable banks
 * in conf.mem for the allocator.
 */
void
meminit(void)
{
	int i;
	Map *mp;
	Confmem *cm;
	ulong pa, *pte;
	ulong maxmem, lost;
	char *p;

	/* optional plan9.ini override capping detected memory */
	if(p = getconf("*maxmem"))
		maxmem = strtoul(p, 0, 0);
	else
		maxmem = 0;

	/*
	 * Set special attributes for memory between 640KB and 1MB:
	 *   VGA memory is writethrough;
	 *   BIOS ROM's/UMB's are uncached;
	 * then scan for useful memory.
	 */
	for(pa = 0xA0000; pa < 0xC0000; pa += BY2PG){
		pte = mmuwalk(m->pdb, (ulong)KADDR(pa), 2, 0);
		*pte |= PTEWT;
	}
	for(pa = 0xC0000; pa < 0x100000; pa += BY2PG){
		pte = mmuwalk(m->pdb, (ulong)KADDR(pa), 2, 0);
		*pte |= PTEUNCACHED;
	}
	/* reload CR3 so the attribute changes take effect */
	mmuflushtlb(PADDR(m->pdb));

	umbscan();
	lowraminit();
	if(e820scan() < 0)
		ramscan(maxmem);

	/*
	 * Set the conf entries describing banks of allocatable memory.
	 */
	for(i=0; i<nelem(mapram) && i<nelem(conf.mem); i++){
		mp = &rmapram.map[i];
		cm = &conf.mem[i];
		cm->base = mp->addr;
		cm->npage = mp->size/BY2PG;
	}

	/* any RAM banks beyond what conf.mem can describe are unusable */
	lost = 0;
	for(; i<nelem(mapram); i++)
		lost += rmapram.map[i].size;
	if(lost)
		print("meminit - lost %lud bytes\n", lost);

	if(MEMDEBUG)
		memdebug();
}
/*
 * Diagnostic: print the PTE reached at every page-table level for
 * addr, from the top (level 3) down to level 0.  Levels the walk
 * cannot reach are silently skipped.
 */
void
dumpmmuwalk(uint64_t addr)
{
	int level, l;
	PTE *pte, *pml4;

	pml4 = UINT2PTR(machp()->MMU.pml4->va);
	for(level = 3; level >= 0; level--){
		l = mmuwalk(pml4, addr, level, &pte, nil);
		if(l < 0)
			continue;
		print("cpu%d: mmu l%d pte %#p = %llux\n", machp()->machno, l, pte, *pte);
	}
}
/*
 * Translate a virtual address to its physical address by walking the
 * current page tables.  The level at which the walk stops determines
 * the page size and hence the offset mask.  Returns ~0 if va is not
 * mapped.  (Whether va should be void* or uintptr is still an open
 * interface question.)
 */
uintmem
mmuphysaddr(uintptr_t va)
{
	int level;
	PTE *entry;
	uintmem offmask, phys;

	level = mmuwalk(UINT2PTR(machp()->MMU.pml4->va), va, 0, &entry, nil);
	DBG("physaddr: va %#p l %d\n", va, level);
	if(level < 0)
		return ~0;

	offmask = PGLSZ(level)-1;
	phys = (*entry & ~offmask) + (va & offmask);

	DBG("physaddr: l %d va %#p pa %#llux\n", level, va, phys);
	return phys;
}
/*
 * Double-check the user MMU: verify that the PTE for va, if present
 * and valid, really points at pa.  Error reporting only; nothing is
 * modified.
 */
void
checkmmu(uintptr va, uintptr pa)
{
	uintptr *pte;

	pte = mmuwalk(m->pml4, va, 0, 0);
	if(pte == 0)
		return;		/* not mapped at all */
	if((*pte & PTEVALID) == 0)
		return;		/* entry not valid */
	if(PPN(*pte) == pa)
		return;		/* matches; all is well */
	print("%ld %s: va=%#p pa=%#p pte=%#p\n",
		up->pid, up->text, va, pa, *pte);
}
/*
 * Sanity-check that the kernel mapping for address a (whose physical
 * page is ppn) is intact at every page-table level, accumulating a
 * trace of each level in buf.  On any missing/non-present entry,
 * print the trace plus the sys VM boundaries and panic.
 * Returns early (success) at level 2 if the mapping is a large page.
 */
static void
checkpte(uintmem ppn, void *a)
{
	Proc *up = externup();
	int l;
	PTE *pte, *pml4;
	uint64_t addr;
	char buf[240], *s;

	addr = PTR2UINT(a);
	pml4 = UINT2PTR(machp()->pml4->va);
	pte = 0;
	s = buf;
	*s = 0;
	/* level 3: top-level entry must be present */
	if((l = mmuwalk(pml4, addr, 3, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	s = seprint(buf, buf+sizeof buf,
		"check3: l%d pte %#p = %llux\n",
		l, pte, pte?*pte:~0);
	/* level 2 */
	if((l = mmuwalk(pml4, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	s = seprint(s, buf+sizeof buf,
		"check2: l%d pte %#p = %llux\n",
		l, pte, pte?*pte:~0);
	/* large page at level 2: no level-1 table to check */
	if(*pte&PtePS)
		return;
	/* level 1 */
	if((l = mmuwalk(pml4, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0)
		goto Panic;
	seprint(s, buf+sizeof buf,
		"check1: l%d pte %#p = %llux\n",
		l, pte, pte?*pte:~0);
	return;

Panic:
	/* append the failing level's state, dump everything, and die */
	seprint(s, buf+sizeof buf,
		"checkpte: l%d addr %#p ppn %#ullx kaddr %#p pte %#p = %llux",
		l, a, ppn, KADDR(ppn), pte, pte?*pte:~0);
	print("%s\n", buf);
	seprint(buf, buf+sizeof buf, "start %#ullx unused %#ullx"
		" unmap %#ullx end %#ullx\n",
		sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend);
	panic("%s", buf);
}
/*
 * Debug aid: walk the current page tables for a user virtual address
 * and print the PTE found, together with the relevant Mach pointers.
 *
 * Fix: the original ignored mmuwalk's return value and dereferenced
 * pte unconditionally; pte was never initialized, so an unmapped va
 * read an indeterminate pointer (undefined behavior) — fatal in a
 * helper meant for debugging faults.  Now the failure is reported
 * instead of dereferenced.
 */
void
debugtouser(void *va)
{
	Mach *m = machp();
	uintptr_t uva = (uintptr_t) va;
	PTE *pte, *pml4;

	pml4 = UINT2PTR(m->pml4->va);
	pte = nil;
	if(mmuwalk(pml4, uva, 0, &pte, nil) < 0 || pte == nil){
		iprint("va %p m %p m>pml4 %p m->pml4->va %p pml4 %p: no PTE\n",
			va, m, m->pml4, m->pml4->va, (void *)pml4);
		return;
	}
	iprint("va %p m %p m>pml4 %p m->pml4->va %p pml4 %p PTE 0x%lx\n",
		va, m, m->pml4, m->pml4->va, (void *)pml4, *pte);
}
/*
 * Xen guest MMU initialization: in PAE mode replace the hypervisor's
 * initial page-directory-pointer table with our own copy, then extend
 * the kernel mappings to cover all of bank 0 one 4K page at a time
 * (every PTE write must go through xenupdate, not a direct store).
 */
void
mmuinit(void)
{
	ulong *pte, npgs, pa;

	if(paemode){
		int i;
		xenpdpt = (uvlong*)m->pdb;
		/* PDPT must be 32-byte sized/aligned in PAE mode */
		m->pdb = xspanalloc(32, 32, 0);
		/* clear "reserved" bits in initial page directory pointers -- Xen bug? */
		for(i = 0; i < 4; i++)
			((uvlong*)m->pdb)[i] = xenpdpt[i] & ~0x1E6LL;
	}

	/*
	 * So far only memory up to xentop is mapped, map the rest.
	 * We cant use large pages because our contiguous PA space
	 * is not necessarily contiguous in MA.
	 */
	npgs = conf.mem[0].npage;
	for(pa=conf.mem[0].base; npgs; npgs--, pa+=BY2PG) {
		pte = mmuwalk(m->pdb, (ulong)KADDR(pa), 2, 1);
		if(!pte)
			panic("mmuinit");
		xenupdate(pte, pa|PTEVALID|PTEWRITE);
	}

	memglobal();

#ifdef we_may_eventually_want_this
	/* make kernel text unwritable */
	for(x = KTZERO; x < (ulong)etext; x += BY2PG){
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}
#endif

	/* install the kernel stack for this processor */
	taskswitch(0, (ulong)m + BY2PG);
}
/*
 * Map [pa, pa+size) at va in the given page table hierarchy, using
 * 2MiB pages where alignment allows and 4KiB pages otherwise.  Any
 * attribute bits carried in the low bits of pa become the PTE flags;
 * KZERO-and-above mappings are additionally marked global.  Only
 * valid for VMAP and kernel addresses; panics on misuse.
 */
void
pmap(uintptr *pml4, uintptr pa, uintptr va, vlong size)
{
	uintptr *pte, *ptee, flags;
	int z, l;

	if(size <= 0 || va < VMAP)
		panic("pmap: pa=%#p va=%#p size=%lld", pa, va, size);
	/* split pa into page frame and attribute flags */
	flags = pa;
	pa = PPN(pa);
	flags -= pa;
	if(va >= KZERO)
		flags |= PTEGLOBAL;
	while(size > 0){
		/* prefer a 2MiB page when remaining size and alignment permit */
		if(size >= PGLSZ(1) && (va % PGLSZ(1)) == 0)
			flags |= PTESIZE;
		l = (flags & PTESIZE) != 0;
		z = PGLSZ(l);
		pte = mmuwalk(pml4, va, l, 1);
		if(pte == 0){
			/* walk failed: maybe va falls inside an existing
			 * large page one level up; if so, back va/pa down
			 * to that page's start and retry as large pages */
			pte = mmuwalk(pml4, va, ++l, 0);
			if(pte && (*pte & PTESIZE)){
				flags |= PTESIZE;
				z = va & (PGLSZ(l)-1);
				va -= z;
				pa -= z;
				size += z;
				continue;
			}
			panic("pmap: pa=%#p va=%#p size=%lld", pa, va, size);
		}
		/* fill consecutive entries in this table without re-walking */
		ptee = pte + ptecount(va, l);
		while(size > 0 && pte < ptee){
			*pte++ = pa | flags;
			pa += z;
			va += z;
			size -= z;
		}
	}
}
/*
 * vmapsync() is currently unused as the VMAP and KZERO PDPs
 * are shared between processors. (see mpstartap)
 *
 * Copy the boot processor's PTE for va into this processor's page
 * tables.  Returns 1 if a valid mapping was propagated, 0 otherwise.
 */
int
vmapsync(uintptr va)
{
	uintptr *src, *dst;
	int l;

	/* nothing to do below VMAP, or on the boot processor itself */
	if(va < VMAP)
		return 0;
	if(m->machno == 0)
		return 0;

	for(l = 0; l < 2; l++){
		src = mmuwalk(MACHP(0)->pml4, va, l, 0);
		if(src == 0 || (*src & PTEVALID) == 0)
			continue;	/* not mapped at this level on cpu0 */
		dst = mmuwalk(m->pml4, va, l, 1);
		if(dst == 0)
			return 0;	/* could not build local tables */
		if(dst != src)
			*dst = *src;
		return 1;
	}
	return 0;
}
/*
 * Install a user mapping va -> pa in the current address space.
 * Runs at splhi so the walk/update is not interrupted; the TLB entry
 * is invalidated only if an old valid mapping was replaced.
 */
void
putmmu(uintptr va, uintptr pa, Page *)
{
	uintptr *pte, prev;
	int s;

	s = splhi();
	pte = mmuwalk(m->pml4, va, 0, 1);
	if(pte == 0)
		panic("putmmu: bug: va=%#p pa=%#p", va, pa);
	prev = *pte;
	*pte = pa | PTEVALID|PTEUSER;
	splx(s);

	/* stale translation possible only when overwriting a valid entry */
	if(prev & PTEVALID)
		invlpg(va);
}
/*
 * Release a temporary kernel mapping created by kmap().  Addresses
 * at or above KZERO were direct-mapped and need no unmapping.
 */
void
kunmap(KMap *k)
{
	uintptr va, *pte;
	int s;

	va = (uintptr)k;
	if(va >= KZERO)
		return;		/* direct-mapped; kmap() made no PTE */

	s = splhi();
	pte = mmuwalk(m->pml4, va, 0, 0);
	if(pte == 0 || (*pte & PTEVALID) == 0)
		panic("kunmap: va=%#p", va);
	*pte = 0;
	splx(s);
}
/*
 * Return a kernel-addressable mapping for page.  If the physical
 * address is directly addressable, just use KADDR; otherwise install
 * a temporary PTE in the per-process KMAP window.  Slots are handed
 * out round-robin via up->kmapindex; when the index wraps, the whole
 * TLB is flushed so recycled slots cannot alias stale translations.
 */
KMap*
kmap(Page *page)
{
	uintptr *pte, pa, va;
	int x;

	pa = page->pa;
	/* fast path: pa reachable through the direct map */
	if(cankaddr(pa) != 0)
		return (KMap*)KADDR(pa);

	x = splhi();
	va = KMAP + ((uintptr)up->kmapindex << PGSHIFT);
	pte = mmuwalk(m->pml4, va, 0, 1);
	/* slot must exist and must not already hold a live mapping */
	if(pte == 0 || *pte & PTEVALID)
		panic("kmap: pa=%#p va=%#p", pa, va);
	*pte = pa | PTEWRITE|PTEVALID;
	up->kmapindex = (up->kmapindex + 1) % (1<<PTSHIFT);
	if(up->kmapindex == 0)
		mmuflushtlb();	/* wrapped: purge any stale slot translations */
	splx(x);
	return (KMap*)va;
}
/*
 * Add an uncached kernel mapping for [pa, pa+size) at va (or at
 * KADDR(pa) if va==0), using 4MiB pages when the CPU supports PSE
 * and alignment allows, else 4KiB pages.  Already-present mappings
 * are verified rather than overwritten.  Returns the physical
 * address reached; CR3 is reloaded only if new entries were added.
 */
ulong
mmukmap(ulong pa, ulong va, int size)
{
	ulong pae, *table, *pdb, pgsz, *pte, x;
	int pse, sync;
	extern int cpuidax, cpuiddx;

	pdb = KADDR(getcr3());
	/* PSE usable only if CPUID advertises it and CR4.PSE is set */
	if((cpuiddx & 0x08) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;
	sync = 0;

	pa = PPN(pa);
	if(va == 0)
		va = (ulong)KADDR(pa);
	else
		va = PPN(va);

	pae = pa + size;
	lock(&mmukmaplock);
	while(pa < pae){
		table = &pdb[PDX(va)];
		/*
		 * Possibly already mapped.
		 */
		if(*table & PTEVALID){
			if(*table & PTESIZE){
				/*
				 * Big page. Does it fit within?
				 * If it does, adjust pgsz so the correct end can be
				 * returned and get out.
				 * If not, adjust pgsz up to the next 4MiB boundary
				 * and continue.
				 */
				x = PPN(*table);
				if(x != pa)
					panic("mmukmap1: pa 0x%ux entry 0x%ux\n",
						pa, *table);
				x += 4*MiB;
				if(pae <= x){
					pa = pae;
					break;
				}
				pgsz = x - pa;
				pa += pgsz;
				va += pgsz;

				continue;
			}
			else{
				/*
				 * Little page. Walk to the entry.
				 * If the entry is valid, set pgsz and continue.
				 * If not, make it so, set pgsz, sync and continue.
				 */
				pte = mmuwalk(pdb, va, 2, 0);
				if(pte && *pte & PTEVALID){
					x = PPN(*pte);
					if(x != pa)
						panic("mmukmap2: pa 0x%ux entry 0x%ux\n",
							pa, *pte);
					pgsz = BY2PG;
					pa += pgsz;
					va += pgsz;
					sync++;

					continue;
				}
			}
		}

		/*
		 * Not mapped. Check if it can be mapped using a big page -
		 * starts on a 4MiB boundary, size >= 4MiB and processor can do it.
		 * If not a big page, walk the walk, talk the talk.
		 * Sync is set.
		 */
		if(pse && (pa % (4*MiB)) == 0 && (pae >= pa+4*MiB)){
			*table = pa|PTESIZE|PTEWRITE|PTEUNCACHED|PTEVALID;
			pgsz = 4*MiB;
		}
		else{
			pte = mmuwalk(pdb, va, 2, 1);
			*pte = pa|PTEWRITE|PTEUNCACHED|PTEVALID;
			pgsz = BY2PG;
		}
		pa += pgsz;
		va += pgsz;
		sync++;
	}
	unlock(&mmukmaplock);

	/*
	 * If something was added
	 * then need to sync up.
	 */
	if(sync)
		putcr3(PADDR(pdb));

	return pa;
}
/*
 * Boot an application processor: clone the BSP's page tables with a
 * private Mach page, stash the AP's kernel entry point/pdb/apic at
 * the well-known offsets in the bootstrap code, then run the Intel
 * MP "Universal Startup Algorithm" (warm-reset vector + INIT/STARTUP
 * IPIs) and wait up to ~10s for the AP to mark itself online.
 */
static void
mpstartap(Apic* apic)
{
	ulong *apbootp, *pdb, *pte;
	Mach *mach, *mach0;
	int i, machno;
	uchar *p;

	mach0 = MACHP(0);

	/*
	 * Initialise the AP page-tables and Mach structure. The page-tables
	 * are the same as for the bootstrap processor with the exception of
	 * the PTE for the Mach structure.
	 * Xspanalloc will panic if an allocation can't be made.
	 */
	p = xspanalloc(4*BY2PG, BY2PG, 0);
	pdb = (ulong*)p;
	memmove(pdb, mach0->pdb, BY2PG);
	p += BY2PG;

	/* give this AP a private copy of the page table covering MACHADDR */
	if((pte = mmuwalk(pdb, MACHADDR, 1, 0)) == nil)
		return;
	memmove(p, KADDR(PPN(*pte)), BY2PG);
	*pte = PADDR(p)|PTEWRITE|PTEVALID;
	if(mach0->havepge)
		*pte |= PTEGLOBAL;
	p += BY2PG;

	mach = (Mach*)p;
	/* point MACHADDR's leaf PTE at this AP's own Mach page */
	if((pte = mmuwalk(pdb, MACHADDR, 2, 0)) == nil)
		return;
	*pte = PADDR(mach)|PTEWRITE|PTEVALID;
	if(mach0->havepge)
		*pte |= PTEGLOBAL;
	p += BY2PG;

	machno = apic->machno;
	MACHP(machno) = mach;
	mach->machno = machno;
	mach->pdb = pdb;
	mach->gdt = (Segdesc*)p;	/* filled by mmuinit */

	/*
	 * Tell the AP where its kernel vector and pdb are.
	 * The offsets are known in the AP bootstrap code.
	 */
	apbootp = (ulong*)(APBOOTSTRAP+0x08);
	*apbootp++ = (ulong)squidboy;	/* assembler jumps here eventually */
	*apbootp++ = PADDR(pdb);
	*apbootp = (ulong)apic;

	/*
	 * Universal Startup Algorithm.
	 */
	p = KADDR(0x467);		/* warm-reset vector */
	*p++ = PADDR(APBOOTSTRAP);
	*p++ = PADDR(APBOOTSTRAP)>>8;
	i = (PADDR(APBOOTSTRAP) & ~0xFFFF)/16;
	/* code assumes i==0 */
	if(i != 0)
		print("mp: bad APBOOTSTRAP\n");
	*p++ = i;
	*p = i>>8;
	coherence();

	nvramwrite(0x0F, 0x0A);	/* shutdown code: warm reset upon init ipi */
	lapicstartap(apic, PADDR(APBOOTSTRAP));
	/* poll for the AP to come online, 10ms at a time */
	for(i = 0; i < 1000; i++){
		if(apic->online)
			break;
		delay(10);
	}
	nvramwrite(0x0F, 0x00);
}
/*
 * Boot an application processor (older variant: completion is
 * detected via the mprdthi interrupt-routing mask rather than an
 * apic->online flag).  Clones the BSP page tables with a private
 * Mach page, publishes the entry vector/pdb/apic to the bootstrap
 * code, then performs the warm-reset + STARTUP-IPI dance.
 */
static void
mpstartap(Apic* apic)
{
	ulong *apbootp, *pdb, *pte;
	Mach *mach, *mach0;
	int i, machno;
	uchar *p;

	mach0 = MACHP(0);

	/*
	 * Initialise the AP page-tables and Mach structure. The page-tables
	 * are the same as for the bootstrap processor with the exception of
	 * the PTE for the Mach structure.
	 * Xspanalloc will panic if an allocation can't be made.
	 */
	p = xspanalloc(4*BY2PG, BY2PG, 0);
	pdb = (ulong*)p;
	memmove(pdb, mach0->pdb, BY2PG);
	p += BY2PG;

	/* private copy of the page table covering MACHADDR */
	if((pte = mmuwalk(pdb, MACHADDR, 1, 0)) == nil)
		return;
	memmove(p, KADDR(PPN(*pte)), BY2PG);
	*pte = PADDR(p)|PTEWRITE|PTEVALID;
	if(mach0->havepge)
		*pte |= PTEGLOBAL;
	p += BY2PG;

	mach = (Mach*)p;
	/* leaf PTE for MACHADDR points at this AP's own Mach page */
	if((pte = mmuwalk(pdb, MACHADDR, 2, 0)) == nil)
		return;
	*pte = PADDR(mach)|PTEWRITE|PTEVALID;
	if(mach0->havepge)
		*pte |= PTEGLOBAL;
	p += BY2PG;

	machno = apic->machno;
	MACHP(machno) = mach;
	mach->machno = machno;
	mach->pdb = pdb;
	mach->gdt = (Segdesc*)p;	/* filled by mmuinit */

	/*
	 * Tell the AP where its kernel vector and pdb are.
	 * The offsets are known in the AP bootstrap code.
	 */
	apbootp = (ulong*)(APBOOTSTRAP+0x08);
	*apbootp++ = (ulong)squidboy;	/* AP entry point */
	*apbootp++ = PADDR(pdb);
	*apbootp = (ulong)apic;

	/*
	 * Universal Startup Algorithm.
	 */
	p = KADDR(0x467);	/* BIOS warm-reset vector */
	*p++ = PADDR(APBOOTSTRAP);
	*p++ = PADDR(APBOOTSTRAP)>>8;
	i = (PADDR(APBOOTSTRAP) & ~0xFFFF)/16;
	*p++ = i;
	*p = i>>8;

	nvramwrite(0x0F, 0x0A);	/* shutdown code: warm reset on INIT IPI */
	lapicstartap(apic, PADDR(APBOOTSTRAP));
	/* wait (up to ~10s) for the AP's bit to appear in mprdthi */
	for(i = 0; i < 1000; i++){
		lock(&mprdthilock);
		if(mprdthi & ((1<<apic->apicno)<<24)){
			unlock(&mprdthilock);
			break;
		}
		unlock(&mprdthilock);
		delay(10);
	}
	nvramwrite(0x0F, 0x00);
}
/*
 * Finish building the kernel memory map from the ASM (e820-derived)
 * region list: map the remaining [vmunmapped, vmend) window with 2MiB
 * pages, then map every AsmMEMORY range at KSEG2 using the largest
 * page size whose alignment fits, counting pages per size in npg[].
 * Under ConfCrap, also populate the legacy conf.mem banks.
 */
void
asmmeminit(void)
{
	Proc *up = externup();
	int i, l;
	Asm* assem;
	PTE *pte, *pml4;
	uintptr va;
	uintmem hi, lo, mem, nextmem, pa;
#ifdef ConfCrap
	int cx;
#endif /* ConfCrap */

	assert(!((sys->vmunmapped|sys->vmend) & machp()->pgszmask[1]));

	/* physical memory backing the unmapped tail must be allocatable */
	if((pa = mmuphysaddr(sys->vmunused)) == ~0)
		panic("asmmeminit 1");
	pa += sys->vmunmapped - sys->vmunused;
	mem = asmalloc(pa, sys->vmend - sys->vmunmapped, 1, 0);
	if(mem != pa)
		panic("asmmeminit 2");
	DBG("pa %#llux mem %#llux\n", pa, mem);

	/* assume already 2MiB aligned*/
	assert(ALIGNED(sys->vmunmapped, 2*MiB));
	pml4 = UINT2PTR(machp()->pml4->va);
	while(sys->vmunmapped < sys->vmend) {
		l = mmuwalk(pml4, sys->vmunmapped, 1, &pte, asmwalkalloc);
		DBG("%#p l %d\n", sys->vmunmapped, l);
		*pte = pa|PtePS|PteRW|PteP;
		sys->vmunmapped += 2*MiB;
		pa += 2*MiB;
	}

#ifdef ConfCrap
	cx = 0;
#endif /* ConfCrap */
	for(assem = asmlist; assem != nil; assem = assem->next) {
		if(assem->type != AsmMEMORY)
			continue;
		va = KSEG2+assem->addr;
		print("asm: addr %#P end %#P type %d size %P\n",
			assem->addr, assem->addr+assem->size,
			assem->type, assem->size);

		lo = assem->addr;
		hi = assem->addr+assem->size;
		/* Convert a range into pages */
		for(mem = lo; mem < hi; mem = nextmem) {
			/* default step: next level-0 page boundary */
			nextmem = (mem + PGLSZ(0)) & ~machp()->pgszmask[0];

			/* Try large pages first */
			for(i = m->npgsz - 1; i >= 0; i--) {
				if((mem & machp()->pgszmask[i]) != 0)
					continue;
				if(mem + PGLSZ(i) > hi)
					continue;
				/* This page fits entirely within the range. */
				/* Mark it a usable */
				if((l = mmuwalk(pml4, va, i, &pte, asmwalkalloc)) < 0)
					panic("asmmeminit 3");

				*pte = mem|PteRW|PteP;
				if(l > 0)
					*pte |= PtePS;

				nextmem = mem + PGLSZ(i);
				va += PGLSZ(i);
				npg[i]++;

				break;
			}
		}

#ifdef ConfCrap
		/*
		 * Fill in conf crap.
		 */
		if(cx >= nelem(conf.mem))
			continue;
		lo = ROUNDUP(assem->addr, PGSZ);
//if(lo >= 600ull*MiB)
//    continue;
		conf.mem[cx].base = lo;
		hi = ROUNDDN(hi, PGSZ);
//if(hi > 600ull*MiB)
//  hi = 600*MiB;
		conf.mem[cx].npage = (hi - lo)/PGSZ;
		conf.npage += conf.mem[cx].npage;
		print("cm %d: addr %#llux npage %lud\n",
			cx, conf.mem[cx].base, conf.mem[cx].npage);
		cx++;
#endif /* ConfCrap */
	}
	print("%d %d %d\n", npg[0], npg[1], npg[2]);

#ifdef ConfCrap
	/*
	 * Fill in more conf crap.
	 * This is why I hate Plan 9.
	 */
	conf.upages = conf.npage;
	i = (sys->vmend - sys->vmstart)/PGSZ;	/* close enough */
	conf.ialloc = (i/2)*PGSZ;
	print("npage %llud upage %lud kpage %d\n",
		conf.npage, conf.upages, i);
#endif /* ConfCrap */
}