void
reboot(void *entry, void *code, ulong size)
{
	void (*f)(ulong, ulong, ulong);
	ulong *pdb;

	writeconf();

	/*
	 * the boot processor is cpu0.  execute this function on it
	 * so that the new kernel has the same cpu0.  this only matters
	 * because the hardware has a notion of which processor was the
	 * boot processor and we look at it at start up.
	 */
	if(m->machno != 0){
		procwired(up, 0);
		sched();
	}

	shutdown(0);

	/*
	 * should be the only processor running now
	 */
	if(m->machno != 0)
		print("on cpu%d (not 0)!\n", m->machno);
	if(active.machs)
		print("still have active ap processors!\n");

	print("shutting down...\n");
	delay(200);

	splhi();

	/* turn off buffered serial console */
	serialoq = nil;

	/* shutdown devices */
	chandevshutdown();
	arch->introff();

	/*
	 * Modify the machine page table to directly map the low 4MB of memory
	 * This allows the reboot code to turn off the page mapping
	 */
	pdb = m->pdb;
	pdb[PDX(0)] = pdb[PDX(KZERO)];
	mmuflushtlb(PADDR(pdb));

	/* setup reboot trampoline function */
	f = (void*)REBOOTADDR;
	memmove(f, rebootcode, sizeof(rebootcode));

	print("rebooting...\n");

	/* off we go - never to return */
	coherence();
	(*f)(PADDR(entry), PADDR(code), size);
}
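For orientation, a sketch of why the single-slot copy above identity-maps low memory, assuming the conventional 386 pc port definition of PDX (reproduced here for illustration, not taken from this file): each page-directory slot covers 4MB of virtual space, so copying the slot that maps KZERO..KZERO+4MB into slot 0 makes virtual addresses 0..4MB resolve to the same physical pages; the trampoline copied to REBOOTADDR, a low physical address, can then keep executing at the same address once it disables paging.

#define PDX(va)		((((ulong)(va))>>22) & 0x03FF)	/* top 10 bits: page-directory index */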
void
meminit(void)
{
	int i;
	Map *mp;
	Confmem *cm;
	ulong pa, *pte;
	ulong maxmem, lost;
	char *p;

	if(p = getconf("*maxmem"))
		maxmem = strtoul(p, 0, 0);
	else
		maxmem = 0;

	/*
	 * Set special attributes for memory between 640KB and 1MB:
	 *   VGA memory is writethrough;
	 *   BIOS ROM's/UMB's are uncached;
	 * then scan for useful memory.
	 */
	for(pa = 0xA0000; pa < 0xC0000; pa += BY2PG){
		pte = mmuwalk(m->pdb, (ulong)KADDR(pa), 2, 0);
		*pte |= PTEWT;
	}
	for(pa = 0xC0000; pa < 0x100000; pa += BY2PG){
		pte = mmuwalk(m->pdb, (ulong)KADDR(pa), 2, 0);
		*pte |= PTEUNCACHED;
	}
	mmuflushtlb(PADDR(m->pdb));

	umbscan();
	lowraminit();
	if(e820scan() < 0)
		ramscan(maxmem);

	/*
	 * Set the conf entries describing banks of allocatable memory.
	 */
	for(i=0; i<nelem(mapram) && i<nelem(conf.mem); i++){
		mp = &rmapram.map[i];
		cm = &conf.mem[i];
		cm->base = mp->addr;
		cm->npage = mp->size/BY2PG;
	}

	lost = 0;
	for(; i<nelem(mapram); i++)
		lost += rmapram.map[i].size;
	if(lost)
		print("meminit - lost %lud bytes\n", lost);

	if(MEMDEBUG)
		memdebug();
}
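The attribute bits applied above correspond, under the usual pc port definitions (quoted here as an assumption, not from this file), to the 386 page-level cache-control bits: PTEWT is PWT (write-through) and PTEUNCACHED is PCD (cache disable), which is why VGA memory gets write-through caching while the BIOS ROM range is left uncached.

#define PTEWT		0x00000008	/* page write-through (PWT) */
#define PTEUNCACHED	0x00000010	/* page cache disable (PCD) */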
static void
taskswitch(uintptr stack)
{
	Tss *tss;

	tss = m->tss;
	tss->rsp0[0] = (u32int)stack;
	tss->rsp0[1] = stack >> 32;
	tss->rsp1[0] = (u32int)stack;
	tss->rsp1[1] = stack >> 32;
	tss->rsp2[0] = (u32int)stack;
	tss->rsp2[1] = stack >> 32;
	mmuflushtlb();
}
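The pairs of 32-bit stores above reflect how this port lays out the amd64 TSS. A minimal sketch of the assumed Tss declaration (field names beyond rsp0..rsp2 are hypothetical): the hardware holds each RSPn as an unaligned 64-bit value, represented here as two 32-bit words so it can be written without unaligned 64-bit stores.

typedef struct Tss Tss;
struct Tss {
	u32int	_0_;		/* reserved */
	u32int	rsp0[2];	/* ring 0 stack pointer, low/high word */
	u32int	rsp1[2];	/* ring 1 stack pointer */
	u32int	rsp2[2];	/* ring 2 stack pointer */
	/* ist[], reserved words and iomap base follow */
};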
/* from ../pc: */
void
reboot(void *entry, void *code, ulong size)
{
//	writeconf();		// pass kernel environment to next kernel
	shutdown(0);

	/*
	 * should be the only processor running now
	 */

	print("shutting down...\n");
	delay(200);

	splhi();

	/* turn off buffered serial console */
	serialoq = nil;

	/* shutdown devices */
	chandevshutdown();

#ifdef FUTURE
	{
		ulong *pdb;

		/*
		 * Modify the machine page table to directly map the low 4MB of memory
		 * This allows the reboot code to turn off the page mapping
		 */
		pdb = m->pdb;
		pdb[PDX(0)] = pdb[PDX(KZERO)];
		mmuflushtlb(PADDR(pdb));
	}

	/* setup reboot trampoline function */
	{
		void (*f)(ulong, ulong, ulong) = (void*)REBOOTADDR;

		memmove(f, rebootcode, sizeof(rebootcode));
#else
	USED(entry, code, size);
#endif
	print("rebooting...\n");
#ifdef FUTURE
		/* off we go - never to return */
		(*f)(PADDR(entry), PADDR(code), size);
	}
#endif
	setupboot(0);		// reboot, don't halt
	exit(0);
}
/*
 *  Save the mach dependent part of the process state.
 */
void
procsave(Proc *p)
{
	uvlong t;

	cycles(&t);
	p->pcycles += t;

	/*
	 * While this processor is in the scheduler, the process could run
	 * on another processor and exit, returning the page tables to
	 * the free list where they could be reallocated and overwritten.
	 * When this processor eventually has to get an entry from the
	 * trashed page tables it will crash.
	 *
	 * If there's only one processor, this can't happen.
	 * You might think it would be a win not to do this in that case,
	 * especially on VMware, but it turns out not to matter.
	 */
	mmuflushtlb(PADDR(m->pdb));
}
KMap*
kmap(Page *page)
{
	uintptr *pte, pa, va;
	int x;

	pa = page->pa;
	if(cankaddr(pa) != 0)
		return (KMap*)KADDR(pa);

	x = splhi();
	va = KMAP + ((uintptr)up->kmapindex << PGSHIFT);
	pte = mmuwalk(m->pml4, va, 0, 1);
	if(pte == 0 || *pte & PTEVALID)
		panic("kmap: pa=%#p va=%#p", pa, va);
	*pte = pa|PTEWRITE|PTEVALID;
	up->kmapindex = (up->kmapindex + 1) % (1<<PTSHIFT);
	if(up->kmapindex == 0)
		mmuflushtlb();
	splx(x);

	return (KMap*)va;
}
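Callers pair kmap() with kunmap(). Because kmap only flushes the TLB when kmapindex wraps around, stale translations can linger, which is safe since a slot is not reused until the wrap has flushed them. A minimal usage sketch (zeropage is hypothetical; kunmap and the VA() macro are the usual Plan 9 counterparts):

void
zeropage(Page *pg)
{
	KMap *k;

	k = kmap(pg);			/* map the page into the KMAP window */
	memset((void*)VA(k), 0, BY2PG);	/* touch it through the mapping */
	kunmap(k);			/* release the slot */
}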
/*
 *  Save the mach dependent part of the process state.
 */
void
procsave(Proc *p)
{
	uvlong t;

	cycles(&t);
	p->kentry -= t;
	p->pcycles += t;

	if(p->fpstate == FPactive){
		if(p->state == Moribund)
			fpclear();
		else{
			/*
			 * Fpsave() stores without handling pending
			 * unmasked exceptions. Postnote() can't be called
			 * here as sleep() already has up->rlock, so
			 * the handling of pending exceptions is delayed
			 * until the process runs again and generates an
			 * emulation fault to activate the FPU.
			 */
			fpsave(&p->fpsave);
		}
		p->fpstate = FPinactive;
	}

	/*
	 * While this processor is in the scheduler, the process could run
	 * on another processor and exit, returning the page tables to
	 * the free list where they could be reallocated and overwritten.
	 * When this processor eventually has to get an entry from the
	 * trashed page tables it will crash.
	 *
	 * If there's only one processor, this can't happen.
	 * You might think it would be a win not to do this in that case,
	 * especially on VMware, but it turns out not to matter.
	 */
	mmuflushtlb(PADDR(m->pdb));
}
/* note: pdb must already be pinned */
static void
taskswitch(Page *pdb, ulong stack)
{
	HYPERVISOR_stack_switch(KDSEL, stack);
	mmuflushtlb(pdb);
}
/*
 *  If changing this routine, look also at sleep().  It
 *  contains a copy of the guts of sched().
 */
void
sched(void)
{
	Mach *m = machp();
	Proc *p;

	if(m->ilockdepth)
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
			m->machno,
			m->ilockdepth,
			m->externup? m->externup->lastilock: nil,
			(m->externup && m->externup->lastilock)? m->externup->lastilock->_pc: 0,
			getcallerpc(&p+2));

	kstackok();
	if(m->externup){
		/*
		 * Delay the sched until the process gives up the locks
		 * it is holding.  This avoids dumb lock loops.
		 * Don't delay if the process is Moribund.
		 * It called sched to die.
		 * But do sched eventually.  This avoids a missing unlock
		 * from hanging the entire kernel.
		 * But don't reschedule procs holding palloc or procalloc.
		 * Those are far too important to be holding while asleep.
		 *
		 * This test is not exact.  There can still be a few
		 * instructions in the middle of taslock when a process
		 * holds a lock but Lock.p has not yet been initialized.
		 */
		if(m->externup->nlocks)
		if(m->externup->state != Moribund)
		if(m->externup->delaysched < 20
		|| pga.Lock.p == m->externup
		|| procalloc.Lock.p == m->externup){
			m->externup->delaysched++;
			run.delayedscheds++;
			return;
		}
		m->externup->delaysched = 0;

		splhi();
		/* statistics */
		if(m->externup->nqtrap == 0 && m->externup->nqsyscall == 0)
			m->externup->nfullq++;
		m->cs++;

		procsave(m->externup);
		mmuflushtlb(m->pml4->pa);
		if(setlabel(&m->externup->sched)){
			procrestore(m->externup);
			spllo();
			return;
		}
		/*debug*/gotolabel(&m->sched);
	}

	m->inidle = 1;
	p = runproc();	/* core 0 never returns */
	m->inidle = 0;

	if(!p->edf){
		updatecpu(p);
		p->priority = reprioritize(p);
	}
	if(nosmp){
		if(p != m->readied)
			m->schedticks = m->ticks + HZ/10;
		m->readied = 0;
	}
	m->externup = p;
	m->qstart = m->ticks;
	m->externup->nqtrap = 0;
	m->externup->nqsyscall = 0;
	m->externup->state = Running;
	//m->externup->mach = m;
	m->externup->mach = sys->machptr[m->machno];
	m->proc = m->externup;
//	iprint("m->externup->sched.sp %p * %p\n", up->sched.sp,
//		*(void **) m->externup->sched.sp);
	mmuswitch(m->externup);

	assert(!m->externup->wired || m->externup->wired == m);
	if(0) hi("gotolabel\n");
	/*debug*/gotolabel(&m->externup->sched);
}
/*
 *  sleep if a condition is not true.  Another process will
 *  awaken us after it sets the condition.  When we awaken
 *  the condition may no longer be true.
 *
 *  we lock both the process and the rendezvous to keep r->p
 *  and p->r synchronized.
 */
void
sleep(Rendez *r, int (*f)(void*), void *arg)
{
	Mach *m = machp();
	Mpl pl;

	pl = splhi();

	if(m->externup->nlocks)
		print("process %d sleeps with %d locks held, last lock %#p locked at pc %#p, sleep called from %#p\n",
			m->externup->pid, m->externup->nlocks, m->externup->lastlock,
			m->externup->lastlock->_pc, getcallerpc(&r));
	lock(r);
	lock(&m->externup->rlock);
	if(r->_p){
		print("double sleep called from %#p, %d %d\n",
			getcallerpc(&r), r->_p->pid, m->externup->pid);
		dumpstack();
	}

	/*
	 *  Wakeup only knows there may be something to do by testing
	 *  r->p in order to get something to lock on.
	 *  Flush that information out to memory in case the sleep is
	 *  committed.
	 */
	r->_p = m->externup;

	if((*f)(arg) || m->externup->notepending){
		/*
		 *  if condition happened or a note is pending
		 *  never mind
		 */
		r->_p = nil;
		unlock(&m->externup->rlock);
		unlock(r);
	} else {
		/*
		 *  now we are committed to
		 *  change state and call scheduler
		 */
		if(m->externup->trace)
			proctrace(m->externup, SSleep, 0);
		m->externup->state = Wakeme;
		m->externup->r = r;

		/* statistics */
		m->cs++;

		procsave(m->externup);
		mmuflushtlb(m->pml4->pa);
		if(setlabel(&m->externup->sched)){
			/*
			 *  here when the process is awakened
			 */
			procrestore(m->externup);
			spllo();
		} else {
			/*
			 *  here to go to sleep (i.e. stop Running)
			 */
			unlock(&m->externup->rlock);
			unlock(r);
			/*debug*/gotolabel(&m->sched);
		}
	}

	if(m->externup->notepending){
		m->externup->notepending = 0;
		splx(pl);
		if(m->externup->procctl == Proc_exitme && m->externup->closingfgrp)
			forceclosefgrp();
		error(Eintr);
	}

	splx(pl);
}
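In both sched() and sleep(), the context switch hinges on the setlabel/gotolabel pair, the kernel's setjmp/longjmp analogue: setlabel() saves PC and SP and returns 0; a later gotolabel() on the same Label resumes execution just after that setlabel() with a nonzero return. A stripped-down sketch of the pattern (illustrative restructuring, not additional kernel code):

	if(setlabel(&m->externup->sched)){
		/* resumed later, via gotolabel(&m->externup->sched) */
		procrestore(m->externup);
		spllo();
	} else {
		/* first pass: switch to the scheduler's own context */
		gotolabel(&m->sched);	/* does not return to this branch */
	}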
static void
ramscan(ulong maxmem)
{
	ulong *k0, kzero, map, maxkpa, maxpa, pa, *pte, *table, *va, vbase, x;
	int nvalid[NMemType];

	/*
	 * The bootstrap code has created a prototype page
	 * table which maps the first MemMin of physical memory to KZERO.
	 * The page directory is at m->pdb and the first page of
	 * free memory is after the per-processor MMU information.
	 */
	pa = MemMin;

	/*
	 * Check if the extended memory size can be obtained from the CMOS.
	 * If it's 0 then it's either not known or >= 64MB. Always check
	 * at least 24MB in case there's a memory gap (up to 8MB) below 16MB;
	 * in this case the memory from the gap is remapped to the top of
	 * memory.
	 * The value in CMOS is supposed to be the number of KB above 1MB.
	 */
	if(maxmem == 0){
		x = (nvramread(0x18)<<8)|nvramread(0x17);
		if(x == 0 || x >= (63*KB))
			maxpa = MemMax;
		else
			maxpa = MB+x*KB;
		if(maxpa < 24*MB)
			maxpa = 24*MB;
	}else
		maxpa = maxmem;
	maxkpa = (u32int)-KZERO;	/* 2^32 - KZERO */

	/*
	 * March up memory from MemMin to maxpa 1MB at a time,
	 * mapping the first page and checking the page can
	 * be written and read correctly. The page tables are created here
	 * on the fly, allocating from low memory as necessary.
	 */
	k0 = (ulong*)KADDR(0);
	kzero = *k0;
	map = 0;
	x = 0x12345678;
	memset(nvalid, 0, sizeof(nvalid));

	/*
	 * Can't map memory to KADDR(pa) when we're walking because
	 * can only use KADDR for relatively low addresses.
	 * Instead, map each 4MB we scan to the virtual address range
	 * MemMin->MemMin+4MB while we are scanning.
	 */
	vbase = MemMin;
	while(pa < maxpa){
		/*
		 * Map the page. Use mapalloc(&rmapram, ...) to make
		 * the page table if necessary, it will be returned to the
		 * pool later if it isn't needed.  Map in a fixed range
		 * (the second 4M) because high physical addresses cannot
		 * be passed to KADDR.
		 */
		va = (void*)(vbase + pa%(4*MB));
		table = &m->pdb[PDX(va)];
		if(pa%(4*MB) == 0){
			if(map == 0 && (map = mapalloc(&rmapram, 0, BY2PG, BY2PG)) == 0)
				break;
			memset(KADDR(map), 0, BY2PG);
			*table = map|PTEWRITE|PTEVALID;
			memset(nvalid, 0, sizeof(nvalid));
		}
		table = KADDR(PPN(*table));
		pte = &table[PTX(va)];

		*pte = pa|PTEWRITE|PTEUNCACHED|PTEVALID;
		mmuflushtlb(PADDR(m->pdb));
		/*
		 * Write a pattern to the page and write a different
		 * pattern to a possible mirror at KZERO. If the data
		 * reads back correctly the chunk is some type of RAM (possibly
		 * a linearly-mapped VGA framebuffer, for instance...) and
		 * can be cleared and added to the memory pool. If not, the
		 * chunk is marked uncached and added to the UMB pool if <16MB
		 * or is marked invalid and added to the UPA pool.
		 */
		*va = x;
		*k0 = ~x;
		if(*va == x){
			nvalid[MemRAM] += MB/BY2PG;
			mapfree(&rmapram, pa, MB);
			do{
				*pte++ = pa|PTEWRITE|PTEVALID;
				pa += BY2PG;
			}while(pa % MB);
			mmuflushtlb(PADDR(m->pdb));
			/* memset(va, 0, MB); so damn slow to memset all of memory */
		}
		else if(pa < 16*MB){
			nvalid[MemUMB] += MB/BY2PG;
			mapfree(&rmapumb, pa, MB);
			do{
				*pte++ = pa|PTEWRITE|PTEUNCACHED|PTEVALID;
				pa += BY2PG;
			}while(pa % MB);
		}
		else{
			nvalid[MemUPA] += MB/BY2PG;
			mapfree(&rmapupa, pa, MB);
			*pte = 0;
			pa += MB;
		}

		/*
		 * Done with this 4MB chunk, review the options:
		 * 1) not physical memory and >=16MB - invalidate the PDB entry;
		 * 2) physical memory - use the 4MB page extension if possible;
		 * 3) not physical memory and <16MB - use the 4MB page extension
		 *    if possible;
		 * 4) mixed or no 4MB page extension - commit the already
		 *    initialised space for the page table.
		 */
		if(pa%(4*MB) == 0 && pa >= 32*MB && nvalid[MemUPA] == (4*MB)/BY2PG){
			/*
			 * If we encounter a 4MB chunk of missing memory
			 * at a sufficiently high offset, call it the end of
			 * memory.  Otherwise we run the risk of thinking
			 * that video memory is real RAM.
			 */
			break;
		}
		if(pa <= maxkpa && pa%(4*MB) == 0){
			table = &m->pdb[PDX(KADDR(pa - 4*MB))];
			if(nvalid[MemUPA] == (4*MB)/BY2PG)
				*table = 0;
			else if(nvalid[MemRAM] == (4*MB)/BY2PG && (m->cpuiddx & 0x08))
				*table = (pa - 4*MB)|PTESIZE|PTEWRITE|PTEVALID;
			else if(nvalid[MemUMB] == (4*MB)/BY2PG && (m->cpuiddx & 0x08))
				*table = (pa - 4*MB)|PTESIZE|PTEWRITE|PTEUNCACHED|PTEVALID;
			else{
				*table = map|PTEWRITE|PTEVALID;
				map = 0;
			}
		}
		mmuflushtlb(PADDR(m->pdb));
		x += 0x3141526;
	}

	/*
	 * If we didn't reach the end of the 4MB chunk, that part won't
	 * be mapped.  Commit the already initialised space for the page table.
	 */
	if(pa % (4*MB) && pa <= maxkpa){
		m->pdb[PDX(KADDR(pa))] = map|PTEWRITE|PTEVALID;
		map = 0;
	}
	if(map)
		mapfree(&rmapram, map, BY2PG);

	m->pdb[PDX(vbase)] = 0;
	mmuflushtlb(PADDR(m->pdb));

	mapfree(&rmapupa, pa, (u32int)-pa);
	*k0 = kzero;
}
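The heart of ramscan is the write/read-back probe: write a pattern to the candidate page and the complement to physical address 0, so an address range that merely mirrors low memory (or floats) fails the read-back. A self-contained sketch of the idea (probe_ram and its parameters are illustrative, not kernel API):

static int
probe_ram(volatile ulong *candidate, volatile ulong *phys0, ulong pattern)
{
	ulong saved;
	int isram;

	saved = *phys0;
	*candidate = pattern;	/* write pattern to the page under test */
	*phys0 = ~pattern;	/* clobber a possible mirror at address 0 */
	isram = (*candidate == pattern);
	*phys0 = saved;		/* restore low memory */
	return isram;
}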
void
reboot(void *entry, void *code, ulong size)
{
	int i;
	void (*f)(ulong, ulong, ulong);
	ulong *pdb;

	/* we do pass options to the kernel we loaded, however, at CONFADDR. */
//	writeconf();

	/*
	 * the boot processor is cpu0.  execute this function on it
	 * so that the new kernel has the same cpu0.  this only matters
	 * because the hardware has a notion of which processor was the
	 * boot processor and we look at it at start up.
	 */
	if(m->machno != 0){
		procwired(up, 0);
		sched();
	}

	if(conf.nmach > 1){
		/*
		 * the other cpus could be holding locks that will never get
		 * released (e.g., in the print path) if we put them into
		 * reset now, so force them to shutdown gracefully first.
		 */
		lock(&active);
		active.rebooting = 1;
		unlock(&active);
		shutdown(0);
		if(arch->resetothers)
			arch->resetothers();
		delay(20);
	}

	/*
	 * should be the only processor running now
	 */
	active.machs = 0;
	if(m->machno != 0)
		print("on cpu%d (not 0)!\n", m->machno);

	print("shutting down...\n");
	delay(200);

	splhi();

	/* turn off buffered serial console */
	serialoq = nil;

	/* shutdown devices */
	chandevshutdown();
	arch->introff();

	/*
	 * Modify the machine page table to directly map low memory
	 * This allows the reboot code to turn off the page mapping
	 */
	pdb = m->pdb;
	for(i = 0; i < LOWPTEPAGES; i++)
		pdb[PDX(i*4*MB)] = pdb[PDX(KZERO + i*4*MB)];
	mmuflushtlb(PADDR(pdb));

	/* setup reboot trampoline function */
	f = (void*)REBOOTADDR;
	memmove(f, rebootcode, sizeof(rebootcode));

	print("rebooting...\n");

	/* off we go - never to return */
	coherence();
	(*f)(PADDR(entry), PADDR(code), size);
}