/*
 * Switch this (AC) core to its own page-table root: load the core's
 * PML4 physical address into CR3.  The CR3 write itself flushes
 * non-global TLB entries; nothing else is invalidated here.
 */
void
acmmuswitch(void)
{
	extern Page mach0pml4;

	/* trace three roots: this core's, the boot pml4 page, and cpu0's */
	DBG("acmmuswitch mpl4 %#p mach0pml4 %#p m0pml4 %#p\n",
		machp()->MMU.pml4->pa, mach0pml4.pa, sys->machptr[0]->MMU.pml4->pa);
	cr3put(machp()->MMU.pml4->pa);
}
/*
 * Drop this process's user mappings and flush the TLB.
 * Only the first pml4->daddr entries of the top-level table were ever
 * filled in, so just those are zeroed before CR3 is reloaded (the CR3
 * write performs the actual TLB flush).
 *
 * The argument u is unused; it is kept for the existing call interface.
 * Fix: removed an unused local `Proc *up = externup();`.
 */
void
mmuflushtlb(uint64_t u)
{
	m->tlbpurge++;	/* NOTE(review): bare `m` here vs machp() below — confirm they alias */
	if(machp()->pml4->daddr){
		memset(UINT2PTR(machp()->pml4->va), 0, machp()->pml4->daddr*sizeof(PTE));
		machp()->pml4->daddr = 0;
	}
	cr3put(machp()->pml4->pa);
}
/*
 * Check if the AC kernel (mach) stack has more than 4*KiB free.
 * Do not call panic, the stack is gigantic.
 */
static void
acstackok(void)
{
	char dummy;
	char *sstart;

	/*
	 * &dummy approximates the current stack pointer; sstart is the
	 * computed bottom of the mach stack (stack sits below the Mach
	 * page and its 4 page-table pages — assumes that layout, confirm).
	 */
	sstart = (char *)machp() - PGSZ - 4*PTSZ - MACHSTKSZ;
	if(&dummy < sstart + 4*KiB){
		print("ac kernel stack overflow, cpu%d stopped\n", machp()->machno);
		DONE();
	}
}
/*
 * Per-core scheduler entry point; always splhi()'ed.
 * Marks the core idle, joins the run set, records the label that
 * sched() longjmps back to, retires the outgoing process (if any),
 * then calls sched() — never returns.
 */
void
schedinit(void)		/* never returns */
{
	Edf *e;

	machp()->inidle = 1;
	machp()->proc = nil;
	ainc(&run.nmach);
	/* save the resume point that sched() jumps back to on this core */
	setlabel(&machp()->sched);
	Proc *up = externup();
	if(infected_with_std()){
		print("mach %d got an std from %s (pid %d)!\n",
			machp()->machno,
			up ? up->text : "*notext",
			up ? up->pid : -1
		);
		disinfect_std();
	}

	if(up) {
		/* close out EDF accounting for the process we are leaving */
		if((e = up->edf) && (e->flags & Admitted))
			edfrecord(up);
		machp()->qstart = 0;
		machp()->qexpired = 0;
		coherence();
		machp()->proc = 0;
		switch(up->state) {
		case Running:
			/* still runnable: back on the run queue */
			ready(up);
			break;
		case Moribund:
			up->state = Dead;
			stopac();
			edfstop(up);
			if (up->edf)
				free(up->edf);
			up->edf = nil;

			/*
			 * Holding locks from pexit:
			 *	procalloc
			 *	pga
			 * mmurelease must run before the pga unlock.
			 */
			mmurelease(up);
			unlock(&pga.l);

			psrelease(up);
			unlock(&procalloc.l);
			break;
		}
		up->mach = nil;
		/* must happen before this core forgets up (see updatecpu) */
		updatecpu(up);
		machp()->externup = nil;
	}
	sched();
}
/*
 * Per-tick scheduling hook: on cpu0 trigger the once-a-second
 * rebalance, and flag the running process for delayed rescheduling
 * when something of higher priority (or, after its quantum, anything
 * ready) is waiting.
 */
void
hzsched(void)
{
	Proc *up = externup();
	/* once a second, rebalance will reprioritize ready procs */
	if(machp()->machno == 0)
		rebalance();

	/* unless preempted, get to run for at least 100ms */
	/* NOTE(review): mixes machp()->ticks with bare m->schedticks/m->readied — confirm `m` aliases machp() */
	if(anyhigher()
	|| (!up->fixedpri && machp()->ticks > m->schedticks && anyready())){
		m->readied = nil;	/* avoid cooperative scheduling */
		up->delaysched++;
	}
}
void kstackok(void) { Proc *up = externup(); if(up == nil) { uintptr_t *stk = (uintptr_t*)machp()->stack; if(*stk != STACKGUARD) panic("trap: mach %d machstk went through bottom %p\n", machp()->machno, machp()->stack); } else { uintptr_t *stk = (uintptr_t*)up->kstack; if(*stk != STACKGUARD) panic("trap: proc %d kstack went through bottom %p\n", up->pid, up->kstack); } }
/*
 * Act on p->procctl for process p.
 * Called splhi() by notify().  See comment in notify for the
 * reasoning.
 */
void
procctl(Proc *p)
{
	Mach *m = machp();
	Mpl pl;
	char *state;

	switch(p->procctl) {
	case Proc_exitbig:
		spllo();
		pexit("Killed: Insufficient physical memory", 1);

	case Proc_exitme:
		spllo();		/* pexit has locks in it */
		pexit("Killed", 1);

	case Proc_traceme:
		/* only stop a traced process that has a pending note */
		if(p->nnote == 0)
			return;
		/* No break */

	case Proc_stopme:
		p->procctl = 0;
		state = p->psstate;
		p->psstate = "Stopped";
		/* free a waiting debugger */
		pl = spllo();
		qlock(&p->debug);
		if(p->pdbg) {
			wakeup(&p->pdbg->sleep);
			p->pdbg = 0;
		}
		qunlock(&p->debug);
		/* must be splhi before setting state and calling sched() */
		splhi();
		p->state = Stopped;
		sched();
		/* resumed: restore the displayed state */
		p->psstate = state;
		splx(pl);
		return;

	case Proc_toac:
		p->procctl = 0;
		/*
		 * This pretends to return from the system call,
		 * by moving to a core, but never returns (unless
		 * the process gets moved back to a TC.)
		 */
		spllo();
		runacore();
		return;

	case Proc_totc:
		p->procctl = 0;
		if(p != m->externup)
			panic("procctl: stopac: p != up");
		spllo();
		stopac();
		return;
	}
}
/* * called when a process writes to an interface's 'data' */ static void ipifckick(void *x) { Mach *m = machp(); Conv *c = x; Block *bp; Ipifc *ifc; bp = qget(c->wq); if(bp == nil) return; ifc = (Ipifc*)c->ptcl; if(!canrlock(ifc)){ freeb(bp); return; } if(waserror()){ runlock(ifc); nexterror(); } if(ifc->medium == nil || ifc->medium->pktin == nil) freeb(bp); else (*ifc->medium->pktin)(c->p->f, ifc, bp); runlock(ifc); poperror(); }
/*
 * sleep() with a timeout: arm a relative timer for ms milliseconds
 * that will wake the rendezvous via twakeup, then sleep on r with
 * condition fn(arg).  The timer is always torn down afterwards, even
 * if the sleep errors out (waserror/nexterror path).
 */
void
tsleep(Rendez *r, int (*fn)(void*), void *arg, int32_t ms)
{
	Mach *m = machp();
	if (m->externup->tt){
		/* a previous timer is still queued; remove it first */
		print("tsleep: timer active: mode %d, tf %#p\n",
			m->externup->tmode, m->externup->tf);
		timerdel(m->externup);
	}
	m->externup->tns = MS2NS(ms);
	m->externup->tf = twakeup;
	m->externup->tmode = Trelative;
	m->externup->ta = m->externup;
	m->externup->trend = r;
	m->externup->tfn = fn;
	timeradd(m->externup);

	if(waserror()){
		/* error during sleep: disarm the timer before propagating */
		timerdel(m->externup);
		nexterror();
	}
	/* tfn (a sibling helper, not the fn argument) folds in the timeout check */
	sleep(r, tfn, arg);
	if (m->externup->tt)
		timerdel(m->externup);
	m->externup->twhen = 0;
	poperror();
}
/*
 * Walk the page tables rooted at pml4 down to the requested level for
 * va, returning the level reached and the PTE pointer in *ret.
 * If alloc is non-nil, missing intermediate tables are allocated (and
 * zeroed); otherwise the walk stops early at a missing or large
 * (PtePS) entry.  Returns -1 if allocation fails.
 *
 * Fix: the allocation-failure path returned while still splhi(),
 * leaking the raised priority level; it now restores it with splx.
 */
int
mmuwalk(PTE* pml4, uintptr_t va, int level, PTE** ret, uint64_t (*alloc)(usize))
{
	int l;
	uintmem pa;
	PTE *pte;
	Mpl pl;

	pl = splhi();
	if(DBGFLG > 1)
		DBG("mmuwalk%d: va %#p level %d\n", machp()->machno, va, level);
	pte = &pml4[PTLX(va, 3)];
	for(l = 3; l >= 0; l--){
		if(l == level)
			break;
		if(!(*pte & PteP)){
			if(alloc == nil)
				break;
			pa = alloc(PTSZ);
			if(pa == ~0){
				splx(pl);	/* was returning with the PL still raised */
				return -1;
			}
			memset(UINT2PTR(KADDR(pa)), 0, PTSZ);
			*pte = pa|PteRW|PteP;
		}
		else if(*pte & PtePS)
			break;	/* large page: stop at this level */
		pte = UINT2PTR(KADDR(PPN(*pte)));
		pte += PTLX(va, l-1);
	}
	*ret = pte;
	splx(pl);
	return l;
}
/*
 * Translate a kernel virtual address to its physical address by
 * walking this core's page tables; returns ~0 if the walk fails.
 */
uintmem
mmuphysaddr(uintptr_t va)
{
	int l;
	PTE *pte;
	uintmem mask, pa;

	/*
	 * Given a VA, find the PA.
	 * This is probably not the right interface,
	 * but will do as an experiment. Usual
	 * question, should va be void* or uintptr?
	 */
	l = mmuwalk(UINT2PTR(machp()->MMU.pml4->va), va, 0, &pte, nil);
	DBG("physaddr: va %#p l %d\n", va, l);
	if(l < 0)
		return ~0;

	/* combine the frame bits at level l with the in-page offset */
	mask = PGLSZ(l)-1;
	pa = (*pte & ~mask) + (va & mask);
	DBG("physaddr: l %d va %#p pa %#llux\n", l, va, pa);

	return pa;
}
/*
 * associate an address with the interface.  This wipes out any previous
 * addresses.  This is a macro that means, remove all the old interfaces
 * and add a new one.
 * Returns nil on success or an error string.
 * Fix: removed an unused local `Mach *m = machp();`.
 */
static char*
ipifcconnect(Conv* c, char **argv, int argc)
{
	char *err;
	Ipifc *ifc;

	ifc = (Ipifc*)c->ptcl;

	if(ifc->medium == nil)
		return "ipifc not yet bound to device";

	if(waserror()){
		wunlock(ifc);
		nexterror();
	}
	wlock(ifc);
	/* remove every existing logical interface under the write lock */
	while(ifc->lifc){
		err = ipifcremlifc(ifc, ifc->lifc);
		if(err)
			error(err);
	}
	wunlock(ifc);
	poperror();

	err = ipifcadd(ifc, argv, argc, 0, nil);
	if(err)
		return err;

	Fsconnected(c, nil);
	return nil;
}
/*
 * Return proc's intermediate page-table pages (levels 1..3) to the
 * proc's level-0 list, clearing the parent PTE that pointed at each
 * page.  If clear is set, the page contents themselves are zeroed for
 * the lower levels.  Finally resets this core's pml4 entry count.
 */
static void
mmuptpfree(Proc* proc, int clear)
{
	int l;
	PTE *pte;
	Page **last, *page;

	for(l = 1; l < 4; l++) {
		last = &proc->MMU.mmuptp[l];
		if(*last == nil)
			continue;
		for(page = *last; page != nil; page = page->next) {
			//what is right here? 2 or 1?
			/* NOTE(review): the clear cutoff level above is unresolved upstream too */
			if(l <= 2 && clear)
				memset(UINT2PTR(page->va), 0, PTSZ);
			/* unhook this table from its parent (page->prev) */
			pte = UINT2PTR(page->prev->va);
			pte[page->daddr] = 0;
			last = &page->next;
		}
		/* splice the whole level-l chain onto the front of level 0 */
		*last = proc->MMU.mmuptp[0];
		proc->MMU.mmuptp[0] = proc->MMU.mmuptp[l];
		proc->MMU.mmuptp[l] = nil;
	}
	machp()->MMU.pml4->daddr = 0;
}
/*
 * go to user space: final accounting before returning to user mode.
 * Updates the user-visible Tos structure at the top of the user stack
 * with cycle counts, pid, and the core the process will run on.
 * NOTE(review): this file contains a second kexit variant with
 * slightly different field paths (mp->NIX.nixtype, wrmsr of FSbase) —
 * these look like chunks of two source trees; confirm which is live.
 */
void
kexit(Ureg* u)
{
	Mach *m = machp();
	uint64_t t;
	Tos *tos;
	Mach *mp;

	/*
	 * precise time accounting, kernel exit
	 * initialized in exec, sysproc.c
	 */
	tos = (Tos*)(USTKTOP-sizeof(Tos));
	cycles(&t);
	tos->kcycles += t - m->externup->kentry;
	tos->pcycles = m->externup->pcycles;
	tos->pid = m->externup->pid;

	/* report the AC core if the process runs there, else this core */
	if (m->externup->ac != nil)
		mp = m->externup->ac;
	else
		mp = m;
	tos->core = mp->machno;
	tos->nixtype = mp->nixtype;
	//_pmcupdate(m);
	/*
	 * The process may change its core.
	 * Be sure it has the right cyclefreq.
	 */
	tos->cyclefreq = mp->cyclefreq;
}
/*
 * wstat on a network interface file: allows the owner to change the
 * owner (uid) and mode of file f.  Returns the number of bytes of
 * the stat message consumed; raises on permission or decode failure.
 */
int32_t
netifwstat(Netif *nif, Chan *c, uint8_t *db, int32_t n)
{
	Mach *m = machp();
	Dir *dir;
	Netfile *f;
	int l;

	f = nif->f[NETID(c->qid.path)];
	if(f == 0)
		error(Enonexist);

	if(netown(f, m->externup->user, OWRITE) < 0)
		error(Eperm);

	/* decode the machine-independent stat buffer; strings land after dir[0] */
	dir = smalloc(sizeof(Dir)+n);
	l = convM2D(db, n, &dir[0], (char*)&dir[1]);
	if(l == 0){
		free(dir);
		error(Eshortstat);
	}
	if(!emptystr(dir[0].uid))
		/* NOTE(review): strncpy may leave owner unterminated at KNAMELEN — presumably the fixed-width name convention; confirm */
		strncpy(f->owner, dir[0].uid, KNAMELEN);
	if(dir[0].mode != ~0UL)
		f->mode = dir[0].mode;
	free(dir);
	return l;
}
/*
 * Update the cpu time average for this particular process,
 * which is about to change from up -> not up or vice versa.
 * p->lastupdate is the last time an updatecpu happened.
 *
 * The cpu time average is a decaying average that lasts
 * about D clock ticks.  D is chosen to be approximately
 * the cpu time of a cpu-intensive "quick job".  A job has to run
 * for approximately D clock ticks before we home in on its
 * actual cpu usage.  Thus if you manage to get in and get out
 * quickly, you won't be penalized during your burst.  Once you
 * start using your share of the cpu for more than about D
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
 * below all the other quick jobs.  Interactive tasks, because
 * they basically always use less than their fair share of cpu,
 * will be rewarded.
 *
 * If the process has not been running, then we want to
 * apply the filter
 *
 *	cpu = cpu * (D-1)/D
 *
 * n times, yielding
 *
 *	cpu = cpu * ((D-1)/D)^n
 *
 * but D is big enough that this is approximately
 *
 *	cpu = cpu * (D-n)/D
 *
 * so we use that instead.
 *
 * If the process has been running, we apply the filter to
 * 1 - cpu, yielding a similar equation.  Note that cpu is
 * stored in fixed point (* 1000).
 *
 * Updatecpu must be called before changing up, in order
 * to maintain accurate cpu usage statistics.  It can be called
 * at any time to bring the stats for a given proc up-to-date.
 */
static void
updatecpu(Proc *p)
{
	Mach *m = machp();
	int D, n, t, ocpu;

	/* EDF processes are scheduled by deadline, not by this average */
	if(p->edf)
		return;

	//t = sys->ticks*Scaling + Scaling/2;
	t = sys->machptr[0]->ticks*Scaling + Scaling/2;	//Originally MACHP(0)
	n = t - p->lastupdate;
	p->lastupdate = t;

	if(n == 0)
		return;
	D = run.schedgain*HZ*Scaling;
	if(n > D)
		n = D;	/* clamp: after D ticks the filter has fully decayed */

	ocpu = p->cpu;
	if(p != m->externup)
		/* idle process: decay toward 0 */
		p->cpu = (ocpu*(D-n))/D;
	else{
		/* running process: decay (1000 - cpu) toward 0, i.e. cpu toward 1000 */
		t = 1000 - ocpu;
		t = (t*(D-n))/D;
		p->cpu = 1000 - t;
	}

	//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
}
/*
 * AC-core syscall entry.  Currently unimplemented: panics
 * unconditionally.  The #if 0 block below preserves the intended
 * hand-off (save state, post ICCSYSCALL to the TC, ready the proc,
 * and re-enter the AC scheduler) for future reference.
 */
void
acsyscall(void)
{
	panic("acsyscall");
#if 0
	Proc *p;

	/*
	 * If we saved the Ureg into m->proc->dbgregs,
	 * There's nothing else we have to do.
	 * Otherwise, we should m->proc->dbgregs = u;
	 */
	DBG("acsyscall: cpu%d\n", machp()->machno);

	_pmcupdate(m);
	p = m->proc;
	p->actime1 = fastticks(nil);
	m->syscall++;	/* would also count it in the TS core */
	m->icc->rc = ICCSYSCALL;
	m->cr2 = cr2get();
	fpuprocsave(p);
	_pmcupdate(m);
	mfence();
	m->icc->fn = nil;
	ready(p);
	/*
	 * The next call is probably going to make us jmp
	 * into user code, forgetting all our state in this
	 * stack, upon the next syscall.
	 * We don't nest calls in the current stack for too long.
	 */
	acsched();
#endif
}
/*
 * go to user space: final accounting before returning to user mode.
 * Updates the user-visible Tos structure at the top of the user stack
 * (cycle counts, pid, core number/type, cycle frequency) and loads
 * the process's TLS base into the FS-base MSR.
 */
void
kexit(Ureg* u)
{
	Proc *up = externup();
	uint64_t t;
	Tos *tos;
	Mach *mp;

	/*
	 * precise time accounting, kernel exit
	 * initialized in exec, sysproc.c
	 */
	tos = (Tos*)(USTKTOP-sizeof(Tos));
	cycles(&t);
	tos->kcycles += t - up->kentry;
	tos->pcycles = up->pcycles;
	tos->pid = up->pid;

	/* report the AC core if the process runs there, else this core */
	if (up->ac != nil)
		mp = up->ac;
	else
		mp = machp();
	tos->core = mp->machno;
	tos->nixtype = mp->NIX.nixtype;
	//_pmcupdate(m);
	/*
	 * The process may change its core.
	 * Be sure it has the right cyclefreq.
	 */
	tos->cyclefreq = mp->cyclefreq;
	/* thread local storage */
	wrmsr(FSbase, up->tls);
}
/*
 * Set performance-monitoring counter regno to v on core coreno.
 * Local core: write the hardware counter directly via setctr().
 * Remote core: stage the value in that core's PmcCtr slot under
 * pmclock; waitnotstale() gets the remote core to apply it when an
 * IPI is warranted, otherwise the lock is simply released.
 * Returns setctr()'s result locally, 0 for the remote path.
 *
 * Fix: the local-path debug print was labelled "int getctr" — this is
 * the set path; it now says "int setctr".
 */
int
pmcsetctr(uint32_t coreno, uint64_t v, uint32_t regno)
{
	PmcCtr *p;
	Mach *mp;

	if(coreno == machp()->machno){
		if (pmcdebug) {
			print("int setctr[%#ux, %#ux] = %#llux\n", regno, coreno, v);
		}
		return setctr(v, regno);
	}
	mp = sys->machptr[coreno];
	p = &mp->pmc[regno];

	if (pmcdebug) {
		print("ext setctr[%#ux, %#ux] = %#llux\n", regno, coreno, v);
	}
	ilock(&mp->pmclock);
	p->ctr = v;
	p->ctrset |= PmcSet;
	/* NOTE(review): waitnotstale() presumably releases pmclock on this path — confirm */
	if(shouldipi(mp))
		waitnotstale(mp, p);
	else
		iunlock(&mp->pmclock);
	return 0;
}
/*
 * Read performance-monitoring counter regno on core coreno.
 * Local core: read the hardware counter directly.  Remote core: mark
 * the slot PmcGet under pmclock and, when an IPI is warranted, wait
 * for the remote core to refresh it before reading the cached value.
 */
uint64_t
pmcgetctr(uint32_t coreno, uint32_t regno)
{
	PmcCtr *p;
	Mach *mp;
	uint64_t v;

	if(coreno == machp()->machno){
		v = getctr(regno);
		if (pmcdebug) {
			print("int getctr[%#ux, %#ux] = %#llux\n", regno, coreno, v);
		}
		return v;
	}

	mp = sys->machptr[coreno];
	p = &mp->pmc[regno];

	ilock(&mp->pmclock);
	p->ctrset |= PmcGet;
	if(shouldipi(mp)){
		/* NOTE(review): waitnotstale() presumably drops pmclock, hence the re-ilock — confirm */
		waitnotstale(mp, p);
		ilock(&mp->pmclock);
	}
	v = p->ctr;
	iunlock(&mp->pmclock);

	if (pmcdebug) {
		print("ext getctr[%#ux, %#ux] = %#llux\n", regno, coreno, v);
	}

	return v;
}
/*
 * Release a virtual mapping created by vmap.  Currently only a
 * placeholder: mappings inside the permanently-mapped low-KZERO MiB
 * are left alone, and actual page-table teardown is still TODO (see
 * the comment below).  Only legal on cpu0.
 * Fix: removed an unused local `Proc *up = externup();`.
 */
void
vunmap(void* v, usize size)
{
	uintptr_t va;

	DBG("vunmap(%#p, %lud)\n", v, size);

	if(machp()->machno != 0)
		panic("vunmap");

	/*
	 * See the comments above in vmap.
	 */
	va = PTR2UINT(v);

	/* the permanently-mapped region below KZERO+1MiB is never unmapped */
	if(va >= KZERO && va+size < KZERO+1ull*MiB)
		return;

	/*
	 * Here will have to deal with releasing any
	 * resources used for the allocation (e.g. page table
	 * pages).
	 */
	DBG("vunmap(%#p, %lud)\n", v, size);
}
/*
 * Release all of proc's page-table pages: collapse every level onto
 * the level-0 list via mmuptpfree, then move each page (whose ref
 * must drop to zero) onto the global mmuptpfreelist.  Wakes any
 * waiter on the page allocator, then points the hardware back at
 * this core's own kernel tables.
 * Fix: removed an unused local `Proc *up = externup();`.
 */
void
mmurelease(Proc* proc)
{
	Page *page, *next;

	mmuptpfree(proc, 0);

	for(page = proc->mmuptp[0]; page != nil; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		lock(&mmuptpfreelist);
		page->next = mmuptpfreelist.next;
		mmuptpfreelist.next = page;
		mmuptpfreelist.ref++;
		page->prev = nil;
		unlock(&mmuptpfreelist);
	}
	/* pages were returned; wake anyone waiting on the page allocator */
	if(proc->mmuptp[0] && pga.r.p)
		wakeup(&pga.r);
	proc->mmuptp[0] = nil;

	/* NOTE(review): bare `m` here vs machp() below — confirm they alias */
	tssrsp0(m, STACKALIGN(m->stack+MACHSTKSZ));
	cr3put(machp()->pml4->pa);
}
/*
 * recalculate priorities once a second. We need to do this
 * since priorities will otherwise only be recalculated when
 * the running process blocks.
 */
static void
rebalance(void)
{
	Mach *m = machp();
	Mpl pl;
	int pri, npri, t;
	Schedq *rq;
	Proc *p;

	/* rate-limit to once per HZ ticks */
	t = m->ticks;
	if(t - run.balancetime < HZ)
		return;
	run.balancetime = t;

	for(pri=0, rq=run.runq; pri<Npriq; pri++, rq++){
another:
		p = rq->head;
		if(p == nil)
			continue;
		/* only touch procs with affinity to this core */
		if(p->mp != sys->machptr[m->machno]) //MACHP(m->machno)
			continue;
		if(pri == p->basepri)
			continue;
		updatecpu(p);
		npri = reprioritize(p);
		if(npri != pri){
			/* move p to its new queue, then re-examine this queue's head */
			pl = splhi();
			p = dequeueproc(&run, rq, p);
			if(p)
				queueproc(&run, &run.runq[npri], p, 0);
			splx(pl);
			goto another;
		}
	}
}
/*
 * return value and speed of timer
 * The fast tick source is the processor timestamp counter; when the
 * caller supplies hz, the core's clock rate is reported through it.
 */
uint64_t
fastticks(uint64_t* hz)
{
	/* report the tick frequency only when asked */
	if(hz != nil)
		*hz = machp()->cpuhz;

	return rdtsc();
}
/*
 * Post note a (of length n, with the given flag) to every live,
 * non-kernel process in note group noteid, except the current
 * process.  The noteid is rechecked under p->debug since the first
 * check is done without the lock.
 */
void
pgrpnote(uint32_t noteid, char *a, int32_t n, int flag)
{
	Mach *m = machp();
	int i;
	Proc *p;
	char buf[ERRMAX];

	if(n >= ERRMAX-1)
		error(Etoobig);

	memmove(buf, a, n);
	buf[n] = 0;
	for(i = 0; (p = psincref(i)) != nil; i++){
		/* skip self, dead procs, other groups, and kernel procs */
		if(p == m->externup || p->state == Dead || p->noteid != noteid || p->kp){
			psdecref(p);
			continue;
		}
		qlock(&p->debug);
		/* recheck under the lock: the proc may have exited or changed group */
		if(p->pid == 0 || p->noteid != noteid){
			qunlock(&p->debug);
			psdecref(p);
			continue;
		}
		if(!waserror()) {
			postnote(p, 0, buf, flag);
			poperror();
		}
		qunlock(&p->debug);
		psdecref(p);
	}
}
/*
 * System call: zfree(Zio io[], int nio).
 * Return each zero-copy buffer described in io[] to its segment and
 * clear the caller's descriptors.  Raises on bad addresses or if a
 * buffer does not belong to a zero-copy (SG_ZIO) segment.
 */
void
sysziofree(Ar0 *ar0, ...)
{
	Mach *m = machp();
	Zio *io;
	int nio, i;
	Segment *s;
	va_list list;
	va_start(list, ar0);

	/*
	 * zfree(Zio io[], int nio);
	 */
	io = va_arg(list, Zio*);
	nio = va_arg(list, int);
	va_end(list);

	io = validaddr(io, sizeof io[0] * nio, 1);
	for(i = 0; i < nio; i++){
		/* seg() returns with s->lk held when it succeeds */
		s = seg(m->externup, PTR2UINT(io[i].data), 1);
		if(s == nil)
			error("invalid address in zio");
		if((s->type&SG_ZIO) == 0){
			qunlock(&s->lk);
			error("segment is not a zero-copy segment");
		}
		zputaddr(s, PTR2UINT(io[i].data));
		qunlock(&s->lk);
		io[i].data = nil;
		io[i].size = 0;
	}
}
/*
 * Fatal kernel error: format the message, print it on the console
 * (and via iprint with interrupts raised), invoke the debugger hook
 * if set, then halt.  A recursive panic spins forever instead of
 * re-entering.
 */
void
panic(char *fmt, ...)
{
	int n;
	Mpl pl;
	va_list arg;
	char buf[PRINTSIZE];

	consdevs[1].q = nil;	/* don't try to write to /dev/kprint */

	/* a panic during panic: just stop this core */
	if(panicking)
		for(;;);
	panicking = 1;

	pl = splhi();
	seprint(buf, buf+sizeof buf, "panic: cpu%d: ", machp()->machno);
	va_start(arg, fmt);
	n = vseprint(buf+strlen(buf), buf+sizeof(buf), fmt, arg) - buf;
	va_end(arg);
	iprint("%s\n", buf);
	if(consdebug)
		(*consdebug)();
	splx(pl);
	//prflush();
	/* replace the NUL with a newline for the console write */
	buf[n] = '\n';
	putstrn(buf, n+1);
	//dumpstack();
	delay(1000);	/* give time to consoles */

	die("wait forever");
	exit(1);
}
/*
 * Debug aid: walk this core's page tables for addr at every level
 * (PML4 down to PTE) and print whatever mapping entry is found at
 * each one.
 */
void
dumpmmuwalk(uint64_t addr)
{
	int i, l;
	PTE *pte, *pml4;

	pml4 = UINT2PTR(machp()->MMU.pml4->va);
	/* query levels 3,2,1,0 in turn, printing each entry reached */
	for(i = 3; i >= 0; i--){
		if((l = mmuwalk(pml4, addr, i, &pte, nil)) >= 0)
			print("cpu%d: mmu l%d pte %#p = %llux\n", machp()->machno, l, pte, *pte);
	}
}
/*
 * Kernel profiling tick: attribute this clock tick (TK2MS(1) ms) to
 * the bucket for pc in kprof.buf, after redirecting pcs caught inside
 * spllo/splx to the pc saved at splhi time.  Slot 0 counts every tick.
 */
static void
_kproftimer(uintptr_t pc)
{
	if(kprof.time == 0)
		return;

	/*
	 * if the pc corresponds to the idle loop, don't consider it.

	if(m->inidle)
		return;
	 */
	/*
	 * if the pc is coming out of spllo or splx,
	 * use the pc saved when we went splhi.
	 */
	if(pc>=PTR2UINT(spllo) && pc<=PTR2UINT(spldone))
		pc = machp()->splpc;

	ilock(&kprof.l);
	/* slot 0 accumulates total time regardless of pc */
	kprof.buf[0] += TK2MS(1);
	if(kprof.minpc<=pc && pc<kprof.maxpc){
		pc -= kprof.minpc;
		pc >>= LRES;
		kprof.buf[pc] += TK2MS(1);
	}else
	/*
	 * NOTE(review): this function is truncated in this chunk — the
	 * else branch (presumably an out-of-range bucket) and the iunlock
	 * and closing brace are not visible here.
	 */
/*
 * Read handler for the device-table file: formats one "#c name" line
 * per configured device into a scratch buffer and serves the
 * requested (off, n) window of it.  Returns the byte count read;
 * raises Enomem if the scratch buffer cannot be allocated.
 * Fix: removed an unused local `Mach *m = machp();`.
 */
int32_t
devtabread(Chan* c, void* buf, int32_t n, int64_t off)
{
	int i;
	Dev *dev;
	char *alloc, *e, *p;

	alloc = malloc(READSTR);
	if(alloc == nil)
		error(Enomem);

	p = alloc;
	e = p + READSTR;
	for(i = 0; devtab[i] != nil; i++){
		dev = devtab[i];
		p = seprint(p, e, "#%C %s\n", dev->dc, dev->name);
	}

	/* free the scratch buffer even if readstr errors */
	if(waserror()){
		free(alloc);
		nexterror();
	}
	n = readstr(off, buf, n, alloc);

	free(alloc);
	poperror();

	return n;
}