/*
 * (Re)initialise the controller's receive and transmit rings and the
 * shared driver/NIC status area.  Idempotent: buffers allocated on a
 * previous call are reused, and any Blocks still held in the rings
 * are freed.  Returns nil on success or a static error string.
 */
static char*
initring(Ctlr *ctlr)
{
	RXQ *rx;
	TXQ *tx;
	int i, q;

	rx = &ctlr->rx;
	if(rx->b == nil)
		rx->b = malloc(sizeof(Block*) * Nrx);
	if(rx->p == nil)
		/* rx pointer ring; 16K alignment as required by the hardware */
		rx->p = mallocalign(sizeof(u32int) * Nrx, 16 * 1024, 0, 0);
	if(rx->b == nil || rx->p == nil)
		return "no memory for rx ring";
	for(i = 0; i < Nrx; i++){
		rx->p[i] = 0;
		if(rx->b[i] != nil){
			freeb(rx->b[i]);
			rx->b[i] = nil;
		}
		/* plant a fresh receive buffer in slot i */
		if(rbplant(ctlr, i) < 0)
			return "no memory for rx descriptors";
	}
	rx->i = 0;

	/* page-sized, page-aligned area shared with the NIC */
	if(ctlr->shared == nil)
		ctlr->shared = mallocalign(4096, 4096, 0, 0);
	if(ctlr->shared == nil)
		return "no memory for shared buffer";
	memset(ctlr->shared, 0, 4096);

	for(q = 0; q < nelem(ctlr->tx); q++){
		tx = &ctlr->tx[q];
		if(tx->b == nil)
			tx->b = malloc(sizeof(Block*) * Ntx);
		if(tx->d == nil)
			tx->d = mallocalign(Tdscsize * Ntx, 16 * 1024, 0, 0);
		if(tx->c == nil)
			tx->c = mallocalign(Tcmdsize * Ntx, 4, 0, 0);
		if(tx->b == nil || tx->d == nil || tx->c == nil)
			return "no memory for tx ring";
		memset(tx->d, 0, Tdscsize * Ntx);
		memset(tx->c, 0, Tcmdsize * Ntx);
		for(i = 0; i < Ntx; i++){
			if(tx->b[i] != nil){
				freeb(tx->b[i]);
				tx->b[i] = nil;
			}
		}
		/* tell the NIC where this queue's descriptors live */
		ctlr->shared->txbase[q] = PCIWADDR(tx->d);
		tx->i = 0;
		tx->n = 0;
		tx->lastcmd = 0;
	}
	return nil;
}
/* * Rendezvous with other cores. Set roles for those that came * up online, and wait until they are initialized. * Sync TSC with them. * We assume other processors that could boot had time to * set online to 1 by now. */ static void nixsquids(void) { Mach *mp; int i; uvlong now, start; for(i = 1; i < MACHMAX; i++) if((mp = sys->machptr[i]) != nil && mp->online != 0){ /* * Inter-core calls. A ensure *mp->iccall and mp->icargs * go into different cache lines. */ mp->icc = mallocalign(sizeof *m->icc, ICCLNSZ, 0, 0); mp->icc->fn = nil; if(i < initialTCs){ conf.nmach++; mp->nixtype = NIXTC; } ainc(&active.nbooting); } sys->epoch = rdtsc(); mfence(); wrmsr(0x10, sys->epoch); m->rdtsc = rdtsc(); active.thunderbirdsarego = 1; start = fastticks2us(fastticks(nil)); do{ now = fastticks2us(fastticks(nil)); }while(active.nbooting > 0 && now - start < 1000000) ; if(active.nbooting > 0) print("cpu0: %d cores couldn't start\n", active.nbooting); active.nbooting = 0; }
/*
 * Allocate this cpu's aligned FP save area.
 * Fatal if memory is exhausted.
 */
void
fpsavealloc(void)
{
	void *p;

	p = mallocalign(sizeof(FPssestate), FPalign, 0, 0);
	if(p == nil)
		panic("cpu%d: can't allocate fpsavalign", m->machno);
	m->fpsavalign = p;
}
/*
 * Lazily allocate the current process's FP save area: plain FPstate
 * for x87-only save, aligned FPssestate otherwise.  Registers the
 * fpexit cleanup handler once an area exists.  Returns the area
 * (nil if allocation failed).
 */
static void*
fpalloc(void)
{
	void *a;

	a = up->fpsave.addr;
	if(a == nil){
		if(fpsave == fpx87save)
			a = smalloc(sizeof(FPstate));
		else
			a = mallocalign(sizeof(FPssestate), FPalign, 0, 0);
		up->fpsave.addr = a;
		if(a != nil)
			up->fpexit = fpexit;
	}
	return a;
}
/*
 * Allocate and initialise a virtio queue of the given size.
 * The descriptor table, available ring and used ring are carved out
 * of one contiguous page-aligned allocation; the used (device) area
 * starts on its own page boundary.  All descriptors are chained onto
 * the queue's free list.  Returns nil (after printing) on OOM.
 */
static Vqueue*
mkvqueue(int size)
{
	Vqueue *q;
	uchar *p;
	int i;

	/* Vqueue header plus per-descriptor Block pointer table */
	q = malloc(sizeof(*q) + sizeof(void*)*size);
	/* driver (avail) area and device (used) area, each page-rounded */
	p = mallocalign(
		PGROUND(sizeof(Vdesc)*size + sizeof(Vring) +
			sizeof(u16int)*size + sizeof(u16int)) +
		PGROUND(sizeof(Vring) +
			sizeof(Vused)*size + sizeof(u16int)),
		BY2PG, 0, 0);
	if(p == nil || q == nil){
		print("virtio: no memory for Vqueue\n");
		free(p);
		free(q);
		return nil;
	}

	/* carve the contiguous allocation into the ring components */
	q->desc = (void*)p;
	p += sizeof(Vdesc)*size;
	q->avail = (void*)p;
	p += sizeof(Vring);
	q->availent = (void*)p;
	p += sizeof(u16int)*size;
	q->availevent = (void*)p;
	p += sizeof(u16int);

	/* used ring must begin on its own page */
	p = (uchar*)PGROUND((ulong)p);
	q->used = (void*)p;
	p += sizeof(Vring);
	q->usedent = (void*)p;
	p += sizeof(Vused)*size;
	q->usedevent = (void*)p;

	/* push every descriptor onto the free list (-1 terminates) */
	q->free = -1;
	q->nfree = q->size = size;
	for(i = 0; i < size; i++){
		q->desc[i].next = q->free;
		q->free = i;
	}
	return q;
}
static Page* mmuptpalloc(void) { void* va; Page *page; /* * Do not really need a whole Page structure, * but it makes testing this out a lot easier. * Could keep a cache and free excess. * Have to maintain any fiction for pexit? */ lock(&mmuptpfreelist.l); if((page = mmuptpfreelist.next) != nil) { mmuptpfreelist.next = page->next; mmuptpfreelist.ref--; unlock(&mmuptpfreelist.l); if(page->ref++ != 0) panic("mmuptpalloc ref\n"); page->prev = page->next = nil; memset(UINT2PTR(page->va), 0, PTSZ); if(page->pa == 0) panic("mmuptpalloc: free page with pa == 0"); return page; } unlock(&mmuptpfreelist.l); if((page = malloc(sizeof(Page))) == nil) { print("mmuptpalloc Page\n"); return nil; } if((va = mallocalign(PTSZ, PTSZ, 0, 0)) == nil) { print("mmuptpalloc va\n"); free(page); return nil; } page->va = PTR2UINT(va); page->pa = PADDR(va); page->ref = 1; if(page->pa == 0) panic("mmuptpalloc: no pa"); return page; }
/*
 * count CPU's, set up their mach structures and l1 ptes.
 * we're running on cpu 0 and our data structures were
 * statically allocated.
 */
void
launchinit(void)
{
	int mach;
	Mach *mm;
	PTE *l1;

	for(mach = 1; mach < MAXMACH; mach++){
		/* self-aligned allocations so low bits can be masked off */
		machaddr[mach] = mm = mallocalign(MACHSIZE, MACHSIZE, 0, 0);
		l1 = mallocalign(L1SIZE, L1SIZE, 0, 0);
		if(mm == nil || l1 == nil)
			panic("launchinit");
		memset(mm, 0, MACHSIZE);
		mm->machno = mach;
		memmove(l1, (void *)L1, L1SIZE);	/* clone cpu0's l1 table */
		/* write back so the secondary cpu sees it before its caches are on */
		l1cache->wbse(l1, L1SIZE);
		mm->mmul1 = l1;
		l1cache->wbse(mm, MACHSIZE);
	}
	l1cache->wbse(machaddr, sizeof machaddr);
	conf.nmach = 1;	/* others are counted as they come online */
}
/*
 * Allocate and install a new page-table page at table[index] for va
 * at the given level.  Tables for user and kmap addresses come from
 * the per-process MMU pool and are linked onto the process's mmu or
 * kmap list for later reclamation; tables for VMAP-and-above come
 * from malloc (or rampage() before the allocator is up).  Returns
 * the zeroed new table page.
 */
static uintptr*
mmucreate(uintptr *table, uintptr va, int level, int index)
{
	uintptr *page, flags;
	MMU *p;

	flags = PTEWRITE|PTEVALID;
	if(va < VMAP){
		assert(up != nil);
		assert((va < TSTKTOP) || (va >= KMAP && va < KMAP+KMAPSIZE));
		p = mmualloc();
		p->index = index;
		p->level = level;
		if(va < TSTKTOP){
			/* user mapping: track on the process mmu list */
			flags |= PTEUSER;
			if(level == PML4E){
				if((p->next = up->mmuhead) == nil)
					up->mmutail = p;
				up->mmuhead = p;
				/* note which top-level slots this cpu has loaded */
				m->mmumap[index/MAPBITS] |= 1ull<<(index%MAPBITS);
			} else {
				up->mmutail->next = p;
				up->mmutail = p;
			}
			up->mmucount++;
		} else {
			/* kmap mapping: track on the separate kmap list */
			if(level == PML4E){
				up->kmaptail = p;
				up->kmaphead = p;
			} else {
				up->kmaptail->next = p;
				up->kmaptail = p;
			}
			up->kmapcount++;
		}
		page = p->page;
	} else if(conf.mem[0].npage != 0) {
		page = mallocalign(PTSZ, BY2PG, 0, 0);
	} else {
		/* too early for malloc: take a raw page */
		page = rampage();
	}
	memset(page, 0, PTSZ);
	table[index] = PADDR(page) | flags;
	return page;
}
/*
 * Get a free MMU tracking structure (with its page-table page).
 * Fast path: the per-cpu free list (lock-free for this cpu), then
 * the locked global pool.  When both are empty, allocate a batch of
 * 256 MMUs plus one big aligned run of page-table pages, keep the
 * first and donate the remaining n-1 to the global pool.
 */
static MMU*
mmualloc(void)
{
	MMU *p;
	int i, n;

	p = m->mmufree;
	if(p != nil){
		m->mmufree = p->next;
		m->mmucount--;
	} else {
		lock(&mmupool);
		p = mmupool.free;
		if(p != nil){
			mmupool.free = p->next;
			mmupool.nfree--;
		} else {
			/* pool dry: do the big allocations outside the lock */
			unlock(&mmupool);
			n = 256;
			p = malloc(n * sizeof(MMU));
			if(p == nil)
				panic("mmualloc: out of memory for MMU");
			p->page = mallocalign(n * PTSZ, BY2PG, 0, 0);
			if(p->page == nil)
				panic("mmualloc: out of memory for MMU pages");
			/* slice the run into per-MMU pages and chain p[0..n-1] */
			for(i=1; i<n; i++){
				p[i].page = p[i-1].page + (1<<PTSHIFT);
				p[i-1].next = &p[i];
			}
			lock(&mmupool);
			/* keep p (== p[0]); donate p[1..n-1] to the pool */
			p[n-1].next = mmupool.free;
			mmupool.free = p->next;
			mmupool.nalloc += n;
			mmupool.nfree += n-1;
		}
		unlock(&mmupool);
	}
	p->next = nil;
	return p;
}
/* * Rendezvous with other cores. Set roles for those that came * up online, and wait until they are initialized. * Sync TSC with them. * We assume other processors that could boot had time to * set online to 1 by now. */ static void nixsquids(void) { Mach *m = machp(); Mach *mp; int i; uint64_t now, start; /* Not AC for now :-) */ for(i = 1; i <= MACHMAX; i++) //for(i = 1; i < MACHMAX; i++) if((mp = sys->machptr[i]) != nil && mp->online){ /* * Inter-core calls. A ensure *mp->iccall and mp->icargs * go into different cache lines. */ mp->icc = mallocalign(sizeof *m->icc, ICCLNSZ, 0, 0); mp->icc->fn = nil; if(i < numtcs){ sys->nmach++; mp->nixtype = NIXTC; sys->nc[NIXTC]++; }//else //sys->nc[NIXAC]++; ainc(&active.nbooting); } sys->epoch = rdtsc(); mfence(); wrmsr(0x10, sys->epoch); m->rdtsc = rdtsc(); active.thunderbirdsarego = 1; start = fastticks2us(fastticks(nil)); do{ now = fastticks2us(fastticks(nil)); }while(active.nbooting > 0 && now - start < 1000000) ; if(active.nbooting > 0) print("cpu0: %d cores couldn't start\n", active.nbooting); active.nbooting = 0; }
/*
 * Bootstrap the device firmware: stage the "init" image in DMA
 * memory and hand it to the BSM, upload the boot microcode, start
 * the boot loader, wait for the init firmware's alive interrupt,
 * then repeat the staging for the "main" runtime image.  Returns
 * nil on success or a static error string.
 */
static char*
boot(Ctlr *ctlr)
{
	int i, n, size;
	uchar *dma, *p;
	FWImage *fw;
	char *err;

	fw = ctlr->fw;

	/* 16 byte padding may not be necessary. */
	size = ROUND(fw->init.data.size, 16) + ROUND(fw->init.text.size, 16);
	dma = mallocalign(size, 16, 0, 0);
	if(dma == nil)
		return "no memory for dma";

	if((err = niclock(ctlr)) != nil){
		free(dma);
		return err;
	}
	/* point the BSM at the init image's data and text in host memory */
	p = dma;
	memmove(p, fw->init.data.data, fw->init.data.size);
	coherence();
	prphwrite(ctlr, BsmDramDataAddr, PCIWADDR(p));
	prphwrite(ctlr, BsmDramDataSize, fw->init.data.size);
	p += ROUND(fw->init.data.size, 16);
	memmove(p, fw->init.text.data, fw->init.text.size);
	coherence();
	prphwrite(ctlr, BsmDramTextAddr, PCIWADDR(p));
	prphwrite(ctlr, BsmDramTextSize, fw->init.text.size);
	nicunlock(ctlr);

	if((err = niclock(ctlr)) != nil){
		free(dma);
		return err;
	}
	/* Copy microcode image into NIC memory. */
	p = fw->boot.text.data;
	n = fw->boot.text.size/4;
	for(i=0; i<n; i++, p += 4)
		prphwrite(ctlr, BsmSramBase+i*4, get32(p));

	prphwrite(ctlr, BsmWrMemSrc, 0);
	prphwrite(ctlr, BsmWrMemDst, 0);
	prphwrite(ctlr, BsmWrDwCount, n);

	/* Start boot load now. */
	prphwrite(ctlr, BsmWrCtrl, 1<<31);

	/* Wait for transfer to complete (up to 10s). */
	for(i=0; i<1000; i++){
		if((prphread(ctlr, BsmWrCtrl) & (1<<31)) == 0)
			break;
		delay(10);
	}
	if(i == 1000){
		nicunlock(ctlr);
		free(dma);
		return "bootcode timeout";
	}

	/* Enable boot after power up. */
	prphwrite(ctlr, BsmWrCtrl, 1<<30);
	nicunlock(ctlr);

	/* Now press "execute". */
	csr32w(ctlr, Reset, 0);

	/* Wait up to 5s for the first alive notification. */
	if(irqwait(ctlr, Ierr|Ialive, 5000) != Ialive){
		free(dma);
		return "init firmware boot failed";
	}
	free(dma);

	/* same staging dance for the main runtime image */
	size = ROUND(fw->main.data.size, 16) + ROUND(fw->main.text.size, 16);
	dma = mallocalign(size, 16, 0, 0);
	if(dma == nil)
		return "no memory for dma";

	if((err = niclock(ctlr)) != nil){
		free(dma);
		return err;
	}
	p = dma;
	memmove(p, fw->main.data.data, fw->main.data.size);
	coherence();
	prphwrite(ctlr, BsmDramDataAddr, PCIWADDR(p));
	prphwrite(ctlr, BsmDramDataSize, fw->main.data.size);
	p += ROUND(fw->main.data.size, 16);
	memmove(p, fw->main.text.data, fw->main.text.size);
	coherence();
	prphwrite(ctlr, BsmDramTextAddr, PCIWADDR(p));
	/* bit 31 marks this as the runtime (main) image */
	prphwrite(ctlr, BsmDramTextSize, fw->main.text.size | (1<<31));
	nicunlock(ctlr);

	if(irqwait(ctlr, Ierr|Ialive, 5000) != Ialive){
		free(dma);
		return "main firmware boot failed";
	}
	free(dma);
	return postboot(ctlr);
}
/*
 * First-attach initialisation for the VT6102: allocate the receive
 * and transmit descriptor rings plus the transmit bounce buffers in
 * one aligned allocation, link both rings circularly, program the
 * ring addresses into the chip, unmask interrupts, start the chip
 * and kick off the link-monitor kproc.  Subsequent attaches return
 * immediately.  Raises on allocation failure.
 */
static void
vt6102attach(Ether* edev)
{
	int dsz, i;
	Ctlr *ctlr;
	Ds *ds, *prev;
	uchar *alloc, *bounce;
	char name[KNAMELEN];

	ctlr = edev->ctlr;
	qlock(&ctlr->alock);
	if(ctlr->alloc != nil){
		/* already attached */
		qunlock(&ctlr->alock);
		return;
	}

	/*
	 * Descriptor and bounce-buffer space.
	 * Must all be aligned on a 4-byte boundary,
	 * but try to align on cache-lines.
	 */
	ctlr->nrd = Nrd;
	ctlr->ntd = Ntd;
	dsz = ROUNDUP(sizeof(Ds), ctlr->cls);
	alloc = mallocalign((ctlr->nrd+ctlr->ntd)*dsz + ctlr->ntd*Txcopy, dsz, 0, 0);
	if(alloc == nil){
		qunlock(&ctlr->alock);
		error(Enomem);
	}
	ctlr->alloc = alloc;
	ctlr->rd = (Ds*)alloc;

	if(waserror()){
		/* free any rx Blocks planted so far, then the ring memory */
		ds = ctlr->rd;
		for(i = 0; i < ctlr->nrd; i++){
			if(ds->bp != nil){
				freeb(ds->bp);
				ds->bp = nil;
			}
			if((ds = ds->next) == nil)
				break;
		}
		free(ctlr->alloc);
		ctlr->alloc = nil;
		qunlock(&ctlr->alock);
		nexterror();
	}

	/* build the circular receive ring; each slot owns a Block */
	prev = ctlr->rd + ctlr->nrd-1;
	for(i = 0; i < ctlr->nrd; i++){
		ds = (Ds*)alloc;
		alloc += dsz;

		ds->control = Rdbsz;
		ds->branch = PCIWADDR(alloc);

		ds->bp = iallocb(Rdbsz+3);
		if(ds->bp == nil)
			error("vt6102: can't allocate receive ring\n");
		/* chip needs 4-byte aligned receive buffers */
		ds->bp->rp = (uchar*)ROUNDUP((ulong)ds->bp->rp, 4);
		ds->addr = PCIWADDR(ds->bp->rp);

		ds->next = (Ds*)alloc;
		ds->prev = prev;
		prev = ds;

		ds->status = Own;	/* hand this descriptor to the chip */
	}
	prev->branch = 0;
	prev->next = ctlr->rd;
	prev->status = 0;
	ctlr->rdh = ctlr->rd;

	/* build the circular transmit ring with per-slot bounce buffers */
	ctlr->td = (Ds*)alloc;
	prev = ctlr->td + ctlr->ntd-1;
	bounce = alloc + ctlr->ntd*dsz;
	for(i = 0; i < ctlr->ntd; i++){
		ds = (Ds*)alloc;
		alloc += dsz;

		ds->bounce = bounce;
		bounce += Txcopy;
		ds->next = (Ds*)alloc;
		ds->prev = prev;
		prev = ds;
	}
	prev->next = ctlr->td;
	ctlr->tdh = ctlr->tdt = ctlr->td;
	ctlr->tdused = 0;

	ctlr->cr = Dpoll|Rdmd|Txon|Rxon|Strt;
	/*Srci|Abti|Norbf|Pktrace|Ovfi|Udfi|Be|Ru|Tu|Txe|Rxe|Ptx|Prx*/
	ctlr->imr = Abti|Norbf|Pktrace|Ovfi|Udfi|Be|Ru|Tu|Txe|Rxe|Ptx|Prx;

	/* program ring addresses, clear pending interrupts, unmask, start */
	ilock(&ctlr->clock);
	csr32w(ctlr, Rxdaddr, PCIWADDR(ctlr->rd));
	csr32w(ctlr, Txdaddr, PCIWADDR(ctlr->td));
	csr16w(ctlr, Isr, ~0);
	csr16w(ctlr, Imr, ctlr->imr);
	csr16w(ctlr, Cr, ctlr->cr);
	iunlock(&ctlr->clock);

	snprint(name, KNAMELEN, "#l%dlproc", edev->ctlrno);
	kproc(name, vt6102lproc, edev);

	qunlock(&ctlr->alock);
	poperror();
}
/*
 * Initialise the 82563-family controller: program receive control,
 * allocate and program the receive descriptor ring, replenish
 * receive buffers, set up interrupt mitigation, then allocate and
 * program the transmit ring and thresholds, and enable transmit.
 * NOTE(review): mallocalign/malloc results for the rings are used
 * unchecked — an allocation failure here would fault.
 */
static void
i82563init(Ether* edev)
{
	Ctlr *ctlr;
	u32int r, rctl;

	ctlr = edev->ctlr;
	rctl = Dpf | Bsize2048 | Bam | RdtmsHALF;
	if(ctlr->type == i82575){
		/*
		 * Setting Qenable in Rxdctl does not
		 * appear to stick unless Ren is on.
		 */
		csr32w(ctlr, Rctl, Ren|rctl);
		r = csr32r(ctlr, Rxdctl);
		r |= Qenable;
		csr32w(ctlr, Rxdctl, r);
	}
	csr32w(ctlr, Rctl, rctl);

	/* receive descriptor ring: base, length, head and tail */
	ctlr->rdba = mallocalign(Nrdesc*sizeof(Rdesc), 128, 0, 0);
	csr32w(ctlr, Rdbal, PCIWADDR(ctlr->rdba));
	csr32w(ctlr, Rdbah, 0);
	csr32w(ctlr, Rdlen, Nrdesc*sizeof(Rdesc));
	ctlr->rdh = 0;
	csr32w(ctlr, Rdh, ctlr->rdh);
	ctlr->rdt = 0;
	csr32w(ctlr, Rdt, ctlr->rdt);
	ctlr->rb = malloc(sizeof(Block*)*Nrdesc);
	i82563replenish(ctlr);
	csr32w(ctlr, Rdtr, 0);	/* no receive interrupt delay */
	csr32w(ctlr, Radv, 0);

	if(ctlr->type == i82573)
		csr32w(ctlr, Ert, 1024/8);

	if(ctlr->type == i82566 || ctlr->type == i82567)
		csr32w(ctlr, Pbs, 16);

	i82563im(ctlr, Rxt0 | Rxo | Rxdmt0 | Rxseq | Ack);
	csr32w(ctlr, Tctl, 0x0F<<CtSHIFT | Psp | 0x3f<<ColdSHIFT | Mulr);
	csr32w(ctlr, Tipg, 6<<20 | 8<<10 | 8);
	csr32w(ctlr, Tidv, 1);

	/* transmit descriptor ring: base, length, head and tail */
	ctlr->tdba = mallocalign(Ntdesc*sizeof(Tdesc), 128, 0, 0);
	memset(ctlr->tdba, 0, Ntdesc*sizeof(Tdesc));
	csr32w(ctlr, Tdbal, PCIWADDR(ctlr->tdba));
	csr32w(ctlr, Tdbah, 0);
	csr32w(ctlr, Tdlen, Ntdesc*sizeof(Tdesc));
	ctlr->tdh = 0;
	csr32w(ctlr, Tdh, ctlr->tdh);
	ctlr->tdt = 0;
	csr32w(ctlr, Tdt, ctlr->tdt);
	ctlr->tb = malloc(sizeof(Block*)*Ntdesc);

	r = csr32r(ctlr, Txdctl);
	/* NOTE(review): PthreshSHIFT in this clear mask looks like it
	 * should be PthreshMASK — confirm against the datasheet. */
	r &= ~(WthreshMASK|PthreshSHIFT);
	r |= 4<<WthreshSHIFT | 4<<PthreshSHIFT;
	if(ctlr->type == i82575)
		r |= Qenable;
	csr32w(ctlr, Txdctl, r);

	/*
	 * Don't enable checksum offload. In practice, it interferes with
	 * tftp booting on at least the 82575.
	 */
//	csr32w(ctlr, Rxcsum, Tuofl | Ipofl | ETHERHDRSIZE<<PcssSHIFT);
	csr32w(ctlr, Rxcsum, 0);
	r = csr32r(ctlr, Tctl);
	r |= Ten;	/* transmit enable */
	csr32w(ctlr, Tctl, r);
}
/*
 * Start the application processors: copy the real-mode SIPI handler
 * into low memory, then for each usable APIC allocate its stack,
 * page-table space and Mach, point the warm-reset vector at the
 * handler and send a SIPI, waiting up to 5s for each AP to come up.
 */
void
sipi(void)
{
	Apic *apic;
	Mach *mach;
	int apicno, i;
	u32int *sipiptr;
	uintmem sipipa;
	u8int *alloc, *p;
	extern void squidboy(int);

	/*
	 * Move the startup code into place,
	 * must be aligned properly.
	 */
	sipipa = mmuphysaddr(SIPIHANDLER);
	if((sipipa & (4*KiB - 1)) || sipipa > (1*MiB - 2*4*KiB))
		return;
	sipiptr = UINT2PTR(SIPIHANDLER);
	memmove(sipiptr, sipihandler, sizeof(sipihandler));
	DBG("sipiptr %#p sipipa %#llux\n", sipiptr, sipipa);

	/*
	 * Notes:
	 * The Universal Startup Algorithm described in the MP Spec. 1.4.
	 * The data needed per-processor is the sum of the stack, page
	 * table pages, vsvm page and the Mach page. The layout is similar
	 * to that described in data.h for the bootstrap processor, but
	 * with any unused space elided.
	 */
	for(apicno = 0; apicno < Napic; apicno++){
		apic = &xlapic[apicno];
		if(!apic->useable || apic->addr || apic->machno == 0)
			continue;

		/*
		 * NOTE: for now, share the page tables with the
		 * bootstrap processor, until the lsipi code is worked out,
		 * so only the Mach and stack portions are used below.
		 */
		alloc = mallocalign(MACHSTKSZ+4*PTSZ+4*KiB+MACHSZ, 4096, 0, 0);
		if(alloc == nil)
			continue;
		memset(alloc, 0, MACHSTKSZ+4*PTSZ+4*KiB+MACHSZ);
		p = alloc+MACHSTKSZ;

		/* hand the handler the physical address of this AP's area */
		sipiptr[-1] = mmuphysaddr(PTR2UINT(p));
		DBG("p %#p sipiptr[-1] %#ux\n", p, sipiptr[-1]);

		p += 4*PTSZ+4*KiB;

		/*
		 * Committed. If the AP startup fails, can't safely
		 * release the resources, who knows what mischief
		 * the AP is up to. Perhaps should try to put it
		 * back into the INIT state?
		 */
		mach = (Mach*)p;
		mach->machno = apic->machno;		/* NOT one-to-one... */
		mach->splpc = PTR2UINT(squidboy);
		mach->apicno = apicno;
		mach->stack = PTR2UINT(alloc);
		mach->vsvm = alloc+MACHSTKSZ+4*PTSZ;
		//OH OH
		mach->pml4 = (PTE*)(alloc+MACHSTKSZ);

		/* warm-reset vector at 40:67 points at the SIPI handler */
		p = KADDR(0x467);
		*p++ = sipipa;
		*p++ = sipipa>>8;
		*p++ = 0;
		*p = 0;
		nvramwrite(0x0f, 0x0a);	/* CMOS shutdown code: jump via 40:67 */

		apicsipi(apicno, sipipa);

		/* wait up to 5s for the AP to clear splpc (come alive) */
		for(i = 0; i < 1000; i++){
			if(mach->splpc == 0)
				break;
			millidelay(5);
		}
		nvramwrite(0x0f, 0x00);

		DBG("mach %#p (%#p) apicid %d machno %2d %dMHz\n",
			mach, sys->machptr[mach->machno],
			apicno, mach->machno, mach->cpumhz);
	}
}