/*
 * Add a new entry for score/ia to the index cache in the given state
 * (IEClean or IEDirty).
 *
 * An IEntry is taken from the tail of icache.free or, failing that,
 * obtained from evictlru().  If neither yields one, the routine
 * repeatedly flushes the disk cache, kicks the index cache and
 * sleeps on icache.full until an entry becomes available.
 *
 * NOTE(review): rsleep on icache.full suggests the caller must hold
 * the lock tied to that Rendez -- confirm against the callers.
 */
static void
icacheinsert(u8int score[VtScoreSize], IAddr *ia, int state)
{
	IEntry *ie;

	ie = poplast(&icache.free);
	if(ie == nil)
		ie = evictlru();

	if(ie == nil){
		/* Nothing available: count the stall and wait for room. */
		addstat(StatIcacheStall, 1);
		for(;;){
			ie = poplast(&icache.free);
			if(ie == nil)
				ie = evictlru();
			if(ie != nil)
				break;
			/*
			 * Could safely return here if state == IEClean.
			 * But if state == IEDirty, have to wait to make
			 * sure we don't lose an index write.
			 * Let's wait all the time.
			 */
			flushdcache();
			kickicache();
			rsleep(&icache.full);
		}
		addstat(StatIcacheStall, -1);
	}

	/* Fill in the entry. */
	memmove(ie->score, score, VtScoreSize);
	ie->state = state;
	ie->ia = *ia;

	/* Queue it on the list matching its state. */
	if(state == IEClean){
		addstat(StatIcachePrefetch, 1);
		pushfirst(&icache.clean, ie);
	}else{
		addstat(StatIcacheWrite, 1);
		assert(state == IEDirty);
		icache.ndirty++;
		setstat(StatIcacheDirty, icache.ndirty);
		delaykickicache();
		pushfirst(&icache.dirty, ie);
	}

	ihashinsert(icache.hash, ie);
}
/*
 * Walk every arena of the index: sync the arena, then, if the
 * in-memory clump count differs from the on-disk count, index the
 * missing clumps and write the arena back.
 *
 * A failure in one arena is reported on fd 2 and does not stop the
 * walk.  Returns 0 if every arena was processed without error,
 * -1 if at least one failed.
 */
int
syncindex(Index *ix)
{
	Arena *arena;
	int i, e, e1, ok;

	ok = 0;
	for(i = 0; i < ix->narenas; i++){
		trace(TraceProc, "syncindex start %d", i);
		arena = ix->arenas[i];

		e = syncarena(arena, TWID32, 1, 1);
		if(e & SyncHeader)
			fprint(2, "arena %s: header is out-of-date\n", arena->name);

		/* SyncHeader, SyncCIZero and SyncCIErr are not treated as failures. */
		e1 = e & ~(SyncHeader|SyncCIZero|SyncCIErr);
		if(e1 != 0){
			fprint(2, "arena %s: %x\n", arena->name, e1);
			ok = -1;
			continue;
		}

		flushdcache();

		if(arena->memstats.clumps != arena->diskstats.clumps){
			/*
			 * NOTE(review): %T is given no argument here; presumably
			 * it is the venti time verb that takes none -- confirm.
			 */
			fprint(2, "%T %s: indexing %d clumps...\n",
				arena->name,
				arena->memstats.clumps - arena->diskstats.clumps);

			if(syncarenaindex(arena, ix->amap[i].start) < 0){
				fprint(2, "arena %s: syncarenaindex: %r\n", arena->name);
				ok = -1;
			}else if(wbarena(arena) < 0){
				fprint(2, "arena %s: wbarena: %r\n", arena->name);
				ok = -1;
			}else{
				flushdcache();
				delaykickicache();
			}
		}
	}
	return ok;
}
/*
 * HTTP handler: flush the disk cache and report it to the client.
 * Returns the (negative) result of hsettext if that fails, else 0.
 */
static int
hdcacheflush(HConnect *c)
{
	int r;

	if((r = hsettext(c)) < 0)
		return r;

	flushdcache();
	hprint(&c->hout, "flushed dcache\n");
	hflush(&c->hout);
	return 0;
}
/*
 * Insert index entries for the clumps the arena has in memory
 * (memstats) but not yet on disk (diskstats).
 *
 * a0 is the base address to which the arena's used byte count is
 * added to obtain each clump's index address.  For every missing
 * clump the clump info is read, an IAddr is built, the running
 * arena state is advanced, and the score is inserted as IEDirty.
 *
 * Returns 0 on success (including when nothing is missing) and -1
 * if a clump info could not be read; entries inserted before the
 * failure stay inserted.  The disk cache is flushed before returning
 * whenever the loop was entered.
 */
static int
syncarenaindex(Arena *arena, uint64_t a0)
{
	int ok;
	uint32_t clump;
	uint64_t a;
	ClumpInfo ci;
	IAddr ia;
	AState as;

	/* Nothing to do when disk and memory agree. */
	if(arena->diskstats.clumps == arena->memstats.clumps)
		return 0;

	/* Start the running state from what is already on disk. */
	memset(&as, 0, sizeof as);
	as.arena = arena;
	as.stats = arena->diskstats;

	ok = 0;
	a = a0 + arena->diskstats.used;
	clump = arena->diskstats.clumps;
	while(clump < arena->memstats.clumps){
		if(readclumpinfo(arena, clump, &ci) < 0){
			fprint(2, "%s: clump %d: cannot read clumpinfo\n", arena->name, clump);
			ok = -1;
			break;
		}

		/* Describe the clump at its current address. */
		ia.type = ci.type;
		ia.size = ci.uncsize;
		ia.addr = a;
		/* size in index blocks, rounded up */
		ia.blocks = (ClumpSize + ci.size + (1 << ABlockLog) - 1) >> ABlockLog;

		/* Step past the clump and account for it. */
		a += ClumpSize + ci.size;
		as.stats.used += ClumpSize + ci.size;
		as.stats.uncsize += ia.size;
		as.stats.clumps++;
		if(ci.uncsize > ci.size)
			as.stats.cclumps++;
		as.aa = a;

		insertscore(ci.score, &ia, IEDirty, &as);
		clump++;
	}

	flushdcache();
	return ok;
}
/*
 * Collect the dirty index cache entries whose 32-bit score hash lies
 * in [lo, hi] and whose address is <= limit, and return them as a
 * singly-linked list threaded through nextdirty.
 *
 * The scan runs with icache.lock held.  Entries are prepended as they
 * are found, so the result is in reverse scan order; they are not
 * removed from icache.dirty.  If nothing qualifies, the disk cache is
 * flushed and nil is returned.
 */
IEntry*
icachedirty(u32int lo, u32int hi, u64int limit)
{
	u32int h;
	IEntry *ie, *dirty;

	dirty = nil;
	trace(TraceProc, "icachedirty enter");
	qlock(&icache.lock);
	for(ie = icache.dirty.next; ie != &icache.dirty; ie = ie->next){
		if(ie->state != IEDirty || ie->ia.addr > limit)
			continue;
		h = hashbits(ie->score, 32);
		if(h < lo || h > hi)
			continue;
		/* push onto the front of the result list */
		ie->nextdirty = dirty;
		dirty = ie;
	}
	qunlock(&icache.lock);
	trace(TraceProc, "icachedirty exit");

	if(dirty == nil)
		flushdcache();
	return dirty;
}
void threadmain(int argc, char *argv[]) { int vers; ArenaPart *ap; Part *part; Arena *arena; uint64_t addr, limit, asize, apsize; char *file, *name, aname[ANameSize]; int i, n, blocksize, tabsize, zero; ventifmtinstall(); statsinit(); blocksize = 8 * 1024; asize = 512 * 1024 *1024; tabsize = 512 * 1024; /* BUG: should be determine from number of arenas */ zero = -1; vers = ArenaVersion5; ARGBEGIN{ case 'D': settrace(EARGF(usage())); break; case 'a': asize = unittoull(EARGF(usage())); if(asize == TWID64) usage(); break; case 'b': blocksize = unittoull(EARGF(usage())); if(blocksize == ~0) usage(); if(blocksize > MaxDiskBlock){ fprint(2, "block size too large, max %d\n", MaxDiskBlock); threadexitsall("usage"); } break; case '4': vers = ArenaVersion4; break; case 'Z': zero = 0; break; default: usage(); break; }ARGEND if(zero == -1){ if(vers == ArenaVersion4) zero = 1; else zero = 0; } if(argc != 2) usage(); name = argv[0]; file = argv[1]; if(nameok(name) < 0) sysfatal("illegal name template %s", name); part = initpart(file, ORDWR|ODIRECT); if(part == nil) sysfatal("can't open partition %s: %r", file); if(zero) zeropart(part, blocksize); maxblocksize = blocksize; initdcache(20*blocksize); ap = newarenapart(part, blocksize, tabsize); if(ap == nil) sysfatal("can't initialize arena: %r"); apsize = ap->size - ap->arenabase; n = apsize / asize; if(apsize - (n * asize) >= MinArenaSize) n++; fprint(2, "fmtarenas %s: %,d arenas, %,lld bytes storage, %,d bytes for index map\n", file, n, apsize, ap->tabsize); ap->narenas = n; ap->map = MKNZ(AMap, n); ap->arenas = MKNZ(Arena*, n); addr = ap->arenabase; for(i = 0; i < n; i++){ limit = addr + asize; if(limit >= ap->size || ap->size - limit < MinArenaSize){ limit = ap->size; if(limit - addr < MinArenaSize) sysfatal("bad arena set math: runt arena at %lld,%lld %lld", addr, limit, ap->size); } snprint(aname, ANameSize, "%s%d", name, i); if(0) fprint(2, "adding arena %s at [%lld,%lld)\n", aname, addr, limit); arena = newarena(part, 
vers, aname, addr, limit - addr, blocksize); if(!arena) fprint(2, "can't make new arena %s: %r", aname); freearena(arena); ap->map[i].start = addr; ap->map[i].stop = limit; namecp(ap->map[i].name, aname); addr = limit; } if(wbarenapart(ap) < 0) fprint(2, "can't write back arena partition header for %s: %r\n", file); flushdcache(); threadexitsall(0); }
/*
 * Store packet p as the contents of lump u.
 *
 * ms is treated as the start time of the request: elapsed time is
 * computed as msec() - ms and recorded in the write statistics.
 *
 * If the score is already indexed:
 *   - with verifywrites off, the data is assumed present and 0 is
 *     returned;
 *   - otherwise the stored block is read back and compared with p;
 *     a mismatch sets an error (bad stored data or score collision)
 *     and returns -1, a match returns 0;
 *   - if the read-back fails, the block is written again.
 * Otherwise the packet is stored as a new clump.
 *
 * p is either handed to insertlump or released with packetfree on
 * every path.  Returns 0 on success, or the failing result of the
 * comparison / storeclump.
 */
int
writeqlump(Lump *u, Packet *p, int creator, uint ms)
{
	ZBlock *flat;
	Packet *old;
	IAddr ia;
	int ok;
	uchar nscore[VtScoreSize];

	if(lookupscore(u->score, u->type, &ia) == 0){
		if(verifywrites == 0){
			/* assume the data is here! */
			packetfree(p);
			ms = msec() - ms;
			addstat2(StatRpcWriteOld, 1, StatRpcWriteOldTime, ms);
			return 0;
		}

		old = readilump(u, &ia, u->score);
		if(old == nil){
			/*
			 * the read failed: assume it was corrupted data
			 * and fall through to store the block again
			 */
			logerr(EAdmin, "writelump: read %V failed, rewriting: %r\n", u->score);
		}else{
			ok = 0;
			if(packetcmp(p, old) != 0){
				/* tell stored-data corruption apart from a real collision */
				packetsha1(old, nscore);
				if(scorecmp(u->score, nscore) != 0)
					seterr(EStrange, "readilump returned bad data %V not %V", nscore, u->score);
				else
					seterr(EStrange, "score collision %V", u->score);
				ok = -1;
			}
			packetfree(p);
			packetfree(old);

			ms = msec() - ms;
			addstat2(StatRpcWriteOld, 1, StatRpcWriteOldTime, ms);
			return ok;
		}
	}

	/* New (or unreadable) block: flatten and store it. */
	flat = packet2zblock(p, packetsize(p));
	ok = storeclump(mainindex, flat, u->score, u->type, creator, &ia);
	freezblock(flat);

	if(ok == 0)
		insertlump(u, p);
	else
		packetfree(p);

	if(syncwrites){
		flushdcache();
		flushicache();
		flushdcache();
	}

	ms = msec() - ms;
	addstat2(StatRpcWriteNew, 1, StatRpcWriteNewTime, ms);
	return ok;
}
/*
 * Compute the SHA1 checksum of an arena and write it back.
 *
 * The arena is read from one block before arena->base up to
 * arena->base + arena->size in chunks of max(MaxIoSize, blocksize),
 * dropping to single blocks near the end.  The final block is hashed
 * with its trailing VtScoreSize bytes replaced by zeroscore, since a
 * checksum may already be stored there.  If the stored value is
 * neither zero nor the computed score, the mismatch is logged and
 * the stored value is overwritten anyway.
 *
 * Between reads the routine yields to disksched() and sleeps for
 * arenasumsleeptime, polling once a second while that is
 * SleepForever.  A read error is logged and abandons the sum.
 */
void
sumarena(Arena *arena)
{
	ZBlock *b;
	DigestState s;
	uint64_t a, e;
	uint32_t bs;
	int t;
	uint8_t score[VtScoreSize];

	bs = MaxIoSize;
	if(bs < arena->blocksize)
		bs = arena->blocksize;

	/*
	 * read & sum all blocks except the last one
	 */
	flushdcache();
	memset(&s, 0, sizeof s);
	b = alloczblock(bs, 0, arena->part->blocksize);
	e = arena->base + arena->size;
	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
		disksched();
		for(;;){
			t = arenasumsleeptime;
			if(t != SleepForever)
				break;
			sleep(1000);
			disksched();
		}
		sleep(t);

		/* do not let a large read run past the last block */
		if(a + bs > e)
			bs = arena->blocksize;
		if(readpart(arena->part, a, b->data, bs) < 0)
			goto ReadErr;
		addstat(StatSumRead, 1);
		addstat(StatSumReadBytes, bs);
		sha1(b->data, bs, nil, &s);
	}

	/*
	 * the last one is special, since it may already have the checksum included
	 */
	bs = arena->blocksize;
	if(readpart(arena->part, e, b->data, bs) < 0)
		goto ReadErr;
	addstat(StatSumRead, 1);
	addstat(StatSumReadBytes, bs);

	sha1(b->data, bs-VtScoreSize, nil, &s);
	sha1(zeroscore, VtScoreSize, nil, &s);
	sha1(nil, 0, score, &s);

	/*
	 * check for no checksum or the same
	 */
	if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0
	&& scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
		logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
			arena->name, &b->data[bs - VtScoreSize], score);
	freezblock(b);

	qlock(&arena->lock);
	scorecp(arena->score, score);
	wbarena(arena);
	qunlock(&arena->lock);
	return;

ReadErr:
	logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r",
		arena->name, a);
	freezblock(b);
}
static void ventiserver(void *v) { Packet *p; VtReq *r; char err[ERRMAX]; uint ms; int cached, ok; USED(v); threadsetname("ventiserver"); trace(TraceWork, "start"); while((r = vtgetreq(ventisrv)) != nil){ trace(TraceWork, "finish"); trace(TraceWork, "start request %F", &r->tx); trace(TraceRpc, "<- %F", &r->tx); r->rx.msgtype = r->tx.msgtype+1; addstat(StatRpcTotal, 1); if(0) print("req (arenas[0]=%p sects[0]=%p) %F\n", mainindex->arenas[0], mainindex->sects[0], &r->tx); switch(r->tx.msgtype){ default: vtrerror(r, "unknown request"); break; case VtTread: ms = msec(); r->rx.data = readlump(r->tx.score, r->tx.blocktype, r->tx.count, &cached); ms = msec() - ms; addstat2(StatRpcRead, 1, StatRpcReadTime, ms); if(r->rx.data == nil){ addstat(StatRpcReadFail, 1); rerrstr(err, sizeof err); vtrerror(r, err); }else{ addstat(StatRpcReadBytes, packetsize(r->rx.data)); addstat(StatRpcReadOk, 1); if(cached) addstat2(StatRpcReadCached, 1, StatRpcReadCachedTime, ms); else addstat2(StatRpcReadUncached, 1, StatRpcReadUncachedTime, ms); } break; case VtTwrite: if(readonly){ vtrerror(r, "read only"); break; } p = r->tx.data; r->tx.data = nil; addstat(StatRpcWriteBytes, packetsize(p)); ms = msec(); ok = writelump(p, r->rx.score, r->tx.blocktype, 0, ms); ms = msec() - ms; addstat2(StatRpcWrite, 1, StatRpcWriteTime, ms); if(ok < 0){ addstat(StatRpcWriteFail, 1); rerrstr(err, sizeof err); vtrerror(r, err); } break; case VtTsync: flushqueue(); flushdcache(); break; } trace(TraceRpc, "-> %F", &r->rx); vtrespond(r); trace(TraceWork, "start"); } flushdcache(); flushicache(); threadexitsall(0); }