/*
 * Hand the disk block referenced by a clump-info block back via
 * putdblock and clear the reference in cib.
 * The arena argument is not used.
 */
static void
putcib(Arena *arena, CIBlock *cib)
{
	DBlock *held;

	USED(arena);

	held = cib->data;
	putdblock(held);
	cib->data = nil;
}
/*
 * Write n bytes from clbuf into the clump section of the arena,
 * starting at arena-relative offset aa.  Used to fix up corrupted
 * arenas.
 *
 * The range [aa, aa+n) must lie below
 *	arena->size - arenadirsize(arena, arena->memstats.clumps),
 * i.e. inside the clump storage.  The arena lock is held for the
 * whole operation.
 *
 * Returns n on success.  Returns -1 (as a uint32_t) when n is 0,
 * when the range falls outside the clump storage (seterr is called),
 * or when a disk block cannot be obtained.
 */
uint32_t
writearena(Arena *arena, uint64_t aa, uint8_t *clbuf, uint32_t n)
{
	DBlock *b;
	uint64_t a, limit;
	uint32_t blocksize, off, m, nn, left;
	int mode;

	if(n == 0)
		return -1;

	qlock(&arena->lock);
	limit = arena->size - arenadirsize(arena, arena->memstats.clumps);
	if(aa >= limit || aa + n > limit){
		qunlock(&arena->lock);
		seterr(EOk, "writing beyond arena clump storage");
		return -1;
	}

	/* split the address into a block-aligned base and an offset within it */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;

	nn = 0;
	for(;;){
		left = n - nn;

		/*
		 * A block that is only partly overwritten -- because the
		 * write starts inside it or ends inside it -- is requested
		 * ORDWR; only a block that is replaced in full is requested
		 * OWRITE.  The test must look at the bytes still to be
		 * written: comparing against the total n asked for OWRITE on
		 * the final partial block of any write of blocksize bytes or
		 * more.
		 * NOTE(review): presumably OWRITE does not load the block's
		 * current contents, so that mistake would clobber the data
		 * following the written range -- confirm against getdblock.
		 */
		if(off != 0 || left < blocksize)
			mode = ORDWR;
		else
			mode = OWRITE;

		b = getdblock(arena->part, a, mode);
		if(b == nil){
			qunlock(&arena->lock);
			return -1;
		}
		dirtydblock(b, DirtyArena);

		/* copy as much as fits in this block, but no more than is left */
		m = blocksize - off;
		if(m > left)
			m = left;
		memmove(&b->data[off], &clbuf[nn], m);
		putdblock(b);

		nn += m;
		if(nn == n)
			break;

		/* every block after the first is written from its start */
		off = 0;
		a += blocksize;
	}
	qunlock(&arena->lock);
	return n;
}
/*
 * Write the arena trailer block to the partition.
 *
 * The trailer lives at arena->base + arena->size.  The arena is
 * checked with okarena and packed into the block with packarena; the
 * arena score is then copied into the last VtScoreSize bytes of the
 * block.  The score copy and the putdblock happen even when the check
 * or the packing failed.
 *
 * Returns 0 on success, -1 if the block could not be obtained (an
 * error is logged) or if okarena or packarena reported failure.
 */
int
wbarena(Arena *arena)
{
	DBlock *trailer;
	int failed;

	trailer = getdblock(arena->part, arena->base + arena->size, OWRITE);
	if(trailer == nil){
		logerr(EAdmin, "can't write arena trailer: %r");
		return -1;
	}
	dirtydblock(trailer, DirtyArenaTrailer);

	/* packarena is attempted only when okarena accepts the arena */
	failed = 1;
	if(okarena(arena) >= 0 && packarena(arena, trailer->data) >= 0)
		failed = 0;

	scorecp(trailer->data + arena->blocksize - VtScoreSize, arena->score);
	putdblock(trailer);

	return failed ? -1 : 0;
}
/*
 * Read a clump of data from the arena's clump section into buf,
 * starting at arena-relative offset aa.
 *
 * n is a hint of the size of the data, not including the header.
 * The request is clipped so that it does not run past the end of the
 * clump storage, which ends at
 *	arena->size - arenadirsize(arena, arena->memstats.clumps).
 * The arena lock is held only while that limit is computed.
 *
 * Returns the number of bytes actually read (n after clipping).
 * Returns -1 (as a uint32_t) when n is not positive, when aa lies at
 * or beyond the end of the clump storage (seterr is called), or when
 * a disk block cannot be read.
 */
uint32_t
readarena(Arena *arena, uint64_t aa, uint8_t *buf, int32_t n)
{
	DBlock *b;
	uint64_t a;
	uint32_t blocksize, off, m;
	int32_t nn;

	/*
	 * A negative n would turn into a huge unsigned quantity in the
	 * clipping test and in the copy loop below and overrun buf, so it
	 * is rejected together with n == 0.
	 */
	if(n <= 0)
		return -1;

	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	qunlock(&arena->lock);
	if(aa >= a){
		seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
		return -1;
	}

	/* clip the request at the end of the clump storage */
	if(aa + n > a)
		n = a - aa;

	/* split the address into a block-aligned base and an offset within it */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);
	a -= off;

	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, OREAD);
		if(b == nil)
			return -1;

		/* take the rest of this block, but no more than is still wanted */
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&buf[nn], &b->data[off], m);
		putdblock(b);

		nn += m;
		if(nn == n)
			break;

		/* every block after the first is read from its start */
		off = 0;
		a += blocksize;
	}
	return n;
}
/*
 * Clump-info-group bookkeeping for writeaclump.
 *
 * When clump is the first clump of a group (clump is a multiple of
 * ArenaCIGSize), append aa to arena->cig as the offset at which that
 * group starts.  The table is loaded with loadcig if it is not in
 * memory; if it still is not there afterwards nothing is recorded.
 * If the group number does not match arena->ncig, a diagnostic is
 * printed and the in-memory table is discarded.
 */
static void
writeaclump_notecig(Arena *arena, uint32_t clump, uint64_t aa)
{
	if(clump % ArenaCIGSize != 0)
		return;

	if(arena->cig == nil){
		loadcig(arena);
		if(arena->cig == nil)
			return;
	}

	/* add aa as start of next cig */
	if(clump/ArenaCIGSize != arena->ncig){
		fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n", arena->name, clump, arena->ncig);
		arena->ncig = -1;
		vtfree(arena->cig);
		arena->cig = nil;
		return;
	}

	arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]);
	arena->cig[arena->ncig++].offset = aa;
}

/*
 * Allocate space for the clump and write it, updating the arena
 * directory.
 *
 * The clump header is packed into clbuf, then
 * c->info.size + ClumpSize + U32Size bytes of clbuf are written at the
 * arena's current used offset.  Afterwards the in-memory statistics,
 * the clump info groups, the clump info entry and the arena trailer
 * are updated.  The arena lock is held throughout.
 *
 * Returns the arena-relative address of the new clump, or TWID64 when
 * the arena is sealed or too full (in which case it is marked sealed
 * and the trailer rewritten), when packclump fails, or when a disk
 * block cannot be obtained.
 *
 * ZZZ question: should this distinguish between an arena
 * filling up and real errors writing the clump?
 */
uint64_t
writeaclump(Arena *arena, Clump *c, uint8_t *clbuf)
{
	DBlock *blk;
	uint64_t addr, aa;
	uint32_t clump, total, done, chunk, off, blocksize;

	total = c->info.size + ClumpSize + U32Size;

	qlock(&arena->lock);
	aa = arena->memstats.used;
	if(arena->memstats.sealed
	|| aa + total + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
		/* no room for this clump plus one more directory entry: seal */
		if(!arena->memstats.sealed){
			logerr(EOk, "seal memstats %s", arena->name);
			arena->memstats.sealed = 1;
			wbarena(arena);
		}
		goto Fail;
	}
	if(packclump(c, &clbuf[0], arena->clumpmagic) < 0)
		goto Fail;

	/*
	 * write the data out one block at a time
	 */
	blocksize = arena->blocksize;
	addr = arena->base + aa;
	off = addr & (blocksize - 1);
	addr -= off;
	done = 0;
	do{
		/* only a block entered part-way through is opened ORDWR */
		blk = getdblock(arena->part, addr, off != 0 ? ORDWR : OWRITE);
		if(blk == nil)
			goto Fail;
		dirtydblock(blk, DirtyArena);

		chunk = blocksize - off;
		if(chunk > total - done)
			chunk = total - done;
		memmove(&blk->data[off], &clbuf[done], chunk);
		putdblock(blk);

		done += chunk;
		off = 0;
		addr += blocksize;
	}while(done != total);

	/* account for the new clump in the in-memory statistics */
	arena->memstats.used += c->info.size + ClumpSize;
	arena->memstats.uncsize += c->info.uncsize;
	if(c->info.size < c->info.uncsize)
		arena->memstats.cclumps++;

	clump = arena->memstats.clumps;
	writeaclump_notecig(arena, clump, aa);

	arena->memstats.clumps++;
	if(arena->memstats.clumps == 0)
		sysfatal("clumps wrapped");

	arena->wtime = now();
	if(arena->ctime == 0)
		arena->ctime = arena->wtime;

	writeclumpinfo(arena, clump, &c->info);
	wbarena(arena);

	qunlock(&arena->lock);
	return aa;

Fail:
	qunlock(&arena->lock);
	return TWID64;
}
/*
 * Write the dirty index cache entries that belong to index section is
 * back to that section's partition.
 *
 * The dirty entries whose bucket numbers fall in [lo, hi] are fetched
 * with icachedirty, sorted with iesort, and then processed one chunk
 * at a time as handed out by nextchunk: the chunk's disk range is read
 * into buf, every entry is merged into its bucket inside buf, and the
 * range is written back and flushed.  Between chunks the routine
 * yields to disksched and sleeps according to icachesleeptime and
 * minicachesleeptime.
 *
 * buf is scratch space supplied by the caller; it must hold at least
 * the nbuf bytes that nextchunk reports for any chunk.
 *
 * Returns 0 if every chunk was written, -1 if any read, write, bad
 * bucket or bucket overflow occurred.  Processing continues with the
 * remaining chunks after an error.
 */
static int
icachewritesect(Index *ix, ISect *is, u8int *buf)
{
	int err, i, werr, h, bsize, t;
	u32int lo, hi;
	u64int addr, naddr;
	uint nbuf, off;
	DBlock *b;
	IBucket ib;
	IEntry *ie, *iedirty, **l, *chunk;

	/* bucket range covered by this section; clamp hi rather than overflow */
	lo = is->start * ix->div;
	if(TWID32/ix->div < is->stop)
		hi = TWID32;
	else
		hi = is->stop * ix->div - 1;

	trace(TraceProc, "icachewritesect enter %ud %ud %llud", lo, hi, iwrite.as.aa);

	iedirty = icachedirty(lo, hi, iwrite.as.aa);
	iedirty = iesort(iedirty);
	bsize = 1 << is->blocklog;
	err = 0;

	while(iedirty){
		/* pace ourselves: wait while writing is suspended, then sleep */
		disksched();
		while((t = icachesleeptime) == SleepForever){
			sleep(1000);
			disksched();
		}
		if(t < minicachesleeptime)
			t = minicachesleeptime;
		if(t > 0)
			sleep(t);

		trace(TraceProc, "icachewritesect nextchunk");
		chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);

		trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux", addr, nbuf);
		if(readpart(is->part, addr, buf, nbuf) < 0){
			fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
				"readpart: %r\n", argv0, is->part->name, addr);
			err = -1;
			continue;
		}
		trace(TraceProc, "icachewritesect updatebuf");
		addstat(StatIsectReadBytes, nbuf);
		addstat(StatIsectRead, 1);

		/*
		 * Merge each entry of the chunk into its bucket in buf.
		 * l always points at the link that leads to ie, so that an
		 * entry that cannot be stored can be unlinked from the chunk.
		 */
		for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){
again:
			naddr = ie2diskaddr(ix, is, ie);
			off = naddr - addr;
			if(off+bsize > nbuf){
				fprint(2, "%s: whoops! addr=0x%llux nbuf=%ud "
					"addr+nbuf=0x%llux naddr=0x%llux\n",
					argv0, addr, nbuf, addr+nbuf, naddr);
				assert(off+bsize <= nbuf);
			}
			unpackibucket(&ib, buf+off, is->bucketmagic);
			if(okibucket(&ib, is) < 0){
				fprint(2, "%s: bad bucket XXX\n", argv0);
				goto skipit;
			}
			trace(TraceProc, "icachewritesect add %V at 0x%llux", ie->score, naddr);
			h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
			if(h & 1){
				/*
				 * low bit set: overwrite the slot at h with the bit cleared
				 * NOTE(review): presumably the bit means the score is
				 * already in the bucket -- confirm against bucklook.
				 */
				h ^= 1;
				packientry(ie, &ib.data[h]);
			}else if(ib.n < is->buckmax){
				/* room left: open a gap at h and insert the entry there */
				memmove(&ib.data[h + IEntrySize], &ib.data[h], ib.n*IEntrySize - h);
				ib.n++;
				packientry(ie, &ib.data[h]);
			}else{
				fprint(2, "%s: bucket overflow XXX\n", argv0);
skipit:
				/*
				 * Drop ie from the chunk list, so it is not handed to
				 * icacheclean below, and carry on with its successor
				 * without advancing l.
				 */
				err = -1;
				*l = ie->nextdirty;
				ie = *l;
				if(ie)
					goto again;
				else
					break;
			}
			packibucket(&ib, buf+off, is->bucketmagic);
		}

		diskaccess(1);

		/* report the range being written, in the same form as the readpart trace */
		trace(TraceProc, "icachewritesect writepart 0x%llux+0x%ux", addr, nbuf);
		werr = 0;
		if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0)
			werr = -1;

		/*
		 * Copy the new contents into the disk block cache, one block
		 * at a time, for every block _getdblock returns.
		 * NOTE(review): the final 0 argument presumably restricts this
		 * to blocks that are already cached -- confirm against
		 * _getdblock.
		 */
		for(i=0; i<nbuf; i+=bsize){
			if((b = _getdblock(is->part, addr+i, ORDWR, 0)) != nil){
				memmove(b->data, buf+i, bsize);
				putdblock(b);
			}
		}

		if(werr < 0){
			fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
				"writepart: %r\n", argv0, is->part->name, addr);
			err = -1;
			continue;
		}

		addstat(StatIsectWriteBytes, nbuf);
		addstat(StatIsectWrite, 1);
		icacheclean(chunk);
	}

	trace(TraceProc, "icachewritesect done");
	return err;
}