/*
 * Fetch the directory entry for source r into *e.
 * A nil source yields a zeroed entry and success.
 *
 * If checkepoch is set, also load the block the entry points at and
 * verify it was allocated in an epoch strictly older than the entry's
 * own containing block (a child block must predate its parent's copy);
 * a violation is only warned about on fd 2, never treated as failure.
 *
 * Returns 1 on success, 0 if the containing block cannot be loaded
 * or the entry cannot be unpacked.
 */
static int
getEntry(Source *r, Entry *e, int checkepoch)
{
	u32int epoch;
	Block *b;

	if(r == nil){
		/*
		 * Bug fix: original code did memset(&e, 0, sizeof e),
		 * which zeroes the local pointer variable itself (only
		 * sizeof(Entry*) bytes) and leaves the caller's Entry
		 * uninitialized.  Zero the pointed-to Entry instead.
		 */
		memset(e, 0, sizeof *e);
		return 1;
	}

	/* load the directory block holding r's entry slot */
	b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadOnly);
	if(b == nil)
		return 0;
	if(!entryUnpack(e, b->data, r->offset % r->epb)){
		blockPut(b);
		return 0;
	}
	epoch = b->l.epoch;
	blockPut(b);

	if(checkepoch){
		b = cacheGlobal(r->fs->cache, e->score, entryType(e), e->tag, OReadOnly);
		if(b){
			if(b->l.epoch >= epoch)
				fprint(2, "warning: entry %p epoch not older %#.8ux/%d %V/%d in getEntry\n",
					r, b->addr, b->l.epoch, r->score, epoch);
			blockPut(b);
		}
	}
	return 1;
}
/*
 * Change the depth of the source r (grow only).
 * The entry e for r is contained in block p.
 *
 * Adds pointer-block layers above the current root until e->depth
 * reaches depth, wiring write-ordering dependencies so the new top
 * blocks reach disk before the entry that names them.
 * Returns non-zero iff the requested depth was reached.
 */
static int
sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *bb;
	uint32_t tag;
	int type;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	/* current root block of the source's tree */
	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	/* remember the old entry for the dependency record below */
	oe = *e;

	/*
	 * Keep adding layers until we get to the right depth
	 * or an error occurs.
	 */
	while(e->depth < depth){
		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
		if(bb == nil)
			break;
//fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
		/* new top block's first pointer is the old top */
		memmove(bb->data, b->score, VtScoreSize);
		memmove(e->score, bb->score, VtScoreSize);
		e->depth++;
		type++;
		e->tag = tag;
		e->flags |= VtEntryLocal;
		/* bb must hit disk after b; the slot previously held zero */
		blockDependency(bb, b, 0, vtZeroScore, nil);
		blockPut(b);
		b = bb;
		blockDirty(b);
	}

	/* write the updated entry back; p depends on the new root b */
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockPut(b);
	blockDirty(p);

	return e->depth == depth;
}
/*
 * Store entry *e back into the directory block that holds r's slot.
 * The generation number already on disk is preserved (copied into *e)
 * so callers never have to manage it themselves.
 * Returns 1 on success, 0 if the block can't be loaded or the old
 * entry can't be unpacked.
 */
static int
setEntry(Source *r, Entry *e)
{
	Block *b;
	Entry old;
	uint32_t slot;

	slot = r->offset % r->epb;
	b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
	if(0) fprint(2, "setEntry: b %#ux %d score=%V\n", b->addr, slot, e->score);
	if(b == nil)
		return 0;
	if(!entryUnpack(&old, b->data, slot)){
		blockPut(b);
		return 0;
	}
	/* carry the on-disk generation forward */
	e->gen = old.gen;
	entryPack(e, b->data, slot);

	/* BUG b should depend on the entry pointer */
	blockDirty(b);
	blockPut(b);
	return 1;
}
/*
 * When b points at bb, need to check:
 *
 * (i) b.e in [bb.e, bb.eClose)
 * (ii) if b.e==bb.e, then no other b' in e points at bb.
 * (iii) if !(b.state&Copied) and b.e==bb.e then no other b' points at bb.
 * (iv) if b is active then no other active b' points at bb.
 * (v) if b is a past life of b' then only one of b and b' is active
 *	(too hard to check)
 *
 * Recursive consistency walk over the block tree for one epoch.
 * b is the parent (nil at the root of a walk), score/type/tag name
 * the child to visit.  Bitmaps emap/xmap/amap record blocks already
 * seen per-rule; errmap records blocks whose children were cleared.
 * Returns 1 if the subtree is consistent (or was repaired via the
 * chk->clrp/clre/close callbacks), 0 on a hard error.
 */
static int
walkEpoch(Fsck *chk, Block *b, uchar score[VtScoreSize], int type,
	u32int tag, u32int epoch)
{
	int i, ret;
	u32int addr, ep;
	Block *bb;
	Entry e;

	if(b && chk->walkdepth == 0 && chk->printblocks)
		chk->print("%V %d %#.8ux %#.8ux\n", b->score, b->l.type, b->l.tag, b->l.epoch);

	/* without venti, a global (non-local) score has no backing block to check */
	if(!chk->useventi && globalToLocal(score) == NilBlock)
		return 1;

	chk->walkdepth++;

	bb = cacheGlobal(chk->cache, score, type, tag, OReadOnly);
	if(bb == nil){
		error(chk, "could not load block %V type %d tag %ux: %R",
			score, type, tag);
		chk->walkdepth--;
		return 0;
	}
	if(chk->printblocks)
		chk->print("%*s%V %d %#.8ux %#.8ux\n", chk->walkdepth*2, "",
			score, type, tag, bb->l.epoch);

	ret = 0;
	addr = globalToLocal(score);
	if(addr == NilBlock){
		/* archived to venti; nothing local to validate */
		ret = 1;
		goto Exit;
	}

	if(b){
		/* (i) parent's epoch must lie within the child's open interval */
		if(b->l.epoch < bb->l.epoch || bb->l.epochClose <= b->l.epoch){
			error(chk, "walk: block %#ux [%ud, %ud) points at %#ux [%ud, %ud)",
				b->addr, b->l.epoch, b->l.epochClose,
				bb->addr, bb->l.epoch, bb->l.epochClose);
			goto Exit;
		}

		/* (ii) same-epoch child may have only one same-epoch parent */
		if(b->l.epoch == epoch && bb->l.epoch == epoch){
			if(getBit(chk->emap, addr)){
				error(chk, "walk: epoch join detected: addr %#ux %L",
					bb->addr, &bb->l);
				goto Exit;
			}
			setBit(chk->emap, addr);
		}

		/* (iii) an uncopied parent must be the unique pointer to the child */
		if(!(b->l.state&BsCopied) && b->l.epoch == bb->l.epoch){
			if(getBit(chk->xmap, addr)){
				error(chk, "walk: copy join detected; addr %#ux %L",
					bb->addr, &bb->l);
				goto Exit;
			}
			setBit(chk->xmap, addr);
		}
	}

	/* (iv) */
	if(epoch == chk->fs->ehi){
		/*
		 * since epoch==fs->ehi is first, amap is same as
		 * ``have seen active''
		 */
		if(getBit(chk->amap, addr)){
			error(chk, "walk: active join detected: addr %#ux %L",
				bb->addr, &bb->l);
			goto Exit;
		}
		if(bb->l.state&BsClosed)
			error(chk, "walk: addr %#ux: block is in active tree but is closed", addr);
	}else if(!getBit(chk->amap, addr))
		if(!(bb->l.state&BsClosed)){
			/* block is not in the active tree but was never closed; close it now */
			// error(chk, "walk: addr %#ux: block is not in active tree, not closed (%d)",
			//	addr, bb->l.epochClose);
			chk->close(chk, bb, epoch+1);
			chk->nclose++;
		}

	/* already visited via the active tree; don't descend twice */
	if(getBit(chk->amap, addr)){
		ret = 1;
		goto Exit;
	}
	setBit(chk->amap, addr);

	if(chk->nseen++%chk->quantum == 0)
		chk->print("check: visited %d/%d blocks (%.0f%%)\n",
			chk->nseen, chk->nblocks, chk->nseen*100./chk->nblocks);

	b = nil;		/* make sure no more refs to parent */
	USED(b);

	switch(type){
	default:
		/* pointer block: recurse on every score slot */
		for(i = 0; i < chk->bsize/VtScoreSize; i++)
			if(!walkEpoch(chk, bb, bb->data + i*VtScoreSize,
				type-1, tag, epoch)){
				setBit(chk->errmap, bb->addr);
				chk->clrp(chk, bb, i);
				chk->nclrp++;
			}
		break;
	case BtData:
		/* leaf data: nothing below to check */
		break;
	case BtDir:
		for(i = 0; i < chk->bsize/VtEntrySize; i++){
			if(!entryUnpack(&e, bb->data, i)){
				// error(chk, "walk: could not unpack entry: %ux[%d]: %R",
				//	addr, i);
				setBit(chk->errmap, bb->addr);
				chk->clre(chk, bb, i);
				chk->nclre++;
				continue;
			}
			if(!(e.flags & VtEntryActive))
				continue;
			if(0) fprint(2, "%x[%d] tag=%x snap=%d score=%V\n",
				addr, i, e.tag, e.snap, e.score);
			ep = epoch;
			if(e.snap != 0){
				/* snapshot entry: its epoch must predate ours; don't descend */
				if(e.snap >= epoch){
					// error(chk, "bad snap in entry: %ux[%d] snap = %ud: epoch = %ud",
					//	addr, i, e.snap, epoch);
					setBit(chk->errmap, bb->addr);
					chk->clre(chk, bb, i);
					chk->nclre++;
					continue;
				}
				continue;
			}
			if(e.flags & VtEntryLocal){
				/* sub-UserTag tags are reserved; RootTag[1] is the old-root hint */
				if(e.tag < UserTag)
					if(e.tag != RootTag || tag != RootTag || i != 1){
						// error(chk, "bad tag in entry: %ux[%d] tag = %ux",
						//	addr, i, e.tag);
						setBit(chk->errmap, bb->addr);
						chk->clre(chk, bb, i);
						chk->nclre++;
						continue;
					}
			}else if(e.tag != 0){
				// error(chk, "bad tag in entry: %ux[%d] tag = %ux",
				//	addr, i, e.tag);
				setBit(chk->errmap, bb->addr);
				chk->clre(chk, bb, i);
				chk->nclre++;
				continue;
			}
			if(!walkEpoch(chk, bb, e.score, entryType(&e), e.tag, ep)){
				setBit(chk->errmap, bb->addr);
				chk->clre(chk, bb, i);
				chk->nclre++;
			}
		}
		break;
	}

	ret = 1;

Exit:
	chk->walkdepth--;
	blockPut(bb);
	return ret;
}
/*
 * Advance the file system epoch, taking a snapshot of the root.
 * If doarchive is set, chain the old root into super.next for the
 * archiver.  Returns 1 on success, 0 on failure.
 */
static int
bumpEpoch(Fs *fs, int doarchive)
{
	uint8_t oscore[VtScoreSize];
	uint32_t oldaddr;
	Block *b, *bs;
	Entry e;
	Source *r;
	Super super;

	/*
	 * Duplicate the root block.
	 *
	 * As a hint to flchk, the garbage collector,
	 * and any (human) debuggers, store a pointer
	 * to the old root block in entry 1 of the new root block.
	 */
	r = fs->source;
	b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
	if(b == nil)
		return 0;

	memset(&e, 0, sizeof e);
	e.flags = VtEntryActive | VtEntryLocal | VtEntryDir;
	memmove(e.score, b->score, VtScoreSize);
	e.tag = RootTag;
	e.snap = b->l.epoch;

	/* copy into the new epoch; b is consumed/replaced by the copy */
	b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
	if(b == nil){
		fprint(2, "%s: bumpEpoch: blockCopy: %R\n", argv0);
		return 0;
	}

	/* NOTE(review): dead code, but oldaddr is read here before it is
	 * assigned below — would be uninitialized if ever enabled. */
	if(0) fprint(2, "%s: snapshot root from %d to %d\n", argv0, oldaddr, b->addr);
	entryPack(&e, b->data, 1);
	blockDirty(b);

	/*
	 * Update the superblock with the new root and epoch.
	 */
	if((bs = superGet(fs->cache, &super)) == nil)
		return 0;
	fs->ehi++;
	memmove(r->score, b->score, VtScoreSize);
	r->epoch = fs->ehi;
	super.epochHigh = fs->ehi;
	oldaddr = super.active;
	super.active = b->addr;
	if(doarchive)
		super.next = oldaddr;

	/*
	 * Record that the new super.active can't get written out until
	 * the new b gets written out.  Until then, use the old value.
	 */
	localToGlobal(oldaddr, oscore);
	blockDependency(bs, b, 0, oscore, nil);
	blockPut(b);

	/*
	 * We force the super block to disk so that super.epochHigh gets updated.
	 * Otherwise, if we crash and come back, we might incorrectly treat as active
	 * some of the blocks that making up the snapshot we just created.
	 * Basically every block in the active file system and all the blocks in
	 * the recently-created snapshot depend on the super block now.
	 * Rather than record all those dependencies, we just force the block to disk.
	 *
	 * Note that blockWrite might actually (will probably) send a slightly outdated
	 * super.active to disk.  It will be the address of the most recent root that has
	 * gone to disk.
	 */
	superWrite(bs, &super, 1);
	blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
	blockPut(bs);

	return 1;
}
/*
 * Retrieve the block containing the entry for r.
 * If a snapshot has happened, we might need
 * to get a new copy of the block.  We avoid this
 * in the common case by caching the score for
 * the block and the last epoch in which it was valid.
 *
 * We use r->mode to tell the difference between active
 * file system sources (OReadWrite) and sources for the
 * snapshot file system (OReadOnly).
 *
 * Returns the block (caller must blockPut) or nil on failure.
 */
static Block*
sourceLoadBlock(Source *r, int mode)
{
	uint32_t addr;
	Block *b;

	switch(r->mode){
	default:
		assert(0);
	case OReadWrite:
		assert(r->mode == OReadWrite);
		/*
		 * This needn't be true -- we might bump the low epoch
		 * to reclaim some old blocks, but since this score is
		 * OReadWrite, the blocks must all still be open, so none
		 * are reclaimed.  Thus it's okay that the epoch is so low.
		 * Proceed.
		assert(r->epoch >= r->fs->elo);
		 */
		if(r->epoch == r->fs->ehi){
			/* cached score is current: use it directly */
			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
			if(b == nil)
				return nil;
			assert(r->epoch == b->l.epoch);
			return b;
		}
		/* epoch bumped since last load: rewalk via parent to COW the block */
		assert(r->parent != nil);
		if(!sourceLock(r->parent, OReadWrite))
			return nil;
		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
		sourceUnlock(r->parent);
		if(b == nil)
			return nil;
		assert(b->l.epoch == r->fs->ehi);
	//	fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
		/* refresh the cached score/tag/epoch for the fast path next time */
		memmove(r->score, b->score, VtScoreSize);
		r->scoreEpoch = b->l.epoch;
		r->tag = b->l.tag;
		r->epoch = r->fs->ehi;
		return b;

	case OReadOnly:
		addr = globalToLocal(r->score);
		if(addr == NilBlock)
			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);

		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
		if(b)
			return b;

		/*
		 * If it failed because the epochs don't match, the block has been
		 * archived and reclaimed.  Rewalk from the parent and get the
		 * new pointer.  This can't happen in the OReadWrite case
		 * above because blocks in the current epoch don't get
		 * reclaimed.  The fact that we're OReadOnly means we're
		 * a snapshot.  (Or else the file system is read-only, but then
		 * the archiver isn't going around deleting blocks.)
		 */
		if(strcmp(vtGetError(), ELabelMismatch) == 0){
			if(!sourceLock(r->parent, OReadOnly))
				return nil;
			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
			sourceUnlock(r->parent);
			if(b){
				fprint(2, "sourceAlloc: lost %V found %V\n",
					r->score, b->score);
				memmove(r->score, b->score, VtScoreSize);
				r->scoreEpoch = b->l.epoch;
				return b;
			}
		}
		return nil;
	}
}
/*
 * Reduce the depth of source r's pointer tree to depth.
 * The entry e for r is contained in block p.
 * Walks down from the current root rb to the block b that will
 * become the new root, repoints e at b, zeroes the pointer from the
 * old parent ob, and unlinks the old root.
 * Returns non-zero iff the requested depth was reached.
 */
static int
sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *nb, *ob, *rb;
	uint32_t tag;
	int type, d;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(rb == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	/*
	 * Walk down to the new root block.
	 * We may stop early, but something is better than nothing.
	 */
	oe = *e;
	ob = nil;
	b = rb;
	/* BUG: explain type++.  i think it is a real bug */
	for(d=e->depth; d > depth; d--, type++){
		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
		if(nb == nil)
			break;
		if(ob!=nil && ob!=rb)
			blockPut(ob);
		ob = b;
		b = nb;
	}

	if(b == rb){
		/* couldn't descend even one level; nothing to do */
		blockPut(rb);
		return 0;
	}

	/*
	 * Right now, e points at the root block rb, b is the new root block,
	 * and ob points at b.  To update:
	 *
	 *	(i) change e to point at b
	 *	(ii) zero the pointer ob -> b
	 *	(iii) free the root block
	 *
	 * p (the block containing e) must be written before
	 * anything else.
	 */

	/* (i) */
	e->depth = d;
	/* might have been local and now global; reverse cannot happen */
	if(globalToLocal(b->score) == NilBlock)
		e->flags &= ~VtEntryLocal;
	memmove(e->score, b->score, VtScoreSize);
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockDirty(p);

	/* (ii) */
	memmove(ob->data, vtZeroScore, VtScoreSize);
	blockDependency(ob, p, 0, b->score, nil);
	blockDirty(ob);

	/* (iii) */
	if(rb->addr != NilBlock)
		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);
	blockPut(rb);

	if(ob!=nil && ob!=rb)
		blockPut(ob);
	blockPut(b);

	return d == depth;
}
/*
 * Walk one level down from block p through slot index, returning the
 * child block (caller must blockPut).  p is either a directory block
 * (entry e supplies the child's score/type/tag) or a pointer block
 * (child score is at index in p->data).
 *
 * For OReadWrite walks in the current epoch, performs copy-on-write:
 * if the child belongs to an older epoch it is copied forward, the
 * pointer in p is updated with the proper write-ordering dependency,
 * and the old child is unlinked.
 * Returns nil on failure.
 */
static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
	Block *b;
	Cache *c;
	uint32_t addr;
	int type;
	uint8_t oscore[VtScoreSize], score[VtScoreSize];
	Entry oe;

	c = fs->cache;

	if((p->l.type & BtLevelMask) == 0){
		/* directory block: child identity comes from the entry */
		assert(p->l.type == BtDir);
		type = entryType(e);
		b = cacheGlobal(c, e->score, type, e->tag, mode);
	}else{
		/* pointer block: child is one level down */
		type = p->l.type - 1;
		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
	}

	if(b)
		b->pc = getcallerpc(&p);

	if(b == nil || mode == OReadOnly)
		return b;

	/* writable walks require the parent to already be in the current epoch */
	if(p->l.epoch != fs->ehi){
		fprint(2, "blockWalk: parent not writable\n");
		abort();
	}
	if(b->l.epoch == fs->ehi)
		return b;

	oe = *e;

	/*
	 * Copy on write.
	 */
	if(e->tag == 0){
		assert(p->l.type == BtDir);
		e->tag = tagGen();
		e->flags |= VtEntryLocal;
	}

	addr = b->addr;
	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
	if(b == nil)
		return nil;

	b->pc = getcallerpc(&p);
	assert(b->l.epoch == fs->ehi);

	blockDirty(b);
	memmove(score, b->score, VtScoreSize);
	if(p->l.type == BtDir){
		/* repoint the entry; p may not hit disk before the copy b */
		memmove(e->score, b->score, VtScoreSize);
		entryPack(e, p->data, index);
		blockDependency(p, b, index, nil, &oe);
	}else{
		/* repoint the score slot, remembering the old score for rollback */
		memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
		blockDependency(p, b, index, oscore, nil);
	}
	blockDirty(p);

	/* the pre-copy child is no longer referenced from p */
	if(addr != NilBlock)
		blockRemoveLink(p, addr, type, e->tag, 0);

	return b;
}
/*
 * Truncate the tree rooted at entry e down to size bytes.
 * Descends the pointer blocks, zeroing (and unlinking) every pointer
 * that lies entirely beyond size, then recurses into the one block
 * that straddles the boundary.  Finally zeroes the tail of the
 * boundary data block.  Only blocks already in the current epoch are
 * touched; an old-epoch block stops the walk (not worth copying just
 * to zero part of it).
 * Returns 1 on success, 0 on failure or early stop.
 */
static int
sourceShrinkSize(Source *r, Entry *e, uint64_t size)
{
	int i, type, ppb;
	uint64_t ptrsz;
	uint32_t addr;
	uint8_t score[VtScoreSize];
	Block *b;

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	/* bytes of file data reachable through one pointer at the top level */
	ptrsz = e->dsize;
	ppb = e->psize/VtScoreSize;
	for(i=0; i+1<e->depth; i++)
		ptrsz *= ppb;

	while(type&BtLevelMask){
		if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
			/* not worth copying the block just so we can zero some of it */
			blockPut(b);
			return 0;
		}

		/*
		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
		 */

		/* zero the pointers to unnecessary blocks */
		i = (size+ptrsz-1)/ptrsz;
		for(; i<ppb; i++){
			addr = globalToLocal(b->data+i*VtScoreSize);
			memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
			blockDirty(b);
			if(addr != NilBlock)
				blockRemoveLink(b, addr, type-1, e->tag, 1);
		}

		/* recurse (go around again) on the partially necessary block */
		i = size/ptrsz;
		size = size%ptrsz;
		if(size == 0){
			blockPut(b);
			return 1;
		}
		ptrsz /= ppb;
		type--;
		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
		blockPut(b);
		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
		if(b == nil)
			return 0;
	}

	if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
		blockPut(b);
		return 0;
	}

	/*
	 * No one ever truncates BtDir blocks.
	 */
	if(type == BtData && e->dsize > size){
		memset(b->data+size, 0, e->dsize-size);
		blockDirty(b);
	}
	blockPut(b);
	return 1;
}