static void checkEpoch(Fsck *chk, u32int epoch) { u32int a; Block *b; Entry e; Label l; chk->print("checking epoch %ud...\n", epoch); for(a=0; a<chk->nblocks; a++){ if(!readLabel(chk->cache, &l, (a+chk->hint)%chk->nblocks)){ error(chk, "could not read label for addr 0x%.8#ux", a); continue; } if(l.tag == RootTag && l.epoch == epoch) break; } if(a == chk->nblocks){ chk->print("could not find root block for epoch %ud", epoch); return; } a = (a+chk->hint)%chk->nblocks; b = cacheLocalData(chk->cache, a, BtDir, RootTag, OReadOnly, 0); if(b == nil){ error(chk, "could not read root block 0x%.8#ux: %R", a); return; } /* no one should point at root blocks */ setBit(chk->amap, a); setBit(chk->emap, a); setBit(chk->xmap, a); /* * First entry is the rest of the file system. * Second entry is link to previous epoch root, * just a convenience to help the search. */ if(!entryUnpack(&e, b->data, 0)){ error(chk, "could not unpack root block 0x%.8#ux: %R", a); blockPut(b); return; } walkEpoch(chk, b, e.score, BtDir, e.tag, epoch); if(entryUnpack(&e, b->data, 1)) chk->hint = globalToLocal(e.score); blockPut(b); }
Source * sourceRoot(Fs *fs, uint32_t addr, int mode) { Source *r; Block *b; b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0); if(b == nil) return nil; if(mode == OReadWrite && b->l.epoch != fs->ehi) { consPrint("sourceRoot: fs->ehi = %ud, b->l = %L\n", fs->ehi, &b->l); blockPut(b); vtSetError(EBadRoot); return nil; } r = sourceAlloc(fs, b, nil, 0, mode, 0); blockPut(b); return r; }
Fs * fsOpen(char *file, VtSession *z, int32_t ncache, int mode) { int fd, m; uint8_t oscore[VtScoreSize]; Block *b, *bs; Disk *disk; Fs *fs; Super super; switch(mode){ default: vtSetError(EBadMode); return nil; case OReadOnly: m = OREAD; break; case OReadWrite: m = ORDWR; break; } fd = open(file, m); if(fd < 0){ vtSetError("open %s: %r", file); return nil; } bwatchInit(); disk = diskAlloc(fd); if(disk == nil){ vtSetError("diskAlloc: %R"); close(fd); return nil; } fs = vtMemAllocZ(sizeof(Fs)); fs->mode = mode; fs->name = vtStrDup(file); fs->blockSize = diskBlockSize(disk); fs->elk = vtLockAlloc(); fs->cache = cacheAlloc(disk, z, ncache, mode); if(mode == OReadWrite && z) fs->arch = archInit(fs->cache, disk, fs, z); fs->z = z; b = cacheLocal(fs->cache, PartSuper, 0, mode); if(b == nil) goto Err; if(!superUnpack(&super, b->data)){ blockPut(b); vtSetError("bad super block"); goto Err; } blockPut(b); fs->ehi = super.epochHigh; fs->elo = super.epochLow; //fprint(2, "%s: fs->ehi %d fs->elo %d active=%d\n", argv0, fs->ehi, fs->elo, super.active); fs->source = sourceRoot(fs, super.active, mode); if(fs->source == nil){ /* * Perhaps it failed because the block is copy-on-write. * Do the copy and try again. */ if(mode == OReadOnly || strcmp(vtGetError(), EBadRoot) != 0) goto Err; b = cacheLocalData(fs->cache, super.active, BtDir, RootTag, OReadWrite, 0); if(b == nil){ vtSetError("cacheLocalData: %R"); goto Err; } if(b->l.epoch == fs->ehi){ blockPut(b); vtSetError("bad root source block"); goto Err; } b = blockCopy(b, RootTag, fs->ehi, fs->elo); if(b == nil) goto Err; localToGlobal(super.active, oscore); super.active = b->addr; bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite); if(bs == nil){ blockPut(b); vtSetError("cacheLocal: %R"); goto Err; } superPack(&super, bs->data); blockDependency(bs, b, 0, oscore, nil); blockPut(b); blockDirty(bs); blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0); blockPut(bs); fs->source = sourceRoot(fs, super.active, mode); if(fs->source == nil){ vtSetError("sourceRoot: %R"); goto Err; } } //fprint(2, "%s: got fs source\n", argv0); vtRLock(fs->elk); fs->file = fileRoot(fs->source); fs->source->file = fs->file; /* point back */ vtRUnlock(fs->elk); if(fs->file == nil){ vtSetError("fileRoot: %R"); goto Err; } //fprint(2, "%s: got file root\n", argv0); if(mode == OReadWrite){ fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000); fs->snap = snapInit(fs); } return fs; Err: fprint(2, "%s: fsOpen error\n", argv0); fsClose(fs); return nil; }
static int archWalk(Param *p, u32int addr, uchar type, u32int tag) { int ret, i, x, psize, dsize; uchar *data, score[VtScoreSize]; Block *b; Label l; Entry *e; WalkPtr w; p->nvisit++; b = cacheLocalData(p->c, addr, type, tag, OReadWrite,0); if(b == nil){ fprint(2, "archive(%ud, %#ux): cannot find block: %R\n", p->snapEpoch, addr); if(strcmp(vtGetError(), ELabelMismatch) == 0){ /* might as well plod on so we write _something_ to Venti */ memmove(p->score, vtZeroScore, VtScoreSize); return ArchFaked; } return ArchFailure; } if(DEBUG) fprint(2, "%*sarchive(%ud, %#ux): block label %L\n", p->depth*2, "", p->snapEpoch, b->addr, &b->l); p->depth++; if(p->depth > p->maxdepth) p->maxdepth = p->depth; data = b->data; if((b->l.state&BsVenti) == 0){ initWalk(&w, b, b->l.type==BtDir ? p->dsize : p->psize); for(i=0; nextWalk(&w, score, &type, &tag, &e); i++){ if(e){ if(!(e->flags&VtEntryActive)) continue; if((e->snap && !e->archive) || (e->flags&VtEntryNoArchive)){ if(0) fprint(2, "snap; faking %#ux\n", b->addr); if(data == b->data){ data = copyBlock(b, p->blockSize); if(data == nil){ ret = ArchFailure; goto Out; } w.data = data; } memmove(e->score, vtZeroScore, VtScoreSize); e->depth = 0; e->size = 0; e->tag = 0; e->flags &= ~VtEntryLocal; entryPack(e, data, w.n-1); continue; } } addr = globalToLocal(score); if(addr == NilBlock) continue; dsize = p->dsize; psize = p->psize; if(e){ p->dsize= e->dsize; p->psize = e->psize; } vtUnlock(b->lk); x = archWalk(p, addr, type, tag); vtLock(b->lk); if(e){ p->dsize = dsize; p->psize = psize; } while(b->iostate != BioClean && b->iostate != BioDirty) vtSleep(b->ioready); switch(x){ case ArchFailure: fprint(2, "archWalk %#ux failed; ptr is in %#ux offset %d\n", addr, b->addr, i); ret = ArchFailure; goto Out; case ArchFaked: /* * When we're writing the entry for an archive directory * (like /archive/2003/1215) then even if we've faked * any data, record the score unconditionally. * This way, we will always record the Venti score here. * Otherwise, temporary data or corrupted file system * would cause us to keep holding onto the on-disk * copy of the archive. */ if(e==nil || !e->archive) if(data == b->data){ if(0) fprint(2, "faked %#ux, faking %#ux (%V)\n", addr, b->addr, p->score); data = copyBlock(b, p->blockSize); if(data == nil){ ret = ArchFailure; goto Out; } w.data = data; } /* fall through */ if(0) fprint(2, "falling\n"); case ArchSuccess: if(e){ memmove(e->score, p->score, VtScoreSize); e->flags &= ~VtEntryLocal; entryPack(e, data, w.n-1); }else memmove(data+(w.n-1)*VtScoreSize, p->score, VtScoreSize); if(data == b->data){ blockDirty(b); /* * If b is in the active tree, then we need to note that we've * just removed addr from the active tree (replacing it with the * copy we just stored to Venti). If addr is in other snapshots, * this will close addr but not free it, since it has a non-empty * epoch range. * * If b is in the active tree but has been copied (this can happen * if we get killed at just the right moment), then we will * mistakenly leak its kids. * * The children of an archive directory (e.g., /archive/2004/0604) * are not treated as in the active tree. */ if((b->l.state&BsCopied)==0 && (e==nil || e->snap==0)) blockRemoveLink(b, addr, p->l.type, p->l.tag, 0); } break; } } if(!ventiSend(p->a, b, data)){ p->nfailsend++; ret = ArchFailure; goto Out; } p->nsend++; if(data != b->data) p->nfake++; if(data == b->data){ /* not faking it, so update state */ p->nreal++; l = b->l; l.state |= BsVenti; if(!blockSetLabel(b, &l, 0)){ ret = ArchFailure; goto Out; } } } shaBlock(p->score, b, data, p->blockSize); if(0) fprint(2, "ventisend %V %p %p %p\n", p->score, data, b->data, w.data); ret = data!=b->data ? ArchFaked : ArchSuccess; p->l = b->l; Out: if(data != b->data) vtMemFree(data); p->depth--; blockPut(b); return ret; }
/* * Retrieve the block containing the entry for r. * If a snapshot has happened, we might need * to get a new copy of the block. We avoid this * in the common case by caching the score for * the block and the last epoch in which it was valid. * * We use r->mode to tell the difference between active * file system sources (OReadWrite) and sources for the * snapshot file system (OReadOnly). */ static Block* sourceLoadBlock(Source *r, int mode) { uint32_t addr; Block *b; switch(r->mode) { default: assert(0); case OReadWrite: assert(r->mode == OReadWrite); /* * This needn't be true -- we might bump the low epoch * to reclaim some old blocks, but since this score is * OReadWrite, the blocks must all still be open, so none * are reclaimed. Thus it's okay that the epoch is so low. * Proceed. assert(r->epoch >= r->fs->elo); */ if(r->epoch == r->fs->ehi) { b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite); if(b == nil) return nil; assert(r->epoch == b->l.epoch); return b; } assert(r->parent != nil); if(!sourceLock(r->parent, OReadWrite)) return nil; b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite); sourceUnlock(r->parent); if(b == nil) return nil; assert(b->l.epoch == r->fs->ehi); // fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score); memmove(r->score, b->score, VtScoreSize); r->scoreEpoch = b->l.epoch; r->tag = b->l.tag; r->epoch = r->fs->ehi; return b; case OReadOnly: addr = globalToLocal(r->score); if(addr == NilBlock) return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode); b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch); if(b) return b; /* * If it failed because the epochs don't match, the block has been * archived and reclaimed. Rewalk from the parent and get the * new pointer. This can't happen in the OReadWrite case * above because blocks in the current epoch don't get * reclaimed. The fact that we're OReadOnly means we're * a snapshot. (Or else the file system is read-only, but then * the archiver isn't going around deleting blocks.) */ if(strcmp(vtGetError(), ELabelMismatch) == 0) { if(!sourceLock(r->parent, OReadOnly)) return nil; b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly); sourceUnlock(r->parent); if(b) { fprint(2, "sourceAlloc: lost %V found %V\n", r->score, b->score); memmove(r->score, b->score, VtScoreSize); r->scoreEpoch = b->l.epoch; return b; } } return nil; } }