static int sourceKill(Source *r, int doremove) { Entry e; Block *b; uint32_t addr; uint32_t tag; int type; assert(sourceIsLocked(r)); b = sourceLoad(r, &e); if(b == nil) return 0; assert(b->l.epoch == r->fs->ehi); if(doremove==0 && e.size == 0) { /* already truncated */ blockPut(b); return 1; } /* remember info on link we are removing */ addr = globalToLocal(e.score); type = entryType(&e); tag = e.tag; if(doremove) { if(e.gen != ~0) e.gen++; e.dsize = 0; e.psize = 0; e.flags = 0; } else { e.flags &= ~VtEntryLocal; } e.depth = 0; e.size = 0; e.tag = 0; memmove(e.score, vtZeroScore, VtScoreSize); entryPack(&e, b->data, r->offset % r->epb); blockDirty(b); if(addr != NilBlock) blockRemoveLink(b, addr, type, tag, 1); blockPut(b); if(doremove) { sourceUnlock(r); sourceClose(r); } return 1; }
static int bumpEpoch(Fs *fs, int doarchive) { uint8_t oscore[VtScoreSize]; uint32_t oldaddr; Block *b, *bs; Entry e; Source *r; Super super; /* * Duplicate the root block. * * As a hint to flchk, the garbage collector, * and any (human) debuggers, store a pointer * to the old root block in entry 1 of the new root block. */ r = fs->source; b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly); if(b == nil) return 0; memset(&e, 0, sizeof e); e.flags = VtEntryActive | VtEntryLocal | VtEntryDir; memmove(e.score, b->score, VtScoreSize); e.tag = RootTag; e.snap = b->l.epoch; b = blockCopy(b, RootTag, fs->ehi+1, fs->elo); if(b == nil){ fprint(2, "%s: bumpEpoch: blockCopy: %R\n", argv0); return 0; } if(0) fprint(2, "%s: snapshot root from %d to %d\n", argv0, oldaddr, b->addr); entryPack(&e, b->data, 1); blockDirty(b); /* * Update the superblock with the new root and epoch. */ if((bs = superGet(fs->cache, &super)) == nil) return 0; fs->ehi++; memmove(r->score, b->score, VtScoreSize); r->epoch = fs->ehi; super.epochHigh = fs->ehi; oldaddr = super.active; super.active = b->addr; if(doarchive) super.next = oldaddr; /* * Record that the new super.active can't get written out until * the new b gets written out. Until then, use the old value. */ localToGlobal(oldaddr, oscore); blockDependency(bs, b, 0, oscore, nil); blockPut(b); /* * We force the super block to disk so that super.epochHigh gets updated. * Otherwise, if we crash and come back, we might incorrectly treat as active * some of the blocks that making up the snapshot we just created. * Basically every block in the active file system and all the blocks in * the recently-created snapshot depend on the super block now. * Rather than record all those dependencies, we just force the block to disk. * * Note that blockWrite might actually (will probably) send a slightly outdated * super.active to disk. It will be the address of the most recent root that has * gone to disk. */ superWrite(bs, &super, 1); blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0); blockPut(bs); return 1; }
Fs * fsOpen(char *file, VtSession *z, int32_t ncache, int mode) { int fd, m; uint8_t oscore[VtScoreSize]; Block *b, *bs; Disk *disk; Fs *fs; Super super; switch(mode){ default: vtSetError(EBadMode); return nil; case OReadOnly: m = OREAD; break; case OReadWrite: m = ORDWR; break; } fd = open(file, m); if(fd < 0){ vtSetError("open %s: %r", file); return nil; } bwatchInit(); disk = diskAlloc(fd); if(disk == nil){ vtSetError("diskAlloc: %R"); close(fd); return nil; } fs = vtMemAllocZ(sizeof(Fs)); fs->mode = mode; fs->name = vtStrDup(file); fs->blockSize = diskBlockSize(disk); fs->elk = vtLockAlloc(); fs->cache = cacheAlloc(disk, z, ncache, mode); if(mode == OReadWrite && z) fs->arch = archInit(fs->cache, disk, fs, z); fs->z = z; b = cacheLocal(fs->cache, PartSuper, 0, mode); if(b == nil) goto Err; if(!superUnpack(&super, b->data)){ blockPut(b); vtSetError("bad super block"); goto Err; } blockPut(b); fs->ehi = super.epochHigh; fs->elo = super.epochLow; //fprint(2, "%s: fs->ehi %d fs->elo %d active=%d\n", argv0, fs->ehi, fs->elo, super.active); fs->source = sourceRoot(fs, super.active, mode); if(fs->source == nil){ /* * Perhaps it failed because the block is copy-on-write. * Do the copy and try again. */ if(mode == OReadOnly || strcmp(vtGetError(), EBadRoot) != 0) goto Err; b = cacheLocalData(fs->cache, super.active, BtDir, RootTag, OReadWrite, 0); if(b == nil){ vtSetError("cacheLocalData: %R"); goto Err; } if(b->l.epoch == fs->ehi){ blockPut(b); vtSetError("bad root source block"); goto Err; } b = blockCopy(b, RootTag, fs->ehi, fs->elo); if(b == nil) goto Err; localToGlobal(super.active, oscore); super.active = b->addr; bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite); if(bs == nil){ blockPut(b); vtSetError("cacheLocal: %R"); goto Err; } superPack(&super, bs->data); blockDependency(bs, b, 0, oscore, nil); blockPut(b); blockDirty(bs); blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0); blockPut(bs); fs->source = sourceRoot(fs, super.active, mode); if(fs->source == nil){ vtSetError("sourceRoot: %R"); goto Err; } } //fprint(2, "%s: got fs source\n", argv0); vtRLock(fs->elk); fs->file = fileRoot(fs->source); fs->source->file = fs->file; /* point back */ vtRUnlock(fs->elk); if(fs->file == nil){ vtSetError("fileRoot: %R"); goto Err; } //fprint(2, "%s: got file root\n", argv0); if(mode == OReadWrite){ fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000); fs->snap = snapInit(fs); } return fs; Err: fprint(2, "%s: fsOpen error\n", argv0); fsClose(fs); return nil; }
static int archWalk(Param *p, u32int addr, uchar type, u32int tag) { int ret, i, x, psize, dsize; uchar *data, score[VtScoreSize]; Block *b; Label l; Entry *e; WalkPtr w; p->nvisit++; b = cacheLocalData(p->c, addr, type, tag, OReadWrite,0); if(b == nil){ fprint(2, "archive(%ud, %#ux): cannot find block: %R\n", p->snapEpoch, addr); if(strcmp(vtGetError(), ELabelMismatch) == 0){ /* might as well plod on so we write _something_ to Venti */ memmove(p->score, vtZeroScore, VtScoreSize); return ArchFaked; } return ArchFailure; } if(DEBUG) fprint(2, "%*sarchive(%ud, %#ux): block label %L\n", p->depth*2, "", p->snapEpoch, b->addr, &b->l); p->depth++; if(p->depth > p->maxdepth) p->maxdepth = p->depth; data = b->data; if((b->l.state&BsVenti) == 0){ initWalk(&w, b, b->l.type==BtDir ? p->dsize : p->psize); for(i=0; nextWalk(&w, score, &type, &tag, &e); i++){ if(e){ if(!(e->flags&VtEntryActive)) continue; if((e->snap && !e->archive) || (e->flags&VtEntryNoArchive)){ if(0) fprint(2, "snap; faking %#ux\n", b->addr); if(data == b->data){ data = copyBlock(b, p->blockSize); if(data == nil){ ret = ArchFailure; goto Out; } w.data = data; } memmove(e->score, vtZeroScore, VtScoreSize); e->depth = 0; e->size = 0; e->tag = 0; e->flags &= ~VtEntryLocal; entryPack(e, data, w.n-1); continue; } } addr = globalToLocal(score); if(addr == NilBlock) continue; dsize = p->dsize; psize = p->psize; if(e){ p->dsize= e->dsize; p->psize = e->psize; } vtUnlock(b->lk); x = archWalk(p, addr, type, tag); vtLock(b->lk); if(e){ p->dsize = dsize; p->psize = psize; } while(b->iostate != BioClean && b->iostate != BioDirty) vtSleep(b->ioready); switch(x){ case ArchFailure: fprint(2, "archWalk %#ux failed; ptr is in %#ux offset %d\n", addr, b->addr, i); ret = ArchFailure; goto Out; case ArchFaked: /* * When we're writing the entry for an archive directory * (like /archive/2003/1215) then even if we've faked * any data, record the score unconditionally. * This way, we will always record the Venti score here. * Otherwise, temporary data or corrupted file system * would cause us to keep holding onto the on-disk * copy of the archive. */ if(e==nil || !e->archive) if(data == b->data){ if(0) fprint(2, "faked %#ux, faking %#ux (%V)\n", addr, b->addr, p->score); data = copyBlock(b, p->blockSize); if(data == nil){ ret = ArchFailure; goto Out; } w.data = data; } /* fall through */ if(0) fprint(2, "falling\n"); case ArchSuccess: if(e){ memmove(e->score, p->score, VtScoreSize); e->flags &= ~VtEntryLocal; entryPack(e, data, w.n-1); }else memmove(data+(w.n-1)*VtScoreSize, p->score, VtScoreSize); if(data == b->data){ blockDirty(b); /* * If b is in the active tree, then we need to note that we've * just removed addr from the active tree (replacing it with the * copy we just stored to Venti). If addr is in other snapshots, * this will close addr but not free it, since it has a non-empty * epoch range. * * If b is in the active tree but has been copied (this can happen * if we get killed at just the right moment), then we will * mistakenly leak its kids. * * The children of an archive directory (e.g., /archive/2004/0604) * are not treated as in the active tree. */ if((b->l.state&BsCopied)==0 && (e==nil || e->snap==0)) blockRemoveLink(b, addr, p->l.type, p->l.tag, 0); } break; } } if(!ventiSend(p->a, b, data)){ p->nfailsend++; ret = ArchFailure; goto Out; } p->nsend++; if(data != b->data) p->nfake++; if(data == b->data){ /* not faking it, so update state */ p->nreal++; l = b->l; l.state |= BsVenti; if(!blockSetLabel(b, &l, 0)){ ret = ArchFailure; goto Out; } } } shaBlock(p->score, b, data, p->blockSize); if(0) fprint(2, "ventisend %V %p %p %p\n", p->score, data, b->data, w.data); ret = data!=b->data ? ArchFaked : ArchSuccess; p->l = b->l; Out: if(data != b->data) vtMemFree(data); p->depth--; blockPut(b); return ret; }
static int sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth) { Block *b, *nb, *ob, *rb; uint32_t tag; int type, d; Entry oe; assert(sourceIsLocked(r)); assert(depth <= VtPointerDepth); type = entryType(e); rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite); if(rb == nil) return 0; tag = e->tag; if(tag == 0) tag = tagGen(); /* * Walk down to the new root block. * We may stop early, but something is better than nothing. */ oe = *e; ob = nil; b = rb; /* BUG: explain type++. i think it is a real bug */ for(d=e->depth; d > depth; d--, type++) { nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite); if(nb == nil) break; if(ob!=nil && ob!=rb) blockPut(ob); ob = b; b = nb; } if(b == rb) { blockPut(rb); return 0; } /* * Right now, e points at the root block rb, b is the new root block, * and ob points at b. To update: * * (i) change e to point at b * (ii) zero the pointer ob -> b * (iii) free the root block * * p (the block containing e) must be written before * anything else. */ /* (i) */ e->depth = d; /* might have been local and now global; reverse cannot happen */ if(globalToLocal(b->score) == NilBlock) e->flags &= ~VtEntryLocal; memmove(e->score, b->score, VtScoreSize); entryPack(e, p->data, r->offset % r->epb); blockDependency(p, b, r->offset % r->epb, nil, &oe); blockDirty(p); /* (ii) */ memmove(ob->data, vtZeroScore, VtScoreSize); blockDependency(ob, p, 0, b->score, nil); blockDirty(ob); /* (iii) */ if(rb->addr != NilBlock) blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1); blockPut(rb); if(ob!=nil && ob!=rb) blockPut(ob); blockPut(b); return d == depth; }
static Block * blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e) { Block *b; Cache *c; uint32_t addr; int type; uint8_t oscore[VtScoreSize], score[VtScoreSize]; Entry oe; c = fs->cache; if((p->l.type & BtLevelMask) == 0) { assert(p->l.type == BtDir); type = entryType(e); b = cacheGlobal(c, e->score, type, e->tag, mode); } else { type = p->l.type - 1; b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode); } if(b) b->pc = getcallerpc(&p); if(b == nil || mode == OReadOnly) return b; if(p->l.epoch != fs->ehi) { fprint(2, "blockWalk: parent not writable\n"); abort(); } if(b->l.epoch == fs->ehi) return b; oe = *e; /* * Copy on write. */ if(e->tag == 0) { assert(p->l.type == BtDir); e->tag = tagGen(); e->flags |= VtEntryLocal; } addr = b->addr; b = blockCopy(b, e->tag, fs->ehi, fs->elo); if(b == nil) return nil; b->pc = getcallerpc(&p); assert(b->l.epoch == fs->ehi); blockDirty(b); memmove(score, b->score, VtScoreSize); if(p->l.type == BtDir) { memmove(e->score, b->score, VtScoreSize); entryPack(e, p->data, index); blockDependency(p, b, index, nil, &oe); } else { memmove(oscore, p->data+index*VtScoreSize, VtScoreSize); memmove(p->data+index*VtScoreSize, b->score, VtScoreSize); blockDependency(p, b, index, oscore, nil); } blockDirty(p); if(addr != NilBlock) blockRemoveLink(p, addr, type, e->tag, 0); return b; }
static int sourceShrinkSize(Source *r, Entry *e, uint64_t size) { int i, type, ppb; uint64_t ptrsz; uint32_t addr; uint8_t score[VtScoreSize]; Block *b; type = entryType(e); b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite); if(b == nil) return 0; ptrsz = e->dsize; ppb = e->psize/VtScoreSize; for(i=0; i+1<e->depth; i++) ptrsz *= ppb; while(type&BtLevelMask) { if(b->addr == NilBlock || b->l.epoch != r->fs->ehi) { /* not worth copying the block just so we can zero some of it */ blockPut(b); return 0; } /* * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes */ /* zero the pointers to unnecessary blocks */ i = (size+ptrsz-1)/ptrsz; for(; i<ppb; i++) { addr = globalToLocal(b->data+i*VtScoreSize); memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize); blockDirty(b); if(addr != NilBlock) blockRemoveLink(b, addr, type-1, e->tag, 1); } /* recurse (go around again) on the partially necessary block */ i = size/ptrsz; size = size%ptrsz; if(size == 0) { blockPut(b); return 1; } ptrsz /= ppb; type--; memmove(score, b->data+i*VtScoreSize, VtScoreSize); blockPut(b); b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite); if(b == nil) return 0; } if(b->addr == NilBlock || b->l.epoch != r->fs->ehi) { blockPut(b); return 0; } /* * No one ever truncates BtDir blocks. */ if(type == BtData && e->dsize > size) { memset(b->data+size, 0, e->dsize-size); blockDirty(b); } blockPut(b); return 1; }