/*
 * Wipe the directory entry behind r and drop the link to whatever
 * block the entry pointed at.
 *
 * With doremove zero the entry is only truncated: size, depth, tag and
 * score are cleared and the VtEntryLocal flag is dropped.  With doremove
 * non-zero the entry is retired as well (generation bumped unless it is
 * already ~0, sizes and flags zeroed) and r is unlocked and closed
 * before returning.
 *
 * r must be locked.  Returns 1 on success, 0 if the entry's block
 * could not be loaded.
 */
static int
sourceKill(Source *r, int doremove)
{
	Entry ent;
	Block *eb;
	uint32_t oaddr, otag;
	int otype;

	assert(sourceIsLocked(r));

	eb = sourceLoad(r, &ent);
	if(eb == nil)
		return 0;

	assert(eb->l.epoch == r->fs->ehi);

	/* truncating something that is already empty is a no-op */
	if(!doremove && ent.size == 0){
		blockPut(eb);
		return 1;
	}

	/* capture the link being removed before the entry is overwritten */
	oaddr = globalToLocal(ent.score);
	otype = entryType(&ent);
	otag = ent.tag;

	if(!doremove)
		ent.flags &= ~VtEntryLocal;
	else{
		if(ent.gen != ~0)
			ent.gen++;
		ent.dsize = 0;
		ent.psize = 0;
		ent.flags = 0;
	}

	/* fields cleared in both the truncate and the remove case */
	ent.depth = 0;
	ent.size = 0;
	ent.tag = 0;
	memmove(ent.score, vtZeroScore, VtScoreSize);

	entryPack(&ent, eb->data, r->offset % r->epb);
	blockDirty(eb);

	/* only a local block has a link to remove */
	if(oaddr != NilBlock)
		blockRemoveLink(eb, oaddr, otype, otag, 1);
	blockPut(eb);

	if(doremove){
		sourceUnlock(r);
		sourceClose(r);
	}
	return 1;
}
/*
 * Changes the file block bn to be the given block score.
 * Very sneaky.  Only used by flfmt.
 *
 * f must be a plain file (not a directory) whose source is open
 * OReadWrite; otherwise ENotFile or EReadOnly is set.  tag is passed
 * to _sourceBlock and, when the block that comes back is the directory
 * block holding the entry, stored in the entry itself.
 *
 * Returns 1 on success, 0 on failure.  The file lock, the source lock
 * and the block are all released on every path.
 */
int
fileMapBlock(File *f, ulong bn, uchar score[VtScoreSize], ulong tag)
{
	Block *b;
	Entry e;
	Source *s;

	if(!fileLock(f))
		return 0;

	/* s stays nil until the source lock is actually held */
	s = nil;
	if(f->dir.mode & ModeDir){
		vtSetError(ENotFile);
		goto Err;
	}

	if(f->source->mode != OReadWrite){
		vtSetError(EReadOnly);
		goto Err;
	}

	if(!sourceLock(f->source, -1))
		goto Err;

	s = f->source;
	b = _sourceBlock(s, bn, OReadWrite, 1, tag);
	if(b == nil)
		goto Err;

	if(!sourceGetEntry(s, &e)){
		/* do not leak the block obtained above */
		blockPut(b);
		goto Err;
	}

	if(b->l.type == BtDir){
		/* the pointer lives in the entry itself: rewrite the entry */
		memmove(e.score, score, VtScoreSize);
		assert(e.tag == tag || e.tag == 0);
		e.tag = tag;
		e.flags |= VtEntryLocal;
		entryPack(&e, b->data, f->source->offset % f->source->epb);
	}else{
		/* b is a pointer block: overwrite the slot for bn */
		memmove(b->data + (bn%(e.psize/VtScoreSize))*VtScoreSize, score, VtScoreSize);
	}
	blockDirty(b);
	blockPut(b);
	sourceUnlock(s);
	fileUnlock(f);
	return 1;

Err:
	if(s)
		sourceUnlock(s);
	fileUnlock(f);
	return 0;
}
/*
 * Raise the pointer depth of source r to depth.
 * e is r's entry and lives in block p.
 *
 * Each pass allocates a new block one type level up, stores the score
 * of the current top block in its first slot, and makes e point at the
 * new block.  If an allocation fails the loop stops early and whatever
 * depth was reached is still written back to p.
 *
 * r must be locked.  Returns non-zero only if the requested depth was
 * reached; 0 also if the current top block could not be loaded.
 */
static int
sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *top, *above;
	uint32_t tag;
	int type;
	Entry before;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	top = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(top == nil)
		return 0;

	/* reuse the entry's tag if it has one, else make a fresh one */
	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	/* snapshot of the entry as it was, handed to blockDependency below */
	before = *e;

	for(;;){
		if(e->depth >= depth)
			break;

		above = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
		if(above == nil)
			break;

		/* new layer's first pointer is the old top; entry now names the new layer */
		memmove(above->data, top->score, VtScoreSize);
		memmove(e->score, above->score, VtScoreSize);
		e->depth++;
		type++;
		e->tag = tag;
		e->flags |= VtEntryLocal;

		blockDependency(above, top, 0, vtZeroScore, nil);
		blockPut(top);
		top = above;
		blockDirty(top);
	}

	/* record the (possibly partial) result in the entry's block */
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, top, r->offset % r->epb, nil, &before);
	blockPut(top);
	blockDirty(p);

	return e->depth == depth;
}
/*
 * Set the byte size of source r.
 *
 * A size of zero is delegated to sourceTruncate.  Sizes above
 * VtMaxFileSize or above MaxBlock data blocks set ETooBig.  Otherwise
 * the pointer depth is adjusted to what sizeToDepth reports for the new
 * size, blocks past the new end are dropped when shrinking, and the
 * entry is rewritten with the new size.
 *
 * r must be locked.  Returns 1 on success, 0 on failure.
 */
int
sourceSetSize(Source *r, uint64_t size)
{
	int want, ok;
	Entry ent;
	Block *eb;

	assert(sourceIsLocked(r));

	if(size == 0)
		return sourceTruncate(r);

	if(size > VtMaxFileSize || size > ((uint64_t)MaxBlock)*r->dsize){
		vtSetError(ETooBig);
		return 0;
	}

	eb = sourceLoad(r, &ent);
	if(eb == nil)
		return 0;

	/* nothing to do when the size is unchanged */
	if(ent.size == size){
		blockPut(eb);
		return 1;
	}

	/* bring the tree to the depth the new size needs */
	want = sizeToDepth(size, ent.psize, ent.dsize);
	ok = 1;
	if(want < ent.depth)
		ok = sourceShrinkDepth(r, eb, &ent, want);
	else if(want > ent.depth)
		ok = sourceGrowDepth(r, eb, &ent, want);
	if(!ok){
		blockPut(eb);
		return 0;
	}

	if(size < ent.size)
		sourceShrinkSize(r, &ent, size);

	ent.size = size;
	entryPack(&ent, eb->data, r->offset % r->epb);
	blockDirty(eb);
	blockPut(eb);
	return 1;
}
/*
 * Lay out a fresh root in the global block buffer buf and write it out.
 *
 * Two BtDir blocks are allocated and written to PartData:
 *   - a directory block with three entries: slot 0 an initialised
 *     directory entry, slot 1 an initialised plain entry, and slot 2
 *     the entry passed in by the caller (the root's meta data);
 *   - a block tagged RootTag whose slot 0 is a local directory entry
 *     of size 3*VtEntrySize pointing at the first block.
 *
 * e is used as scratch after its original contents have been packed;
 * on return it holds the entry stored in the RootTag block.
 * Returns the address of the RootTag block.
 */
static u32int
rootInit(Entry *e)
{
	ulong dirAddr, topAddr;
	u32int tag;

	tag = tagGen();
	dirAddr = blockAlloc(BtDir, tag);
	memset(buf, 0, bsize);

	/* root meta data is in the third entry; pack it before e is reused */
	entryPack(e, buf, 2);

	/* first entry: directory */
	entryInit(e);
	e->flags |= VtEntryDir;
	entryPack(e, buf, 0);

	/* second entry: freshly initialised, no extra flags */
	entryInit(e);
	entryPack(e, buf, 1);

	blockWrite(PartData, dirAddr);

	/* entry describing the block just written */
	entryInit(e);
	e->flags |= VtEntryLocal|VtEntryDir;
	e->size = VtEntrySize*3;
	e->tag = tag;
	localToGlobal(dirAddr, e->score);

	topAddr = blockAlloc(BtDir, RootTag);
	memset(buf, 0, bsize);
	entryPack(e, buf, 0);
	blockWrite(PartData, topAddr);

	return topAddr;
}
/*
 * Overwrite r's on-disk entry with *e.
 *
 * Must be careful with this.  Doesn't record
 * dependencies, so don't introduce any!
 *
 * r must be locked.  Returns 1 on success, 0 if the block holding
 * the entry could not be loaded.
 */
int
sourceSetEntry(Source *r, Entry *e)
{
	Block *eb;
	Entry cur;	/* filled by sourceLoad; not otherwise used */

	assert(sourceIsLocked(r));

	eb = sourceLoad(r, &cur);
	if(eb != nil){
		entryPack(e, eb->data, r->offset%r->epb);
		blockDirty(eb);
		blockPut(eb);
		return 1;
	}
	return 0;
}
/*
 * Rewrite the entry for r inside the directory block named by
 * r->score/r->tag, replacing it with *e.
 *
 * The generation number already stored on disk is kept: it is read
 * back first and copied into e->gen (so *e is modified) before packing.
 *
 * Returns 1 on success, 0 if the block cannot be loaded or the
 * existing entry does not unpack.
 */
static int
setEntry(Source *r, Entry *e)
{
	Block *dirb;
	Entry old;
	int slot;

	dirb = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
	if(dirb == nil)
		return 0;

	slot = r->offset % r->epb;
	/* disabled trace; kept after the nil check so enabling it is safe */
	if(0) fprint(2, "setEntry: b %#ux %d score=%V\n", dirb->addr, slot, e->score);

	if(!entryUnpack(&old, dirb->data, slot)){
		blockPut(dirb);
		return 0;
	}

	e->gen = old.gen;
	entryPack(e, dirb->data, slot);

	/* BUG b should depend on the entry pointer */

	blockDirty(dirb);
	blockPut(dirb);
	return 1;
}
int mkVac(VtSession *z, uint blockSize, Entry *pe, Entry *pee, DirEntry *pde, uint8_t score[VtScoreSize]) { uint8_t buf[8192]; int i; uint8_t *p; uint n; DirEntry de; Entry e, ee, eee; MetaBlock mb; MetaEntry me; VtRoot root; e = *pe; ee = *pee; de = *pde; if(globalToLocal(e.score) != NilBlock || (ee.flags&VtEntryActive && globalToLocal(ee.score) != NilBlock)){ vtSetError("can only vac paths already stored on venti"); return 0; } /* * Build metadata source for root. */ n = deSize(&de); if(n+MetaHeaderSize+MetaIndexSize > sizeof buf){ vtSetError("DirEntry too big"); return 0; } memset(buf, 0, sizeof buf); mbInit(&mb, buf, n+MetaHeaderSize+MetaIndexSize, 1); p = mbAlloc(&mb, n); if(p == nil) abort(); mbSearch(&mb, de.elem, &i, &me); assert(me.p == nil); me.p = p; me.size = n; dePack(&de, &me); mbInsert(&mb, i, &me); mbPack(&mb); eee.size = n+MetaHeaderSize+MetaIndexSize; if(!vtWriteBlock(z, buf, eee.size, VtDataType, eee.score)) return 0; eee.psize = 8192&&0xFF; eee.dsize = 8192&&0xFF; eee.depth = 0; eee.flags = VtEntryActive; /* * Build root source with three entries in it. */ entryPack(&e, buf, 0); entryPack(&ee, buf, 1); entryPack(&eee, buf, 2); n = VtEntrySize*3; memset(&root, 0, sizeof root); if(!vtWriteBlock(z, buf, n, VtDirType, root.score)) return 0; /* * Save root. */ root.version = VtRootVersion; strecpy(root.type, root.type+sizeof root.type, "vac"); strecpy(root.name, root.name+sizeof root.name, de.elem); root.blockSize = blockSize; vtRootPack(&root, buf); if(!vtWriteBlock(z, buf, VtRootSize, VtRootType, score)) return 0; return 1; }
/*
 * Start a new epoch: copy the root block into epoch fs->ehi+1, point
 * the file system and the super block at the copy, and write the super
 * block out.
 *
 * If doarchive is set, the old root address is also stored in
 * super.next.
 *
 * Returns 1 on success, 0 on failure.  On failure fs->ehi and the
 * super block are left unchanged.
 */
static int
bumpEpoch(Fs *fs, int doarchive)
{
	uint8_t oscore[VtScoreSize];
	uint32_t oldaddr;
	Block *b, *bs;
	Entry e;
	Source *r;
	Super super;

	/*
	 * Duplicate the root block.
	 *
	 * As a hint to flchk, the garbage collector,
	 * and any (human) debuggers, store a pointer
	 * to the old root block in entry 1 of the new root block.
	 */
	r = fs->source;
	b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
	if(b == nil)
		return 0;

	memset(&e, 0, sizeof e);
	e.flags = VtEntryActive | VtEntryLocal | VtEntryDir;
	memmove(e.score, b->score, VtScoreSize);
	e.tag = RootTag;
	e.snap = b->l.epoch;

	/* from here on b is the new root, not the block loaded above */
	b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
	if(b == nil){
		fprint(2, "%s: bumpEpoch: blockCopy: %R\n", argv0);
		return 0;
	}

	entryPack(&e, b->data, 1);
	blockDirty(b);

	/*
	 * Update the superblock with the new root and epoch.
	 */
	if((bs = superGet(fs->cache, &super)) == nil){
		/* release the new root copy instead of leaking it */
		blockPut(b);
		return 0;
	}

	fs->ehi++;
	memmove(r->score, b->score, VtScoreSize);
	r->epoch = fs->ehi;

	super.epochHigh = fs->ehi;
	oldaddr = super.active;
	super.active = b->addr;
	if(doarchive)
		super.next = oldaddr;

	/* disabled trace; placed here so oldaddr is defined if it is enabled */
	if(0) fprint(2, "%s: snapshot root from %d to %d\n", argv0, oldaddr, b->addr);

	/*
	 * Record that the new super.active can't get written out until
	 * the new b gets written out.  Until then, use the old value.
	 */
	localToGlobal(oldaddr, oscore);
	blockDependency(bs, b, 0, oscore, nil);
	blockPut(b);

	/*
	 * We force the super block to disk so that super.epochHigh gets updated.
	 * Otherwise, if we crash and come back, we might incorrectly treat as active
	 * some of the blocks that make up the snapshot we just created.
	 * Basically every block in the active file system and all the blocks in
	 * the recently-created snapshot depend on the super block now.
	 * Rather than record all those dependencies, we just force the block to disk.
	 *
	 * Note that blockWrite might actually (will probably) send a slightly outdated
	 * super.active to disk.  It will be the address of the most recent root that has
	 * gone to disk.
	 */
	superWrite(bs, &super, 1);
	blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
	blockPut(bs);

	return 1;
}
static u32int ventiRoot(char *host, char *s) { int i, n; uchar score[VtScoreSize]; u32int addr, tag; DirEntry de; MetaBlock mb; MetaEntry me; Entry e; VtRoot root; if(!parseScore(score, s)) vtFatal("bad score '%s'", s); if((z = vtDial(host, 0)) == nil || !vtConnect(z, nil)) vtFatal("connect to venti: %R"); tag = tagGen(); addr = blockAlloc(BtDir, tag); ventiRead(score, VtRootType); if(!vtRootUnpack(&root, buf)) vtFatal("corrupted root: vtRootUnpack"); n = ventiRead(root.score, VtDirType); /* * Fossil's vac archives start with an extra layer of source, * but vac's don't. */ if(n <= 2*VtEntrySize){ if(!entryUnpack(&e, buf, 0)) vtFatal("bad root: top entry"); n = ventiRead(e.score, VtDirType); } /* * There should be three root sources (and nothing else) here. */ for(i=0; i<3; i++){ if(!entryUnpack(&e, buf, i) || !(e.flags&VtEntryActive) || e.psize < 256 || e.dsize < 256) vtFatal("bad root: entry %d", i); fprint(2, "%V\n", e.score); } if(n > 3*VtEntrySize) vtFatal("bad root: entry count"); blockWrite(PartData, addr); /* * Maximum qid is recorded in root's msource, entry #2 (conveniently in e). */ ventiRead(e.score, VtDataType); if(!mbUnpack(&mb, buf, bsize)) vtFatal("bad root: mbUnpack"); meUnpack(&me, &mb, 0); if(!deUnpack(&de, &me)) vtFatal("bad root: dirUnpack"); if(!de.qidSpace) vtFatal("bad root: no qidSpace"); qid = de.qidMax; /* * Recreate the top layer of source. */ entryInit(&e); e.flags |= VtEntryLocal|VtEntryDir; e.size = VtEntrySize*3; e.tag = tag; localToGlobal(addr, e.score); addr = blockAlloc(BtDir, RootTag); memset(buf, 0, bsize); entryPack(&e, buf, 0); blockWrite(PartData, addr); return addr; }
/*
 * Archive the local block at addr (expected label type/tag) and,
 * recursively, every local block it points to.
 *
 * p carries the walk state: cache (c), archiver handle (a), the block
 * sizes in effect (dsize, psize, blockSize), counters (nvisit, nsend,
 * nfake, nreal, nfailsend, depth, maxdepth) and the results of the most
 * recent call (score, l).
 *
 * Returns:
 *	ArchSuccess - the block's own data was used unchanged;
 *	ArchFaked   - a modified private copy of the data was used instead
 *	              (or the block could not be loaded because of a label
 *	              mismatch, in which case p->score is the zero score);
 *	ArchFailure - something went wrong.
 * On ArchSuccess/ArchFaked from the normal path, p->score holds the
 * score computed by shaBlock and p->l the block's label.
 */
static int
archWalk(Param *p, u32int addr, uchar type, u32int tag)
{
	int ret, i, x, psize, dsize;
	uchar *data, score[VtScoreSize];
	Block *b;
	Label l;
	Entry *e;
	WalkPtr w;

	p->nvisit++;

	b = cacheLocalData(p->c, addr, type, tag, OReadWrite,0);
	if(b == nil){
		fprint(2, "archive(%ud, %#ux): cannot find block: %R\n", p->snapEpoch, addr);
		if(strcmp(vtGetError(), ELabelMismatch) == 0){
			/* might as well plod on so we write _something_ to Venti */
			memmove(p->score, vtZeroScore, VtScoreSize);
			return ArchFaked;
		}
		return ArchFailure;
	}

	if(DEBUG) fprint(2, "%*sarchive(%ud, %#ux): block label %L\n", p->depth*2, "", p->snapEpoch, b->addr, &b->l);
	p->depth++;
	if(p->depth > p->maxdepth)
		p->maxdepth = p->depth;

	/*
	 * data == b->data means "still using the block's own bytes";
	 * once it is replaced by a copyBlock result the block is being faked.
	 */
	data = b->data;
	/* blocks already labelled BsVenti are not walked or sent again */
	if((b->l.state&BsVenti) == 0){
		initWalk(&w, b, b->l.type==BtDir ? p->dsize : p->psize);
		for(i=0; nextWalk(&w, score, &type, &tag, &e); i++){
			/* e is non-nil when the walker yielded a directory entry */
			if(e){
				if(!(e->flags&VtEntryActive))
					continue;
				/*
				 * Snapshots that are not archives, and entries marked
				 * VtEntryNoArchive, are written out as empty entries
				 * in a private copy of the block.
				 */
				if((e->snap && !e->archive) || (e->flags&VtEntryNoArchive)){
					if(0) fprint(2, "snap; faking %#ux\n", b->addr);
					if(data == b->data){
						data = copyBlock(b, p->blockSize);
						if(data == nil){
							ret = ArchFailure;
							goto Out;
						}
						w.data = data;
					}
					memmove(e->score, vtZeroScore, VtScoreSize);
					e->depth = 0;
					e->size = 0;
					e->tag = 0;
					e->flags &= ~VtEntryLocal;
					entryPack(e, data, w.n-1);
					continue;
				}
			}
			/* only pointers to local blocks need to be followed */
			addr = globalToLocal(score);
			if(addr == NilBlock)
				continue;
			/* recurse with the entry's block sizes, then restore ours */
			dsize = p->dsize;
			psize = p->psize;
			if(e){
				p->dsize= e->dsize;
				p->psize = e->psize;
			}
			/* b's lock is dropped for the duration of the recursive call */
			vtUnlock(b->lk);
			x = archWalk(p, addr, type, tag);
			vtLock(b->lk);
			if(e){
				p->dsize = dsize;
				p->psize = psize;
			}
			/* wait until b is back in the BioClean or BioDirty state */
			while(b->iostate != BioClean && b->iostate != BioDirty)
				vtSleep(b->ioready);
			switch(x){
			case ArchFailure:
				fprint(2, "archWalk %#ux failed; ptr is in %#ux offset %d\n", addr, b->addr, i);
				ret = ArchFailure;
				goto Out;
			case ArchFaked:
				/*
				 * When we're writing the entry for an archive directory
				 * (like /archive/2003/1215) then even if we've faked
				 * any data, record the score unconditionally.
				 * This way, we will always record the Venti score here.
				 * Otherwise, temporary data or corrupted file system
				 * would cause us to keep holding onto the on-disk
				 * copy of the archive.
				 */
				if(e==nil || !e->archive)
				if(data == b->data){
					if(0) fprint(2, "faked %#ux, faking %#ux (%V)\n", addr, b->addr, p->score);
					data = copyBlock(b, p->blockSize);
					if(data == nil){
						ret = ArchFailure;
						goto Out;
					}
					w.data = data;
				}
				/* fall through */
				if(0) fprint(2, "falling\n");
			case ArchSuccess:
				/* replace the pointer just walked with the child's score in p->score */
				if(e){
					memmove(e->score, p->score, VtScoreSize);
					e->flags &= ~VtEntryLocal;
					entryPack(e, data, w.n-1);
				}else
					memmove(data+(w.n-1)*VtScoreSize, p->score, VtScoreSize);
				if(data == b->data){
					blockDirty(b);
					/*
					 * If b is in the active tree, then we need to note that we've
					 * just removed addr from the active tree (replacing it with the
					 * copy we just stored to Venti).  If addr is in other snapshots,
					 * this will close addr but not free it, since it has a non-empty
					 * epoch range.
					 *
					 * If b is in the active tree but has been copied (this can happen
					 * if we get killed at just the right moment), then we will
					 * mistakenly leak its kids.
					 *
					 * The children of an archive directory (e.g., /archive/2004/0604)
					 * are not treated as in the active tree.
					 */
					if((b->l.state&BsCopied)==0 && (e==nil || e->snap==0))
						blockRemoveLink(b, addr, p->l.type, p->l.tag, 0);
				}
				break;
			}
		}

		/* all children handled: hand this block (or its faked copy) to the archiver */
		if(!ventiSend(p->a, b, data)){
			p->nfailsend++;
			ret = ArchFailure;
			goto Out;
		}
		p->nsend++;
		if(data != b->data)
			p->nfake++;
		if(data == b->data){	/* not faking it, so update state */
			p->nreal++;
			l = b->l;
			l.state |= BsVenti;
			if(!blockSetLabel(b, &l, 0)){
				ret = ArchFailure;
				goto Out;
			}
		}
	}

	/* report this block's score and label to the caller through p */
	shaBlock(p->score, b, data, p->blockSize);
	if(0) fprint(2, "ventisend %V %p %p %p\n", p->score, data, b->data, w.data);
	ret = data!=b->data ? ArchFaked : ArchSuccess;
	p->l = b->l;
Out:
	/* a private copy, if one was made, is released here */
	if(data != b->data)
		vtMemFree(data);
	p->depth--;
	blockPut(b);
	return ret;
}
/*
 * Lower the pointer depth of source r to depth.
 * e is r's entry and lives in block p.
 *
 * The tree is descended through slot 0 of each pointer block until
 * the requested depth is reached or a block fails to load.  The entry
 * is then pointed at the block reached, the pointer that led to it is
 * zeroed, and the link to the old root block is removed.
 *
 * r must be locked.  Returns non-zero only if the requested depth was
 * reached; 0 if nothing could be done or the walk stopped early (in
 * which case the entry has still been shrunk as far as the walk got).
 */
static int
sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *nb, *ob, *rb;
	uint32_t tag;
	int type, d;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(rb == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	/*
	 * Walk down to the new root block.
	 * We may stop early, but something is better than nothing.
	 */
	oe = *e;

	ob = nil;
	b = rb;
	/*
	 * type is the block type of b; each child is one level lower
	 * (type-1), so type must go down as we descend.  This loop used
	 * to step type++, which made every load after the first ask for
	 * the wrong block type.
	 */
	for(d=e->depth; d > depth; d--, type--){
		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
		if(nb == nil)
			break;
		/* keep only rb, the current parent and the current block held */
		if(ob!=nil && ob!=rb)
			blockPut(ob);
		ob = b;
		b = nb;
	}

	/* could not descend even one level */
	if(b == rb){
		blockPut(rb);
		return 0;
	}

	/*
	 * Right now, e points at the root block rb, b is the new root block,
	 * and ob points at b.  To update:
	 *
	 *	(i) change e to point at b
	 *	(ii) zero the pointer ob -> b
	 *	(iii) free the root block
	 *
	 * p (the block containing e) must be written before
	 * anything else.
	 */

	/* (i) */
	e->depth = d;
	/* might have been local and now global; reverse cannot happen */
	if(globalToLocal(b->score) == NilBlock)
		e->flags &= ~VtEntryLocal;
	memmove(e->score, b->score, VtScoreSize);
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockDirty(p);

	/* (ii) */
	memmove(ob->data, vtZeroScore, VtScoreSize);
	blockDependency(ob, p, 0, b->score, nil);
	blockDirty(ob);

	/* (iii) */
	if(rb->addr != NilBlock)
		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);

	blockPut(rb);
	if(ob!=nil && ob!=rb)
		blockPut(ob);
	blockPut(b);

	return d == depth;
}
/*
 * Fetch the child of block p selected by index.
 *
 * When p is a level-0 block it must be a BtDir block and the child is
 * the block named by entry e; otherwise the child is named by the
 * index'th score in p and is one type level below p.
 *
 * For OReadOnly the child is returned as loaded.  For any other mode
 * the parent must belong to the current epoch (fs->ehi), and a child
 * from an older epoch is replaced by a blockCopy into the current one:
 * the parent's pointer (or e, which is modified and repacked into p) is
 * updated to the copy, the dependency is recorded, and the link to the
 * old block is removed.
 *
 * Returns the child block, or nil on failure.
 */
static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
	Block *child;
	Cache *cache;
	uint32_t oldaddr;
	int ctype;
	uint8_t prev[VtScoreSize], fresh[VtScoreSize];
	Entry saved;

	cache = fs->cache;

	if((p->l.type & BtLevelMask) != 0){
		/* pointer block: child is one level down */
		ctype = p->l.type - 1;
		child = cacheGlobal(cache, p->data + index*VtScoreSize, ctype, e->tag, mode);
	}else{
		/* directory block: child is the root block of entry e */
		assert(p->l.type == BtDir);
		ctype = entryType(e);
		child = cacheGlobal(cache, e->score, ctype, e->tag, mode);
	}

	if(child == nil)
		return nil;
	child->pc = getcallerpc(&p);
	if(mode == OReadOnly)
		return child;

	if(p->l.epoch != fs->ehi){
		fprint(2, "blockWalk: parent not writable\n");
		abort();
	}

	/* already in the current epoch: writable as is */
	if(child->l.epoch == fs->ehi)
		return child;

	saved = *e;

	/*
	 * Copy on write.
	 */
	if(e->tag == 0){
		assert(p->l.type == BtDir);
		e->tag = tagGen();
		e->flags |= VtEntryLocal;
	}

	oldaddr = child->addr;
	child = blockCopy(child, e->tag, fs->ehi, fs->elo);
	if(child == nil)
		return nil;

	child->pc = getcallerpc(&p);
	assert(child->l.epoch == fs->ehi);

	blockDirty(child);
	memmove(fresh, child->score, VtScoreSize);

	/* redirect the parent at the copy and record the write ordering */
	if(p->l.type != BtDir){
		memmove(prev, p->data+index*VtScoreSize, VtScoreSize);
		memmove(p->data+index*VtScoreSize, child->score, VtScoreSize);
		blockDependency(p, child, index, prev, nil);
	}else{
		memmove(e->score, child->score, VtScoreSize);
		entryPack(e, p->data, index);
		blockDependency(p, child, index, nil, &saved);
	}
	blockDirty(p);

	if(oldaddr != NilBlock)
		blockRemoveLink(p, oldaddr, ctype, e->tag, 0);

	return child;
}
/*
 * Create a new source inside directory source r.
 *
 * dsize is the data block size of the new source; its pointer block
 * size is dsize rounded down to a multiple of VtScoreSize.  dir
 * non-zero marks the new entry VtEntryDir.  offset is the entry index
 * to start searching from; 0 means "pick a random block".
 *
 * The search looks for an inactive entry whose generation is not ~0,
 * first from offset to the end of its block and then in the block at
 * the current directory size.  The directory size is grown if the
 * chosen slot lies past the end.
 *
 * r must be locked.  Returns the new Source from sourceAlloc, or nil
 * on failure.
 */
Source *
sourceCreate(Source *r, int dsize, int dir, uint32_t offset)
{
	int i, epb, psize;
	uint32_t bn, size;
	Block *b;
	Entry e;
	Source *made;

	assert(sourceIsLocked(r));

	if(!r->dir){
		vtSetError(ENotDir);
		return nil;
	}

	epb = r->dsize/VtEntrySize;
	psize = (dsize/VtScoreSize)*VtScoreSize;
	size = sourceGetDirSize(r);

	if(offset == 0){
		/*
		 * look at a random block to see if we can find an empty entry
		 */
		offset = lnrand(size+1);
		offset -= offset % epb;
	}

	/* try the given block and then try the last block */
	for(;;){
		bn = offset/epb;
		b = sourceBlock(r, bn, OReadWrite);
		if(b == nil)
			return nil;

		for(i = offset%r->epb; i < epb; i++){
			entryUnpack(&e, b->data, i);
			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
				break;
		}
		if(i < epb)
			break;		/* free slot i found in block b */

		blockPut(b);
		if(offset == size){
			fprint(2, "sourceCreate: cannot happen\n");
			vtSetError("sourceCreate: cannot happen");
			return nil;
		}
		offset = size;
	}

	/* found an entry - gen already set */
	e.psize = psize;
	e.dsize = dsize;
	assert(psize && dsize);
	e.flags = dir ? (VtEntryActive|VtEntryDir) : VtEntryActive;
	e.depth = 0;
	e.size = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	e.tag = 0;
	e.snap = 0;
	e.archive = 0;
	entryPack(&e, b->data, i);
	blockDirty(b);

	/* extend the directory if the slot is beyond its current size */
	offset = bn*epb + i;
	if(offset+1 > size && !sourceSetDirSize(r, offset+1)){
		blockPut(b);
		return nil;
	}

	made = sourceAlloc(r->fs, b, r, offset, OReadWrite, 0);
	blockPut(b);
	return made;
}