Beispiel #1
0
/*
 * Truncate (doremove == 0) or remove (doremove != 0) the entry behind
 * source r.  The caller must already hold r locked.
 *
 * In both cases the packed entry is rewritten with the zero score and
 * with depth, size and tag cleared, and the link to the old tree is
 * passed to blockRemoveLink.  When removing, the generation is bumped
 * (unless it is already ~0), dsize/psize/flags are cleared, and r is
 * unlocked and closed before returning.
 *
 * Returns 1 on success, 0 if the entry could not be loaded.
 */
static int
sourceKill(Source *r, int doremove)
{
    Entry ent;
    Block *blk;
    uint32_t oldaddr, oldtag;
    int oldtype;

    assert(sourceIsLocked(r));

    blk = sourceLoad(r, &ent);
    if(blk == nil)
        return 0;

    assert(blk->l.epoch == r->fs->ehi);

    /* truncating an entry that is already empty: nothing to do */
    if(!doremove && ent.size == 0) {
        blockPut(blk);
        return 1;
    }

    /* capture the link being dropped before the entry is wiped */
    oldaddr = globalToLocal(ent.score);
    oldtype = entryType(&ent);
    oldtag = ent.tag;

    /* fields reset for both truncate and remove */
    memmove(ent.score, vtZeroScore, VtScoreSize);
    ent.tag = 0;
    ent.size = 0;
    ent.depth = 0;

    if(!doremove) {
        ent.flags &= ~VtEntryLocal;
    } else {
        if(ent.gen != ~0)
            ent.gen++;
        ent.flags = 0;
        ent.psize = 0;
        ent.dsize = 0;
    }

    entryPack(&ent, blk->data, r->offset % r->epb);
    blockDirty(blk);
    if(oldaddr != NilBlock)
        blockRemoveLink(blk, oldaddr, oldtype, oldtag, 1);
    blockPut(blk);

    if(!doremove)
        return 1;

    /* a removed source is released on behalf of the caller */
    sourceUnlock(r);
    sourceClose(r);
    return 1;
}
Beispiel #2
0
/*
 * Advance the file system to a new epoch.
 *
 * The current root block is copied into epoch fs->ehi+1, a pointer to
 * the old root is stored in entry 1 of the copy, and the super block
 * is updated (epochHigh, active, and next when doarchive is set) and
 * written out.
 *
 * Returns 1 on success, 0 if the root could not be loaded or copied,
 * or if the super block could not be fetched.
 */
static int
bumpEpoch(Fs *fs, int doarchive)
{
	uint8_t oscore[VtScoreSize];
	uint32_t oldaddr;
	Block *b, *bs;
	Entry e;
	Source *r;
	Super super;

	/*
	 * Duplicate the root block.
	 *
	 * As a hint to flchk, the garbage collector,
	 * and any (human) debuggers, store a pointer
	 * to the old root block in entry 1 of the new root block.
	 */
	r = fs->source;
	b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
	if(b == nil)
		return 0;

	/* build the back-pointer entry while b still refers to the old root */
	memset(&e, 0, sizeof e);
	e.flags = VtEntryActive | VtEntryLocal | VtEntryDir;
	memmove(e.score, b->score, VtScoreSize);
	e.tag = RootTag;
	e.snap = b->l.epoch;

	b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
	if(b == nil){
		fprint(2, "%s: bumpEpoch: blockCopy: %R\n", argv0);
		return 0;
	}

	entryPack(&e, b->data, 1);
	blockDirty(b);

	/*
	 * Update the superblock with the new root and epoch.
	 */
	if((bs = superGet(fs->cache, &super)) == nil){
		/* do not leave the copied root block held on this error path */
		blockPut(b);
		return 0;
	}

	fs->ehi++;
	memmove(r->score, b->score, VtScoreSize);
	r->epoch = fs->ehi;

	super.epochHigh = fs->ehi;
	oldaddr = super.active;
	/* disabled trace; placed here so oldaddr is initialized if it is enabled */
	if(0) fprint(2, "%s: snapshot root from %d to %d\n", argv0, oldaddr, b->addr);
	super.active = b->addr;
	if(doarchive)
		super.next = oldaddr;

	/*
	 * Record that the new super.active can't get written out until
	 * the new b gets written out.  Until then, use the old value.
	 */
	localToGlobal(oldaddr, oscore);
	blockDependency(bs, b, 0, oscore, nil);
	blockPut(b);

	/*
	 * We force the super block to disk so that super.epochHigh gets updated.
	 * Otherwise, if we crash and come back, we might incorrectly treat as active
	 * some of the blocks that make up the snapshot we just created.
	 * Basically every block in the active file system and all the blocks in
	 * the recently-created snapshot depend on the super block now.
	 * Rather than record all those dependencies, we just force the block to disk.
	 *
	 * Note that blockWrite might actually (will probably) send a slightly outdated
	 * super.active to disk.  It will be the address of the most recent root that has
	 * gone to disk.
	 */
	superWrite(bs, &super, 1);
	blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
	blockPut(bs);

	return 1;
}
Beispiel #3
0
/*
 * Open the file system stored in file and return a new Fs for it.
 *
 *	file	path handed to open(); also duplicated into fs->name
 *	z	Venti session; stored in fs->z and, when non-nil in
 *		read-write mode, passed to archInit
 *	ncache	passed through to cacheAlloc
 *	mode	OReadOnly or OReadWrite; anything else fails with EBadMode
 *
 * Returns nil on failure with the error string set via vtSetError.
 * Failures after the Fs has been allocated go through the Err label,
 * which calls fsClose on the partially built Fs.
 */
Fs *
fsOpen(char *file, VtSession *z, int32_t ncache, int mode)
{
	int fd, m;
	uint8_t oscore[VtScoreSize];
	Block *b, *bs;
	Disk *disk;
	Fs *fs;
	Super super;

	/* translate the fossil open mode into the flag passed to open() */
	switch(mode){
	default:
		vtSetError(EBadMode);
		return nil;
	case OReadOnly:
		m = OREAD;
		break;
	case OReadWrite:
		m = ORDWR;
		break;
	}
	fd = open(file, m);
	if(fd < 0){
		vtSetError("open %s: %r", file);
		return nil;
	}

	bwatchInit();
	disk = diskAlloc(fd);
	if(disk == nil){
		vtSetError("diskAlloc: %R");
		close(fd);
		return nil;
	}

	/* from here on, errors are cleaned up by fsClose(fs) at Err */
	fs = vtMemAllocZ(sizeof(Fs));
	fs->mode = mode;
	fs->name = vtStrDup(file);
	fs->blockSize = diskBlockSize(disk);
	fs->elk = vtLockAlloc();
	fs->cache = cacheAlloc(disk, z, ncache, mode);
	/* the archiver is only set up for a writable fs with a Venti session */
	if(mode == OReadWrite && z)
		fs->arch = archInit(fs->cache, disk, fs, z);
	fs->z = z;

	/* read the super block (block 0 of PartSuper) to learn the epoch range */
	b = cacheLocal(fs->cache, PartSuper, 0, mode);
	if(b == nil)
		goto Err;
	if(!superUnpack(&super, b->data)){
		blockPut(b);
		vtSetError("bad super block");
		goto Err;
	}
	blockPut(b);

	fs->ehi = super.epochHigh;
	fs->elo = super.epochLow;

//fprint(2, "%s: fs->ehi %d fs->elo %d active=%d\n", argv0, fs->ehi, fs->elo, super.active);

	fs->source = sourceRoot(fs, super.active, mode);
	if(fs->source == nil){
		/*
		 * Perhaps it failed because the block is copy-on-write.
		 * Do the copy and try again.
		 */
		/* only a writable open that failed with EBadRoot is retried */
		if(mode == OReadOnly || strcmp(vtGetError(), EBadRoot) != 0)
			goto Err;
		b = cacheLocalData(fs->cache, super.active, BtDir, RootTag,
			OReadWrite, 0);
		if(b == nil){
			vtSetError("cacheLocalData: %R");
			goto Err;
		}
		/* a root already in the current epoch needs no copy, so the failure is real */
		if(b->l.epoch == fs->ehi){
			blockPut(b);
			vtSetError("bad root source block");
			goto Err;
		}
		b = blockCopy(b, RootTag, fs->ehi, fs->elo);
		if(b == nil)
			goto Err;
		/* remember the old root's score before pointing super.active at the copy */
		localToGlobal(super.active, oscore);
		super.active = b->addr;
		bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite);
		if(bs == nil){
			blockPut(b);
			vtSetError("cacheLocal: %R");
			goto Err;
		}
		superPack(&super, bs->data);
		/* record that the super block depends on b, with oscore as the old value */
		blockDependency(bs, b, 0, oscore, nil);
		blockPut(b);
		blockDirty(bs);
		blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
		blockPut(bs);
		fs->source = sourceRoot(fs, super.active, mode);
		if(fs->source == nil){
			vtSetError("sourceRoot: %R");
			goto Err;
		}
	}

//fprint(2, "%s: got fs source\n", argv0);

	/* build the root File while holding fs->elk for reading */
	vtRLock(fs->elk);
	fs->file = fileRoot(fs->source);
	fs->source->file = fs->file;		/* point back */
	vtRUnlock(fs->elk);
	if(fs->file == nil){
		vtSetError("fileRoot: %R");
		goto Err;
	}

//fprint(2, "%s: got file root\n", argv0);

	/*
	 * A writable fs also gets a periodic fsMetaFlush callback and a
	 * snapshot handler.  NOTE(review): the 1000 passed to periodicAlloc
	 * is presumably an interval in milliseconds -- confirm.
	 */
	if(mode == OReadWrite){
		fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000);
		fs->snap = snapInit(fs);
	}
	return fs;

Err:
	/* common failure exit: report, tear down the partial Fs, return nil */
fprint(2, "%s: fsOpen error\n", argv0);
	fsClose(fs);
	return nil;
}
Beispiel #4
0
/*
 * Recursively archive the block at local address addr (with the given
 * type and tag) and everything it points to.
 *
 * On return p->score holds the score computed by shaBlock for the data
 * that was used, or the zero score when a label mismatch was papered
 * over.  p->l is set to the block's label on the non-error path.
 *
 * Returns
 *	ArchSuccess	the block's own data was used unchanged in place
 *	ArchFaked	a private, modified copy of the data was used
 *			(or the block could not be found due to a
 *			label mismatch)
 *	ArchFailure	a block could not be loaded, copied, sent,
 *			or relabelled
 *
 * Counters in p (nvisit, nsend, nfailsend, nfake, nreal, depth,
 * maxdepth) are updated along the way.
 */
static int
archWalk(Param *p, u32int addr, uchar type, u32int tag)
{
	int ret, i, x, psize, dsize;
	uchar *data, score[VtScoreSize];
	Block *b;
	Label l;
	Entry *e;
	WalkPtr w;

	p->nvisit++;

	b = cacheLocalData(p->c, addr, type, tag, OReadWrite,0);
	if(b == nil){
		fprint(2, "archive(%ud, %#ux): cannot find block: %R\n", p->snapEpoch, addr);
		if(strcmp(vtGetError(), ELabelMismatch) == 0){
			/* might as well plod on so we write _something_ to Venti */
			memmove(p->score, vtZeroScore, VtScoreSize);
			return ArchFaked;
		}
		return ArchFailure;
	}

	if(DEBUG) fprint(2, "%*sarchive(%ud, %#ux): block label %L\n",
		p->depth*2, "",  p->snapEpoch, b->addr, &b->l);
	p->depth++;
	if(p->depth > p->maxdepth)
		p->maxdepth = p->depth;

	/*
	 * data aliases b->data until something has to be faked; after that
	 * it points at a private copy made by copyBlock, and the test
	 * data == b->data below means "nothing has been faked yet".
	 */
	data = b->data;
	/* blocks whose label already has BsVenti set skip the walk and the send */
	if((b->l.state&BsVenti) == 0){
		initWalk(&w, b, b->l.type==BtDir ? p->dsize : p->psize);
		/* nextWalk overwrites type and tag with those of the current child */
		for(i=0; nextWalk(&w, score, &type, &tag, &e); i++){
			if(e){
				if(!(e->flags&VtEntryActive))
					continue;
				/*
				 * Entries that are snapshots but not archives, or that are
				 * marked VtEntryNoArchive, are not followed: they are
				 * replaced by an empty entry in a private copy of the data.
				 */
				if((e->snap && !e->archive)
				|| (e->flags&VtEntryNoArchive)){
					if(0) fprint(2, "snap; faking %#ux\n", b->addr);
					if(data == b->data){
						data = copyBlock(b, p->blockSize);
						if(data == nil){
							ret = ArchFailure;
							goto Out;
						}
						w.data = data;
					}
					memmove(e->score, vtZeroScore, VtScoreSize);
					e->depth = 0;
					e->size = 0;
					e->tag = 0;
					e->flags &= ~VtEntryLocal;
					entryPack(e, data, w.n-1);
					continue;
				}
			}
			/* scores that do not map to a local address need no walking */
			addr = globalToLocal(score);
			if(addr == NilBlock)
				continue;
			/* use the entry's block sizes for its subtree, restoring ours afterwards */
			dsize = p->dsize;
			psize = p->psize;
			if(e){
				p->dsize= e->dsize;
				p->psize = e->psize;
			}
			/* b's lock is dropped for the duration of the recursive walk */
			vtUnlock(b->lk);
			x = archWalk(p, addr, type, tag);
			vtLock(b->lk);
			if(e){
				p->dsize = dsize;
				p->psize = psize;
			}
			/* wait until b's I/O state is BioClean or BioDirty before touching it */
			while(b->iostate != BioClean && b->iostate != BioDirty)
				vtSleep(b->ioready);
			switch(x){
			case ArchFailure:
				fprint(2, "archWalk %#ux failed; ptr is in %#ux offset %d\n",
					addr, b->addr, i);
				ret = ArchFailure;
				goto Out;
			case ArchFaked:
				/*
				 * When we're writing the entry for an archive directory
				 * (like /archive/2003/1215) then even if we've faked
				 * any data, record the score unconditionally.
				 * This way, we will always record the Venti score here.
				 * Otherwise, temporary data or corrupted file system
				 * would cause us to keep holding onto the on-disk
				 * copy of the archive.
				 */
				/* note: the two ifs below nest; the copy happens only when both hold */
				if(e==nil || !e->archive)
				if(data == b->data){
if(0) fprint(2, "faked %#ux, faking %#ux (%V)\n", addr, b->addr, p->score);
					data = copyBlock(b, p->blockSize);
					if(data == nil){
						ret = ArchFailure;
						goto Out;
					}
					w.data = data;
				}
				/* fall through */
if(0) fprint(2, "falling\n");
			case ArchSuccess:
				/* store the child's new score (left in p->score) into data */
				if(e){
					memmove(e->score, p->score, VtScoreSize);
					e->flags &= ~VtEntryLocal;
					entryPack(e, data, w.n-1);
				}else
					memmove(data+(w.n-1)*VtScoreSize, p->score, VtScoreSize);
				if(data == b->data){
					blockDirty(b);
					/*
					 * If b is in the active tree, then we need to note that we've
					 * just removed addr from the active tree (replacing it with the
					 * copy we just stored to Venti).  If addr is in other snapshots,
					 * this will close addr but not free it, since it has a non-empty
					 * epoch range.
					 *
					 * If b is in the active tree but has been copied (this can happen
					 * if we get killed at just the right moment), then we will
					 * mistakenly leak its kids.
					 *
					 * The children of an archive directory (e.g., /archive/2004/0604)
					 * are not treated as in the active tree.
					 */
					if((b->l.state&BsCopied)==0 && (e==nil || e->snap==0))
						blockRemoveLink(b, addr, p->l.type, p->l.tag, 0);
				}
				break;
			}
		}

		/* all children handled: send this block's (possibly faked) data */
		if(!ventiSend(p->a, b, data)){
			p->nfailsend++;
			ret = ArchFailure;
			goto Out;
		}
		p->nsend++;
		if(data != b->data)
			p->nfake++;
		if(data == b->data){	/* not faking it, so update state */
			p->nreal++;
			l = b->l;
			l.state |= BsVenti;
			if(!blockSetLabel(b, &l, 0)){
				ret = ArchFailure;
				goto Out;
			}
		}
	}

	/* leave this block's score in p->score for the caller */
	shaBlock(p->score, b, data, p->blockSize);
if(0) fprint(2, "ventisend %V %p %p %p\n", p->score, data, b->data, w.data);
	ret = data!=b->data ? ArchFaked : ArchSuccess;
	p->l = b->l;
Out:
	/* free the private copy, if one was made, and release the block */
	if(data != b->data)
		vtMemFree(data);
	p->depth--;
	blockPut(b);
	return ret;
}
Beispiel #5
0
/*
 * Reduce the depth of the block tree rooted at entry e (stored in
 * block p for source r) towards depth, by following pointer 0 of each
 * level and making the block reached the new root.
 *
 * The caller must hold r locked.  The walk may stop early if a block
 * cannot be loaded; in that case the tree is still shortened by as
 * many levels as were reached.
 *
 * Returns 1 if the requested depth was reached, 0 otherwise
 * (including when nothing could be changed).
 */
static int
sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
{
    Block *b, *nb, *ob, *rb;
    uint32_t tag;
    int type, d;
    Entry oe;

    assert(sourceIsLocked(r));
    assert(depth <= VtPointerDepth);

    type = entryType(e);
    rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
    if(rb == nil)
        return 0;

    tag = e->tag;
    if(tag == 0)
        tag = tagGen();

    /*
     * Walk down to the new root block.
     * We may stop early, but something is better than nothing.
     */
    oe = *e;

    ob = nil;
    b = rb;
    /*
     * Each step down the tree is one pointer level lower, so the
     * block type must decrease along with d.  (The loop previously
     * incremented type, which requested the wrong type for every
     * level after the first.)
     */
    for(d=e->depth; d > depth; d--, type--) {
        nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
        if(nb == nil)
            break;
        /* release intermediate blocks; rb is released at the end */
        if(ob!=nil && ob!=rb)
            blockPut(ob);
        ob = b;
        b = nb;
    }

    /* could not descend even one level: nothing to change */
    if(b == rb) {
        blockPut(rb);
        return 0;
    }

    /*
     * Right now, e points at the root block rb, b is the new root block,
     * and ob points at b.  To update:
     *
     *	(i) change e to point at b
     *	(ii) zero the pointer ob -> b
     *	(iii) free the root block
     *
     * p (the block containing e) must be written before
     * anything else.
     */

    /* (i) */
    e->depth = d;
    /* might have been local and now global; reverse cannot happen */
    if(globalToLocal(b->score) == NilBlock)
        e->flags &= ~VtEntryLocal;
    memmove(e->score, b->score, VtScoreSize);
    entryPack(e, p->data, r->offset % r->epb);
    blockDependency(p, b, r->offset % r->epb, nil, &oe);
    blockDirty(p);

    /* (ii) */
    memmove(ob->data, vtZeroScore, VtScoreSize);
    blockDependency(ob, p, 0, b->score, nil);
    blockDirty(ob);

    /* (iii) */
    if(rb->addr != NilBlock)
        blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);

    blockPut(rb);
    if(ob!=nil && ob!=rb)
        blockPut(ob);
    blockPut(b);

    return d == depth;
}
Beispiel #6
0
/*
 * Fetch the child of block p selected by index.
 *
 * If p is a BtDir block, the child is the root block described by
 * entry e; otherwise it is the block named by the index'th score in p,
 * one pointer level below p.
 *
 * In OReadOnly mode the child is returned as found.  Otherwise p must
 * belong to the current epoch (fs->ehi), and a child from an older
 * epoch is copied into the current epoch with blockCopy; p (and e,
 * when p is a directory block) is updated to point at the copy, and
 * the old link is handed to blockRemoveLink.
 *
 * Returns the child block, or nil if it could not be loaded or copied.
 */
static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
    Block *b;
    Cache *c;
    uint32_t addr;
    int type;
    uint8_t oscore[VtScoreSize];
    Entry oe;

    c = fs->cache;

    if((p->l.type & BtLevelMask) == 0) {
        assert(p->l.type == BtDir);
        type = entryType(e);
        b = cacheGlobal(c, e->score, type, e->tag, mode);
    } else {
        type = p->l.type - 1;
        b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
    }

    if(b)
        b->pc = getcallerpc(&p);

    if(b == nil || mode == OReadOnly)
        return b;

    if(p->l.epoch != fs->ehi) {
        fprint(2, "blockWalk: parent not writable\n");
        abort();
    }
    /* child already belongs to the current epoch: no copy needed */
    if(b->l.epoch == fs->ehi)
        return b;

    /* keep the unmodified entry for blockDependency below */
    oe = *e;

    /*
     * Copy on write.
     */
    if(e->tag == 0) {
        assert(p->l.type == BtDir);
        e->tag = tagGen();
        e->flags |= VtEntryLocal;
    }

    /* remember the old block's address; b is replaced by its copy */
    addr = b->addr;
    b = blockCopy(b, e->tag, fs->ehi, fs->elo);
    if(b == nil)
        return nil;

    b->pc = getcallerpc(&p);
    assert(b->l.epoch == fs->ehi);

    blockDirty(b);
    /* point the parent at the copy, recording the previous value */
    if(p->l.type == BtDir) {
        memmove(e->score, b->score, VtScoreSize);
        entryPack(e, p->data, index);
        blockDependency(p, b, index, nil, &oe);
    } else {
        memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
        memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
        blockDependency(p, b, index, oscore, nil);
    }
    blockDirty(p);

    if(addr != NilBlock)
        blockRemoveLink(p, addr, type, e->tag, 0);

    return b;
}
Beispiel #7
0
/*
 * Drop everything past byte offset size from the block tree described
 * by entry e of source r.
 *
 * At each pointer level, the pointers wholly beyond size are zeroed
 * and handed to blockRemoveLink; the walk then continues into the one
 * block that is only partly needed.  At a BtData leaf the bytes from
 * size up to e->dsize are cleared.
 *
 * Returns 1 when trimming completed, 0 when a block could not be
 * loaded or cannot be modified in place (it has no local address or
 * is not in the current epoch).
 */
static int
sourceShrinkSize(Source *r, Entry *e, uint64_t size)
{
    Block *blk;
    uint8_t next[VtScoreSize];
    uint64_t span;
    uint32_t kid;
    int level, slot, nptr, type;

    type = entryType(e);
    blk = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
    if(blk == nil)
        return 0;

    /* span: number of bytes covered by one pointer in the root block */
    nptr = e->psize/VtScoreSize;
    span = e->dsize;
    for(level=1; level<e->depth; level++)
        span *= nptr;

    while((type & BtLevelMask) != 0) {
        if(!(blk->addr != NilBlock && blk->l.epoch == r->fs->ehi)) {
            /* not worth copying the block just so we can zero some of it */
            blockPut(blk);
            return 0;
        }

        /*
         * invariant: each pointer in the tree rooted at blk accounts
         * for span bytes
         */

        /* zero the pointers to unnecessary blocks */
        slot = (size+span-1)/span;
        while(slot < nptr) {
            kid = globalToLocal(blk->data+slot*VtScoreSize);
            memmove(blk->data+slot*VtScoreSize, vtZeroScore, VtScoreSize);
            blockDirty(blk);
            if(kid != NilBlock)
                blockRemoveLink(blk, kid, type-1, e->tag, 1);
            slot++;
        }

        /* descend into the partially necessary block, if there is one */
        slot = size/span;
        size %= span;
        if(size == 0) {
            blockPut(blk);
            return 1;
        }
        span /= nptr;
        type--;
        /* copy the score out before releasing the block that holds it */
        memmove(next, blk->data+slot*VtScoreSize, VtScoreSize);
        blockPut(blk);
        blk = cacheGlobal(r->fs->cache, next, type, e->tag, OReadWrite);
        if(blk == nil)
            return 0;
    }

    if(blk->addr == NilBlock || blk->l.epoch != r->fs->ehi) {
        blockPut(blk);
        return 0;
    }

    /*
     * No one ever truncates BtDir blocks.
     */
    if(type == BtData && size < e->dsize) {
        memset(blk->data+size, 0, e->dsize-size);
        blockDirty(blk);
    }
    blockPut(blk);
    return 1;
}