Exemple #1
0
/*
 * Check the tree rooted at the root block of the given epoch.
 *
 * The root block is located by scanning every block label, starting
 * at chk->hint and wrapping around, for one tagged RootTag whose
 * epoch matches.  The root's first entry is then walked with
 * walkEpoch; its second entry, if it unpacks, becomes the hint for
 * the next search.
 *
 * Failures are reported through error() or chk->print and the
 * function returns without walking anything.
 */
static void
checkEpoch(Fsck *chk, u32int epoch)
{
	u32int i, addr;
	Block *b;
	Entry e;
	Label l;

	chk->print("checking epoch %ud...\n", epoch);

	addr = 0;
	for(i=0; i<chk->nblocks; i++){
		/* the address actually probed is offset by the hint */
		addr = (i+chk->hint)%chk->nblocks;
		if(!readLabel(chk->cache, &l, addr)){
			/* report the block that was read, not the loop counter */
			error(chk, "could not read label for addr 0x%.8#ux", addr);
			continue;
		}
		if(l.tag == RootTag && l.epoch == epoch)
			break;
	}

	if(i == chk->nblocks){
		chk->print("could not find root block for epoch %ud\n", epoch);
		return;
	}

	b = cacheLocalData(chk->cache, addr, BtDir, RootTag, OReadOnly, 0);
	if(b == nil){
		error(chk, "could not read root block 0x%.8#ux: %R", addr);
		return;
	}

	/* no one should point at root blocks */
	setBit(chk->amap, addr);
	setBit(chk->emap, addr);
	setBit(chk->xmap, addr);

	/*
	 * First entry is the rest of the file system.
	 * Second entry is link to previous epoch root,
	 * just a convenience to help the search.
	 */
	if(!entryUnpack(&e, b->data, 0)){
		error(chk, "could not unpack root block 0x%.8#ux: %R", addr);
		blockPut(b);
		return;
	}
	walkEpoch(chk, b, e.score, BtDir, e.tag, epoch);
	if(entryUnpack(&e, b->data, 1))
		chk->hint = globalToLocal(e.score);
	blockPut(b);
}
Exemple #2
0
/*
 * Build the Source for the root block at local address addr.
 *
 * The block is fetched as a BtDir block tagged RootTag in the given
 * mode.  When opened OReadWrite, the block's epoch must equal
 * fs->ehi; otherwise the error is set to EBadRoot and nil is
 * returned.  Returns nil as well if the block cannot be fetched.
 * The block is released before returning in every case.
 */
Source *
sourceRoot(Fs *fs, uint32_t addr, int mode)
{
    Source *root;
    Block *blk;

    blk = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
    if(blk == nil)
        return nil;

    /* read-only opens, and writable opens on a current-epoch root, succeed */
    if(mode != OReadWrite || blk->l.epoch == fs->ehi) {
        root = sourceAlloc(fs, blk, nil, 0, mode, 0);
        blockPut(blk);
        return root;
    }

    /* writable open of a root from another epoch */
    consPrint("sourceRoot: fs->ehi = %ud, b->l = %L\n",
              fs->ehi, &blk->l);
    blockPut(blk);
    vtSetError(EBadRoot);
    return nil;
}
Exemple #3
0
/*
 * Open the file system stored in file and return a new Fs.
 *
 * mode must be OReadOnly or OReadWrite; anything else sets
 * EBadMode and returns nil.  z is the Venti session; it may be
 * nil, in which case no archiver is started.  ncache is passed
 * through to cacheAlloc.
 *
 * The super block is read to obtain the epoch range and the address
 * of the active root.  If the root cannot be opened read-write
 * because of EBadRoot, the root block is copied into the current
 * epoch, the super block is rewritten to point at the copy, and the
 * open is retried.
 *
 * Returns nil with the error string set on failure.  Failures after
 * the Fs has been allocated are cleaned up via fsClose.
 */
Fs *
fsOpen(char *file, VtSession *z, int32_t ncache, int mode)
{
	int fd, m;
	uint8_t oscore[VtScoreSize];
	Block *b, *bs;
	Disk *disk;
	Fs *fs;
	Super super;

	/* translate the fossil open mode into the open(2) mode */
	switch(mode){
	default:
		vtSetError(EBadMode);
		return nil;
	case OReadOnly:
		m = OREAD;
		break;
	case OReadWrite:
		m = ORDWR;
		break;
	}
	fd = open(file, m);
	if(fd < 0){
		vtSetError("open %s: %r", file);
		return nil;
	}

	bwatchInit();
	disk = diskAlloc(fd);
	if(disk == nil){
		vtSetError("diskAlloc: %R");
		close(fd);
		return nil;
	}

	fs = vtMemAllocZ(sizeof(Fs));
	fs->mode = mode;
	fs->name = vtStrDup(file);
	fs->blockSize = diskBlockSize(disk);
	fs->elk = vtLockAlloc();
	fs->cache = cacheAlloc(disk, z, ncache, mode);
	/* the archiver is only started for a writable fs with a Venti session */
	if(mode == OReadWrite && z)
		fs->arch = archInit(fs->cache, disk, fs, z);
	fs->z = z;

	/* read the super block to learn the epoch range and active root */
	b = cacheLocal(fs->cache, PartSuper, 0, mode);
	if(b == nil)
		goto Err;
	if(!superUnpack(&super, b->data)){
		blockPut(b);
		vtSetError("bad super block");
		goto Err;
	}
	blockPut(b);

	fs->ehi = super.epochHigh;
	fs->elo = super.epochLow;

//fprint(2, "%s: fs->ehi %d fs->elo %d active=%d\n", argv0, fs->ehi, fs->elo, super.active);

	fs->source = sourceRoot(fs, super.active, mode);
	if(fs->source == nil){
		/*
		 * Perhaps it failed because the block is copy-on-write.
		 * Do the copy and try again.
		 */
		if(mode == OReadOnly || strcmp(vtGetError(), EBadRoot) != 0)
			goto Err;
		b = cacheLocalData(fs->cache, super.active, BtDir, RootTag,
			OReadWrite, 0);
		if(b == nil){
			vtSetError("cacheLocalData: %R");
			goto Err;
		}
		/* EBadRoot was reported, so the epochs were expected to differ */
		if(b->l.epoch == fs->ehi){
			blockPut(b);
			vtSetError("bad root source block");
			goto Err;
		}
		/* NOTE(review): b is not released here on failure; presumably blockCopy does so -- confirm */
		b = blockCopy(b, RootTag, fs->ehi, fs->elo);
		if(b == nil)
			goto Err;
		/* remember the old root's score, then point the super block at the copy */
		localToGlobal(super.active, oscore);
		super.active = b->addr;
		bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite);
		if(bs == nil){
			blockPut(b);
			vtSetError("cacheLocal: %R");
			goto Err;
		}
		superPack(&super, bs->data);
		blockDependency(bs, b, 0, oscore, nil);
		blockPut(b);
		blockDirty(bs);
		blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
		blockPut(bs);
		/* retry now that the active root is in the current epoch */
		fs->source = sourceRoot(fs, super.active, mode);
		if(fs->source == nil){
			vtSetError("sourceRoot: %R");
			goto Err;
		}
	}

//fprint(2, "%s: got fs source\n", argv0);

	vtRLock(fs->elk);
	fs->file = fileRoot(fs->source);
	fs->source->file = fs->file;		/* point back */
	vtRUnlock(fs->elk);
	if(fs->file == nil){
		vtSetError("fileRoot: %R");
		goto Err;
	}

//fprint(2, "%s: got file root\n", argv0);

	/* periodic metadata flushing and snapshots only apply to a writable fs */
	if(mode == OReadWrite){
		fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000);
		fs->snap = snapInit(fs);
	}
	return fs;

Err:
fprint(2, "%s: fsOpen error\n", argv0);
	fsClose(fs);
	return nil;
}
Exemple #4
0
/*
 * Archive the block at local address addr (with the given type and
 * tag) and, recursively, every local block it points to.
 *
 * On ArchSuccess or ArchFaked, p->score holds the score computed by
 * shaBlock for the data that was sent and p->l holds the block's
 * label.  Returns:
 *	ArchSuccess	the block's own data was sent unmodified in a copy;
 *	ArchFaked	a modified private copy of the data was sent, or
 *			the block could not be loaded because of
 *			ELabelMismatch (p->score is then vtZeroScore);
 *	ArchFailure	anything else went wrong.
 *
 * If the block's label already has BsVenti set, nothing is walked or
 * sent; only the score is computed.
 *
 * p->depth, p->maxdepth and the p->n* counters are updated as
 * statistics along the way.
 */
static int
archWalk(Param *p, u32int addr, uchar type, u32int tag)
{
	int ret, i, x, psize, dsize;
	uchar *data, score[VtScoreSize];
	Block *b;
	Label l;
	Entry *e;
	WalkPtr w;

	p->nvisit++;

	b = cacheLocalData(p->c, addr, type, tag, OReadWrite,0);
	if(b == nil){
		fprint(2, "archive(%ud, %#ux): cannot find block: %R\n", p->snapEpoch, addr);
		if(strcmp(vtGetError(), ELabelMismatch) == 0){
			/* might as well plod on so we write _something_ to Venti */
			memmove(p->score, vtZeroScore, VtScoreSize);
			return ArchFaked;
		}
		return ArchFailure;
	}

	if(DEBUG) fprint(2, "%*sarchive(%ud, %#ux): block label %L\n",
		p->depth*2, "",  p->snapEpoch, b->addr, &b->l);
	p->depth++;
	if(p->depth > p->maxdepth)
		p->maxdepth = p->depth;

	/*
	 * data aliases b->data until something has to be faked, at which
	 * point it is replaced by the result of copyBlock.  The test
	 * data == b->data therefore means "nothing faked so far".
	 */
	data = b->data;
	if((b->l.state&BsVenti) == 0){
		initWalk(&w, b, b->l.type==BtDir ? p->dsize : p->psize);
		for(i=0; nextWalk(&w, score, &type, &tag, &e); i++){
			if(e){
				if(!(e->flags&VtEntryActive))
					continue;
				/* entries that must not be archived are zeroed in the copy */
				if((e->snap && !e->archive)
				|| (e->flags&VtEntryNoArchive)){
					if(0) fprint(2, "snap; faking %#ux\n", b->addr);
					if(data == b->data){
						data = copyBlock(b, p->blockSize);
						if(data == nil){
							ret = ArchFailure;
							goto Out;
						}
						w.data = data;
					}
					memmove(e->score, vtZeroScore, VtScoreSize);
					e->depth = 0;
					e->size = 0;
					e->tag = 0;
					e->flags &= ~VtEntryLocal;
					entryPack(e, data, w.n-1);
					continue;
				}
			}
			addr = globalToLocal(score);
			if(addr == NilBlock)
				continue;
			/* use the entry's own block sizes while walking below it, then restore */
			dsize = p->dsize;
			psize = p->psize;
			if(e){
				p->dsize= e->dsize;
				p->psize = e->psize;
			}
			/* b's lock is dropped for the duration of the recursive walk */
			vtUnlock(b->lk);
			x = archWalk(p, addr, type, tag);
			vtLock(b->lk);
			if(e){
				p->dsize = dsize;
				p->psize = psize;
			}
			/* wait until b is in the BioClean or BioDirty state again */
			while(b->iostate != BioClean && b->iostate != BioDirty)
				vtSleep(b->ioready);
			switch(x){
			case ArchFailure:
				fprint(2, "archWalk %#ux failed; ptr is in %#ux offset %d\n",
					addr, b->addr, i);
				ret = ArchFailure;
				goto Out;
			case ArchFaked:
				/*
				 * When we're writing the entry for an archive directory
				 * (like /archive/2003/1215) then even if we've faked
				 * any data, record the score unconditionally.
				 * This way, we will always record the Venti score here.
				 * Otherwise, temporary data or corrupted file system
				 * would cause us to keep holding onto the on-disk
				 * copy of the archive.
				 */
				if(e==nil || !e->archive)
				if(data == b->data){
if(0) fprint(2, "faked %#ux, faking %#ux (%V)\n", addr, b->addr, p->score);
					data = copyBlock(b, p->blockSize);
					if(data == nil){
						ret = ArchFailure;
						goto Out;
					}
					w.data = data;
				}
				/* fall through */
if(0) fprint(2, "falling\n");
			case ArchSuccess:
				/* store the child's score in this block's pointer or entry */
				if(e){
					memmove(e->score, p->score, VtScoreSize);
					e->flags &= ~VtEntryLocal;
					entryPack(e, data, w.n-1);
				}else
					memmove(data+(w.n-1)*VtScoreSize, p->score, VtScoreSize);
				if(data == b->data){
					blockDirty(b);
					/*
					 * If b is in the active tree, then we need to note that we've
					 * just removed addr from the active tree (replacing it with the
					 * copy we just stored to Venti).  If addr is in other snapshots,
					 * this will close addr but not free it, since it has a non-empty
					 * epoch range.
					 *
					 * If b is in the active tree but has been copied (this can happen
					 * if we get killed at just the right moment), then we will
					 * mistakenly leak its kids.
					 *
					 * The children of an archive directory (e.g., /archive/2004/0604)
					 * are not treated as in the active tree.
					 */
					if((b->l.state&BsCopied)==0 && (e==nil || e->snap==0))
						blockRemoveLink(b, addr, p->l.type, p->l.tag, 0);
				}
				break;
			}
		}

		if(!ventiSend(p->a, b, data)){
			p->nfailsend++;
			ret = ArchFailure;
			goto Out;
		}
		p->nsend++;
		if(data != b->data)
			p->nfake++;
		if(data == b->data){	/* not faking it, so update state */
			p->nreal++;
			l = b->l;
			l.state |= BsVenti;
			if(!blockSetLabel(b, &l, 0)){
				ret = ArchFailure;
				goto Out;
			}
		}
	}

	shaBlock(p->score, b, data, p->blockSize);
if(0) fprint(2, "ventisend %V %p %p %p\n", p->score, data, b->data, w.data);
	ret = data!=b->data ? ArchFaked : ArchSuccess;
	p->l = b->l;
Out:
	/* release the private copy, if one was made */
	if(data != b->data)
		vtMemFree(data);
	p->depth--;
	blockPut(b);
	return ret;
}
Exemple #5
0
/*
 * Retrieve the block containing the entry for r.
 * If a snapshot has happened, we might need
 * to get a new copy of the block.  We avoid this
 * in the common case by caching the score for
 * the block and the last epoch in which it was valid.
 *
 * We use r->mode to tell the difference between active
 * file system sources (OReadWrite) and sources for the
 * snapshot file system (OReadOnly).
 *
 * mode is the mode in which the block is fetched in the
 * OReadOnly case; the OReadWrite case always fetches the
 * block OReadWrite.
 *
 * Returns the block, or nil on failure.  When the block had
 * to be found again through the parent, r->score and
 * r->scoreEpoch (and, for OReadWrite sources, r->tag and
 * r->epoch) are updated to describe the block returned.
 */
static Block*
sourceLoadBlock(Source *r, int mode)
{
    uint32_t addr;
    Block *b;

    switch(r->mode) {
    default:
        assert(0);
        /* do not run the OReadWrite path for an unknown mode */
        return nil;

    case OReadWrite:
        /*
         * This needn't be true -- we might bump the low epoch
         * to reclaim some old blocks, but since this score is
         * OReadWrite, the blocks must all still be open, so none
         * are reclaimed.  Thus it's okay that the epoch is so low.
         * Proceed.
        assert(r->epoch >= r->fs->elo);
         */
        if(r->epoch == r->fs->ehi) {
            /* cached score is still current; fetch the block directly */
            b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
            if(b == nil)
                return nil;
            assert(r->epoch == b->l.epoch);
            return b;
        }

        /* the epoch has moved on: refetch through the parent */
        assert(r->parent != nil);
        if(!sourceLock(r->parent, OReadWrite))
            return nil;
        b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
        sourceUnlock(r->parent);
        if(b == nil)
            return nil;
        assert(b->l.epoch == r->fs->ehi);
        //	fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
        memmove(r->score, b->score, VtScoreSize);
        r->scoreEpoch = b->l.epoch;
        r->tag = b->l.tag;
        r->epoch = r->fs->ehi;
        return b;

    case OReadOnly:
        addr = globalToLocal(r->score);
        if(addr == NilBlock)
            return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);

        b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
        if(b)
            return b;

        /*
         * If it failed because the epochs don't match, the block has been
         * archived and reclaimed.  Rewalk from the parent and get the
         * new pointer.  This can't happen in the OReadWrite case
         * above because blocks in the current epoch don't get
         * reclaimed.  The fact that we're OReadOnly means we're
         * a snapshot.  (Or else the file system is read-only, but then
         * the archiver isn't going around deleting blocks.)
         *
         * A source without a parent has nothing to rewalk from, so
         * the original error is left in place and nil is returned.
         */
        if(r->parent != nil && strcmp(vtGetError(), ELabelMismatch) == 0) {
            if(!sourceLock(r->parent, OReadOnly))
                return nil;
            b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
            sourceUnlock(r->parent);
            if(b) {
                fprint(2, "sourceLoadBlock: lost %V found %V\n",
                       r->score, b->score);
                memmove(r->score, b->score, VtScoreSize);
                r->scoreEpoch = b->l.epoch;
                return b;
            }
        }
        return nil;
    }
}