Example no. 1
static int try_bnode_merge(struct sb *sb, struct buffer_head *intobuf,
			   struct buffer_head *frombuf)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct bnode *into = bufdata(intobuf);
	struct bnode *from = bufdata(frombuf);

	/* Try to merge nodes */
	if (bnode_merge_nodes(sb, into, from)) {
		/*
		 * We know frombuf is redirected and dirty. So, in
		 * here, we can just cancel bnode_redirect by bfree(),
		 * instead of defered_bfree()
		 * FIXME: we can optimize freeing bnode without
		 * bnode_redirect, and if we did, this is not true.
		 */
		bfree(sb, bufindex(frombuf), 1);
		log_bnode_merge(sb, bufindex(frombuf), bufindex(intobuf));
		return 1;
	}
	return 0;
}
Example no. 2
static int try_leaf_merge(struct btree *btree, struct buffer_head *intobuf,
			  struct buffer_head *frombuf)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct vleaf *from = bufdata(frombuf);
	struct vleaf *into = bufdata(intobuf);

	/* Try to merge leaves */
	if (btree->ops->leaf_merge(btree, into, from)) {
		struct sb *sb = btree->sb;
		/*
		 * We know frombuf is redirected and dirty. So, in
		 * here, we can just cancel leaf_redirect by bfree(),
		 * instead of defered_bfree()
		 * FIXME: we can optimize freeing leaf without
		 * leaf_redirect, and if we did, this is not true.
		 */
		bfree(sb, bufindex(frombuf), 1);
		log_leaf_free(sb, bufindex(frombuf));
		return 1;
	}
	return 0;
}
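
Both helpers return 1 on a successful merge (the source block is freed and the merge is logged) and 0 otherwise. A minimal sketch of the caller side, mirroring how btree_chop() in Example no. 24 below drives try_leaf_merge(); leafprev, leafbuf, cursor, cii and sb are assumed to be in scope as in that function:

	/* Sketch only: caller-side pattern, modeled on btree_chop() below. */
	if (leafprev && try_leaf_merge(btree, leafprev, leafbuf)) {
		/* leafbuf was folded into leafprev; drop its index and free it */
		remove_index(cursor, cii);
		mark_buffer_dirty_non(leafprev);
		blockput_free(sb, leafbuf);
	} else {
		if (leafprev)
			blockput(leafprev);	/* could not merge, release old prev */
		leafprev = leafbuf;		/* current leaf becomes the new prev */
	}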
Example no. 3
/* Prepare log info for replay and pin logblocks. */
static struct replay *replay_prepare(struct sb *sb)
{
	if(DEBUG_MODE_K==1)
	{
		printk(KERN_INFO"%25s  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	block_t logchain = be64_to_cpu(sb->super.logchain);
	unsigned i, logcount = be32_to_cpu(sb->super.logcount);
	struct replay *rp;
	struct buffer_head *buffer;
	int err;

	/* FIXME: this address array is quick hack. Rethink about log
	 * block management and log block address. */
	rp = alloc_replay(sb, logcount);
	if (IS_ERR(rp))
		return rp;

	/* FIXME: maybe, we should use bufvec to read log blocks */
	trace("load %u logblocks", logcount);
	i = logcount;
	while (i-- > 0) {
		struct logblock *log;

		buffer = blockget(mapping(sb->logmap), i);
		if (!buffer) {
			i++;
			err = -ENOMEM;
			goto error;
		}
		assert(bufindex(buffer) == i);
		err = blockio(READ, sb, buffer, logchain);
		if (err)
			goto error;

		err = replay_check_log(rp, buffer);
		if (err)
			goto error;

		/* Store index => blocknr map */
		rp->blocknrs[bufindex(buffer)] = logchain;

		log = bufdata(buffer);
		logchain = be64_to_cpu(log->logchain);
	}

	return rp;

error:
	free_replay(rp);
	replay_unpin_logblocks(sb, i, logcount);

	return ERR_PTR(err);
}
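
replay_prepare() and several helpers below return errors encoded in the pointer itself, the Linux kernel ERR_PTR()/IS_ERR()/PTR_ERR() convention from <linux/err.h>. A self-contained sketch of that convention; sketch_replay and sketch_alloc_replay() are purely hypothetical names for illustration:

#include <errno.h>
#include <stdlib.h>

/* Minimal stand-ins for the <linux/err.h> macros this code relies on. */
static inline void *ERR_PTR(long err)      { return (void *)err; }
static inline long  PTR_ERR(const void *p) { return (long)p; }
static inline int   IS_ERR(const void *p)  { return (unsigned long)p >= (unsigned long)-4095; }

struct sketch_replay { int unused; };		/* placeholder type for the sketch */

static struct sketch_replay *sketch_alloc_replay(int want_failure)
{
	if (want_failure)
		return ERR_PTR(-ENOMEM);	/* the errno rides inside the pointer */
	return calloc(1, sizeof(struct sketch_replay));
}

static int sketch_caller(void)
{
	struct sketch_replay *rp = sketch_alloc_replay(0);
	if (IS_ERR(rp))
		return PTR_ERR(rp);		/* unwrap the negative errno */
	free(rp);
	return 0;
}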
Example no. 4
static void brelse_free(struct btree *btree, struct buffer_head *buffer)
{
	struct sb *sb = btree->sb;
	block_t block = bufindex(buffer);
	if (bufcount(buffer) != 1) {
		warn("free block %Lx still in use!", (L)bufindex(buffer));
		brelse(buffer);
		assert(bufcount(buffer) == 0);
		return;
	}
	brelse(buffer);
	(btree->ops->bfree)(sb, block, 1);
	set_buffer_empty(buffer); // free it!!! (and need a buffer free state)
}
Example no. 5
/* unused */
void show_cursor(struct cursor *cursor, int depth)
{
	printf(">>> cursor %p/%i:", cursor, depth);
	for (int i = 0; i < depth; i++)
		printf(" [%Lx/%i]", (L)bufindex(cursor->path[i].buffer), bufcount(cursor->path[i].buffer));
	printf("\n");
}
Example no. 6
/*
 * Split leaf, then insert to parent.
 * key:  key to add after split (cursor will point to the leaf that contains key)
 * hint: hint for split
 *
 * return value:
 *   0 - success
 * < 0 - error
 */
static int btree_leaf_split(struct cursor *cursor, tuxkey_t key, tuxkey_t hint)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	trace("split leaf");
	struct btree *btree = cursor->btree;
	struct buffer_head *newbuf;

	newbuf = new_leaf(btree);
	if (IS_ERR(newbuf))
		return PTR_ERR(newbuf);
	log_balloc(btree->sb, bufindex(newbuf), 1);

	struct buffer_head *leafbuf = cursor_leafbuf(cursor);
	tuxkey_t newkey = btree->ops->leaf_split(btree, hint, bufdata(leafbuf),
						 bufdata(newbuf));
	assert(cursor_this_key(cursor) < newkey);
	assert(newkey < cursor_next_key(cursor));
	if (key < newkey)
		mark_buffer_dirty_non(newbuf);
	else
		mark_buffer_dirty_non(leafbuf);
	return insert_leaf(cursor, newkey, newbuf, key < newkey);
}
Example no. 7
static void cursor_check(struct cursor *cursor)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	if (cursor->level == -1)
		return;
	tuxkey_t key = 0;
	block_t block = cursor->btree->root.block;

	for (int i = 0; i <= cursor->level; i++) {
		assert(bufindex(cursor->path[i].buffer) == block);
		if (i == cursor->level)
			break;

		struct bnode *bnode = level_node(cursor, i);
		struct index_entry *entry = cursor->path[i].next - 1;
		assert(bnode->entries <= entry);
		assert(entry < bnode->entries + bcount(bnode));
		/*
		 * If this entry is the leftmost one, its key should be
		 * the same as the parent's key. Otherwise, the leftmost
		 * key may not be correct as the next key.
		 */
		if (bnode->entries == entry)
			assert(be64_to_cpu(entry->key) == key);
		else
			assert(be64_to_cpu(entry->key) > key);

		block = be64_to_cpu(entry->block);
		key = be64_to_cpu(entry->key);
	}
}
Example no. 8
static void adjust_parent_sep(struct cursor *cursor, int level, __be64 newsep)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	/* Update separating key until nearest common parent */
	while (level >= 0) {
		struct path_level *parent_at = &cursor->path[level];
		struct index_entry *parent = parent_at->next - 1;

		assert(0 < be64_to_cpu(parent->key));
		assert(be64_to_cpu(parent->key) < be64_to_cpu(newsep));
		log_bnode_adjust(cursor->btree->sb,
				 bufindex(parent_at->buffer),
				 be64_to_cpu(parent->key),
				 be64_to_cpu(newsep));
		parent->key = newsep;
		mark_buffer_unify_non(parent_at->buffer);

		if (parent != level_node(cursor, level)->entries)
			break;

		level--;
	}
}
Example no. 9
File: btree.c Project: Zkin/tux3
/* unused */
void show_cursor(struct cursor *cursor, int depth)
{
	__tux3_dbg(">>> cursor %p/%i:", cursor, depth);
	for (int i = 0; i < depth; i++) {
		__tux3_dbg(" [%Lx/%i]",
			   bufindex(cursor->path[i].buffer),
			   bufcount(cursor->path[i].buffer));
	}
	__tux3_dbg("\n");
}
Example no. 10
File: btree.c Project: Zkin/tux3
static int try_bnode_merge(struct sb *sb, struct buffer_head *intobuf,
			   struct buffer_head *frombuf)
{
	struct bnode *into = bufdata(intobuf);
	struct bnode *from = bufdata(frombuf);

	/* Try to merge nodes */
	if (bnode_merge_nodes(sb, into, from)) {
		/*
		 * We know frombuf is redirected and dirty. So, in
		 * here, we can just cancel bnode_redirect by bfree(),
		 * instead of defered_bfree()
		 * FIXME: we can optimize freeing bnode without
		 * bnode_redirect, and if we did, this is not true.
		 */
		bfree(sb, bufindex(frombuf), 1);
		log_bnode_merge(sb, bufindex(frombuf), bufindex(intobuf));
		return 1;
	}
	return 0;
}
Example no. 11
/* unused */
void show_cursor(struct cursor *cursor, int depth)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	__tux3_dbg(">>> cursor %p/%i:", cursor, depth);
	for (int i = 0; i < depth; i++) {
		__tux3_dbg(" [%Lx/%i]",
			   bufindex(cursor->path[i].buffer),
			   bufcount(cursor->path[i].buffer));
	}
	__tux3_dbg("\n");
}
Example no. 12
static void cursor_check(struct cursor *cursor)
{
	if (cursor->len == 0)
		return;
	tuxkey_t key = 0;
	block_t block = cursor->btree->root.block;
	for (int i = 0; i < cursor->len; i++) {
		assert(bufindex(cursor->path[i].buffer) == block);
		if (!cursor->path[i].next)
			break;
		struct bnode *node = cursor_node(cursor, i);
		assert(node->entries < cursor->path[i].next);
		assert(cursor->path[i].next <= node->entries + bcount(node));
		assert(from_be_u64((cursor->path[i].next - 1)->key) >= key);
		block = from_be_u64((cursor->path[i].next - 1)->block);
		key = from_be_u64((cursor->path[i].next - 1)->key);
	}
}
Example no. 13
File: dir.c Project: Zkin/tux3
int tux_delete_entry(struct inode *dir, struct buffer_head *buffer,
		     tux_dirent *entry)
{
	unsigned delta = tux3_get_current_delta();
	tux_dirent *prev = NULL, *this = bufdata(buffer);
	struct buffer_head *clone;
	void *olddata;

	while ((char *)this < (char *)entry) {
		if (this->rec_len == 0) {
			blockput(buffer);
			tux_zero_len_error(dir, bufindex(buffer));
			return -EIO;
		}
		prev = this;
		this = next_entry(this);
	}

	/*
	 * The directory is protected by i_mutex.
	 * blockdirty() should never return -EAGAIN.
	 */
	olddata = bufdata(buffer);
	clone = blockdirty(buffer, delta);
	if (IS_ERR(clone)) {
		assert(PTR_ERR(clone) != -EAGAIN);
		blockput(buffer);
		return PTR_ERR(clone);
	}
	entry = ptr_redirect(entry, olddata, bufdata(clone));
	prev = ptr_redirect(prev, olddata, bufdata(clone));

	if (prev)
		prev->rec_len = tux_rec_len_to_disk((void *)next_entry(entry) - (void *)prev);
	memset(entry->name, 0, entry->name_len);
	entry->name_len = entry->type = 0;
	entry->inum = 0;

	mark_buffer_dirty_non(clone);
	blockput(clone);

	return 0;
}
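
tux_delete_entry() relies on the ext2-style directory layout: every record carries a rec_len giving the distance to the next record, and deleting a name simply folds the victim's length into the previous record's rec_len before blanking the entry. A simplified, self-contained sketch of that coalescing step; the record layout here is illustrative, not tux3's actual on-disk tux_dirent format:

#include <stdint.h>
#include <string.h>

/* Illustrative directory record, not the real tux_dirent layout. */
struct toy_dirent {
	uint64_t inum;
	uint16_t rec_len;	/* bytes from this record to the next one */
	uint8_t  name_len;
	uint8_t  type;
	char	 name[];
};

/* Delete `victim`: give its space to `prev` (if any) and blank the name. */
static void toy_delete_entry(struct toy_dirent *prev, struct toy_dirent *victim)
{
	if (prev)
		prev->rec_len += victim->rec_len;	/* prev now spans the freed record too */
	memset(victim->name, 0, victim->name_len);
	victim->name_len = victim->type = 0;
	victim->inum = 0;
}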
Example no. 14
static int replay_check_log(struct replay *rp, struct buffer_head *logbuf)
{
	if(DEBUG_MODE_K==1)
	{
		printk(KERN_INFO"%25s  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = rp->sb;
	struct logblock *log = bufdata(logbuf);
	unsigned char *data = log->data;

	if (log->magic != cpu_to_be16(TUX3_MAGIC_LOG)) {
		tux3_err(sb, "bad log magic %x", be16_to_cpu(log->magic));
		return -EINVAL;
	}
	if (be16_to_cpu(log->bytes) + sizeof(*log) > sb->blocksize) {
		tux3_err(sb, "log bytes is too big");
		return -EINVAL;
	}

	while (data < log->data + be16_to_cpu(log->bytes)) {
		u8 code = *data;

		/* Find latest unify. */
		if (code == LOG_UNIFY && rp->unify_index == -1) {
			rp->unify_pos = data;
			/* FIXME: index is unnecessary to use. We just
			 * want to know whether before or after unify
			 * mark. */
			rp->unify_index = bufindex(logbuf);
		}

		if (log_size[code] == 0) {
			tux3_err(sb, "invalid log code: 0x%02x", code);
			return -EINVAL;
		}
		data += log_size[code];
	}

	return 0;
}
Example no. 15
File: btree.c Project: Zkin/tux3
/*
 * Split leaf, then insert to parent.
 * key:  key to add after split (cursor will point to the leaf that contains key)
 * hint: hint for split
 *
 * return value:
 *   0 - success
 * < 0 - error
 */
static int btree_leaf_split(struct cursor *cursor, tuxkey_t key, tuxkey_t hint)
{
	trace("split leaf");
	struct btree *btree = cursor->btree;
	struct buffer_head *newbuf;

	newbuf = new_leaf(btree);
	if (IS_ERR(newbuf))
		return PTR_ERR(newbuf);
	log_balloc(btree->sb, bufindex(newbuf), 1);

	struct buffer_head *leafbuf = cursor_leafbuf(cursor);
	tuxkey_t newkey = btree->ops->leaf_split(btree, hint, bufdata(leafbuf),
						 bufdata(newbuf));
	assert(cursor_this_key(cursor) < newkey);
	assert(newkey < cursor_next_key(cursor));
	if (key < newkey)
		mark_buffer_dirty_non(newbuf);
	else
		mark_buffer_dirty_non(leafbuf);
	return insert_leaf(cursor, newkey, newbuf, key < newkey);
}
Example no. 16
File: btree.c Project: Zkin/tux3
static void adjust_parent_sep(struct cursor *cursor, int level, __be64 newsep)
{
	/* Update separating key until nearest common parent */
	while (level >= 0) {
		struct path_level *parent_at = &cursor->path[level];
		struct index_entry *parent = parent_at->next - 1;

		assert(0 < be64_to_cpu(parent->key));
		assert(be64_to_cpu(parent->key) < be64_to_cpu(newsep));
		log_bnode_adjust(cursor->btree->sb,
				 bufindex(parent_at->buffer),
				 be64_to_cpu(parent->key),
				 be64_to_cpu(newsep));
		parent->key = newsep;
		mark_buffer_rollup_non(parent_at->buffer);

		if (parent != level_node(cursor, level)->entries)
			break;

		level--;
	}
}
Example no. 17
/*
 * Recursively redirect non-dirty buffers on path to modify leaf.
 *
 * Redirect order is from root to leaf. Otherwise, blocks of path will
 * be allocated by reverse order.
 *
 * FIXME: We can allocate/copy blocks before change common ancestor
 * (before changing common ancestor, changes are not visible for
 * reader). With this, we may be able to reduce locking time.
 */
int cursor_redirect(struct cursor *cursor)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct btree *btree = cursor->btree;
	struct sb *sb = btree->sb;
	int level;

	for (level = 0; level <= btree->root.depth; level++) {
		struct buffer_head *buffer, *clone;
		block_t parent, oldblock, newblock;
		struct index_entry *entry;
		int redirect, is_leaf = (level == btree->root.depth);

		buffer = cursor->path[level].buffer;
		/* If buffer needs to redirect to dirty, redirect it */
		if (is_leaf)
			redirect = leaf_need_redirect(sb, buffer);
		else
			redirect = bnode_need_redirect(sb, buffer);

		/* No need to redirect */
		if (!redirect)
			continue;

		/* Redirect buffer before changing */
		clone = new_block(btree);
		if (IS_ERR(clone))
			return PTR_ERR(clone);
		oldblock = bufindex(buffer);
		newblock = bufindex(clone);
		trace("redirect %Lx to %Lx", oldblock, newblock);
		level_redirect_blockput(cursor, level, clone);
		if (is_leaf) {
			/* This is leaf buffer */
			mark_buffer_dirty_atomic(clone);
			log_leaf_redirect(sb, oldblock, newblock);
			defer_bfree(&sb->defree, oldblock, 1);
		} else {
			/* This is bnode buffer */
			mark_buffer_unify_atomic(clone);
			log_bnode_redirect(sb, oldblock, newblock);
			defer_bfree(&sb->deunify, oldblock, 1);
		}

		trace("update parent");
		if (!level) {
			/* Update pointer in btree->root */
			trace("redirect root");
			assert(oldblock == btree->root.block);
			btree->root.block = newblock;
			tux3_mark_btree_dirty(btree);
			continue;
		}
		/* Update entry on parent for the redirected block */
		parent = bufindex(cursor->path[level - 1].buffer);
		entry = cursor->path[level - 1].next - 1;
		entry->block = cpu_to_be64(newblock);
		log_bnode_update(sb, parent, newblock, be64_to_cpu(entry->key));
	}

	cursor_check(cursor);
	return 0;
}
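
cursor_redirect() is the copy-on-write step: walking from the root toward the leaf, every block that is not yet dirty in this delta gets cloned into a freshly allocated block, the parent's index entry is repointed at the clone, and the old block is queued for deferred freeing. A toy, self-contained sketch of that per-block sequence, assuming a flat in-memory array stands in for the volume:

#include <stdint.h>
#include <string.h>

typedef uint64_t block_t;

enum { TOY_BLKSIZE = 4096, TOY_NBLOCKS = 64 };
static unsigned char toy_vol[TOY_NBLOCKS][TOY_BLKSIZE];	/* pretend volume */
static block_t toy_next_free = 32;			/* naive allocator for the sketch */

/*
 * Redirect one block copy-on-write style: clone it, point the parent's
 * entry at the clone, and remember the old block for deferred freeing.
 * Returns the clone's block number.
 */
static block_t toy_redirect(block_t old, block_t *parent_slot, block_t *defer_free)
{
	block_t clone = toy_next_free++;		/* new_block() stand-in */

	memcpy(toy_vol[clone], toy_vol[old], TOY_BLKSIZE);	/* copy before modifying */
	*parent_slot = clone;				/* parent now references the copy */
	*defer_free = old;				/* old block is freed after commit */
	return clone;
}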
Example no. 18
int alloc_empty_btree(struct btree *btree)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = btree->sb;
	struct buffer_head *rootbuf = new_node(btree);
	if (IS_ERR(rootbuf))
		goto error;
	struct buffer_head *leafbuf = new_leaf(btree);
	if (IS_ERR(leafbuf))
		goto error_leafbuf;

	assert(!has_root(btree));
	struct bnode *rootnode = bufdata(rootbuf);
	block_t rootblock = bufindex(rootbuf);
	block_t leafblock = bufindex(leafbuf);
	trace("root at %Lx", rootblock);
	trace("leaf at %Lx", leafblock);
	bnode_init_root(rootnode, 1, leafblock, 0, 0);
	log_bnode_root(sb, rootblock, 1, leafblock, 0, 0);
	log_balloc(sb, leafblock, 1);

	mark_buffer_unify_non(rootbuf);
	blockput(rootbuf);
	mark_buffer_dirty_non(leafbuf);
	blockput(leafbuf);

	btree->root = (struct root){ .block = rootblock, .depth = 1 };
	tux3_mark_btree_dirty(btree);

	return 0;

error_leafbuf:
	(btree->ops->bfree)(sb, bufindex(rootbuf), 1);
	blockput(rootbuf);
	rootbuf = leafbuf;
error:
	return PTR_ERR(rootbuf);
}

/* FIXME: right? and this should be done by btree_chop()? */
int free_empty_btree(struct btree *btree)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct btree_ops *ops = btree->ops;

	if (!has_root(btree))
		return 0;

	assert(btree->root.depth == 1);
	struct sb *sb = btree->sb;
	struct buffer_head *rootbuf = vol_bread(sb, btree->root.block);
	if (!rootbuf)
		return -EIO;
	assert(bnode_sniff(bufdata(rootbuf)));
	/* Make btree have no root */
	btree->root = no_root;
	tux3_mark_btree_dirty(btree);

	struct bnode *rootnode = bufdata(rootbuf);
	assert(bcount(rootnode) == 1);
	block_t leaf = be64_to_cpu(rootnode->entries[0].block);
	struct buffer_head *leafbuf = vol_find_get_block(sb, leaf);

	if (leafbuf && !leaf_need_redirect(sb, leafbuf)) {
		/*
		 * This is redirected leaf. So, in here, we can just
		 * cancel leaf_redirect by bfree(), instead of
		 * defered_bfree().
		 */
		bfree(sb, leaf, 1);
		log_leaf_free(sb, leaf);
		assert(ops->leaf_can_free(btree, bufdata(leafbuf)));
		blockput_free(sb, leafbuf);
	} else {
		defer_bfree(&sb->defree, leaf, 1);
		log_bfree(sb, leaf, 1);
		if (leafbuf) {
			assert(ops->leaf_can_free(btree, bufdata(leafbuf)));
			blockput(leafbuf);
		}
	}

	if (!bnode_need_redirect(sb, rootbuf)) {
		/*
		 * This is redirected bnode. So, in here, we can just
		 * cancel bnode_redirect by bfree(), instead of
		 * defered_bfree().
		 */
		bfree(sb, bufindex(rootbuf), 1);
		log_bnode_free(sb, bufindex(rootbuf));
		blockput_free_unify(sb, rootbuf);
	} else {
		defer_bfree(&sb->deunify, bufindex(rootbuf), 1);
		log_bfree_on_unify(sb, bufindex(rootbuf), 1);
		blockput(rootbuf);
	}

	return 0;
}

int replay_bnode_redirect(struct replay *rp, block_t oldblock, block_t newblock)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = rp->sb;
	struct buffer_head *newbuf, *oldbuf;
	int err = 0;

	newbuf = vol_getblk(sb, newblock);
	if (!newbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error;
	}
	oldbuf = vol_bread(sb, oldblock);
	if (!oldbuf) {
		err = -EIO;	/* FIXME: error code */
		goto error_put_newbuf;
	}
	assert(bnode_sniff(bufdata(oldbuf)));

	memcpy(bufdata(newbuf), bufdata(oldbuf), bufsize(newbuf));
	mark_buffer_unify_atomic(newbuf);

	blockput(oldbuf);
error_put_newbuf:
	blockput(newbuf);
error:
	return err;
}

int replay_bnode_root(struct replay *rp, block_t root, unsigned count,
		      block_t left, block_t right, tuxkey_t rkey)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = rp->sb;
	struct buffer_head *rootbuf;

	rootbuf = vol_getblk(sb, root);
	if (!rootbuf)
		return -ENOMEM;
	bnode_buffer_init(rootbuf);

	bnode_init_root(bufdata(rootbuf), count, left, right, rkey);

	mark_buffer_unify_atomic(rootbuf);
	blockput(rootbuf);

	return 0;
}

/*
 * Before this replay, replay should already dirty the buffer of src.
 * (e.g. by redirect)
 */
int replay_bnode_split(struct replay *rp, block_t src, unsigned pos,
		       block_t dst)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = rp->sb;
	struct buffer_head *srcbuf, *dstbuf;
	int err = 0;

	srcbuf = vol_getblk(sb, src);
	if (!srcbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error;
	}

	dstbuf = vol_getblk(sb, dst);
	if (!dstbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error_put_srcbuf;
	}
	bnode_buffer_init(dstbuf);

	bnode_split(bufdata(srcbuf), pos, bufdata(dstbuf));

	mark_buffer_unify_non(srcbuf);
	mark_buffer_unify_atomic(dstbuf);

	blockput(dstbuf);
error_put_srcbuf:
	blockput(srcbuf);
error:
	return err;
}

/*
 * Before this replay, replay should already dirty the buffer of bnodeblock.
 * (e.g. by redirect)
 */
static int replay_bnode_change(struct sb *sb, block_t bnodeblock,
			       u64 val1, u64 val2,
			       void (*change)(struct bnode *, u64, u64))
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct buffer_head *bnodebuf;

	bnodebuf = vol_getblk(sb, bnodeblock);
	if (!bnodebuf)
		return -ENOMEM;	/* FIXME: error code */

	struct bnode *bnode = bufdata(bnodebuf);
	change(bnode, val1, val2);

	mark_buffer_unify_non(bnodebuf);
	blockput(bnodebuf);

	return 0;
}

static void add_func(struct bnode *bnode, u64 child, u64 key)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct index_entry *entry = bnode_lookup(bnode, key) + 1;
	bnode_add_index(bnode, entry, child, key);
}

int replay_bnode_add(struct replay *rp, block_t parent, block_t child,
		     tuxkey_t key)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	return replay_bnode_change(rp->sb, parent, child, key, add_func);
}

static void update_func(struct bnode *bnode, u64 child, u64 key)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct index_entry *entry = bnode_lookup(bnode, key);
	assert(be64_to_cpu(entry->key) == key);
	entry->block = cpu_to_be64(child);
}

int replay_bnode_update(struct replay *rp, block_t parent, block_t child,
			tuxkey_t key)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	return replay_bnode_change(rp->sb, parent, child, key, update_func);
}

int replay_bnode_merge(struct replay *rp, block_t src, block_t dst)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = rp->sb;
	struct buffer_head *srcbuf, *dstbuf;
	int err = 0, ret;

	srcbuf = vol_getblk(sb, src);
	if (!srcbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error;
	}

	dstbuf = vol_getblk(sb, dst);
	if (!dstbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error_put_srcbuf;
	}

	ret = bnode_merge_nodes(sb, bufdata(dstbuf), bufdata(srcbuf));
	assert(ret == 1);

	mark_buffer_unify_non(dstbuf);
	mark_buffer_unify_non(srcbuf);

	blockput(dstbuf);
error_put_srcbuf:
	blockput(srcbuf);
error:
	return err;
}

static void del_func(struct bnode *bnode, u64 key, u64 count)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct index_entry *entry = bnode_lookup(bnode, key);
	assert(be64_to_cpu(entry->key) == key);
	bnode_remove_index(bnode, entry, count);
}

int replay_bnode_del(struct replay *rp, block_t bnode, tuxkey_t key,
		     unsigned count)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	return replay_bnode_change(rp->sb, bnode, key, count, del_func);
}

static void adjust_func(struct bnode *bnode, u64 from, u64 to)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct index_entry *entry = bnode_lookup(bnode, from);
	assert(be64_to_cpu(entry->key) == from);
	entry->key = cpu_to_be64(to);
}

int replay_bnode_adjust(struct replay *rp, block_t bnode, tuxkey_t from,
			tuxkey_t to)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	return replay_bnode_change(rp->sb, bnode, from, to, adjust_func);
}
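
replay_bnode_add/update/del/adjust all funnel through replay_bnode_change(), which factors out the shared "load bnode, apply change, mark for unify" steps and takes the per-operation edit as a function pointer. A small self-contained sketch of that dispatch shape, with toy types standing in for struct bnode; it would be called as toy_bnode_change(node, from, to, toy_adjust), paralleling replay_bnode_adjust() above:

#include <stdint.h>

typedef uint64_t u64;

struct toy_bnode {
	unsigned count;
	u64 keys[8];
	u64 blocks[8];
};

/* Shared wrapper: apply one edit supplied as a callback (sketch only). */
static int toy_bnode_change(struct toy_bnode *node, u64 a, u64 b,
			    void (*change)(struct toy_bnode *, u64, u64))
{
	if (!node)
		return -1;		/* stands in for the -ENOMEM path above */
	change(node, a, b);
	return 0;			/* real code would mark the buffer for unify here */
}

/* One concrete edit, the moral equivalent of adjust_func() above. */
static void toy_adjust(struct toy_bnode *node, u64 from, u64 to)
{
	for (unsigned i = 0; i < node->count; i++)
		if (node->keys[i] == from)
			node->keys[i] = to;
}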
Example no. 19
/*
 * Insert new leaf to next cursor position.
 * keep == 1: keep current cursor position.
 * keep == 0: set cursor position to new leaf.
 */
static int insert_leaf(struct cursor *cursor, tuxkey_t childkey, struct buffer_head *leafbuf, int keep)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct btree *btree = cursor->btree;
	struct sb *sb = btree->sb;
	int level = btree->root.depth;
	block_t childblock = bufindex(leafbuf);

	if (keep)
		blockput(leafbuf);
	else {
		cursor_pop_blockput(cursor);
		cursor_push(cursor, leafbuf, NULL);
	}
	while (level--) {
		struct path_level *at = &cursor->path[level];
		struct buffer_head *parentbuf = at->buffer;
		struct bnode *parent = bufdata(parentbuf);

		/* insert and exit if not full */
		if (bcount(parent) < btree->sb->entries_per_node) {
			bnode_add_index(parent, at->next, childblock, childkey);
			if (!keep)
				at->next++;
			log_bnode_add(sb, bufindex(parentbuf), childblock, childkey);
			mark_buffer_unify_non(parentbuf);
			cursor_check(cursor);
			return 0;
		}

		/* split a full index node */
		struct buffer_head *newbuf = new_node(btree);
		if (IS_ERR(newbuf))
			return PTR_ERR(newbuf);

		struct bnode *newnode = bufdata(newbuf);
		unsigned half = bcount(parent) / 2;
		u64 newkey = be64_to_cpu(parent->entries[half].key);

		bnode_split(parent, half, newnode);
		log_bnode_split(sb, bufindex(parentbuf), half, bufindex(newbuf));

		/* if the cursor is in the new node, use that as the parent */
		int child_is_left = at->next <= parent->entries + half;
		if (!child_is_left) {
			struct index_entry *newnext;
			mark_buffer_unify_non(parentbuf);
			newnext = newnode->entries + (at->next - &parent->entries[half]);
			get_bh(newbuf);
			level_replace_blockput(cursor, level, newbuf, newnext);
			parentbuf = newbuf;
			parent = newnode;
		} else
			mark_buffer_unify_non(newbuf);

		bnode_add_index(parent, at->next, childblock, childkey);
		if (!keep)
			at->next++;
		log_bnode_add(sb, bufindex(parentbuf), childblock, childkey);
		mark_buffer_unify_non(parentbuf);

		childkey = newkey;
		childblock = bufindex(newbuf);
		blockput(newbuf);

		/*
		 * If child is in left bnode, we should keep the
		 * cursor position to child, otherwise adjust cursor
		 * to new bnode.
		 */
		keep = child_is_left;
	}

	/* Make new root bnode */
	trace("add tree level");
	struct buffer_head *newbuf = new_node(btree);
	if (IS_ERR(newbuf))
		return PTR_ERR(newbuf);

	struct bnode *newroot = bufdata(newbuf);
	block_t newrootblock = bufindex(newbuf);
	block_t oldrootblock = btree->root.block;
	int left_node = bufindex(cursor->path[0].buffer) != childblock;
	bnode_init_root(newroot, 2, oldrootblock, childblock, childkey);
	cursor_root_add(cursor, newbuf, newroot->entries + 1 + !left_node);
	log_bnode_root(sb, newrootblock, 2, oldrootblock, childblock, childkey);

	/* Change btree to point the new root */
	btree->root.block = newrootblock;
	btree->root.depth++;

	mark_buffer_unify_non(newbuf);
	tux3_mark_btree_dirty(btree);
	cursor_check(cursor);

	return 0;
}
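
When a bnode fills up, insert_leaf() splits it at the midpoint and promotes parent->entries[half].key as the new separator for the level above; after the split, that key is the first key of the new right-hand node. A simplified, self-contained sketch of that split-and-promote step (keys only, no block pointers), not tux3's actual bnode_split():

#include <stdint.h>
#include <string.h>

typedef uint64_t tuxkey_t;

struct toy_split_node {
	unsigned count;
	tuxkey_t keys[8];	/* real bnodes store key+block index entries */
};

/*
 * Move the upper half of `src` into the empty `dst` and return the first
 * key of `dst`, the separator handed up to the parent as `newkey`.
 */
static tuxkey_t toy_split(struct toy_split_node *src, struct toy_split_node *dst)
{
	unsigned half = src->count / 2;
	unsigned moved = src->count - half;

	memcpy(dst->keys, src->keys + half, moved * sizeof(tuxkey_t));
	dst->count = moved;
	src->count = half;
	return dst->keys[0];	/* equals old src->keys[half], the promoted separator */
}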
Example no. 20
File: btree.c Project: Zkin/tux3
int cursor_redirect(struct cursor *cursor)
{
	struct btree *btree = cursor->btree;
	unsigned level = btree->root.depth;
	struct sb *sb = btree->sb;
	block_t uninitialized_var(child);

	while (1) {
		struct buffer_head *buffer;
		block_t uninitialized_var(oldblock);
		block_t uninitialized_var(newblock);
		int redirect, is_leaf = (level == btree->root.depth);

		buffer = cursor->path[level].buffer;
		/* If buffer needs to redirect to dirty, redirect it */
		if (is_leaf)
			redirect = leaf_need_redirect(sb, buffer);
		else
			redirect = bnode_need_redirect(sb, buffer);

		if (redirect) {
			/* Redirect buffer before changing */
			struct buffer_head *clone = new_block(btree);
			if (IS_ERR(clone))
				return PTR_ERR(clone);
			oldblock = bufindex(buffer);
			newblock = bufindex(clone);
			trace("redirect %Lx to %Lx", oldblock, newblock);
			level_redirect_blockput(cursor, level, clone);
			if (is_leaf) {
				/* This is leaf buffer */
				mark_buffer_dirty_atomic(clone);
				log_leaf_redirect(sb, oldblock, newblock);
				defer_bfree(&sb->defree, oldblock, 1);
				goto parent_level;
			}
			/* This is bnode buffer */
			mark_buffer_rollup_atomic(clone);
			log_bnode_redirect(sb, oldblock, newblock);
			defer_bfree(&sb->derollup, oldblock, 1);
		} else {
			if (is_leaf) {
				/* This is leaf buffer */
				goto parent_level;
			}
		}

		/* Update entry for the redirected child block */
		trace("update parent");
		block_t block = bufindex(cursor->path[level].buffer);
		struct index_entry *entry = cursor->path[level].next - 1;
		entry->block = cpu_to_be64(child);
		log_bnode_update(sb, block, child, be64_to_cpu(entry->key));

parent_level:
		/* If it is already redirected, ancestor is also redirected */
		if (!redirect) {
			cursor_check(cursor);
			return 0;
		}

		if (!level--) {
			trace("redirect root");
			assert(oldblock == btree->root.block);
			btree->root.block = newblock;
			tux3_mark_btree_dirty(btree);
			cursor_check(cursor);
			return 0;
		}
		child = newblock;
	}
}
Example no. 21
int tree_chop(struct btree *btree, struct delete_info *info, millisecond_t deadline)
{
	int depth = btree->root.depth, level = depth - 1, suspend = 0;
	struct cursor *cursor;
	struct buffer_head *leafbuf, **prev, *leafprev = NULL;
	struct btree_ops *ops = btree->ops;
	struct sb *sb = btree->sb;
	int ret;

	cursor = alloc_cursor(btree, 0);
	prev = malloc(sizeof(*prev) * depth);
	memset(prev, 0, sizeof(*prev) * depth);

	down_write(&btree->lock);
	probe(btree, info->resume, cursor);
	leafbuf = level_pop(cursor);

	/* leaf walk */
	while (1) {
		ret = (ops->leaf_chop)(btree, info->key, bufdata(leafbuf));
		if (ret) {
			mark_buffer_dirty(leafbuf);
			if (ret < 0)
				goto error_leaf_chop;
		}

		/* try to merge this leaf with prev */
		if (leafprev) {
			struct vleaf *this = bufdata(leafbuf);
			struct vleaf *that = bufdata(leafprev);
			/* try to merge leaf with prev */
			if ((ops->leaf_need)(btree, this) <= (ops->leaf_free)(btree, that)) {
				trace(">>> can merge leaf %p into leaf %p", leafbuf, leafprev);
				(ops->leaf_merge)(btree, that, this);
				remove_index(cursor, level);
				mark_buffer_dirty(leafprev);
				brelse_free(btree, leafbuf);
				//dirty_buffer_count_check(sb);
				goto keep_prev_leaf;
			}
			brelse(leafprev);
		}
		leafprev = leafbuf;
keep_prev_leaf:

		//nanosleep(&(struct timespec){ 0, 50 * 1000000 }, NULL);
		//printf("time remaining: %Lx\n", deadline - gettime());
//		if (deadline && gettime() > deadline)
//			suspend = -1;
		if (info->blocks && info->freed >= info->blocks)
			suspend = -1;

		/* pop and try to merge finished nodes */
		while (suspend || level_finished(cursor, level)) {
			/* try to merge node with prev */
			if (prev[level]) {
				assert(level); /* node has no prev */
				struct bnode *this = cursor_node(cursor, level);
				struct bnode *that = bufdata(prev[level]);
				trace_off("check node %p against %p", this, that);
				trace_off("this count = %i prev count = %i", bcount(this), bcount(that));
				/* try to merge with node to left */
				if (bcount(this) <= sb->entries_per_node - bcount(that)) {
					trace(">>> can merge node %p into node %p", this, that);
					merge_nodes(that, this);
					remove_index(cursor, level - 1);
					mark_buffer_dirty(prev[level]);
					brelse_free(btree, level_pop(cursor));
					//dirty_buffer_count_check(sb);
					goto keep_prev_node;
				}
				brelse(prev[level]);
			}
			prev[level] = level_pop(cursor);
keep_prev_node:

			/* deepest key in the cursor is the resume address */
			if (suspend == -1 && !level_finished(cursor, level)) {
				suspend = 1; /* only set resume once */
				info->resume = from_be_u64((cursor->path[level].next)->key);
			}
			if (!level) { /* remove depth if possible */
				while (depth > 1 && bcount(bufdata(prev[0])) == 1) {
					trace("drop btree level");
					btree->root.block = bufindex(prev[1]);
					mark_btree_dirty(btree);
					brelse_free(btree, prev[0]);
					//dirty_buffer_count_check(sb);
					depth = --btree->root.depth;
					vecmove(prev, prev + 1, depth);
					//set_sb_dirty(sb);
				}
				//sb->snapmask &= ~snapmask; delete_snapshot_from_disk();
				//set_sb_dirty(sb);
				//save_sb(sb);
				ret = suspend;
				goto out;
			}
			level--;
			trace_off(printf("pop to level %i, block %Lx, %i of %i nodes\n", level, bufindex(cursor->path[level].buffer), cursor->path[level].next - cursor_node(cursor, level)->entries, bcount(cursor_node(cursor, level))););
		}

		/* push back down to leaf level */
		while (level < depth - 1) {
			struct buffer_head *buffer = sb_bread(vfs_sb(sb), from_be_u64(cursor->path[level++].next++->block));
			if (!buffer) {
				ret = -EIO;
				goto out;
			}
			level_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries);
			trace_off(printf("push to level %i, block %Lx, %i nodes\n", level, bufindex(buffer), bcount(cursor_node(cursor, level))););
		}
Example no. 22
static int replay_log_stage1(struct replay *rp, struct buffer_head *logbuf)
{
	if(DEBUG_MODE_K==1)
	{
		printk(KERN_INFO"%25s  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = rp->sb;
	struct logblock *log = bufdata(logbuf);
	unsigned char *data = log->data;
	int err;

	/* Check whether array is uptodate */
	BUILD_BUG_ON(ARRAY_SIZE(log_name) != LOG_TYPES);

	/* If log is before latest unify, those were already applied to FS. */
	if (bufindex(logbuf) < rp->unify_index) {
//		assert(0);	/* older logs should already be freed */
		return 0;
	}
	if (bufindex(logbuf) == rp->unify_index)
		data = rp->unify_pos;

	while (data < log->data + be16_to_cpu(log->bytes)) {
		u8 code = *data++;
		switch (code) {
		case LOG_BNODE_REDIRECT:
		{
			u64 oldblock, newblock;
			data = decode48(data, &oldblock);
			data = decode48(data, &newblock);
			trace("%s: oldblock %Lx, newblock %Lx",
			      log_name[code], oldblock, newblock);
			err = replay_bnode_redirect(rp, oldblock, newblock);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_ROOT:
		{
			u64 root, left, right, rkey;
			u8 count;
			count = *data++;
			data = decode48(data, &root);
			data = decode48(data, &left);
			data = decode48(data, &right);
			data = decode48(data, &rkey);
			trace("%s: count %u, root block %Lx, left %Lx, right %Lx, rkey %Lx",
			      log_name[code], count, root, left, right, rkey);

			err = replay_bnode_root(rp, root, count, left, right, rkey);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_SPLIT:
		{
			unsigned pos;
			u64 src, dst;
			data = decode16(data, &pos);
			data = decode48(data, &src);
			data = decode48(data, &dst);
			trace("%s: pos %x, src %Lx, dst %Lx",
			      log_name[code], pos, src, dst);
			err = replay_bnode_split(rp, src, pos, dst);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_ADD:
		case LOG_BNODE_UPDATE:
		{
			u64 child, parent, key;
			data = decode48(data, &parent);
			data = decode48(data, &child);
			data = decode48(data, &key);
			trace("%s: parent 0x%Lx, child 0x%Lx, key 0x%Lx",
			      log_name[code], parent, child, key);
			if (code == LOG_BNODE_UPDATE)
				err = replay_bnode_update(rp, parent, child, key);
			else
				err = replay_bnode_add(rp, parent, child, key);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_MERGE:
		{
			u64 src, dst;
			data = decode48(data, &src);
			data = decode48(data, &dst);
			trace("%s: src 0x%Lx, dst 0x%Lx",
			      log_name[code], src, dst);
			err = replay_bnode_merge(rp, src, dst);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_DEL:
		{
			unsigned count;
			u64 bnode, key;
			data = decode16(data, &count);
			data = decode48(data, &bnode);
			data = decode48(data, &key);
			trace("%s: bnode 0x%Lx, count 0x%x, key 0x%Lx",
			      log_name[code], bnode, count, key);
			err = replay_bnode_del(rp, bnode, key, count);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_ADJUST:
		{
			u64 bnode, from, to;
			data = decode48(data, &bnode);
			data = decode48(data, &from);
			data = decode48(data, &to);
			trace("%s: bnode 0x%Lx, from 0x%Lx, to 0x%Lx",
			      log_name[code], bnode, from, to);
			err = replay_bnode_adjust(rp, bnode, from, to);
			if (err)
				return err;
			break;
		}
		case LOG_FREEBLOCKS:
		{
			u64 freeblocks;
			data = decode48(data, &freeblocks);
			trace("%s: freeblocks %llu", log_name[code],
			      freeblocks);
			sb->freeblocks = freeblocks;
			break;
		}
		case LOG_BALLOC:
		case LOG_BFREE:
		case LOG_BFREE_ON_UNIFY:
		case LOG_BFREE_RELOG:
		case LOG_LEAF_REDIRECT:
		case LOG_LEAF_FREE:
		case LOG_BNODE_FREE:
		case LOG_ORPHAN_ADD:
		case LOG_ORPHAN_DEL:
		case LOG_UNIFY:
		case LOG_DELTA:
			data += log_size[code] - sizeof(code);
			break;
		default:
			tux3_err(rp->sb, "unrecognized log code 0x%x", code);
			return -EINVAL;
		}
	}

	return 0;
}
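
The replay loop above consumes records whose payload fields are packed as 16-, 32- and 48-bit integers via decode16()/decode32()/decode48(), each returning the advanced data pointer. A sketch of what such a 48-bit decoder could look like, matching only the call shape used above; the big-endian byte order here is an assumption for illustration, not a statement about tux3's real on-disk encoding:

#include <stdint.h>

typedef uint64_t u64;

/* Hypothetical 48-bit field decoder: read six bytes, return advanced cursor. */
static void *sketch_decode48(void *at, u64 *val)
{
	unsigned char *p = at;
	u64 v = 0;

	for (int i = 0; i < 6; i++)
		v = (v << 8) | *p++;	/* assumed big-endian packing */
	*val = v;
	return p;
}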
Example no. 23
static int tux3_symlink(struct inode *dir, struct dentry *dentry,
			const char *symname)
{
	struct tux_iattr iattr = {
		.uid	= current_fsuid(),
		.gid	= current_fsgid(),
		.mode	= S_IFLNK | S_IRWXUGO,
	};

	return __tux3_symlink(dir, dentry, &iattr, symname);
}
#endif /* !__KERNEL__ */

static int tux3_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	struct sb *sb = tux_sb(inode->i_sb);

	change_begin(sb);
	int err = tux_del_dirent(dir, dentry);
	if (!err) {
		tux3_iattrdirty(inode);
		inode->i_ctime = dir->i_ctime;
		/* FIXME: we shouldn't write inode for i_nlink = 0? */
		inode_dec_link_count(inode);
	}
	change_end(sb);

	return err;
}

static int tux3_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct sb *sb = tux_sb(dir->i_sb);
	struct inode *inode = dentry->d_inode;
	int err = tux_dir_is_empty(inode);

	if (!err) {
		change_begin(sb);
		err = tux_del_dirent(dir, dentry);
		if (!err) {
			tux3_iattrdirty(inode);
			inode->i_ctime = dir->i_ctime;
			/* FIXME: we need to do this for POSIX? */
			/* inode->i_size = 0; */
			clear_nlink(inode);
			tux3_mark_inode_dirty_sync(inode);

			inode_dec_link_count(dir);
		}
		change_end(sb);
	}
	return err;
}

static int tux3_rename(struct inode *old_dir, struct dentry *old_dentry,
		       struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *old_inode = old_dentry->d_inode;
	struct inode *new_inode = new_dentry->d_inode;
	struct sb *sb = tux_sb(old_inode->i_sb);
	struct buffer_head *old_buffer, *new_buffer, *clone;
	tux_dirent *old_entry, *new_entry;
	void *olddata;
	int err, new_subdir = 0;
	unsigned delta;

	old_entry = tux_find_dirent(old_dir, &old_dentry->d_name, &old_buffer);
	if (IS_ERR(old_entry))
		return PTR_ERR(old_entry);

	/* FIXME: is this needed? */
	assert(be64_to_cpu(old_entry->inum) == tux_inode(old_inode)->inum);

	change_begin(sb);
	delta = tux3_get_current_delta();

	if (new_inode) {
		int old_is_dir = S_ISDIR(old_inode->i_mode);
		if (old_is_dir) {
			err = tux_dir_is_empty(new_inode);
			if (err)
				goto error;
		}

		new_entry = tux_find_dirent(new_dir, &new_dentry->d_name,
					    &new_buffer);
		if (IS_ERR(new_entry)) {
			assert(PTR_ERR(new_entry) != -ENOENT);
			err = PTR_ERR(new_entry);
			goto error;
		}

		/*
		 * The directory is protected by i_mutex.
		 * blockdirty() should never return -EAGAIN.
		 */
		olddata = bufdata(new_buffer);
		clone = blockdirty(new_buffer, delta);
		if (IS_ERR(clone)) {
			assert(PTR_ERR(clone) != -EAGAIN);
			blockput(new_buffer);
			err = PTR_ERR(clone);
			goto error;
		}
		new_entry = ptr_redirect(new_entry, olddata, bufdata(clone));

		/* this releases new_buffer */
		tux_update_dirent(new_dir, clone, new_entry, old_inode);

		tux3_iattrdirty(new_inode);
		new_inode->i_ctime = new_dir->i_ctime;
		if (old_is_dir)
			drop_nlink(new_inode);
		inode_dec_link_count(new_inode);
	} else {
		new_subdir = S_ISDIR(old_inode->i_mode) && new_dir != old_dir;
		if (new_subdir) {
			if (new_dir->i_nlink >= TUX_LINK_MAX) {
				err = -EMLINK;
				goto error;
			}
		}
		err = tux_create_dirent(new_dir, &new_dentry->d_name,
					old_inode);
		if (err)
			goto error;
		if (new_subdir)
			inode_inc_link_count(new_dir);
	}
	tux3_iattrdirty(old_inode);
	old_inode->i_ctime = new_dir->i_ctime;
	tux3_mark_inode_dirty(old_inode);

	/*
	 * The new entry can be in the same buffer as old_buffer, and
	 * the path above may have forked that buffer. So if old_buffer
	 * is a forked buffer, we update old_buffer here.
	 */
	if (buffer_forked(old_buffer)) {
		clone = blockget(mapping(old_dir), bufindex(old_buffer));
		assert(clone);
		old_entry = ptr_redirect(old_entry, bufdata(old_buffer),
					 bufdata(clone));
		blockput(old_buffer);
		old_buffer = clone;
	}
	err = tux_delete_dirent(old_dir, old_buffer, old_entry);
	if (err) {
		tux3_fs_error(sb, "couldn't delete old entry (%Lu)",
			      tux_inode(old_inode)->inum);
		/* FIXME: now, we have hardlink even if it's dir. */
		inode_inc_link_count(old_inode);
	}
	if (!err && new_subdir)
		inode_dec_link_count(old_dir);

	change_end(sb);
	return err;

error:
	change_end(sb);
	blockput(old_buffer);
	return err;
}

#ifdef __KERNEL__
const struct file_operations tux_dir_fops = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= tux_readdir,
	.fsync		= tux3_sync_file,
};

const struct inode_operations tux_dir_iops = {
	.create		= tux3_create,
	.lookup		= tux3_lookup,
	.link		= tux3_link,
	.unlink		= tux3_unlink,
	.symlink	= tux3_symlink,
	.mkdir		= tux3_mkdir,
	.rmdir		= tux3_rmdir,
	.mknod		= tux3_mknod,
	.rename		= tux3_rename,
	.setattr	= tux3_setattr,
	.getattr	= tux3_getattr
//	.setxattr	= generic_setxattr,
//	.getxattr	= generic_getxattr,
//	.listxattr	= ext3_listxattr,
//	.removexattr	= generic_removexattr,
//	.permission	= ext3_permission,
	/* FIXME: why doesn't ext4 support this for directory? */
//	.fallocate	= ext4_fallocate,
//	.fiemap		= ext4_fiemap,
};
Example no. 24
/*
 * This is range deletion. So, instead of adjusting balance of the
 * space on sibling nodes for each change, this just removes the range
 * and merges from right to left even if it is not same parent.
 *
 *              +--------------- (A, B, C)--------------------+
 *              |                    |                        |
 *     +-- (AA, AB, AC) -+       +- (BA, BB, BC) -+      + (CA, CB, CC) +
 *     |        |        |       |        |       |      |       |      |
 * (AAA,AAB)(ABA,ABB)(ACA,ACB) (BAA,BAB)(BBA)(BCA,BCB)  (CAA)(CBA,CBB)(CCA)
 *
 * [less : A, AA, AAA, AAB, AB, ABA, ABB, AC, ACA, ACB, B, BA ... : greater]
 *
 * If we merged from cousin (or re-distributed), we may have to update
 * the index until common parent. (e.g. removed (ACB), then merged
 * from (BAA,BAB) to (ACA), we have to adjust B in root node to BB)
 *
 * See, adjust_parent_sep().
 *
 * FIXME: no re-distribute. so, we don't guarantee above than 50%
 * space efficiency. And if range is end of key (truncate() case), we
 * don't need to merge, and adjust_parent_sep().
 *
 * FIXME2: we may want to split chop work for each step. instead of
 * blocking for a long time.
 */
int btree_chop(struct btree *btree, tuxkey_t start, u64 len)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K]  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = btree->sb;
	struct btree_ops *ops = btree->ops;
	struct buffer_head **prev, *leafprev = NULL;
	struct chopped_index_info *cii;
	struct cursor *cursor;
	tuxkey_t limit;
	int ret, done = 0;

	if (!has_root(btree))
		return 0;

	/* Chop all range if len >= TUXKEY_LIMIT */
	limit = (len >= TUXKEY_LIMIT) ? TUXKEY_LIMIT : start + len;

	prev = malloc(sizeof(*prev) * btree->root.depth);
	if (prev == NULL)
		return -ENOMEM;
	memset(prev, 0, sizeof(*prev) * btree->root.depth);

	cii = malloc(sizeof(*cii) * btree->root.depth);
	if (cii == NULL) {
		ret = -ENOMEM;
		goto error_cii;
	}
	memset(cii, 0, sizeof(*cii) * btree->root.depth);

	cursor = alloc_cursor(btree, 0);
	if (!cursor) {
		ret = -ENOMEM;
		goto error_alloc_cursor;
	}

	down_write(&btree->lock);
	ret = btree_probe(cursor, start);
	if (ret)
		goto error_btree_probe;

	/* Walk leaves */
	while (1) {
		struct buffer_head *leafbuf;
		tuxkey_t this_key;

		/*
		 * FIXME: If leaf was merged and freed later, we don't
		 * need to redirect leaf and leaf_chop()
		 */
		if ((ret = cursor_redirect(cursor)))
			goto out;
		leafbuf = cursor_pop(cursor);

		/* Adjust start and len for this leaf */
		this_key = cursor_level_this_key(cursor);
		if (start < this_key) {
			if (limit < TUXKEY_LIMIT)
				len -= this_key - start;
			start = this_key;
		}

		ret = ops->leaf_chop(btree, start, len, bufdata(leafbuf));
		if (ret) {
			if (ret < 0) {
				blockput(leafbuf);
				goto out;
			}
			mark_buffer_dirty_non(leafbuf);
		}

		/* Try to merge this leaf with prev */
		if (leafprev) {
			if (try_leaf_merge(btree, leafprev, leafbuf)) {
				trace(">>> can merge leaf %p into leaf %p", leafbuf, leafprev);
				remove_index(cursor, cii);
				mark_buffer_dirty_non(leafprev);
				blockput_free(sb, leafbuf);
				goto keep_prev_leaf;
			}
			blockput(leafprev);
		}
		leafprev = leafbuf;

keep_prev_leaf:

		if (cursor_level_next_key(cursor) >= limit)
			done = 1;
		/* Pop and try to merge finished nodes */
		while (done || cursor_level_finished(cursor)) {
			struct buffer_head *buf;
			int level = cursor->level;
			struct chopped_index_info *ciil = &cii[level];


			/* Get merge src buffer, and go parent level */
			buf = cursor_pop(cursor);

			/*
			 * Logging chopped indexes
			 * FIXME: If node is freed later (e.g. merged),
			 * we don't need to log this
			 */
			if (ciil->count) {
				log_bnode_del(sb, bufindex(buf), ciil->start,
					      ciil->count);
			}
			memset(ciil, 0, sizeof(*ciil));

			/* Try to merge node with prev */
			if (prev[level]) {
				assert(level);
				if (try_bnode_merge(sb, prev[level], buf)) {
					trace(">>> can merge node %p into node %p", buf, prev[level]);
					remove_index(cursor, cii);
					mark_buffer_unify_non(prev[level]);
					blockput_free_unify(sb, buf);
					goto keep_prev_node;
				}
				blockput(prev[level]);
			}
			prev[level] = buf;
keep_prev_node:

			if (!level)
				goto chop_root;
		}

		/* Push back down to leaf level */
		do {
			ret = cursor_advance_down(cursor);
			if (ret < 0)
				goto out;
		} while (ret);
	}

chop_root:
	/* Remove depth if possible */
	while (btree->root.depth > 1 && bcount(bufdata(prev[0])) == 1) {
		trace("drop btree level");
		btree->root.block = bufindex(prev[1]);
		btree->root.depth--;
		tux3_mark_btree_dirty(btree);

		/*
		 * We know prev[0] is redirected and dirty. So, in
		 * here, we can just cancel bnode_redirect by bfree(),
		 * instead of defered_bfree()
		 * FIXME: we can optimize freeing bnode without
		 * bnode_redirect, and if we did, this is not true.
		 */
		bfree(sb, bufindex(prev[0]), 1);
		log_bnode_free(sb, bufindex(prev[0]));
		blockput_free_unify(sb, prev[0]);

		vecmove(prev, prev + 1, btree->root.depth);
	}
	ret = 0;

out:
	if (leafprev)
		blockput(leafprev);
	for (int i = 0; i < btree->root.depth; i++) {
		if (prev[i])
			blockput(prev[i]);
	}
	release_cursor(cursor);
error_btree_probe:
	up_write(&btree->lock);

	free_cursor(cursor);
error_alloc_cursor:
	free(cii);
error_cii:
	free(prev);

	return ret;
}
Example no. 25
static int replay_log_stage2(struct replay *rp, struct buffer_head *logbuf)
{
	if(DEBUG_MODE_K==1)
	{
		printk(KERN_INFO"%25s  %25s  %4d  #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = rp->sb;
	struct logblock *log = bufdata(logbuf);
	block_t blocknr = rp->blocknrs[bufindex(logbuf)];
	unsigned char *data = log->data;
	int err;

	/*
	 * The log block address itself works as a balloc log, so adjust
	 * the bitmap and deunify even if this logblock is before the
	 * latest unify, to prevent it from being overwritten. (This must
	 * come after LOG_FREEBLOCKS replay, if there is one.)
	 */
	trace("LOG BLOCK: logblock %Lx", blocknr);
	err = replay_update_bitmap(rp, blocknr, 1, 1);
	if (err)
		return err;
	/* Mark log block as deunify block */
	defer_bfree(&sb->deunify, blocknr, 1);

	/* If log is before latest unify, those were already applied to FS. */
	if (bufindex(logbuf) < rp->unify_index) {
//		assert(0);	/* older logs should already be freed */
		return 0;
	}
	if (bufindex(logbuf) == rp->unify_index)
		data = rp->unify_pos;

	while (data < log->data + be16_to_cpu(log->bytes)) {
		u8 code = *data++;
		switch (code) {
		case LOG_BALLOC:
		case LOG_BFREE:
		case LOG_BFREE_ON_UNIFY:
		case LOG_BFREE_RELOG:
		{
			u64 block;
			u32 count;
			data = decode32(data, &count);
			data = decode48(data, &block);
			trace("%s: count %u, block %Lx",
			      log_name[code], count, block);

			err = 0;
			if (code == LOG_BALLOC)
				err = replay_update_bitmap(rp, block, count, 1);
			else if (code == LOG_BFREE_ON_UNIFY)
				defer_bfree(&sb->deunify, block, count);
			else
				err = replay_update_bitmap(rp, block, count, 0);
			if (err)
				return err;
			break;
		}
		case LOG_LEAF_REDIRECT:
		case LOG_BNODE_REDIRECT:
		{
			u64 oldblock, newblock;
			data = decode48(data, &oldblock);
			data = decode48(data, &newblock);
			trace("%s: oldblock %Lx, newblock %Lx",
			      log_name[code], oldblock, newblock);
			err = replay_update_bitmap(rp, newblock, 1, 1);
			if (err)
				return err;
			if (code == LOG_LEAF_REDIRECT) {
				err = replay_update_bitmap(rp, oldblock, 1, 0);
				if (err)
					return err;
			} else {
				/* newblock is not flushing yet */
				defer_bfree(&sb->deunify, oldblock, 1);
			}
			break;
		}
		case LOG_LEAF_FREE:
		case LOG_BNODE_FREE:
		{
			u64 block;
			data = decode48(data, &block);
			trace("%s: block %Lx", log_name[code], block);
			err = replay_update_bitmap(rp, block, 1, 0);
			if (err)
				return err;

			if (code == LOG_BNODE_FREE) {
				struct buffer_head *buffer =
					vol_find_get_block(sb, block);
				blockput_free_unify(sb, buffer);
			}
			break;
		}
		case LOG_BNODE_ROOT:
		{
			u64 root, left, right, rkey;
			u8 count;
			count = *data++;
			data = decode48(data, &root);
			data = decode48(data, &left);
			data = decode48(data, &right);
			data = decode48(data, &rkey);
			trace("%s: count %u, root block %Lx, left %Lx, right %Lx, rkey %Lx",
			      log_name[code], count, root, left, right, rkey);

			err = replay_update_bitmap(rp, root, 1, 1);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_SPLIT:
		{
			unsigned pos;
			u64 src, dst;
			data = decode16(data, &pos);
			data = decode48(data, &src);
			data = decode48(data, &dst);
			trace("%s: pos %x, src %Lx, dst %Lx",
			      log_name[code], pos, src, dst);
			err = replay_update_bitmap(rp, dst, 1, 1);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_MERGE:
		{
			u64 src, dst;
			data = decode48(data, &src);
			data = decode48(data, &dst);
			trace("%s: src 0x%Lx, dst 0x%Lx",
			      log_name[code], src, dst);
			err = replay_update_bitmap(rp, src, 1, 0);
			if (err)
				return err;

			blockput_free_unify(sb, vol_find_get_block(sb, src));
			break;
		}
		case LOG_ORPHAN_ADD:
		case LOG_ORPHAN_DEL:
		{
			unsigned version;
			u64 inum;
			data = decode16(data, &version);
			data = decode48(data, &inum);
			trace("%s: version 0x%x, inum 0x%Lx",
			      log_name[code], version, inum);
			if (code == LOG_ORPHAN_ADD)
				err = replay_orphan_add(rp, version, inum);
			else
				err = replay_orphan_del(rp, version, inum);
			if (err)
				return err;
			break;
		}
		case LOG_FREEBLOCKS:
		case LOG_BNODE_ADD:
		case LOG_BNODE_UPDATE:
		case LOG_BNODE_DEL:
		case LOG_BNODE_ADJUST:
		case LOG_UNIFY:
		case LOG_DELTA:
			data += log_size[code] - sizeof(code);
			break;
		default:
			tux3_err(sb, "unrecognized log code 0x%x", code);
			return -EINVAL;
		}
	}

	return 0;
}