Beispiel #1
0
static int array_block_check(struct dm_block_validator *v,
			     struct dm_block *b,
			     size_t size_of_block)
{
	struct array_block *bh_le = dm_block_data(b);
	__le32 csum_disk;

	if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
		DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu",
			    (unsigned long long) le64_to_cpu(bh_le->blocknr),
			    (unsigned long long) dm_block_location(b));
		return -ENOTBLK;
	}

	csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
					       size_of_block - sizeof(__le32),
					       CSUM_XOR));
	if (csum_disk != bh_le->csum) {
		DMERR_LIMIT("array_block_check failed: csum %u != wanted %u",
			    (unsigned) le32_to_cpu(csum_disk),
			    (unsigned) le32_to_cpu(bh_le->csum));
		return -EILSEQ;
	}

	return 0;
}
static int sb_check(struct dm_block_validator *v,
		    struct dm_block *b,
		    size_t block_size)
{
	struct thin_disk_superblock *disk_super = dm_block_data(b);
	__le32 csum_le;

	if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
		DMERR("sb_check failed: blocknr %llu: "
		      "wanted %llu", le64_to_cpu(disk_super->blocknr),
		      (unsigned long long)dm_block_location(b));
		return -ENOTBLK;
	}

	if (le64_to_cpu(disk_super->magic) != THIN_SUPERBLOCK_MAGIC) {
		DMERR("sb_check failed: magic %llu: "
		      "wanted %llu", le64_to_cpu(disk_super->magic),
		      (unsigned long long)THIN_SUPERBLOCK_MAGIC);
		return -EILSEQ;
	}

	csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
					     block_size - sizeof(__le32),
					     SUPERBLOCK_CSUM_XOR));
	if (csum_le != disk_super->csum) {
		DMERR("sb_check failed: csum %u: wanted %u",
		      le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
		return -EILSEQ;
	}

	return 0;
}
Beispiel #3
0
static int node_check(struct dm_block_validator *v,
		      struct dm_block *b,
		      size_t block_size)
{
	struct node *n = dm_block_data(b);
	struct node_header *h = &n->header;
	size_t value_size;
	__le32 csum_disk;
	uint32_t flags;

	if (dm_block_location(b) != le64_to_cpu(h->blocknr)) {
		DMERR_LIMIT("node_check failed blocknr %llu wanted %llu",
			    le64_to_cpu(h->blocknr), dm_block_location(b));
		return -ENOTBLK;
	}

	csum_disk = cpu_to_le32(dm_bm_checksum(&h->flags,
					       block_size - sizeof(__le32),
					       BTREE_CSUM_XOR));
	if (csum_disk != h->csum) {
		DMERR_LIMIT("node_check failed csum %u wanted %u",
			    le32_to_cpu(csum_disk), le32_to_cpu(h->csum));
		return -EILSEQ;
	}

	value_size = le32_to_cpu(h->value_size);

	if (sizeof(struct node_header) +
	    (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) {
		DMERR_LIMIT("node_check failed: max_entries too large");
		return -EILSEQ;
	}

	if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) {
		DMERR_LIMIT("node_check failed, too many entries");
		return -EILSEQ;
	}

	/*
	 * The node must be either INTERNAL or LEAF.
	 */
	flags = le32_to_cpu(h->flags);
	if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) {
		DMERR_LIMIT("node_check failed, node is neither INTERNAL or LEAF");
		return -EILSEQ;
	}

	return 0;
}
/*
 * We dump as many entries from center as possible into left, then the rest
 * in right, then rebalance2.  This wastes some cpu, but I want something
 * simple atm.
 */
static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
			       struct child *l, struct child *c, struct child *r,
			       struct btree_node *left, struct btree_node *center, struct btree_node *right,
			       uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
	unsigned shift = min(max_entries - nr_left, nr_center);

	BUG_ON(nr_left + shift > max_entries);
	node_copy(left, center, -shift);
	left->header.nr_entries = cpu_to_le32(nr_left + shift);

	if (shift != nr_center) {
		shift = nr_center - shift;
		BUG_ON((nr_right + shift) > max_entries);
		node_shift(right, shift);
		node_copy(center, right, shift);
		right->header.nr_entries = cpu_to_le32(nr_right + shift);
	}
	*key_ptr(parent, r->index) = right->keys[0];

	delete_at(parent, c->index);
	r->index--;

	dm_tm_dec(info->tm, dm_block_location(c->block));
	__rebalance2(info, parent, l, r);
}
static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
		      struct btree_node *parent,
		      unsigned index, struct child *result)
{
	int r, inc;
	dm_block_t root;

	result->index = index;
	root = value64(parent, index);

	r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
			       &result->block, &inc);
	if (r)
		return r;

	result->n = dm_block_data(result->block);

	if (inc)
		inc_children(info->tm, result->n, vt);

	*((__le64 *) value_ptr(parent, index)) =
		cpu_to_le64(dm_block_location(result->block));

	return 0;
}
static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
			 struct child *l, struct child *r)
{
	struct btree_node *left = l->n;
	struct btree_node *right = r->n;
	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
	unsigned threshold = 2 * merge_threshold(left) + 1;

	if (nr_left + nr_right < threshold) {
		/*
		 * Merge
		 */
		node_copy(left, right, -nr_right);
		left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
		delete_at(parent, r->index);

		/*
		 * We need to decrement the right block, but not it's
		 * children, since they're still referenced by left.
		 */
		dm_tm_dec(info->tm, dm_block_location(r->block));
	} else {
		/*
		 * Rebalance.
		 */
		unsigned target_left = (nr_left + nr_right) / 2;
		shift(left, right, nr_left - target_left);
		*key_ptr(parent, r->index) = right->keys[0];
	}
}
Beispiel #7
0
static void pop_frame(struct del_stack *s)
{
	struct frame *f = s->spine + s->top--;

	dm_tm_dec(s->tm, dm_block_location(f->b));
	dm_tm_unlock(s->tm, f->b);
}
Beispiel #8
0
int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
{
	int r;
	struct dm_block *b;
	struct btree_node *n;
	size_t block_size;
	uint32_t max_entries;

	r = new_block(info, &b);
	if (r < 0)
		return r;

	block_size = dm_bm_block_size(dm_tm_get_bm(info->tm));
	max_entries = calc_max_entries(info->value_type.size, block_size);

	n = dm_block_data(b);
	memset(n, 0, block_size);
	n->header.flags = cpu_to_le32(LEAF_NODE);
	n->header.nr_entries = cpu_to_le32(0);
	n->header.max_entries = cpu_to_le32(max_entries);
	n->header.value_size = cpu_to_le32(info->value_type.size);

	*root = dm_block_location(b);
	return unlock_block(info, b);
}
Beispiel #9
0
/*
 * Insert an array block into the btree.  The block is _not_ unlocked.
 */
static int insert_ablock(struct dm_array_info *info, uint64_t index,
			 struct dm_block *block, dm_block_t *root)
{
	__le64 block_le = cpu_to_le64(dm_block_location(block));

	__dm_bless_for_disk(block_le);
	return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
}
static void sb_prepare_for_write(struct dm_block_validator *v,
				 struct dm_block *b,
				 size_t block_size)
{
	struct thin_disk_superblock *disk_super = dm_block_data(b);

	disk_super->blocknr = cpu_to_le64(dm_block_location(b));
	disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
						      block_size - sizeof(__le32),
						      SUPERBLOCK_CSUM_XOR));
}
Beispiel #11
0
static void array_block_prepare_for_write(struct dm_block_validator *v,
					  struct dm_block *b,
					  size_t size_of_block)
{
	struct array_block *bh_le = dm_block_data(b);

	bh_le->blocknr = cpu_to_le64(dm_block_location(b));
	bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
						 size_of_block - sizeof(__le32),
						 CSUM_XOR));
}
static int rebalance_children(struct shadow_spine *s,
			      struct dm_btree_info *info,
			      struct dm_btree_value_type *vt, uint64_t key)
{
	int i, r, has_left_sibling, has_right_sibling;
	uint32_t child_entries;
	struct btree_node *n;

	n = dm_block_data(shadow_current(s));

	if (le32_to_cpu(n->header.nr_entries) == 1) {
		struct dm_block *child;
		dm_block_t b = value64(n, 0);

		r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
		if (r)
			return r;

		memcpy(n, dm_block_data(child),
		       dm_bm_block_size(dm_tm_get_bm(info->tm)));
		r = dm_tm_unlock(info->tm, child);
		if (r)
			return r;

		dm_tm_dec(info->tm, dm_block_location(child));
		return 0;
	}

	i = lower_bound(n, key);
	if (i < 0)
		return -ENODATA;

	r = get_nr_entries(info->tm, value64(n, i), &child_entries);
	if (r)
		return r;

	has_left_sibling = i > 0;
	has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);

	if (!has_left_sibling)
		r = rebalance2(s, info, vt, i);

	else if (!has_right_sibling)
		r = rebalance2(s, info, vt, i - 1);

	else
		r = rebalance3(s, info, vt, i - 1);

	return r;
}
Beispiel #13
0
static void node_prepare_for_write(struct dm_block_validator *v,
				   struct dm_block *b,
				   size_t block_size)
{
	struct node *n = dm_block_data(b);
	struct node_header *h = &n->header;

	h->blocknr = cpu_to_le64(dm_block_location(b));
	h->csum = cpu_to_le32(dm_bm_checksum(&h->flags,
					     block_size - sizeof(__le32),
					     BTREE_CSUM_XOR));

	BUG_ON(node_check(v, b, 4096));
}
/*
 * Prepares for removal from one level of the hierarchy.  The caller must
 * call delete_at() to remove the entry at index.
 */
static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, dm_block_t root,
		      uint64_t key, unsigned *index)
{
	int i = *index, r;
	struct btree_node *n;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			break;

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s)) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
			memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
			       &location, sizeof(__le64));
		}

		n = dm_block_data(shadow_current(s));

		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		r = rebalance_children(s, info, vt, key);
		if (r)
			break;

		n = dm_block_data(shadow_current(s));
		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		i = lower_bound(n, key);

		/*
		 * We know the key is present, or else
		 * rebalance_children would have returned
		 * -ENODATA
		 */
		root = value64(n, i);
	}

	return r;
}
Beispiel #15
0
/*
 * Looks up an array block in the btree.  Then shadows it, and updates the
 * btree to point to this new shadow.  'root' is an input/output parameter
 * for both the current root block, and the new one.
 */
static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
			 unsigned index, struct dm_block **block,
			 struct array_block **ab)
{
	int r, inc;
	uint64_t key = index;
	dm_block_t b;
	__le64 block_le;

	/*
	 * lookup
	 */
	r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
	if (r)
		return r;
	b = le64_to_cpu(block_le);

	/*
	 * shadow
	 */
	r = dm_tm_shadow_block(info->btree_info.tm, b,
			       &array_validator, block, &inc);
	if (r)
		return r;

	*ab = dm_block_data(*block);
	if (inc)
		inc_ablock_entries(info, *ab);

	/*
	 * Reinsert.
	 *
	 * The shadow op will often be a noop.  Only insert if it really
	 * copied data.
	 */
	if (dm_block_location(*block) != b) {
		/*
		 * dm_tm_shadow_block will have already decremented the old
		 * block, but it is still referenced by the btree.  We
		 * increment to stop the insert decrementing it below zero
		 * when overwriting the old value.
		 */
		dm_tm_inc(info->btree_info.tm, b);
		r = insert_ablock(info, index, *block, root);
	}

	return r;
}
Beispiel #16
0
/*
 * Looks up an array block in the btree.  Then shadows it, and updates the
 * btree to point to this new shadow.  'root' is an input/output parameter
 * for both the current root block, and the new one.
 */
static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
			 unsigned index, struct dm_block **block,
			 struct array_block **ab)
{
	int r, inc;
	uint64_t key = index;
	dm_block_t b;
	__le64 block_le;

	/*
	 * lookup
	 */
	r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
	if (r)
		return r;
	b = le64_to_cpu(block_le);

	/*
	 * shadow
	 */
	r = dm_tm_shadow_block(info->btree_info.tm, b,
			       &array_validator, block, &inc);
	if (r)
		return r;

	*ab = dm_block_data(*block);
	if (inc)
		inc_ablock_entries(info, *ab);

	/*
	 * Reinsert.
	 *
	 * The shadow op will often be a noop.  Only insert if it really
	 * copied data.
	 */
	if (dm_block_location(*block) != b)
		r = insert_ablock(info, index, *block, root);

	return r;
}
Beispiel #17
0
int shadow_step(struct shadow_spine *s, dm_block_t b,
		struct dm_btree_value_type *vt)
{
	int r;

	if (s->count == 2) {
		r = unlock_block(s->info, s->nodes[0]);
		if (r < 0)
			return r;
		s->nodes[0] = s->nodes[1];
		s->count--;
	}

	r = bn_shadow(s->info, b, vt, s->nodes + s->count);
	if (!r) {
		if (!s->count)
			s->root = dm_block_location(s->nodes[0]);

		s->count++;
	}

	return r;
}
Beispiel #18
0
static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
			    struct dm_btree_value_type *vt,
			    uint64_t key, unsigned *index)
{
	int r, i = *index, top = 1;
	struct btree_node *node;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			return r;

		node = dm_block_data(shadow_current(s));

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. */
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			__dm_bless_for_disk(&location);
			memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
				    &location, sizeof(__le64));
		}

		node = dm_block_data(shadow_current(s));

		if (node->header.nr_entries == node->header.max_entries) {
			if (top)
				r = btree_split_beneath(s, key);
			else
				r = btree_split_sibling(s, root, i, key);

			if (r < 0)
				return r;
		}

		node = dm_block_data(shadow_current(s));

		i = lower_bound(node, key);

		if (le32_to_cpu(node->header.flags) & LEAF_NODE)
			break;

		if (i < 0) {
			/* change the bounds on the lowest key */
			node->keys[0] = cpu_to_le64(key);
			i = 0;
		}

		root = value64(node, i);
		top = 0;
	}

	if (i < 0 || le64_to_cpu(node->keys[i]) != key)
		i++;

	*index = i;
	return 0;
}
Beispiel #19
0
/*
 * Splits a node by creating two new children beneath the given node.
 *
 * Before:
 *	  +----------+
 *	  | A ++++++ |
 *	  +----------+
 *
 *
 * After:
 *	+------------+
 *	| A (shadow) |
 *	+------------+
 *	    |	|
 *   +------+	+----+
 *   |		     |
 *   v		     v
 * +-------+	 +-------+
 * | B +++ |	 | C +++ |
 * +-------+	 +-------+
 */
static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
{
	int r;
	size_t size;
	unsigned nr_left, nr_right;
	struct dm_block *left, *right, *new_parent;
	struct btree_node *pn, *ln, *rn;
	__le64 val;

	new_parent = shadow_current(s);

	r = new_block(s->info, &left);
	if (r < 0)
		return r;

	r = new_block(s->info, &right);
	if (r < 0) {
		/* FIXME: put left */
		return r;
	}

	pn = dm_block_data(new_parent);
	ln = dm_block_data(left);
	rn = dm_block_data(right);

	nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
	nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;

	ln->header.flags = pn->header.flags;
	ln->header.nr_entries = cpu_to_le32(nr_left);
	ln->header.max_entries = pn->header.max_entries;
	ln->header.value_size = pn->header.value_size;

	rn->header.flags = pn->header.flags;
	rn->header.nr_entries = cpu_to_le32(nr_right);
	rn->header.max_entries = pn->header.max_entries;
	rn->header.value_size = pn->header.value_size;

	memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
	memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));

	size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
		sizeof(__le64) : s->info->value_type.size;
	memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
	memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left),
	       nr_right * size);

	/* new_parent should just point to l and r now */
	pn->header.flags = cpu_to_le32(INTERNAL_NODE);
	pn->header.nr_entries = cpu_to_le32(2);
	pn->header.max_entries = cpu_to_le32(
		calc_max_entries(sizeof(__le64),
				 dm_bm_block_size(
					 dm_tm_get_bm(s->info->tm))));
	pn->header.value_size = cpu_to_le32(sizeof(__le64));

	val = cpu_to_le64(dm_block_location(left));
	__dm_bless_for_disk(&val);
	pn->keys[0] = ln->keys[0];
	memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64));

	val = cpu_to_le64(dm_block_location(right));
	__dm_bless_for_disk(&val);
	pn->keys[1] = rn->keys[0];
	memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));

	/*
	 * rejig the spine.  This is ugly, since it knows too
	 * much about the spine
	 */
	if (s->nodes[0] != new_parent) {
		unlock_block(s->info, s->nodes[0]);
		s->nodes[0] = new_parent;
	}
	if (key < le64_to_cpu(rn->keys[0])) {
		unlock_block(s->info, right);
		s->nodes[1] = left;
	} else {
		unlock_block(s->info, left);
		s->nodes[1] = right;
	}
	s->count = 2;

	return 0;
}
Beispiel #20
0
/*
 * Splits a node by creating a sibling node and shifting half the nodes
 * contents across.  Assumes there is a parent node, and it has room for
 * another child.
 *
 * Before:
 *	  +--------+
 *	  | Parent |
 *	  +--------+
 *	     |
 *	     v
 *	+----------+
 *	| A ++++++ |
 *	+----------+
 *
 *
 * After:
 *		+--------+
 *		| Parent |
 *		+--------+
 *		  |	|
 *		  v	+------+
 *	    +---------+	       |
 *	    | A* +++  |	       v
 *	    +---------+	  +-------+
 *			  | B +++ |
 *			  +-------+
 *
 * Where A* is a shadow of A.
 */
static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
			       unsigned parent_index, uint64_t key)
{
	int r;
	size_t size;
	unsigned nr_left, nr_right;
	struct dm_block *left, *right, *parent;
	struct btree_node *ln, *rn, *pn;
	__le64 location;

	left = shadow_current(s);

	r = new_block(s->info, &right);
	if (r < 0)
		return r;

	ln = dm_block_data(left);
	rn = dm_block_data(right);

	nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
	nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;

	ln->header.nr_entries = cpu_to_le32(nr_left);

	rn->header.flags = ln->header.flags;
	rn->header.nr_entries = cpu_to_le32(nr_right);
	rn->header.max_entries = ln->header.max_entries;
	rn->header.value_size = ln->header.value_size;
	memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));

	size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
		sizeof(uint64_t) : s->info->value_type.size;
	memcpy(value_ptr(rn, 0), value_ptr(ln, nr_left),
	       size * nr_right);

	/*
	 * Patch up the parent
	 */
	parent = shadow_parent(s);

	pn = dm_block_data(parent);
	location = cpu_to_le64(dm_block_location(left));
	__dm_bless_for_disk(&location);
	memcpy_disk(value_ptr(pn, parent_index),
		    &location, sizeof(__le64));

	location = cpu_to_le64(dm_block_location(right));
	__dm_bless_for_disk(&location);

	r = insert_at(sizeof(__le64), pn, parent_index + 1,
		      le64_to_cpu(rn->keys[0]), &location);
	if (r)
		return r;

	if (key < le64_to_cpu(rn->keys[0])) {
		unlock_block(s->info, right);
		s->nodes[1] = left;
	} else {
		unlock_block(s->info, left);
		s->nodes[1] = right;
	}

	return 0;
}