static int rebalance_children(struct shadow_spine *s,
			      struct dm_btree_info *info,
			      struct dm_btree_value_type *vt, uint64_t key)
{
	int i, r, has_left_sibling, has_right_sibling;
	uint32_t child_entries;
	struct btree_node *n;

	n = dm_block_data(shadow_current(s));

	if (le32_to_cpu(n->header.nr_entries) == 1) {
		struct dm_block *child;
		dm_block_t b = value64(n, 0);

		r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
		if (r)
			return r;

		memcpy(n, dm_block_data(child),
		       dm_bm_block_size(dm_tm_get_bm(info->tm)));
		r = dm_tm_unlock(info->tm, child);
		if (r)
			return r;

		dm_tm_dec(info->tm, dm_block_location(child));
		return 0;
	}

	i = lower_bound(n, key);
	if (i < 0)
		return -ENODATA;

	r = get_nr_entries(info->tm, value64(n, i), &child_entries);
	if (r)
		return r;

	has_left_sibling = i > 0;
	has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);

	if (!has_left_sibling)
		r = rebalance2(s, info, vt, i);

	else if (!has_right_sibling)
		r = rebalance2(s, info, vt, i - 1);

	else
		r = rebalance3(s, info, vt, i - 1);

	return r;
}
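
Several of the routines in these excerpts call lower_bound(), which is not reproduced here.  A minimal sketch of the behaviour they appear to rely on, assuming the node keeps its keys sorted in n->keys[]:

/*
 * Illustrative sketch (not part of the excerpts): binary search for the
 * index of the largest key <= 'key'.  Returns a negative value when every
 * key in the node is greater than 'key'.
 */
static int lower_bound(struct btree_node *n, uint64_t key)
{
	int lo = -1, hi = le32_to_cpu(n->header.nr_entries);

	while (hi - lo > 1) {
		int mid = lo + ((hi - lo) / 2);
		uint64_t mid_key = le64_to_cpu(n->keys[mid]);

		if (mid_key == key)
			return mid;

		if (mid_key < key)
			lo = mid;
		else
			hi = mid;
	}

	return lo;
}
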
/*
 * Prepares for removal from one level of the hierarchy.  The caller must
 * call delete_at() to remove the entry at index.
 */
static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, dm_block_t root,
		      uint64_t key, unsigned *index)
{
	int i = *index, r;
	struct btree_node *n;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			break;

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s)) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
			memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
			       &location, sizeof(__le64));
		}

		n = dm_block_data(shadow_current(s));

		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		r = rebalance_children(s, info, vt, key);
		if (r)
			break;

		n = dm_block_data(shadow_current(s));
		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		i = lower_bound(n, key);

		/*
		 * We know the key is present, or else
		 * rebalance_children would have returned
		 * -ENODATA
		 */
		root = value64(n, i);
	}

	return r;
}
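
remove_raw() hands leaf nodes off to do_leaf(), which is not shown in these excerpts.  A plausible sketch, assuming it simply locates the key in the leaf and reports its index, returning -ENODATA when the key is absent:

/* Illustrative sketch of the assumed do_leaf() helper. */
static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index)
{
	int i = lower_bound(n, key);

	if (i < 0 || le64_to_cpu(n->keys[i]) != key)
		return -ENODATA;

	*index = i;
	return 0;
}
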
static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, unsigned left_index)
{
	int r;
	struct btree_node *parent;
	struct child left, right;

	parent = dm_block_data(shadow_current(s));

	r = init_child(info, vt, parent, left_index, &left);
	if (r)
		return r;

	r = init_child(info, vt, parent, left_index + 1, &right);
	if (r) {
		exit_child(info, &left);
		return r;
	}

	__rebalance2(info, parent, &left, &right);

	r = exit_child(info, &left);
	if (r) {
		exit_child(info, &right);
		return r;
	}

	return exit_child(info, &right);
}
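
rebalance2() pairs each init_child() with an exit_child(), and init_child() below fills in an index, a block and a node pointer.  A sketch of the child bookkeeping these calls appear to assume:

/* Illustrative sketch of the assumed child bookkeeping. */
struct child {
	unsigned index;
	struct dm_block *block;
	struct btree_node *n;
};

static int exit_child(struct dm_btree_info *info, struct child *c)
{
	/* drop the shadow lock taken by init_child() */
	return dm_tm_unlock(info->tm, c->block);
}
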
static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
		      struct btree_node *parent,
		      unsigned index, struct child *result)
{
	int r, inc;
	dm_block_t root;

	result->index = index;
	root = value64(parent, index);

	r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
			       &result->block, &inc);
	if (r)
		return r;

	result->n = dm_block_data(result->block);

	if (inc)
		inc_children(info->tm, result->n, vt);

	*((__le64 *) value_ptr(parent, index)) =
		cpu_to_le64(dm_block_location(result->block));

	return 0;
}
Example #5
int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
{
	int r;
	struct dm_block *b;
	struct btree_node *n;
	size_t block_size;
	uint32_t max_entries;

	r = new_block(info, &b);
	if (r < 0)
		return r;

	block_size = dm_bm_block_size(dm_tm_get_bm(info->tm));
	max_entries = calc_max_entries(info->value_type.size, block_size);

	n = dm_block_data(b);
	memset(n, 0, block_size);
	n->header.flags = cpu_to_le32(LEAF_NODE);
	n->header.nr_entries = cpu_to_le32(0);
	n->header.max_entries = cpu_to_le32(max_entries);
	n->header.value_size = cpu_to_le32(info->value_type.size);

	*root = dm_block_location(b);
	return unlock_block(info, b);
}
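
dm_btree_empty() sizes the new leaf with calc_max_entries(), which is not reproduced here.  One plausible implementation, assuming each entry stores a 64-bit key plus a value of value_size bytes after the node header, with the count kept a multiple of 3 to simplify rebalancing:

/* Illustrative sketch of the assumed calc_max_entries() helper. */
static uint32_t calc_max_entries(size_t value_size, size_t block_size)
{
	size_t elt_size = sizeof(uint64_t) + value_size;	/* key + value */
	uint32_t total, n;

	block_size -= sizeof(struct node_header);
	total = block_size / elt_size;
	n = total / 3;		/* round down to a multiple of 3 */

	return 3 * n;
}
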
Example #6
static int array_block_check(struct dm_block_validator *v,
			     struct dm_block *b,
			     size_t size_of_block)
{
	struct array_block *bh_le = dm_block_data(b);
	__le32 csum_disk;

	if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
		DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu",
			    (unsigned long long) le64_to_cpu(bh_le->blocknr),
			    (unsigned long long) dm_block_location(b));
		return -ENOTBLK;
	}

	csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
					       size_of_block - sizeof(__le32),
					       CSUM_XOR));
	if (csum_disk != bh_le->csum) {
		DMERR_LIMIT("array_block_check failed: csum %u != wanted %u",
			    (unsigned) le32_to_cpu(csum_disk),
			    (unsigned) le32_to_cpu(bh_le->csum));
		return -EILSEQ;
	}

	return 0;
}
Example #7
/*
 * FIXME: We shouldn't use a recursive algorithm when we have limited stack
 * space.  Also this only works for single level trees.
 */
static int walk_node(struct dm_btree_info *info, dm_block_t block,
		     int (*fn)(void *context, uint64_t *keys, void *leaf),
		     void *context)
{
	int r;
	unsigned i, nr;
	struct dm_block *node;
	struct btree_node *n;
	uint64_t keys;

	r = bn_read_lock(info, block, &node);
	if (r)
		return r;

	n = dm_block_data(node);

	nr = le32_to_cpu(n->header.nr_entries);
	for (i = 0; i < nr; i++) {
		if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
			r = walk_node(info, value64(n, i), fn, context);
			if (r)
				goto out;
		} else {
			keys = le64_to_cpu(*key_ptr(n, i));
			r = fn(context, &keys, value_ptr(n, i));
			if (r)
				goto out;
		}
	}

out:
	dm_tm_unlock(info->tm, node);
	return r;
}
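
A hypothetical caller of walk_node(); the names count_entries_fn and count are illustrative only.  The walk visits every leaf entry in key order and stops early as soon as the callback returns non-zero.

/* Hypothetical callback: count the leaf entries visited by walk_node(). */
static int count_entries_fn(void *context, uint64_t *keys, void *leaf)
{
	unsigned *count = context;

	(*count)++;
	return 0;	/* returning non-zero would abort the walk */
}

/*
 * Usage sketch:
 *
 *	unsigned count = 0;
 *	r = walk_node(info, root, count_entries_fn, &count);
 */
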
static int sb_check(struct dm_block_validator *v,
		    struct dm_block *b,
		    size_t block_size)
{
	struct thin_disk_superblock *disk_super = dm_block_data(b);
	__le32 csum_le;

	if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
		DMERR("sb_check failed: blocknr %llu: "
		      "wanted %llu", le64_to_cpu(disk_super->blocknr),
		      (unsigned long long)dm_block_location(b));
		return -ENOTBLK;
	}

	if (le64_to_cpu(disk_super->magic) != THIN_SUPERBLOCK_MAGIC) {
		DMERR("sb_check failed: magic %llu: "
		      "wanted %llu", le64_to_cpu(disk_super->magic),
		      (unsigned long long)THIN_SUPERBLOCK_MAGIC);
		return -EILSEQ;
	}

	csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
					     block_size - sizeof(__le32),
					     SUPERBLOCK_CSUM_XOR));
	if (csum_le != disk_super->csum) {
		DMERR("sb_check failed: csum %u: wanted %u",
		      le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
		return -EILSEQ;
	}

	return 0;
}
Example #9
struct node *ro_node(struct ro_spine *s)
{
	struct dm_block *block;

	BUG_ON(!s->count);
	block = s->nodes[s->count - 1];

	return dm_block_data(block);
}
Example #10
static void array_block_prepare_for_write(struct dm_block_validator *v,
					  struct dm_block *b,
					  size_t size_of_block)
{
	struct array_block *bh_le = dm_block_data(b);

	bh_le->blocknr = cpu_to_le64(dm_block_location(b));
	bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
						 size_of_block - sizeof(__le32),
						 CSUM_XOR));
}
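
array_block_check() (Example #6) and array_block_prepare_for_write() above are presumably registered together as a block validator, so the block manager verifies array blocks on read and stamps the location and checksum on write.  A sketch of that wiring, with the field names assumed from the callback signatures:

/* Sketch: pairing the two callbacks in a dm_block_validator. */
static struct dm_block_validator array_validator = {
	.name = "array",
	.prepare_for_write = array_block_prepare_for_write,
	.check = array_block_check
};
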
Example #11
static void sb_prepare_for_write(struct dm_block_validator *v,
				 struct dm_block *b,
				 size_t block_size)
{
	struct thin_disk_superblock *disk_super = dm_block_data(b);

	disk_super->blocknr = cpu_to_le64(dm_block_location(b));
	disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
						      block_size - sizeof(__le32),
						      SUPERBLOCK_CSUM_XOR));
}
Example #12
/*
 * Read locks a block, and coerces it to an array block.  The caller must
 * unlock 'block' when finished.
 */
static int get_ablock(struct dm_array_info *info, dm_block_t b,
		      struct dm_block **block, struct array_block **ab)
{
	int r;

	r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block);
	if (r)
		return r;

	*ab = dm_block_data(*block);
	return 0;
}
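
The read lock taken by get_ablock() has to be dropped by the caller.  A minimal sketch of the counterpart, assuming a small helper along these lines exists:

/* Illustrative sketch: release the read lock taken by get_ablock(). */
static void unlock_ablock(struct dm_array_info *info, struct dm_block *block)
{
	dm_tm_unlock(info->btree_info.tm, block);
}
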
Example #13
static int bn_shadow(struct dm_btree_info *info, dm_block_t orig,
	      struct dm_btree_value_type *vt,
	      struct dm_block **result)
{
	int r, inc;

	r = dm_tm_shadow_block(info->tm, orig, &btree_node_validator,
			       result, &inc);
	if (!r && inc)
		inc_children(info->tm, dm_block_data(*result), vt);

	return r;
}
Example #14
static void node_prepare_for_write(struct dm_block_validator *v,
				   struct dm_block *b,
				   size_t block_size)
{
	struct node *n = dm_block_data(b);
	struct node_header *h = &n->header;

	h->blocknr = cpu_to_le64(dm_block_location(b));
	h->csum = cpu_to_le32(dm_bm_checksum(&h->flags,
					     block_size - sizeof(__le32),
					     BTREE_CSUM_XOR));

	BUG_ON(node_check(v, b, block_size));
}
Example #15
static int node_check(struct dm_block_validator *v,
		      struct dm_block *b,
		      size_t block_size)
{
	struct node *n = dm_block_data(b);
	struct node_header *h = &n->header;
	size_t value_size;
	__le32 csum_disk;
	uint32_t flags;

	if (dm_block_location(b) != le64_to_cpu(h->blocknr)) {
		DMERR_LIMIT("node_check failed blocknr %llu wanted %llu",
			    le64_to_cpu(h->blocknr), dm_block_location(b));
		return -ENOTBLK;
	}

	csum_disk = cpu_to_le32(dm_bm_checksum(&h->flags,
					       block_size - sizeof(__le32),
					       BTREE_CSUM_XOR));
	if (csum_disk != h->csum) {
		DMERR_LIMIT("node_check failed csum %u wanted %u",
			    le32_to_cpu(csum_disk), le32_to_cpu(h->csum));
		return -EILSEQ;
	}

	value_size = le32_to_cpu(h->value_size);

	if (sizeof(struct node_header) +
	    (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) {
		DMERR_LIMIT("node_check failed: max_entries too large");
		return -EILSEQ;
	}

	if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) {
		DMERR_LIMIT("node_check failed, too many entries");
		return -EILSEQ;
	}

	/*
	 * The node must be either INTERNAL or LEAF.
	 */
	flags = le32_to_cpu(h->flags);
	if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) {
		DMERR_LIMIT("node_check failed, node is neither INTERNAL or LEAF");
		return -EILSEQ;
	}

	return 0;
}
Example #16
/*
 * Looks up an array block in the btree.  Then shadows it, and updates the
 * btree to point to this new shadow.  'root' is an input/output parameter
 * for both the current root block, and the new one.
 */
static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
			 unsigned index, struct dm_block **block,
			 struct array_block **ab)
{
	int r, inc;
	uint64_t key = index;
	dm_block_t b;
	__le64 block_le;

	/*
	 * lookup
	 */
	r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
	if (r)
		return r;
	b = le64_to_cpu(block_le);

	/*
	 * shadow
	 */
	r = dm_tm_shadow_block(info->btree_info.tm, b,
			       &array_validator, block, &inc);
	if (r)
		return r;

	*ab = dm_block_data(*block);
	if (inc)
		inc_ablock_entries(info, *ab);

	/*
	 * Reinsert.
	 *
	 * The shadow op will often be a noop.  Only insert if it really
	 * copied data.
	 */
	if (dm_block_location(*block) != b) {
		/*
		 * dm_tm_shadow_block will have already decremented the old
		 * block, but it is still referenced by the btree.  We
		 * increment to stop the insert decrementing it below zero
		 * when overwriting the old value.
		 */
		dm_tm_inc(info->btree_info.tm, b);
		r = insert_ablock(info, index, *block, root);
	}

	return r;
}
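
shadow_ablock() re-inserts the new block location through insert_ablock(), which is not shown in these excerpts.  A plausible sketch, assuming it packs the block location as a little-endian value and overwrites the btree entry for 'index', with dm_btree_insert() updating the root in place:

/* Illustrative sketch of the assumed insert_ablock() helper. */
static int insert_ablock(struct dm_array_info *info, uint64_t index,
			 struct dm_block *block, dm_block_t *root)
{
	__le64 block_le = cpu_to_le64(dm_block_location(block));

	__dm_bless_for_disk(&block_le);
	return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
}
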
static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, unsigned left_index)
{
	int r;
	struct btree_node *parent = dm_block_data(shadow_current(s));
	struct child left, center, right;

	/*
	 * FIXME: fill out an array?
	 */
	r = init_child(info, vt, parent, left_index, &left);
	if (r)
		return r;

	r = init_child(info, vt, parent, left_index + 1, &center);
	if (r) {
		exit_child(info, &left);
		return r;
	}

	r = init_child(info, vt, parent, left_index + 2, &right);
	if (r) {
		exit_child(info, &left);
		exit_child(info, &center);
		return r;
	}

	__rebalance3(info, parent, &left, &center, &right);

	r = exit_child(info, &left);
	if (r) {
		exit_child(info, &center);
		exit_child(info, &right);
		return r;
	}

	r = exit_child(info, &center);
	if (r) {
		exit_child(info, &right);
		return r;
	}

	r = exit_child(info, &right);
	if (r)
		return r;

	return 0;
}

static int get_nr_entries(struct dm_transaction_manager *tm,
			  dm_block_t b, uint32_t *result)
{
	int r;
	struct dm_block *block;
	struct btree_node *n;

	r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
	if (r)
		return r;

	n = dm_block_data(block);
	*result = le32_to_cpu(n->header.nr_entries);

	return dm_tm_unlock(tm, block);
}
Example #19
/*
 * Allocate a new array block.  The caller will need to unlock block.
 */
static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
			uint32_t max_entries,
			struct dm_block **block, struct array_block **ab)
{
	int r;

	r = dm_tm_new_block(info->btree_info.tm, &array_validator, block);
	if (r)
		return r;

	(*ab) = dm_block_data(*block);
	(*ab)->max_entries = cpu_to_le32(max_entries);
	(*ab)->nr_entries = cpu_to_le32(0);
	(*ab)->value_size = cpu_to_le32(info->value_type.size);

	return 0;
}
Example #20
static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
{
	int r;
	uint32_t ref_count;

	if (s->top >= MAX_SPINE_DEPTH - 1) {
		DMERR("btree deletion stack out of memory");
		return -ENOMEM;
	}

	r = dm_tm_ref(s->tm, b, &ref_count);
	if (r)
		return r;

	if (ref_count > 1)
		/*
		 * This is a shared node, so we can just decrement its
		 * reference counter and leave the children.
		 */
		dm_tm_dec(s->tm, b);

	else {
		uint32_t flags;
		struct frame *f = s->spine + ++s->top;

		r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
		if (r) {
			s->top--;
			return r;
		}

		f->n = dm_block_data(f->b);
		f->level = level;
		f->nr_children = le32_to_cpu(f->n->header.nr_entries);
		f->current_child = 0;

		flags = le32_to_cpu(f->n->header.flags);
		if (flags & INTERNAL_NODE || is_internal_level(s->info, f))
			prefetch_children(s, f);
	}

	return 0;
}
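
push_frame() implies a matching routine that retires the top of the deletion stack once all of a frame's children have been visited.  A sketch of what that counterpart presumably does: decrement the block's reference count and drop the read lock.

/* Illustrative sketch of the assumed pop_frame() counterpart. */
static void pop_frame(struct del_stack *s)
{
	struct frame *f = s->spine + s->top--;

	dm_tm_dec(s->tm, dm_block_location(f->b));
	dm_tm_unlock(s->tm, f->b);
}
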
Example #21
/*
 * Looks up an array block in the btree.  Then shadows it, and updates the
 * btree to point to this new shadow.  'root' is an input/output parameter
 * for both the current root block, and the new one.
 */
static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
			 unsigned index, struct dm_block **block,
			 struct array_block **ab)
{
	int r, inc;
	uint64_t key = index;
	dm_block_t b;
	__le64 block_le;

	/*
	 * lookup
	 */
	r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
	if (r)
		return r;
	b = le64_to_cpu(block_le);

	/*
	 * shadow
	 */
	r = dm_tm_shadow_block(info->btree_info.tm, b,
			       &array_validator, block, &inc);
	if (r)
		return r;

	*ab = dm_block_data(*block);
	if (inc)
		inc_ablock_entries(info, *ab);

	/*
	 * Reinsert.
	 *
	 * The shadow op will often be a noop.  Only insert if it really
	 * copied data.
	 */
	if (dm_block_location(*block) != b)
		r = insert_ablock(info, index, *block, root);

	return r;
}
Example #22
int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
		    uint64_t *keys, dm_block_t *new_root)
{
	unsigned level, last_level = info->levels - 1;
	int index = 0, r = 0;
	struct shadow_spine spine;
	struct btree_node *n;
	struct dm_btree_value_type le64_vt;

	init_le64_type(info->tm, &le64_vt);
	init_shadow_spine(&spine, info);
	for (level = 0; level < info->levels; level++) {
		r = remove_raw(&spine, info,
			       (level == last_level ?
				&info->value_type : &le64_vt),
			       root, keys[level], (unsigned *)&index);
		if (r < 0)
			break;

		n = dm_block_data(shadow_current(&spine));
		if (level != last_level) {
			root = value64(n, index);
			continue;
		}

		BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));

		if (info->value_type.dec)
			info->value_type.dec(info->value_type.context,
					     value_ptr(n, index));

		delete_at(n, index);
	}

	*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return r;
}
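
A hypothetical caller of dm_btree_remove() for the common single-level case; remove_key() and its arguments are illustrative names only.

/* Hypothetical usage: remove one 64-bit key from a single-level btree. */
static int remove_key(struct dm_btree_info *info, dm_block_t root,
		      uint64_t key, dm_block_t *new_root)
{
	/* keys[] must supply one key per tree level; here info->levels == 1 */
	return dm_btree_remove(info, root, &key, new_root);
}
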
Example #23
static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
			    struct dm_btree_value_type *vt,
			    uint64_t key, unsigned *index)
{
	int r, i = *index, top = 1;
	struct btree_node *node;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			return r;

		node = dm_block_data(shadow_current(s));

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. */
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			__dm_bless_for_disk(&location);
			memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
				    &location, sizeof(__le64));
		}

		node = dm_block_data(shadow_current(s));

		if (node->header.nr_entries == node->header.max_entries) {
			if (top)
				r = btree_split_beneath(s, key);
			else
				r = btree_split_sibling(s, root, i, key);

			if (r < 0)
				return r;
		}

		node = dm_block_data(shadow_current(s));

		i = lower_bound(node, key);

		if (le32_to_cpu(node->header.flags) & LEAF_NODE)
			break;

		if (i < 0) {
			/* change the bounds on the lowest key */
			node->keys[0] = cpu_to_le64(key);
			i = 0;
		}

		root = value64(node, i);
		top = 0;
	}

	if (i < 0 || le64_to_cpu(node->keys[i]) != key)
		i++;

	*index = i;
	return 0;
}
Example #24
/*
 * Splits a node by creating a sibling node and shifting half the nodes
 * contents across.  Assumes there is a parent node, and it has room for
 * another child.
 *
 * Before:
 *	  +--------+
 *	  | Parent |
 *	  +--------+
 *	     |
 *	     v
 *	+----------+
 *	| A ++++++ |
 *	+----------+
 *
 *
 * After:
 *		+--------+
 *		| Parent |
 *		+--------+
 *		  |	|
 *		  v	+------+
 *	    +---------+	       |
 *	    | A* +++  |	       v
 *	    +---------+	  +-------+
 *			  | B +++ |
 *			  +-------+
 *
 * Where A* is a shadow of A.
 */
static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
			       unsigned parent_index, uint64_t key)
{
	int r;
	size_t size;
	unsigned nr_left, nr_right;
	struct dm_block *left, *right, *parent;
	struct btree_node *ln, *rn, *pn;
	__le64 location;

	left = shadow_current(s);

	r = new_block(s->info, &right);
	if (r < 0)
		return r;

	ln = dm_block_data(left);
	rn = dm_block_data(right);

	nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
	nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;

	ln->header.nr_entries = cpu_to_le32(nr_left);

	rn->header.flags = ln->header.flags;
	rn->header.nr_entries = cpu_to_le32(nr_right);
	rn->header.max_entries = ln->header.max_entries;
	rn->header.value_size = ln->header.value_size;
	memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));

	size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
		sizeof(uint64_t) : s->info->value_type.size;
	memcpy(value_ptr(rn, 0), value_ptr(ln, nr_left),
	       size * nr_right);

	/*
	 * Patch up the parent
	 */
	parent = shadow_parent(s);

	pn = dm_block_data(parent);
	location = cpu_to_le64(dm_block_location(left));
	__dm_bless_for_disk(&location);
	memcpy_disk(value_ptr(pn, parent_index),
		    &location, sizeof(__le64));

	location = cpu_to_le64(dm_block_location(right));
	__dm_bless_for_disk(&location);

	r = insert_at(sizeof(__le64), pn, parent_index + 1,
		      le64_to_cpu(rn->keys[0]), &location);
	if (r)
		return r;

	if (key < le64_to_cpu(rn->keys[0])) {
		unlock_block(s->info, right);
		s->nodes[1] = left;
	} else {
		unlock_block(s->info, left);
		s->nodes[1] = right;
	}

	return 0;
}
Example #25
/*
 * Splits a node by creating two new children beneath the given node.
 *
 * Before:
 *	  +----------+
 *	  | A ++++++ |
 *	  +----------+
 *
 *
 * After:
 *	+------------+
 *	| A (shadow) |
 *	+------------+
 *	    |	|
 *   +------+	+----+
 *   |		     |
 *   v		     v
 * +-------+	 +-------+
 * | B +++ |	 | C +++ |
 * +-------+	 +-------+
 */
static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
{
	int r;
	size_t size;
	unsigned nr_left, nr_right;
	struct dm_block *left, *right, *new_parent;
	struct btree_node *pn, *ln, *rn;
	__le64 val;

	new_parent = shadow_current(s);

	r = new_block(s->info, &left);
	if (r < 0)
		return r;

	r = new_block(s->info, &right);
	if (r < 0) {
		/* FIXME: put left */
		return r;
	}

	pn = dm_block_data(new_parent);
	ln = dm_block_data(left);
	rn = dm_block_data(right);

	nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
	nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;

	ln->header.flags = pn->header.flags;
	ln->header.nr_entries = cpu_to_le32(nr_left);
	ln->header.max_entries = pn->header.max_entries;
	ln->header.value_size = pn->header.value_size;

	rn->header.flags = pn->header.flags;
	rn->header.nr_entries = cpu_to_le32(nr_right);
	rn->header.max_entries = pn->header.max_entries;
	rn->header.value_size = pn->header.value_size;

	memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
	memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));

	size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
		sizeof(__le64) : s->info->value_type.size;
	memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
	memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left),
	       nr_right * size);

	/* new_parent should just point to l and r now */
	pn->header.flags = cpu_to_le32(INTERNAL_NODE);
	pn->header.nr_entries = cpu_to_le32(2);
	pn->header.max_entries = cpu_to_le32(
		calc_max_entries(sizeof(__le64),
				 dm_bm_block_size(
					 dm_tm_get_bm(s->info->tm))));
	pn->header.value_size = cpu_to_le32(sizeof(__le64));

	val = cpu_to_le64(dm_block_location(left));
	__dm_bless_for_disk(&val);
	pn->keys[0] = ln->keys[0];
	memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64));

	val = cpu_to_le64(dm_block_location(right));
	__dm_bless_for_disk(&val);
	pn->keys[1] = rn->keys[0];
	memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));

	/*
	 * rejig the spine.  This is ugly, since it knows too
	 * much about the spine
	 */
	if (s->nodes[0] != new_parent) {
		unlock_block(s->info, s->nodes[0]);
		s->nodes[0] = new_parent;
	}
	if (key < le64_to_cpu(rn->keys[0])) {
		unlock_block(s->info, right);
		s->nodes[1] = left;
	} else {
		unlock_block(s->info, left);
		s->nodes[1] = right;
	}
	s->count = 2;

	return 0;
}