/*
 * Walks every node reachable from @root without recursion, using a
 * del_stack allocated with kmalloc(GFP_NOIO) as an explicit stack of frames.
 *
 * For the frame on top of the stack:
 *  - once all of its children have been visited it is popped;
 *  - if it still has children to descend into (the node is flagged
 *    INTERNAL_NODE, or is_internal_level() says so) the next child is pushed;
 *  - otherwise info->value_type.dec, when set, is called on every value in
 *    the node and the frame is marked as finished so the next pass pops it.
 *
 * Returns 0 on success, -ENOMEM if the stack cannot be allocated, or the
 * error reported by push_frame() / top_frame().
 *
 * NOTE(review): on the error paths the stack is freed while frames may still
 * be pushed; confirm whether those frames hold anything that must be released.
 */
int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
{
	int r;
	struct del_stack *stack;

	stack = kmalloc(sizeof(*stack), GFP_NOIO);
	if (!stack)
		return -ENOMEM;

	stack->tm = info->tm;
	stack->top = -1;

	r = push_frame(stack, root, 0);
	if (r)
		goto out;

	while (unprocessed_frames(stack)) {
		struct frame *f;
		uint32_t node_flags;
		dm_block_t child;

		r = top_frame(stack, &f);
		if (r)
			goto out;

		/* Every child of this frame has been dealt with. */
		if (f->current_child >= f->nr_children) {
			pop_frame(stack);
			continue;
		}

		node_flags = le32_to_cpu(f->n->header.flags);

		if (!(node_flags & INTERNAL_NODE) && !is_internal_level(info, f)) {
			/* Nothing below this node: drop the values it holds. */
			if (info->value_type.dec) {
				unsigned v;

				for (v = 0; v < f->nr_children; v++)
					info->value_type.dec(info->value_type.context,
							     value_ptr(f->n, v));
			}

			/* Mark the frame as done; it is popped next time round. */
			f->current_child = f->nr_children;
			continue;
		}

		/*
		 * Descend into the next child.  A child of an internal node
		 * stays on the same level; otherwise the child is the root of
		 * a tree one level further down.
		 */
		child = value64(f->n, f->current_child);
		f->current_child++;
		r = push_frame(stack, child,
			       (node_flags & INTERNAL_NODE) ? f->level : f->level + 1);
		if (r)
			goto out;
	}

out:
	kfree(stack);
	return r;
}
static int rebalance_children(struct shadow_spine *s, struct dm_btree_info *info, struct dm_btree_value_type *vt, uint64_t key) { int i, r, has_left_sibling, has_right_sibling; uint32_t child_entries; struct btree_node *n; n = dm_block_data(shadow_current(s)); if (le32_to_cpu(n->header.nr_entries) == 1) { struct dm_block *child; dm_block_t b = value64(n, 0); r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child); if (r) return r; memcpy(n, dm_block_data(child), dm_bm_block_size(dm_tm_get_bm(info->tm))); r = dm_tm_unlock(info->tm, child); if (r) return r; dm_tm_dec(info->tm, dm_block_location(child)); return 0; } i = lower_bound(n, key); if (i < 0) return -ENODATA; r = get_nr_entries(info->tm, value64(n, i), &child_entries); if (r) return r; has_left_sibling = i > 0; has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); if (!has_left_sibling) r = rebalance2(s, info, vt, i); else if (!has_right_sibling) r = rebalance2(s, info, vt, i - 1); else r = rebalance3(s, info, vt, i - 1); return r; }
/*
 * FIXME: Recursion is a poor choice when stack space is limited.  This also
 * only works for single level trees.
 *
 * Read-locks @block and visits its entries in order.  For a node flagged
 * INTERNAL_NODE each entry is followed recursively; otherwise @fn is called
 * with @context, a pointer to the CPU-endian key and a pointer to the value.
 * The walk stops at the first non-zero result, which is returned after the
 * block has been unlocked.
 */
static int walk_node(struct dm_btree_info *info, dm_block_t block,
		     int (*fn)(void *context, uint64_t *keys, void *leaf),
		     void *context)
{
	int r;
	unsigned idx, nr_entries;
	struct dm_block *blk;
	struct btree_node *node;
	uint64_t key;

	r = bn_read_lock(info, block, &blk);
	if (r)
		return r;

	node = dm_block_data(blk);
	nr_entries = le32_to_cpu(node->header.nr_entries);

	/* r is zero on entry; any failure below ends the loop. */
	for (idx = 0; idx < nr_entries && !r; idx++) {
		if (le32_to_cpu(node->header.flags) & INTERNAL_NODE) {
			r = walk_node(info, value64(node, idx), fn, context);
		} else {
			key = le64_to_cpu(*key_ptr(node, idx));
			r = fn(context, &key, value_ptr(node, idx));
		}
	}

	dm_tm_unlock(info->tm, blk);
	return r;
}
/*
 * Follows the last entry of each node, starting at @block, until a node that
 * is not flagged INTERNAL_NODE has been examined.
 *
 * *result_key is overwritten with the last key of every node visited, so on
 * success it holds the last key of the final node.  If @next_block is not
 * NULL it receives the value stored in that final entry.
 *
 * Returns 0 on success, -ENODATA if a visited node has no entries, or the
 * negative error from ro_step().
 */
static int find_highest_key(struct ro_spine *s, dm_block_t block,
			    uint64_t *result_key, dm_block_t *next_block)
{
	int last, r;
	uint32_t flags;

	for (;;) {
		r = ro_step(s, block);
		if (r < 0)
			return r;

		flags = le32_to_cpu(ro_node(s)->header.flags);
		last = le32_to_cpu(ro_node(s)->header.nr_entries);
		if (last == 0)
			return -ENODATA;
		last--;

		*result_key = le64_to_cpu(ro_node(s)->keys[last]);

		/*
		 * The value is needed either to keep descending or because
		 * the caller asked for it.
		 */
		if (next_block || (flags & INTERNAL_NODE))
			block = value64(ro_node(s), last);

		if (!(flags & INTERNAL_NODE))
			break;
	}

	if (next_block)
		*next_block = block;

	return 0;
}
/*
 * Descends from @block, using @search_fn to choose an entry in each node,
 * until a node flagged LEAF_NODE has been searched.
 *
 * On success the key of the chosen leaf entry is stored in *result_key and
 * @value_size bytes of its value are copied to @v.
 *
 * Returns 0 on success, -ENODATA if @search_fn yields an index outside
 * [0, nr_entries) for any node on the way, or the negative error from
 * ro_step().
 */
static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
			    int (*search_fn)(struct btree_node *, uint64_t),
			    uint64_t *result_key, void *v, size_t value_size)
{
	int idx, r;
	uint32_t flags, nr_entries;

	for (;;) {
		r = ro_step(s, block);
		if (r < 0)
			return r;

		idx = search_fn(ro_node(s), key);

		flags = le32_to_cpu(ro_node(s)->header.flags);
		nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries);
		if (idx < 0 || idx >= nr_entries)
			return -ENODATA;

		if (flags & INTERNAL_NODE)
			block = value64(ro_node(s), idx);

		if (flags & LEAF_NODE)
			break;
	}

	*result_key = le64_to_cpu(ro_node(s)->keys[idx]);
	memcpy(v, value_ptr(ro_node(s), idx), value_size);

	return 0;
}
static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt, struct btree_node *parent, unsigned index, struct child *result) { int r, inc; dm_block_t root; result->index = index; root = value64(parent, index); r = dm_tm_shadow_block(info->tm, root, &btree_node_validator, &result->block, &inc); if (r) return r; result->n = dm_block_data(result->block); if (inc) inc_children(info->tm, result->n, vt); *((__le64 *) value_ptr(parent, index)) = cpu_to_le64(dm_block_location(result->block)); return 0; }
/*
 * Issues dm_bm_prefetch() for the block referenced by each of the
 * f->nr_children values of frame @f, using the block manager that belongs
 * to the stack's transaction manager.
 */
static void prefetch_children(struct del_stack *s, struct frame *f)
{
	struct dm_block_manager *bm = dm_tm_get_bm(s->tm);
	unsigned child = 0;

	while (child < f->nr_children) {
		dm_bm_prefetch(bm, value64(f->n, child));
		child++;
	}
}
/*
 * Takes a reference on everything node @n points at.
 *
 * For a node flagged INTERNAL_NODE, dm_tm_inc() is called on each value.
 * For any other node, vt->inc (if set) is called on each value with
 * vt->context.
 */
void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
		  struct dm_btree_value_type *vt)
{
	unsigned idx;
	uint32_t count = le32_to_cpu(n->header.nr_entries);

	if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
		for (idx = 0; idx < count; idx++)
			dm_tm_inc(tm, value64(n, idx));
		return;
	}

	if (!vt->inc)
		return;

	for (idx = 0; idx < count; idx++)
		vt->inc(vt->context, value_ptr(n, idx));
}
/*
 * Prepares for removal from one level of the hierarchy.  The caller must
 * call delete_at() to remove the entry at index.
 *
 * Starting at @root, each node on the path to @key is stepped onto the
 * shadow spine @s and its parent's value slot is patched to the current
 * block's location.  Nodes that are not leaves are passed to
 * rebalance_children() before descending.
 *
 * @index: on entry, the slot used when patching a parent that is already on
 *         the spine; it is also handed to do_leaf() once a leaf is reached
 *         (presumably to report the entry's position - see do_leaf()).
 *
 * Returns the result of do_leaf() when a leaf is reached, -ENODATA if no
 * slot covers @key on the way down, or the error from shadow_step() /
 * rebalance_children().
 */
static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, dm_block_t root,
		      uint64_t key, unsigned *index)
{
	int i = *index, r;
	struct btree_node *n;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			break;

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s)) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
			       &location, sizeof(__le64));
		}

		n = dm_block_data(shadow_current(s));

		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		r = rebalance_children(s, info, vt, key);
		if (r)
			break;

		/* Rebalancing may have replaced the node's contents. */
		n = dm_block_data(shadow_current(s));
		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		i = lower_bound(n, key);

		/*
		 * rebalance_children() normally returns -ENODATA when no
		 * slot covers the key, but its single-entry path returns 0
		 * without looking the key up.  Check again so we never index
		 * the node with a negative slot.
		 */
		if (i < 0) {
			r = -ENODATA;
			break;
		}

		root = value64(n, i);
	}

	return r;
}
/*
 * Removes the entry addressed by @keys (one key per level, info->levels in
 * total) from the tree rooted at @root.
 *
 * Each level is prepared with remove_raw().  On every level but the last the
 * value found becomes the root for the next level.  On the last level
 * info->value_type.dec (if set) is called on the value and the entry is
 * removed with delete_at().
 *
 * *new_root is always set from shadow_root() before returning.
 * NOTE(review): that includes the r < 0 case - confirm shadow_root() is
 * meaningful if remove_raw() failed before anything was shadowed.
 *
 * Returns the result of the last remove_raw() call; negative values are
 * errors.
 */
int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
		    uint64_t *keys, dm_block_t *new_root)
{
	unsigned depth, last_depth = info->levels - 1;
	int index = 0, r = 0;
	struct shadow_spine spine;
	struct btree_node *node;
	struct dm_btree_value_type le64_vt;

	init_le64_type(info->tm, &le64_vt);
	init_shadow_spine(&spine, info);

	for (depth = 0; depth < info->levels; depth++) {
		/* Only the bottom level holds the caller's value type. */
		struct dm_btree_value_type *vt = &le64_vt;

		if (depth == last_depth)
			vt = &info->value_type;

		r = remove_raw(&spine, info, vt, root, keys[depth],
			       (unsigned *)&index);
		if (r < 0)
			break;

		node = dm_block_data(shadow_current(&spine));

		if (depth == last_depth) {
			BUG_ON(index < 0 || index >= le32_to_cpu(node->header.nr_entries));

			if (info->value_type.dec)
				info->value_type.dec(info->value_type.context,
						     value_ptr(node, index));

			delete_at(node, index);
		} else {
			root = value64(node, index);
		}
	}

	*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return r;
}
/*
 * Walks from @root down to the leaf where @key belongs, shadowing each node
 * onto spine @s and splitting any node whose nr_entries equals max_entries.
 *
 * @index: on entry, the slot used when patching a parent already on the
 *         spine; on success, the position in the leaf for @key: the matching
 *         entry if the key is already there, otherwise one past the slot
 *         lower_bound() returned.
 *
 * On the way down, if @key is below the first key of an internal node that
 * first key is lowered to @key.
 *
 * Returns 0 on success or the negative error from shadow_step() or one of
 * the split helpers.
 */
static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
			    struct dm_btree_value_type *vt,
			    uint64_t key, unsigned *index)
{
	int r, slot = *index, at_top = 1;
	struct btree_node *n;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			return r;

		n = dm_block_data(shadow_current(s));

		/*
		 * The parent still refers to the pre-shadow block, so its
		 * value slot has to be rewritten by hand; the spine op does
		 * not do it for us.
		 */
		if (shadow_has_parent(s) && slot >= 0) { /* FIXME: second clause unness. */
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			__dm_bless_for_disk(&location);
			memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), slot),
				    &location, sizeof(__le64));
		}

		n = dm_block_data(shadow_current(s));

		/* Both fields are little-endian on disk, so compare them raw. */
		if (n->header.nr_entries == n->header.max_entries) {
			r = at_top ? btree_split_beneath(s, key)
				   : btree_split_sibling(s, root, slot, key);
			if (r < 0)
				return r;
		}

		/* Re-read the current node after a possible split. */
		n = dm_block_data(shadow_current(s));

		slot = lower_bound(n, key);

		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			break;

		if (slot < 0) {
			/* change the bounds on the lowest key */
			n->keys[0] = cpu_to_le64(key);
			slot = 0;
		}

		root = value64(n, slot);
		at_top = 0;
	}

	/* Unless the key is already present, insert after the lower bound. */
	if (!(slot >= 0 && le64_to_cpu(n->keys[slot]) == key))
		slot++;

	*index = slot;
	return 0;
}
/*
 * Calls fill(), reads a uint64 from the current input position with
 * value64(), advances the input position by 8 and returns the value read.
 * NOTE(review): fill(), input and this one-argument value64() are defined
 * elsewhere; presumably fill() ensures enough buffered input - confirm.
 */
uint64 get64 ()
{
	fill();

	uint64 value = value64(input);

	input += 8;
	return value;
}