/*
 * Position @cursor for @key by walking down from the root.
 *
 * The root is loaded with cursor_read_root(); a negative result from it
 * is returned as-is, without releasing the cursor.  After that, each
 * round calls cursor_bnode_lookup() for @key and then
 * cursor_advance_down().  The walk stops when cursor_advance_down()
 * returns 0 (success) or a negative value (failure).
 *
 * Returns 0 on success.  On a failure during the walk the cursor is
 * released with release_cursor() and the negative value is returned.
 */
int btree_probe(struct cursor *cursor, tuxkey_t key)
{
	int err;

	/* Function-entry marker, only emitted when DEBUG_MODE_K is 1 */
	if (DEBUG_MODE_K == 1)
		printf("\t\t\t\t%25s[K] %25s %4d #in\n", __FILE__, __func__, __LINE__);

	err = cursor_read_root(cursor);
	if (err < 0)
		return err;

	for (;;) {
		cursor_bnode_lookup(cursor, key);
		err = cursor_advance_down(cursor);
		if (err < 0)
			break;		/* failed part way down */
		if (err == 0)
			return 0;	/* nothing further to descend into */
	}

	release_cursor(cursor);
	return err;
}
int advance(struct btree *btree, struct cursor *cursor) { int depth = btree->root.depth, level = depth; struct buffer_head *buffer; do { level_pop_brelse(cursor); if (!level) return 0; level--; } while (level_finished(cursor, level)); while (1) { buffer = sb_bread(vfs_sb(btree->sb), from_be_u64(cursor->path[level].next->block)); if (!buffer) goto eek; cursor->path[level].next++; if (level + 1 == depth) break; level_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries); level++; } level_push(cursor, buffer, NULL); cursor_check(cursor); return 1; eek: release_cursor(cursor); return -EIO; }
/*
 * Build in @cursor the path from the root of @btree to the leaf for @key.
 *
 * For each of the btree->root.depth index levels, the node's entries
 * are scanned (starting from the second one) for the first entry whose
 * key is greater than @key.  The node is pushed on the path with that
 * entry as its "next" pointer, and the block of the entry just before
 * it is read as the child.  The final block is pushed with a NULL
 * "next" pointer after asserting that ops->leaf_sniff() accepts it.
 *
 * Returns 0 on success, or -EIO when sb_bread() fails.  If the root
 * read fails nothing has been pushed and the cursor is left alone;
 * later failures release the cursor first.
 */
int probe(struct btree *btree, tuxkey_t key, struct cursor *cursor)
{
	unsigned depth = btree->root.depth;
	unsigned level;
	struct buffer_head *buffer;
	struct bnode *node;

	buffer = sb_bread(vfs_sb(btree->sb), btree->root.block);
	if (!buffer)
		return -EIO;
	node = bufdata(buffer);

	for (level = 0; level < depth; level++) {
		struct index_entry *limit = node->entries + bcount(node);
		struct index_entry *next = node->entries + 1;

		/* binary search goes here */
		while (next < limit && from_be_u64(next->key) <= key)
			next++;

		trace("probe level %i, %ti of %i", level, next - node->entries, bcount(node));
		level_push(cursor, buffer, next);

		/* The child to follow is the entry just before "next" */
		buffer = sb_bread(vfs_sb(btree->sb), from_be_u64((next - 1)->block));
		if (!buffer) {
			release_cursor(cursor);
			return -EIO; /* stupid, it might have been NOMEM */
		}
		node = (struct bnode *)bufdata(buffer);
	}

	assert((btree->ops->leaf_sniff)(btree, bufdata(buffer)));
	level_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 0;
}
/*
 * Position @cursor for @key by walking down from the root.
 *
 * cursor_read_root() loads the root; if it returns a negative value
 * that value is returned and the cursor is not released.  Then
 * cursor_bnode_lookup() and cursor_advance_down() are called in turn
 * for as long as cursor_advance_down() returns a positive value.
 *
 * Returns 0 once cursor_advance_down() returns 0.  If it returns a
 * negative value, the cursor is released with release_cursor() and that
 * value is returned.
 */
int btree_probe(struct cursor *cursor, tuxkey_t key)
{
	int status = cursor_read_root(cursor);

	if (status < 0)
		return status;

	while (1) {
		cursor_bnode_lookup(cursor, key);
		status = cursor_advance_down(cursor);
		if (status <= 0)
			break;
	}

	/* status is 0 on success, negative on failure */
	if (status < 0)
		release_cursor(cursor);

	return status;
}
/*
 * This is range deletion. So, instead of adjusting the balance of the
 * space on sibling nodes for each change, this just removes the range
 * and merges from right to left, even if the nodes do not share the
 * same parent.
 *
 *          +--------------- (A, B, C) -------------------+
 *          |                    |                        |
 *     +-- (AA, AB, AC) -+  +- (BA, BB, BC) -+   + (CA, CB, CC) +
 *     |        |        |  |        |       |   |       |      |
 * (AAA,AAB)(ABA,ABB)(ACA,ACB) (BAA,BAB)(BBA)(BCA,BCB) (CAA)(CBA,CBB)(CCA)
 *
 * [less : A, AA, AAA, AAB, AB, ABA, ABB, AC, ACA, ACB, B, BA ... : greater]
 *
 * If we merged from a cousin (or re-distributed), we may have to update
 * the index up to the common parent. (e.g. removed (ACB), then merged
 * from (BAA,BAB) to (ACA), we have to adjust B in root node to BB)
 *
 * See adjust_parent_sep().
 *
 * FIXME: no re-distribute. So, we don't guarantee more than 50%
 * space efficiency. And if the range is at the end of the keys
 * (truncate() case), we don't need to merge or adjust_parent_sep().
 *
 * FIXME2: we may want to split chop work for each step, instead of
 * blocking for a long time.
 *
 * @btree: tree to chop; nothing is done (returns 0) if has_root() is false
 * @start: first key of the range to remove
 * @len:   length of the range; if len >= TUXKEY_LIMIT the chop limit is
 *         TUXKEY_LIMIT, otherwise it is start + len
 *
 * btree->lock is taken with down_write() around the probe and the walk,
 * and dropped with up_write() before returning.
 *
 * Returns 0 on success, -ENOMEM if one of the allocations fails, or the
 * non-zero value returned by btree_probe() / cursor_redirect(), or the
 * negative value returned by ops->leaf_chop() / cursor_advance_down().
 */
int btree_chop(struct btree *btree, tuxkey_t start, u64 len)
{
	/* Function-entry marker, only emitted when DEBUG_MODE_K is 1 */
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = btree->sb;
	struct btree_ops *ops = btree->ops;
	/*
	 * prev[level]: last node kept at each index level (merge target
	 * for the next node popped at that level).
	 * leafprev: last leaf kept (merge target for the next leaf).
	 */
	struct buffer_head **prev, *leafprev = NULL;
	/* Per-level record of chopped index entries, see log_bnode_del() below */
	struct chopped_index_info *cii;
	struct cursor *cursor;
	tuxkey_t limit;
	int ret, done = 0;

	if (!has_root(btree))
		return 0;

	/* Chop all range if len >= TUXKEY_LIMIT */
	limit = (len >= TUXKEY_LIMIT) ? TUXKEY_LIMIT : start + len;

	/* One zeroed slot per index level (btree->root.depth of them) */
	prev = malloc(sizeof(*prev) * btree->root.depth);
	if (prev == NULL)
		return -ENOMEM;
	memset(prev, 0, sizeof(*prev) * btree->root.depth);

	cii = malloc(sizeof(*cii) * btree->root.depth);
	if (cii == NULL) {
		ret = -ENOMEM;
		goto error_cii;
	}
	memset(cii, 0, sizeof(*cii) * btree->root.depth);

	cursor = alloc_cursor(btree, 0);
	if (!cursor) {
		ret = -ENOMEM;
		goto error_alloc_cursor;
	}

	down_write(&btree->lock);
	ret = btree_probe(cursor, start);
	if (ret)
		goto error_btree_probe;

	/* Walk leaves */
	while (1) {
		struct buffer_head *leafbuf;
		tuxkey_t this_key;

		/*
		 * FIXME: If leaf was merged and freed later, we don't
		 * need to redirect leaf and leaf_chop()
		 */
		if ((ret = cursor_redirect(cursor)))
			goto out;
		/* From here on leafbuf is owned by this function, not the cursor */
		leafbuf = cursor_pop(cursor);

		/* Adjust start and len for this leaf */
		this_key = cursor_level_this_key(cursor);
		if (start < this_key) {
			/* len is only kept in step with start for a bounded chop */
			if (limit < TUXKEY_LIMIT)
				len -= this_key - start;
			start = this_key;
		}

		/*
		 * leaf_chop() result: negative is an error, any other
		 * non-zero value causes the leaf to be marked dirty.
		 */
		ret = ops->leaf_chop(btree, start, len, bufdata(leafbuf));
		if (ret) {
			if (ret < 0) {
				blockput(leafbuf);
				goto out;
			}
			mark_buffer_dirty_non(leafbuf);
		}

		/* Try to merge this leaf with prev */
		if (leafprev) {
			if (try_leaf_merge(btree, leafprev, leafbuf)) {
				trace(">>> can merge leaf %p into leaf %p", leafbuf, leafprev);
				remove_index(cursor, cii);
				mark_buffer_dirty_non(leafprev);
				blockput_free(sb, leafbuf);
				/* leafbuf is gone; leafprev stays the merge target */
				goto keep_prev_leaf;
			}
			blockput(leafprev);
		}
		leafprev = leafbuf;

keep_prev_leaf:

		/* Once the next key reaches limit, no further leaf is visited */
		if (cursor_level_next_key(cursor) >= limit)
			done = 1;
		/*
		 * Pop and try to merge finished nodes.
		 * When done is set this loop only ends by popping level 0
		 * and jumping to chop_root.
		 */
		while (done || cursor_level_finished(cursor)) {
			struct buffer_head *buf;
			/* Sample the level before cursor_pop() below */
			int level = cursor->level;
			struct chopped_index_info *ciil = &cii[level];

			/* Get merge src buffer, and go parent level */
			buf = cursor_pop(cursor);

			/*
			 * Logging chopped indexes
			 * FIXME: If node is freed later (e.g. merged),
			 * we don't need to log this
			 */
			if (ciil->count) {
				log_bnode_del(sb, bufindex(buf), ciil->start, ciil->count);
			}
			/* Reset this level's record for the next node */
			memset(ciil, 0, sizeof(*ciil));

			/* Try to merge node with prev */
			if (prev[level]) {
				/* A previous node is never expected at level 0 */
				assert(level);
				if (try_bnode_merge(sb, prev[level], buf)) {
					trace(">>> can merge node %p into node %p", buf, prev[level]);
					remove_index(cursor, cii);
					mark_buffer_unify_non(prev[level]);
					blockput_free_unify(sb, buf);
					/* buf is gone; prev[level] stays the merge target */
					goto keep_prev_node;
				}
				blockput(prev[level]);
			}
			prev[level] = buf;
keep_prev_node:

			/* Level 0 was just popped: the walk is over */
			if (!level)
				goto chop_root;
		}

		/* Push back down to leaf level */
		do {
			ret = cursor_advance_down(cursor);
			if (ret < 0)
				goto out;
		} while (ret);
	}

chop_root:
	/* Remove depth if possible */
	while (btree->root.depth > 1 && bcount(bufdata(prev[0])) == 1) {
		trace("drop btree level");
		/* The node kept at level 1 becomes the new root */
		btree->root.block = bufindex(prev[1]);
		btree->root.depth--;
		tux3_mark_btree_dirty(btree);

		/*
		 * We know prev[0] is redirected and dirty. So, in
		 * here, we can just cancel bnode_redirect by bfree(),
		 * instead of defered_bfree()
		 * FIXME: we can optimize freeing bnode without
		 * bnode_redirect, and if we did, this is not true.
		 */
		bfree(sb, bufindex(prev[0]), 1);
		log_bnode_free(sb, bufindex(prev[0]));
		blockput_free_unify(sb, prev[0]);

		/* Shift prev[] down one level to match the reduced depth */
		vecmove(prev, prev + 1, btree->root.depth);
	}
	ret = 0;

out:
	/* Put the buffers still held as merge targets, then the cursor's path */
	if (leafprev)
		blockput(leafprev);
	for (int i = 0; i < btree->root.depth; i++) {
		if (prev[i])
			blockput(prev[i]);
	}
	release_cursor(cursor);
error_btree_probe:
	up_write(&btree->lock);

	free_cursor(cursor);
error_alloc_cursor:
	free(cii);
error_cii:
	free(prev);

	return ret;
}