/*
 * Cursor down to child node or leaf, and update ->next.
 * < 0 - error
 * 0 - there is no further child (leaf was pushed)
 * 1 - there is child
 */
static int cursor_advance_down(struct cursor *cursor)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__);
	}
	struct btree *btree = cursor->btree;
	struct buffer_head *buffer;
	block_t child;

	/* Must not already be at the leaf level. */
	assert(cursor->level < btree->root.depth);

	/* ->next points at the index entry to descend through. */
	child = be64_to_cpu(cursor->path[cursor->level].next->block);
	buffer = vol_bread(btree->sb, child);
	if (!buffer)
		return -EIO;	/* FIXME: stupid, it might have been NOMEM */

	/* Consume this entry so the next advance takes the following child. */
	cursor->path[cursor->level].next++;
	if (cursor->level < btree->root.depth - 1) {
		/* Child is an interior bnode: push it with its first entry. */
		struct bnode *node = bufdata(buffer);
		assert(bnode_sniff(node));
		/* cursor_push takes over the buffer reference. */
		cursor_push(cursor, buffer, node->entries);
		cursor_check(cursor);
		return 1;
	}
	/* Child is a leaf: push with NULL ->next (no further descent). */
	assert(btree->ops->leaf_sniff(btree, bufdata(buffer)));
	cursor_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 0;
}
/*
 * Advance cursor to the next leaf of the btree.
 *
 * Pops finished levels, then descends through the next unvisited
 * index entry down to a leaf.
 *
 * Returns 1 when a new leaf was pushed, 0 when the traversal is
 * complete (cursor fully popped), < 0 on error.
 */
int advance(struct btree *btree, struct cursor *cursor)
{
	int depth = btree->root.depth, level = depth;
	struct buffer_head *buffer;

	/* Pop (and release) levels whose entries are exhausted. */
	do {
		level_pop_brelse(cursor);
		if (!level)
			return 0;	/* popped the root: traversal done */
		level--;
	} while (level_finished(cursor, level));

	/* Descend through the next entry at each level down to the leaf. */
	while (1) {
		buffer = sb_bread(vfs_sb(btree->sb), from_be_u64(cursor->path[level].next->block));
		if (!buffer)
			goto eek;
		/* Consume the entry we just followed. */
		cursor->path[level].next++;
		if (level + 1 == depth)
			break;	/* buffer is the leaf */
		level_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries);
		level++;
	}
	/* NULL ->next marks the leaf level. */
	level_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 1;
eek:
	/* Drop all buffers still held by the cursor on I/O failure. */
	release_cursor(cursor);
	return -EIO;
}
/*
 * Walk the btree from the root to the leaf that covers @key, pushing
 * each visited buffer onto @cursor.
 *
 * At each bnode the scan stops at the first entry whose key exceeds
 * @key, so (next - 1) is the entry to descend through; ->next on the
 * pushed level already points past it, ready for advance().
 *
 * Returns 0 with the leaf pushed, or -EIO on read failure.
 */
int probe(struct btree *btree, tuxkey_t key, struct cursor *cursor)
{
	unsigned i, depth = btree->root.depth;
	struct buffer_head *buffer = sb_bread(vfs_sb(btree->sb), btree->root.block);
	if (!buffer)
		return -EIO;
	struct bnode *node = bufdata(buffer);

	for (i = 0; i < depth; i++) {
		struct index_entry *next = node->entries, *top = next + bcount(node);
		/* Linear scan for the first key greater than @key. */
		while (++next < top) /* binary search goes here */
			if (from_be_u64(next->key) > key)
				break;
		trace("probe level %i, %ti of %i", i, next - node->entries, bcount(node));
		/* Push this bnode; descend through the entry before ->next. */
		level_push(cursor, buffer, next);
		if (!(buffer = sb_bread(vfs_sb(btree->sb), from_be_u64((next - 1)->block))))
			goto eek;
		node = (struct bnode *)bufdata(buffer);
	}
	/* After depth descents, buffer is the leaf. */
	assert((btree->ops->leaf_sniff)(btree, bufdata(buffer)));
	level_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 0;
eek:
	/* Release every buffer pushed so far. */
	release_cursor(cursor);
	return -EIO; /* stupid, it might have been NOMEM */
}
/*
 * Standalone driver: bring up logging, run the cursor consistency
 * check, then shut logging down and report success.
 */
int main(void)
{
	int status = EXIT_SUCCESS;

	/* Route DEBUG-and-above log output to a file. */
	log_init();
	log_set_level(LOG_DEBUG, NULL);
	log_set_file("cursor_check.log");

	/* The check under test. */
	cursor_check();

	log_fini();
	return status;
}
/*
 * Recursively redirect non-dirty buffers on path to modify leaf.
 *
 * Redirect order is from root to leaf. Otherwise, blocks of path will
 * be allocated by reverse order.
 *
 * FIXME: We can allocate/copy blocks before change common ancestor
 * (before changing common ancestor, changes are not visible for
 * reader). With this, we may be able to reduce locking time.
 */
int cursor_redirect(struct cursor *cursor)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__);
	}
	struct btree *btree = cursor->btree;
	struct sb *sb = btree->sb;
	int level;

	/* Walk root (level 0) down to leaf (level == depth). */
	for (level = 0; level <= btree->root.depth; level++) {
		struct buffer_head *buffer, *clone;
		block_t parent, oldblock, newblock;
		struct index_entry *entry;
		int redirect, is_leaf = (level == btree->root.depth);

		buffer = cursor->path[level].buffer;
		/* If buffer needs to redirect to dirty, redirect it */
		if (is_leaf)
			redirect = leaf_need_redirect(sb, buffer);
		else
			redirect = bnode_need_redirect(sb, buffer);
		/* No need to redirect */
		if (!redirect)
			continue;

		/* Redirect buffer before changing */
		clone = new_block(btree);
		if (IS_ERR(clone))
			return PTR_ERR(clone);
		oldblock = bufindex(buffer);
		newblock = bufindex(clone);
		trace("redirect %Lx to %Lx", oldblock, newblock);
		/* Swap the clone into the cursor path at this level. */
		level_redirect_blockput(cursor, level, clone);
		if (is_leaf) {
			/* This is leaf buffer */
			mark_buffer_dirty_atomic(clone);
			log_leaf_redirect(sb, oldblock, newblock);
			/* Old leaf block is freed deferred via ->defree. */
			defer_bfree(&sb->defree, oldblock, 1);
		} else {
			/* This is bnode buffer */
			mark_buffer_unify_atomic(clone);
			log_bnode_redirect(sb, oldblock, newblock);
			/* Old bnode block is freed deferred via ->deunify. */
			defer_bfree(&sb->deunify, oldblock, 1);
		}

		trace("update parent");
		if (!level) {
			/* Update pointer in btree->root */
			trace("redirect root");
			assert(oldblock == btree->root.block);
			btree->root.block = newblock;
			tux3_mark_btree_dirty(btree);
			continue;
		}
		/* Update entry on parent for the redirected block */
		/* Parent's ->next points past the entry we descended through. */
		parent = bufindex(cursor->path[level - 1].buffer);
		entry = cursor->path[level - 1].next - 1;
		entry->block = cpu_to_be64(newblock);
		log_bnode_update(sb, parent, newblock, be64_to_cpu(entry->key));
	}
	cursor_check(cursor);
	return 0;
}
/*
 * Insert new leaf to next cursor position.
 * keep == 1: keep current cursor position.
 * keep == 0, set cursor position to new leaf.
 */
static int insert_leaf(struct cursor *cursor, tuxkey_t childkey, struct buffer_head *leafbuf, int keep)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__);
	}
	struct btree *btree = cursor->btree;
	struct sb *sb = btree->sb;
	int level = btree->root.depth;
	block_t childblock = bufindex(leafbuf);

	if (keep)
		/* Caller keeps old position; drop our ref on the new leaf. */
		blockput(leafbuf);
	else {
		/* Replace leaf at cursor tip with the new one. */
		cursor_pop_blockput(cursor);
		cursor_push(cursor, leafbuf, NULL);
	}
	/* Insert (childblock, childkey) into parents, splitting upward. */
	while (level--) {
		struct path_level *at = &cursor->path[level];
		struct buffer_head *parentbuf = at->buffer;
		struct bnode *parent = bufdata(parentbuf);

		/* insert and exit if not full */
		if (bcount(parent) < btree->sb->entries_per_node) {
			bnode_add_index(parent, at->next, childblock, childkey);
			if (!keep)
				at->next++;
			log_bnode_add(sb, bufindex(parentbuf), childblock, childkey);
			mark_buffer_unify_non(parentbuf);
			cursor_check(cursor);
			return 0;
		}

		/* split a full index node */
		struct buffer_head *newbuf = new_node(btree);
		if (IS_ERR(newbuf))
			return PTR_ERR(newbuf);
		struct bnode *newnode = bufdata(newbuf);
		unsigned half = bcount(parent) / 2;
		/* Key of the first entry that moves to the new node. */
		u64 newkey = be64_to_cpu(parent->entries[half].key);
		bnode_split(parent, half, newnode);
		log_bnode_split(sb, bufindex(parentbuf), half, bufindex(newbuf));

		/* if the cursor is in the new node, use that as the parent */
		int child_is_left = at->next <= parent->entries + half;
		if (!child_is_left) {
			struct index_entry *newnext;
			mark_buffer_unify_non(parentbuf);
			/* Translate ->next into the new node's entry array. */
			newnext = newnode->entries + (at->next - &parent->entries[half]);
			/* level_replace_blockput consumes a ref; take one first. */
			get_bh(newbuf);
			level_replace_blockput(cursor, level, newbuf, newnext);
			parentbuf = newbuf;
			parent = newnode;
		} else
			mark_buffer_unify_non(newbuf);

		bnode_add_index(parent, at->next, childblock, childkey);
		if (!keep)
			at->next++;
		log_bnode_add(sb, bufindex(parentbuf), childblock, childkey);
		mark_buffer_unify_non(parentbuf);

		/* The new sibling becomes the child to insert one level up. */
		childkey = newkey;
		childblock = bufindex(newbuf);
		blockput(newbuf);
		/*
		 * If child is in left bnode, we should keep the
		 * cursor position to child, otherwise adjust cursor
		 * to new bnode.
		 */
		keep = child_is_left;
	}
	/* Make new root bnode */
	/* The old root split too: grow the tree by one level. */
	trace("add tree level");
	struct buffer_head *newbuf = new_node(btree);
	if (IS_ERR(newbuf))
		return PTR_ERR(newbuf);
	struct bnode *newroot = bufdata(newbuf);
	block_t newrootblock = bufindex(newbuf);
	block_t oldrootblock = btree->root.block;
	/* Is the cursor currently in the old (left) root node? */
	int left_node = bufindex(cursor->path[0].buffer) != childblock;
	bnode_init_root(newroot, 2, oldrootblock, childblock, childkey);
	cursor_root_add(cursor, newbuf, newroot->entries + 1 + !left_node);
	log_bnode_root(sb, newrootblock, 2, oldrootblock, childblock, childkey);

	/* Change btree to point the new root */
	btree->root.block = newrootblock;
	btree->root.depth++;
	mark_buffer_unify_non(newbuf);
	tux3_mark_btree_dirty(btree);
	cursor_check(cursor);
	return 0;
}
/*
 * Redirect not-yet-dirty buffers on the cursor path so the leaf can
 * be modified.
 *
 * Works leaf-to-root: each redirected child's new block number is
 * carried in @child and patched into its parent's index entry on the
 * next iteration. Stops early when a buffer needs no redirect, since
 * then its ancestors are already redirected.
 *
 * NOTE(review): this variant uses rollup marking/free lists
 * (mark_buffer_rollup_atomic, sb->derollup) — presumably an earlier
 * naming of the unify mechanism; confirm against the tree in use.
 */
int cursor_redirect(struct cursor *cursor)
{
	struct btree *btree = cursor->btree;
	unsigned level = btree->root.depth;	/* start at the leaf */
	struct sb *sb = btree->sb;
	block_t uninitialized_var(child);	/* new block of the level below */

	while (1) {
		struct buffer_head *buffer;
		block_t uninitialized_var(oldblock);
		block_t uninitialized_var(newblock);
		int redirect, is_leaf = (level == btree->root.depth);

		buffer = cursor->path[level].buffer;
		/* If buffer needs to redirect to dirty, redirect it */
		if (is_leaf)
			redirect = leaf_need_redirect(sb, buffer);
		else
			redirect = bnode_need_redirect(sb, buffer);

		if (redirect) {
			/* Redirect buffer before changing */
			struct buffer_head *clone = new_block(btree);
			if (IS_ERR(clone))
				return PTR_ERR(clone);
			oldblock = bufindex(buffer);
			newblock = bufindex(clone);
			trace("redirect %Lx to %Lx", oldblock, newblock);
			level_redirect_blockput(cursor, level, clone);
			if (is_leaf) {
				/* This is leaf buffer */
				mark_buffer_dirty_atomic(clone);
				log_leaf_redirect(sb, oldblock, newblock);
				defer_bfree(&sb->defree, oldblock, 1);
				/* Leaf has no child entry to update; go up. */
				goto parent_level;
			}
			/* This is bnode buffer */
			mark_buffer_rollup_atomic(clone);
			log_bnode_redirect(sb, oldblock, newblock);
			defer_bfree(&sb->derollup, oldblock, 1);
		} else {
			if (is_leaf) {
				/* This is leaf buffer */
				goto parent_level;
			}
		}

		/* Update entry for the redirected child block */
		/* ->next points past the entry descended through. */
		trace("update parent");
		block_t block = bufindex(cursor->path[level].buffer);
		struct index_entry *entry = cursor->path[level].next - 1;
		entry->block = cpu_to_be64(child);
		log_bnode_update(sb, block, child, be64_to_cpu(entry->key));

parent_level:
		/* If it is already redirected, ancestor is also redirected */
		if (!redirect) {
			cursor_check(cursor);
			return 0;
		}

		if (!level--) {
			/* Redirected the root itself: point btree at it. */
			trace("redirect root");
			assert(oldblock == btree->root.block);
			btree->root.block = newblock;
			tux3_mark_btree_dirty(btree);
			cursor_check(cursor);
			return 0;
		}
		/* Carry our new location up for the parent's entry fix. */
		child = newblock;
	}
}