/*
 * Recursively redirect non-dirty buffers on the path before modifying
 * the leaf.
 *
 * Redirect order is from root to leaf. Otherwise, the blocks of the
 * path would be allocated in reverse order.
 *
 * FIXME: We could allocate/copy blocks before changing the common
 * ancestor (until the common ancestor changes, the changes are not
 * visible to readers). With this, we may be able to reduce locking
 * time.
 */
int cursor_redirect(struct cursor *cursor)
{
	struct btree *btree = cursor->btree;
	struct sb *sb = btree->sb;
	int level;

	for (level = 0; level <= btree->root.depth; level++) {
		struct buffer_head *buffer, *clone;
		block_t parent, oldblock, newblock;
		struct index_entry *entry;
		int redirect, is_leaf = (level == btree->root.depth);

		buffer = cursor->path[level].buffer;
		/* If buffer needs to redirect to dirty, redirect it */
		if (is_leaf)
			redirect = leaf_need_redirect(sb, buffer);
		else
			redirect = bnode_need_redirect(sb, buffer);

		/* No need to redirect */
		if (!redirect)
			continue;

		/* Redirect buffer before changing */
		clone = new_block(btree);
		if (IS_ERR(clone))
			return PTR_ERR(clone);
		oldblock = bufindex(buffer);
		newblock = bufindex(clone);
		trace("redirect %Lx to %Lx", oldblock, newblock);
		level_redirect_blockput(cursor, level, clone);
		if (is_leaf) {
			/* This is the leaf buffer */
			mark_buffer_dirty_atomic(clone);
			log_leaf_redirect(sb, oldblock, newblock);
			defer_bfree(&sb->defree, oldblock, 1);
		} else {
			/* This is a bnode buffer */
			mark_buffer_unify_atomic(clone);
			log_bnode_redirect(sb, oldblock, newblock);
			defer_bfree(&sb->deunify, oldblock, 1);
		}

		trace("update parent");
		if (!level) {
			/* Update pointer in btree->root */
			trace("redirect root");
			assert(oldblock == btree->root.block);
			btree->root.block = newblock;
			tux3_mark_btree_dirty(btree);
			continue;
		}
		/* Update entry on parent for the redirected block */
		parent = bufindex(cursor->path[level - 1].buffer);
		entry = cursor->path[level - 1].next - 1;
		entry->block = cpu_to_be64(newblock);
		log_bnode_update(sb, parent, newblock, be64_to_cpu(entry->key));
	}

	cursor_check(cursor);
	return 0;
}
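/*
 * Usage sketch (illustrative, not part of the original source): a
 * typical modify path probes the btree to build the cursor, then
 * redirects the whole path before touching the leaf, so every block
 * on the path is copy-on-write dirty for this delta. The helper names
 * below (alloc_cursor, btree_probe, cursor_leafbuf, release_cursor,
 * free_cursor) are assumptions about the surrounding cursor API.
 */
#if 0
static int modify_leaf_example(struct btree *btree, tuxkey_t key)
{
	struct cursor *cursor = alloc_cursor(btree, 0);	/* assumed helper */
	int err;

	if (!cursor)
		return -ENOMEM;
	err = btree_probe(cursor, key);		/* descend root to leaf */
	if (!err) {
		/* Redirect every non-dirty block on the path */
		err = cursor_redirect(cursor);
		if (!err) {
			/* ... modify bufdata(cursor_leafbuf(cursor)) ... */
		}
		release_cursor(cursor);
	}
	free_cursor(cursor);
	return err;
}
#endif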
static int replay_log_stage2(struct replay *rp, struct buffer_head *logbuf)
{
	struct sb *sb = rp->sb;
	struct logblock *log = bufdata(logbuf);
	block_t blocknr = rp->blocknrs[bufindex(logbuf)];
	unsigned char *data = log->data;
	int err;

	/*
	 * The log block address itself works as a balloc log; adjust
	 * bitmap and deunify even if the logblock is before the latest
	 * unify, to prevent it from being overwritten. (This must be
	 * after LOG_FREEBLOCKS replay, if there is one.)
	 */
	trace("LOG BLOCK: logblock %Lx", blocknr);
	err = replay_update_bitmap(rp, blocknr, 1, 1);
	if (err)
		return err;
	/* Mark log block as deunify block */
	defer_bfree(&sb->deunify, blocknr, 1);

	/* If log is before latest unify, those were already applied to FS. */
	if (bufindex(logbuf) < rp->unify_index) {
		// assert(0);	/* older logs should already be freed */
		return 0;
	}
	if (bufindex(logbuf) == rp->unify_index)
		data = rp->unify_pos;

	while (data < log->data + be16_to_cpu(log->bytes)) {
		u8 code = *data++;

		switch (code) {
		case LOG_BALLOC:
		case LOG_BFREE:
		case LOG_BFREE_ON_UNIFY:
		case LOG_BFREE_RELOG:
		{
			u64 block;
			u32 count;
			data = decode32(data, &count);
			data = decode48(data, &block);
			trace("%s: count %u, block %Lx",
			      log_name[code], count, block);
			err = 0;
			if (code == LOG_BALLOC)
				err = replay_update_bitmap(rp, block, count, 1);
			else if (code == LOG_BFREE_ON_UNIFY)
				defer_bfree(&sb->deunify, block, count);
			else
				err = replay_update_bitmap(rp, block, count, 0);
			if (err)
				return err;
			break;
		}
		case LOG_LEAF_REDIRECT:
		case LOG_BNODE_REDIRECT:
		{
			u64 oldblock, newblock;
			data = decode48(data, &oldblock);
			data = decode48(data, &newblock);
			trace("%s: oldblock %Lx, newblock %Lx",
			      log_name[code], oldblock, newblock);
			err = replay_update_bitmap(rp, newblock, 1, 1);
			if (err)
				return err;
			if (code == LOG_LEAF_REDIRECT) {
				err = replay_update_bitmap(rp, oldblock, 1, 0);
				if (err)
					return err;
			} else {
				/* newblock is not flushed yet */
				defer_bfree(&sb->deunify, oldblock, 1);
			}
			break;
		}
		case LOG_LEAF_FREE:
		case LOG_BNODE_FREE:
		{
			u64 block;
			data = decode48(data, &block);
			trace("%s: block %Lx", log_name[code], block);
			err = replay_update_bitmap(rp, block, 1, 0);
			if (err)
				return err;
			if (code == LOG_BNODE_FREE) {
				struct buffer_head *buffer =
					vol_find_get_block(sb, block);
				blockput_free_unify(sb, buffer);
			}
			break;
		}
		case LOG_BNODE_ROOT:
		{
			u64 root, left, right, rkey;
			u8 count;
			count = *data++;
			data = decode48(data, &root);
			data = decode48(data, &left);
			data = decode48(data, &right);
			data = decode48(data, &rkey);
			trace("%s: count %u, root block %Lx, left %Lx, right %Lx, rkey %Lx",
			      log_name[code], count, root, left, right, rkey);
			err = replay_update_bitmap(rp, root, 1, 1);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_SPLIT:
		{
			unsigned pos;
			u64 src, dst;
			data = decode16(data, &pos);
			data = decode48(data, &src);
			data = decode48(data, &dst);
			trace("%s: pos %x, src %Lx, dst %Lx",
			      log_name[code], pos, src, dst);
			err = replay_update_bitmap(rp, dst, 1, 1);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_MERGE:
		{
			u64 src, dst;
			data = decode48(data, &src);
			data = decode48(data, &dst);
			trace("%s: src 0x%Lx, dst 0x%Lx",
			      log_name[code], src, dst);
			err = replay_update_bitmap(rp, src, 1, 0);
			if (err)
				return err;
			blockput_free_unify(sb, vol_find_get_block(sb, src));
			break;
		}
		case LOG_ORPHAN_ADD:
		case LOG_ORPHAN_DEL:
		{
			unsigned version;
			u64 inum;
			data = decode16(data, &version);
			data = decode48(data, &inum);
			trace("%s: version 0x%x, inum 0x%Lx",
			      log_name[code], version, inum);
			if (code == LOG_ORPHAN_ADD)
				err = replay_orphan_add(rp, version, inum);
			else
				err = replay_orphan_del(rp, version, inum);
			if (err)
				return err;
			break;
		}
		case LOG_FREEBLOCKS:
		case LOG_BNODE_ADD:
		case LOG_BNODE_UPDATE:
		case LOG_BNODE_DEL:
		case LOG_BNODE_ADJUST:
		case LOG_UNIFY:
		case LOG_DELTA:
			/* Not handled in stage 2; skip the payload */
			data += log_size[code] - sizeof(code);
			break;
		default:
			tux3_err(sb, "unrecognized log code 0x%x", code);
			return -EINVAL;
		}
	}

	return 0;
}
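/*
 * Decoding sketch (illustrative, not original source): each log record
 * is a one-byte code followed by variable-length fields, so a
 * LOG_BALLOC record decodes as [code][count:32][block:48], matching
 * the decode32()/decode48() calls above. This hypothetical helper
 * shows the walk over a single record; it is inferred from the replay
 * loop, not a format specification.
 */
#if 0
static unsigned char *example_decode_balloc(unsigned char *data,
					    u32 *count, u64 *block)
{
	u8 code = *data++;
	assert(code == LOG_BALLOC);
	data = decode32(data, count);	/* extent length */
	data = decode48(data, block);	/* start of extent */
	return data;
}
#endif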
int alloc_empty_btree(struct btree *btree)
{
	struct sb *sb = btree->sb;
	struct buffer_head *rootbuf = new_node(btree);
	if (IS_ERR(rootbuf))
		goto error;
	struct buffer_head *leafbuf = new_leaf(btree);
	if (IS_ERR(leafbuf))
		goto error_leafbuf;

	assert(!has_root(btree));
	struct bnode *rootnode = bufdata(rootbuf);
	block_t rootblock = bufindex(rootbuf);
	block_t leafblock = bufindex(leafbuf);
	trace("root at %Lx", rootblock);
	trace("leaf at %Lx", leafblock);
	bnode_init_root(rootnode, 1, leafblock, 0, 0);
	log_bnode_root(sb, rootblock, 1, leafblock, 0, 0);
	log_balloc(sb, leafblock, 1);

	mark_buffer_unify_non(rootbuf);
	blockput(rootbuf);
	mark_buffer_dirty_non(leafbuf);
	blockput(leafbuf);

	btree->root = (struct root){ .block = rootblock, .depth = 1 };
	tux3_mark_btree_dirty(btree);

	return 0;

error_leafbuf:
	(btree->ops->bfree)(sb, bufindex(rootbuf), 1);
	blockput(rootbuf);
	rootbuf = leafbuf;
error:
	return PTR_ERR(rootbuf);
}

/* FIXME: right? and this should be done by btree_chop()? */
int free_empty_btree(struct btree *btree)
{
	struct btree_ops *ops = btree->ops;

	if (!has_root(btree))
		return 0;

	assert(btree->root.depth == 1);
	struct sb *sb = btree->sb;
	struct buffer_head *rootbuf = vol_bread(sb, btree->root.block);
	if (!rootbuf)
		return -EIO;
	assert(bnode_sniff(bufdata(rootbuf)));
	/* Make btree have no root */
	btree->root = no_root;
	tux3_mark_btree_dirty(btree);

	struct bnode *rootnode = bufdata(rootbuf);
	assert(bcount(rootnode) == 1);
	block_t leaf = be64_to_cpu(rootnode->entries[0].block);
	struct buffer_head *leafbuf = vol_find_get_block(sb, leaf);

	if (leafbuf && !leaf_need_redirect(sb, leafbuf)) {
		/*
		 * This leaf was already redirected, so here we can just
		 * cancel leaf_redirect with bfree() instead of
		 * defer_bfree().
		 */
		bfree(sb, leaf, 1);
		log_leaf_free(sb, leaf);
		assert(ops->leaf_can_free(btree, bufdata(leafbuf)));
		blockput_free(sb, leafbuf);
	} else {
		defer_bfree(&sb->defree, leaf, 1);
		log_bfree(sb, leaf, 1);
		if (leafbuf) {
			assert(ops->leaf_can_free(btree, bufdata(leafbuf)));
			blockput(leafbuf);
		}
	}

	if (!bnode_need_redirect(sb, rootbuf)) {
		/*
		 * This bnode was already redirected, so here we can just
		 * cancel bnode_redirect with bfree() instead of
		 * defer_bfree().
		 */
		bfree(sb, bufindex(rootbuf), 1);
		log_bnode_free(sb, bufindex(rootbuf));
		blockput_free_unify(sb, rootbuf);
	} else {
		defer_bfree(&sb->deunify, bufindex(rootbuf), 1);
		log_bfree_on_unify(sb, bufindex(rootbuf), 1);
		blockput(rootbuf);
	}

	return 0;
}

int replay_bnode_redirect(struct replay *rp, block_t oldblock, block_t newblock)
{
	struct sb *sb = rp->sb;
	struct buffer_head *newbuf, *oldbuf;
	int err = 0;

	newbuf = vol_getblk(sb, newblock);
	if (!newbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error;
	}
	oldbuf = vol_bread(sb, oldblock);
	if (!oldbuf) {
		err = -EIO;	/* FIXME: error code */
		goto error_put_newbuf;
	}
	assert(bnode_sniff(bufdata(oldbuf)));

	memcpy(bufdata(newbuf), bufdata(oldbuf), bufsize(newbuf));
	mark_buffer_unify_atomic(newbuf);
	blockput(oldbuf);

error_put_newbuf:
	blockput(newbuf);
error:
	return err;
}

int replay_bnode_root(struct replay *rp, block_t root, unsigned count,
		      block_t left, block_t right, tuxkey_t rkey)
{
	struct sb *sb = rp->sb;
	struct buffer_head *rootbuf;

	rootbuf = vol_getblk(sb, root);
	if (!rootbuf)
		return -ENOMEM;

	bnode_buffer_init(rootbuf);
	bnode_init_root(bufdata(rootbuf), count, left, right, rkey);
	mark_buffer_unify_atomic(rootbuf);

	blockput(rootbuf);

	return 0;
}

/*
 * Before this replay, the buffer of src should already be dirty
 * (e.g. by redirect).
 */
int replay_bnode_split(struct replay *rp, block_t src, unsigned pos,
		       block_t dst)
{
	struct sb *sb = rp->sb;
	struct buffer_head *srcbuf, *dstbuf;
	int err = 0;

	srcbuf = vol_getblk(sb, src);
	if (!srcbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error;
	}
	dstbuf = vol_getblk(sb, dst);
	if (!dstbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error_put_srcbuf;
	}
	bnode_buffer_init(dstbuf);

	bnode_split(bufdata(srcbuf), pos, bufdata(dstbuf));
	mark_buffer_unify_non(srcbuf);
	mark_buffer_unify_atomic(dstbuf);

	blockput(dstbuf);
error_put_srcbuf:
	blockput(srcbuf);
error:
	return err;
}
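/*
 * Pattern note (added commentary): replay_bnode_redirect(),
 * replay_bnode_split() and replay_bnode_merge() all share the same
 * two-buffer unwind shape: acquire buffers in order, then release
 * through cascading labels so each failure path drops exactly the
 * buffers taken so far. A minimal sketch of the shape, with
 * hypothetical names:
 */
#if 0
static int example_two_buffer_op(struct sb *sb, block_t a, block_t b)
{
	struct buffer_head *abuf, *bbuf;
	int err = 0;

	abuf = vol_getblk(sb, a);
	if (!abuf) {
		err = -ENOMEM;
		goto error;
	}
	bbuf = vol_getblk(sb, b);
	if (!bbuf) {
		err = -ENOMEM;
		goto error_put_abuf;
	}

	/* ... operate on bufdata(abuf) and bufdata(bbuf) ... */

	blockput(bbuf);
error_put_abuf:
	blockput(abuf);
error:
	return err;
}
#endif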
/*
 * Before this replay, the buffer of bnodeblock should already be dirty
 * (e.g. by redirect).
 */
static int replay_bnode_change(struct sb *sb, block_t bnodeblock,
			       u64 val1, u64 val2,
			       void (*change)(struct bnode *, u64, u64))
{
	struct buffer_head *bnodebuf;

	bnodebuf = vol_getblk(sb, bnodeblock);
	if (!bnodebuf)
		return -ENOMEM;	/* FIXME: error code */

	struct bnode *bnode = bufdata(bnodebuf);
	change(bnode, val1, val2);
	mark_buffer_unify_non(bnodebuf);

	blockput(bnodebuf);

	return 0;
}

static void add_func(struct bnode *bnode, u64 child, u64 key)
{
	struct index_entry *entry = bnode_lookup(bnode, key) + 1;
	bnode_add_index(bnode, entry, child, key);
}

int replay_bnode_add(struct replay *rp, block_t parent, block_t child,
		     tuxkey_t key)
{
	return replay_bnode_change(rp->sb, parent, child, key, add_func);
}

static void update_func(struct bnode *bnode, u64 child, u64 key)
{
	struct index_entry *entry = bnode_lookup(bnode, key);
	assert(be64_to_cpu(entry->key) == key);
	entry->block = cpu_to_be64(child);
}

int replay_bnode_update(struct replay *rp, block_t parent, block_t child,
			tuxkey_t key)
{
	return replay_bnode_change(rp->sb, parent, child, key, update_func);
}

int replay_bnode_merge(struct replay *rp, block_t src, block_t dst)
{
	struct sb *sb = rp->sb;
	struct buffer_head *srcbuf, *dstbuf;
	int err = 0, ret;

	srcbuf = vol_getblk(sb, src);
	if (!srcbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error;
	}
	dstbuf = vol_getblk(sb, dst);
	if (!dstbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error_put_srcbuf;
	}

	ret = bnode_merge_nodes(sb, bufdata(dstbuf), bufdata(srcbuf));
	assert(ret == 1);
	mark_buffer_unify_non(dstbuf);
	mark_buffer_unify_non(srcbuf);

	blockput(dstbuf);
error_put_srcbuf:
	blockput(srcbuf);
error:
	return err;
}

static void del_func(struct bnode *bnode, u64 key, u64 count)
{
	struct index_entry *entry = bnode_lookup(bnode, key);
	assert(be64_to_cpu(entry->key) == key);
	bnode_remove_index(bnode, entry, count);
}

int replay_bnode_del(struct replay *rp, block_t bnode, tuxkey_t key,
		     unsigned count)
{
	return replay_bnode_change(rp->sb, bnode, key, count, del_func);
}

static void adjust_func(struct bnode *bnode, u64 from, u64 to)
{
	struct index_entry *entry = bnode_lookup(bnode, from);
	assert(be64_to_cpu(entry->key) == from);
	entry->key = cpu_to_be64(to);
}

int replay_bnode_adjust(struct replay *rp, block_t bnode, tuxkey_t from,
			tuxkey_t to)
{
	return replay_bnode_change(rp->sb, bnode, from, to, adjust_func);
}
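/*
 * Extension sketch (illustrative, not original source): each
 * replay_bnode_* wrapper above pairs a small change callback with
 * replay_bnode_change(), which owns the buffer lookup, dirty marking
 * and release. A hypothetical new replay op would follow the same
 * shape; example_func and replay_bnode_example are made-up names.
 */
#if 0
static void example_func(struct bnode *bnode, u64 key, u64 unused)
{
	struct index_entry *entry = bnode_lookup(bnode, key);
	assert(be64_to_cpu(entry->key) == key);
	/* ... mutate the entry in place ... */
}

int replay_bnode_example(struct replay *rp, block_t bnode, tuxkey_t key)
{
	return replay_bnode_change(rp->sb, bnode, key, 0, example_func);
}
#endif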