/*
 * Attempt to fold the bnode held in @frombuf into @intobuf.
 * On success the source block is released immediately and the merge is
 * logged; returns 1.  Returns 0 when the nodes do not fit together.
 */
static int try_bnode_merge(struct sb *sb, struct buffer_head *intobuf, struct buffer_head *frombuf)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }

	struct bnode *dst = bufdata(intobuf);
	struct bnode *src = bufdata(frombuf);

	/* Bail out early when the two nodes cannot be merged */
	if (!bnode_merge_nodes(sb, dst, src))
		return 0;

	/*
	 * We know frombuf is redirected and dirty. So, in here, we can
	 * just cancel bnode_redirect by bfree(), instead of
	 * defered_bfree().
	 * FIXME: we can optimize freeing bnode without bnode_redirect,
	 * and if we did, this is not true.
	 */
	bfree(sb, bufindex(frombuf), 1);
	log_bnode_merge(sb, bufindex(frombuf), bufindex(intobuf));
	return 1;
}
/*
 * Attempt to fold the leaf held in @frombuf into @intobuf via the
 * btree's leaf_merge op.  On success the source block is released
 * immediately and the free is logged; returns 1.  Returns 0 when the
 * leaves do not fit together.
 */
static int try_leaf_merge(struct btree *btree, struct buffer_head *intobuf, struct buffer_head *frombuf)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }

	struct vleaf *src = bufdata(frombuf);
	struct vleaf *dst = bufdata(intobuf);

	/* Bail out early when the merge does not succeed */
	if (!btree->ops->leaf_merge(btree, dst, src))
		return 0;

	struct sb *sb = btree->sb;
	/*
	 * We know frombuf is redirected and dirty. So, in here, we can
	 * just cancel leaf_redirect by bfree(), instead of
	 * defered_bfree().
	 * FIXME: we can optimize freeing leaf without leaf_redirect,
	 * and if we did, this is not true.
	 */
	bfree(sb, bufindex(frombuf), 1);
	log_leaf_free(sb, bufindex(frombuf));
	return 1;
}
/* Prepare log info for replay and pin logblocks. */
static struct replay *replay_prepare(struct sb *sb)
{
	if(DEBUG_MODE_K==1) { printk(KERN_INFO"%25s %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	/* Head of the on-disk log chain; walked newest-to-oldest below */
	block_t logchain = be64_to_cpu(sb->super.logchain);
	unsigned i, logcount = be32_to_cpu(sb->super.logcount);
	struct replay *rp;
	struct buffer_head *buffer;
	int err;

	/* FIXME: this address array is quick hack. Rethink about log
	 * block management and log block address. */
	rp = alloc_replay(sb, logcount);
	if (IS_ERR(rp))
		return rp;

	/* FIXME: maybe, we should use bufvec to read log blocks */
	trace("load %u logblocks", logcount);
	i = logcount;
	/* Read blocks back-to-front so logmap index i matches chain order */
	while (i-- > 0) {
		struct logblock *log;
		buffer = blockget(mapping(sb->logmap), i);
		if (!buffer) {
			/* Nothing was pinned at index i: undo the i-- so the
			 * unpin in the error path only covers pinned blocks */
			i++;
			err = -ENOMEM;
			goto error;
		}
		assert(bufindex(buffer) == i);
		/* Read this log block from the chain position */
		err = blockio(READ, sb, buffer, logchain);
		if (err)
			goto error;
		/* Validate magic, size and log codes before trusting it */
		err = replay_check_log(rp, buffer);
		if (err)
			goto error;
		/* Store index => blocknr map */
		rp->blocknrs[bufindex(buffer)] = logchain;
		/* Follow the chain to the previous (older) log block */
		log = bufdata(buffer);
		logchain = be64_to_cpu(log->logchain);
	}

	return rp;

error:
	free_replay(rp);
	/* Release the log blocks pinned so far: indexes [i, logcount) */
	replay_unpin_logblocks(sb, i, logcount);
	return ERR_PTR(err);
}
/*
 * Drop our reference on @buffer and free its on-disk block via the
 * btree's bfree op.  The block is only freed when we held the last
 * reference; otherwise just warn and release.
 */
static void brelse_free(struct btree *btree, struct buffer_head *buffer)
{
	struct sb *sb = btree->sb;
	block_t block = bufindex(buffer);
	if (bufcount(buffer) != 1) {
		/* Someone else still holds a reference: don't free the block */
		warn("free block %Lx still in use!", (L)bufindex(buffer));
		brelse(buffer);
		/*
		 * NOTE(review): bufcount was != 1 above, so a single
		 * brelse() cannot bring it to 0 here — this assert looks
		 * like it fires whenever this branch is taken.  Confirm
		 * intended semantics (possibly meant bufcount == 0 check
		 * before warning, as in other tux3 variants).
		 */
		assert(bufcount(buffer) == 0);
		return;
	}
	brelse(buffer);
	/* Last reference dropped: return the block to the allocator */
	(btree->ops->bfree)(sb, block, 1);
	set_buffer_empty(buffer); // free it!!! (and need a buffer free state)
}
/* unused */ void show_cursor(struct cursor *cursor, int depth) { printf(">>> cursor %p/%i:", cursor, depth); for (int i = 0; i < depth; i++) printf(" [%Lx/%i]", (L)bufindex(cursor->path[i].buffer), bufcount(cursor->path[i].buffer)); printf("\n"); }
/*
 * Split leaf, then insert to parent.
 * key: key to add after split (cursor will point leaf which is including key)
 * hint: hint for split
 *
 * return value:
 * 0 - success
 * < 0 - error
 */
static int btree_leaf_split(struct cursor *cursor, tuxkey_t key, tuxkey_t hint)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	trace("split leaf");
	struct btree *btree = cursor->btree;

	/* Allocate the new right-hand leaf and log the allocation */
	struct buffer_head *newbuf = new_leaf(btree);
	if (IS_ERR(newbuf))
		return PTR_ERR(newbuf);
	log_balloc(btree->sb, bufindex(newbuf), 1);

	struct buffer_head *leafbuf = cursor_leafbuf(cursor);
	tuxkey_t newkey = btree->ops->leaf_split(btree, hint, bufdata(leafbuf), bufdata(newbuf));
	/* The split point must fall strictly inside this leaf's key range */
	assert(cursor_this_key(cursor) < newkey);
	assert(newkey < cursor_next_key(cursor));

	/* keep != 0 means @key stays in the original (left) leaf */
	int keep = key < newkey;
	/* Dirty the half that will NOT receive @key now */
	mark_buffer_dirty_non(keep ? newbuf : leafbuf);

	return insert_leaf(cursor, newkey, newbuf, keep);
}
/*
 * Debug-only consistency check: walk the cursor path from the root and
 * verify that each pinned buffer matches the block its parent entry
 * points at, and that the separating keys are monotonically increasing.
 */
static void cursor_check(struct cursor *cursor)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	/* Nothing pinned yet */
	if (cursor->level == -1)
		return;

	tuxkey_t key = 0;
	block_t block = cursor->btree->root.block;

	for (int i = 0; i <= cursor->level; i++) {
		/* Buffer at this level must be the block the parent pointed to */
		assert(bufindex(cursor->path[i].buffer) == block);
		if (i == cursor->level)
			break;

		struct bnode *bnode = level_node(cursor, i);
		/* next points one past the entry taken; back up to it */
		struct index_entry *entry = cursor->path[i].next - 1;
		assert(bnode->entries <= entry);
		assert(entry < bnode->entries + bcount(bnode));

		/*
		 * If this entry is most left, it should be same key
		 * with parent. Otherwise, most left key may not be
		 * correct as next key.
		 */
		if (bnode->entries == entry)
			assert(be64_to_cpu(entry->key) == key);
		else
			assert(be64_to_cpu(entry->key) > key);

		block = be64_to_cpu(entry->block);
		key = be64_to_cpu(entry->key);
	}
}
/*
 * Propagate a changed separating key upward: rewrite the parent entry's
 * key at @level with @newsep, and keep climbing while the adjusted entry
 * is the leftmost of its bnode (meaning the grandparent's key also
 * covers it).  Stops at the nearest common parent.
 */
static void adjust_parent_sep(struct cursor *cursor, int level, __be64 newsep)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	/* Update separating key until nearest common parent */
	while (level >= 0) {
		struct path_level *parent_at = &cursor->path[level];
		/* next points one past the entry taken; back up to it */
		struct index_entry *parent = parent_at->next - 1;

		/* Separators only grow, and 0 is never a valid separator here */
		assert(0 < be64_to_cpu(parent->key));
		assert(be64_to_cpu(parent->key) < be64_to_cpu(newsep));
		log_bnode_adjust(cursor->btree->sb, bufindex(parent_at->buffer), be64_to_cpu(parent->key), be64_to_cpu(newsep));
		parent->key = newsep;
		mark_buffer_unify_non(parent_at->buffer);

		/* Done unless the adjusted entry is leftmost in its bnode */
		if (parent != level_node(cursor, level)->entries)
			break;

		level--;
	}
}
/* unused */
/* Debug dump: print each pinned level of @cursor up to @depth. */
void show_cursor(struct cursor *cursor, int depth)
{
	int level;

	__tux3_dbg(">>> cursor %p/%i:", cursor, depth);
	for (level = 0; level < depth; level++)
		__tux3_dbg(" [%Lx/%i]",
			   bufindex(cursor->path[level].buffer),
			   bufcount(cursor->path[level].buffer));
	__tux3_dbg("\n");
}
static int try_bnode_merge(struct sb *sb, struct buffer_head *intobuf, struct buffer_head *frombuf) { struct bnode *into = bufdata(intobuf); struct bnode *from = bufdata(frombuf); /* Try to merge nodes */ if (bnode_merge_nodes(sb, into, from)) { /* * We know frombuf is redirected and dirty. So, in * here, we can just cancel bnode_redirect by bfree(), * instead of defered_bfree() * FIXME: we can optimize freeing bnode without * bnode_redirect, and if we did, this is not true. */ bfree(sb, bufindex(frombuf), 1); log_bnode_merge(sb, bufindex(frombuf), bufindex(intobuf)); return 1; } return 0; }
/* unused */
/* Debug dump: print each pinned level of @cursor up to @depth. */
void show_cursor(struct cursor *cursor, int depth)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }

	int level = 0;

	__tux3_dbg(">>> cursor %p/%i:", cursor, depth);
	while (level < depth) {
		struct buffer_head *buf = cursor->path[level].buffer;

		__tux3_dbg(" [%Lx/%i]", bufindex(buf), bufcount(buf));
		level++;
	}
	__tux3_dbg("\n");
}
/*
 * Debug-only consistency check (older variant): walk the cursor path
 * from the root, verifying each pinned buffer matches the block its
 * parent entry points at and that keys never decrease.
 */
static void cursor_check(struct cursor *cursor)
{
	/* Nothing pinned yet */
	if (cursor->len == 0)
		return;

	tuxkey_t key = 0;
	block_t block = cursor->btree->root.block;

	for (int i = 0; i < cursor->len; i++) {
		/* Buffer at this level must be the block the parent pointed to */
		assert(bufindex(cursor->path[i].buffer) == block);
		/* A NULL next marks the leaf level — nothing below to follow */
		if (!cursor->path[i].next)
			break;

		struct bnode *node = cursor_node(cursor, i);
		/* next points one past the entry taken within this bnode */
		assert(node->entries < cursor->path[i].next);
		assert(cursor->path[i].next <= node->entries + bcount(node));
		/* Keys along the path are non-decreasing */
		assert(from_be_u64((cursor->path[i].next - 1)->key) >= key);

		block = from_be_u64((cursor->path[i].next - 1)->block);
		key = from_be_u64((cursor->path[i].next - 1)->key);
	}
}
/*
 * Delete @entry from the directory block held in @buffer: its space is
 * coalesced into the previous entry's rec_len (ext2-style) and the
 * entry itself is zeroed.  Consumes the reference on @buffer on all
 * paths.
 *
 * Returns 0 on success, -EIO on a corrupt (zero-length) entry, or the
 * error from blockdirty().
 */
int tux_delete_entry(struct inode *dir, struct buffer_head *buffer, tux_dirent *entry)
{
	unsigned delta = tux3_get_current_delta();
	tux_dirent *prev = NULL, *this = bufdata(buffer);
	struct buffer_head *clone;
	void *olddata;

	/* Scan from the start of the block to find the entry before @entry */
	while ((char *)this < (char *)entry) {
		if (this->rec_len == 0) {
			/*
			 * Fix: capture the block index BEFORE dropping our
			 * reference — bufindex(buffer) must not be called
			 * after blockput() may have released the buffer.
			 */
			block_t index = bufindex(buffer);

			blockput(buffer);
			tux_zero_len_error(dir, index);
			return -EIO;
		}
		prev = this;
		this = next_entry(this);
	}

	/*
	 * The directory is protected by i_mutex.
	 * blockdirty() should never return -EAGAIN.
	 */
	olddata = bufdata(buffer);
	clone = blockdirty(buffer, delta);
	if (IS_ERR(clone)) {
		assert(PTR_ERR(clone) != -EAGAIN);
		blockput(buffer);
		return PTR_ERR(clone);
	}
	/* The block may have been forked; rebase our pointers onto the clone */
	entry = ptr_redirect(entry, olddata, bufdata(clone));
	prev = ptr_redirect(prev, olddata, bufdata(clone));

	/* Absorb the deleted entry's space into the previous entry */
	if (prev)
		prev->rec_len = tux_rec_len_to_disk((void *)next_entry(entry) - (void *)prev);
	/* Wipe the dead entry so stale data is not left on disk */
	memset(entry->name, 0, entry->name_len);
	entry->name_len = entry->type = 0;
	entry->inum = 0;
	mark_buffer_dirty_non(clone);
	blockput(clone);
	return 0;
}
static int replay_check_log(struct replay *rp, struct buffer_head *logbuf) { if(DEBUG_MODE_K==1) { printk(KERN_INFO"%25s %25s %4d #in\n",__FILE__,__func__,__LINE__); } struct sb *sb = rp->sb; struct logblock *log = bufdata(logbuf); unsigned char *data = log->data; if (log->magic != cpu_to_be16(TUX3_MAGIC_LOG)) { tux3_err(sb, "bad log magic %x", be16_to_cpu(log->magic)); return -EINVAL; } if (be16_to_cpu(log->bytes) + sizeof(*log) > sb->blocksize) { tux3_err(sb, "log bytes is too big"); return -EINVAL; } while (data < log->data + be16_to_cpu(log->bytes)) { u8 code = *data; /* Find latest unify. */ if (code == LOG_UNIFY && rp->unify_index == -1) { rp->unify_pos = data; /* FIXME: index is unnecessary to use. We just * want to know whether before or after unify * mark. */ rp->unify_index = bufindex(logbuf); } if (log_size[code] == 0) { tux3_err(sb, "invalid log code: 0x%02x", code); return -EINVAL; } data += log_size[code]; } return 0; }
/*
 * Split leaf, then insert to parent.
 * key: key to add after split (cursor will point leaf which is including key)
 * hint: hint for split
 *
 * return value:
 * 0 - success
 * < 0 - error
 */
static int btree_leaf_split(struct cursor *cursor, tuxkey_t key, tuxkey_t hint)
{
	trace("split leaf");
	struct btree *btree = cursor->btree;
	struct buffer_head *newbuf;

	/* Allocate the new right-hand leaf and log the allocation */
	newbuf = new_leaf(btree);
	if (IS_ERR(newbuf))
		return PTR_ERR(newbuf);
	log_balloc(btree->sb, bufindex(newbuf), 1);

	struct buffer_head *leafbuf = cursor_leafbuf(cursor);
	tuxkey_t newkey = btree->ops->leaf_split(btree, hint, bufdata(leafbuf), bufdata(newbuf));
	/* The split point must fall strictly inside this leaf's key range */
	assert(cursor_this_key(cursor) < newkey);
	assert(newkey < cursor_next_key(cursor));

	/* Dirty the half that does NOT receive @key; the other half is
	 * dirtied by the caller's subsequent insert */
	if (key < newkey)
		mark_buffer_dirty_non(newbuf);
	else
		mark_buffer_dirty_non(leafbuf);

	/* keep cursor on the original leaf iff @key stays there */
	return insert_leaf(cursor, newkey, newbuf, key < newkey);
}
/*
 * Propagate a changed separating key upward (rollup variant): rewrite
 * the parent entry's key at @level with @newsep, climbing while the
 * adjusted entry is the leftmost of its bnode.  Stops at the nearest
 * common parent.
 */
static void adjust_parent_sep(struct cursor *cursor, int level, __be64 newsep)
{
	/* Update separating key until nearest common parent */
	while (level >= 0) {
		struct path_level *parent_at = &cursor->path[level];
		/* next points one past the entry taken; back up to it */
		struct index_entry *parent = parent_at->next - 1;

		/* Separators only grow, and 0 is never a valid separator here */
		assert(0 < be64_to_cpu(parent->key));
		assert(be64_to_cpu(parent->key) < be64_to_cpu(newsep));
		log_bnode_adjust(cursor->btree->sb, bufindex(parent_at->buffer), be64_to_cpu(parent->key), be64_to_cpu(newsep));
		parent->key = newsep;
		mark_buffer_rollup_non(parent_at->buffer);

		/* Done unless the adjusted entry is leftmost in its bnode */
		if (parent != level_node(cursor, level)->entries)
			break;

		level--;
	}
}
/*
 * Recursively redirect non-dirty buffers on path to modify leaf.
 *
 * Redirect order is from root to leaf. Otherwise, blocks of path will
 * be allocated by reverse order.
 *
 * FIXME: We can allocate/copy blocks before change common ancestor
 * (before changing common ancestor, changes are not visible for
 * reader). With this, we may be able to reduce locking time.
 */
int cursor_redirect(struct cursor *cursor)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct btree *btree = cursor->btree;
	struct sb *sb = btree->sb;
	int level;

	/* Walk top-down; level == depth is the leaf, all others are bnodes */
	for (level = 0; level <= btree->root.depth; level++) {
		struct buffer_head *buffer, *clone;
		block_t parent, oldblock, newblock;
		struct index_entry *entry;
		int redirect, is_leaf = (level == btree->root.depth);

		buffer = cursor->path[level].buffer;
		/* If buffer needs to redirect to dirty, redirect it */
		if (is_leaf)
			redirect = leaf_need_redirect(sb, buffer);
		else
			redirect = bnode_need_redirect(sb, buffer);

		/* No need to redirect */
		if (!redirect)
			continue;

		/* Redirect buffer before changing */
		clone = new_block(btree);
		if (IS_ERR(clone))
			return PTR_ERR(clone);
		oldblock = bufindex(buffer);
		newblock = bufindex(clone);
		trace("redirect %Lx to %Lx", oldblock, newblock);
		/* Swap the clone into the cursor path, dropping the old buffer */
		level_redirect_blockput(cursor, level, clone);
		if (is_leaf) {
			/* This is leaf buffer */
			mark_buffer_dirty_atomic(clone);
			log_leaf_redirect(sb, oldblock, newblock);
			defer_bfree(&sb->defree, oldblock, 1);
		} else {
			/* This is bnode buffer */
			mark_buffer_unify_atomic(clone);
			log_bnode_redirect(sb, oldblock, newblock);
			defer_bfree(&sb->deunify, oldblock, 1);
		}

		trace("update parent");
		if (!level) {
			/* Update pointer in btree->root */
			trace("redirect root");
			assert(oldblock == btree->root.block);
			btree->root.block = newblock;
			tux3_mark_btree_dirty(btree);
			continue;
		}
		/* Update entry on parent for the redirected block */
		parent = bufindex(cursor->path[level - 1].buffer);
		entry = cursor->path[level - 1].next - 1;
		entry->block = cpu_to_be64(newblock);
		log_bnode_update(sb, parent, newblock, be64_to_cpu(entry->key));
	}
	cursor_check(cursor);
	return 0;
}
/*
 * Create a minimal btree: one root bnode pointing at one empty leaf.
 * Logs the allocations so replay can reconstruct the tree.
 * Returns 0 on success, or the PTR_ERR of the failed allocation.
 */
int alloc_empty_btree(struct btree *btree)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct sb *sb = btree->sb;
	struct buffer_head *rootbuf = new_node(btree);
	if (IS_ERR(rootbuf))
		goto error;
	struct buffer_head *leafbuf = new_leaf(btree);
	if (IS_ERR(leafbuf))
		goto error_leafbuf;

	assert(!has_root(btree));
	struct bnode *rootnode = bufdata(rootbuf);
	block_t rootblock = bufindex(rootbuf);
	block_t leafblock = bufindex(leafbuf);
	trace("root at %Lx", rootblock);
	trace("leaf at %Lx", leafblock);
	/* Root has a single entry pointing at the empty leaf */
	bnode_init_root(rootnode, 1, leafblock, 0, 0);
	log_bnode_root(sb, rootblock, 1, leafblock, 0, 0);
	log_balloc(sb, leafblock, 1);

	mark_buffer_unify_non(rootbuf);
	blockput(rootbuf);
	mark_buffer_dirty_non(leafbuf);
	blockput(leafbuf);

	btree->root = (struct root){ .block = rootblock, .depth = 1 };
	tux3_mark_btree_dirty(btree);

	return 0;

error_leafbuf:
	/* Leaf allocation failed: return the already-allocated root block */
	(btree->ops->bfree)(sb, bufindex(rootbuf), 1);
	blockput(rootbuf);
	/* Reuse rootbuf to carry the error pointer out */
	rootbuf = leafbuf;
error:
	return PTR_ERR(rootbuf);
}

/* FIXME: right? and this should be done by btree_chop()? */
/*
 * Tear down a depth-1 btree whose single leaf is empty, freeing (or
 * defer-freeing) both the leaf and root blocks and logging each action.
 */
int free_empty_btree(struct btree *btree)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct btree_ops *ops = btree->ops;

	if (!has_root(btree))
		return 0;

	assert(btree->root.depth == 1);
	struct sb *sb = btree->sb;
	struct buffer_head *rootbuf = vol_bread(sb, btree->root.block);
	if (!rootbuf)
		return -EIO;
	assert(bnode_sniff(bufdata(rootbuf)));
	/* Make btree has no root */
	btree->root = no_root;
	tux3_mark_btree_dirty(btree);

	struct bnode *rootnode = bufdata(rootbuf);
	assert(bcount(rootnode) == 1);
	block_t leaf = be64_to_cpu(rootnode->entries[0].block);
	struct buffer_head *leafbuf = vol_find_get_block(sb, leaf);

	if (leafbuf && !leaf_need_redirect(sb, leafbuf)) {
		/*
		 * This is redirected leaf. So, in here, we can just
		 * cancel leaf_redirect by bfree(), instead of
		 * defered_bfree().
		 */
		bfree(sb, leaf, 1);
		log_leaf_free(sb, leaf);
		assert(ops->leaf_can_free(btree, bufdata(leafbuf)));
		blockput_free(sb, leafbuf);
	} else {
		/* Not (or not provably) redirected: free after commit */
		defer_bfree(&sb->defree, leaf, 1);
		log_bfree(sb, leaf, 1);
		if (leafbuf) {
			assert(ops->leaf_can_free(btree, bufdata(leafbuf)));
			blockput(leafbuf);
		}
	}

	if (!bnode_need_redirect(sb, rootbuf)) {
		/*
		 * This is redirected bnode. So, in here, we can just
		 * cancel bnode_redirect by bfree(), instead of
		 * defered_bfree().
		 */
		bfree(sb, bufindex(rootbuf), 1);
		log_bnode_free(sb, bufindex(rootbuf));
		blockput_free_unify(sb, rootbuf);
	} else {
		/* Not redirected: free at unify time */
		defer_bfree(&sb->deunify, bufindex(rootbuf), 1);
		log_bfree_on_unify(sb, bufindex(rootbuf), 1);
		blockput(rootbuf);
	}

	return 0;
}

/*
 * Replay a LOG_BNODE_REDIRECT record: copy the old bnode's contents
 * into the new block and mark it for unify.
 */
int replay_bnode_redirect(struct replay *rp, block_t oldblock, block_t newblock)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct sb *sb = rp->sb;
	struct buffer_head *newbuf, *oldbuf;
	int err = 0;

	newbuf = vol_getblk(sb, newblock);
	if (!newbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error;
	}
	oldbuf = vol_bread(sb, oldblock);
	if (!oldbuf) {
		err = -EIO;	/* FIXME: error code */
		goto error_put_newbuf;
	}
	assert(bnode_sniff(bufdata(oldbuf)));

	memcpy(bufdata(newbuf), bufdata(oldbuf), bufsize(newbuf));
	mark_buffer_unify_atomic(newbuf);
	blockput(oldbuf);

error_put_newbuf:
	blockput(newbuf);
error:
	return err;
}

/*
 * Replay a LOG_BNODE_ROOT record: reconstruct a root bnode with up to
 * two children (left/right split at rkey).
 */
int replay_bnode_root(struct replay *rp, block_t root, unsigned count, block_t left, block_t right, tuxkey_t rkey)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct sb *sb = rp->sb;
	struct buffer_head *rootbuf;

	rootbuf = vol_getblk(sb, root);
	if (!rootbuf)
		return -ENOMEM;
	bnode_buffer_init(rootbuf);
	bnode_init_root(bufdata(rootbuf), count, left, right, rkey);
	mark_buffer_unify_atomic(rootbuf);
	blockput(rootbuf);
	return 0;
}

/*
 * Before this replay, replay should already dirty the buffer of src.
 * (e.g. by redirect)
 */
int replay_bnode_split(struct replay *rp, block_t src, unsigned pos, block_t dst)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct sb *sb = rp->sb;
	struct buffer_head *srcbuf, *dstbuf;
	int err = 0;

	srcbuf = vol_getblk(sb, src);
	if (!srcbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error;
	}
	dstbuf = vol_getblk(sb, dst);
	if (!dstbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error_put_srcbuf;
	}
	bnode_buffer_init(dstbuf);

	/* Move entries at/after pos from src into the fresh dst bnode */
	bnode_split(bufdata(srcbuf), pos, bufdata(dstbuf));
	mark_buffer_unify_non(srcbuf);
	mark_buffer_unify_atomic(dstbuf);
	blockput(dstbuf);

error_put_srcbuf:
	blockput(srcbuf);
error:
	return err;
}

/*
 * Before this replay, replay should already dirty the buffer of bnodeblock.
 * (e.g. by redirect)
 */
/* Common helper: load a bnode, apply @change(val1, val2) to it, mark
 * it for unify and release it. */
static int replay_bnode_change(struct sb *sb, block_t bnodeblock, u64 val1, u64 val2, void (*change)(struct bnode *, u64, u64))
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct buffer_head *bnodebuf;

	bnodebuf = vol_getblk(sb, bnodeblock);
	if (!bnodebuf)
		return -ENOMEM;	/* FIXME: error code */

	struct bnode *bnode = bufdata(bnodebuf);
	change(bnode, val1, val2);
	mark_buffer_unify_non(bnodebuf);
	blockput(bnodebuf);
	return 0;
}

/* Insert (child, key) immediately after the lookup position for key. */
static void add_func(struct bnode *bnode, u64 child, u64 key)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct index_entry *entry = bnode_lookup(bnode, key) + 1;
	bnode_add_index(bnode, entry, child, key);
}

/* Replay a LOG_BNODE_ADD record via the generic change helper. */
int replay_bnode_add(struct replay *rp, block_t parent, block_t child, tuxkey_t key)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	return replay_bnode_change(rp->sb, parent, child, key, add_func);
}

/* Repoint the entry for @key at a new child block. */
static void update_func(struct bnode *bnode, u64 child, u64 key)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct index_entry *entry = bnode_lookup(bnode, key);
	assert(be64_to_cpu(entry->key) == key);
	entry->block = cpu_to_be64(child);
}

/* Replay a LOG_BNODE_UPDATE record via the generic change helper. */
int replay_bnode_update(struct replay *rp, block_t parent, block_t child, tuxkey_t key)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	return replay_bnode_change(rp->sb, parent, child, key, update_func);
}

/* Replay a LOG_BNODE_MERGE record: fold the src bnode into dst. */
int replay_bnode_merge(struct replay *rp, block_t src, block_t dst)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct sb *sb = rp->sb;
	struct buffer_head *srcbuf, *dstbuf;
	int err = 0, ret;

	srcbuf = vol_getblk(sb, src);
	if (!srcbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error;
	}
	dstbuf = vol_getblk(sb, dst);
	if (!dstbuf) {
		err = -ENOMEM;	/* FIXME: error code */
		goto error_put_srcbuf;
	}

	/* The original merge succeeded, so the replayed one must too */
	ret = bnode_merge_nodes(sb, bufdata(dstbuf), bufdata(srcbuf));
	assert(ret == 1);
	mark_buffer_unify_non(dstbuf);
	mark_buffer_unify_non(srcbuf);
	blockput(dstbuf);

error_put_srcbuf:
	blockput(srcbuf);
error:
	return err;
}

/* Remove @count entries starting at the entry for @key. */
static void del_func(struct bnode *bnode, u64 key, u64 count)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct index_entry *entry = bnode_lookup(bnode, key);
	assert(be64_to_cpu(entry->key) == key);
	bnode_remove_index(bnode, entry, count);
}

/* Replay a LOG_BNODE_DEL record via the generic change helper. */
int replay_bnode_del(struct replay *rp, block_t bnode, tuxkey_t key, unsigned count)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	return replay_bnode_change(rp->sb, bnode, key, count, del_func);
}

/* Rewrite the separating key @from as @to. */
static void adjust_func(struct bnode *bnode, u64 from, u64 to)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct index_entry *entry = bnode_lookup(bnode, from);
	assert(be64_to_cpu(entry->key) == from);
	entry->key = cpu_to_be64(to);
}

/* Replay a LOG_BNODE_ADJUST record via the generic change helper. */
int replay_bnode_adjust(struct replay *rp, block_t bnode, tuxkey_t from, tuxkey_t to)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	return replay_bnode_change(rp->sb, bnode, from, to, adjust_func);
}
/*
 * Insert new leaf to next cursor position.
 * keep == 1: keep current cursor position.
 * keep == 0, set cursor position to new leaf.
 */
static int insert_leaf(struct cursor *cursor, tuxkey_t childkey, struct buffer_head *leafbuf, int keep)
{
	if(DEBUG_MODE_K==1) { printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct btree *btree = cursor->btree;
	struct sb *sb = btree->sb;
	int level = btree->root.depth;
	block_t childblock = bufindex(leafbuf);

	if (keep)
		blockput(leafbuf);
	else {
		/* Repoint the cursor's leaf level at the new leaf */
		cursor_pop_blockput(cursor);
		cursor_push(cursor, leafbuf, NULL);
	}
	/* Bubble the (childblock, childkey) pointer up, splitting full
	 * bnodes as needed; each iteration moves one level toward root */
	while (level--) {
		struct path_level *at = &cursor->path[level];
		struct buffer_head *parentbuf = at->buffer;
		struct bnode *parent = bufdata(parentbuf);

		/* insert and exit if not full */
		if (bcount(parent) < btree->sb->entries_per_node) {
			bnode_add_index(parent, at->next, childblock, childkey);
			if (!keep)
				at->next++;
			log_bnode_add(sb, bufindex(parentbuf), childblock, childkey);
			mark_buffer_unify_non(parentbuf);
			cursor_check(cursor);
			return 0;
		}

		/* split a full index node */
		struct buffer_head *newbuf = new_node(btree);
		if (IS_ERR(newbuf))
			return PTR_ERR(newbuf);
		struct bnode *newnode = bufdata(newbuf);
		unsigned half = bcount(parent) / 2;
		u64 newkey = be64_to_cpu(parent->entries[half].key);
		bnode_split(parent, half, newnode);
		log_bnode_split(sb, bufindex(parentbuf), half, bufindex(newbuf));

		/* if the cursor is in the new node, use that as the parent */
		int child_is_left = at->next <= parent->entries + half;
		if (!child_is_left) {
			struct index_entry *newnext;
			mark_buffer_unify_non(parentbuf);
			/* Translate next into the new node's entry array */
			newnext = newnode->entries + (at->next - &parent->entries[half]);
			get_bh(newbuf);
			level_replace_blockput(cursor, level, newbuf, newnext);
			parentbuf = newbuf;
			parent = newnode;
		} else
			mark_buffer_unify_non(newbuf);

		bnode_add_index(parent, at->next, childblock, childkey);
		if (!keep)
			at->next++;
		log_bnode_add(sb, bufindex(parentbuf), childblock, childkey);
		mark_buffer_unify_non(parentbuf);

		/* The split-off node becomes the child to insert one level up */
		childkey = newkey;
		childblock = bufindex(newbuf);
		blockput(newbuf);
		/*
		 * If child is in left bnode, we should keep the
		 * cursor position to child, otherwise adjust cursor
		 * to new bnode.
		 */
		keep = child_is_left;
	}

	/* Make new root bnode */
	trace("add tree level");
	struct buffer_head *newbuf = new_node(btree);
	if (IS_ERR(newbuf))
		return PTR_ERR(newbuf);
	struct bnode *newroot = bufdata(newbuf);
	block_t newrootblock = bufindex(newbuf);
	block_t oldrootblock = btree->root.block;
	/* Is the cursor currently in the old (left) subtree? */
	int left_node = bufindex(cursor->path[0].buffer) != childblock;
	bnode_init_root(newroot, 2, oldrootblock, childblock, childkey);
	cursor_root_add(cursor, newbuf, newroot->entries + 1 + !left_node);
	log_bnode_root(sb, newrootblock, 2, oldrootblock, childblock, childkey);

	/* Change btree to point the new root */
	btree->root.block = newrootblock;
	btree->root.depth++;
	mark_buffer_unify_non(newbuf);
	tux3_mark_btree_dirty(btree);
	cursor_check(cursor);
	return 0;
}
/*
 * Older (rollup-era) variant: redirect non-dirty buffers on the cursor
 * path, walking bottom-up from the leaf toward the root.  Each
 * redirected child's new block number is patched into its parent on the
 * next iteration.  Stops early once an already-redirected buffer is
 * found, since all its ancestors must be redirected too.
 */
int cursor_redirect(struct cursor *cursor)
{
	struct btree *btree = cursor->btree;
	unsigned level = btree->root.depth;
	struct sb *sb = btree->sb;
	/* Block number of the child redirected on the previous iteration */
	block_t uninitialized_var(child);

	while (1) {
		struct buffer_head *buffer;
		block_t uninitialized_var(oldblock);
		block_t uninitialized_var(newblock);
		int redirect, is_leaf = (level == btree->root.depth);

		buffer = cursor->path[level].buffer;
		/* If buffer needs to redirect to dirty, redirect it */
		if (is_leaf)
			redirect = leaf_need_redirect(sb, buffer);
		else
			redirect = bnode_need_redirect(sb, buffer);

		if (redirect) {
			/* Redirect buffer before changing */
			struct buffer_head *clone = new_block(btree);
			if (IS_ERR(clone))
				return PTR_ERR(clone);
			oldblock = bufindex(buffer);
			newblock = bufindex(clone);
			trace("redirect %Lx to %Lx", oldblock, newblock);
			level_redirect_blockput(cursor, level, clone);
			if (is_leaf) {
				/* This is leaf buffer */
				mark_buffer_dirty_atomic(clone);
				log_leaf_redirect(sb, oldblock, newblock);
				defer_bfree(&sb->defree, oldblock, 1);
				/* Leaf has no child entry to patch */
				goto parent_level;
			}
			/* This is bnode buffer */
			mark_buffer_rollup_atomic(clone);
			log_bnode_redirect(sb, oldblock, newblock);
			defer_bfree(&sb->derollup, oldblock, 1);
		} else {
			if (is_leaf) {
				/* This is leaf buffer */
				goto parent_level;
			}
		}

		/* Update entry for the redirected child block */
		trace("update parent");
		block_t block = bufindex(cursor->path[level].buffer);
		struct index_entry *entry = cursor->path[level].next - 1;
		entry->block = cpu_to_be64(child);
		log_bnode_update(sb, block, child, be64_to_cpu(entry->key));

parent_level:
		/* If it is already redirected, ancestor is also redirected */
		if (!redirect) {
			cursor_check(cursor);
			return 0;
		}

		if (!level--) {
			/* Reached (and redirected) the root itself */
			trace("redirect root");
			assert(oldblock == btree->root.block);
			btree->root.block = newblock;
			tux3_mark_btree_dirty(btree);
			cursor_check(cursor);
			return 0;
		}
		child = newblock;
	}
}
int tree_chop(struct btree *btree, struct delete_info *info, millisecond_t deadline) { int depth = btree->root.depth, level = depth - 1, suspend = 0; struct cursor *cursor; struct buffer_head *leafbuf, **prev, *leafprev = NULL; struct btree_ops *ops = btree->ops; struct sb *sb = btree->sb; int ret; cursor = alloc_cursor(btree, 0); prev = malloc(sizeof(*prev) * depth); memset(prev, 0, sizeof(*prev) * depth); down_write(&btree->lock); probe(btree, info->resume, cursor); leafbuf = level_pop(cursor); /* leaf walk */ while (1) { ret = (ops->leaf_chop)(btree, info->key, bufdata(leafbuf)); if (ret) { mark_buffer_dirty(leafbuf); if (ret < 0) goto error_leaf_chop; } /* try to merge this leaf with prev */ if (leafprev) { struct vleaf *this = bufdata(leafbuf); struct vleaf *that = bufdata(leafprev); /* try to merge leaf with prev */ if ((ops->leaf_need)(btree, this) <= (ops->leaf_free)(btree, that)) { trace(">>> can merge leaf %p into leaf %p", leafbuf, leafprev); (ops->leaf_merge)(btree, that, this); remove_index(cursor, level); mark_buffer_dirty(leafprev); brelse_free(btree, leafbuf); //dirty_buffer_count_check(sb); goto keep_prev_leaf; } brelse(leafprev); } leafprev = leafbuf; keep_prev_leaf: //nanosleep(&(struct timespec){ 0, 50 * 1000000 }, NULL); //printf("time remaining: %Lx\n", deadline - gettime()); // if (deadline && gettime() > deadline) // suspend = -1; if (info->blocks && info->freed >= info->blocks) suspend = -1; /* pop and try to merge finished nodes */ while (suspend || level_finished(cursor, level)) { /* try to merge node with prev */ if (prev[level]) { assert(level); /* node has no prev */ struct bnode *this = cursor_node(cursor, level); struct bnode *that = bufdata(prev[level]); trace_off("check node %p against %p", this, that); trace_off("this count = %i prev count = %i", bcount(this), bcount(that)); /* try to merge with node to left */ if (bcount(this) <= sb->entries_per_node - bcount(that)) { trace(">>> can merge node %p into node %p", this, that); 
merge_nodes(that, this); remove_index(cursor, level - 1); mark_buffer_dirty(prev[level]); brelse_free(btree, level_pop(cursor)); //dirty_buffer_count_check(sb); goto keep_prev_node; } brelse(prev[level]); } prev[level] = level_pop(cursor); keep_prev_node: /* deepest key in the cursor is the resume address */ if (suspend == -1 && !level_finished(cursor, level)) { suspend = 1; /* only set resume once */ info->resume = from_be_u64((cursor->path[level].next)->key); } if (!level) { /* remove depth if possible */ while (depth > 1 && bcount(bufdata(prev[0])) == 1) { trace("drop btree level"); btree->root.block = bufindex(prev[1]); mark_btree_dirty(btree); brelse_free(btree, prev[0]); //dirty_buffer_count_check(sb); depth = --btree->root.depth; vecmove(prev, prev + 1, depth); //set_sb_dirty(sb); } //sb->snapmask &= ~snapmask; delete_snapshot_from_disk(); //set_sb_dirty(sb); //save_sb(sb); ret = suspend; goto out; } level--; trace_off(printf("pop to level %i, block %Lx, %i of %i nodes\n", level, bufindex(cursor->path[level].buffer), cursor->path[level].next - cursor_node(cursor, level)->entries, bcount(cursor_node(cursor, level)));); } /* push back down to leaf level */ while (level < depth - 1) { struct buffer_head *buffer = sb_bread(vfs_sb(sb), from_be_u64(cursor->path[level++].next++->block)); if (!buffer) { ret = -EIO; goto out; } level_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries); trace_off(printf("push to level %i, block %Lx, %i nodes\n", level, bufindex(buffer), bcount(cursor_node(cursor, level)));); }
/*
 * Stage 1 replay: apply the bnode-level records of one log block (those
 * needed to rebuild btree index structure).  Allocation/orphan/etc.
 * records are skipped here (handled in a later stage).  Records older
 * than the latest unify mark were already applied to the filesystem.
 */
static int replay_log_stage1(struct replay *rp, struct buffer_head *logbuf)
{
	if(DEBUG_MODE_K==1) { printk(KERN_INFO"%25s %25s %4d #in\n",__FILE__,__func__,__LINE__); }
	struct sb *sb = rp->sb;
	struct logblock *log = bufdata(logbuf);
	unsigned char *data = log->data;
	int err;

	/* Check whether array is uptodate */
	BUILD_BUG_ON(ARRAY_SIZE(log_name) != LOG_TYPES);
	/* If log is before latest unify, those were already applied to FS. */
	if (bufindex(logbuf) < rp->unify_index) {
		// assert(0); /* older logs should already be freed */
		return 0;
	}
	/* In the unify block itself, start at the unify record */
	if (bufindex(logbuf) == rp->unify_index)
		data = rp->unify_pos;

	/* Decode records sequentially; each case consumes its payload */
	while (data < log->data + be16_to_cpu(log->bytes)) {
		u8 code = *data++;

		switch (code) {
		case LOG_BNODE_REDIRECT:
		{
			u64 oldblock, newblock;
			data = decode48(data, &oldblock);
			data = decode48(data, &newblock);
			trace("%s: oldblock %Lx, newblock %Lx", log_name[code], oldblock, newblock);
			err = replay_bnode_redirect(rp, oldblock, newblock);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_ROOT:
		{
			u64 root, left, right, rkey;
			u8 count;

			count = *data++;
			data = decode48(data, &root);
			data = decode48(data, &left);
			data = decode48(data, &right);
			data = decode48(data, &rkey);
			trace("%s: count %u, root block %Lx, left %Lx, right %Lx, rkey %Lx", log_name[code], count, root, left, right, rkey);
			err = replay_bnode_root(rp, root, count, left, right, rkey);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_SPLIT:
		{
			unsigned pos;
			u64 src, dst;

			data = decode16(data, &pos);
			data = decode48(data, &src);
			data = decode48(data, &dst);
			trace("%s: pos %x, src %Lx, dst %Lx", log_name[code], pos, src, dst);
			err = replay_bnode_split(rp, src, pos, dst);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_ADD:
		case LOG_BNODE_UPDATE:
		{
			u64 child, parent, key;

			data = decode48(data, &parent);
			data = decode48(data, &child);
			data = decode48(data, &key);
			trace("%s: parent 0x%Lx, child 0x%Lx, key 0x%Lx", log_name[code], parent, child, key);
			if (code == LOG_BNODE_UPDATE)
				err = replay_bnode_update(rp, parent, child, key);
			else
				err = replay_bnode_add(rp, parent, child, key);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_MERGE:
		{
			u64 src, dst;

			data = decode48(data, &src);
			data = decode48(data, &dst);
			trace("%s: src 0x%Lx, dst 0x%Lx", log_name[code], src, dst);
			err = replay_bnode_merge(rp, src, dst);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_DEL:
		{
			unsigned count;
			u64 bnode, key;

			data = decode16(data, &count);
			data = decode48(data, &bnode);
			data = decode48(data, &key);
			trace("%s: bnode 0x%Lx, count 0x%x, key 0x%Lx", log_name[code], bnode, count, key);
			err = replay_bnode_del(rp, bnode, key, count);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_ADJUST:
		{
			u64 bnode, from, to;

			data = decode48(data, &bnode);
			data = decode48(data, &from);
			data = decode48(data, &to);
			trace("%s: bnode 0x%Lx, from 0x%Lx, to 0x%Lx", log_name[code], bnode, from, to);
			err = replay_bnode_adjust(rp, bnode, from, to);
			if (err)
				return err;
			break;
		}
		case LOG_FREEBLOCKS:
		{
			u64 freeblocks;

			data = decode48(data, &freeblocks);
			trace("%s: freeblocks %llu", log_name[code], freeblocks);
			/* Restore the free-block count snapshot */
			sb->freeblocks = freeblocks;
			break;
		}
		/* Records not applied in stage 1: skip their payload */
		case LOG_BALLOC:
		case LOG_BFREE:
		case LOG_BFREE_ON_UNIFY:
		case LOG_BFREE_RELOG:
		case LOG_LEAF_REDIRECT:
		case LOG_LEAF_FREE:
		case LOG_BNODE_FREE:
		case LOG_ORPHAN_ADD:
		case LOG_ORPHAN_DEL:
		case LOG_UNIFY:
		case LOG_DELTA:
			data += log_size[code] - sizeof(code);
			break;
		default:
			tux3_err(rp->sb, "unrecognized log code 0x%x", code);
			return -EINVAL;
		}
	}
	return 0;
}
static int tux3_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { struct tux_iattr iattr = { .uid = current_fsuid(), .gid = current_fsgid(), .mode = S_IFLNK | S_IRWXUGO, }; return __tux3_symlink(dir, dentry, &iattr, symname); } #endif /* !__KERNEL__ */ static int tux3_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct sb *sb = tux_sb(inode->i_sb); change_begin(sb); int err = tux_del_dirent(dir, dentry); if (!err) { tux3_iattrdirty(inode); inode->i_ctime = dir->i_ctime; /* FIXME: we shouldn't write inode for i_nlink = 0? */ inode_dec_link_count(inode); } change_end(sb); return err; } static int tux3_rmdir(struct inode *dir, struct dentry *dentry) { struct sb *sb = tux_sb(dir->i_sb); struct inode *inode = dentry->d_inode; int err = tux_dir_is_empty(inode); if (!err) { change_begin(sb); err = tux_del_dirent(dir, dentry); if (!err) { tux3_iattrdirty(inode); inode->i_ctime = dir->i_ctime; /* FIXME: we need to do this for POSIX? */ /* inode->i_size = 0; */ clear_nlink(inode); tux3_mark_inode_dirty_sync(inode); inode_dec_link_count(dir); } change_end(sb); } return err; } static int tux3_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; struct sb *sb = tux_sb(old_inode->i_sb); struct buffer_head *old_buffer, *new_buffer, *clone; tux_dirent *old_entry, *new_entry; void *olddata; int err, new_subdir = 0; unsigned delta; old_entry = tux_find_dirent(old_dir, &old_dentry->d_name, &old_buffer); if (IS_ERR(old_entry)) return PTR_ERR(old_entry); /* FIXME: is this needed? 
*/ assert(be64_to_cpu(old_entry->inum) == tux_inode(old_inode)->inum); change_begin(sb); delta = tux3_get_current_delta(); if (new_inode) { int old_is_dir = S_ISDIR(old_inode->i_mode); if (old_is_dir) { err = tux_dir_is_empty(new_inode); if (err) goto error; } new_entry = tux_find_dirent(new_dir, &new_dentry->d_name, &new_buffer); if (IS_ERR(new_entry)) { assert(PTR_ERR(new_entry) != -ENOENT); err = PTR_ERR(new_entry); goto error; } /* * The directory is protected by i_mutex. * blockdirty() should never return -EAGAIN. */ olddata = bufdata(new_buffer); clone = blockdirty(new_buffer, delta); if (IS_ERR(clone)) { assert(PTR_ERR(clone) != -EAGAIN); blockput(new_buffer); err = PTR_ERR(clone); goto error; } new_entry = ptr_redirect(new_entry, olddata, bufdata(clone)); /* this releases new_buffer */ tux_update_dirent(new_dir, clone, new_entry, old_inode); tux3_iattrdirty(new_inode); new_inode->i_ctime = new_dir->i_ctime; if (old_is_dir) drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { new_subdir = S_ISDIR(old_inode->i_mode) && new_dir != old_dir; if (new_subdir) { if (new_dir->i_nlink >= TUX_LINK_MAX) { err = -EMLINK; goto error; } } err = tux_create_dirent(new_dir, &new_dentry->d_name, old_inode); if (err) goto error; if (new_subdir) inode_inc_link_count(new_dir); } tux3_iattrdirty(old_inode); old_inode->i_ctime = new_dir->i_ctime; tux3_mark_inode_dirty(old_inode); /* * The new entry can be on same buffer with old_buffer, and * may did buffer fork in the above path. So if old_buffer is * forked buffer, we update the old_buffer in here. */ if (buffer_forked(old_buffer)) { clone = blockget(mapping(old_dir), bufindex(old_buffer)); assert(clone); old_entry = ptr_redirect(old_entry, bufdata(old_buffer), bufdata(clone)); blockput(old_buffer); old_buffer = clone; } err = tux_delete_dirent(old_dir, old_buffer, old_entry); if (err) { tux3_fs_error(sb, "couldn't delete old entry (%Lu)", tux_inode(old_inode)->inum); /* FIXME: now, we have hardlink even if it's dir. 
*/ inode_inc_link_count(old_inode); } if (!err && new_subdir) inode_dec_link_count(old_dir); change_end(sb); return err; error: change_end(sb); blockput(old_buffer); return err; } #ifdef __KERNEL__ const struct file_operations tux_dir_fops = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = tux_readdir, .fsync = tux3_sync_file, }; const struct inode_operations tux_dir_iops = { .create = tux3_create, .lookup = tux3_lookup, .link = tux3_link, .unlink = tux3_unlink, .symlink = tux3_symlink, .mkdir = tux3_mkdir, .rmdir = tux3_rmdir, .mknod = tux3_mknod, .rename = tux3_rename, .setattr = tux3_setattr, .getattr = tux3_getattr // .setxattr = generic_setxattr, // .getxattr = generic_getxattr, // .listxattr = ext3_listxattr, // .removexattr = generic_removexattr, // .permission = ext3_permission, /* FIXME: why doesn't ext4 support this for directory? */ // .fallocate = ext4_fallocate, // .fiemap = ext4_fiemap, };
/*
 * This is range deletion. So, instead of adjusting balance of the
 * space on sibling nodes for each change, this just removes the range
 * and merges from right to left even if it is not same parent.
 *
 *              +--------------- (A, B, C)--------------------+
 *              |                    |                        |
 *     +-- (AA, AB, AC) -+       +- (BA, BB, BC) -+      + (CA, CB, CC) +
 *     |        |        |       |        |       |      |       |      |
 * (AAA,AAB)(ABA,ABB)(ACA,ACB) (BAA,BAB)(BBA)(BCA,BCB) (CAA)(CBA,CBB)(CCA)
 *
 * [less : A, AA, AAA, AAB, AB, ABA, ABB, AC, ACA, ACB, B, BA ... : greater]
 *
 * If we merged from cousin (or re-distributed), we may have to update
 * the index until common parent. (e.g. removed (ACB), then merged
 * from (BAA,BAB) to (ACA), we have to adjust B in root node to BB)
 *
 * See, adjust_parent_sep().
 *
 * FIXME: no re-distribute. so, we don't guarantee above than 50%
 * space efficiency. And if range is end of key (truncate() case), we
 * don't need to merge, and adjust_parent_sep().
 *
 * FIXME2: we may want to split chop work for each step, instead of
 * blocking for a long time.
 */
/*
 * Chop keys in [start, start+len) out of the btree.
 *
 * @btree: btree to chop (taken write-locked for the whole operation)
 * @start: first key of the range
 * @len:   range length; >= TUXKEY_LIMIT means "to the end of keyspace"
 *
 * Returns 0 on success, negative errno on failure.
 */
int btree_chop(struct btree *btree, tuxkey_t start, u64 len)
{
	if(DEBUG_MODE_K==1)
	{
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = btree->sb;
	struct btree_ops *ops = btree->ops;
	/* prev[level]: rightmost surviving buffer per level, merge target */
	struct buffer_head **prev, *leafprev = NULL;
	struct chopped_index_info *cii;
	struct cursor *cursor;
	tuxkey_t limit;
	int ret, done = 0;

	if (!has_root(btree))
		return 0;

	/* Chop all range if len >= TUXKEY_LIMIT */
	limit = (len >= TUXKEY_LIMIT) ? TUXKEY_LIMIT : start + len;

	prev = malloc(sizeof(*prev) * btree->root.depth);
	if (prev == NULL)
		return -ENOMEM;
	memset(prev, 0, sizeof(*prev) * btree->root.depth);

	/* Per-level record of chopped index entries, for logging below */
	cii = malloc(sizeof(*cii) * btree->root.depth);
	if (cii == NULL) {
		ret = -ENOMEM;
		goto error_cii;
	}
	memset(cii, 0, sizeof(*cii) * btree->root.depth);

	cursor = alloc_cursor(btree, 0);
	if (!cursor) {
		ret = -ENOMEM;
		goto error_alloc_cursor;
	}

	down_write(&btree->lock);
	ret = btree_probe(cursor, start);
	if (ret)
		goto error_btree_probe;

	/* Walk leaves */
	while (1) {
		struct buffer_head *leafbuf;
		tuxkey_t this_key;

		/*
		 * FIXME: If leaf was merged and freed later, we don't
		 * need to redirect leaf and leaf_chop()
		 */
		if ((ret = cursor_redirect(cursor)))
			goto out;
		leafbuf = cursor_pop(cursor);

		/* Adjust start and len for this leaf */
		this_key = cursor_level_this_key(cursor);
		if (start < this_key) {
			if (limit < TUXKEY_LIMIT)
				len -= this_key - start;
			start = this_key;
		}

		/* leaf_chop: <0 = error, 0 = nothing removed, >0 = dirtied */
		ret = ops->leaf_chop(btree, start, len, bufdata(leafbuf));
		if (ret) {
			if (ret < 0) {
				blockput(leafbuf);
				goto out;
			}
			mark_buffer_dirty_non(leafbuf);
		}

		/* Try to merge this leaf with prev */
		if (leafprev) {
			if (try_leaf_merge(btree, leafprev, leafbuf)) {
				trace(">>> can merge leaf %p into leaf %p",
				      leafbuf, leafprev);
				/* Merged: drop leafbuf's parent index entry */
				remove_index(cursor, cii);
				mark_buffer_dirty_non(leafprev);
				blockput_free(sb, leafbuf);
				goto keep_prev_leaf;
			}
			blockput(leafprev);
		}
		leafprev = leafbuf;

keep_prev_leaf:
		if (cursor_level_next_key(cursor) >= limit)
			done = 1;
		/* Pop and try to merge finished nodes */
		while (done || cursor_level_finished(cursor)) {
			struct buffer_head *buf;
			int level = cursor->level;
			struct chopped_index_info *ciil = &cii[level];

			/* Get merge src buffer, and go parent level */
			buf = cursor_pop(cursor);

			/*
			 * Logging chopped indexes
			 * FIXME: If node is freed later (e.g. merged),
			 * we don't need to log this
			 */
			if (ciil->count) {
				log_bnode_del(sb, bufindex(buf), ciil->start,
					      ciil->count);
			}
			memset(ciil, 0, sizeof(*ciil));

			/* Try to merge node with prev */
			if (prev[level]) {
				assert(level); /* root level has no prev */
				if (try_bnode_merge(sb, prev[level], buf)) {
					trace(">>> can merge node %p into node %p",
					      buf, prev[level]);
					remove_index(cursor, cii);
					mark_buffer_unify_non(prev[level]);
					blockput_free_unify(sb, buf);
					goto keep_prev_node;
				}
				blockput(prev[level]);
			}
			prev[level] = buf;
keep_prev_node:

			if (!level)
				goto chop_root;
		}

		/* Push back down to leaf level */
		do {
			ret = cursor_advance_down(cursor);
			if (ret < 0)
				goto out;
		} while (ret);
	}

chop_root:
	/* Remove depth if possible: root with a single entry is redundant */
	while (btree->root.depth > 1 && bcount(bufdata(prev[0])) == 1) {
		trace("drop btree level");
		btree->root.block = bufindex(prev[1]);
		btree->root.depth--;
		tux3_mark_btree_dirty(btree);

		/*
		 * We know prev[0] is redirected and dirty. So, in
		 * here, we can just cancel bnode_redirect by bfree(),
		 * instead of defered_bfree()
		 * FIXME: we can optimize freeing bnode without
		 * bnode_redirect, and if we did, this is not true.
		 */
		bfree(sb, bufindex(prev[0]), 1);
		log_bnode_free(sb, bufindex(prev[0]));
		blockput_free_unify(sb, prev[0]);

		vecmove(prev, prev + 1, btree->root.depth);
	}
	ret = 0;

out:
	/* Release whatever buffers we are still holding */
	if (leafprev)
		blockput(leafprev);
	for (int i = 0; i < btree->root.depth; i++) {
		if (prev[i])
			blockput(prev[i]);
	}
	release_cursor(cursor);
error_btree_probe:
	up_write(&btree->lock);

	free_cursor(cursor);
error_alloc_cursor:
	free(cii);
error_cii:
	free(prev);

	return ret;
}
/*
 * Replay pass 2: walk every record in one log block and re-apply the
 * allocation-bitmap and orphan-list effects.  The btree-structural
 * records were handled by replay_log_stage1() and are skipped here by
 * size.  Also accounts the log block's own address as allocated and
 * schedules it for freeing at the next unify.
 *
 * @rp:     replay state (carries sb, blocknrs map, unify cursor)
 * @logbuf: buffer holding one logblock to scan
 *
 * Returns 0 on success, negative errno on decode/apply failure.
 */
static int replay_log_stage2(struct replay *rp, struct buffer_head *logbuf)
{
	if(DEBUG_MODE_K==1)
	{
		printk(KERN_INFO"%25s %25s %4d #in\n",__FILE__,__func__,__LINE__);
	}
	struct sb *sb = rp->sb;
	struct logblock *log = bufdata(logbuf);
	block_t blocknr = rp->blocknrs[bufindex(logbuf)];
	unsigned char *data = log->data;
	int err;

	/*
	 * Log block address itself works as balloc log, and adjust
	 * bitmap and deunify even if logblocks is before latest
	 * unify, to prevent to be overwritten. (This must be after
	 * LOG_FREEBLOCKS replay if there is it.)
	 */
	trace("LOG BLOCK: logblock %Lx", blocknr);
	err = replay_update_bitmap(rp, blocknr, 1, 1);
	if (err)
		return err;
	/* Mark log block as deunify block */
	defer_bfree(&sb->deunify, blocknr, 1);

	/* If log is before latest unify, those were already applied to FS. */
	if (bufindex(logbuf) < rp->unify_index) {
//		assert(0);	/* older logs should already be freed */
		return 0;
	}
	/* The unify block itself is replayed only from the post-unify offset. */
	if (bufindex(logbuf) == rp->unify_index)
		data = rp->unify_pos;

	/*
	 * Same record format as stage1: 1-byte code then payload; each
	 * decodeNN() advances "data", so decode order must match encode order.
	 */
	while (data < log->data + be16_to_cpu(log->bytes)) {
		u8 code = *data++;

		switch (code) {
		case LOG_BALLOC:
		case LOG_BFREE:
		case LOG_BFREE_ON_UNIFY:
		case LOG_BFREE_RELOG:
		{
			u64 block;
			u32 count;

			data = decode32(data, &count);
			data = decode48(data, &block);
			trace("%s: count %u, block %Lx",
			      log_name[code], count, block);
			err = 0;
			if (code == LOG_BALLOC)
				err = replay_update_bitmap(rp, block, count, 1);
			else if (code == LOG_BFREE_ON_UNIFY)
				/* freed only at unify time, not immediately */
				defer_bfree(&sb->deunify, block, count);
			else
				err = replay_update_bitmap(rp, block, count, 0);
			if (err)
				return err;
			break;
		}
		case LOG_LEAF_REDIRECT:
		case LOG_BNODE_REDIRECT:
		{
			u64 oldblock, newblock;

			data = decode48(data, &oldblock);
			data = decode48(data, &newblock);
			trace("%s: oldblock %Lx, newblock %Lx",
			      log_name[code], oldblock, newblock);
			/* New location is now in use */
			err = replay_update_bitmap(rp, newblock, 1, 1);
			if (err)
				return err;
			if (code == LOG_LEAF_REDIRECT) {
				err = replay_update_bitmap(rp, oldblock, 1, 0);
				if (err)
					return err;
			} else {
				/* newblock is not flushing yet */
				defer_bfree(&sb->deunify, oldblock, 1);
			}
			break;
		}
		case LOG_LEAF_FREE:
		case LOG_BNODE_FREE:
		{
			u64 block;

			data = decode48(data, &block);
			trace("%s: block %Lx", log_name[code], block);
			err = replay_update_bitmap(rp, block, 1, 0);
			if (err)
				return err;
			if (code == LOG_BNODE_FREE) {
				/* Drop the cached bnode buffer as well */
				struct buffer_head *buffer =
					vol_find_get_block(sb, block);
				blockput_free_unify(sb, buffer);
			}
			break;
		}
		case LOG_BNODE_ROOT:
		{
			u64 root, left, right, rkey;
			u8 count;

			count = *data++;
			data = decode48(data, &root);
			data = decode48(data, &left);
			data = decode48(data, &right);
			data = decode48(data, &rkey);
			trace("%s: count %u, root block %Lx, left %Lx, right %Lx, rkey %Lx",
			      log_name[code], count, root, left, right, rkey);
			/* Only the new root block's allocation matters here */
			err = replay_update_bitmap(rp, root, 1, 1);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_SPLIT:
		{
			unsigned pos;
			u64 src, dst;

			data = decode16(data, &pos);
			data = decode48(data, &src);
			data = decode48(data, &dst);
			trace("%s: pos %x, src %Lx, dst %Lx",
			      log_name[code], pos, src, dst);
			/* Split allocated a new destination bnode */
			err = replay_update_bitmap(rp, dst, 1, 1);
			if (err)
				return err;
			break;
		}
		case LOG_BNODE_MERGE:
		{
			u64 src, dst;

			data = decode48(data, &src);
			data = decode48(data, &dst);
			trace("%s: src 0x%Lx, dst 0x%Lx",
			      log_name[code], src, dst);
			/* Merge freed the source bnode */
			err = replay_update_bitmap(rp, src, 1, 0);
			if (err)
				return err;
			blockput_free_unify(sb, vol_find_get_block(sb, src));
			break;
		}
		case LOG_ORPHAN_ADD:
		case LOG_ORPHAN_DEL:
		{
			unsigned version;
			u64 inum;

			data = decode16(data, &version);
			data = decode48(data, &inum);
			trace("%s: version 0x%x, inum 0x%Lx",
			      log_name[code], version, inum);
			if (code == LOG_ORPHAN_ADD)
				err = replay_orphan_add(rp, version, inum);
			else
				err = replay_orphan_del(rp, version, inum);
			if (err)
				return err;
			break;
		}
		/* Records handled in stage1 (or no-ops here): skip by size */
		case LOG_FREEBLOCKS:
		case LOG_BNODE_ADD:
		case LOG_BNODE_UPDATE:
		case LOG_BNODE_DEL:
		case LOG_BNODE_ADJUST:
		case LOG_UNIFY:
		case LOG_DELTA:
			/* log_size[] includes the code byte, already consumed */
			data += log_size[code] - sizeof(code);
			break;
		default:
			tux3_err(sb, "unrecognized log code 0x%x", code);
			return -EINVAL;
		}
	}

	return 0;
}