/*
 * Walk the btree from root to leaf looking up @key, pushing each
 * traversed node buffer onto the cursor path.  On success the top of
 * the cursor holds the leaf buffer (with a NULL next pointer) and 0
 * is returned; on a read failure the cursor is released and -EIO is
 * returned.
 */
int probe(struct btree *btree, tuxkey_t key, struct cursor *cursor)
{
	unsigned i, depth = btree->root.depth;
	struct buffer_head *buffer = sb_bread(vfs_sb(btree->sb), btree->root.block);
	if (!buffer)
		return -EIO;
	struct bnode *node = bufdata(buffer);

	for (i = 0; i < depth; i++) {
		/* Linear scan for the first entry whose key exceeds the
		 * target; the child to descend into is the entry just
		 * before it (hence the (next - 1)->block below). */
		struct index_entry *next = node->entries, *top = next + bcount(node);
		while (++next < top) /* binary search goes here */
			if (from_be_u64(next->key) > key)
				break;
		trace("probe level %i, %ti of %i", i, next - node->entries, bcount(node));
		level_push(cursor, buffer, next);
		if (!(buffer = sb_bread(vfs_sb(btree->sb), from_be_u64((next - 1)->block))))
			goto eek;
		node = (struct bnode *)bufdata(buffer);
	}
	assert((btree->ops->leaf_sniff)(btree, bufdata(buffer)));
	level_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 0;
eek:
	/* Buffers already pushed onto the cursor are released here */
	release_cursor(cursor);
	return -EIO; /* stupid, it might have been NOMEM */
}
/*
 * Common flusher setup.  In kernel builds, point the VFS superblock at
 * the no-op backing_dev_info so the generic writeback path stays out of
 * the way; tux3 drives writeback itself via its own dirty flags.
 */
static void __tux3_init_flusher(struct sb *sb)
{
#ifdef __KERNEL__
	/* Disable writeback task to control inode reclaim by dirty flags */
	vfs_sb(sb)->s_bdi = &noop_backing_dev_info;
#endif
}
/*
 * Advance the cursor to the next leaf in key order: pop exhausted
 * levels, then descend along the next index entry back down to leaf
 * level.  Returns 1 with the new leaf on top of the cursor, 0 when the
 * whole tree has been traversed, or -EIO on read error (in which case
 * the cursor is released).
 */
int advance(struct btree *btree, struct cursor *cursor)
{
	int depth = btree->root.depth, level = depth;
	struct buffer_head *buffer;

	/* Pop levels whose index entries are exhausted */
	do {
		level_pop_brelse(cursor);
		if (!level)
			return 0; /* popped the root: traversal done */
		level--;
	} while (level_finished(cursor, level));

	/* Read the next child, then push down to the leaf */
	while (1) {
		buffer = sb_bread(vfs_sb(btree->sb), from_be_u64(cursor->path[level].next->block));
		if (!buffer)
			goto eek;
		/* Consume the entry only after the read succeeded */
		cursor->path[level].next++;
		if (level + 1 == depth)
			break;
		level_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries);
		level++;
	}
	level_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 1;
eek:
	release_cursor(cursor);
	return -EIO;
}
// desperately need ERR_PTR return here to distinguish between // ENOMEM, which should be impossible but when it happens we // need to do something reasonable, or ENOSPC which we must // just report and keep going without a fuss. static struct buffer_head *new_block(struct btree *btree) { block_t block; int err = btree->ops->balloc(btree->sb, 1, &block); if (err) return NULL; // ERR_PTR me!!! struct buffer_head *buffer = sb_getblk(vfs_sb(btree->sb), block); if (!buffer) return NULL; memset(bufdata(buffer), 0, bufsize(buffer)); mark_buffer_dirty(buffer); return buffer; }
/*
 * Core message helper: printk one line tagged with the given log level,
 * an optional prefix, and this filesystem's device id.  The caller's
 * format and varargs are expanded in a single printk via %pV.
 */
void __tux3_msg(struct sb *sb, const char *level, const char *prefix,
		const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk("%sTUX3-fs%s (%s): %pV\n", level, prefix, vfs_sb(sb)->s_id, &vaf);
	va_end(args);
}
/*
 * Report a filesystem error with the reporting function and line
 * number, then BUG().  Currently never returns.
 */
void __tux3_fs_error(struct sb *sb, const char *func, unsigned int line,
		     const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_ERR "TUX3-fs error (%s): %s:%d: %pV\n",
	       vfs_sb(sb)->s_id, func, line, &vaf);
	va_end(args);

	BUG(); /* FIXME: maybe panic() or MS_RDONLY */
}
/* Clear buffer dirty for I/O (Caller must remove buffer from list) */
/*
 * Maps the buffer to its physical @block and drops the dirty/delay
 * state so it can be submitted for write-out.
 */
static void tux3_clear_buffer_dirty_for_io(struct buffer_head *buffer,
					   struct sb *sb, block_t block)
{
	assert(list_empty(&buffer->b_assoc_buffers));
	assert(buffer_dirty(buffer)); /* Who cleared the dirty? */
	/* If buffer was hole and dirtied, it can be !buffer_mapped() */
	/*assert(buffer_mapped(buffer));*/
	assert(buffer_uptodate(buffer));

	/* Set up buffer for I/O. FIXME: need? */
	map_bh(buffer, vfs_sb(sb), block);
	clear_buffer_delay(buffer);

	/*buffer->b_assoc_map = NULL;*/ /* FIXME: hack for *_for_io_hack */
	tux3_clear_bufdelta(buffer); /* FIXME: hack for save delta */
	clear_buffer_dirty(buffer);
}
/*
 * Start the flusher thread ("tux3/<bdev>") for this filesystem and
 * record it in sb->flush_task.  Returns 0 on success or the PTR_ERR
 * of the failed kthread_run().
 */
int tux3_init_flusher(struct sb *sb)
{
	char name[BDEVNAME_SIZE];
	struct task_struct *flusher;

	__tux3_init_flusher(sb);

	bdevname(vfs_sb(sb)->s_bdev, name);
	/* FIXME: we should use normal bdi-writeback by changing core */
	flusher = kthread_run(flush_delta_work, sb, "tux3/%s", name);
	if (IS_ERR(flusher))
		return PTR_ERR(flusher);

	sb->flush_task = flusher;
	return 0;
}
/* Setup sb by on-disk super block */
static void __setup_sb(struct sb *sb, struct disksuper *super)
{
	/* Initial delta bookkeeping: marshal/committed trail next by one */
	sb->next_delta = TUX3_INIT_DELTA;
	sb->rollup = TUX3_INIT_DELTA;
	sb->marshal_delta = TUX3_INIT_DELTA - 1;
	sb->committed_delta = TUX3_INIT_DELTA - 1;
	/* Setup initial delta_ref */
	__delta_transition(sb, &sb->delta_refs[0]);

	/* Block geometry derived from the on-disk blockbits */
	sb->blockbits = be16_to_cpu(super->blockbits);
	sb->volblocks = be64_to_cpu(super->volblocks);
	sb->version = 0; /* FIXME: not yet implemented */
	sb->blocksize = 1 << sb->blockbits;
	sb->blockmask = (1 << sb->blockbits) - 1;
	sb->entries_per_node = calc_entries_per_node(sb->blocksize);
	/* Initialize base indexes for atable */
	atable_init_base(sb);
	/* vfs fields */
	vfs_sb(sb)->s_maxbytes = calc_maxbytes(sb->blocksize);

	/* Probably does not belong here (maybe metablock) */
	sb->freeinodes = MAX_INODES - be64_to_cpu(super->usedinodes);
	sb->freeblocks = sb->volblocks;
	sb->nextalloc = be64_to_cpu(super->nextalloc);
	sb->atomdictsize = be64_to_cpu(super->atomdictsize);
	sb->atomgen = be32_to_cpu(super->atomgen);
	sb->freeatom = be32_to_cpu(super->freeatom);
	/* logchain and logcount are read from super directly */

	trace("blocksize %u, blockbits %u, blockmask %08x",
	      sb->blocksize, sb->blockbits, sb->blockmask);
	trace("volblocks %Lu, freeblocks %Lu, freeinodes %Lu, nextalloc %Lu",
	      sb->volblocks, sb->freeblocks, sb->freeinodes, sb->nextalloc);
	trace("atom_dictsize %Lu, freeatom %u, atomgen %u",
	      (s64)sb->atomdictsize, sb->freeatom, sb->atomgen);
	trace("logchain %Lu, logcount %u",
	      be64_to_cpu(super->logchain), be32_to_cpu(super->logcount));

	/* Root btree pointers also come from the on-disk super */
	setup_roots(sb, super);
}
static void ileaf_dump(struct btree *btree, void *vleaf) { if (!tux3_trace) return; struct ileaf_attr_ops *attr_ops = btree->ops->private_ops; struct ileaf *leaf = vleaf; inum_t inum = ibase(leaf); __be16 *dict = ileaf_dict(btree, leaf); unsigned offset = 0; trace_on("inode table block 0x%Lx/%i (%x bytes free)", ibase(leaf), icount(leaf), ileaf_free(btree, leaf)); for (int i = 0; i < icount(leaf); i++, inum++) { int limit = __atdict(dict, i + 1), size = limit - offset; if (!size) continue; if (size < 0) trace_on(" 0x%Lx: <corrupt>\n", inum); else if (!size) trace_on(" 0x%Lx: <empty>\n", inum); else if (attr_ops == &iattr_ops) { /* FIXME: this doesn't work in kernel */ struct tux3_inode tuxnode = {}; struct inode *inode = &tuxnode.vfs_inode; void *attrs = leaf->table + offset; inode->i_sb = vfs_sb(btree->sb), attr_ops->decode(btree, inode, attrs, size); free_xcache(inode); } offset = limit; } }
int tree_chop(struct btree *btree, struct delete_info *info, millisecond_t deadline) { int depth = btree->root.depth, level = depth - 1, suspend = 0; struct cursor *cursor; struct buffer_head *leafbuf, **prev, *leafprev = NULL; struct btree_ops *ops = btree->ops; struct sb *sb = btree->sb; int ret; cursor = alloc_cursor(btree, 0); prev = malloc(sizeof(*prev) * depth); memset(prev, 0, sizeof(*prev) * depth); down_write(&btree->lock); probe(btree, info->resume, cursor); leafbuf = level_pop(cursor); /* leaf walk */ while (1) { ret = (ops->leaf_chop)(btree, info->key, bufdata(leafbuf)); if (ret) { mark_buffer_dirty(leafbuf); if (ret < 0) goto error_leaf_chop; } /* try to merge this leaf with prev */ if (leafprev) { struct vleaf *this = bufdata(leafbuf); struct vleaf *that = bufdata(leafprev); /* try to merge leaf with prev */ if ((ops->leaf_need)(btree, this) <= (ops->leaf_free)(btree, that)) { trace(">>> can merge leaf %p into leaf %p", leafbuf, leafprev); (ops->leaf_merge)(btree, that, this); remove_index(cursor, level); mark_buffer_dirty(leafprev); brelse_free(btree, leafbuf); //dirty_buffer_count_check(sb); goto keep_prev_leaf; } brelse(leafprev); } leafprev = leafbuf; keep_prev_leaf: //nanosleep(&(struct timespec){ 0, 50 * 1000000 }, NULL); //printf("time remaining: %Lx\n", deadline - gettime()); // if (deadline && gettime() > deadline) // suspend = -1; if (info->blocks && info->freed >= info->blocks) suspend = -1; /* pop and try to merge finished nodes */ while (suspend || level_finished(cursor, level)) { /* try to merge node with prev */ if (prev[level]) { assert(level); /* node has no prev */ struct bnode *this = cursor_node(cursor, level); struct bnode *that = bufdata(prev[level]); trace_off("check node %p against %p", this, that); trace_off("this count = %i prev count = %i", bcount(this), bcount(that)); /* try to merge with node to left */ if (bcount(this) <= sb->entries_per_node - bcount(that)) { trace(">>> can merge node %p into node %p", this, that); 
merge_nodes(that, this); remove_index(cursor, level - 1); mark_buffer_dirty(prev[level]); brelse_free(btree, level_pop(cursor)); //dirty_buffer_count_check(sb); goto keep_prev_node; } brelse(prev[level]); } prev[level] = level_pop(cursor); keep_prev_node: /* deepest key in the cursor is the resume address */ if (suspend == -1 && !level_finished(cursor, level)) { suspend = 1; /* only set resume once */ info->resume = from_be_u64((cursor->path[level].next)->key); } if (!level) { /* remove depth if possible */ while (depth > 1 && bcount(bufdata(prev[0])) == 1) { trace("drop btree level"); btree->root.block = bufindex(prev[1]); mark_btree_dirty(btree); brelse_free(btree, prev[0]); //dirty_buffer_count_check(sb); depth = --btree->root.depth; vecmove(prev, prev + 1, depth); //set_sb_dirty(sb); } //sb->snapmask &= ~snapmask; delete_snapshot_from_disk(); //set_sb_dirty(sb); //save_sb(sb); ret = suspend; goto out; } level--; trace_off(printf("pop to level %i, block %Lx, %i of %i nodes\n", level, bufindex(cursor->path[level].buffer), cursor->path[level].next - cursor_node(cursor, level)->entries, bcount(cursor_node(cursor, level)));); } /* push back down to leaf level */ while (level < depth - 1) { struct buffer_head *buffer = sb_bread(vfs_sb(sb), from_be_u64(cursor->path[level++].next++->block)); if (!buffer) { ret = -EIO; goto out; } level_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries); trace_off(printf("push to level %i, block %Lx, %i nodes\n", level, bufindex(buffer), bcount(cursor_node(cursor, level)));); }