int probe(struct btree *btree, tuxkey_t key, struct cursor *cursor)
{
	unsigned i, depth = btree->root.depth;
	struct buffer_head *buffer = sb_bread(vfs_sb(btree->sb), btree->root.block);
	if (!buffer)
		return -EIO;
	struct bnode *node = bufdata(buffer);

	for (i = 0; i < depth; i++) {
		struct index_entry *next = node->entries, *top = next + bcount(node);
		while (++next < top) /* binary search goes here */
			if (from_be_u64(next->key) > key)
				break;
		trace("probe level %i, %ti of %i", i, next - node->entries, bcount(node));
		level_push(cursor, buffer, next);
		if (!(buffer = sb_bread(vfs_sb(btree->sb), from_be_u64((next - 1)->block))))
			goto eek;
		node = (struct bnode *)bufdata(buffer);
	}
	assert((btree->ops->leaf_sniff)(btree, bufdata(buffer)));
	level_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 0;
eek:
	release_cursor(cursor);
	return -EIO; /* stupid, it might have been NOMEM */
}
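/*
 * A minimal sketch of the binary search that the "binary search goes
 * here" comment above asks for (hypothetical helper, not part of the
 * source): it returns the same cut point as the linear scan, i.e. the
 * first entry past entries[0] whose key exceeds the search key, or
 * top if there is none, assuming entries are sorted by key.
 */
static struct index_entry *bnode_search(struct bnode *node, tuxkey_t key)
{
	struct index_entry *base = node->entries;
	unsigned lo = 1, hi = bcount(node);

	while (lo < hi) {
		unsigned mid = lo + (hi - lo) / 2;
		if (from_be_u64(base[mid].key) > key)
			hi = mid;	/* cut point is at or below mid */
		else
			lo = mid + 1;	/* cut point is above mid */
	}
	return base + lo;
}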
static int try_leaf_merge(struct btree *btree, struct buffer_head *intobuf,
			  struct buffer_head *frombuf)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	struct vleaf *from = bufdata(frombuf);
	struct vleaf *into = bufdata(intobuf);

	/* Try to merge leaves */
	if (btree->ops->leaf_merge(btree, into, from)) {
		struct sb *sb = btree->sb;
		/*
		 * We know frombuf is redirected and dirty, so here we
		 * can simply cancel the leaf_redirect with bfree()
		 * instead of defered_bfree().
		 * FIXME: we could optimize by freeing the leaf without
		 * leaf_redirect; if we did, this would no longer hold.
		 */
		bfree(sb, bufindex(frombuf), 1);
		log_leaf_free(sb, bufindex(frombuf));
		return 1;
	}
	return 0;
}
struct buffer_head *blockdirty(struct buffer_head *buffer, unsigned newdelta)
{
	map_t *map = buffer->map;

	assert(buffer->state < BUFFER_STATES);
	buftrace("---- before: fork buffer %p ----", buffer);
	if (buffer_dirty(buffer)) {
		if (buffer_already_dirty(buffer, newdelta))
			return buffer;

		/* Buffer can't be modified in place; we have to fork it */
		buftrace("---- fork buffer %p ----", buffer);
		struct buffer_head *clone = new_buffer(map);
		if (IS_ERR(clone))
			return clone;
		/* Create the cloned buffer */
		memcpy(bufdata(clone), bufdata(buffer), bufsize(buffer));
		clone->index = buffer->index;
		/* Replace the buffer with the cloned buffer. */
		remove_buffer_hash(buffer);
		insert_buffer_hash(clone);
		/*
		 * The refcount of the original buffer is used by the
		 * backend, so the backend has to free that buffer
		 * (blockput(buffer)).
		 */
		buffer = clone;
	}
	__tux3_mark_buffer_dirty(buffer, newdelta);
	return buffer;
}
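/*
 * Typical caller pattern for blockdirty() (a condensed, hypothetical
 * sketch of the pattern used by update_refcount() and
 * unatom_dict_write() below): on success the returned buffer, which
 * may be a fork of the original, is the one to modify; on error the
 * caller still holds the original and must release it.
 */
static int blockdirty_example(struct buffer_head *buffer, unsigned delta)
{
	struct buffer_head *clone = blockdirty(buffer, delta);
	if (IS_ERR(clone)) {
		blockput(buffer);	/* error: original still held */
		return PTR_ERR(clone);
	}
	/* ... modify bufdata(clone) here ... */
	mark_buffer_dirty_non(clone);
	blockput(clone);
	return 0;
}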
static int try_bnode_merge(struct sb *sb, struct buffer_head *intobuf,
			   struct buffer_head *frombuf)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	struct bnode *into = bufdata(intobuf);
	struct bnode *from = bufdata(frombuf);

	/* Try to merge nodes */
	if (bnode_merge_nodes(sb, into, from)) {
		/*
		 * We know frombuf is redirected and dirty, so here we
		 * can simply cancel the bnode_redirect with bfree()
		 * instead of defered_bfree().
		 * FIXME: we could optimize by freeing the bnode without
		 * bnode_redirect; if we did, this would no longer hold.
		 */
		bfree(sb, bufindex(frombuf), 1);
		log_bnode_merge(sb, bufindex(frombuf), bufindex(intobuf));
		return 1;
	}
	return 0;
}
/*
 * Move the cursor down to the child node or leaf, and update ->next.
 * < 0 - error
 * 0 - there is no further child (leaf was pushed)
 * 1 - there is a child
 */
static int cursor_advance_down(struct cursor *cursor)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	struct btree *btree = cursor->btree;
	struct buffer_head *buffer;
	block_t child;

	assert(cursor->level < btree->root.depth);
	child = be64_to_cpu(cursor->path[cursor->level].next->block);
	buffer = vol_bread(btree->sb, child);
	if (!buffer)
		return -EIO; /* FIXME: stupid, it might have been NOMEM */
	cursor->path[cursor->level].next++;

	if (cursor->level < btree->root.depth - 1) {
		struct bnode *node = bufdata(buffer);
		assert(bnode_sniff(node));
		cursor_push(cursor, buffer, node->entries);
		cursor_check(cursor);
		return 1;
	}
	assert(btree->ops->leaf_sniff(btree, bufdata(buffer)));
	cursor_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 0;
}
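/*
 * Hypothetical caller sketch driven by the return convention above:
 * starting from a cursor positioned at the root, keep stepping down
 * until the leaf is pushed or an error occurs.
 */
static int descend_to_leaf(struct cursor *cursor)
{
	int ret;

	do
		ret = cursor_advance_down(cursor);
	while (ret > 0);	/* 1: pushed a child bnode, go deeper */
	return ret;		/* 0: leaf pushed, < 0: error */
}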
/*
 * Split leaf, then insert into parent.
 * key: key to add after split (cursor will point at the leaf that includes key)
 * hint: hint for split
 *
 * return value:
 * 0 - success
 * < 0 - error
 */
static int btree_leaf_split(struct cursor *cursor, tuxkey_t key, tuxkey_t hint)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	trace("split leaf");
	struct btree *btree = cursor->btree;
	struct buffer_head *newbuf;

	newbuf = new_leaf(btree);
	if (IS_ERR(newbuf))
		return PTR_ERR(newbuf);
	log_balloc(btree->sb, bufindex(newbuf), 1);

	struct buffer_head *leafbuf = cursor_leafbuf(cursor);
	tuxkey_t newkey = btree->ops->leaf_split(btree, hint, bufdata(leafbuf),
						 bufdata(newbuf));
	assert(cursor_this_key(cursor) < newkey);
	assert(newkey < cursor_next_key(cursor));
	if (key < newkey)
		mark_buffer_dirty_non(newbuf);
	else
		mark_buffer_dirty_non(leafbuf);
	return insert_leaf(cursor, newkey, newbuf, key < newkey);
}
void show_tree_range(struct btree *btree, tuxkey_t start, unsigned count)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	__tux3_dbg("%i level btree at %Li:\n",
		   btree->root.depth, btree->root.block);

	if (!has_root(btree))
		return;

	struct cursor *cursor = alloc_cursor(btree, 0);
	if (!cursor) {
		tux3_err(btree->sb, "out of memory");
		return;
	}
	if (btree_probe(cursor, start)) {
		tux3_fs_error(btree->sb, "tell me why!!!");
		goto out;
	}

	struct buffer_head *buffer;
	do {
		buffer = cursor_leafbuf(cursor);
		assert((btree->ops->leaf_sniff)(btree, bufdata(buffer)));
		(btree->ops->leaf_dump)(btree, bufdata(buffer));
	} while (--count && cursor_advance(cursor));
out:
	free_cursor(cursor);
}
/* Modify atom refcount */
static int atomref(struct inode *atable, atom_t atom, int use)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	struct sb *sb = tux_sb(atable->i_sb);
	unsigned shift = sb->blockbits - ATOMREF_BLKBITS;
	unsigned block = sb->atomref_base + ATOMREF_SIZE * (atom >> shift);
	unsigned offset = atom & ~(-1 << shift), kill = 0;
	struct buffer_head *buffer;
	__be16 *refcount;
	int err;

	buffer = blockread(mapping(atable), block);
	if (!buffer)
		return -EIO;

	refcount = bufdata(buffer);
	int low = be16_to_cpu(refcount[offset]) + use;
	trace("inc atom %x by %d, offset %x[%x], low = %d",
	      atom, use, block, offset, low);

	/* This releases buffer */
	err = update_refcount(sb, buffer, offset, low);
	if (err)
		return err;

	if (!low || (low & (-1 << 16))) {
		buffer = blockread(mapping(atable), block + 1);
		if (!buffer)
			return -EIO;

		refcount = bufdata(buffer);
		int high = be16_to_cpu(refcount[offset]);
		if (!low)
			blockput(buffer);
		else {
			trace("carry %d, offset %x[%x], high = %d",
			      (low >> 16), block, offset, high);
			high += (low >> 16);
			assert(high >= 0); /* paranoia check */
			/* This releases buffer */
			err = update_refcount(sb, buffer, offset, high);
			if (err) {
				/* FIXME: better set a flag that atomref
				 * broke, or something! */
				return err;
			}
		}
		kill = !(low | high);
	}
	/* removal of the dead atom (kill != 0) from the atom dictionary
	 * goes here */
	return 0;
}
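/*
 * Illustration (hypothetical helper, not in the source): an atom's
 * effective refcount is a 32-bit value whose low and high 16-bit
 * halves live in adjacent atable blocks; atomref() above updates the
 * low half and propagates any carry or borrow into the high half, and
 * the atom is dead when both halves reach zero.
 */
static inline u32 atom_refcount(u16 high, u16 low)
{
	return ((u32)high << 16) | low;
}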
struct buffer_head *new_leaf(struct btree *btree)
{
	struct buffer_head *buffer = new_block(btree);
	if (!IS_ERR(buffer)) {
		memset(bufdata(buffer), 0, bufsize(buffer));
		(btree->ops->leaf_init)(btree, bufdata(buffer));
		mark_buffer_dirty_atomic(buffer);
	}
	return buffer;
}
static void level_redirect_blockput(struct cursor *cursor, int level,
				    struct buffer_head *clone)
{
	struct buffer_head *buffer = cursor->path[level].buffer;
	struct index_entry *next = cursor->path[level].next;

	/* If this level has ->next, update ->next to the clone buffer */
	if (next)
		next = ptr_redirect(next, bufdata(buffer), bufdata(clone));

	memcpy(bufdata(clone), bufdata(buffer), bufsize(clone));
	level_replace_blockput(cursor, level, clone, next);
}
/*
 * Cursor read root node.
 * < 0 - error
 * 0 - success
 */
static int cursor_read_root(struct cursor *cursor)
{
	struct btree *btree = cursor->btree;
	struct buffer_head *buffer;

	assert(has_root(btree));
	buffer = vol_bread(btree->sb, btree->root.block);
	if (!buffer)
		return -EIO; /* FIXME: stupid, it might have been NOMEM */

	assert(bnode_sniff(bufdata(buffer)));
	cursor_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries);
	return 0;
}
struct buffer_head *new_leaf(struct btree *btree)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	struct buffer_head *buffer = new_block(btree);
	if (!IS_ERR(buffer)) {
		memset(bufdata(buffer), 0, bufsize(buffer));
		(btree->ops->leaf_init)(btree, bufdata(buffer));
		mark_buffer_dirty_atomic(buffer);
	}
	return buffer;
}
/* Modify buffer of refcount, then release buffer */
static int update_refcount(struct sb *sb, struct buffer_head *buffer,
			   unsigned offset, u16 val)
{
	unsigned delta = tux3_get_current_delta();
	struct buffer_head *clone;
	__be16 *refcount;

	/*
	 * The atable is protected by i_mutex for now.
	 * blockdirty() should never return -EAGAIN.
	 * FIXME: need finer granularity locking
	 */
	clone = blockdirty(buffer, delta);
	if (IS_ERR(clone)) {
		assert(PTR_ERR(clone) != -EAGAIN);
		blockput(buffer);
		return PTR_ERR(clone);
	}

	refcount = bufdata(clone);
	refcount[offset] = cpu_to_be16(val);
	mark_buffer_dirty_non(clone);
	blockput(clone);
	return 0;
}
static loff_t unatom_dict_write(struct inode *atable, atom_t atom, loff_t where)
{
	unsigned delta = tux3_get_current_delta();
	struct buffer_head *buffer, *clone;
	loff_t old;
	unsigned offset;

	buffer = blockread_unatom(atable, atom, &offset);
	if (!buffer)
		return -EIO;

	/*
	 * The atable is protected by i_mutex for now.
	 * blockdirty() should never return -EAGAIN.
	 * FIXME: need finer granularity locking
	 */
	clone = blockdirty(buffer, delta);
	if (IS_ERR(clone)) {
		assert(PTR_ERR(clone) != -EAGAIN);
		blockput(buffer);
		return PTR_ERR(clone);
	}

	__be64 *unatom_dict = bufdata(clone);
	old = be64_to_cpu(unatom_dict[offset]);
	unatom_dict[offset] = cpu_to_be64(where);
	mark_buffer_dirty_non(clone);
	blockput(clone);
	return old;
}
int blockio(int rw, struct sb *sb, struct buffer_head *buffer, block_t block)
{
	trace("%s: buffer %p, block %Lx", rw ? "write" : "read", buffer, block);
	return devio(rw, sb_dev(sb), block << sb->blockbits, bufdata(buffer),
		     sb->blocksize);
}
tux_dirent *tux_find_entry(struct inode *dir, const char *name, unsigned len,
			   struct buffer_head **result, loff_t size)
{
	struct sb *sb = tux_sb(dir->i_sb);
	unsigned reclen = TUX_REC_LEN(len);
	block_t block, blocks = size >> sb->blockbits;
	int err = -ENOENT;

	for (block = 0; block < blocks; block++) {
		struct buffer_head *buffer = blockread(mapping(dir), block);
		if (!buffer) {
			err = -EIO; // need ERR_PTR for blockread!!!
			goto error;
		}
		tux_dirent *entry = bufdata(buffer);
		tux_dirent *limit = (void *)entry + sb->blocksize - reclen;
		while (entry <= limit) {
			if (entry->rec_len == 0) {
				blockput(buffer);
				tux_zero_len_error(dir, block);
				err = -EIO;
				goto error;
			}
			if (tux_match(entry, name, len)) {
				*result = buffer;
				return entry;
			}
			entry = next_entry(entry);
		}
		blockput(buffer);
	}
error:
	*result = NULL; /* for debug */
	return ERR_PTR(err);
}
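/*
 * Hypothetical caller sketch (not in the source): look up a name and
 * extract its inode number.  tux_find_entry() returns the entry with
 * *result holding the buffer, which the caller must blockput() when
 * done; on failure the error comes back as an ERR_PTR.
 */
static int example_lookup_inum(struct inode *dir, const char *name,
			       unsigned len, inum_t *inum)
{
	struct buffer_head *buffer;
	tux_dirent *entry;

	entry = tux_find_entry(dir, name, len, &buffer, dir->i_size);
	if (IS_ERR(entry))
		return PTR_ERR(entry);
	*inum = be64_to_cpu(entry->inum);
	blockput(buffer);
	return 0;
}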
static struct buffer_head *new_node(struct btree *btree)
{
	struct buffer_head *buffer = new_block(btree);
	if (buffer)
		((struct bnode *)bufdata(buffer))->count = 0;
	return buffer;
}
struct buffer_head *new_leaf(struct btree *btree)
{
	struct buffer_head *buffer = new_block(btree);
	if (buffer)
		(btree->ops->leaf_init)(btree, bufdata(buffer));
	return buffer;
}
static int bitmap_test(struct sb *sb, block_t start, block_t count, int set)
{
	struct inode *bitmap = sb->bitmap;
	unsigned mapshift = sb->blockbits + 3;
	unsigned mapsize = 1 << mapshift;
	unsigned mapmask = mapsize - 1;
	unsigned mapoffset = start & mapmask;
	block_t mapblock, mapblocks = (start + count + mapmask) >> mapshift;
	int (*test)(u8 *, unsigned, unsigned) = set ? all_set : all_clear;

	for (mapblock = start >> mapshift; mapblock < mapblocks; mapblock++) {
		struct buffer_head *buffer;
		unsigned len;
		int ret;

		buffer = blockget(mapping(bitmap), mapblock);
		assert(buffer);

		len = min_t(block_t, mapsize - mapoffset, count);
		ret = test(bufdata(buffer), mapoffset, len);
		blockput(buffer);
		if (!ret)
			return 0;

		mapoffset = 0;
		count -= len;
	}
	return 1;
}
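/*
 * Usage sketch (hypothetical helper, not in the source): bitmap_test()
 * makes a convenient debugging assertion around a free, checking the
 * extent's allocation state before and after.  Assumes the bfree(sb,
 * block, count) signature seen in try_leaf_merge() above.
 */
static void bfree_checked(struct sb *sb, block_t start, block_t count)
{
	assert(bitmap_test(sb, start, count, 1));	/* must be allocated */
	bfree(sb, start, count);
	assert(bitmap_test(sb, start, count, 0));	/* now clear */
}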
int advance(struct btree *btree, struct cursor *cursor)
{
	int depth = btree->root.depth, level = depth;
	struct buffer_head *buffer;

	do {
		level_pop_brelse(cursor);
		if (!level)
			return 0;
		level--;
	} while (level_finished(cursor, level));

	while (1) {
		buffer = sb_bread(vfs_sb(btree->sb),
				  from_be_u64(cursor->path[level].next->block));
		if (!buffer)
			goto eek;
		cursor->path[level].next++;
		if (level + 1 == depth)
			break;
		level_push(cursor, buffer,
			   ((struct bnode *)bufdata(buffer))->entries);
		level++;
	}
	level_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 1;
eek:
	release_cursor(cursor);
	return -EIO;
}
static void level_redirect_blockput(struct cursor *cursor, int level,
				    struct buffer_head *clone)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	struct buffer_head *buffer = cursor->path[level].buffer;
	struct index_entry *next = cursor->path[level].next;

	/* If this level has ->next, update ->next to the clone buffer */
	if (next)
		next = ptr_redirect(next, bufdata(buffer), bufdata(clone));

	memcpy(bufdata(clone), bufdata(buffer), bufsize(clone));
	level_replace_blockput(cursor, level, clone, next);
}
static inline struct bnode *level_node(struct cursor *cursor, int level)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	return bufdata(cursor->path[level].buffer);
}
int tux_readdir(struct file *file, void *state, filldir_t filldir)
{
	loff_t pos = file->f_pos;
#ifdef __KERNEL__
	struct inode *dir = file->f_dentry->d_inode;
#else
	struct inode *dir = file->f_inode;
#endif
	int revalidate = file->f_version != dir->i_version;
	struct sb *sb = tux_sb(dir->i_sb);
	unsigned blockbits = sb->blockbits;
	block_t block, blocks = dir->i_size >> blockbits;
	unsigned offset = pos & sb->blockmask;

	assert(!(dir->i_size & sb->blockmask));

	for (block = pos >> blockbits; block < blocks; block++) {
		struct buffer_head *buffer = blockread(mapping(dir), block);
		if (!buffer)
			return -EIO;
		void *base = bufdata(buffer);
		if (revalidate) {
			if (offset) {
				tux_dirent *entry = base + offset;
				tux_dirent *p = base + (offset & sb->blockmask);
				while (p < entry && p->rec_len)
					p = next_entry(p);
				offset = (void *)p - base;
				file->f_pos = (block << blockbits) + offset;
			}
			file->f_version = dir->i_version;
			revalidate = 0;
		}
		tux_dirent *limit = base + sb->blocksize - TUX_REC_LEN(1);
		for (tux_dirent *entry = base + offset; entry <= limit;
		     entry = next_entry(entry)) {
			if (entry->rec_len == 0) {
				blockput(buffer);
				tux_zero_len_error(dir, block);
				return -EIO;
			}
			if (!is_deleted(entry)) {
				unsigned type = (entry->type < TUX_TYPES) ?
					filetype[entry->type] : DT_UNKNOWN;
				int lame = filldir(
					state, entry->name, entry->name_len,
					(block << blockbits) | ((void *)entry - base),
					be64_to_cpu(entry->inum), type);
				if (lame) {
					blockput(buffer);
					return 0;
				}
			}
			file->f_pos += tux_rec_len_from_disk(entry->rec_len);
		}
		blockput(buffer);
		offset = 0;
	}
	return 0;
}
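/*
 * next_entry(), used throughout the directory code above, is assumed
 * to step to the following record by the on-disk record length; a
 * minimal sketch under that assumption (hypothetical, for
 * illustration only):
 */
static inline tux_dirent *next_entry_sketch(tux_dirent *entry)
{
	return (void *)entry + tux_rec_len_from_disk(entry->rec_len);
}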
/* Lookup index and set it as next down path */
static void cursor_bnode_lookup(struct cursor *cursor, tuxkey_t key)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	struct path_level *at = &cursor->path[cursor->level];
	at->next = bnode_lookup(bufdata(at->buffer), key);
}
void show_tree_range(struct btree *btree, tuxkey_t start, unsigned count)
{
	printf("%i level btree at %Li:\n", btree->root.depth, (L)btree->root.block);

	struct cursor *cursor = alloc_cursor(btree, 0);
	if (!cursor)
		error("out of memory");
	if (probe(btree, start, cursor))
		error("tell me why!!!");

	struct buffer_head *buffer;
	do {
		buffer = cursor_leafbuf(cursor);
		assert((btree->ops->leaf_sniff)(btree, bufdata(buffer)));
		(btree->ops->leaf_dump)(btree, bufdata(buffer));
		//tuxkey_t *next = pnext_key(cursor, btree->depth);
		//printf("next key = %Lx:\n", next ? (L)*next : 0);
	} while (--count && advance(btree, cursor));
	free_cursor(cursor);
}
/*
 * Cursor read root node.
 * < 0 - error
 * 0 - success
 */
static int cursor_read_root(struct cursor *cursor)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	struct btree *btree = cursor->btree;
	struct buffer_head *buffer;

	assert(has_root(btree));
	buffer = vol_bread(btree->sb, btree->root.block);
	if (!buffer)
		return -EIO; /* FIXME: stupid, it might have been NOMEM */

	assert(bnode_sniff(bufdata(buffer)));
	cursor_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries);
	return 0;
}
int tux_delete_entry(struct inode *dir, struct buffer_head *buffer,
		     tux_dirent *entry)
{
	unsigned delta = tux3_get_current_delta();
	tux_dirent *prev = NULL, *this = bufdata(buffer);
	struct buffer_head *clone;
	void *olddata;

	while ((char *)this < (char *)entry) {
		if (this->rec_len == 0) {
			blockput(buffer);
			tux_zero_len_error(dir, bufindex(buffer));
			return -EIO;
		}
		prev = this;
		this = next_entry(this);
	}

	/*
	 * The directory is protected by i_mutex.
	 * blockdirty() should never return -EAGAIN.
	 */
	olddata = bufdata(buffer);
	clone = blockdirty(buffer, delta);
	if (IS_ERR(clone)) {
		assert(PTR_ERR(clone) != -EAGAIN);
		blockput(buffer);
		return PTR_ERR(clone);
	}
	entry = ptr_redirect(entry, olddata, bufdata(clone));
	prev = ptr_redirect(prev, olddata, bufdata(clone));

	if (prev)
		prev->rec_len = tux_rec_len_to_disk((void *)next_entry(entry) -
						    (void *)prev);
	memset(entry->name, 0, entry->name_len);
	entry->name_len = entry->type = 0;
	entry->inum = 0;
	mark_buffer_dirty_non(clone);
	blockput(clone);
	return 0;
}
static int tux_load_sb(struct super_block *sb, struct root *iroot, int silent)
{
	struct buffer_head *bh;
	int err;

	BUG_ON(SB_LOC < sb->s_blocksize);
	bh = sb_bread(sb, SB_LOC >> sb->s_blocksize_bits);
	if (!bh) {
		if (!silent)
			printk(KERN_ERR "TUX3: unable to read superblock\n");
		return -EIO;
	}
	err = unpack_sb(tux_sb(sb), bufdata(bh), iroot, silent);
	/* FIXME: is this needed? */
	memcpy(&tux_sb(sb)->super, bufdata(bh), sizeof(tux_sb(sb)->super));
	brelse(bh);
	return err;
}
static inline void bnode_buffer_init(struct buffer_head *buffer)
{
	if (DEBUG_MODE_K == 1) {
		printf("\t\t\t\t%25s[K] %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	struct bnode *bnode = bufdata(buffer);

	memset(bnode, 0, bufsize(buffer));
	bnode->magic = cpu_to_be16(TUX3_MAGIC_BNODE);
}
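/*
 * bnode_sniff(), asserted in the cursor paths above, is assumed to
 * verify the magic that bnode_buffer_init() stamps; a minimal sketch
 * under that assumption (hypothetical, for illustration only):
 */
static inline int bnode_sniff_sketch(const struct bnode *bnode)
{
	return bnode->magic == cpu_to_be16(TUX3_MAGIC_BNODE);
}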
/* Prepare log info for replay and pin logblocks. */
static struct replay *replay_prepare(struct sb *sb)
{
	if (DEBUG_MODE_K == 1) {
		printk(KERN_INFO "%25s %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	block_t logchain = be64_to_cpu(sb->super.logchain);
	unsigned i, logcount = be32_to_cpu(sb->super.logcount);
	struct replay *rp;
	struct buffer_head *buffer;
	int err;

	/* FIXME: this address array is a quick hack. Rethink log block
	 * management and log block addressing. */
	rp = alloc_replay(sb, logcount);
	if (IS_ERR(rp))
		return rp;

	/* FIXME: maybe we should use bufvec to read log blocks */
	trace("load %u logblocks", logcount);
	i = logcount;
	while (i-- > 0) {
		struct logblock *log;

		buffer = blockget(mapping(sb->logmap), i);
		if (!buffer) {
			i++;
			err = -ENOMEM;
			goto error;
		}
		assert(bufindex(buffer) == i);

		err = blockio(READ, sb, buffer, logchain);
		if (err)
			goto error;

		err = replay_check_log(rp, buffer);
		if (err)
			goto error;

		/* Store index => blocknr map */
		rp->blocknrs[bufindex(buffer)] = logchain;

		log = bufdata(buffer);
		logchain = be64_to_cpu(log->logchain);
	}
	return rp;

error:
	free_replay(rp);
	replay_unpin_logblocks(sb, i, logcount);
	return ERR_PTR(err);
}
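/*
 * On-disk layout assumed by the loop above: the superblock points at
 * the newest log block, and each logblock header stores the address of
 * the block logged before it, so replay walks the chain backwards
 * while filling logmap by forward index:
 *
 *	super.logchain -> [log logcount-1] -> ... -> [log 1] -> [log 0]
 */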