/*
 * Walk the btree from the root down to the leaf that covers @key, pushing
 * one (buffer, index) pair per level onto @cursor.  On success the cursor
 * holds depth+1 levels, the last being the leaf with a NULL index.
 *
 * Returns 0 on success, -EIO if a block read fails (cursor is released).
 */
int probe(struct btree *btree, tuxkey_t key, struct cursor *cursor)
{
	unsigned i, depth = btree->root.depth;
	struct buffer_head *buffer = sb_bread(vfs_sb(btree->sb), btree->root.block);
	if (!buffer)
		return -EIO;
	struct bnode *node = bufdata(buffer);

	for (i = 0; i < depth; i++) {
		/* Scan entries for the first key strictly greater than @key;
		 * the child to descend into is the entry just before it. */
		struct index_entry *next = node->entries, *top = next + bcount(node);
		while (++next < top) /* binary search goes here */
			if (from_be_u64(next->key) > key)
				break;
		trace("probe level %i, %ti of %i", i, next - node->entries, bcount(node));
		/* Record this level; the buffer's refcount is now owned by the cursor. */
		level_push(cursor, buffer, next);
		/* Descend into the child at (next - 1). */
		if (!(buffer = sb_bread(vfs_sb(btree->sb), from_be_u64((next - 1)->block))))
			goto eek;
		node = (struct bnode *)bufdata(buffer);
	}
	/* The final buffer must look like a leaf to the btree's ops. */
	assert((btree->ops->leaf_sniff)(btree, bufdata(buffer)));
	level_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 0;
eek:
	/* Drops every buffer already pushed onto the cursor. */
	release_cursor(cursor);
	return -EIO; /* stupid, it might have been NOMEM */
}
/*
 * Advance @cursor to the next leaf of the btree.
 *
 * Pops finished levels (releasing their buffers), then descends back down
 * the next untraversed index entry until the leaf level is reached again.
 *
 * Returns 1 when positioned on a new leaf, 0 when the tree is exhausted,
 * -EIO on a failed block read (cursor is released).
 */
int advance(struct btree *btree, struct cursor *cursor)
{
	int depth = btree->root.depth, level = depth;
	struct buffer_head *buffer;
	/* Pop levels whose entries are all consumed; level 0 done means EOF. */
	do {
		level_pop_brelse(cursor);
		if (!level)
			return 0;
		level--;
	} while (level_finished(cursor, level));
	/* Descend along the next entry at each level until we hit a leaf. */
	while (1) {
		buffer = sb_bread(vfs_sb(btree->sb), from_be_u64(cursor->path[level].next->block));
		if (!buffer)
			goto eek;
		cursor->path[level].next++;	/* consume the entry we just followed */
		if (level + 1 == depth)
			break;			/* @buffer is the new leaf */
		level_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries);
		level++;
	}
	level_push(cursor, buffer, NULL);
	cursor_check(cursor);
	return 1;
eek:
	release_cursor(cursor);
	return -EIO;
}
static void cursor_check(struct cursor *cursor) { if (cursor->len == 0) return; tuxkey_t key = 0; block_t block = cursor->btree->root.block; for (int i = 0; i < cursor->len; i++) { assert(bufindex(cursor->path[i].buffer) == block); if (!cursor->path[i].next) break; struct bnode *node = cursor_node(cursor, i); assert(node->entries < cursor->path[i].next); assert(cursor->path[i].next <= node->entries + bcount(node)); assert(from_be_u64((cursor->path[i].next - 1)->key) >= key); block = from_be_u64((cursor->path[i].next - 1)->block); key = from_be_u64((cursor->path[i].next - 1)->key); } }
/*
 * This code should give better results on 32-bit CPU with less than
 * ~24 registers, both size and performance wise...
 */
/*
 * SHA-512 compression over @num 128-byte blocks of big-endian words @W,
 * updating the 8-word chaining value @state in place.
 *
 * Instead of rotating eight working variables, this variant keeps only A, E
 * and T live and writes b/c/d/f/g/h plus the message schedule into a sliding
 * window: F starts at X+80 and decrements once per round, so F[1..3]/F[5..7]
 * are the "shifted" working variables and F[8..] holds recent schedule words.
 */
void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num)
{
	uint64_t A, E, T;
	uint64_t X[9 + 80], *F;	/* 9-slot window + one slot per round */
	int i;

	while (num--) {
		F = X + 80;
		/* Seed the window with the current chaining value. */
		A = state[0]; F[1] = state[1]; F[2] = state[2]; F[3] = state[3];
		E = state[4]; F[5] = state[5]; F[6] = state[6]; F[7] = state[7];
		/* Rounds 0-15: schedule words come straight from the input. */
		for (i = 0; i < 16; i++, F--) {
			T = from_be_u64(W[i]);
			F[0] = A;
			F[4] = E;
			F[8] = T;
			T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
			E = F[3] + T;
			A = T + Sigma0(A) + Maj(A, F[1], F[2]);
		}
		/* Rounds 16-79: schedule words are derived from earlier ones;
		 * the F[8 + 16 - k] offsets index words k rounds back. */
		for (; i < 80; i++, F--) {
			T = sigma0(F[8 + 16 - 1]);
			T += sigma1(F[8 + 16 - 14]);
			T += F[8 + 16] + F[8 + 16 - 9];
			F[0] = A; F[4] = E; F[8] = T;
			T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
			E = F[3] + T;
			A = T + Sigma0(A) + Maj(A, F[1], F[2]);
		}
		/* Fold the block's result back into the chaining value. */
		state[0] += A; state[1] += F[1]; state[2] += F[2]; state[3] += F[3];
		state[4] += E; state[5] += F[5]; state[6] += F[6]; state[7] += F[7];
		W += 16;
	}
}
/*
 * Locate the first entry in the (key-sorted) polyhash table whose key equals
 * @key, reset the per-lookup index_* bookkeeping, and hand off to
 * get_key_data() with index_first set to that entry.  Returns -1 if the key
 * is absent.
 */
static int find_first_key(uint64_t key)
{
	/* Reset lookup state before scanning. */
	index_first = -1;
	index_count = 0;
	index_weight_count = 0;
	index_best = -1;
	index_rand = -1;

	/* Binary-search [lo, hi) down to a window of at most 8 slots. */
	ssize_t lo = 0;
	ssize_t hi = keycount;
	do {
		ssize_t pivot = (hi + lo) / 2;
		uint64_t probe = from_be_u64(polyhash[pivot].key);
		if (probe < key) {
			lo = pivot;
		} else if (probe > key) {
			hi = pivot;
		} else {
			/* Exact hit: clamp the window around the pivot. */
			lo = max(pivot - 4, 0);
			hi = min(pivot + 4, keycount);
		}
	} while (hi - lo > 8);

	/* Linear scan of the window; back up to the leftmost duplicate. */
	for (ssize_t pos = lo; pos < hi; pos++) {
		if (from_be_u64(polyhash[pos].key) != key)
			continue;
		index_first = pos;
		while (index_first > 0 &&
		       from_be_u64(polyhash[index_first - 1].key) == key)
			index_first--;
		return get_key_data();
	}
	return -1;
}
static struct dentry *tux3_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct buffer_head *buffer; struct inode *inode; tux_dirent *entry; entry = tux_find_dirent(dir, dentry->d_name.name, dentry->d_name.len, &buffer); if (IS_ERR(entry)) { if (PTR_ERR(entry) != -ENOENT) return ERR_PTR(PTR_ERR(entry)); inode = NULL; goto out; } inode = tux3_iget(dir->i_sb, from_be_u64(entry->inum)); brelse(buffer); if (IS_ERR(inode)) return ERR_PTR(PTR_ERR(inode)); out: return d_splice_alias(inode, dentry); }
/*
 * SHA-512 compression over @num 128-byte blocks of big-endian words @W,
 * updating the 8-word chaining value @state in place.
 *
 * Fully unrolled variant: the eight working variables are rotated by
 * permuting the macro arguments each round instead of shuffling values.
 * s0, s1 and T1 are scratch used inside the ROUND_* macros; X[] is the
 * 16-word rolling message schedule.
 */
void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num)
{
	uint64_t a, b, c, d, e, f, g, h, s0, s1, T1;
	uint64_t X[16];
	int i;

	while (num--) {
		/* Load the current chaining value. */
		a = state[0]; b = state[1]; c = state[2]; d = state[3];
		e = state[4]; f = state[5]; g = state[6]; h = state[7];

		/* Rounds 0-15: schedule words come straight from the input. */
		T1 = X[0] = from_be_u64(W[0]);   ROUND_00_15(0, a, b, c, d, e, f, g, h);
		T1 = X[1] = from_be_u64(W[1]);   ROUND_00_15(1, h, a, b, c, d, e, f, g);
		T1 = X[2] = from_be_u64(W[2]);   ROUND_00_15(2, g, h, a, b, c, d, e, f);
		T1 = X[3] = from_be_u64(W[3]);   ROUND_00_15(3, f, g, h, a, b, c, d, e);
		T1 = X[4] = from_be_u64(W[4]);   ROUND_00_15(4, e, f, g, h, a, b, c, d);
		T1 = X[5] = from_be_u64(W[5]);   ROUND_00_15(5, d, e, f, g, h, a, b, c);
		T1 = X[6] = from_be_u64(W[6]);   ROUND_00_15(6, c, d, e, f, g, h, a, b);
		T1 = X[7] = from_be_u64(W[7]);   ROUND_00_15(7, b, c, d, e, f, g, h, a);
		T1 = X[8] = from_be_u64(W[8]);   ROUND_00_15(8, a, b, c, d, e, f, g, h);
		T1 = X[9] = from_be_u64(W[9]);   ROUND_00_15(9, h, a, b, c, d, e, f, g);
		T1 = X[10] = from_be_u64(W[10]); ROUND_00_15(10, g, h, a, b, c, d, e, f);
		T1 = X[11] = from_be_u64(W[11]); ROUND_00_15(11, f, g, h, a, b, c, d, e);
		T1 = X[12] = from_be_u64(W[12]); ROUND_00_15(12, e, f, g, h, a, b, c, d);
		T1 = X[13] = from_be_u64(W[13]); ROUND_00_15(13, d, e, f, g, h, a, b, c);
		T1 = X[14] = from_be_u64(W[14]); ROUND_00_15(14, c, d, e, f, g, h, a, b);
		T1 = X[15] = from_be_u64(W[15]); ROUND_00_15(15, b, c, d, e, f, g, h, a);

		/* Rounds 16-79: schedule words derived from X[] in place. */
		for (i = 16; i < 80; i += 16) {
			ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
			ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
			ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
			ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
			ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
			ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
			ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
			ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
			ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
			ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
			ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
			ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
			ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
			ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
			ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
			ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
		}

		/* Fold the block's result back into the chaining value. */
		state[0] += a; state[1] += b; state[2] += c; state[3] += d;
		state[4] += e; state[5] += f; state[6] += g; state[7] += h;
		W += 16;
	}
}
/* Base key of this ileaf (stored big-endian on disk). */
static inline tuxkey_t ibase(struct ileaf *leaf)
{
	return from_be_u64(leaf->ibase);
}
/*
 * Standalone test driver for the atom table and extended-attribute cache.
 * Builds an in-memory filesystem image backed by argv[1], then exercises
 * atom refcounting, atom creation, the inode xattr cache, xattr table
 * encode/decode, xattr update/remove/lookup/list, the atom reverse map,
 * and atom recycling, printing results for manual/golden-output comparison.
 */
int main(int argc, char *argv[])
{
	/* Attribute bits present on the test inode. */
	unsigned abits = DATA_BTREE_BIT|CTIME_SIZE_BIT|MODE_OWNER_BIT|LINK_COUNT_BIT|MTIME_BIT;
	struct dev *dev = &(struct dev){ .bits = 8, .fd = open(argv[1], O_CREAT|O_RDWR, S_IRUSR|S_IWUSR) };
	assert(!ftruncate(dev->fd, 1 << 24));
	init_buffers(dev, 1 << 20, 0);
	struct sb *sb = rapid_sb(dev,
		.version = 0,
		.atomref_base = 1 << 10,
		.unatom_base = 1 << 11,
		.atomgen = 1);
	struct inode *inode = rapid_open_inode(sb, NULL, S_IFDIR | 0x666,
		.present = abits,
		.i_uid = 0x12121212,
		.i_gid = 0x34343434,
		.i_ctime = spectime(0xdec0debeadULL),
		.i_mtime = spectime(0xbadfaced00dULL));
	inode->btree = (struct btree){ .root = { .block = 0xcaba1f00dULL, .depth = 3 } };
	sb->atable = inode;

	/* Zero the two atom-refcount blocks so counts start from scratch. */
	for (int i = 0; i < 2; i++) {
		struct buffer_head *buffer = blockget(mapping(inode), tux_sb(inode->i_sb)->atomref_base + i);
		memset(bufdata(buffer), 0, sb->blocksize);
		blockput_dirty(buffer);
	}

	if (1) {
		warn("---- test positive and negative refcount carry ----");
		/* Two +2^15 then two -2^15 adjustments should carry through
		 * the low/high refcount words and net out to zero. */
		use_atom(inode, 6, 1 << 15);
		use_atom(inode, 6, (1 << 15));
		use_atom(inode, 6, -(1 << 15));
		use_atom(inode, 6, -(1 << 15));
	}

	warn("---- test atom table ----");
	/* Repeated names must map to the same atom number. */
	printf("atom = %Lx\n", (L)make_atom(inode, "foo", 3));
	printf("atom = %Lx\n", (L)make_atom(inode, "foo", 3));
	printf("atom = %Lx\n", (L)make_atom(inode, "bar", 3));
	printf("atom = %Lx\n", (L)make_atom(inode, "foo", 3));
	printf("atom = %Lx\n", (L)make_atom(inode, "bar", 3));

	warn("---- test inode xattr cache ----");
	int err;
	err = xcache_update(inode, 0x666, "hello", 5, 0);
	if (err)
		printf("err %d\n", err);
	err = xcache_update(inode, 0x777, "world!", 6, 0);
	if (err)
		printf("err %d\n", err);
	xcache_dump(inode);
	struct xattr *xattr = xcache_lookup(tux_inode(inode)->xcache, 0x777);
	if (!IS_ERR(xattr))
		printf("atom %x => %.*s\n", xattr->atom, xattr->size, xattr->body);
	err = xcache_update(inode, 0x111, "class", 5, 0);
	if (err)
		printf("err %d\n", err);
	/* Zero-length body for atom 0x666 removes/empties that entry. */
	err = xcache_update(inode, 0x666, NULL, 0, 0);
	if (err)
		printf("err %d\n", err);
	err = xcache_update(inode, 0x222, "boooyah", 7, 0);
	if (err)
		printf("err %d\n", err);
	xcache_dump(inode);

	warn("---- test xattr inode table encode and decode ----");
	char attrs[1000] = { };
	char *top = encode_xattrs(inode, attrs, sizeof(attrs));
	hexdump(attrs, top - attrs);
	printf("predicted size = %x, encoded size = %Lx\n", encode_xsize(inode), (L)(top - attrs));
	/* Truncate the cache to empty, then rebuild it from the encoding. */
	inode->xcache->size = offsetof(struct xcache, xattrs);
	char *newtop = decode_attrs(inode, attrs, top - attrs);
	printf("predicted size = %x, xcache size = %x\n", decode_xsize(inode, attrs, top - attrs), inode->xcache->size);
	assert(top == newtop);
	xcache_dump(inode);
	free(inode->xcache);
	inode->xcache = NULL;

	warn("---- xattr update ----");
	set_xattr(inode, "hello", 5, "world!", 6, 0);
	set_xattr(inode, "empty", 5, "zot", 0, 0);	/* zero-size value */
	set_xattr(inode, "foo", 3, "foobar", 6, 0);
	xcache_dump(inode);

	warn("---- xattr remove ----");
//	del_xattr(inode, "hello", 5);
	xcache_dump(inode);

	warn("---- xattr lookup ----");
	/* "world" is intentionally absent to exercise the not-found path. */
	for (int i = 0, len; i < 3; i++) {
		char *namelist[] = { "hello", "foo", "world" }, *name = namelist[i];
		char data[100];
		int size = get_xattr(inode, name, len = strlen(name), data, sizeof(data));
		if (size < 0)
			printf("xattr %.*s not found (%s)\n", len, name, strerror(-size));
		else
			printf("found xattr %.*s => %.*s\n", len, name, size, data);
	}

	warn("---- list xattrs ----");
	int len = xattr_list(inode, attrs, sizeof(attrs));
	/* NULL buffer asks only for the required length. */
	printf("xattr list length = %i\n", xattr_list(inode, NULL, 0));
	hexdump(attrs, len);

	warn("---- atom reverse map ----");
	for (int i = 0; i < 5; i++) {
		unsigned atom = i, offset;
		struct buffer_head *buffer = blockread_unatom(inode, atom, &offset);
		loff_t where = from_be_u64(((be_u64 *)bufdata(buffer))[offset]);
		blockput_dirty(buffer);
		buffer = blockread(mapping(inode), where >> sb->blockbits);
		printf("atom %.3Lx at dirent %.4Lx, ", (L)atom, (L)where);
		hexdump(bufdata(buffer) + (where & sb->blockmask), 16);
		blockput(buffer);
	}

	warn("---- atom recycle ----");
	/* Dropping the last user of "hello" should free its atom. */
	set_xattr(inode, "hello", 5, NULL, 0, 0);
	show_freeatoms(sb);
	printf("got free atom %x\n", get_freeatom(inode));
	printf("got free atom %x\n", get_freeatom(inode));
	printf("got free atom %x\n", get_freeatom(inode));

	warn("---- dump atom table ----");
	dump_atoms(inode);
	show_buffers(inode->map);
	exit(0);
}
/*
 * Chop entries matching @info->key out of the btree, merging half-empty
 * leaves/nodes into their left siblings as it walks, freeing emptied
 * blocks, and collapsing the root when a level drops to a single entry.
 * If @info->blocks is set, the walk suspends after freeing that many
 * blocks and records a resume key in @info->resume.
 *
 * NOTE(review): this definition is truncated in the visible chunk — the
 * tail of the outer leaf-walk loop and the error_leaf_chop:/out: labels
 * referenced below are not shown here.
 */
int tree_chop(struct btree *btree, struct delete_info *info, millisecond_t deadline)
{
	int depth = btree->root.depth, level = depth - 1, suspend = 0;
	struct cursor *cursor;
	struct buffer_head *leafbuf, **prev, *leafprev = NULL;
	struct btree_ops *ops = btree->ops;
	struct sb *sb = btree->sb;
	int ret;

	cursor = alloc_cursor(btree, 0);
	/* One "previous buffer" slot per level, for left-sibling merges. */
	prev = malloc(sizeof(*prev) * depth);
	memset(prev, 0, sizeof(*prev) * depth);

	down_write(&btree->lock);
	probe(btree, info->resume, cursor);
	leafbuf = level_pop(cursor);

	/* leaf walk */
	while (1) {
		ret = (ops->leaf_chop)(btree, info->key, bufdata(leafbuf));
		if (ret) {
			mark_buffer_dirty(leafbuf);
			if (ret < 0)
				goto error_leaf_chop;
		}

		/* try to merge this leaf with prev */
		if (leafprev) {
			struct vleaf *this = bufdata(leafbuf);
			struct vleaf *that = bufdata(leafprev);
			/* try to merge leaf with prev */
			if ((ops->leaf_need)(btree, this) <= (ops->leaf_free)(btree, that)) {
				trace(">>> can merge leaf %p into leaf %p", leafbuf, leafprev);
				(ops->leaf_merge)(btree, that, this);
				remove_index(cursor, level);
				mark_buffer_dirty(leafprev);
				brelse_free(btree, leafbuf);
				//dirty_buffer_count_check(sb);
				goto keep_prev_leaf;
			}
			brelse(leafprev);
		}
		leafprev = leafbuf;

keep_prev_leaf:
		//nanosleep(&(struct timespec){ 0, 50 * 1000000 }, NULL);
		//printf("time remaining: %Lx\n", deadline - gettime());
//		if (deadline && gettime() > deadline)
//			suspend = -1;
		/* Quota reached: arm the suspend machinery (-1 = pending). */
		if (info->blocks && info->freed >= info->blocks)
			suspend = -1;

		/* pop and try to merge finished nodes */
		while (suspend || level_finished(cursor, level)) {
			/* try to merge node with prev */
			if (prev[level]) {
				assert(level); /* node has no prev */
				struct bnode *this = cursor_node(cursor, level);
				struct bnode *that = bufdata(prev[level]);
				trace_off("check node %p against %p", this, that);
				trace_off("this count = %i prev count = %i", bcount(this), bcount(that));
				/* try to merge with node to left */
				if (bcount(this) <= sb->entries_per_node - bcount(that)) {
					trace(">>> can merge node %p into node %p", this, that);
					merge_nodes(that, this);
					remove_index(cursor, level - 1);
					mark_buffer_dirty(prev[level]);
					brelse_free(btree, level_pop(cursor));
					//dirty_buffer_count_check(sb);
					goto keep_prev_node;
				}
				brelse(prev[level]);
			}
			prev[level] = level_pop(cursor);

keep_prev_node:
			/* deepest key in the cursor is the resume address */
			if (suspend == -1 && !level_finished(cursor, level)) {
				suspend = 1; /* only set resume once */
				info->resume = from_be_u64((cursor->path[level].next)->key);
			}
			if (!level) { /* remove depth if possible */
				while (depth > 1 && bcount(bufdata(prev[0])) == 1) {
					trace("drop btree level");
					btree->root.block = bufindex(prev[1]);
					mark_btree_dirty(btree);
					brelse_free(btree, prev[0]);
					//dirty_buffer_count_check(sb);
					depth = --btree->root.depth;
					vecmove(prev, prev + 1, depth);
					//set_sb_dirty(sb);
				}
				//sb->snapmask &= ~snapmask;
				delete_snapshot_from_disk();
				//set_sb_dirty(sb);
				//save_sb(sb);
				ret = suspend;
				goto out;
			}
			level--;
			trace_off(printf("pop to level %i, block %Lx, %i of %i nodes\n", level, bufindex(cursor->path[level].buffer), cursor->path[level].next - cursor_node(cursor, level)->entries, bcount(cursor_node(cursor, level))););
		}

		/* push back down to leaf level */
		while (level < depth - 1) {
			struct buffer_head *buffer = sb_bread(vfs_sb(sb), from_be_u64(cursor->path[level++].next++->block));
			if (!buffer) {
				ret = -EIO;
				goto out;
			}
			level_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries);
			trace_off(printf("push to level %i, block %Lx, %i nodes\n", level, bufindex(buffer), bcount(cursor_node(cursor, level))););
		}
/*
 * Key of the next index entry at @depth on the cursor path, or -1 (all-ones
 * sentinel) when there is no next entry.
 */
tuxkey_t next_key(struct cursor *cursor, int depth)
{
	be_u64 *keyp = next_keyp(cursor, depth);
	if (!keyp)
		return -1;
	return from_be_u64(*keyp);
}
/*
 * VFS rename: move @old_dentry in @old_dir to @new_dentry in @new_dir.
 *
 * If the target exists its dirent is rewritten in place to point at the
 * source inode and its link count dropped; otherwise a new dirent is added
 * (with link-count bookkeeping when a directory moves between parents).
 * The whole operation runs inside a change_begin/change_end transaction.
 */
static int tux3_rename(struct inode *old_dir, struct dentry *old_dentry,
		       struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *old_inode = old_dentry->d_inode;
	struct inode *new_inode = new_dentry->d_inode;
	struct buffer_head *old_buffer, *new_buffer;
	tux_dirent *old_entry, *new_entry;
	int err, new_subdir = 0;

	old_entry = tux_find_dirent(old_dir, old_dentry->d_name.name,
				    old_dentry->d_name.len, &old_buffer);
	if (IS_ERR(old_entry))
		return PTR_ERR(old_entry);
	/* FIXME: is this needed? */
	BUG_ON(from_be_u64(old_entry->inum) != tux_inode(old_inode)->inum);

	change_begin(tux_sb(old_inode->i_sb));
	if (new_inode) {
		int old_is_dir = S_ISDIR(old_inode->i_mode);
		/* Can only replace a directory if it is empty. */
		if (old_is_dir) {
			err = tux_dir_is_empty(new_inode);
			if (err)
				goto error;
		}
		new_entry = tux_find_dirent(new_dir, new_dentry->d_name.name,
					    new_dentry->d_name.len, &new_buffer);
		if (IS_ERR(new_entry)) {
			/* new_inode exists, so its dirent must exist too. */
			BUG_ON(PTR_ERR(new_entry) == -ENOENT);
			err = PTR_ERR(new_entry);
			goto error;
		}
		/* this releases new_buffer */
		tux_update_dirent(new_buffer, new_entry, old_inode);
		new_inode->i_ctime = new_dir->i_ctime;
		/* Displaced directory loses its ".." back-reference too. */
		if (old_is_dir)
			drop_nlink(new_inode);
		inode_dec_link_count(new_inode);
	} else {
		/* A directory moving to a different parent re-links ".."
		 * and so bumps the new parent's link count. */
		new_subdir = S_ISDIR(old_inode->i_mode) && new_dir != old_dir;
		if (new_subdir) {
			if (new_dir->i_nlink >= TUX_LINK_MAX) {
				err = -EMLINK;
				goto error;
			}
		}
		err = __tux_add_dirent(new_dir, new_dentry, old_inode);
		if (err)
			goto error;
		if (new_subdir)
			inode_inc_link_count(new_dir);
	}
	old_inode->i_ctime = new_dir->i_ctime;
	mark_inode_dirty(old_inode);

	err = tux_delete_dirent(old_buffer, old_entry);
	if (err) {
		printk(KERN_ERR "TUX3: %s: couldn't delete old entry (%Lu)\n",
		       __func__, (L)tux_inode(old_inode)->inum);
		/* FIXME: now, we have hardlink even if it's dir. */
		inode_inc_link_count(old_inode);
	}
	if (!err && new_subdir)
		inode_dec_link_count(old_dir);
	change_end(tux_sb(old_inode->i_sb));
	return err;

error:
	change_end(tux_sb(old_inode->i_sb));
	brelse(old_buffer);
	return err;
}
static int tux3_fill_super(struct super_block *sb, void *data, int silent) { struct sb *sbi; int err, blocksize; sbi = kzalloc(sizeof(struct sb), GFP_KERNEL); if (!sbi) return -ENOMEM; sbi->vfs_sb = sb; sb->s_fs_info = sbi; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = TUX3_SUPER_MAGIC; sb->s_op = &tux3_super_ops; sb->s_time_gran = 1; mutex_init(&sbi->loglock); INIT_LIST_HEAD(&sbi->alloc_inodes); err = -EIO; blocksize = sb_min_blocksize(sb, BLOCK_SIZE); if (!blocksize) { if (!silent) printk(KERN_ERR "TUX3: unable to set blocksize\n"); goto error; } if ((err = load_sb(tux_sb(sb)))) { if (!silent) { if (err == -EINVAL) warn("invalid superblock [%Lx]", (L)from_be_u64(*(be_u64 *)sbi->super.magic)); else warn("Unable to read superblock"); } goto error; } if (sbi->blocksize != blocksize) { if (!sb_set_blocksize(sb, sbi->blocksize)) { printk(KERN_ERR "TUX3: blocksize too small for device.\n"); goto error; } } warn("s_blocksize %lu", sb->s_blocksize); err = -ENOMEM; sbi->volmap = tux_new_volmap(tux_sb(sb)); if (!sbi->volmap) goto error; insert_inode_hash(sbi->volmap); sbi->logmap = tux_new_logmap(tux_sb(sb)); if (!sbi->logmap) goto error_logmap; err = load_itable(sbi); if (err) goto error_bitmap; // struct inode *vtable; sbi->bitmap = tux3_iget(sb, TUX_BITMAP_INO); err = PTR_ERR(sbi->bitmap); if (IS_ERR(sbi->bitmap)) goto error_bitmap; sbi->rootdir = tux3_iget(sb, TUX_ROOTDIR_INO); err = PTR_ERR(sbi->rootdir); if (IS_ERR(sbi->rootdir)) goto error_rootdir; sbi->atable = tux3_iget(sb, TUX_ATABLE_INO); err = PTR_ERR(sbi->atable); if (IS_ERR(sbi->atable)) goto error_atable; sb->s_root = d_alloc_root(sbi->rootdir); if (!sb->s_root) goto error_alloc_root; return 0; error_alloc_root: iput(sbi->atable); error_atable: iput(sbi->rootdir); error_rootdir: iput(sbi->bitmap); error_bitmap: iput(sbi->logmap); error_logmap: iput(sbi->volmap); error: kfree(sbi); return err; }