/*
 * Resize the attribute area of inode "inum" inside an inode table leaf.
 *
 * The dictionary lives at the end of the block and is indexed downward:
 * entry i (1-based) is at dict - i and holds the end offset, relative to
 * leaf->table, of the attributes of inode ibase + i - 1.
 *
 * If "inum" lies beyond the current dictionary, the dictionary is first
 * extended with empty entries up to and including "inum".
 *
 * Returns a pointer to the (resized) attribute area, or NULL when "inum"
 * is outside the range one leaf may hold or the leaf lacks free space.
 */
static void *ileaf_resize(struct btree *btree, tuxkey_t inum, vleaf *base, unsigned newsize)
{
	assert(ileaf_sniff(btree, base));
	struct ileaf *leaf = base;
	assert(inum >= ibase(leaf));
	be_u16 *dict = base + btree->sb->blocksize;

	/* Range check at full key width, before narrowing to unsigned */
	if (inum - ibase(leaf) >= btree->entries_per_leaf)
		return NULL;
	unsigned at = inum - ibase(leaf);

	/* Number of empty dict entries to append so that "at" is covered */
	unsigned extend_empty = at < icount(leaf) ? 0 : at - icount(leaf) + 1;
	/* Start of the attrs: end offset of the previous (or last) entry */
	unsigned offset = at && icount(leaf) ? from_be_u16(*(dict - (at < icount(leaf) ? at : icount(leaf)))) : 0;
	unsigned size = at < icount(leaf) ? from_be_u16(*(dict - at - 1)) - offset : 0;
	int more = newsize - size;

	/*
	 * Growing the dictionary consumes free space even when the
	 * attributes themselves do not grow, so check in that case too.
	 * (When extending, size is 0, hence more == newsize.)
	 */
	if ((more > 0 || extend_empty) &&
	    sizeof(*dict) * extend_empty + more > ileaf_free(btree, leaf))
		return NULL;

	/* Append empty entries: each repeats the current end offset */
	while (extend_empty--) {
		*(dict - icount(leaf) - 1) = to_be_u16(atdict(dict, icount(leaf)));
		leaf->count = to_be_u16(from_be_u16(leaf->count) + 1);
	}

	assert(icount(leaf));
	unsigned itop = from_be_u16(*(dict - icount(leaf)));
	void *attrs = leaf->table + offset;
	printf("resize inum 0x%Lx at 0x%x from %x to %x\n", (L)inum, offset, size, newsize);

	/* Slide everything stored after these attrs to its new position */
	assert(itop >= offset + size);
	memmove(attrs + newsize, attrs + size, itop - offset - size);

	/* Entries from "at" onward end "more" bytes later (or earlier) */
	for (int i = at + 1; i <= icount(leaf); i++)
		add_idict(dict - i, more);
	return attrs;
}
/*
 * Search a leaf for the first unused inum at or after "key".
 * (callback for btree_traverse())
 *
 * The search range is [key, min(key_limit, key + len)). An inum is
 * unused when its dict entry adds no bytes (same end offset as the
 * previous entry) or when it lies past the end of the dictionary.
 * On success the inum is stored through "data" (an inum_t *).
 *
 * return value:
 * 1 - found
 * 0 - not found
 */
int ileaf_find_free(struct btree *btree, tuxkey_t key_bottom, tuxkey_t key_limit, void *leaf, tuxkey_t key, u64 len, void *data)
{
	unsigned nr_entries = icount(leaf);
	tuxkey_t pos = key - ibase(leaf);

	key_limit = min(key_limit, key + len);

	if (pos < nr_entries) {
		__be16 *dict = ileaf_dict(btree, leaf);
		unsigned prev_end = atdict(dict, pos);

		/* Walk forward until an entry that adds no bytes */
		for (; pos < nr_entries; pos++) {
			unsigned end = __atdict(dict, pos + 1);

			if (end == prev_end)
				break;
			prev_end = end;
		}
	}

	if (ibase(leaf) + pos >= key_limit)
		return 0;

	*(inum_t *)data = ibase(leaf) + pos;
	return 1;
}
/*
 * Split inode table leaf "from": the entries at and after the split
 * position are moved to "into", and "from" keeps the rest.
 *
 * With SPLIT_AT_INUM the split position is derived from "hint";
 * otherwise it is the first entry whose end offset reaches half of the
 * block size.
 *
 * Returns the ibase assigned to "into".
 */
static tuxkey_t ileaf_split(struct btree *btree, tuxkey_t hint, void *from, void *into)
{
	assert(ileaf_sniff(btree, from));
	struct ileaf *leaf = from, *dest = into;
	__be16 *dict = ileaf_dict(btree, from);
	__be16 *destdict = ileaf_dict(btree, into);

#ifdef SPLIT_AT_INUM
	/*
	 * Avoid giving "from" and "into" the same ibase.
	 * FIXME: we would want to split at better position.
	 */
	if (hint == ibase(leaf))
		hint++;

	trace("split at inum 0x%Lx", hint);
	unsigned at = min_t(tuxkey_t, hint - ibase(leaf), icount(leaf));
#else
	/* binsearch inum starting nearest middle of block */
	unsigned at = 1, hi = icount(leaf);
	while (at < hi) {
		int mid = (at + hi) / 2;
		/*
		 * Dict entries are stored big-endian; compare the decoded
		 * offset, not the raw on-disk value (mid is always >= 1).
		 */
		if (atdict(dict, mid) < (btree->sb->blocksize / 2))
			at = mid + 1;
		else
			hi = mid;
	}
#endif
	/* should trim leading empty inodes on copy */
	unsigned split = atdict(dict, at), free = atdict(dict, icount(leaf));
	trace("split at %x of %x", at, icount(leaf));
	trace("copy out %x bytes at %x", free - split, split);
	assert(free >= split);

	/* Move attribute bytes and their dict entries to the new leaf */
	memcpy(dest->table, leaf->table + split, free - split);
	dest->count = cpu_to_be16(icount(leaf) - at);
	veccopy(destdict - icount(dest), dict - icount(leaf), icount(dest));

	/* Copied offsets were relative to the old table; rebase them */
	for (int i = 1; i <= icount(dest); i++)
		add_idict(destdict - i, -split);

#ifdef SPLIT_AT_INUM
	/* round down to multiple of 64 above ibase */
	inum_t round = hint & ~(inum_t)(btree->entries_per_leaf - 1);
	dest->ibase = cpu_to_be64(round > ibase(leaf) + icount(leaf) ? round : hint);
#else
	dest->ibase = cpu_to_be64(ibase(leaf) + at);
#endif
	leaf->count = cpu_to_be16(at);

	/* Clear everything between the kept attrs and the shrunken dict */
	memset(leaf->table + split, 0, (char *)(dict - icount(leaf)) - (leaf->table + split));
	ileaf_trim(btree, leaf);
	return ibase(dest);
}
/*
 * Find the first inum at or after "goal" that has no attributes stored
 * in this leaf.
 *
 * An entry is empty when its end offset equals the end offset of the
 * entry before it. If every entry from "goal" onward is in use, or
 * "goal" is already past the dictionary, the first inum past the scanned
 * range (respectively "goal" itself) is returned.
 *
 * "goal" must not be below the leaf's ibase.
 */
inum_t find_empty_inode(struct btree *btree, struct ileaf *leaf, inum_t goal)
{
	assert(goal >= ibase(leaf));
	goal -= ibase(leaf);

	be_u16 *dict = (void *)leaf + btree->sb->blocksize;
	unsigned offset = goal && goal < icount(leaf) ? from_be_u16(*(dict - goal)) : 0;

	/*
	 * Keep the index at full inum width: a narrower index would
	 * truncate a large "goal" and yield a wrong inum on return.
	 */
	inum_t i;
	for (i = goal; i < icount(leaf); i++) {
		unsigned limit = from_be_u16(*(dict - i - 1));

		if (offset == limit)
			break;
		offset = limit;
	}
	return i + ibase(leaf);
}
/*
 * Chop inums
 *
 * Remove the attributes of up to "len" inums starting at "start" from
 * the leaf. The dict entries stay in place but become empty; trailing
 * cleanup is left to ileaf_trim().
 *
 * return value:
 * < 0 - error
 *   1 - modified
 *   0 - not modified
 */
static int ileaf_chop(struct btree *btree, tuxkey_t start, u64 len, void *leaf)
{
	struct ileaf *ileaf = leaf;
	__be16 *dict = ileaf_dict(btree, leaf);
	tuxkey_t base = ibase(ileaf);
	unsigned count = icount(ileaf);
	tuxkey_t at = start - base;
	void *startp, *endp, *tailp;
	unsigned size;

	/*
	 * Nothing to do when "start" is outside the dictionary. Written as
	 * "at >= count" because "at + 1 > count" wraps to 0 when
	 * start == base - 1 and would let an out-of-range index through.
	 */
	if (at >= count)
		return 0;

	/* Clamp the range to the entries this leaf actually has */
	len = min_t(u64, len, count - at);

	startp = ileaf->table + atdict(dict, at);
	endp = ileaf->table + atdict(dict, at + len);
	if (startp == endp)
		return 0;

	/* Remove data */
	tailp = ileaf->table + atdict(dict, count);
	memmove(startp, endp, tailp - endp);

	/* Adjust dict */
	size = endp - startp;
	while (at < count) {
		at++;
		add_idict(dict - at, -size);
	}

	ileaf_trim(btree, leaf);

	return 1;
}
/*
 * Look up the attributes of "inum" in a leaf.
 *
 * Stores the attribute size in *result (0 when the inode has no
 * attributes or lies past the dictionary) and returns a pointer to the
 * attributes inside leaf->table, or NULL when the size is 0.
 *
 * "inum" must not be below the leaf's ibase.
 */
void *ileaf_lookup(struct btree *btree, inum_t inum, struct ileaf *leaf, unsigned *result)
{
	assert(inum >= ibase(leaf));

	tuxkey_t index = inum - ibase(leaf);
	tuxkey_t nbytes = 0;
	void *found = NULL;

	trace("lookup inode 0x%Lx, %Lx + %Lx", inum, ibase(leaf), index);

	if (index < icount(leaf)) {
		__be16 *dict = ileaf_dict(btree, leaf);
		unsigned start = atdict(dict, index);

		/* Size is the distance between consecutive end offsets */
		nbytes = __atdict(dict, index + 1) - start;
		if (nbytes)
			found = leaf->table + start;
	}

	*result = nbytes;
	return found;
}
static int ileaf_merge(struct btree *btree, void *vinto, void *vfrom) { struct ileaf *into = vinto, *from = vfrom; unsigned fromcount = icount(from); /* If "from" is empty, does nothing */ if (!fromcount) return 1; assert(ibase(from) > ibase(into)); tuxkey_t fromibase = ibase(from); unsigned count = icount(into); int hole = fromibase - ibase(into) + count; __be16 *dict = ileaf_dict(btree, into); __be16 *fromdict = ileaf_dict(btree, from); int need_size = hole * sizeof(*dict) + ileaf_need(btree, from); if (ileaf_free(btree, into) < need_size) return 0; /* Fill hole of dict until from_ibase */ unsigned limit = atdict(dict, count); __be16 __limit = cpu_to_be16(limit); while (hole--) { count++; *(dict - count) = __limit; } /* Copy data from "from" */ unsigned fromlimit = atdict(fromdict, fromcount); memcpy(into->table + limit, from->table, fromlimit); /* Adjust copying fromdict */ if (limit) { int i; for (i = 1; i <= fromcount; i++) add_idict(dict - i, limit); } veccopy(dict - count - fromcount, fromdict - fromcount, fromcount); into->count = cpu_to_be16(count + fromcount); return 1; }
/*
 * Delete the attributes of "inum" from a leaf.
 *
 * return value:
 * 0       - attributes removed
 * -EINVAL - "inum" is outside the inum range of this leaf
 * -ENOENT - "inum" has no attributes in this leaf
 */
int ileaf_purge(struct btree *btree, inum_t inum, struct ileaf *leaf)
{
	if (inum < ibase(leaf) || inum - ibase(leaf) >= btree->entries_per_leaf)
		return -EINVAL;

	unsigned at = inum - ibase(leaf);

	/*
	 * An inum past the last dict entry has nothing stored; reading the
	 * dict there would fetch bytes that are not dictionary entries.
	 */
	if (at >= icount(leaf))
		return -ENOENT;

	be_u16 *dict = (void *)leaf + btree->sb->blocksize;
	unsigned offset = atdict(dict, at);
	unsigned size = from_be_u16(*(dict - at - 1)) - offset;

	printf("delete inode %Lx from %p[%x/%x]\n", (L)inum, leaf, at, size);
	if (!size)
		return -ENOENT;

	/* Close the gap by sliding the following attributes down */
	unsigned free = from_be_u16(*(dict - icount(leaf))), tail = free - offset - size;
	assert(offset + size + tail <= free);
	memmove(leaf->table + offset, leaf->table + offset + size, tail);

	/* Entries from "at" onward now end "size" bytes earlier */
	for (int i = at + 1; i <= icount(leaf); i++)
		add_idict(dict - i, -size);

	ileaf_trim(btree, leaf);
	return 0;
}
/*
 * Suggest the key at which to split a leaf for inserting "key".
 *
 * If "key" lies past the last entry, the hint is "key" rounded down to
 * a multiple of entries_per_leaf (presumably a power of two, given the
 * mask arithmetic - TODO confirm). Otherwise the hint is the middle
 * entry of the leaf. "size" is currently unused.
 */
static tuxkey_t ileaf_split_hint(struct btree *btree, struct ileaf *ileaf, tuxkey_t key, int size)
{
	/*
	 * FIXME: make sure there is space for size.
	 * FIXME: better split position?
	 */
	tuxkey_t base = ibase(ileaf);
	unsigned count = icount(ileaf);

	/*
	 * Widen before inverting: a mask built in a type narrower than
	 * tuxkey_t would clear the upper bits of the key.
	 */
	if (key >= base + count)
		return key & ~(tuxkey_t)(btree->entries_per_leaf - 1);

	return base + count / 2;
}
static void ileaf_dump(struct btree *btree, void *vleaf) { if (!tux3_trace) return; struct ileaf_attr_ops *attr_ops = btree->ops->private_ops; struct ileaf *leaf = vleaf; inum_t inum = ibase(leaf); __be16 *dict = ileaf_dict(btree, leaf); unsigned offset = 0; trace_on("inode table block 0x%Lx/%i (%x bytes free)", ibase(leaf), icount(leaf), ileaf_free(btree, leaf)); for (int i = 0; i < icount(leaf); i++, inum++) { int limit = __atdict(dict, i + 1), size = limit - offset; if (!size) continue; if (size < 0) trace_on(" 0x%Lx: <corrupt>\n", inum); else if (!size) trace_on(" 0x%Lx: <empty>\n", inum); else if (attr_ops == &iattr_ops) { /* FIXME: this doesn't work in kernel */ struct tux3_inode tuxnode = {}; struct inode *inode = &tuxnode.vfs_inode; void *attrs = leaf->table + offset; inode->i_sb = vfs_sb(btree->sb), attr_ops->decode(btree, inode, attrs, size); free_xcache(inode); } offset = limit; } }
static void ileaf_dump(struct btree *btree, vleaf *vleaf) { struct sb *sb = btree->sb; struct ileaf *leaf = vleaf; inum_t inum = ibase(leaf); be_u16 *dict = vleaf + sb->blocksize; unsigned offset = 0; printf("inode table block 0x%Lx/%i (%x bytes free)\n", (L)ibase(leaf), icount(leaf), ileaf_free(btree, leaf)); //hexdump(dict - icount(leaf), icount(leaf) * 2); for (int i = -1; -i <= icount(leaf); i--, inum++) { int limit = from_be_u16(dict[i]), size = limit - offset; if (!size) continue; printf(" 0x%Lx: ", (L)inum); //printf("[%x] ", offset); if (size < 0) printf("<corrupt>\n"); else if (!size) printf("<empty>\n"); else { /* FIXME: this doesn't work in kernel */ struct inode inode = { .i_sb = vfs_sb(btree->sb) }; unsigned xsize = decode_xsize(&inode, leaf->table + offset, size); tux_inode(&inode)->xcache = xsize ? new_xcache(xsize) : NULL; decode_attrs(&inode, leaf->table + offset, size); dump_attrs(&inode); xcache_dump(&inode); free(tux_inode(&inode)->xcache); } offset = limit; } } void *ileaf_lookup(struct btree *btree, inum_t inum, struct ileaf *leaf, unsigned *result) { assert(inum >= ibase(leaf)); assert(inum < ibase(leaf) + btree->entries_per_leaf); unsigned at = inum - ibase(leaf), size = 0; void *attrs = NULL; printf("lookup inode 0x%Lx, %Lx + %x\n", (L)inum, (L)ibase(leaf), at); if (at < icount(leaf)) { be_u16 *dict = (void *)leaf + btree->sb->blocksize; unsigned offset = atdict(dict, at); if ((size = from_be_u16(*(dict - at - 1)) - offset)) attrs = leaf->table + offset; } *result = size; return attrs; }
/*
 * Enumerate inum
 * (callback for btree_traverse())
 *
 * Invokes cb->callback for every inum in [key, key + len) that has
 * attributes stored in this leaf, passing the inum, a pointer to its
 * attributes, their size, and cb->data. Stops at, and returns, the first
 * non-zero callback result; returns 0 otherwise.
 */
int ileaf_enumerate(struct btree *btree, tuxkey_t key_bottom, tuxkey_t key_limit, void *leaf, tuxkey_t key, u64 len, void *data)
{
	struct ileaf_enumrate_cb *cb = data;
	struct ileaf *ileaf = leaf;
	__be16 *dict = ileaf_dict(btree, ileaf);
	tuxkey_t base = ibase(ileaf);
	tuxkey_t pos = key - base;
	/* Do not walk past the requested range or the dictionary */
	unsigned end = min_t(u64, key + len - base, icount(ileaf));

	if (pos >= end)
		return 0;

	unsigned prev_end = atdict(dict, pos);

	while (pos < end) {
		unsigned next_end = __atdict(dict, pos + 1);

		/* Only entries that actually hold bytes are reported */
		if (next_end > prev_end) {
			void *attrs = ileaf->table + prev_end;
			unsigned size = next_end - prev_end;
			inum_t inum = base + pos;
			int err = cb->callback(btree, inum, attrs, size, cb->data);

			if (err)
				return err;
			prev_end = next_end;
		}
		pos++;
	}

	return 0;
}
/*
 * Split inode table leaf "from": the entries at and after the split
 * position are moved to "into", and "from" keeps the rest.
 *
 * With SPLIT_AT_INUM the split position is derived from "inum" (which
 * must not be below the leaf's ibase); otherwise it is the first entry
 * whose end offset reaches half of the block size.
 *
 * Returns the ibase assigned to "into".
 */
static tuxkey_t ileaf_split(struct btree *btree, tuxkey_t inum, vleaf *from, vleaf *into)
{
	assert(ileaf_sniff(btree, from));
	struct ileaf *leaf = from, *dest = into;
	be_u16 *dict = from + btree->sb->blocksize, *destdict = into + btree->sb->blocksize;

#ifdef SPLIT_AT_INUM
	printf("split at inum 0x%Lx\n", (L)inum);
	assert(inum >= ibase(leaf));
	unsigned at = inum - ibase(leaf) < icount(leaf) ? inum - ibase(leaf) : icount(leaf);
#else
	/* binsearch inum starting nearest middle of block */
	unsigned at = 1, hi = icount(leaf);
	while (at < hi) {
		int mid = (at + hi) / 2;
		/*
		 * Dict entries are stored big-endian; compare the decoded
		 * offset, not the raw on-disk value.
		 */
		if (from_be_u16(*(dict - mid)) < (btree->sb->blocksize / 2))
			at = mid + 1;
		else
			hi = mid;
	}
#endif
	/* should trim leading empty inodes on copy */
	unsigned split = atdict(dict, at), free = from_be_u16(*(dict - icount(leaf)));
	printf("split at %x of %x\n", at, icount(leaf));
	printf("copy out %x bytes at %x\n", free - split, split);
	assert(free >= split);

	/* Move attribute bytes and their dict entries to the new leaf */
	memcpy(dest->table, leaf->table + split, free - split);
	dest->count = to_be_u16(icount(leaf) - at);
	veccopy(destdict - icount(dest), dict - icount(leaf), icount(dest));

	/* Copied offsets were relative to the old table; rebase them */
	for (int i = 1; i <= icount(dest); i++)
		add_idict(destdict - i, -split);

#ifdef SPLIT_AT_INUM
	/* round down to multiple of 64 above ibase */
	inum_t round = inum & ~(inum_t)(btree->entries_per_leaf - 1);
	dest->ibase = to_be_u64(round > ibase(leaf) + icount(leaf) ? round : inum);
#else
	dest->ibase = to_be_u64(ibase(leaf) + at);
#endif
	leaf->count = to_be_u16(at);

	/* Clear everything between the kept attrs and the shrunken dict */
	memset(leaf->table + split, 0, (char *)(dict - icount(leaf)) - (leaf->table + split));
	ileaf_trim(btree, leaf);
	return ibase(dest);
}
/*
 * Resize the attribute area of inode "inum" inside an inode table leaf.
 *
 * If "inum" lies past the current dictionary, the dictionary is extended
 * with empty entries up to and including "inum" first.
 *
 * Returns a pointer to the (resized) attribute area inside ileaf->table,
 * or NULL when the leaf cannot hold the result.
 *
 * "inum" must not be below the leaf's ibase.
 */
static void *ileaf_resize(struct btree *btree, tuxkey_t inum, void *vleaf, int newsize)
{
	struct ileaf *ileaf = vleaf;
	__be16 *dict = ileaf_dict(btree, ileaf);
	unsigned count = icount(ileaf);
	tuxkey_t at = inum - ibase(ileaf);
	int extend_dict, offset, size;

	assert(inum >= ibase(ileaf));

	/* Get existent attributes, and calculate expand/shrink size */
	if (at >= count) {
		/*
		 * Reject an index whose dict alone would fill the block.
		 * The division-based test runs first so that a huge "at"
		 * cannot wrap the multiplication and slip through.
		 */
		if (at >= btree->sb->blocksize / sizeof(*dict) ||
		    (at + 1) * sizeof(*dict) >= btree->sb->blocksize)
			return NULL;

		/* Need to extend dict */
		extend_dict = (at + 1 - count) * sizeof(*dict);
		offset = atdict(dict, count);
		size = 0;
	} else {
		/* "at" is in dict, so get attr size */
		extend_dict = 0;
		offset = atdict(dict, at);
		size = __atdict(dict, at + 1) - offset;
	}

	/*
	 * Only a net growth needs free space. Testing the sign first keeps
	 * a shrink (negative "need") from being compared as a huge value
	 * should ileaf_free() return an unsigned type (its declaration is
	 * not visible here).
	 */
	int need = newsize - size + extend_dict;
	if (need > 0 && ileaf_free(btree, ileaf) < need)
		return NULL;

	/* Extend dict: new entries are empty, repeating the end offset */
	if (extend_dict) {
		__be16 limit = cpu_to_be16(atdict(dict, count));

		while (count < at + 1) {
			count++;
			*(dict - count) = limit;
		}
		ileaf->count = cpu_to_be16(count);
	}

	void *attrs = ileaf->table + offset;
	if (newsize != size) {
		/* Expand/Shrink attr space */
		unsigned limit = __atdict(dict, count);

		assert(limit >= offset + size);
		memmove(attrs + newsize, attrs + size, limit - offset - size);

		/* Adjust dict: entries from "at" onward move by diff */
		int diff = newsize - size;

		for (at++; at <= count; at++)
			add_idict(dict - at, diff);
	}

	return attrs;
}