/* * EFFECT: * - split leaf&lmb into two leaves:a & b * a&b are both the half of the lmb * * PROCESS: * - leaf: * +-----------------------------------+ * | 0 | 1 | 2 | 3 | 4 | 5 | * +-----------------------------------+ * * - split: * root * +--------+ * | 2 | * +--------+ * / \ * +-----------------+ +------------------+ * | 0 | 1 | 2 | | 3 | 4 | 5 | * +-----------------+ +------------------+ * nodea nodeb * * ENTER: * - leaf is already locked (L_WRITE) * EXITS: * - a is locked * - b is locked */ void leaf_split(void *tree, struct node *node, struct node **a, struct node **b, struct msg **split_key) { struct partition *pa; struct partition *pb; struct node *leafa; struct node *leafb; struct lmb *mb; struct lmb *mba; struct lmb *mbb; struct msg *sp_key = NULL; struct buftree *t = (struct buftree*)tree; leafa = node; pa = &leafa->parts[0]; /* split lmb of leaf to mba & mbb */ mb = pa->msgbuf; lmb_split(mb, &mba, &mbb, &sp_key); lmb_free(mb); /* reset leafa buffer */ pa->msgbuf = mba; /* new leafb */ NID nid = hdr_next_nid(t->hdr); leaf_new(t->hdr, nid, 0, 1, &leafb); leaf_msgbuf_init(leafb); cache_put_and_pin(t->cf, nid, leafb); pb = &leafb->parts[0]; lmb_free(pb->msgbuf); pb->msgbuf = mbb; /* set dirty */ node_set_dirty(leafa); node_set_dirty(leafb); *a = leafa; *b = leafb; *split_key = sp_key; }
/* * EFFECT: * - split the fissible root */ void _root_fissible(struct tree *t, struct node *root) { struct node *new_root; uint32_t new_root_height = 1; uint32_t new_root_children = 2; /* alloc a nonleaf node with 2 children */ NID nid = hdr_next_nid(t->hdr); node_create(nid, new_root_height, new_root_children, t->hdr->version, t->e, &new_root); cache_put_and_pin(t->cf, nid, new_root); _root_split(t, new_root, root); cache_unpin(t->cf, root->cpair, make_cpair_attr(root)); cache_unpin(t->cf, new_root->cpair, make_cpair_attr(new_root)); }
struct tree *tree_open(const char *dbname, struct env *e, struct tree_callback *tcb) { int fd; int flag; mode_t mode; int is_create = 0; struct tree *t; struct node *root; struct cache_file *cf; t = xcalloc(1, sizeof(*t)); t->e = e; mode = S_IRWXU | S_IRWXG | S_IRWXO; flag = O_RDWR | O_BINARY; if (e->use_directio) fd = ness_os_open_direct(dbname, flag, mode); else fd = ness_os_open(dbname, flag, mode); if (fd == -1) { if (e->use_directio) fd = ness_os_open(dbname, flag | O_CREAT, mode); else fd = ness_os_open_direct(dbname, flag | O_CREAT, mode); if (fd == -1) goto ERR; is_create = 1; } t->fd = fd; t->hdr = hdr_new(e); /* tree header */ if (!is_create) { tcb->fetch_hdr_cb(fd, t->hdr); } /* create cache file */ cf = cache_file_create(e->cache, t->fd, t->hdr, tcb); t->cf = cf; /* tree root node */ if (is_create) { NID nid = hdr_next_nid(t->hdr); node_create(nid, 0, 1, t->hdr->version, t->e, &root); cache_put_and_pin(cf, nid, root); root->isroot = 1; node_set_dirty(root); cache_unpin(cf, root->cpair, make_cpair_attr(root)); t->hdr->root_nid = root->nid; __DEBUG("create new root, NID %"PRIu64, root->nid); } else { /* get the root node */ if (cache_get_and_pin(cf, t->hdr->root_nid, (void**)&root, L_READ) != NESS_OK) __PANIC("get root from cache error [%" PRIu64 "]", t->hdr->root_nid); root->isroot = 1; cache_unpin(cf, root->cpair, make_cpair_attr(root)); __DEBUG("fetch root, NID %"PRIu64, root->nid); } return t; ERR: xfree(t); return NESS_ERR; }
/* * +-----------------------------------------------+ * | 5 | 7 | 9 | * +-----------------------------------------------+ * | * +---------------+ * | 60 | 61 | 62 | * +---------------+ * * +---------------------------------------------------------------+ * | 5 | 60 | 7 | 9 | * +---------------------------------------------------------------+ * | | * +--------+ +---------+ * | 60 | | 61 | 62 | * +--------+ +---------+ * * * ENTER: * - node is already locked(L_WRITE) * EXITS: * - a is locked(L_WRITE) * - b is locked(L_WRITE) */ static void _node_split(struct tree *t, struct node *node, struct node **a, struct node **b, struct msg **split_key) { int i; int pivots_old; int pivots_in_a; int pivots_in_b; struct node *nodea; struct node *nodeb; struct msg *spk; __DEBUG("nonleaf split begin, NID %"PRIu64"" ", nodesz %d" ", nodec %d" ", children %d" , node->nid , node_size(node) , node_count(node) , node->n_children); nodea = node; pivots_old = node->n_children - 1; nassert(pivots_old > 2); pivots_in_a = pivots_old / 2; pivots_in_b = pivots_old - pivots_in_a; /* node a */ nodea->n_children = pivots_in_a + 1; /* node b */ NID nid = hdr_next_nid(t->hdr); node_create_light(nid, node->height > 0 ? 1 : 0, pivots_in_b + 1, t->hdr->version, t->e, &nodeb); cache_put_and_pin(t->cf, nid, nodeb); for (i = 0; i < (pivots_in_b); i++) nodeb->pivots[i] = nodea->pivots[pivots_in_a + i]; for (i = 0; i < (pivots_in_b + 1); i++) nodeb->parts[i] = nodea->parts[pivots_in_a + i]; /* the rightest partition of nodea */ struct child_pointer *ptr = &nodea->parts[pivots_in_a].ptr; if (nodea->height > 0) ptr->u.nonleaf = create_nonleaf(t->e); else ptr->u.leaf = create_leaf(t->e); /* split key */ spk = msgdup(&node->pivots[pivots_in_a - 1]); node_set_dirty(nodea); node_set_dirty(nodeb); __DEBUG("nonleaf split end, nodea NID %"PRIu64"" ", nodesz %d" ", nodec %d" ", children %d" , nodea->nid , node_size(nodea) , node_count(nodea) , nodea->n_children); __DEBUG("nonleaf split end, nodeb NID %"PRIu64"" ", nodesz %d" ", nodec %d" ", children %d" , nodeb->nid , node_size(nodeb) , node_count(nodeb) , nodeb->n_children); *a = nodea; *b = nodeb; *split_key = spk; }
/* * EFFECT: * - split leaf&lmb into two leaves:a & b * a&b are both the half of the lmb * * PROCESS: * - leaf: * +-----------------------------------+ * | 0 | 1 | 2 | 3 | 4 | 5 | * +-----------------------------------+ * * - split: * root * +--------+ * | 2 | * +--------+ * / \ * +-----------------+ +------------------+ * | 0 | 1 | 2 | | 3 | 4 | 5 | * +-----------------+ +------------------+ * nodea nodeb * * ENTER: * - leaf is already locked (L_WRITE) * EXITS: * - a is locked * - b is locked */ static void _leaf_and_lmb_split(struct tree *t, struct node *leaf, struct node **a, struct node **b, struct msg **split_key) { struct child_pointer *cptra; struct child_pointer *cptrb; struct node *leafa; struct node *leafb; struct lmb *mb; struct lmb *mba; struct lmb *mbb; struct msg *sp_key = NULL; __DEBUG("leaf split begin, NID %"PRIu64"" ", nodesz %d" ", nodec %d" ", children %d" , leaf->nid , node_size(leaf) , node_count(leaf) , leaf->n_children); leafa = leaf; cptra = &leafa->parts[0].ptr; /* split lmb of leaf to mba & mbb */ mb = cptra->u.leaf->buffer; lmb_split(mb, &mba, &mbb, &sp_key); lmb_free(mb); /* reset leafa buffer */ cptra->u.leaf->buffer = mba; /* new leafb */ NID nid = hdr_next_nid(t->hdr); node_create(nid, 0, 1, t->hdr->version, t->e, &leafb); cache_put_and_pin(t->cf, nid, leafb); cptrb = &leafb->parts[0].ptr; lmb_free(cptrb->u.leaf->buffer); cptrb->u.leaf->buffer = mbb; /* set dirty */ node_set_dirty(leafa); node_set_dirty(leafb); __DEBUG("leaf split end, leafa NID %"PRIu64"" ", nodesz %d" ", nodec %d" ", children %d" , leafa->nid , node_size(leafa) , node_count(leafa) , leafa->n_children); __DEBUG("leaf split end, leafb NID %"PRIu64"" ", nodesz %d" ", nodec %d" ", children %d" , leafb->nid , node_size(leafb) , node_count(leafb) , leafb->n_children); *a = leafa; *b = leafb; *split_key = sp_key; status_increment(&t->e->status->tree_leaf_split_nums); }