Exemple #1
0
/* search in a node's child */
int _search_child(struct cursor *cur,
                  struct search *so,
                  struct node *n,
                  int childnum)
{
	int ret;
	NID child_nid;
	int child_to_search;
	struct node *child;

	nassert(n->height > 0);
	ancestors_append(cur, n->parts[childnum].msgbuf);

	child_nid = n->parts[childnum].child_nid;
	if (!cache_get_and_pin(cur->tree->cf, child_nid, (void**)&child, L_READ)) {
		__ERROR("cache get node error, nid [%" PRIu64 "]", child_nid);

		return NESS_ERR;
	}

	child_to_search = _search_in_which_child(so, child);
	ret = _search_node(cur, so, child, child_to_search);

	/* unpin */
	cache_unpin(cur->tree->cf, child->cpair);

	return ret;
}
Exemple #2
0
/*
 * EFFECT:
 *	- flush in background thread
 * ENTER:
 *	- parent is already locked
 * EXIT:
 *	- nodes are all unlocked
 */
void tree_flush_node_on_background(struct tree *t, struct node *parent)
{
    LOG;
	int childnum;
	enum reactivity re;
	struct node *child;
	struct partition *part;

	nassert(parent->height > 0);
	childnum = node_find_heaviest_idx(parent);
	part = &parent->parts[childnum];

	/* pin the child */
	if (cache_get_and_pin(t->cf, part->child_nid, (void**)&child, L_WRITE) != NESS_OK) {
		__ERROR("cache get node error, nid [%" PRIu64 "]", part->child_nid);
		return;
	}

	re = get_reactivity(t, child);
	if (re == STABLE) {
		/* detach buffer from parent */
		struct nmb *buf = part->ptr.u.nonleaf->buffer;
		node_set_dirty(parent);
		part->ptr.u.nonleaf->buffer = nmb_new(t->e);

		/* flush it in background thread */
		_place_node_and_buffer_on_background(t, child, buf);
		cache_unpin(t->cf, parent->cpair, make_cpair_attr(parent));
	} else {
		/* the child is reactive, we deal it in main thread */
		_child_maybe_reactivity(t, parent, child);
	}
}
Exemple #3
0
/*
 * PROCESS:
 *	- put cmd to root
 *	- check root reactivity
 *	    -- FISSIBLE: split root
 *	    -- FLUSHABLE: flush root in background
 * ENTER:
 *	- no nodes are locked
 * EXITS:
 *	- all nodes are unlocked
 */
int root_put_cmd(struct tree *t, struct bt_cmd *cmd)
{
    LOG;
	struct node *root;
	enum reactivity re;
	volatile int hasput = 0;
	enum lock_type locktype = L_READ;
    /* printf("||||%s", (char *)cmd->val->data); */

CHANGE_LOCK_TYPE:
	if (!cache_get_and_pin(t->cf, t->hdr->root_nid, (void**)&root, locktype))
		return NESS_ERR;
    /* printf("|||%lld", root->nid); */
	if (!hasput) {
		node_put_cmd(t, root, cmd);
		hasput = 1;
	}

	re = get_reactivity(t, root);
	switch (re) {
	case STABLE:
        printf("|||STABLE");
		cache_unpin(t->cf, root->cpair, make_cpair_attr(root));
		break;
	case FISSIBLE:
        printf("|||FISSIBLE");
		if (locktype == L_READ) {
			cache_unpin(t->cf, root->cpair, make_cpair_attr(root));
			locktype = L_WRITE;
			goto CHANGE_LOCK_TYPE;
		}
		_root_fissible(t, root);
		break;
	case FLUSHBLE:
        printf("|||FLUSHBLE");
		if (locktype == L_READ) {
			cache_unpin(t->cf, root->cpair, make_cpair_attr(root));
			locktype = L_WRITE;
			goto CHANGE_LOCK_TYPE;
		}
		tree_flush_node_on_background(t, root);
		break;
	}

	return NESS_OK;
}
Exemple #4
0
/*
 *			|key44, key88|
 *			/		\
 *		  |key10, key20|	|key90|
 *	       /	|	 \	      \
 * |msgbuf0|	   |msgbuf1| |msgbuf2|    |msgbuf3|
 *
 *		      (a tree with height 2)
 *
 * cursor search is very similar to depth-first-search algorithm.
 * for cursor_seektofirst operation, the root-to-leaf path is:
 * key44 -> key10 -> msgbuf0
 * and do the inner sliding along with the msgbuf.
 * if we get the end of one leaf, CURSOR_EOF will be returned to upper on,
 * and we also set search->pivot_bound = key10, for the next time,
 * the root-to-leaf path(restart with a jump) will be:
 * key44 -> key10 -> msgbuf1
 */
void _tree_search(struct cursor * cur, struct search * so)
{
	int r;
	NID root_nid;
	int child_to_search;
	struct buftree *t;
	struct node *root;

	t = cur->tree;

TRY_AGAIN:
	root_nid = t->hdr->root_nid;
	if (!cache_get_and_pin(t->cf, root_nid, (void**)&root, L_READ)) {
		__ERROR("cache get root node error, nid [%" PRIu64 "]", root_nid);

		return;
	}

	child_to_search = _search_in_which_child(so, root);
	r = _search_node(cur, so, root, child_to_search);

	/* unpin */
	cache_unpin(t->cf, root->cpair);

	switch (r) {
	case CURSOR_CONTINUE:	  /* got the end of leaf */
		goto TRY_AGAIN;
		break;
	case CURSOR_TRY_AGAIN:	  /* got the end of node */
		goto TRY_AGAIN;
		break;
	case CURSOR_EOF:
		break;
	default:
		break;
	}
}
Exemple #5
0
/*
 *			|key44, key88|
 *			/		\
 *		  |key10, key20|	|key90|
 *	       /	|	 \	      \
 * |msgbuf0|	   |msgbuf1| |msgbuf2|    |msgbuf3|
 *
 *		      (a tree with height 2)
 *
 * cursor search is very similar to depth-first-search algorithm.
 * for cursor_seektofirst operation, the root-to-leaf path is:
 * key44 -> key10 -> msgbuf0
 * and do the inner sliding along with the msgbuf.
 * if we get the end of one leaf, CURSOR_EOF will be returned to upper on,
 * and we also set search->pivot_bound = key10, for the next time,
 * the root-to-leaf path(restart with a jump) will be:
 * key44 -> key10 -> msgbuf1
 */
void _tree_search(struct cursor * cur, struct search * so)
{
	int r;
	NID root_nid;
	int child_to_search;
	struct tree *t;
	struct node *root;

	t = cur->tree;

try_again:
	root_nid = t->hdr->root_nid;
	if (cache_get_and_pin(t->cf, root_nid, &root, L_READ) < 0) {
		__ERROR("cache get root node error, nid [%" PRIu64 "]",
		        root_nid);

		return;
	}

	child_to_search = _search_in_which_child(so, root);
	r = _search_node(cur, so, root, child_to_search);

	/* unpin */
	cache_unpin_readonly(t->cf, root);

	switch (r) {
	case CURSOR_CONTINUE:
		break;
	case CURSOR_TRY_AGAIN:
		goto try_again;
		break;
	case CURSOR_EOF:
		break;
	default:
		break;
	}
}
Exemple #6
0
struct tree *tree_open(const char *dbname,
                       struct env *e,
                       struct tree_callback *tcb) {
	int fd;
	int flag;
	mode_t mode;
	int is_create = 0;
	struct tree *t;
	struct node *root;
	struct cache_file *cf;

	t = xcalloc(1, sizeof(*t));
	t->e = e;

	mode = S_IRWXU | S_IRWXG | S_IRWXO;
	flag = O_RDWR | O_BINARY;
	if (e->use_directio)
		fd = ness_os_open_direct(dbname, flag, mode);
	else
		fd = ness_os_open(dbname, flag, mode);

	if (fd == -1) {
		if (e->use_directio)
			fd = ness_os_open(dbname, flag | O_CREAT, mode);
		else
			fd = ness_os_open_direct(dbname, flag | O_CREAT, mode);
		if (fd == -1)
			goto ERR;
		is_create = 1;
	}

	t->fd = fd;
	t->hdr = hdr_new(e);

	/* tree header */
	if (!is_create) {
		tcb->fetch_hdr_cb(fd, t->hdr);
	}

	/* create cache file */
	cf = cache_file_create(e->cache, t->fd, t->hdr, tcb);
	t->cf = cf;

	/* tree root node */
	if (is_create) {
		NID nid = hdr_next_nid(t->hdr);
		node_create(nid, 0, 1, t->hdr->version, t->e, &root);
		cache_put_and_pin(cf, nid, root);
		root->isroot = 1;
		node_set_dirty(root);

		cache_unpin(cf, root->cpair, make_cpair_attr(root));
		t->hdr->root_nid = root->nid;
		__DEBUG("create new root, NID %"PRIu64, root->nid);
	} else {
		/* get the root node */
		if (cache_get_and_pin(cf, t->hdr->root_nid, (void**)&root, L_READ) != NESS_OK)
			__PANIC("get root from cache error [%" PRIu64 "]", t->hdr->root_nid);

		root->isroot = 1;
		cache_unpin(cf, root->cpair, make_cpair_attr(root));
		__DEBUG("fetch root, NID %"PRIu64, root->nid);
	}

	return t;

ERR:
	xfree(t);
	return NESS_ERR;
}
Exemple #7
0
void _flush_buffer_to_child(struct tree *t, struct node *child, struct nmb *buf)
{
	struct mb_iter iter;

	mb_iter_init(&iter, buf->pma);
	while (mb_iter_next(&iter)) {
		/* TODO(BohuTANG): check msn */
		struct nmb_values nvalues;

		nmb_get_values(&iter, &nvalues);

		struct bt_cmd cmd = {
			.msn = nvalues.msn,
			.type = nvalues.type,
			.key = &nvalues.key,
			.val = &nvalues.val,
			.xidpair = nvalues.xidpair
		};
		node_put_cmd(t, child, &cmd);
	}
}

void _flush_some_child(struct tree *t, struct node *parent);

/*
 * PROCESS:
 *	- check child reactivity
 *	- if FISSIBLE: split child
 *	- if FLUSHBLE: flush buffer from child
 * ENTER:
 *	- parent is already locked
 *	- child is already locked
 * EXIT:
 *	- parent is unlocked
 *	- no nodes are locked
 */
void _child_maybe_reactivity(struct tree *t, struct node *parent, struct node *child)
{
	enum reactivity re = get_reactivity(t, child);

	switch (re) {
	case STABLE:
		cache_unpin(t->cf, child->cpair, make_cpair_attr(child));
		cache_unpin(t->cf, parent->cpair, make_cpair_attr(parent));
		break;
	case FISSIBLE:
		node_split_child(t, parent, child);
		cache_unpin(t->cf, child->cpair, make_cpair_attr(child));
		cache_unpin(t->cf, parent->cpair, make_cpair_attr(parent));
		break;
	case FLUSHBLE:
		cache_unpin(t->cf, parent->cpair, make_cpair_attr(parent));
		_flush_some_child(t, child);
		break;
	}
}

/*
 * PROCESS:
 *	- pick a heaviest child of parent
 *	- flush from parent to child
 *	- maybe split/flush child recursively
 * ENTER:
 *	- parent is already locked
 * EXIT:
 *	- parent is unlocked
 *	- no nodes are locked
 */
void _flush_some_child(struct tree *t, struct node *parent)
{
	int childnum;
	enum reactivity re;
	struct node *child;
	struct partition *part;
	struct nmb *buffer;
	struct timespec t1, t2;

	childnum = node_find_heaviest_idx(parent);
	nassert(childnum < parent->n_children);
	part = &parent->parts[childnum];
	buffer = part->ptr.u.nonleaf->buffer;
	if (cache_get_and_pin(t->cf, part->child_nid, (void**)&child, L_WRITE) != NESS_OK) {
		__ERROR("cache get node error, nid [%" PRIu64 "]", part->child_nid);
		return;
	}

	ngettime(&t1);
	re = get_reactivity(t, child);
	if (re == STABLE) {
		node_set_dirty(parent);
		part->ptr.u.nonleaf->buffer = nmb_new(t->e);
		_flush_buffer_to_child(t, child, buffer);
		nmb_free(buffer);
	}
	ngettime(&t2);
	status_add(&t->e->status->tree_flush_child_costs, time_diff_ms(t1, t2));
	status_increment(&t->e->status->tree_flush_child_nums);

	_child_maybe_reactivity(t, parent, child);
}