Beispiel #1
0
/*
 * __wt_btcur_next_random --
 *	Move to a random record in the tree.
 */
int
__wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	btree = cbt->btree;
	WT_DSTAT_INCR(session, cursor_next);

retry:	WT_RET(__cursor_func_init(cbt, 1));
	__cursor_position_clear(cbt);

	/*
	 * Only supports row-store: applications can trivially select a random
	 * value from a column-store, if there were any reason to do so.
	 */
	WT_ERR(btree->type == BTREE_ROW ?
	    __wt_row_random(session, cbt) : ENOTSUP);
	ret = cbt->compare == 0 ?
	    __wt_kv_return(session, cbt) : WT_NOTFOUND;

err:	if (ret == WT_RESTART)
		goto retry;
	WT_TRET(__cursor_func_resolve(cbt, ret));
	return (ret);
}
Beispiel #2
0
/*
 * __wt_btcur_update --
 *	Update a record in the tree.
 */
int
__wt_btcur_update(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_CURSOR *cursor;
	WT_SESSION_IMPL *session;
	int ret;

	btree = cbt->btree;
	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	WT_BSTAT_INCR(session, cursor_updates);

	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));
	WT_RET(__cursor_size_chk(session, &cursor->value));

retry:	__cursor_func_init(cbt, 1);

	switch (btree->type) {
	case BTREE_COL_FIX:
		if (cursor->value.size != 1)
			WT_RET_MSG(session, EINVAL,
			    "item size of %" PRIu32 " does not match "
			    "fixed-length file requirement of 1 byte",
			    cursor->value.size);
		/* FALLTHROUGH */
	case BTREE_COL_VAR:
		WT_ERR(__wt_col_search(session, cbt, 1));

		/*
		 * Update the record if it exists.  Creating a record past the
		 * end of the tree in a fixed-length column-store implicitly
		 * fills the gap with empty records.  Update the record in that
		 * case, the record exists.
		 */
		if ((cbt->compare != 0 || __cursor_invalid(cbt)) &&
		    !__cursor_fix_implicit(btree, cbt))
			ret = WT_NOTFOUND;
		else if ((ret = __wt_col_modify(session, cbt, 3)) == WT_RESTART)
			goto retry;
		break;
	case BTREE_ROW:
		/* Update the record it it exists. */
		WT_ERR(__wt_row_search(session, cbt, 1));
		if (cbt->compare != 0 || __cursor_invalid(cbt))
			ret = WT_NOTFOUND;
		else if ((ret = __wt_row_modify(session, cbt, 0)) == WT_RESTART)
			goto retry;
		break;
	WT_ILLEGAL_VALUE(session);
	}

err:	__cursor_func_resolve(cbt, ret);

	return (ret);
}
Beispiel #3
0
/*
 * __wt_btcur_remove --
 *	Remove a record from the tree.
 */
int
__wt_btcur_remove(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_CURSOR *cursor;
	WT_SESSION_IMPL *session;
	int ret;

	btree = cbt->btree;
	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	WT_BSTAT_INCR(session, cursor_removes);

	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));

retry:	__cursor_func_init(cbt, 1);

	switch (btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		WT_ERR(__wt_col_search(session, cbt, 1));

		/*
		 * Remove the record if it exists.  Creating a record past the
		 * end of the tree in a fixed-length column-store implicitly
		 * fills the gap with empty records.  Return success in that
		 * case, the record was deleted successfully.
		 */
		if (cbt->compare != 0 || __cursor_invalid(cbt))
			ret =
			    __cursor_fix_implicit(btree, cbt) ? 0 : WT_NOTFOUND;
		else if ((ret = __wt_col_modify(session, cbt, 2)) == WT_RESTART)
			goto retry;
		break;
	case BTREE_ROW:
		/* Remove the record if it exists. */
		WT_ERR(__wt_row_search(session, cbt, 1));
		if (cbt->compare != 0 || __cursor_invalid(cbt))
			ret = WT_NOTFOUND;
		else if ((ret = __wt_row_modify(session, cbt, 1)) == WT_RESTART)
			goto retry;
		break;
	WT_ILLEGAL_VALUE(session);
	}

err:	__cursor_func_resolve(cbt, ret);

	return (ret);
}
Beispiel #4
0
/*
 * __wt_btcur_search --
 *	Search for a matching record in the tree.
 */
int
__wt_btcur_search(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_CURSOR *cursor;
	WT_ITEM *val;
	WT_SESSION_IMPL *session;
	int ret;

	btree = cbt->btree;
	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	WT_BSTAT_INCR(session, cursor_read);

	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));

	__cursor_func_init(cbt, 1);

	WT_ERR(btree->type == BTREE_ROW ?
	    __wt_row_search(session, cbt, 0) :
	    __wt_col_search(session, cbt, 0));
	if (cbt->compare != 0 || __cursor_invalid(cbt)) {
		/*
		 * Creating a record past the end of the tree in a fixed-length
		 * column-store implicitly fills the gap with empty records.
		 */
		if (__cursor_fix_implicit(btree, cbt)) {
			cbt->v = 0;
			val = &cbt->iface.value;
			val->data = &cbt->v;
			val->size = 1;
		} else
			ret = WT_NOTFOUND;
	} else
		ret = __wt_kv_return(session, cbt, 0);

err:	__cursor_func_resolve(cbt, ret);

	return (ret);
}
Beispiel #5
0
/*
 * __wt_btcur_prev --
 *	Move to the previous record in the tree.
 */
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt, int discard)
{
	WT_DECL_RET;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	uint32_t flags;
	int newpage;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	WT_DSTAT_INCR(session, cursor_prev);

	flags = WT_TREE_SKIP_INTL | WT_TREE_PREV;	/* Tree walk flags. */
	if (discard)
		LF_SET(WT_TREE_DISCARD);

retry:	WT_RET(__cursor_func_init(cbt, 0));
	__cursor_position_clear(cbt);

	/*
	 * If we aren't already iterating in the right direction, there's
	 * some setup to do.
	 */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
		__wt_btcur_iterate_setup(cbt, 0);

	/*
	 * If this is a modification, we're about to read information from the
	 * page, save the write generation.
	 */
	page = cbt->page;
	if (discard && page != NULL) {
		WT_ERR(__wt_page_modify_init(session, page));
		WT_ORDERED_READ(cbt->write_gen, page->modify->write_gen);
	}

	/*
	 * Walk any page we're holding until the underlying call returns not-
	 * found.  Then, move to the previous page, until we reach the start
	 * of the file.
	 */
	for (newpage = 0;; newpage = 1) {
		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret == 0)
				break;
			F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
			newpage = 1;
		}
		if (page != NULL) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_prev(cbt, newpage);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret != WT_NOTFOUND)
				break;
		}

		cbt->page = NULL;
		WT_ERR(__wt_tree_walk(session, &page, flags));
		WT_ERR_TEST(page == NULL, WT_NOTFOUND);
		WT_ASSERT(session,
		    page->type != WT_PAGE_COL_INT &&
		    page->type != WT_PAGE_ROW_INT);
		cbt->page = page;

		/* Initialize the page's modification information */
		if (discard) {
			WT_ERR(__wt_page_modify_init(session, page));
			WT_ORDERED_READ(
			    cbt->write_gen, page->modify->write_gen);
		}

		/*
		 * The last page in a column-store has appended entries.
		 * We handle it separately from the usual cursor code:
		 * it's only that one page and it's in a simple format.
		 */
		if (page->type != WT_PAGE_ROW_LEAF &&
		    (cbt->ins_head = WT_COL_APPEND(page)) != NULL)
			F_SET(cbt, WT_CBT_ITERATE_APPEND);
	}

err:	if (ret == WT_RESTART)
		goto retry;
	WT_TRET(__cursor_func_resolve(cbt, ret));
	return (ret);
}
Beispiel #6
0
/*
 * __wt_btcur_prev --
 *	Move to the previous record in the tree.
 */
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	int newpage;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	WT_BSTAT_INCR(session, cursor_read_prev);

	__cursor_func_init(cbt, 0);

	/*
	 * If we aren't already iterating in the right direction, there's
	 * some setup to do.
	 */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
		__wt_btcur_iterate_setup(cbt, 0);

	/*
	 * Walk any page we're holding until the underlying call returns not-
	 * found.  Then, move to the previous page, until we reach the start
	 * of the file.
	 */
	for (newpage = 0;; newpage = 1) {
		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (cbt->page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret == 0)
				break;
			F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
			newpage = 1;
		}
		if (cbt->page != NULL) {
			switch (cbt->page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_prev(cbt, newpage);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret != WT_NOTFOUND)
				break;
		}

		do {
			WT_ERR(__wt_tree_np(session, &cbt->page, 0, 0));
			WT_ERR_TEST(cbt->page == NULL, WT_NOTFOUND);
		} while (
		    cbt->page->type == WT_PAGE_COL_INT ||
		    cbt->page->type == WT_PAGE_ROW_INT);

		/*
		 * The last page in a column-store has appended entries.
		 * We handle it separately from the usual cursor code:
		 * it's only that one page and it's in a simple format.
		 */
		if (cbt->page->type != WT_PAGE_ROW_LEAF &&
		    (cbt->ins_head = WT_COL_APPEND(cbt->page)) != NULL)
			F_SET(cbt, WT_CBT_ITERATE_APPEND);
	}

err:	__cursor_func_resolve(cbt, ret);
	return (ret);
}
Beispiel #7
0
/*
 * __wt_btcur_insert --
 *	Insert a record into the tree.
 */
int
__wt_btcur_insert(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_CURSOR *cursor;
	WT_SESSION_IMPL *session;
	int ret;

	btree = cbt->btree;
	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	WT_BSTAT_INCR(session, cursor_inserts);

	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));
	WT_RET(__cursor_size_chk(session, &cursor->value));

retry:	__cursor_func_init(cbt, 1);

	switch (btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		/*
		 * If WT_CURSTD_APPEND is set, insert a new record (ignoring
		 * the application's record number).  First we search for the
		 * maximum possible record number so the search ends on the
		 * last page.  The real record number is assigned by the
		 * serialized append operation.
		 * __wt_col_append_serial_func
		 */
		if (F_ISSET(cursor, WT_CURSTD_APPEND))
			cbt->iface.recno = UINT64_MAX;

		WT_ERR(__wt_col_search(session, cbt, 1));

		if (F_ISSET(cursor, WT_CURSTD_APPEND))
			cbt->iface.recno = 0;

		/*
		 * If WT_CURSTD_OVERWRITE set, insert/update the key/value pair.
		 *
		 * If WT_CURSTD_OVERWRITE not set, fail if the key exists, else
		 * insert the key/value pair.  Creating a record past the end
		 * of the tree in a fixed-length column-store implicitly fills
		 * the gap with empty records.  Fail in that case, the record
		 * exists.
		 */
		if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
		    ((cbt->compare == 0 && !__cursor_invalid(cbt)) ||
		    (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt)))) {
			ret = WT_DUPLICATE_KEY;
			break;
		}
		if ((ret = __wt_col_modify(session, cbt, 3)) == WT_RESTART)
			goto retry;
		if (F_ISSET(cursor, WT_CURSTD_APPEND) && ret == 0)
			cbt->iface.recno = cbt->recno;
		break;
	case BTREE_ROW:
		/*
		 * If WT_CURSTD_OVERWRITE not set, fail if the key exists, else
		 * insert the key/value pair.
		 *
		 * If WT_CURSTD_OVERWRITE set, insert/update the key/value pair.
		 */
		WT_ERR(__wt_row_search(session, cbt, 1));
		if (cbt->compare == 0 &&
		    !__cursor_invalid(cbt) &&
		    !F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
			ret = WT_DUPLICATE_KEY;
			break;
		}
		if ((ret = __wt_row_modify(session, cbt, 0)) == WT_RESTART)
			goto retry;
		break;
	WT_ILLEGAL_VALUE(session);
	}

err:	__cursor_func_resolve(cbt, ret);

	return (ret);
}
Beispiel #8
0
/*
 * __wt_btcur_search_near --
 *	Search for a record in the tree.
 */
int
__wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exact)
{
	WT_BTREE *btree;
	WT_ITEM *val;
	WT_CURSOR *cursor;
	WT_SESSION_IMPL *session;
	int ret;

	btree = cbt->btree;
	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cursor->session;
	WT_BSTAT_INCR(session, cursor_read_near);

	if (btree->type == BTREE_ROW)
		WT_RET(__cursor_size_chk(session, &cursor->key));

	__cursor_func_init(cbt, 1);

	WT_ERR(btree->type == BTREE_ROW ?
	    __wt_row_search(session, cbt, 0) :
	    __wt_col_search(session, cbt, 0));

	/*
	 * Creating a record past the end of the tree in a fixed-length column-
	 * store implicitly fills the gap with empty records.  In this case, we
	 * instantiate the empty record, it's an exact match.
	 *
	 * Else, if we find a valid key (one that wasn't deleted), return it.
	 *
	 * Else, if we found a deleted key, try to move to the next key in the
	 * tree (bias for prefix searches).  Cursor next skips deleted records,
	 * so we don't have to test for them again.
	 *
	 * Else if there's no larger tree key, redo the search and try and find
	 * an earlier record.  If that fails, quit, there's no record to return.
	 */
	if (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt)) {
		cbt->v = 0;
		val = &cbt->iface.value;
		val->data = &cbt->v;
		val->size = 1;
		*exact = 0;
	} else if (!__cursor_invalid(cbt)) {
		*exact = cbt->compare;
		ret = __wt_kv_return(session, cbt, cbt->compare == 0 ? 0 : 1);
	} else if ((ret = __wt_btcur_next(cbt)) != WT_NOTFOUND)
		*exact = 1;
	else {
		WT_ERR(btree->type == BTREE_ROW ?
		    __wt_row_search(session, cbt, 0) :
		    __wt_col_search(session, cbt, 0));
		if (!__cursor_invalid(cbt)) {
			*exact = cbt->compare;
			ret = __wt_kv_return(
			    session, cbt, cbt->compare == 0 ? 0 : 1);
		} else if ((ret = __wt_btcur_prev(cbt)) != WT_NOTFOUND)
			*exact = -1;
	}

err:	__cursor_func_resolve(cbt, ret);
	return (ret);
}