示例#1
0
/*
 * __clsm_compare --
 *	WT_CURSOR->compare implementation for the LSM cursor type.
 */
static int
__clsm_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
	WT_CURSOR_LSM *alsm;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	/* There's no need to sync with the LSM tree, avoid WT_LSM_ENTER. */
	alsm = (WT_CURSOR_LSM *)a;
	CURSOR_API_CALL(a, session, compare, NULL);

	/*
	 * Confirm both cursors refer to the same source and have keys, then
	 * compare the keys.
	 */
	if (strcmp(a->uri, b->uri) != 0)
		WT_ERR_MSG(session, EINVAL,
		    "comparison method cursors must reference the same object");

	WT_CURSOR_NEEDKEY(a);
	WT_CURSOR_NEEDKEY(b);

	WT_ERR(__wt_compare(
	    session, alsm->lsm_tree->collator, &a->key, &b->key, cmpp));

err:	API_END_RET(session, ret);
}
示例#2
0
文件: bt_cursor.c 项目: 3rf/mongo
/*
 * __wt_btcur_compare --
 *	Return a comparison between two cursors.
 */
int
__wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp)
{
	WT_BTREE *btree;
	WT_CURSOR *a, *b;
	WT_SESSION_IMPL *session;

	a = (WT_CURSOR *)a_arg;
	b = (WT_CURSOR *)b_arg;
	btree = a_arg->btree;
	session = (WT_SESSION_IMPL *)a->session;

	switch (btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		/*
		 * Compare the interface's cursor record, not the underlying
		 * cursor reference: the interface's cursor reference is the
		 * one being returned to the application.
		 */
		if (a->recno < b->recno)
			*cmpp = -1;
		else if (a->recno == b->recno)
			*cmpp = 0;
		else
			*cmpp = 1;
		break;
	case BTREE_ROW:
		WT_RET(__wt_compare(
		    session, btree->collator, &a->key, &b->key, cmpp));
		break;
	WT_ILLEGAL_VALUE(session);
	}
	return (0);
}
示例#3
0
文件: cur_index.c 项目: ksuarz/mongo
/*
 * __curindex_search_near --
 *	WT_CURSOR->search_near method for index cursors.
 */
static int
__curindex_search_near(WT_CURSOR *cursor, int *exact)
{
	WT_CURSOR *child;
	WT_CURSOR_INDEX *cindex;
	WT_DECL_RET;
	WT_ITEM found_key;
	WT_SESSION_IMPL *session;
	int cmp;

	cindex = (WT_CURSOR_INDEX *)cursor;
	child = cindex->child;
	JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);

	/*
	 * We are searching using the application-specified key, which
	 * (usually) doesn't contain the primary key, so it is just a prefix of
	 * any matching index key.  That said, if there is an exact match, we
	 * want to find the first matching index entry and set exact equal to
	 * zero.
	 *
	 * Do a search_near, and if we find an entry that is too small, step to
	 * the next one.  In the unlikely event of a search past the end of the
	 * tree, go back to the last key.
	 */
	__wt_cursor_set_raw_key(child, &cursor->key);
	WT_ERR(child->search_near(child, &cmp));

	if (cmp < 0) {
		if ((ret = child->next(child)) == WT_NOTFOUND)
			ret = child->prev(child);
		WT_ERR(ret);
	}

	/*
	 * We expect partial matches, and want the smallest record with a key
	 * greater than or equal to the search key.
	 *
	 * If the found key starts with the search key, we indicate a match by
	 * setting exact equal to zero.
	 *
	 * The compare function expects application-supplied keys to come first
	 * so we flip the sign of the result to match what callers expect.
	 */
	found_key = child->key;
	if (found_key.size > cursor->key.size)
		found_key.size = cursor->key.size;

	WT_ERR(__wt_compare(
	    session, cindex->index->collator, &cursor->key, &found_key, exact));
	*exact = -*exact;

	WT_ERR(__curindex_move(cindex));

	if (0) {
err:		F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
	}

	API_END_RET(session, ret);
}
示例#4
0
/*
 * __curjoin_entry_in_range --
 *	Check if a key is in the range specified by the entry, returning
 *	WT_NOTFOUND if not.
 */
static int
__curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
    WT_ITEM *curkey, bool skip_left)
{
	WT_COLLATOR *collator;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	WT_DECL_RET;
	int cmp;

	collator = (entry->index != NULL) ? entry->index->collator : NULL;
	endmax = &entry->ends[entry->ends_next];
	for (end = &entry->ends[skip_left ? 1 : 0]; end < endmax; end++) {
		WT_ERR(__wt_compare(session, collator, curkey, &end->key,
		    &cmp));
		if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
			if (cmp < 0 ||
			    (cmp == 0 &&
			    !F_ISSET(end, WT_CURJOIN_END_EQ)) ||
			    (cmp > 0 && !F_ISSET(end, WT_CURJOIN_END_GT)))
				WT_ERR(WT_NOTFOUND);
		} else {
			if (cmp > 0 ||
			    (cmp == 0 &&
			    !F_ISSET(end, WT_CURJOIN_END_EQ)) ||
			    (cmp < 0 && !F_ISSET(end, WT_CURJOIN_END_LT)))
				WT_ERR(WT_NOTFOUND);
		}
	}
err:	return (ret);
}
示例#5
0
文件: cur_index.c 项目: ksuarz/mongo
/*
 * __curindex_search --
 *	WT_CURSOR->search method for index cursors.
 */
static int
__curindex_search(WT_CURSOR *cursor)
{
	WT_CURSOR *child;
	WT_CURSOR_INDEX *cindex;
	WT_DECL_RET;
	WT_ITEM found_key;
	WT_SESSION_IMPL *session;
	int cmp;

	cindex = (WT_CURSOR_INDEX *)cursor;
	child = cindex->child;
	JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);

	/*
	 * We are searching using the application-specified key, which
	 * (usually) doesn't contain the primary key, so it is just a prefix of
	 * any matching index key.  Do a search_near, step to the next entry if
	 * we land on one that is too small, then check that the prefix
	 * matches.
	 */
	__wt_cursor_set_raw_key(child, &cursor->key);
	WT_ERR(child->search_near(child, &cmp));

	if (cmp < 0)
		WT_ERR(child->next(child));

	/*
	 * We expect partial matches, and want the smallest record with a key
	 * greater than or equal to the search key.
	 *
	 * If the key we find is shorter than the search key, it can't possibly
	 * match.
	 *
	 * The only way for the key to be exactly equal is if there is an index
	 * on the primary key, because otherwise the primary key columns will
	 * be appended to the index key, but we don't disallow that (odd) case.
	 */
	found_key = child->key;
	if (found_key.size < cursor->key.size)
		WT_ERR(WT_NOTFOUND);
	found_key.size = cursor->key.size;

	WT_ERR(__wt_compare(
	    session, cindex->index->collator, &cursor->key, &found_key, &cmp));
	if (cmp != 0) {
		ret = WT_NOTFOUND;
		goto err;
	}

	WT_ERR(__curindex_move(cindex));

	if (0) {
err:		F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
	}

	API_END_RET(session, ret);
}
示例#6
0
文件: bt_split.c 项目: Jonekee/mongo
/*
 * __split_verify_intl_key_order --
 *	Verify the key order on an internal page after a split, diagnostic only.
 */
static void
__split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page)
{
	WT_BTREE *btree;
	WT_ITEM *next, _next, *last, _last, *tmp;
	WT_REF *ref;
	uint64_t recno;
	int cmp;
	bool first;

	btree = S2BT(session);

	switch (page->type) {
	case WT_PAGE_COL_INT:
		recno = 0;		/* Less than any valid record number. */
		WT_INTL_FOREACH_BEGIN(session, page, ref) {
			WT_ASSERT(session, ref->key.recno > recno);
			recno = ref->key.recno;
		} WT_INTL_FOREACH_END;
		break;
	case WT_PAGE_ROW_INT:
		next = &_next;
		WT_CLEAR(_next);
		last = &_last;
		WT_CLEAR(_last);

		first = true;
		WT_INTL_FOREACH_BEGIN(session, page, ref) {
			__wt_ref_key(page, ref, &next->data, &next->size);
			if (last->size == 0) {
				if (first)
					first = false;
				else {
					WT_ASSERT(session, __wt_compare(
					    session, btree->collator, last,
					    next, &cmp) == 0);
					WT_ASSERT(session, cmp < 0);
				}
			}
			tmp = last;
			last = next;
			next = tmp;
		} WT_INTL_FOREACH_END;
示例#7
0
/*
 * __curbulk_insert_row --
 *	Row-store bulk cursor insert, with key-sort checks.
 */
static int
__curbulk_insert_row(WT_CURSOR *cursor)
{
	WT_BTREE *btree;
	WT_CURSOR_BULK *cbulk;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	int cmp;

	cbulk = (WT_CURSOR_BULK *)cursor;
	btree = cbulk->cbt.btree;

	/*
	 * Bulk cursor inserts are updates, but don't need auto-commit
	 * transactions because they are single-threaded and not visible
	 * until the bulk cursor is closed.
	 */
	CURSOR_API_CALL(cursor, session, insert, btree);
	WT_STAT_FAST_DATA_INCR(session, cursor_insert_bulk);

	WT_CURSOR_CHECKKEY(cursor);
	WT_CURSOR_CHECKVALUE(cursor);

	/*
	 * If this isn't the first key inserted, compare it against the last key
	 * to ensure the application doesn't accidentally corrupt the table.
	 */
	if (!cbulk->first_insert) {
		WT_ERR(__wt_compare(session,
		    btree->collator, &cursor->key, &cbulk->last, &cmp));
		if (cmp <= 0)
			WT_ERR(__bulk_row_keycmp_err(cbulk));
	} else
		cbulk->first_insert = false;

	/* Save a copy of the key for the next comparison. */
	WT_ERR(__wt_buf_set(session,
	    &cbulk->last, cursor->key.data, cursor->key.size));

	ret = __wt_bulk_insert_row(session, cbulk);

err:	API_END_RET(session, ret);
}
示例#8
0
/*
 * __curds_compare --
 *	WT_CURSOR.compare method for the data-source cursor type.
 */
static int
__curds_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
	WT_COLLATOR *collator;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	CURSOR_API_CALL(a, session, compare, NULL);

	/*
	 * Confirm both cursors refer to the same source and have keys, then
	 * compare them.
	 */
	if (strcmp(a->internal_uri, b->internal_uri) != 0)
		WT_ERR_MSG(session, EINVAL,
		    "Cursors must reference the same object");

	WT_ERR(__cursor_needkey(a));
	WT_ERR(__cursor_needkey(b));

	if (WT_CURSOR_RECNO(a)) {
		if (a->recno < b->recno)
			*cmpp = -1;
		else if (a->recno == b->recno)
			*cmpp = 0;
		else
			*cmpp = 1;
	} else {
		/*
		 * The assumption is data-sources don't provide WiredTiger with
		 * WT_CURSOR.compare methods, instead, we'll copy the key/value
		 * out of the underlying data-source cursor and any comparison
		 * to be done can be done at this level.
		 */
		collator = ((WT_CURSOR_DATA_SOURCE *)a)->collator;
		WT_ERR(__wt_compare(
		    session, collator, &a->key, &b->key, cmpp));
	}

err:	API_END_RET(session, ret);
}
示例#9
0
文件: bt_curnext.c 项目: DINKIN/mongo
/*
 * __cursor_key_order_check_row --
 *	Check key ordering for row-store cursor movements.
 */
static int
__cursor_key_order_check_row(
    WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next)
{
	WT_BTREE *btree;
	WT_ITEM *key;
	WT_DECL_RET;
	WT_DECL_ITEM(a);
	WT_DECL_ITEM(b);
	int cmp;

	btree = S2BT(session);
	key = &cbt->iface.key;
	cmp = 0;			/* -Werror=maybe-uninitialized */

	if (cbt->lastkey->size != 0)
		WT_RET(__wt_compare(
		    session, btree->collator, cbt->lastkey, key, &cmp));

	if (cbt->lastkey->size == 0 || (next && cmp < 0) || (!next && cmp > 0))
		return (__wt_buf_set(session,
		    cbt->lastkey, cbt->iface.key.data, cbt->iface.key.size));

	WT_ERR(__wt_scr_alloc(session, 512, &a));
	WT_ERR(__wt_scr_alloc(session, 512, &b));

	WT_PANIC_ERR(session, EINVAL,
	    "WT_CURSOR.%s out-of-order returns: returned key %s then key %s",
	    next ? "next" : "prev",
	    __wt_buf_set_printable(
	    session, cbt->lastkey->data, cbt->lastkey->size, a),
	    __wt_buf_set_printable(session, key->data, key->size, b));

err:	__wt_scr_free(session, &a);
	__wt_scr_free(session, &b);

	return (ret);
}
示例#10
0
文件: bt_cursor.c 项目: ksuarz/mongo
/*
 * __wt_btcur_compare --
 *	Return a comparison between two cursors.
 */
int
__wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp)
{
	WT_CURSOR *a, *b;
	WT_SESSION_IMPL *session;

	a = (WT_CURSOR *)a_arg;
	b = (WT_CURSOR *)b_arg;
	session = (WT_SESSION_IMPL *)a->session;

	/* Confirm both cursors reference the same object. */
	if (a_arg->btree != b_arg->btree)
		WT_RET_MSG(
		    session, EINVAL, "Cursors must reference the same object");

	switch (a_arg->btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		/*
		 * Compare the interface's cursor record, not the underlying
		 * cursor reference: the interface's cursor reference is the
		 * one being returned to the application.
		 */
		if (a->recno < b->recno)
			*cmpp = -1;
		else if (a->recno == b->recno)
			*cmpp = 0;
		else
			*cmpp = 1;
		break;
	case BTREE_ROW:
		WT_RET(__wt_compare(
		    session, a_arg->btree->collator, &a->key, &b->key, cmpp));
		break;
	}
	return (0);
}
示例#11
0
文件: cur_index.c 项目: ksuarz/mongo
/*
 * __curindex_compare --
 *	WT_CURSOR->compare method for the index cursor type.
 */
static int
__curindex_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
	WT_CURSOR_INDEX *cindex;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	cindex = (WT_CURSOR_INDEX *)a;
	JOINABLE_CURSOR_API_CALL(a, session, compare, NULL);

	/* Check both cursors are "index:" type. */
	if (!WT_PREFIX_MATCH(a->uri, "index:") ||
	    strcmp(a->uri, b->uri) != 0)
		WT_ERR_MSG(session, EINVAL,
		    "Cursors must reference the same object");

	WT_CURSOR_CHECKKEY(a);
	WT_CURSOR_CHECKKEY(b);

	ret = __wt_compare(
	    session, cindex->index->collator, &a->key, &b->key, cmpp);

err:	API_END_RET(session, ret);
}
示例#12
0
文件: bt_vrfy_dsk.c 项目: radik/mongo
/*
 * __verify_dsk_row --
 *	Walk a WT_PAGE_ROW_INT or WT_PAGE_ROW_LEAF disk page and verify it.
 */
static int
__verify_dsk_row(
    WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk)
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_CELL *cell;
	WT_CELL_UNPACK *unpack, _unpack;
	WT_DECL_ITEM(current);
	WT_DECL_ITEM(last_ovfl);
	WT_DECL_ITEM(last_pfx);
	WT_DECL_RET;
	WT_ITEM *last;
	enum { FIRST, WAS_KEY, WAS_VALUE } last_cell_type;
	void *huffman;
	uint32_t cell_num, cell_type, i, key_cnt, prefix;
	uint8_t *end;
	int cmp;

	btree = S2BT(session);
	bm = btree->bm;
	unpack = &_unpack;
	huffman = dsk->type == WT_PAGE_ROW_INT ? NULL : btree->huffman_key;

	WT_ERR(__wt_scr_alloc(session, 0, &current));
	WT_ERR(__wt_scr_alloc(session, 0, &last_pfx));
	WT_ERR(__wt_scr_alloc(session, 0, &last_ovfl));
	last = last_ovfl;

	end = (uint8_t *)dsk + dsk->mem_size;

	last_cell_type = FIRST;
	cell_num = 0;
	key_cnt = 0;
	WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
		++cell_num;

		/* Carefully unpack the cell. */
		if (__wt_cell_unpack_safe(cell, unpack, end) != 0) {
			ret = __err_cell_corrupted(session, cell_num, tag);
			goto err;
		}

		/* Check the raw and collapsed cell types. */
		WT_ERR(__err_cell_type(
		    session, cell_num, tag, unpack->raw, dsk->type));
		WT_ERR(__err_cell_type(
		    session, cell_num, tag, unpack->type, dsk->type));
		cell_type = unpack->type;

		/*
		 * Check ordering relationships between the WT_CELL entries.
		 * For row-store internal pages, check for:
		 *	two values in a row,
		 *	two keys in a row,
		 *	a value as the first cell on a page.
		 * For row-store leaf pages, check for:
		 *	two values in a row,
		 *	a value as the first cell on a page.
		 */
		switch (cell_type) {
		case WT_CELL_KEY:
		case WT_CELL_KEY_OVFL:
			++key_cnt;
			switch (last_cell_type) {
			case FIRST:
			case WAS_VALUE:
				break;
			case WAS_KEY:
				if (dsk->type == WT_PAGE_ROW_LEAF)
					break;
				WT_ERR_VRFY(session,
				    "cell %" PRIu32 " on page at %s is the "
				    "first of two adjacent keys",
				    cell_num - 1, tag);
			}
			last_cell_type = WAS_KEY;
			break;
		case WT_CELL_ADDR_DEL:
		case WT_CELL_ADDR_INT:
		case WT_CELL_ADDR_LEAF:
		case WT_CELL_ADDR_LEAF_NO:
		case WT_CELL_VALUE:
		case WT_CELL_VALUE_OVFL:
			switch (last_cell_type) {
			case FIRST:
				WT_ERR_VRFY(session,
				    "page at %s begins with a value", tag);
			case WAS_KEY:
				break;
			case WAS_VALUE:
				WT_ERR_VRFY(session,
				    "cell %" PRIu32 " on page at %s is the "
				    "first of two adjacent values",
				    cell_num - 1, tag);
			}
			last_cell_type = WAS_VALUE;
			break;
		}

		/* Check if any referenced item has a valid address. */
		switch (cell_type) {
		case WT_CELL_ADDR_DEL:
		case WT_CELL_ADDR_INT:
		case WT_CELL_ADDR_LEAF:
		case WT_CELL_ADDR_LEAF_NO:
		case WT_CELL_KEY_OVFL:
		case WT_CELL_VALUE_OVFL:
			if (!bm->addr_valid(bm,
			    session, unpack->data, unpack->size))
				goto eof;
			break;
		}

		/*
		 * Remaining checks are for key order and prefix compression.
		 * If this cell isn't a key, we're done, move to the next cell.
		 * If this cell is an overflow item, instantiate the key and
		 * compare it with the last key. Otherwise, we have to deal with
		 * prefix compression.
		 */
		switch (cell_type) {
		case WT_CELL_KEY:
			break;
		case WT_CELL_KEY_OVFL:
			WT_ERR(__wt_dsk_cell_data_ref(
			    session, dsk->type, unpack, current));
			goto key_compare;
		default:
			/* Not a key -- continue with the next cell. */
			continue;
		}

		/*
		 * Prefix compression checks.
		 *
		 * Confirm the first non-overflow key on a page has a zero
		 * prefix compression count.
		 */
		prefix = unpack->prefix;
		if (last_pfx->size == 0 && prefix != 0)
			WT_ERR_VRFY(session,
			    "the %" PRIu32 " key on page at %s is the first "
			    "non-overflow key on the page and has a non-zero "
			    "prefix compression value",
			    cell_num, tag);

		/* Confirm the prefix compression count is possible. */
		if (cell_num > 1 && prefix > last->size)
			WT_ERR_VRFY(session,
			    "key %" PRIu32 " on page at %s has a prefix "
			    "compression count of %" PRIu32 ", larger than "
			    "the length of the previous key, %" WT_SIZET_FMT,
			    cell_num, tag, prefix, last->size);

		/*
		 * If Huffman decoding required, unpack the cell to build the
		 * key, then resolve the prefix.  Else, we can do it faster
		 * internally because we don't have to shuffle memory around as
		 * much.
		 */
		if (huffman != NULL) {
			WT_ERR(__wt_dsk_cell_data_ref(
			    session, dsk->type, unpack, current));

			/*
			 * If there's a prefix, make sure there's enough buffer
			 * space, then shift the decoded data past the prefix
			 * and copy the prefix into place.  Take care with the
			 * pointers: current->data may be pointing inside the
			 * buffer.
			 */
			if (prefix != 0) {
				WT_ERR(__wt_buf_grow(
				    session, current, prefix + current->size));
				memmove((uint8_t *)current->mem + prefix,
				    current->data, current->size);
				memcpy(current->mem, last->data, prefix);
				current->data = current->mem;
				current->size += prefix;
			}
		} else {
			/*
			 * Get the cell's data/length and make sure we have
			 * enough buffer space.
			 */
			WT_ERR(__wt_buf_init(
			    session, current, prefix + unpack->size));

			/* Copy the prefix then the data into place. */
			if (prefix != 0)
				memcpy(current->mem, last->data, prefix);
			memcpy((uint8_t *)current->mem + prefix, unpack->data,
			    unpack->size);
			current->size = prefix + unpack->size;
		}

key_compare:	/*
		 * Compare the current key against the last key.
		 *
		 * Be careful about the 0th key on internal pages: we only store
		 * the first byte and custom collators may not be able to handle
		 * truncated keys.
		 */
		if ((dsk->type == WT_PAGE_ROW_INT && cell_num > 3) ||
		    (dsk->type != WT_PAGE_ROW_INT && cell_num > 1)) {
			WT_ERR(__wt_compare(
			    session, btree->collator, last, current, &cmp));
			if (cmp >= 0)
				WT_ERR_VRFY(session,
				    "the %" PRIu32 " and %" PRIu32 " keys on "
				    "page at %s are incorrectly sorted",
				    cell_num - 2, cell_num, tag);
		}

		/*
		 * Swap the buffers: last always references the last key entry,
		 * last_pfx and last_ovfl reference the last prefix-compressed
		 * and last overflow key entries.  Current gets pointed to the
		 * buffer we're not using this time around, which is where the
		 * next key goes.
		 */
		last = current;
		if (cell_type == WT_CELL_KEY) {
			current = last_pfx;
			last_pfx = last;
		} else {
			current = last_ovfl;
			last_ovfl = last;
		}
		WT_ASSERT(session, last != current);
	}
示例#13
0
文件: cur_join.c 项目: mongodb/mongo
/*
 * __curjoin_init_bloom --
 *	Populate Bloom filters
 */
static int
__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
{
	WT_COLLATOR *collator;
	WT_CURSOR *c;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	WT_DECL_ITEM(uribuf);
	WT_DECL_RET;
	WT_ITEM curkey, curvalue;
	size_t size;
	u_int skip;
	int cmp;
	const char *uri;
	const char *raw_cfg[] = { WT_CONFIG_BASE(
	    session, WT_SESSION_open_cursor), "raw", NULL };

	c = NULL;
	skip = 0;

	if (entry->index != NULL)
		/*
		 * Open the raw index.  We're avoiding any references
		 * to the main table, they may be expensive.
		 */
		uri = entry->index->source;
	else {
		/*
		 * For joins on the main table, we just need the primary
		 * key for comparison, we don't need any values.
		 */
		size = strlen(cjoin->table->iface.name) + 3;
		WT_ERR(__wt_scr_alloc(session, size, &uribuf));
		WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
		    cjoin->table->iface.name));
		uri = uribuf->data;
	}
	WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c));

	/* Initially position the cursor if necessary. */
	endmax = &entry->ends[entry->ends_next];
	if ((end = &entry->ends[0]) < endmax) {
		if (F_ISSET(end, WT_CURJOIN_END_GT) ||
		    WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) {
			WT_ERR(__wt_cursor_dup_position(end->cursor, c));
			if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE)
				skip = 1;
		} else if (F_ISSET(end, WT_CURJOIN_END_LT)) {
			if ((ret = c->next(c)) == WT_NOTFOUND)
				goto done;
			WT_ERR(ret);
		} else
			WT_PANIC_ERR(session, EINVAL,
			    "fatal error in join cursor position state");
	}
	collator = (entry->index == NULL) ? NULL : entry->index->collator;
	while (ret == 0) {
		WT_ERR(c->get_key(c, &curkey));
		entry->stats.iterated++;
		if (entry->index != NULL) {
			/*
			 * Repack so it's comparable to the
			 * reference endpoints.
			 */
			WT_ERR(__wt_struct_repack(session,
			    c->key_format,
			    (entry->repack_format != NULL ?
			    entry->repack_format : entry->index->idxkey_format),
			    &c->key, &curkey));
		}
		for (end = &entry->ends[skip]; end < endmax; end++) {
			WT_ERR(__wt_compare(session, collator, &curkey,
			    &end->key, &cmp));
			if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) {
				/* if condition satisfied, insert immediately */
				switch (WT_CURJOIN_END_RANGE(end)) {
				case WT_CURJOIN_END_EQ:
					if (cmp == 0)
						goto insert;
					break;
				case WT_CURJOIN_END_GT:
					if (cmp > 0) {
						/* skip this check next time */
						skip = entry->ends_next;
						goto insert;
					}
					break;
				case WT_CURJOIN_END_GE:
					if (cmp >= 0)
						goto insert;
					break;
				case WT_CURJOIN_END_LT:
					if (cmp < 0)
						goto insert;
					break;
				case WT_CURJOIN_END_LE:
					if (cmp <= 0)
						goto insert;
					break;
				}
			} else if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
				if (cmp < 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto advance;
				if (cmp > 0) {
					if (F_ISSET(end, WT_CURJOIN_END_GT))
						skip = 1;
					else
						goto done;
				}
			} else {
				if (cmp > 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto done;
			}
		}
		/*
		 * Either it's a disjunction that hasn't satisfied any
		 * condition, or it's a conjunction that has satisfied all
		 * conditions.
		 */
		if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
			goto advance;
insert:
		if (entry->index != NULL) {
			curvalue.data =
			    (unsigned char *)curkey.data + curkey.size;
			WT_ASSERT(session, c->key.size > curkey.size);
			curvalue.size = c->key.size - curkey.size;
		}
		else
			WT_ERR(c->get_key(c, &curvalue));
		__wt_bloom_insert(bloom, &curvalue);
		entry->stats.bloom_insert++;
advance:
		if ((ret = c->next(c)) == WT_NOTFOUND)
			break;
	}
done:
	WT_ERR_NOTFOUND_OK(ret);

err:	if (c != NULL)
		WT_TRET(c->close(c));
	__wt_scr_free(session, &uribuf);
	return (ret);
}
示例#14
0
文件: cur_join.c 项目: mongodb/mongo
/*
 * __curjoin_entry_in_range --
 *	Check if a key is in the range specified by the entry, returning
 *	WT_NOTFOUND if not.
 */
static int
__curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
    WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iter)
{
	WT_COLLATOR *collator;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	u_int pos;
	int cmp;
	bool disjunction, passed;

	collator = (entry->index != NULL) ? entry->index->collator : NULL;
	endmax = &entry->ends[entry->ends_next];
	disjunction = F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION);

	/*
	 * The iterator may have already satisfied some endpoint conditions.
	 * If so and we're a disjunction, we're done.  If so and we're a
	 * conjunction, we can start past the satisfied conditions.
	 */
	if (iter == NULL)
		pos = 0;
	else {
		if (disjunction && iter->end_skip)
			return (0);
		pos = iter->end_pos + iter->end_skip;
	}

	for (end = &entry->ends[pos]; end < endmax; end++) {
		WT_RET(__wt_compare(session, collator, curkey, &end->key,
		    &cmp));
		switch (WT_CURJOIN_END_RANGE(end)) {
		case WT_CURJOIN_END_EQ:
			passed = (cmp == 0);
			break;

		case WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ:
			passed = (cmp >= 0);
			WT_ASSERT(session, iter == NULL);
			break;

		case WT_CURJOIN_END_GT:
			passed = (cmp > 0);
			if (passed && iter != NULL && pos == 0)
				iter->end_skip = 1;
			break;

		case WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ:
			passed = (cmp <= 0);
			break;

		case WT_CURJOIN_END_LT:
			passed = (cmp < 0);
			break;

		WT_ILLEGAL_VALUE(session, WT_CURJOIN_END_RANGE(end));
		}

		if (!passed) {
			if (iter != NULL &&
			    (iter->is_equal ||
			    F_ISSET(end, WT_CURJOIN_END_LT))) {
				WT_RET(__curjoin_iter_bump(iter));
				return (WT_NOTFOUND);
			}
			if (!disjunction)
				return (WT_NOTFOUND);
			iter = NULL;
		} else if (disjunction)
			break;
	}
	if (disjunction && end == endmax)
		return (WT_NOTFOUND);
	return (0);
}
示例#15
0
/*
 * __curjoin_init_bloom --
 *	Populate Bloom filters
 */
static int
__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
{
	WT_COLLATOR *collator;
	WT_CURSOR *c;
	WT_CURSOR_INDEX *cindex;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	WT_DECL_RET;
	WT_DECL_ITEM(uribuf);
	WT_ITEM curkey, curvalue, *k;
	WT_TABLE *maintable;
	const char *raw_cfg[] = { WT_CONFIG_BASE(
	    session, WT_SESSION_open_cursor), "raw", NULL };
	const char *mainkey_str, *p;
	void *allocbuf;
	size_t mainkey_len, size;
	u_int i;
	int cmp, skip;

	c = NULL;
	allocbuf = NULL;
	skip = 0;

	if (entry->index != NULL) {
		/*
		 * Open a cursor having a projection of the keys of the
		 * index we're comparing against.  Open it raw, we're
		 * going to compare it to the raw keys of the
		 * reference cursors.
		 */
		maintable = ((WT_CURSOR_TABLE *)entry->main)->table;
		mainkey_str = maintable->colconf.str + 1;
		for (p = mainkey_str, i = 0;
		     p != NULL && i < maintable->nkey_columns; i++)
			p = strchr(p + 1, ',');
		WT_ASSERT(session, p != 0);
		mainkey_len = WT_PTRDIFF(p, mainkey_str);
		size = strlen(entry->index->name) + mainkey_len + 3;
		WT_ERR(__wt_scr_alloc(session, size, &uribuf));
		WT_ERR(__wt_buf_fmt(session, uribuf, "%s(%.*s)",
		    entry->index->name, (int)mainkey_len, mainkey_str));
	} else {
		/*
		 * For joins on the main table, we just need the primary
		 * key for comparison, we don't need any values.
		 */
		size = strlen(cjoin->table->name) + 3;
		WT_ERR(__wt_scr_alloc(session, size, &uribuf));
		WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
		    cjoin->table->name));
	}
	WT_ERR(__wt_open_cursor(
	    session, uribuf->data, &cjoin->iface, raw_cfg, &c));

	/* Initially position the cursor if necessary. */
	endmax = &entry->ends[entry->ends_next];
	if ((end = &entry->ends[0]) < endmax &&
	    F_ISSET(end, WT_CURJOIN_END_GE)) {
		WT_ERR(__wt_cursor_dup_position(end->cursor, c));
		if (end->flags == WT_CURJOIN_END_GE)
			skip = 1;
	}
	collator = (entry->index == NULL) ? NULL : entry->index->collator;
	while (ret == 0) {
		WT_ERR(c->get_key(c, &curkey));
		if (entry->index != NULL) {
			cindex = (WT_CURSOR_INDEX *)c;
			if (cindex->index->extractor == NULL) {
				/*
				 * Repack so it's comparable to the
				 * reference endpoints.
				 */
				k = &cindex->child->key;
				WT_ERR(__wt_struct_repack(session,
				    cindex->child->key_format,
				    entry->main->value_format, k, &curkey,
				    &allocbuf));
			} else
				curkey = cindex->child->key;
		}
		for (end = &entry->ends[skip]; end < endmax; end++) {
			WT_ERR(__wt_compare(session, collator, &curkey,
			    &end->key, &cmp));
			if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
				if (cmp < 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto advance;
				if (cmp > 0) {
					if (F_ISSET(end, WT_CURJOIN_END_GT))
						skip = 1;
					else
						goto done;
				}
			} else {
				if (cmp > 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto done;
			}
		}
		if (entry->index != NULL)
			WT_ERR(c->get_value(c, &curvalue));
		else
			WT_ERR(c->get_key(c, &curvalue));
		WT_ERR(__wt_bloom_insert(bloom, &curvalue));
		entry->stats.actual_count++;
advance:
		if ((ret = c->next(c)) == WT_NOTFOUND)
			break;
	}
done:
	WT_ERR_NOTFOUND_OK(ret);

err:	if (c != NULL)
		WT_TRET(c->close(c));
	__wt_scr_free(session, &uribuf);
	__wt_free(session, allocbuf);
	return (ret);
}