/*
 * __clsm_compare --
 *	WT_CURSOR->compare implementation for the LSM cursor type: compare the
 *	keys of two LSM cursors using the collator of the first cursor's tree.
 *	Fails with EINVAL when the cursors do not share a URI.
 */
static int
__clsm_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
	WT_CURSOR_LSM *alsm;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	/*
	 * Comparing keys does not require synchronizing with the LSM tree, so
	 * WT_LSM_ENTER is deliberately not used here.
	 */
	alsm = (WT_CURSOR_LSM *)a;

	CURSOR_API_CALL(a, session, compare, NULL);

	/* Cursors opened on different objects cannot be ordered. */
	if (strcmp(a->uri, b->uri) != 0)
		WT_ERR_MSG(session, EINVAL,
		    "comparison method cursors must reference the same object");

	/* Both cursors need a key before the keys can be compared. */
	WT_CURSOR_NEEDKEY(a);
	WT_CURSOR_NEEDKEY(b);

	/* The comparison result is the function's result. */
	ret = __wt_compare(
	    session, alsm->lsm_tree->collator, &a->key, &b->key, cmpp);

err:	API_END_RET(session, ret);
}
/*
 * __wt_btcur_compare --
 *	Return a comparison between two btree cursors. Column-store cursors
 *	are ordered by record number (-1, 0 or 1 is stored in cmpp); row-store
 *	cursors are ordered by key through __wt_compare.
 */
int
__wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp)
{
	WT_BTREE *btree;
	WT_CURSOR *a, *b;
	WT_SESSION_IMPL *session;

	btree = a_arg->btree;
	a = (WT_CURSOR *)a_arg;
	b = (WT_CURSOR *)b_arg;
	session = (WT_SESSION_IMPL *)a->session;

	switch (btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		/*
		 * Use the record number stored in the interface cursor rather
		 * than the underlying cursor reference: the interface's value
		 * is the one returned to the application.
		 */
		*cmpp = a->recno < b->recno ? -1 :
		    (a->recno == b->recno ? 0 : 1);
		break;
	case BTREE_ROW:
		WT_RET(__wt_compare(
		    session, btree->collator, &a->key, &b->key, cmpp));
		break;
	WT_ILLEGAL_VALUE(session);
	}

	return (0);
}
/*
 * __curindex_search_near --
 *	WT_CURSOR->search_near method for index cursors.
 */
static int
__curindex_search_near(WT_CURSOR *cursor, int *exact)
{
	WT_CURSOR *child;
	WT_CURSOR_INDEX *cindex;
	WT_DECL_RET;
	WT_ITEM found_key;
	WT_SESSION_IMPL *session;
	int cmp;

	cindex = (WT_CURSOR_INDEX *)cursor;
	child = cindex->child;

	/*
	 * NOTE(review): the API call is tagged "search" although this is the
	 * search_near method -- confirm that is intentional.
	 */
	JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);

	/*
	 * The application-specified key (usually) lacks the primary key, so
	 * it is only a prefix of any matching index key. When an exact match
	 * exists, the goal is to land on the first matching index entry and
	 * report exact as zero.
	 *
	 * Run search_near on the child; if it lands on an entry that is too
	 * small, move forward one entry. In the unlikely case that the search
	 * ran past the end of the tree, step back to the last key instead.
	 */
	__wt_cursor_set_raw_key(child, &cursor->key);
	WT_ERR(child->search_near(child, &cmp));

	if (cmp < 0) {
		ret = child->next(child);
		if (ret == WT_NOTFOUND)
			ret = child->prev(child);
		WT_ERR(ret);
	}

	/*
	 * Partial matches are expected: the wanted record is the smallest one
	 * whose key is greater than or equal to the search key. Truncate the
	 * found key to the search key's length so that a found key starting
	 * with the search key compares equal (exact == 0).
	 */
	found_key = child->key;
	if (cursor->key.size < found_key.size)
		found_key.size = cursor->key.size;

	/*
	 * The compare function expects the application-supplied key first, so
	 * the sign of the result is flipped to match what callers expect.
	 */
	WT_ERR(__wt_compare(session,
	    cindex->index->collator, &cursor->key, &found_key, exact));
	*exact = -*exact;

	WT_ERR(__curindex_move(cindex));

	if (0) {
		/* Error path only: the cursor no longer holds a key/value. */
err:		F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
	}

	API_END_RET(session, ret);
}
/*
 * __curjoin_entry_in_range --
 *	Check if a key is in the range specified by the entry, returning
 *	WT_NOTFOUND if not. When skip_left is true the first endpoint is not
 *	checked. Returns zero when every checked endpoint accepts the key, or
 *	the error from __wt_compare.
 */
static int
__curjoin_entry_in_range(WT_SESSION_IMPL *session,
    WT_CURSOR_JOIN_ENTRY *entry, WT_ITEM *curkey, bool skip_left)
{
	WT_COLLATOR *collator;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	WT_DECL_RET;
	int cmp;

	/* Entries without an index use the default (NULL) collator. */
	collator = (entry->index != NULL) ? entry->index->collator : NULL;
	endmax = &entry->ends[entry->ends_next];

	for (end = &entry->ends[skip_left ? 1 : 0]; end < endmax; end++) {
		WT_ERR(__wt_compare(
		    session, collator, curkey, &end->key, &cmp));

		if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
			/*
			 * Endpoint without the "less than" flag: a smaller
			 * key never matches, an equal key needs the EQ flag
			 * and a larger key needs the GT flag.
			 */
			if (cmp < 0 ||
			    (cmp == 0 && !F_ISSET(end, WT_CURJOIN_END_EQ)) ||
			    (cmp > 0 && !F_ISSET(end, WT_CURJOIN_END_GT)))
				WT_ERR(WT_NOTFOUND);
		} else {
			/*
			 * Endpoint with the "less than" flag: a larger key
			 * never matches and an equal key needs the EQ flag.
			 * A smaller key always matches: the LT flag is known
			 * to be set in this branch, so no further test is
			 * needed for it.
			 */
			if (cmp > 0 ||
			    (cmp == 0 && !F_ISSET(end, WT_CURJOIN_END_EQ)))
				WT_ERR(WT_NOTFOUND);
		}
	}

err:	return (ret);
}
/*
 * __curindex_search --
 *	WT_CURSOR->search method for index cursors.
 */
static int
__curindex_search(WT_CURSOR *cursor)
{
	WT_CURSOR *child;
	WT_CURSOR_INDEX *cindex;
	WT_DECL_RET;
	WT_ITEM found_key;
	WT_SESSION_IMPL *session;
	int cmp;

	cindex = (WT_CURSOR_INDEX *)cursor;
	child = cindex->child;
	JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);

	/*
	 * The application-specified key (usually) lacks the primary key, so
	 * it is only a prefix of any matching index key. Run search_near on
	 * the child, move forward one entry if it landed on a key that is too
	 * small, then verify the prefix matches.
	 */
	__wt_cursor_set_raw_key(child, &cursor->key);
	WT_ERR(child->search_near(child, &cmp));
	if (cmp < 0)
		WT_ERR(child->next(child));

	/*
	 * Partial matches are expected: the wanted record is the smallest one
	 * whose key is greater than or equal to the search key.
	 *
	 * A found key shorter than the search key cannot possibly match.
	 *
	 * The keys can only be exactly equal when the index is on the primary
	 * key (otherwise the primary key columns are appended to the index
	 * key); that odd case is not disallowed.
	 */
	found_key = child->key;
	if (found_key.size < cursor->key.size)
		WT_ERR(WT_NOTFOUND);
	found_key.size = cursor->key.size;

	WT_ERR(__wt_compare(session,
	    cindex->index->collator, &cursor->key, &found_key, &cmp));
	if (cmp != 0)
		WT_ERR(WT_NOTFOUND);

	WT_ERR(__curindex_move(cindex));

	if (0) {
		/* Error path only: the cursor no longer holds a key/value. */
err:		F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
	}

	API_END_RET(session, ret);
}
/* * __split_verify_intl_key_order -- * Verify the key order on an internal page after a split, diagnostic only. */ static void __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_BTREE *btree; WT_ITEM *next, _next, *last, _last, *tmp; WT_REF *ref; uint64_t recno; int cmp; bool first; btree = S2BT(session); switch (page->type) { case WT_PAGE_COL_INT: recno = 0; /* Less than any valid record number. */ WT_INTL_FOREACH_BEGIN(session, page, ref) { WT_ASSERT(session, ref->key.recno > recno); recno = ref->key.recno; } WT_INTL_FOREACH_END; break; case WT_PAGE_ROW_INT: next = &_next; WT_CLEAR(_next); last = &_last; WT_CLEAR(_last); first = true; WT_INTL_FOREACH_BEGIN(session, page, ref) { __wt_ref_key(page, ref, &next->data, &next->size); if (last->size == 0) { if (first) first = false; else { WT_ASSERT(session, __wt_compare( session, btree->collator, last, next, &cmp) == 0); WT_ASSERT(session, cmp < 0); } } tmp = last; last = next; next = tmp; } WT_INTL_FOREACH_END;
/*
 * __curbulk_insert_row --
 *	Row-store bulk cursor insert, with key-sort checks: every key after
 *	the first must compare greater than the previously inserted key.
 */
static int
__curbulk_insert_row(WT_CURSOR *cursor)
{
	WT_BTREE *btree;
	WT_CURSOR_BULK *cbulk;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	int cmp;

	cbulk = (WT_CURSOR_BULK *)cursor;
	btree = cbulk->cbt.btree;

	/*
	 * Bulk inserts are updates, but no auto-commit transaction is needed:
	 * they are single-threaded and not visible until the bulk cursor is
	 * closed.
	 */
	CURSOR_API_CALL(cursor, session, insert, btree);
	WT_STAT_FAST_DATA_INCR(session, cursor_insert_bulk);

	WT_CURSOR_CHECKKEY(cursor);
	WT_CURSOR_CHECKVALUE(cursor);

	/*
	 * Except for the very first key, compare against the last key inserted
	 * so the application cannot accidentally corrupt the table with
	 * out-of-order or duplicate keys.
	 */
	if (cbulk->first_insert)
		cbulk->first_insert = false;
	else {
		WT_ERR(__wt_compare(session,
		    btree->collator, &cursor->key, &cbulk->last, &cmp));
		if (cmp <= 0)
			WT_ERR(__bulk_row_keycmp_err(cbulk));
	}

	/* Remember this key for the next call's comparison. */
	WT_ERR(__wt_buf_set(
	    session, &cbulk->last, cursor->key.data, cursor->key.size));

	WT_ERR(__wt_bulk_insert_row(session, cbulk));

err:	API_END_RET(session, ret);
}
/*
 * __curds_compare --
 *	WT_CURSOR.compare method for the data-source cursor type. Fails with
 *	EINVAL when the cursors do not share an internal URI.
 */
static int
__curds_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
	WT_COLLATOR *collator;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	CURSOR_API_CALL(a, session, compare, NULL);

	/* Cursors opened on different sources cannot be ordered. */
	if (strcmp(a->internal_uri, b->internal_uri) != 0)
		WT_ERR_MSG(session, EINVAL,
		    "Cursors must reference the same object");

	/* Both cursors need a key before they can be compared. */
	WT_ERR(__cursor_needkey(a));
	WT_ERR(__cursor_needkey(b));

	if (!WT_CURSOR_RECNO(a)) {
		/*
		 * Data sources are assumed not to supply their own
		 * WT_CURSOR.compare methods: the key/value is copied out of
		 * the underlying data-source cursor, so any comparison can be
		 * done at this level.
		 */
		collator = ((WT_CURSOR_DATA_SOURCE *)a)->collator;
		ret = __wt_compare(session, collator, &a->key, &b->key, cmpp);
	} else
		/* Record-number cursors are ordered by record number. */
		*cmpp = a->recno < b->recno ? -1 :
		    (a->recno == b->recno ? 0 : 1);

err:	API_END_RET(session, ret);
}
/*
 * __cursor_key_order_check_row --
 *	Check key ordering for row-store cursor movements. When the cursor's
 *	current key follows the saved last key in the direction of travel (or
 *	no last key has been saved yet), the current key is saved as the new
 *	last key; otherwise both keys are reported and the panic error path
 *	is taken.
 */
static int
__cursor_key_order_check_row(
    WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next)
{
	WT_BTREE *btree;
	WT_DECL_ITEM(lastbuf);
	WT_DECL_ITEM(keybuf);
	WT_DECL_RET;
	WT_ITEM *key;
	int cmp;
	bool in_order;

	btree = S2BT(session);
	key = &cbt->iface.key;

	/*
	 * An empty last key means there is nothing to compare against yet.
	 * Otherwise a forward move must return a larger key and a backward
	 * move a smaller one.
	 */
	if (cbt->lastkey->size == 0)
		in_order = true;
	else {
		WT_RET(__wt_compare(
		    session, btree->collator, cbt->lastkey, key, &cmp));
		in_order = next ? cmp < 0 : cmp > 0;
	}

	if (in_order)
		return (__wt_buf_set(
		    session, cbt->lastkey, key->data, key->size));

	/* Out of order: format both keys for the message, then panic. */
	WT_ERR(__wt_scr_alloc(session, 512, &lastbuf));
	WT_ERR(__wt_scr_alloc(session, 512, &keybuf));

	WT_PANIC_ERR(session, EINVAL,
	    "WT_CURSOR.%s out-of-order returns: returned key %s then key %s",
	    next ? "next" : "prev",
	    __wt_buf_set_printable(
	    session, cbt->lastkey->data, cbt->lastkey->size, lastbuf),
	    __wt_buf_set_printable(session, key->data, key->size, keybuf));

err:	__wt_scr_free(session, &lastbuf);
	__wt_scr_free(session, &keybuf);

	return (ret);
}
/*
 * __wt_btcur_compare --
 *	Return a comparison between two btree cursors. Fails with EINVAL when
 *	the cursors are not on the same btree. Column-store cursors are
 *	ordered by record number (-1, 0 or 1 is stored in cmpp); row-store
 *	cursors are ordered by key through __wt_compare.
 */
int
__wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp)
{
	WT_CURSOR *a, *b;
	WT_SESSION_IMPL *session;

	a = (WT_CURSOR *)a_arg;
	b = (WT_CURSOR *)b_arg;
	session = (WT_SESSION_IMPL *)a->session;

	/* Cursors on different btrees cannot be ordered. */
	if (a_arg->btree != b_arg->btree)
		WT_RET_MSG(
		    session, EINVAL, "Cursors must reference the same object");

	switch (a_arg->btree->type) {
	case BTREE_COL_FIX:
	case BTREE_COL_VAR:
		/*
		 * Use the record number stored in the interface cursor rather
		 * than the underlying cursor reference: the interface's value
		 * is the one returned to the application.
		 */
		*cmpp = a->recno < b->recno ? -1 :
		    (a->recno == b->recno ? 0 : 1);
		break;
	case BTREE_ROW:
		WT_RET(__wt_compare(session,
		    a_arg->btree->collator, &a->key, &b->key, cmpp));
		break;
	}

	return (0);
}
/*
 * __curindex_compare --
 *	WT_CURSOR->compare method for the index cursor type: compare the keys
 *	of two index cursors using the index's collator.
 */
static int
__curindex_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp)
{
	WT_CURSOR_INDEX *cindex;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	cindex = (WT_CURSOR_INDEX *)a;
	JOINABLE_CURSOR_API_CALL(a, session, compare, NULL);

	/*
	 * The first cursor must be an "index:" cursor and both cursors must
	 * share the same URI.
	 */
	if (!WT_PREFIX_MATCH(a->uri, "index:") || strcmp(a->uri, b->uri) != 0)
		WT_ERR_MSG(session, EINVAL,
		    "Cursors must reference the same object");

	WT_CURSOR_CHECKKEY(a);
	WT_CURSOR_CHECKKEY(b);

	WT_ERR(__wt_compare(
	    session, cindex->index->collator, &a->key, &b->key, cmpp));

err:	API_END_RET(session, ret);
}
/* * __verify_dsk_row -- * Walk a WT_PAGE_ROW_INT or WT_PAGE_ROW_LEAF disk page and verify it. */ static int __verify_dsk_row( WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk) { WT_BM *bm; WT_BTREE *btree; WT_CELL *cell; WT_CELL_UNPACK *unpack, _unpack; WT_DECL_ITEM(current); WT_DECL_ITEM(last_ovfl); WT_DECL_ITEM(last_pfx); WT_DECL_RET; WT_ITEM *last; enum { FIRST, WAS_KEY, WAS_VALUE } last_cell_type; void *huffman; uint32_t cell_num, cell_type, i, key_cnt, prefix; uint8_t *end; int cmp; btree = S2BT(session); bm = btree->bm; unpack = &_unpack; huffman = dsk->type == WT_PAGE_ROW_INT ? NULL : btree->huffman_key; WT_ERR(__wt_scr_alloc(session, 0, ¤t)); WT_ERR(__wt_scr_alloc(session, 0, &last_pfx)); WT_ERR(__wt_scr_alloc(session, 0, &last_ovfl)); last = last_ovfl; end = (uint8_t *)dsk + dsk->mem_size; last_cell_type = FIRST; cell_num = 0; key_cnt = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { ++cell_num; /* Carefully unpack the cell. */ if (__wt_cell_unpack_safe(cell, unpack, end) != 0) { ret = __err_cell_corrupted(session, cell_num, tag); goto err; } /* Check the raw and collapsed cell types. */ WT_ERR(__err_cell_type( session, cell_num, tag, unpack->raw, dsk->type)); WT_ERR(__err_cell_type( session, cell_num, tag, unpack->type, dsk->type)); cell_type = unpack->type; /* * Check ordering relationships between the WT_CELL entries. * For row-store internal pages, check for: * two values in a row, * two keys in a row, * a value as the first cell on a page. * For row-store leaf pages, check for: * two values in a row, * a value as the first cell on a page. 
*/ switch (cell_type) { case WT_CELL_KEY: case WT_CELL_KEY_OVFL: ++key_cnt; switch (last_cell_type) { case FIRST: case WAS_VALUE: break; case WAS_KEY: if (dsk->type == WT_PAGE_ROW_LEAF) break; WT_ERR_VRFY(session, "cell %" PRIu32 " on page at %s is the " "first of two adjacent keys", cell_num - 1, tag); } last_cell_type = WAS_KEY; break; case WT_CELL_ADDR_DEL: case WT_CELL_ADDR_INT: case WT_CELL_ADDR_LEAF: case WT_CELL_ADDR_LEAF_NO: case WT_CELL_VALUE: case WT_CELL_VALUE_OVFL: switch (last_cell_type) { case FIRST: WT_ERR_VRFY(session, "page at %s begins with a value", tag); case WAS_KEY: break; case WAS_VALUE: WT_ERR_VRFY(session, "cell %" PRIu32 " on page at %s is the " "first of two adjacent values", cell_num - 1, tag); } last_cell_type = WAS_VALUE; break; } /* Check if any referenced item has a valid address. */ switch (cell_type) { case WT_CELL_ADDR_DEL: case WT_CELL_ADDR_INT: case WT_CELL_ADDR_LEAF: case WT_CELL_ADDR_LEAF_NO: case WT_CELL_KEY_OVFL: case WT_CELL_VALUE_OVFL: if (!bm->addr_valid(bm, session, unpack->data, unpack->size)) goto eof; break; } /* * Remaining checks are for key order and prefix compression. * If this cell isn't a key, we're done, move to the next cell. * If this cell is an overflow item, instantiate the key and * compare it with the last key. Otherwise, we have to deal with * prefix compression. */ switch (cell_type) { case WT_CELL_KEY: break; case WT_CELL_KEY_OVFL: WT_ERR(__wt_dsk_cell_data_ref( session, dsk->type, unpack, current)); goto key_compare; default: /* Not a key -- continue with the next cell. */ continue; } /* * Prefix compression checks. * * Confirm the first non-overflow key on a page has a zero * prefix compression count. */ prefix = unpack->prefix; if (last_pfx->size == 0 && prefix != 0) WT_ERR_VRFY(session, "the %" PRIu32 " key on page at %s is the first " "non-overflow key on the page and has a non-zero " "prefix compression value", cell_num, tag); /* Confirm the prefix compression count is possible. 
*/ if (cell_num > 1 && prefix > last->size) WT_ERR_VRFY(session, "key %" PRIu32 " on page at %s has a prefix " "compression count of %" PRIu32 ", larger than " "the length of the previous key, %" WT_SIZET_FMT, cell_num, tag, prefix, last->size); /* * If Huffman decoding required, unpack the cell to build the * key, then resolve the prefix. Else, we can do it faster * internally because we don't have to shuffle memory around as * much. */ if (huffman != NULL) { WT_ERR(__wt_dsk_cell_data_ref( session, dsk->type, unpack, current)); /* * If there's a prefix, make sure there's enough buffer * space, then shift the decoded data past the prefix * and copy the prefix into place. Take care with the * pointers: current->data may be pointing inside the * buffer. */ if (prefix != 0) { WT_ERR(__wt_buf_grow( session, current, prefix + current->size)); memmove((uint8_t *)current->mem + prefix, current->data, current->size); memcpy(current->mem, last->data, prefix); current->data = current->mem; current->size += prefix; } } else { /* * Get the cell's data/length and make sure we have * enough buffer space. */ WT_ERR(__wt_buf_init( session, current, prefix + unpack->size)); /* Copy the prefix then the data into place. */ if (prefix != 0) memcpy(current->mem, last->data, prefix); memcpy((uint8_t *)current->mem + prefix, unpack->data, unpack->size); current->size = prefix + unpack->size; } key_compare: /* * Compare the current key against the last key. * * Be careful about the 0th key on internal pages: we only store * the first byte and custom collators may not be able to handle * truncated keys. 
*/ if ((dsk->type == WT_PAGE_ROW_INT && cell_num > 3) || (dsk->type != WT_PAGE_ROW_INT && cell_num > 1)) { WT_ERR(__wt_compare( session, btree->collator, last, current, &cmp)); if (cmp >= 0) WT_ERR_VRFY(session, "the %" PRIu32 " and %" PRIu32 " keys on " "page at %s are incorrectly sorted", cell_num - 2, cell_num, tag); } /* * Swap the buffers: last always references the last key entry, * last_pfx and last_ovfl reference the last prefix-compressed * and last overflow key entries. Current gets pointed to the * buffer we're not using this time around, which is where the * next key goes. */ last = current; if (cell_type == WT_CELL_KEY) { current = last_pfx; last_pfx = last; } else { current = last_ovfl; last_ovfl = last; } WT_ASSERT(session, last != current); }
/*
 * __curjoin_init_bloom --
 *	Populate Bloom filters: scan the entry's index source (or the main
 *	table when the entry has no index) with a raw cursor and insert into
 *	the filter every record whose key satisfies the entry's endpoints.
 */
static int
__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
{
	WT_COLLATOR *collator;
	WT_CURSOR *c;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	WT_DECL_ITEM(uribuf);
	WT_DECL_RET;
	WT_ITEM curkey, curvalue;
	size_t size;
	u_int skip;
	int cmp;
	const char *uri;
	const char *raw_cfg[] = { WT_CONFIG_BASE(
	    session, WT_SESSION_open_cursor), "raw", NULL };

	c = NULL;
	skip = 0;

	if (entry->index == NULL) {
		/*
		 * Joins on the main table only need the primary key for the
		 * comparison, no values: open the table with an empty
		 * projection.
		 */
		size = strlen(cjoin->table->iface.name) + 3;
		WT_ERR(__wt_scr_alloc(session, size, &uribuf));
		WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
		    cjoin->table->iface.name));
		uri = uribuf->data;
	} else
		/*
		 * Open the raw index, avoiding any reference to the main
		 * table, which may be expensive.
		 */
		uri = entry->index->source;
	WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c));

	/* Initially position the cursor if necessary. */
	endmax = &entry->ends[entry->ends_next];
	end = &entry->ends[0];
	if (end < endmax) {
		if (F_ISSET(end, WT_CURJOIN_END_GT) ||
		    WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) {
			/* Start the scan at the endpoint's own position. */
			WT_ERR(__wt_cursor_dup_position(end->cursor, c));
			if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE)
				skip = 1;
		} else if (F_ISSET(end, WT_CURJOIN_END_LT)) {
			/* Start the scan at the first record, if any. */
			if ((ret = c->next(c)) == WT_NOTFOUND)
				goto done;
			WT_ERR(ret);
		} else
			WT_PANIC_ERR(session, EINVAL,
			    "fatal error in join cursor position state");
	}

	collator = (entry->index == NULL) ? NULL : entry->index->collator;
	while (ret == 0) {
		WT_ERR(c->get_key(c, &curkey));
		++entry->stats.iterated;

		/* Repack so the key is comparable to the reference endpoints. */
		if (entry->index != NULL)
			WT_ERR(__wt_struct_repack(session, c->key_format,
			    (entry->repack_format != NULL ?
			    entry->repack_format : entry->index->idxkey_format),
			    &c->key, &curkey));

		for (end = &entry->ends[skip]; end < endmax; end++) {
			WT_ERR(__wt_compare(
			    session, collator, &curkey, &end->key, &cmp));

			if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) {
				/* Insert as soon as any condition holds. */
				switch (WT_CURJOIN_END_RANGE(end)) {
				case WT_CURJOIN_END_EQ:
					if (cmp == 0)
						goto insert;
					break;
				case WT_CURJOIN_END_GT:
					if (cmp > 0) {
						/* Skip this check next time. */
						skip = entry->ends_next;
						goto insert;
					}
					break;
				case WT_CURJOIN_END_GE:
					if (cmp >= 0)
						goto insert;
					break;
				case WT_CURJOIN_END_LT:
					if (cmp < 0)
						goto insert;
					break;
				case WT_CURJOIN_END_LE:
					if (cmp <= 0)
						goto insert;
					break;
				}
			} else if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
				/* Below (or excluded at) this bound: skip. */
				if (cmp < 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto advance;
				if (cmp > 0) {
					if (F_ISSET(end, WT_CURJOIN_END_GT))
						skip = 1;
					else
						goto done;
				}
			} else {
				/* Past (or excluded at) this bound: stop. */
				if (cmp > 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto done;
			}
		}

		/*
		 * Reaching this point means either a disjunction for which no
		 * condition held, or a conjunction for which all conditions
		 * held.
		 */
		if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
			goto advance;

insert:		if (entry->index != NULL) {
			/*
			 * The inserted value is the part of the raw index key
			 * that follows the repacked key bytes.
			 */
			curvalue.data =
			    (unsigned char *)curkey.data + curkey.size;
			WT_ASSERT(session, c->key.size > curkey.size);
			curvalue.size = c->key.size - curkey.size;
		} else
			WT_ERR(c->get_key(c, &curvalue));
		__wt_bloom_insert(bloom, &curvalue);
		entry->stats.bloom_insert++;

advance:	if ((ret = c->next(c)) == WT_NOTFOUND)
			break;
	}

done:	WT_ERR_NOTFOUND_OK(ret);

err:	if (c != NULL)
		WT_TRET(c->close(c));
	__wt_scr_free(session, &uribuf);

	return (ret);
}
/*
 * __curjoin_entry_in_range --
 *	Check if a key is in the range specified by the entry, returning
 *	WT_NOTFOUND if not. For a disjunction a single passing endpoint is
 *	enough; for a conjunction every checked endpoint must pass.
 */
static int
__curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
    WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iter)
{
	WT_COLLATOR *collator;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	u_int pos;
	int cmp;
	bool disjunction, passed;

	collator = (entry->index != NULL) ? entry->index->collator : NULL;
	endmax = &entry->ends[entry->ends_next];
	disjunction = F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION);

	/*
	 * The iterator may already have satisfied some endpoint conditions.
	 * In that case a disjunction is done, while a conjunction can start
	 * checking past the satisfied conditions.
	 */
	pos = 0;
	if (iter != NULL) {
		if (disjunction && iter->end_skip)
			return (0);
		pos = iter->end_pos + iter->end_skip;
	}

	for (end = &entry->ends[pos]; end < endmax; end++) {
		WT_RET(__wt_compare(
		    session, collator, curkey, &end->key, &cmp));

		switch (WT_CURJOIN_END_RANGE(end)) {
		case WT_CURJOIN_END_EQ:
			passed = (cmp == 0);
			break;
		case WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ:
			passed = (cmp >= 0);
			WT_ASSERT(session, iter == NULL);
			break;
		case WT_CURJOIN_END_GT:
			passed = (cmp > 0);
			/* Record on the iterator that this bound is passed. */
			if (passed && iter != NULL && pos == 0)
				iter->end_skip = 1;
			break;
		case WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ:
			passed = (cmp <= 0);
			break;
		case WT_CURJOIN_END_LT:
			passed = (cmp < 0);
			break;
		WT_ILLEGAL_VALUE(session, WT_CURJOIN_END_RANGE(end));
		}

		if (passed) {
			/* One passing endpoint settles a disjunction. */
			if (disjunction)
				break;
			continue;
		}

		/*
		 * The endpoint failed. With an iterator on an equality or
		 * "less than" endpoint, bump the iterator before reporting
		 * the miss.
		 */
		if (iter != NULL &&
		    (iter->is_equal || F_ISSET(end, WT_CURJOIN_END_LT))) {
			WT_RET(__curjoin_iter_bump(iter));
			return (WT_NOTFOUND);
		}
		if (!disjunction)
			return (WT_NOTFOUND);

		/* Remaining endpoints are checked without the iterator. */
		iter = NULL;
	}

	/* A disjunction that ran out of endpoints matched none of them. */
	if (disjunction && end == endmax)
		return (WT_NOTFOUND);

	return (0);
}
/*
 * __curjoin_init_bloom --
 *	Populate Bloom filters: scan the entry's index (or the main table when
 *	the entry has no index) with a raw cursor and insert into the filter
 *	every record whose key satisfies all of the entry's endpoints.
 *
 *	Returns zero on success (running off the end of the scan is not an
 *	error) or the first error encountered; the scan cursor and scratch
 *	memory are released on every path.
 */
static int
__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
{
	WT_COLLATOR *collator;
	WT_CURSOR *c;
	WT_CURSOR_INDEX *cindex;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	WT_DECL_RET;
	WT_DECL_ITEM(uribuf);
	WT_ITEM curkey, curvalue, *k;
	WT_TABLE *maintable;
	const char *raw_cfg[] = { WT_CONFIG_BASE(
	    session, WT_SESSION_open_cursor), "raw", NULL };
	const char *mainkey_str, *p;
	void *allocbuf;
	size_t mainkey_len, size;
	u_int i;
	int cmp, skip;

	c = NULL;
	allocbuf = NULL;
	skip = 0;

	if (entry->index != NULL) {
		/*
		 * Open a cursor having a projection of the keys of the
		 * index we're comparing against.  Open it raw, we're
		 * going to compare it to the raw keys of the
		 * reference cursors.
		 *
		 * The projection is the leading part of the main table's
		 * column configuration string (past its first character), up
		 * to the comma found after stepping over nkey_columns commas.
		 */
		maintable = ((WT_CURSOR_TABLE *)entry->main)->table;
		mainkey_str = maintable->colconf.str + 1;
		for (p = mainkey_str, i = 0;
		    p != NULL && i < maintable->nkey_columns; i++)
			p = strchr(p + 1, ',');
		WT_ASSERT(session, p != NULL);
		mainkey_len = WT_PTRDIFF(p, mainkey_str);
		size = strlen(entry->index->name) + mainkey_len + 3;
		WT_ERR(__wt_scr_alloc(session, size, &uribuf));
		WT_ERR(__wt_buf_fmt(session, uribuf, "%s(%.*s)",
		    entry->index->name, (int)mainkey_len, mainkey_str));
	} else {
		/*
		 * For joins on the main table, we just need the primary
		 * key for comparison, we don't need any values.
		 */
		size = strlen(cjoin->table->name) + 3;
		WT_ERR(__wt_scr_alloc(session, size, &uribuf));
		WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
		    cjoin->table->name));
	}
	WT_ERR(__wt_open_cursor(
	    session, uribuf->data, &cjoin->iface, raw_cfg, &c));

	/*
	 * Initially position the cursor.
	 *
	 * A first endpoint that is a lower bound (GT or GE) or an exact
	 * equality lets the scan start at the endpoint's own position. Any
	 * other first endpoint (an upper bound, LT or LE) requires scanning
	 * from the first record: starting at the bound would leave every
	 * smaller key out of the filter, and not positioning the cursor at
	 * all would leave it without a key for the loop below to read.
	 */
	endmax = &entry->ends[entry->ends_next];
	if ((end = &entry->ends[0]) < endmax) {
		if (F_ISSET(end, WT_CURJOIN_END_GT) ||
		    end->flags == WT_CURJOIN_END_EQ) {
			WT_ERR(__wt_cursor_dup_position(end->cursor, c));
			/* Everything from a GE position on passes it. */
			if (end->flags == WT_CURJOIN_END_GE)
				skip = 1;
		} else {
			/* An empty object leaves the filter empty. */
			if ((ret = c->next(c)) == WT_NOTFOUND)
				goto done;
			WT_ERR(ret);
		}
	}

	collator = (entry->index == NULL) ? NULL : entry->index->collator;
	while (ret == 0) {
		WT_ERR(c->get_key(c, &curkey));
		if (entry->index != NULL) {
			cindex = (WT_CURSOR_INDEX *)c;
			if (cindex->index->extractor == NULL) {
				/*
				 * Repack so it's comparable to the
				 * reference endpoints.
				 */
				k = &cindex->child->key;
				WT_ERR(__wt_struct_repack(session,
				    cindex->child->key_format,
				    entry->main->value_format, k, &curkey,
				    &allocbuf));
			} else
				curkey = cindex->child->key;
		}

		for (end = &entry->ends[skip]; end < endmax; end++) {
			WT_ERR(__wt_compare(
			    session, collator, &curkey, &end->key, &cmp));
			if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
				/* Below (or excluded at) this bound: skip. */
				if (cmp < 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto advance;
				if (cmp > 0) {
					if (F_ISSET(end, WT_CURJOIN_END_GT))
						skip = 1;
					else
						goto done;
				}
			} else {
				/* Past (or excluded at) this bound: stop. */
				if (cmp > 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto done;
			}
		}

		/* Every endpoint accepted the key: add it to the filter. */
		if (entry->index != NULL)
			WT_ERR(c->get_value(c, &curvalue));
		else
			WT_ERR(c->get_key(c, &curvalue));
		WT_ERR(__wt_bloom_insert(bloom, &curvalue));
		entry->stats.actual_count++;

advance:	if ((ret = c->next(c)) == WT_NOTFOUND)
			break;
	}

done:	WT_ERR_NOTFOUND_OK(ret);

err:	if (c != NULL)
		WT_TRET(c->close(c));
	__wt_scr_free(session, &uribuf);
	__wt_free(session, allocbuf);

	return (ret);
}