/*
 * __curjoin_endpoint_init_key --
 *     Set the key in the reference endpoint.
 *
 *     Nothing is done when the endpoint has no reference cursor. For an
 *     entry on an index, the index cursor's underlying (child) key is
 *     repacked into endpoint->key, using the entry's repack format when one
 *     is set and the index cursor's own key format otherwise. For an entry
 *     on the main table, the key of the first column group cursor is used:
 *     record-number keys are packed through endpoint->recno_buf, any other
 *     key is referenced by a structure copy.
 *
 *     Returns 0 on success or the error from the repack/pack helper.
 */
static int
__curjoin_endpoint_init_key(WT_SESSION_IMPL *session,
    WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ENDPOINT *endpoint)
{
    WT_CURSOR *refcur;
    WT_CURSOR_INDEX *idxcur;
    WT_ITEM *tablekey;
    const char *outfmt;
    uint64_t recno;

    /* No reference cursor, no key to set. */
    refcur = endpoint->cursor;
    if (refcur == NULL)
        return (0);

    if (entry->index != NULL) {
        /* Extract and save the index's logical key. */
        idxcur = (WT_CURSOR_INDEX *)refcur;
        outfmt = entry->repack_format;
        if (outfmt == NULL)
            outfmt = idxcur->iface.key_format;
        WT_RET(__wt_struct_repack(session,
            idxcur->child->key_format, outfmt,
            &idxcur->child->key, &endpoint->key));
        return (0);
    }

    /* Main table: take the key from the first column group cursor. */
    tablekey = &((WT_CURSOR_TABLE *)refcur)->cg_cursors[0]->key;
    if (!WT_CURSOR_RECNO(refcur)) {
        endpoint->key = *tablekey;
        return (0);
    }

    /* Record numbers are packed into the endpoint's private buffer. */
    recno = *(uint64_t *)tablekey->data;
    WT_RET(__curjoin_pack_recno(session, recno,
        endpoint->recno_buf, sizeof(endpoint->recno_buf), &endpoint->key));
    return (0);
}
/*
 * __curjoin_split_key --
 *     Copy the primary key from a cursor (either main table or index) to
 *     another cursor. When copying from an index file, the index key is
 *     also returned in idxkey; when copying from the main table, idxkey is
 *     cleared.
 *
 *     tocur->recno is set to the record number when the destination uses
 *     record-number keys and to zero otherwise. Returns 0 on success or the
 *     error from the repack/pack/unpack helper.
 */
static int
__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_ITEM *idxkey, WT_CURSOR *tocur, WT_CURSOR *fromcur,
    const char *repack_fmt, bool isindex)
{
    WT_CURSOR_INDEX *cindex;
    WT_ITEM *keyp;
    const char *outfmt;
    const uint8_t *unpackp;

    if (!isindex) {
        /* Main table: the key lives in the first column group cursor. */
        keyp = &((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0]->key;
        if (WT_CURSOR_RECNO(tocur)) {
            WT_ASSERT(session, keyp->size == sizeof(uint64_t));
            tocur->recno = *(uint64_t *)keyp->data;
            WT_RET(__curjoin_pack_recno(session, tocur->recno,
                cjoin->recno_buf, sizeof(cjoin->recno_buf),
                &tocur->key));
        } else {
            WT_ITEM_SET(tocur->key, *keyp);
            tocur->recno = 0;
        }

        /* There is no index key to hand back. */
        idxkey->data = NULL;
        idxkey->size = 0;
        return (0);
    }

    /*
     * Repack tells us where the index key ends; advance past that to get
     * where the raw primary key starts.
     */
    cindex = (WT_CURSOR_INDEX *)fromcur;
    outfmt = (repack_fmt == NULL) ? cindex->iface.key_format : repack_fmt;
    WT_RET(__wt_struct_repack(session,
        cindex->child->key_format, outfmt, &cindex->child->key, idxkey));
    WT_ASSERT(session, cindex->child->key.size > idxkey->size);
    tocur->key.data = (uint8_t *)idxkey->data + idxkey->size;
    tocur->key.size = cindex->child->key.size - idxkey->size;

    if (!WT_CURSOR_RECNO(tocur)) {
        tocur->recno = 0;
        return (0);
    }

    /* The destination is keyed by record number: decode it. */
    unpackp = (const uint8_t *)tocur->key.data;
    WT_RET(__wt_vunpack_uint(&unpackp, tocur->key.size, &tocur->recno));
    return (0);
}
/*
 * __curjoin_endpoint_init_key --
 *     Set the key in the reference endpoint.
 *
 *     Nothing is done when the endpoint has no reference cursor. For an
 *     entry on an index without an extractor, the index cursor's child key
 *     is repacked into endpoint->key; if the repack hands back an allocated
 *     buffer the endpoint is flagged WT_CURJOIN_END_OWN_KEY. With an
 *     extractor the child key is used as is. For an entry on the main
 *     table, the key of the first column group cursor is used:
 *     record-number keys are packed through endpoint->recno_buf, any other
 *     key is referenced by a structure copy.
 *
 *     On error any buffer returned by the repack is freed.
 */
static int
__curjoin_endpoint_init_key(WT_SESSION_IMPL *session,
    WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ENDPOINT *endpoint)
{
    WT_CURSOR *refcur;
    WT_CURSOR_INDEX *idxcur;
    WT_DECL_RET;
    WT_ITEM *tablekey;
    uint64_t recno;
    void *ownedbuf;

    ownedbuf = NULL;
    refcur = endpoint->cursor;

    if (refcur != NULL && entry->index != NULL) {
        idxcur = (WT_CURSOR_INDEX *)refcur;
        if (idxcur->index->extractor != NULL)
            endpoint->key = idxcur->child->key;
        else {
            WT_ERR(__wt_struct_repack(session,
                idxcur->child->key_format,
                entry->main->value_format,
                &idxcur->child->key, &endpoint->key, &ownedbuf));

            /* Remember that the endpoint owns the key memory. */
            if (ownedbuf != NULL)
                F_SET(endpoint, WT_CURJOIN_END_OWN_KEY);
        }
    } else if (refcur != NULL) {
        tablekey = &((WT_CURSOR_TABLE *)refcur)->cg_cursors[0]->key;
        if (!WT_CURSOR_RECNO(refcur))
            endpoint->key = *tablekey;
        else {
            recno = *(uint64_t *)tablekey->data;
            WT_ERR(__curjoin_pack_recno(session, recno,
                endpoint->recno_buf, sizeof(endpoint->recno_buf),
                &endpoint->key));
        }
    }
    return (ret);

err:
    /* Only the failure path releases the repack buffer. */
    __wt_free(session, ownedbuf);
    return (ret);
}
/*
 * __curjoin_init_bloom --
 *     Populate Bloom filters.
 *
 *     Opens a raw cursor on the entry's index source (or, for a join on the
 *     main table, on the table with an empty "()" projection), walks it, and
 *     compares each key against the entry's endpoints. Keys that qualify
 *     have their trailing primary-key bytes (index case) or their key (table
 *     case) inserted into "bloom". Running off the end of the cursor
 *     (WT_NOTFOUND) is not treated as an error. The cursor and the scratch
 *     URI buffer are released on every exit path.
 */
static int
__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
{
    WT_COLLATOR *collator;
    WT_CURSOR *c;
    WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
    WT_DECL_ITEM(uribuf);
    WT_DECL_RET;
    WT_ITEM curkey, curvalue;
    size_t size;
    u_int skip;                 /* Index of the first endpoint to compare. */
    int cmp;
    const char *uri;
    const char *raw_cfg[] = { WT_CONFIG_BASE(
        session, WT_SESSION_open_cursor), "raw", NULL };

    c = NULL;
    skip = 0;

    /*
     * Index case: open the raw index. We're avoiding any references to the
     * main table, they may be expensive.
     */
    if (entry->index != NULL)
        uri = entry->index->source;
    else {
        /*
         * For joins on the main table, we just need the primary key for
         * comparison, we don't need any values. The extra 3 bytes hold
         * "()" and the terminating nul.
         */
        size = strlen(cjoin->table->iface.name) + 3;
        WT_ERR(__wt_scr_alloc(session, size, &uribuf));
        WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
            cjoin->table->iface.name));
        uri = uribuf->data;
    }
    WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c));

    /*
     * Initially position the cursor if necessary, based on the first
     * endpoint: a lower bound or equality endpoint starts the scan at the
     * endpoint's own position, an upper bound starts at the first record,
     * anything else is a fatal state.
     */
    endmax = &entry->ends[entry->ends_next];
    if ((end = &entry->ends[0]) < endmax) {
        if (F_ISSET(end, WT_CURJOIN_END_GT) ||
            WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) {
            WT_ERR(__wt_cursor_dup_position(end->cursor, c));
            /*
             * Starting exactly on a ">=" endpoint: every key from here
             * on satisfies it, so begin comparisons at the next endpoint.
             */
            if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE)
                skip = 1;
        } else if (F_ISSET(end, WT_CURJOIN_END_LT)) {
            /* An empty source leaves nothing to insert. */
            if ((ret = c->next(c)) == WT_NOTFOUND)
                goto done;
            WT_ERR(ret);
        } else
            WT_PANIC_ERR(session, EINVAL,
                "fatal error in join cursor position state");
    }

    /* Main-table keys are compared without a collator. */
    collator = (entry->index == NULL) ? NULL : entry->index->collator;
    while (ret == 0) {
        WT_ERR(c->get_key(c, &curkey));
        entry->stats.iterated++;
        if (entry->index != NULL) {
            /*
             * Repack so it's comparable to the reference endpoints.
             */
            WT_ERR(__wt_struct_repack(session, c->key_format,
                (entry->repack_format != NULL ?
                entry->repack_format : entry->index->idxkey_format),
                &c->key, &curkey));
        }
        for (end = &entry->ends[skip]; end < endmax; end++) {
            WT_ERR(__wt_compare(session, collator, &curkey,
                &end->key, &cmp));
            if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) {
                /* If any condition is satisfied, insert immediately. */
                switch (WT_CURJOIN_END_RANGE(end)) {
                case WT_CURJOIN_END_EQ:
                    if (cmp == 0)
                        goto insert;
                    break;
                case WT_CURJOIN_END_GT:
                    if (cmp > 0) {
                        /*
                         * Skip this check next time.
                         *
                         * NOTE(review): with skip equal to
                         * ends_next the endpoint loop runs zero
                         * times on later keys, and a disjunction
                         * then falls through to "advance" without
                         * inserting -- confirm that is intended.
                         */
                        skip = entry->ends_next;
                        goto insert;
                    }
                    break;
                case WT_CURJOIN_END_GE:
                    if (cmp >= 0)
                        goto insert;
                    break;
                case WT_CURJOIN_END_LT:
                    if (cmp < 0)
                        goto insert;
                    break;
                case WT_CURJOIN_END_LE:
                    if (cmp <= 0)
                        goto insert;
                    break;
                }
            } else if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
                /*
                 * Conjunction, lower-bound or equality endpoint: a key
                 * below the bound is skipped; a key above an endpoint
                 * that lacks the GT flag ends the scan.
                 */
                if (cmp < 0 || (cmp == 0 &&
                    !F_ISSET(end, WT_CURJOIN_END_EQ)))
                    goto advance;
                if (cmp > 0) {
                    if (F_ISSET(end, WT_CURJOIN_END_GT))
                        skip = 1;
                    else
                        goto done;
                }
            } else {
                /*
                 * Conjunction, upper-bound endpoint: the first key past
                 * the bound ends the scan.
                 */
                if (cmp > 0 || (cmp == 0 &&
                    !F_ISSET(end, WT_CURJOIN_END_EQ)))
                    goto done;
            }
        }
        /*
         * Either it's a disjunction that hasn't satisfied any condition,
         * or it's a conjunction that has satisfied all conditions.
         */
        if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
            goto advance;
insert:
        if (entry->index != NULL) {
            /*
             * The bytes of the raw index key that follow the repacked
             * index key are what gets inserted (presumably the primary
             * key; this relies on curkey.data still pointing into the
             * raw key after the repack -- TODO confirm).
             */
            curvalue.data =
                (unsigned char *)curkey.data + curkey.size;
            WT_ASSERT(session, c->key.size > curkey.size);
            curvalue.size = c->key.size - curkey.size;
        } else
            WT_ERR(c->get_key(c, &curvalue));
        __wt_bloom_insert(bloom, &curvalue);
        entry->stats.bloom_insert++;
advance:
        if ((ret = c->next(c)) == WT_NOTFOUND)
            break;
    }
done:
    /* Reaching the end of the source is the normal way out. */
    WT_ERR_NOTFOUND_OK(ret);

err:
    if (c != NULL)
        WT_TRET(c->close(c));
    __wt_scr_free(session, &uribuf);
    return (ret);
}
/*
 * __curindex_search_near --
 *     WT_CURSOR->search_near method for index cursors.
 *
 *     On success *exact is zero when the entry found starts with the search
 *     key, and otherwise carries the sign of the comparison as seen from
 *     the caller. On failure the cursor's internal key/value flags are
 *     cleared.
 */
static int
__curindex_search_near(WT_CURSOR *cursor, int *exact)
{
    WT_COLLATOR *collator;
    WT_CURSOR *idxfile;
    WT_CURSOR_INDEX *cindex;
    WT_DECL_RET;
    WT_ITEM candidate;
    WT_SESSION_IMPL *session;
    int child_exact;

    cindex = (WT_CURSOR_INDEX *)cursor;
    idxfile = cindex->child;
    /*
     * NOTE(review): the call is registered under the name "search", not
     * "search_near" -- confirm that is intended.
     */
    JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);

    /*
     * The application-specified key (usually) lacks the primary key
     * columns, making it a prefix of every matching index key. When there
     * is an exact match we want to land on the first matching index entry
     * and report exact as zero.
     *
     * Search near the key in the underlying file; if that lands on an
     * entry that sorts too low, step forward. In the unlikely event that
     * steps past the end of the tree, fall back to the last key.
     */
    __wt_cursor_set_raw_key(idxfile, &cursor->key);
    WT_ERR(idxfile->search_near(idxfile, &child_exact));
    if (child_exact < 0) {
        ret = idxfile->next(idxfile);
        if (ret == WT_NOTFOUND)
            ret = idxfile->prev(idxfile);
        WT_ERR(ret);
    }

    /*
     * Partial matches are expected: we want the smallest record whose key
     * is greater than or equal to the search key, and a found key that
     * starts with the search key counts as a match.
     */
    collator = cindex->index->collator;
    candidate = idxfile->key;
    if (candidate.size > cursor->key.size) {
        if (collator == NULL)
            /* Plain comparison: look at the common prefix only. */
            candidate.size = cursor->key.size;
        else
            /*
             * Custom collators expect to see complete keys, pass an
             * item containing all the visible fields so it unpacks
             * correctly.
             */
            WT_ERR(__wt_struct_repack(session,
                cindex->child->key_format, cindex->iface.key_format,
                &idxfile->key, &candidate));
    }

    /*
     * The compare function expects application-supplied keys to come
     * first, so flip the sign of the result to match what callers expect.
     */
    WT_ERR(__wt_compare(
        session, collator, &cursor->key, &candidate, exact));
    *exact = -*exact;

    WT_ERR(__curindex_move(cindex));

    if (0) {
err:
        F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
    }

    API_END_RET(session, ret);
}
/*
 * __curindex_search --
 *     WT_CURSOR->search method for index cursors.
 *
 *     Returns WT_NOTFOUND when no index entry starts with the search key.
 *     On failure the cursor's internal key/value flags are cleared.
 */
static int
__curindex_search(WT_CURSOR *cursor)
{
    WT_COLLATOR *collator;
    WT_CURSOR *idxfile;
    WT_CURSOR_INDEX *cindex;
    WT_DECL_RET;
    WT_ITEM candidate;
    WT_SESSION_IMPL *session;
    int order;

    cindex = (WT_CURSOR_INDEX *)cursor;
    idxfile = cindex->child;
    JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);

    /*
     * The application-specified key (usually) lacks the primary key
     * columns, making it a prefix of every matching index key. Search near
     * it in the underlying file, step forward when we land on an entry
     * that sorts too low, then verify the prefix.
     */
    __wt_cursor_set_raw_key(idxfile, &cursor->key);
    WT_ERR(idxfile->search_near(idxfile, &order));
    if (order < 0)
        WT_ERR(idxfile->next(idxfile));

    /*
     * Partial matches are expected: we want the smallest record whose key
     * is greater than or equal to the search key. A found key shorter than
     * the search key can't possibly match.
     *
     * Exact equality only happens for an index on the primary key (other
     * indexes have the primary key columns appended); that odd case is
     * not disallowed.
     */
    if (idxfile->key.size < cursor->key.size)
        WT_ERR(WT_NOTFOUND);
    candidate = idxfile->key;

    collator = cindex->index->collator;
    if (collator == NULL || F_ISSET(cursor, WT_CURSTD_RAW_SEARCH))
        /* Plain or raw comparison: look at the common prefix only. */
        candidate.size = cursor->key.size;
    else
        /*
         * Custom collators expect to see complete keys, pass an item
         * containing all the visible fields so it unpacks correctly.
         */
        WT_ERR(__wt_struct_repack(session,
            idxfile->key_format, cindex->iface.key_format,
            &idxfile->key, &candidate));

    WT_ERR(__wt_compare(
        session, collator, &cursor->key, &candidate, &order));
    if (order != 0)
        WT_ERR(WT_NOTFOUND);

    WT_ERR(__curindex_move(cindex));

    if (0) {
err:
        F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
    }

    API_END_RET(session, ret);
}
/*
 * __curjoin_init_bloom --
 *     Populate Bloom filters.
 *
 *     Opens a raw cursor (an index cursor projecting the main table's key
 *     columns, or the main table with an empty "()" projection), walks it,
 *     and compares each key against the entry's endpoints. For every key
 *     that satisfies all of them, the cursor's value (index case) or key
 *     (table case) is inserted into "bloom". Running off the end of the
 *     cursor (WT_NOTFOUND) is not treated as an error. The cursor, the
 *     scratch URI buffer and the repack buffer are released on every exit
 *     path.
 */
static int
__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
{
    WT_COLLATOR *collator;
    WT_CURSOR *c;
    WT_CURSOR_INDEX *cindex;
    WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
    WT_DECL_RET;
    WT_DECL_ITEM(uribuf);
    WT_ITEM curkey, curvalue, *k;
    WT_TABLE *maintable;
    const char *raw_cfg[] = { WT_CONFIG_BASE(
        session, WT_SESSION_open_cursor), "raw", NULL };
    const char *mainkey_str, *p;
    void *allocbuf;             /* Buffer handed back by the repack call. */
    size_t mainkey_len, size;
    u_int i;
    int cmp, skip;              /* skip: first endpoint index to compare. */

    c = NULL;
    allocbuf = NULL;
    skip = 0;

    if (entry->index != NULL) {
        /*
         * Open a cursor having a projection of the keys of the index
         * we're comparing against. Open it raw, we're going to compare
         * it to the raw keys of the reference cursors.
         *
         * The projection text is cut out of the main table's column
         * configuration: skip its first character (presumably the
         * opening parenthesis -- TODO confirm), then find the comma
         * that follows the last key column.
         */
        maintable = ((WT_CURSOR_TABLE *)entry->main)->table;
        mainkey_str = maintable->colconf.str + 1;
        for (p = mainkey_str, i = 0;
            p != NULL && i < maintable->nkey_columns; i++)
            p = strchr(p + 1, ',');
        /*
         * NOTE(review): if the assertion is compiled out and p is NULL,
         * the length computed below is meaningless -- confirm the
         * column configuration always has a comma after the key columns.
         */
        WT_ASSERT(session, p != 0);
        mainkey_len = WT_PTRDIFF(p, mainkey_str);
        /* The extra 3 bytes hold "()" and the terminating nul. */
        size = strlen(entry->index->name) + mainkey_len + 3;
        WT_ERR(__wt_scr_alloc(session, size, &uribuf));
        WT_ERR(__wt_buf_fmt(session, uribuf, "%s(%.*s)",
            entry->index->name, (int)mainkey_len, mainkey_str));
    } else {
        /*
         * For joins on the main table, we just need the primary key for
         * comparison, we don't need any values.
         */
        size = strlen(cjoin->table->name) + 3;
        WT_ERR(__wt_scr_alloc(session, size, &uribuf));
        WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
            cjoin->table->name));
    }
    WT_ERR(__wt_open_cursor(
        session, uribuf->data, &cjoin->iface, raw_cfg, &c));

    /*
     * Initially position the cursor if necessary: when the first endpoint
     * has any of the WT_CURJOIN_END_GE flag bits set, start the scan at
     * that endpoint's position.
     */
    endmax = &entry->ends[entry->ends_next];
    if ((end = &entry->ends[0]) < endmax &&
        F_ISSET(end, WT_CURJOIN_END_GE)) {
        WT_ERR(__wt_cursor_dup_position(end->cursor, c));
        /*
         * Starting exactly on a ">=" endpoint: every key from here on
         * satisfies it, so begin comparisons at the next endpoint.
         */
        if (end->flags == WT_CURJOIN_END_GE)
            skip = 1;
    }

    /* Main-table keys are compared without a collator. */
    collator = (entry->index == NULL) ? NULL : entry->index->collator;
    while (ret == 0) {
        WT_ERR(c->get_key(c, &curkey));
        if (entry->index != NULL) {
            cindex = (WT_CURSOR_INDEX *)c;
            if (cindex->index->extractor == NULL) {
                /*
                 * Repack so it's comparable to the reference
                 * endpoints.
                 *
                 * NOTE(review): allocbuf is passed on every
                 * iteration and freed once at exit; presumably the
                 * repack reuses or reallocates it -- TODO confirm.
                 */
                k = &cindex->child->key;
                WT_ERR(__wt_struct_repack(session,
                    cindex->child->key_format,
                    entry->main->value_format, k, &curkey,
                    &allocbuf));
            } else
                /* With an extractor the raw child key is compared. */
                curkey = cindex->child->key;
        }
        for (end = &entry->ends[skip]; end < endmax; end++) {
            WT_ERR(__wt_compare(session, collator, &curkey,
                &end->key, &cmp));
            if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
                /*
                 * Lower-bound or equality endpoint: a key below the
                 * bound is skipped; a key above an endpoint that lacks
                 * the GT flag ends the scan.
                 */
                if (cmp < 0 || (cmp == 0 &&
                    !F_ISSET(end, WT_CURJOIN_END_EQ)))
                    goto advance;
                if (cmp > 0) {
                    if (F_ISSET(end, WT_CURJOIN_END_GT))
                        skip = 1;
                    else
                        goto done;
                }
            } else {
                /*
                 * Upper-bound endpoint: the first key past the bound
                 * ends the scan.
                 */
                if (cmp > 0 || (cmp == 0 &&
                    !F_ISSET(end, WT_CURJOIN_END_EQ)))
                    goto done;
            }
        }

        /* Every endpoint is satisfied: record this entry. */
        if (entry->index != NULL)
            WT_ERR(c->get_value(c, &curvalue));
        else
            WT_ERR(c->get_key(c, &curvalue));
        WT_ERR(__wt_bloom_insert(bloom, &curvalue));
        entry->stats.actual_count++;
advance:
        if ((ret = c->next(c)) == WT_NOTFOUND)
            break;
    }
done:
    /* Reaching the end of the source is the normal way out. */
    WT_ERR_NOTFOUND_OK(ret);

err:
    if (c != NULL)
        WT_TRET(c->close(c));
    __wt_scr_free(session, &uribuf);
    __wt_free(session, allocbuf);
    return (ret);
}