/*
 * __curjoin_iter_bump --
 *	Move the iterator forward by one step: first to the next endpoint of
 *	the current entry (equality entries only), otherwise to the next
 *	entry.  When the entries are exhausted, iter->entry is set to NULL.
 */
static int
__curjoin_iter_bump(WT_CURSOR_JOIN_ITER *iter)
{
	WT_CURSOR_JOIN_ENTRY *current, *following;
	WT_SESSION_IMPL *session;

	session = iter->session;
	current = iter->entry;
	iter->positioned = false;

	/*
	 * A plain (non-nested) equality entry may have more endpoints to
	 * visit; if so, reposition on the next one and we're done.
	 */
	if (current->subjoin == NULL && iter->is_equal) {
		++iter->end_pos;
		if (iter->end_pos < iter->end_count)
			return (__wt_cursor_dup_position(
			    current->ends[iter->end_pos].cursor,
			    iter->cursor));
	}

	/* Finished with this entry: clear the per-entry endpoint state. */
	iter->end_skip = 0;
	iter->end_count = 0;
	iter->end_pos = 0;

	/* Close any iterators left open on a nested join. */
	if (current->subjoin != NULL && current->subjoin->iter != NULL)
		WT_RET(__curjoin_iter_close_all(current->subjoin->iter));

	/* Step to the following entry, if there is one. */
	++iter->entry_pos;
	if (iter->entry_pos >= iter->entry_count) {
		iter->entry = NULL;
		return (0);
	}
	following = current + 1;
	iter->entry = following;

	/* A nested join gets its own child iterator. */
	if (following->subjoin != NULL)
		return (__curjoin_iter_init(
		    session, following->subjoin, &iter->child));

	return (__curjoin_iter_set_entry(iter, iter->entry_pos));
}
/*
 * __curjoin_entry_iter_init --
 *	Initialize an iteration for the index managed by a join entry.
 *
 *	Opens a new cursor on the URI of the entry's first endpoint cursor
 *	(with the join cursor's projection appended, if one is configured),
 *	positions it where that endpoint cursor is positioned, and returns a
 *	newly allocated iterator through iterp.
 *
 *	On failure nothing is stored in *iterp and every resource acquired
 *	here, including the newly opened cursor, is released.
 */
static int
__curjoin_entry_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ITER **iterp)
{
	WT_CURSOR *newcur;
	WT_CURSOR *to_dup;
	WT_DECL_RET;
	const char *raw_cfg[] = { WT_CONFIG_BASE(
	    session, WT_SESSION_open_cursor), "raw", NULL };
	const char *def_cfg[] = { WT_CONFIG_BASE(
	    session, WT_SESSION_open_cursor), NULL };
	const char *uri, **config;
	char *uribuf;
	WT_CURSOR_JOIN_ITER *iter;
	size_t size;

	iter = NULL;
	newcur = NULL;
	uribuf = NULL;
	to_dup = entry->ends[0].cursor;

	/* The new cursor is raw exactly when the join cursor is raw. */
	uri = to_dup->uri;
	if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW))
		config = &raw_cfg[0];
	else
		config = &def_cfg[0];

	/*
	 * Append the projection to the URI.  The buffer is sized for exactly
	 * the two strings plus the terminating nul, so the format cannot
	 * truncate.
	 */
	if (cjoin->projection != NULL) {
		size = strlen(uri) + strlen(cjoin->projection) + 1;
		WT_ERR(__wt_calloc(session, size, 1, &uribuf));
		snprintf(uribuf, size, "%s%s", uri, cjoin->projection);
		uri = uribuf;
	}
	WT_ERR(__wt_open_cursor(session, uri, (WT_CURSOR *)cjoin, config,
	    &newcur));
	WT_ERR(__wt_cursor_dup_position(to_dup, newcur));
	WT_ERR(__wt_calloc_one(session, &iter));
	iter->cjoin = cjoin;
	iter->session = session;
	iter->entry = entry;
	iter->cursor = newcur;
	iter->advance = false;
	*iterp = iter;

	if (0) {
		/*
		 * The cursor is only handed to the iterator on success; on
		 * any failure after the open it must be closed here or it
		 * leaks.
		 */
err:		if (newcur != NULL)
			WT_TRET(newcur->close(newcur));
		__wt_free(session, iter);
	}
	__wt_free(session, uribuf);
	return (ret);
}
/*
 * __curjoin_entry_iter_reset --
 *	Put an iteration back at its starting point.  An iterator that has
 *	not advanced is left untouched.
 */
static int
__curjoin_entry_iter_reset(WT_CURSOR_JOIN_ITER *iter)
{
	WT_CURSOR *cursor;

	/* Nothing to undo if the iterator never moved. */
	if (!iter->advance)
		return (0);

	cursor = iter->cursor;
	WT_RET(cursor->reset(cursor));

	/* Reposition on the first endpoint of the join's first entry. */
	WT_RET(__wt_cursor_dup_position(
	    iter->cjoin->entries[0].ends[0].cursor, cursor));

	iter->advance = false;
	iter->entry->stats.actual_count = 0;
	return (0);
}
/*
 * __curjoin_init_bloom --
 *	Populate the Bloom filter for a join entry.
 *
 *	Scans a raw cursor opened on the entry's index source (or, for a join
 *	on the main table, on the table with an empty "()" projection),
 *	compares each key against the entry's endpoints and inserts the
 *	qualifying records into bloom.  The entry's "iterated" and
 *	"bloom_insert" statistics are incremented as the scan proceeds.
 *
 *	Returns 0 or an error code; the end of the scan is passed through
 *	WT_ERR_NOTFOUND_OK (presumably turning WT_NOTFOUND into success --
 *	confirm against the macro definition).
 */
static int
__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
{
	WT_COLLATOR *collator;
	WT_CURSOR *c;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	WT_DECL_ITEM(uribuf);
	WT_DECL_RET;
	WT_ITEM curkey, curvalue;
	size_t size;
	u_int skip;		/* Index of the first endpoint to compare */
	int cmp;
	const char *uri;
	const char *raw_cfg[] = { WT_CONFIG_BASE(
	    session, WT_SESSION_open_cursor), "raw", NULL };

	c = NULL;
	skip = 0;

	if (entry->index != NULL)
		/*
		 * Open the raw index.  We're avoiding any references
		 * to the main table, they may be expensive.
		 */
		uri = entry->index->source;
	else {
		/*
		 * For joins on the main table, we just need the primary
		 * key for comparison, we don't need any values.
		 */
		/* Room for the table name, "()" and the terminating nul. */
		size = strlen(cjoin->table->iface.name) + 3;
		WT_ERR(__wt_scr_alloc(session, size, &uribuf));
		WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
		    cjoin->table->iface.name));
		uri = uribuf->data;
	}
	WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c));

	/* Initially position the cursor if necessary. */
	endmax = &entry->ends[entry->ends_next];
	if ((end = &entry->ends[0]) < endmax) {
		if (F_ISSET(end, WT_CURJOIN_END_GT) ||
		    WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) {
			/*
			 * The first endpoint is a lower bound or an equality:
			 * start the scan where its cursor is positioned.
			 */
			WT_ERR(__wt_cursor_dup_position(end->cursor, c));
			/*
			 * For a ">=" endpoint, don't compare against it
			 * again: the endpoint loop below starts at
			 * ends[skip].
			 */
			if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE)
				skip = 1;
		} else if (F_ISSET(end, WT_CURJOIN_END_LT)) {
			/*
			 * The first endpoint is an upper bound: start the
			 * scan at the first record the cursor returns.
			 */
			if ((ret = c->next(c)) == WT_NOTFOUND)
				goto done;
			WT_ERR(ret);
		} else
			WT_PANIC_ERR(session, EINVAL,
			    "fatal error in join cursor position state");
	}
	/* Only index entries supply a collator for the comparisons. */
	collator = (entry->index == NULL) ? NULL : entry->index->collator;
	while (ret == 0) {
		WT_ERR(c->get_key(c, &curkey));
		entry->stats.iterated++;
		if (entry->index != NULL) {
			/*
			 * Repack so it's comparable to the
			 * reference endpoints.
			 */
			WT_ERR(__wt_struct_repack(session,
			    c->key_format,
			    (entry->repack_format != NULL ?
			    entry->repack_format : entry->index->idxkey_format),
			    &c->key, &curkey));
		}
		for (end = &entry->ends[skip]; end < endmax; end++) {
			WT_ERR(__wt_compare(session, collator, &curkey,
			    &end->key, &cmp));
			if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) {
				/* if condition satisfied, insert immediately */
				switch (WT_CURJOIN_END_RANGE(end)) {
				case WT_CURJOIN_END_EQ:
					if (cmp == 0)
						goto insert;
					break;
				case WT_CURJOIN_END_GT:
					if (cmp > 0) {
						/* skip this check next time */
						skip = entry->ends_next;
						goto insert;
					}
					break;
				case WT_CURJOIN_END_GE:
					if (cmp >= 0)
						goto insert;
					break;
				case WT_CURJOIN_END_LT:
					if (cmp < 0)
						goto insert;
					break;
				case WT_CURJOIN_END_LE:
					if (cmp <= 0)
						goto insert;
					break;
				}
			} else if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
				/*
				 * Conjunction, endpoint without the LT flag:
				 * a key that compares below it (or equal to
				 * it when EQ is not set) doesn't qualify,
				 * move on to the next record.
				 */
				if (cmp < 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto advance;
				/*
				 * The key compares above the endpoint.  With
				 * GT set, later records start comparing at
				 * ends[1]; with a pure equality endpoint the
				 * scan is finished.
				 */
				if (cmp > 0) {
					if (F_ISSET(end, WT_CURJOIN_END_GT))
						skip = 1;
					else
						goto done;
				}
			} else {
				/*
				 * Conjunction, endpoint with the LT flag: a
				 * key that compares above it (or equal to it
				 * when EQ is not set) ends the scan.
				 */
				if (cmp > 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto done;
			}
		}
		/*
		 * Either it's a disjunction that hasn't satisfied any
		 * condition, or it's a conjunction that has satisfied all
		 * conditions.
		 */
		if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
			goto advance;
insert:
		if (entry->index != NULL) {
			/*
			 * Insert the bytes of the raw index key that follow
			 * the repacked index-key portion.
			 * NOTE(review): this relies on curkey.data pointing
			 * at the start of c->key's data after the repack --
			 * confirm against __wt_struct_repack.
			 */
			curvalue.data =
			    (unsigned char *)curkey.data + curkey.size;
			WT_ASSERT(session, c->key.size > curkey.size);
			curvalue.size = c->key.size - curkey.size;
		}
		else
			/* Main-table join: the key itself is inserted. */
			WT_ERR(c->get_key(c, &curvalue));
		__wt_bloom_insert(bloom, &curvalue);
		entry->stats.bloom_insert++;
advance:
		/* Any other non-zero return ends the loop via its condition. */
		if ((ret = c->next(c)) == WT_NOTFOUND)
			break;
	}
done:
	WT_ERR_NOTFOUND_OK(ret);

err:	if (c != NULL)
		WT_TRET(c->close(c));
	__wt_scr_free(session, &uribuf);
	return (ret);
}
/*
 * __curjoin_iter_set_entry --
 *	Make the entry at entry_pos the iterator's current entry, resetting
 *	the per-entry state and (re)opening the iteration cursor as needed.
 */
static int
__curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *iter, u_int entry_pos)
{
	WT_CURSOR *previous, *source;
	WT_CURSOR_JOIN *cjoin, *root;
	WT_CURSOR_JOIN_ENTRY *entry;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	size_t urilen;
	const char *cfg_raw[] = { WT_CONFIG_BASE(
	    iter->session, WT_SESSION_open_cursor), "raw", NULL };
	const char *cfg_plain[] = { WT_CONFIG_BASE(
	    iter->session, WT_SESSION_open_cursor), NULL };
	const char **cfg;
	char *uri;

	uri = NULL;
	session = iter->session;
	cjoin = iter->cjoin;
	entry = &cjoin->entries[entry_pos];

	/* Reset the iterator's view of the current entry. */
	iter->entry = entry;
	iter->entry_pos = entry_pos;
	iter->end_pos = 0;
	iter->positioned = false;

	/* A single equality endpoint gets special treatment when bumping. */
	iter->is_equal = (entry->ends_next == 1 &&
	    WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ);

	/* A leading ">=" endpoint is where we start, so it can be skipped. */
	if (entry->ends_next > 0 &&
	    WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_GE)
		iter->end_skip = 1;
	else
		iter->end_skip = 0;

	/*
	 * Only a disjunction walks every entry, and only its equality
	 * entries walk every endpoint.
	 */
	iter->end_count = WT_MIN(1, entry->ends_next);
	if (!F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION))
		iter->entry_count = 1;
	else {
		iter->entry_count = cjoin->entries_next;
		if (iter->is_equal)
			iter->end_count = entry->ends_next;
	}
	WT_ASSERT(iter->session, iter->entry_pos < iter->entry_count);

	entry->stats.iterated = 0;

	if (entry->subjoin != NULL) {
		/* Nested joins don't use a cursor at this level. */
		if (iter->cursor != NULL) {
			WT_ERR(iter->cursor->close(iter->cursor));
			iter->cursor = NULL;
		}
	} else {
		/* Find the outermost join cursor, it owns the new cursor. */
		root = iter->cjoin;
		while (root->parent != NULL)
			root = root->parent;

		source = entry->ends[0].cursor;
		cfg = F_ISSET((WT_CURSOR *)root, WT_CURSTD_RAW) ?
		    cfg_raw : cfg_plain;

		/* Build "<internal uri>()": keys only, no value columns. */
		urilen = strlen(source->internal_uri) + 3;
		WT_ERR(__wt_calloc(session, urilen, 1, &uri));
		WT_ERR(__wt_snprintf(
		    uri, urilen, "%s()", source->internal_uri));

		/* Reuse the existing cursor only if it's on the same URI. */
		previous = iter->cursor;
		if (previous == NULL || strcmp(previous->uri, uri) != 0) {
			iter->cursor = NULL;
			if (previous != NULL)
				WT_ERR(previous->close(previous));
			WT_ERR(__wt_open_cursor(session, uri,
			    (WT_CURSOR *)root, cfg, &iter->cursor));
		}
		WT_ERR(__wt_cursor_dup_position(source, iter->cursor));
	}

err:	__wt_free(session, uri);
	return (ret);
}
/*
 * __curjoin_init_bloom --
 *	Populate the Bloom filter for a join entry.
 *
 *	Scans a raw cursor over the entry's index (projected onto the main
 *	table's key columns) or over the main table's primary keys, compares
 *	each key against the entry's endpoints and inserts every qualifying
 *	record into bloom, counting the inserts in the entry's statistics.
 *
 *	Returns 0 or an error code; the end of the scan is passed through
 *	WT_ERR_NOTFOUND_OK.
 */
static int
__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
    WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom)
{
	WT_COLLATOR *collator;
	WT_CURSOR *c;
	WT_CURSOR_INDEX *cindex;
	WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
	WT_DECL_RET;
	WT_DECL_ITEM(uribuf);
	WT_ITEM curkey, curvalue, *k;
	WT_TABLE *maintable;
	const char *raw_cfg[] = { WT_CONFIG_BASE(
	    session, WT_SESSION_open_cursor), "raw", NULL };
	const char *mainkey_str, *p;
	void *allocbuf;
	size_t mainkey_len, size;
	u_int i;
	int cmp, skip;

	c = NULL;
	allocbuf = NULL;
	skip = 0;

	if (entry->index != NULL) {
		/*
		 * Open a cursor having a projection of the keys of the
		 * index we're comparing against.  Open it raw, we're
		 * going to compare it to the raw keys of the
		 * reference cursors.
		 */
		maintable = ((WT_CURSOR_TABLE *)entry->main)->table;

		/*
		 * Skip the first character of the column configuration, then
		 * find the comma that ends the last key column.
		 */
		mainkey_str = maintable->colconf.str + 1;
		for (p = mainkey_str, i = 0;
		    p != NULL && i < maintable->nkey_columns; i++)
			p = strchr(p + 1, ',');
		WT_ASSERT(session, p != NULL);
		mainkey_len = WT_PTRDIFF(p, mainkey_str);

		/* Room for the name, "(", the columns, ")" and the nul. */
		size = strlen(entry->index->name) + mainkey_len + 3;
		WT_ERR(__wt_scr_alloc(session, size, &uribuf));
		WT_ERR(__wt_buf_fmt(session, uribuf, "%s(%.*s)",
		    entry->index->name, (int)mainkey_len, mainkey_str));
	} else {
		/*
		 * For joins on the main table, we just need the primary
		 * key for comparison, we don't need any values.
		 */
		size = strlen(cjoin->table->name) + 3;
		WT_ERR(__wt_scr_alloc(session, size, &uribuf));
		WT_ERR(__wt_buf_fmt(session, uribuf, "%s()",
		    cjoin->table->name));
	}
	WT_ERR(__wt_open_cursor(
	    session, uribuf->data, &cjoin->iface, raw_cfg, &c));

	/*
	 * Position the cursor before the first get_key call.
	 *
	 * If the first endpoint is a lower bound or an equality, start where
	 * its cursor is positioned.  Testing for any bit of
	 * WT_CURJOIN_END_GE is not sufficient: the EQ bit is also set for a
	 * "<=" endpoint, and starting at an upper bound would skip every
	 * smaller key.
	 *
	 * Otherwise (the first endpoint is an upper bound, or there are no
	 * endpoints at all), start at the first record; without this the
	 * cursor would be unpositioned when the scan loop reads its key.
	 */
	endmax = &entry->ends[entry->ends_next];
	if ((end = &entry->ends[0]) < endmax &&
	    (F_ISSET(end, WT_CURJOIN_END_GT) ||
	    end->flags == WT_CURJOIN_END_EQ)) {
		WT_ERR(__wt_cursor_dup_position(end->cursor, c));
		/*
		 * For a ">=" endpoint, don't compare against it again: the
		 * endpoint loop below starts at ends[skip].
		 */
		if (end->flags == WT_CURJOIN_END_GE)
			skip = 1;
	} else {
		if ((ret = c->next(c)) == WT_NOTFOUND)
			goto done;
		WT_ERR(ret);
	}

	/* Only index entries supply a collator for the comparisons. */
	collator = (entry->index == NULL) ? NULL : entry->index->collator;
	while (ret == 0) {
		WT_ERR(c->get_key(c, &curkey));
		if (entry->index != NULL) {
			cindex = (WT_CURSOR_INDEX *)c;
			if (cindex->index->extractor == NULL) {
				/*
				 * Repack so it's comparable to the
				 * reference endpoints.
				 */
				k = &cindex->child->key;
				WT_ERR(__wt_struct_repack(session,
				    cindex->child->key_format,
				    entry->main->value_format, k, &curkey,
				    &allocbuf));
			} else
				curkey = cindex->child->key;
		}
		for (end = &entry->ends[skip]; end < endmax; end++) {
			WT_ERR(__wt_compare(session, collator, &curkey,
			    &end->key, &cmp));
			if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
				/*
				 * Endpoint without the LT flag: a key below
				 * it (or equal when EQ is not set) doesn't
				 * qualify, move on to the next record.
				 */
				if (cmp < 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto advance;
				/*
				 * The key is above the endpoint: with GT set
				 * later records start comparing at ends[1],
				 * with a pure equality the scan is finished.
				 */
				if (cmp > 0) {
					if (F_ISSET(end, WT_CURJOIN_END_GT))
						skip = 1;
					else
						goto done;
				}
			} else {
				/*
				 * Endpoint with the LT flag: a key above it
				 * (or equal when EQ is not set) ends the
				 * scan.
				 */
				if (cmp > 0 || (cmp == 0 &&
				    !F_ISSET(end, WT_CURJOIN_END_EQ)))
					goto done;
			}
		}

		/* Every endpoint is satisfied: insert the record. */
		if (entry->index != NULL)
			WT_ERR(c->get_value(c, &curvalue));
		else
			WT_ERR(c->get_key(c, &curvalue));
		WT_ERR(__wt_bloom_insert(bloom, &curvalue));
		entry->stats.actual_count++;
advance:
		/* Any other non-zero return ends the loop via its condition. */
		if ((ret = c->next(c)) == WT_NOTFOUND)
			break;
	}
done:
	WT_ERR_NOTFOUND_OK(ret);

err:	if (c != NULL)
		WT_TRET(c->close(c));
	__wt_scr_free(session, &uribuf);
	__wt_free(session, allocbuf);
	return (ret);
}