/* * __wt_lsm_tree_setup_chunk -- * Initialize a chunk of an LSM tree. */ int __wt_lsm_tree_setup_chunk( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) { WT_ITEM buf; const char *cfg[] = { WT_CONFIG_BASE(session, session_drop), "force", NULL }; int exists; WT_CLEAR(buf); WT_RET(__wt_epoch(session, &chunk->create_ts)); WT_RET(__wt_lsm_tree_chunk_name(session, lsm_tree, chunk->id, &buf)); chunk->uri = __wt_buf_steal(session, &buf, NULL); /* * If the underlying file exists, drop the chunk first - there may be * some content hanging over from an aborted merge or checkpoint. * * Don't do this for the very first chunk: we are called during * WT_SESSION::create, and doing a drop inside there does interesting * things with handle locks and metadata tracking. It can never have * been the result of an interrupted merge, anyway. */ if (chunk->id > 1) { WT_RET(__wt_exist( session, chunk->uri + strlen("file:"), &exists)); if (exists) WT_RET(__wt_schema_drop(session, chunk->uri, cfg)); } return (__wt_schema_create(session, chunk->uri, lsm_tree->file_config)); }
/* * __wt_lsm_tree_setup_chunk -- * Initialize a chunk of an LSM tree. */ int __wt_lsm_tree_setup_chunk( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) { WT_ITEM buf; const char *cfg[] = API_CONF_DEFAULTS(session, drop, "force"); WT_CLEAR(buf); WT_RET(__wt_lsm_tree_chunk_name(session, lsm_tree, chunk->id, &buf)); chunk->uri = __wt_buf_steal(session, &buf, NULL); /* * Drop the chunk first - there may be some content hanging over from * an aborted merge or checkpoint. * * Don't do this for the very first chunk: we are called during * WT_SESSION::create, and doing a drop inside there does interesting * things with handle locks and metadata tracking. It can never have * been the result of an interrupted merge, anyway. */ if (chunk->id > 1) WT_RET(__wt_schema_drop(session, chunk->uri, cfg)); return (__wt_schema_create(session, chunk->uri, lsm_tree->file_config)); }
/* * __wt_lsm_tree_rename -- * Rename an LSM tree. */ int __wt_lsm_tree_rename(WT_SESSION_IMPL *session, const char *oldname, const char *newname, const char *cfg[]) { WT_DECL_RET; WT_ITEM buf; WT_LSM_CHUNK *chunk; WT_LSM_TREE *lsm_tree; const char *old; u_int i; int locked; old = NULL; WT_CLEAR(buf); locked = 0; /* Get the LSM tree. */ WT_RET(__wt_lsm_tree_get(session, oldname, 1, &lsm_tree)); /* Shut down the LSM worker. */ WT_ERR(__lsm_tree_close(session, lsm_tree)); /* Prevent any new opens. */ WT_ERR(__wt_try_writelock(session, lsm_tree->rwlock)); locked = 1; /* Set the new name. */ WT_ERR(__lsm_tree_set_name(session, lsm_tree, newname)); /* Rename the chunks. */ for (i = 0; i < lsm_tree->nchunks; i++) { chunk = lsm_tree->chunk[i]; old = chunk->uri; chunk->uri = NULL; WT_ERR(__wt_lsm_tree_chunk_name( session, lsm_tree, chunk->id, &buf)); chunk->uri = __wt_buf_steal(session, &buf, NULL); WT_ERR(__wt_schema_rename(session, old, chunk->uri, cfg)); __wt_free(session, old); if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) { old = chunk->bloom_uri; chunk->bloom_uri = NULL; WT_ERR(__wt_lsm_tree_bloom_name( session, lsm_tree, chunk->id, &buf)); chunk->bloom_uri = __wt_buf_steal(session, &buf, NULL); F_SET(chunk, WT_LSM_CHUNK_BLOOM); WT_ERR(__wt_schema_rename( session, old, chunk->uri, cfg)); __wt_free(session, old); } } ret = __wt_rwunlock(session, lsm_tree->rwlock); locked = 0; if (ret == 0) ret = __wt_lsm_meta_write(session, lsm_tree); if (ret == 0) ret = __wt_metadata_remove(session, oldname); err: if (locked) WT_TRET(__wt_rwunlock(session, lsm_tree->rwlock)); if (old != NULL) __wt_free(session, old); /* * Discard this LSM tree structure. The first operation on the renamed * tree will create a new one. */ WT_TRET(__lsm_tree_discard(session, lsm_tree)); return (ret); }
/* * __wt_lsm_tree_create -- * Create an LSM tree structure for the given name. */ int __wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, int exclusive, const char *config) { WT_CONFIG_ITEM cval; WT_DECL_ITEM(buf); WT_DECL_RET; WT_LSM_TREE *lsm_tree; const char *cfg[] = API_CONF_DEFAULTS(session, create, config); const char *tmpconfig; /* If the tree is open, it already exists. */ if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) { __wt_lsm_tree_release(session, lsm_tree); return (exclusive ? EEXIST : 0); } WT_RET_NOTFOUND_OK(ret); /* * If the tree has metadata, it already exists. * * !!! * Use a local variable: we don't care what the existing configuration * is, but we don't want to overwrite the real config. */ if (__wt_metadata_read(session, uri, &tmpconfig) == 0) { __wt_free(session, tmpconfig); return (exclusive ? EEXIST : 0); } WT_RET_NOTFOUND_OK(ret); WT_RET(__wt_config_gets(session, cfg, "key_format", &cval)); if (WT_STRING_MATCH("r", cval.str, cval.len)) WT_RET_MSG(session, EINVAL, "LSM trees cannot be configured as column stores"); WT_RET(__wt_calloc_def(session, 1, &lsm_tree)); WT_RET(__lsm_tree_set_name(session, lsm_tree, uri)); WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &lsm_tree->key_format)); WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &lsm_tree->value_format)); WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom", &cval)); FLD_SET(lsm_tree->bloom, (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED)); WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_newest", &cval)); if (cval.val != 0) FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST); WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_oldest", &cval)); if (cval.val != 0) FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST); if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST) || FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))) WT_ERR_MSG(session, EINVAL, "Bloom filters can only be created on newest and oldest " "chunks if bloom filters are enabled"); WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_config", &cval)); if (cval.type == ITEM_STRUCT) { cval.str++; cval.len -= 2; } WT_ERR(__wt_strndup(session, cval.str, cval.len, &lsm_tree->bloom_config)); WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_bit_count", &cval)); lsm_tree->bloom_bit_count = (uint32_t)cval.val; WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_hash_count", &cval)); lsm_tree->bloom_hash_count = (uint32_t)cval.val; WT_ERR(__wt_config_gets(session, cfg, "lsm_chunk_size", &cval)); lsm_tree->chunk_size = (uint32_t)cval.val; WT_ERR(__wt_config_gets(session, cfg, "lsm_merge_max", &cval)); lsm_tree->merge_max = (uint32_t)cval.val; WT_ERR(__wt_config_gets(session, cfg, "lsm_merge_threads", &cval)); lsm_tree->merge_threads = (uint32_t)cval.val; /* Sanity check that api_data.py is in sync with lsm.h */ WT_ASSERT(session, lsm_tree->merge_threads <= WT_LSM_MAX_WORKERS); WT_ERR(__wt_scr_alloc(session, 0, &buf)); WT_ERR(__wt_buf_fmt(session, buf, "%s,key_format=u,value_format=u", config)); lsm_tree->file_config = __wt_buf_steal(session, buf, NULL); /* Create the first chunk and flush the metadata. */ WT_ERR(__wt_lsm_meta_write(session, lsm_tree)); /* Discard our partially populated handle. */ ret = __lsm_tree_discard(session, lsm_tree); lsm_tree = NULL; /* * Open our new tree and add it to the handle cache. Don't discard on * error: the returned handle is NULL on error, and the metadata * tracking macros handle cleaning up on failure. */ if (ret == 0) ret = __lsm_tree_open(session, uri, &lsm_tree); if (ret == 0) __wt_lsm_tree_release(session, lsm_tree); if (0) { err: WT_TRET(__lsm_tree_discard(session, lsm_tree)); } __wt_scr_free(&buf); return (ret); }
/* * __wt_schema_open_colgroups -- * Open the column groups for a table. */ int __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table) { WT_COLGROUP *colgroup; WT_CONFIG cparser; WT_CONFIG_ITEM ckey, cval; WT_DECL_RET; WT_DECL_ITEM(buf); const char *cgconfig; u_int i; if (table->cg_complete) return (0); WT_RET(__wt_scr_alloc(session, 0, &buf)); colgroup = NULL; WT_RET(__wt_config_subinit(session, &cparser, &table->cgconf)); /* Open each column group. */ for (i = 0; i < WT_COLGROUPS(table); i++) { if (table->ncolgroups > 0) WT_ERR(__wt_config_next(&cparser, &ckey, &cval)); else WT_CLEAR(ckey); /* * Always open from scratch: we may have failed part of the way * through opening a table, or column groups may have changed. */ if (table->cgroups[i] != NULL) { __wt_schema_destroy_colgroup( session, table->cgroups[i]); table->cgroups[i] = NULL; } WT_ERR(__wt_schema_colgroup_name(session, table, ckey.str, ckey.len, buf)); if ((ret = __wt_metadata_search( session, buf->data, &cgconfig)) != 0) { /* It is okay if the table is incomplete. */ if (ret == WT_NOTFOUND) ret = 0; goto err; } WT_ERR(__wt_calloc_def(session, 1, &colgroup)); colgroup->name = __wt_buf_steal(session, buf, NULL); colgroup->config = cgconfig; WT_ERR(__wt_config_getones(session, colgroup->config, "columns", &colgroup->colconf)); WT_ERR(__wt_config_getones( session, colgroup->config, "source", &cval)); WT_ERR(__wt_buf_fmt(session, buf, "%.*s", (int)cval.len, cval.str)); colgroup->source = __wt_buf_steal(session, buf, NULL); table->cgroups[i] = colgroup; colgroup = NULL; } if (!table->is_simple) { WT_ERR(__wt_table_check(session, table)); WT_ERR(__wt_struct_plan(session, table, table->colconf.str, table->colconf.len, 1, buf)); table->plan = __wt_buf_steal(session, buf, NULL); } table->cg_complete = 1; err: __wt_scr_free(&buf); if (colgroup != NULL) __wt_schema_destroy_colgroup(session, colgroup); return (ret); }
/* * __wt_schema_open_table -- * Open a named table. */ int __wt_schema_open_table(WT_SESSION_IMPL *session, const char *name, size_t namelen, WT_TABLE **tablep) { WT_CONFIG cparser; WT_CONFIG_ITEM ckey, cval; WT_CURSOR *cursor; WT_DECL_RET; WT_ITEM buf; WT_TABLE *table; const char *tconfig; char *tablename; cursor = NULL; table = NULL; WT_CLEAR(buf); WT_RET(__wt_buf_fmt(session, &buf, "table:%.*s", (int)namelen, name)); tablename = __wt_buf_steal(session, &buf, NULL); WT_ERR(__wt_metadata_cursor(session, NULL, &cursor)); cursor->set_key(cursor, tablename); WT_ERR(cursor->search(cursor)); WT_ERR(cursor->get_value(cursor, &tconfig)); WT_ERR(__wt_calloc_def(session, 1, &table)); table->name = tablename; tablename = NULL; WT_ERR(__wt_config_getones(session, tconfig, "columns", &cval)); WT_ERR(__wt_config_getones(session, tconfig, "key_format", &cval)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &table->key_format)); WT_ERR(__wt_config_getones(session, tconfig, "value_format", &cval)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &table->value_format)); WT_ERR(__wt_strdup(session, tconfig, &table->config)); /* Point to some items in the copy to save re-parsing. */ WT_ERR(__wt_config_getones(session, table->config, "columns", &table->colconf)); /* * Count the number of columns: tables are "simple" if the columns * are not named. */ WT_ERR(__wt_config_subinit(session, &cparser, &table->colconf)); table->is_simple = 1; while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0) table->is_simple = 0; if (ret != WT_NOTFOUND) goto err; /* Check that the columns match the key and value formats. */ if (!table->is_simple) WT_ERR(__wt_schema_colcheck(session, table->key_format, table->value_format, &table->colconf, &table->nkey_columns, NULL)); WT_ERR(__wt_config_getones(session, table->config, "colgroups", &table->cgconf)); /* Count the number of column groups. */ WT_ERR(__wt_config_subinit(session, &cparser, &table->cgconf)); table->ncolgroups = 0; while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0) ++table->ncolgroups; if (ret != WT_NOTFOUND) goto err; WT_ERR(__wt_calloc_def(session, WT_COLGROUPS(table), &table->cgroups)); WT_ERR(__wt_schema_open_colgroups(session, table)); *tablep = table; if (0) { err: if (table != NULL) __wt_schema_destroy_table(session, table); } if (cursor != NULL) WT_TRET(cursor->close(cursor)); __wt_free(session, tablename); return (ret); }
/* * ___open_index -- * Open an index. */ static int __open_index(WT_SESSION_IMPL *session, WT_TABLE *table, WT_INDEX *idx) { WT_CONFIG colconf; WT_CONFIG_ITEM ckey, cval; WT_DECL_ITEM(buf); WT_DECL_ITEM(plan); WT_DECL_RET; u_int cursor_key_cols, i; WT_ERR(__wt_scr_alloc(session, 0, &buf)); /* Get the data source from the index config. */ WT_ERR(__wt_config_getones(session, idx->config, "source", &cval)); WT_ERR(__wt_buf_fmt( session, buf, "%.*s", (int)cval.len, cval.str)); idx->source = __wt_buf_steal(session, buf, NULL); idx->need_value = WT_PREFIX_MATCH(idx->source, "lsm:"); WT_ERR(__wt_config_getones(session, idx->config, "key_format", &cval)); WT_ERR(__wt_buf_fmt( session, buf, "%.*s", (int)cval.len, cval.str)); idx->key_format = __wt_buf_steal(session, buf, NULL); /* * The key format for an index is somewhat subtle: the application * specifies a set of columns that it will use for the key, but the * engine usually adds some hidden columns in order to derive the * primary key. These hidden columns are part of the file's key. * * The file's key_format is stored persistently, we need to calculate * the index cursor key format (which will usually omit some of those * keys). */ WT_ERR(__wt_config_getones( session, idx->config, "columns", &idx->colconf)); /* Start with the declared index columns. */ WT_ERR(__wt_config_subinit(session, &colconf, &idx->colconf)); cursor_key_cols = 0; while ((ret = __wt_config_next(&colconf, &ckey, &cval)) == 0) { WT_ERR(__wt_buf_catfmt( session, buf, "%.*s,", (int)ckey.len, ckey.str)); ++cursor_key_cols; } if (ret != 0 && ret != WT_NOTFOUND) goto err; /* * Now add any primary key columns from the table that are not * already part of the index key. */ WT_ERR(__wt_config_subinit(session, &colconf, &table->colconf)); for (i = 0; i < table->nkey_columns && (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0; i++) { /* * If the primary key column is already in the secondary key, * don't add it again. */ if (__wt_config_subgetraw( session, &idx->colconf, &ckey, &cval) == 0) continue; WT_ERR(__wt_buf_catfmt( session, buf, "%.*s,", (int)ckey.len, ckey.str)); } if (ret != 0 && ret != WT_NOTFOUND) goto err; WT_ERR(__wt_scr_alloc(session, 0, &plan)); WT_ERR(__wt_struct_plan(session, table, buf->data, buf->size, 0, plan)); idx->key_plan = __wt_buf_steal(session, plan, NULL); /* Set up the cursor key format (the visible columns). */ WT_ERR(__wt_buf_init(session, buf, 0)); WT_ERR(__wt_struct_truncate(session, idx->key_format, cursor_key_cols, buf)); idx->idxkey_format = __wt_buf_steal(session, buf, NULL); /* By default, index cursor values are the table value columns. */ /* TODO Optimize to use index columns in preference to table lookups. */ WT_ERR(__wt_struct_plan(session, table, table->colconf.str, table->colconf.len, 1, plan)); idx->value_plan = __wt_buf_steal(session, plan, NULL); err: __wt_scr_free(&buf); __wt_scr_free(&plan); return (ret); }
/* * __wt_lsm_tree_create -- * Create an LSM tree structure for the given name. */ int __wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, int exclusive, const char *config) { WT_CONFIG_ITEM cval; WT_DECL_ITEM(buf); WT_DECL_RET; WT_LSM_TREE *lsm_tree; const char *cfg[] = { WT_CONFIG_BASE(session, session_create), config, NULL }; const char *tmpconfig; /* If the tree is open, it already exists. */ if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) { __wt_lsm_tree_release(session, lsm_tree); return (exclusive ? EEXIST : 0); } WT_RET_NOTFOUND_OK(ret); /* * If the tree has metadata, it already exists. * * !!! * Use a local variable: we don't care what the existing configuration * is, but we don't want to overwrite the real config. */ if (__wt_metadata_search(session, uri, &tmpconfig) == 0) { __wt_free(session, tmpconfig); return (exclusive ? EEXIST : 0); } WT_RET_NOTFOUND_OK(ret); WT_RET(__wt_config_gets(session, cfg, "key_format", &cval)); if (WT_STRING_MATCH("r", cval.str, cval.len)) WT_RET_MSG(session, EINVAL, "LSM trees cannot be configured as column stores"); WT_RET(__wt_calloc_def(session, 1, &lsm_tree)); WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri)); WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &lsm_tree->key_format)); WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &lsm_tree->value_format)); WT_ERR(__wt_config_gets(session, cfg, "collator", &cval)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &lsm_tree->collator_name)); WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval)); if (cval.val) F_SET(lsm_tree, WT_LSM_TREE_THROTTLE); else F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE); WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval)); FLD_SET(lsm_tree->bloom, (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED)); WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval)); if (cval.val != 0) FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST); if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)) WT_ERR_MSG(session, EINVAL, "Bloom filters can only be created on newest and oldest " "chunks if bloom filters are enabled"); WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval)); if (cval.type == WT_CONFIG_ITEM_STRUCT) { cval.str++; cval.len -= 2; } WT_ERR(__wt_strndup(session, cval.str, cval.len, &lsm_tree->bloom_config)); WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval)); lsm_tree->bloom_bit_count = (uint32_t)cval.val; WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval)); lsm_tree->bloom_hash_count = (uint32_t)cval.val; WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval)); lsm_tree->chunk_max = (uint64_t)cval.val; WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval)); lsm_tree->chunk_size = (uint64_t)cval.val; if (lsm_tree->chunk_size > lsm_tree->chunk_max) WT_ERR_MSG(session, EINVAL, "Chunk size (chunk_size) must be smaller than or equal to " "the maximum chunk size (chunk_max)"); WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval)); lsm_tree->merge_max = (uint32_t)cval.val; lsm_tree->merge_min = lsm_tree->merge_max / 2; WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_threads", &cval)); lsm_tree->merge_threads = (uint32_t)cval.val; /* Sanity check that api_data.py is in sync with lsm.h */ WT_ASSERT(session, lsm_tree->merge_threads <= WT_LSM_MAX_WORKERS); /* * Set up the config for each chunk. If possible, avoid high latencies * from fsync by flushing the cache every 8MB (will be overridden by * any application setting). */ tmpconfig = ""; #ifdef HAVE_SYNC_FILE_RANGE if (!S2C(session)->direct_io) tmpconfig = "os_cache_dirty_max=8MB,"; #endif WT_ERR(__wt_scr_alloc(session, 0, &buf)); WT_ERR(__wt_buf_fmt(session, buf, "%s%s,key_format=u,value_format=u", tmpconfig, config)); lsm_tree->file_config = __wt_buf_steal(session, buf, NULL); /* Create the first chunk and flush the metadata. */ WT_ERR(__wt_lsm_meta_write(session, lsm_tree)); /* Discard our partially populated handle. */ ret = __lsm_tree_discard(session, lsm_tree); lsm_tree = NULL; /* * Open our new tree and add it to the handle cache. Don't discard on * error: the returned handle is NULL on error, and the metadata * tracking macros handle cleaning up on failure. */ if (ret == 0) ret = __lsm_tree_open(session, uri, &lsm_tree); if (ret == 0) __wt_lsm_tree_release(session, lsm_tree); if (0) { err: WT_TRET(__lsm_tree_discard(session, lsm_tree)); } __wt_scr_free(&buf); return (ret); }
/* * __lsm_bloom_create -- * Create a bloom filter for a chunk of the LSM tree that has been * checkpointed but not yet been merged. */ static int __lsm_bloom_create(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk, u_int chunk_off) { WT_BLOOM *bloom; WT_CURSOR *src; WT_DECL_RET; WT_ITEM buf, key; WT_SESSION *wt_session; uint64_t insert_count; int exist; /* * Normally, the Bloom URI is populated when the chunk struct is * allocated. After an open, however, it may not have been. * Deal with that here. */ if (chunk->bloom_uri == NULL) { WT_CLEAR(buf); WT_RET(__wt_lsm_tree_bloom_name( session, lsm_tree, chunk->id, &buf)); chunk->bloom_uri = __wt_buf_steal(session, &buf, NULL); } /* * Drop the bloom filter first - there may be some content hanging over * from an aborted merge or checkpoint. */ wt_session = &session->iface; WT_RET(__wt_exist(session, chunk->bloom_uri + strlen("file:"), &exist)); if (exist) WT_RET(wt_session->drop(wt_session, chunk->bloom_uri, "force")); bloom = NULL; /* * This is merge-like activity, and we don't want compacts to give up * because we are creating a bunch of bloom filters before merging. */ ++lsm_tree->merge_progressing; WT_RET(__wt_bloom_create(session, chunk->bloom_uri, lsm_tree->bloom_config, chunk->count, lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count, &bloom)); /* Open a special merge cursor just on this chunk. */ WT_ERR(__wt_open_cursor(session, lsm_tree->name, NULL, NULL, &src)); F_SET(src, WT_CURSTD_RAW); WT_ERR(__wt_clsm_init_merge(src, chunk_off, chunk->id, 1)); F_SET(session, WT_SESSION_NO_CACHE); for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) { WT_ERR(src->get_key(src, &key)); WT_ERR(__wt_bloom_insert(bloom, &key)); } WT_ERR_NOTFOUND_OK(ret); WT_TRET(src->close(src)); WT_TRET(__wt_bloom_finalize(bloom)); WT_ERR(ret); F_CLR(session, WT_SESSION_NO_CACHE); /* Load the new Bloom filter into cache. */ WT_CLEAR(key); WT_ERR_NOTFOUND_OK(__wt_bloom_get(bloom, &key)); WT_VERBOSE_ERR(session, lsm, "LSM worker created bloom filter %s. " "Expected %" PRIu64 " items, got %" PRIu64, chunk->bloom_uri, chunk->count, insert_count); /* Ensure the bloom filter is in the metadata. */ WT_ERR(__wt_lsm_tree_lock(session, lsm_tree, 1)); F_SET_ATOMIC(chunk, WT_LSM_CHUNK_BLOOM); ret = __wt_lsm_meta_write(session, lsm_tree); ++lsm_tree->dsk_gen; WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree)); if (ret != 0) WT_ERR_MSG(session, ret, "LSM bloom worker metadata write"); err: if (bloom != NULL) WT_TRET(__wt_bloom_close(bloom)); F_CLR(session, WT_SESSION_NO_CACHE); return (ret); }