/*
 * __curindex_open_colgroups --
 *    Open cursors on the column groups required for an index cursor.
 */
static int
__curindex_open_colgroups(
    WT_SESSION_IMPL *session, WT_CURSOR_INDEX *cindex, const char *cfg_arg[])
{
    WT_TABLE *table;
    WT_CURSOR **cp;
    u_long arg;
    /* Child cursors are opened with dump disabled. */
    const char *cfg[] = { cfg_arg[0], cfg_arg[1], "dump=\"\"", NULL };
    char *proj;
    size_t cgcnt;

    table = cindex->table;
    cgcnt = WT_COLGROUPS(table);
    WT_RET(__wt_calloc_def(session, cgcnt, &cindex->cg_needvalue));
    WT_RET(__wt_calloc_def(session, cgcnt, &cp));
    cindex->cg_cursors = cp;

    /* Work out which column groups we need. */
    for (proj = (char *)cindex->value_plan; *proj != '\0'; proj++) {
        arg = strtoul(proj, &proj, 10);
        if (*proj == WT_PROJ_VALUE)
            cindex->cg_needvalue[arg] = 1;
        if ((*proj != WT_PROJ_KEY && *proj != WT_PROJ_VALUE) ||
            cp[arg] != NULL)
            continue;
        WT_RET(__wt_open_cursor(session,
            table->cgroups[arg]->source, &cindex->iface, cfg, &cp[arg]));
    }
    return (0);
}
/*
 * __wt_schema_create_final --
 *    Create a single configuration line from a set of configuration strings,
 * including all of the defaults declared for a session.create, and stripping
 * any configuration strings that don't belong in a session.create. Here for
 * the wt dump command utility, which reads a set of configuration strings and
 * needs to add in the defaults and then collapse them into a single string
 * for a subsequent load.
 */
int
__wt_schema_create_final(
    WT_SESSION_IMPL *session, char *cfg_arg[], char **value_ret)
{
    WT_DECL_RET;
    u_int i;
    const char **cfg;

    /*
     * Count the entries in the original, allocate a copy with the defaults
     * as the first entry, then collapse the whole thing into a single
     * configuration string (which also strips any entries that don't
     * appear in the first entry).
     */
    for (i = 0; cfg_arg[i] != NULL; ++i)
        ;
    WT_RET(__wt_calloc_def(session, i + 2, &cfg));
    cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_create);
    for (i = 0; cfg_arg[i] != NULL; ++i)
        cfg[i + 1] = cfg_arg[i];
    cfg[i + 1] = NULL;

    ret = __wt_config_collapse(session, cfg, value_ret);

    __wt_free(session, cfg);
    return (ret);
}
/*
 * __bloom_init --
 *    Allocate a WT_BLOOM handle.
 */
static int
__bloom_init(WT_SESSION_IMPL *session,
    const char *uri, const char *config, WT_BLOOM **bloomp)
{
    WT_BLOOM *bloom;
    WT_DECL_RET;
    size_t len;

    *bloomp = NULL;

    WT_RET(__wt_calloc_one(session, &bloom));
    WT_ERR(__wt_strdup(session, uri, &bloom->uri));

    len = strlen(WT_BLOOM_TABLE_CONFIG) + 2;
    if (config != NULL)
        len += strlen(config);
    WT_ERR(__wt_calloc_def(session, len, &bloom->config));
    /* Add the standard config at the end, so it overrides user settings. */
    (void)snprintf(bloom->config, len,
        "%s,%s", config == NULL ? "" : config, WT_BLOOM_TABLE_CONFIG);

    bloom->session = session;

    *bloomp = bloom;
    return (0);

err:    __wt_free(session, bloom->uri);
    __wt_free(session, bloom->config);
    __wt_free(session, bloom->bitstring);
    __wt_free(session, bloom);
    return (ret);
}
/*
 * __conn_add_compressor --
 *    WT_CONNECTION->add_compressor method.
 */
static int
__conn_add_compressor(WT_CONNECTION *wt_conn,
    const char *name, WT_COMPRESSOR *compressor, const char *config)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;
    WT_NAMED_COMPRESSOR *ncomp;
    WT_SESSION_IMPL *session;

    WT_UNUSED(name);
    WT_UNUSED(compressor);
    ncomp = NULL;

    conn = (WT_CONNECTION_IMPL *)wt_conn;
    CONNECTION_API_CALL(conn, session, add_compressor, config, cfg);
    WT_UNUSED(cfg);

    WT_ERR(__wt_calloc_def(session, 1, &ncomp));
    WT_ERR(__wt_strdup(session, name, &ncomp->name));
    ncomp->compressor = compressor;

    __wt_spin_lock(session, &conn->api_lock);
    TAILQ_INSERT_TAIL(&conn->compqh, ncomp, q);
    ncomp = NULL;
    __wt_spin_unlock(session, &conn->api_lock);

err:    if (ncomp != NULL) {
        __wt_free(session, ncomp->name);
        __wt_free(session, ncomp);
    }

    API_END_NOTFOUND_MAP(session, ret);
}
/*
 * __wt_stat_alloc_lsm_stats --
 *    Allocate and initialize the LSM statistics structure.
 */
int
__wt_stat_alloc_lsm_stats(WT_SESSION_IMPL *session, WT_LSM_STATS **statsp)
{
    WT_LSM_STATS *stats;

    WT_RET(__wt_calloc_def(session, 1, &stats));

    stats->bloom_cache_evict.desc =
        "Number of bloom pages evicted from cache";
    stats->bloom_cache_read.desc = "Number of bloom pages read into cache";
    stats->bloom_count.desc = "Number of bloom filters in the LSM tree";
    stats->bloom_false_positives.desc =
        "Number of bloom filter false positives";
    stats->bloom_hits.desc = "Number of bloom filter hits";
    stats->bloom_misses.desc = "Number of bloom filter misses";
    stats->bloom_space.desc = "Total space used by bloom filters";
    stats->cache_evict.desc = "Number of pages evicted from cache";
    stats->cache_evict_fail.desc =
        "Number of pages selected for eviction that could not be evicted";
    stats->cache_read.desc = "Number of pages read into cache";
    stats->cache_write.desc = "Number of pages written from cache";
    stats->chunk_cache_evict.desc = "Number of pages evicted from LSM chunks";
    stats->chunk_cache_read.desc = "Number of pages read into LSM chunks";
    stats->chunk_count.desc = "Number of chunks in the LSM tree";
    stats->generation_max.desc = "Highest merge generation in the LSM tree";
    stats->search_miss_no_bloom.desc =
        "Number of queries that could have benefited from a bloom filter "
        "that did not exist";

    *statsp = stats;
    return (0);
}
/*
 * __wt_txn_init --
 *    Initialize a session's transaction data.
 */
int
__wt_txn_init(WT_SESSION_IMPL *session)
{
    WT_TXN *txn;

    txn = &session->txn;
    txn->id = WT_TXN_NONE;

    WT_RET(__wt_calloc_def(session,
        S2C(session)->session_size, &txn->snapshot));

#ifdef HAVE_DIAGNOSTIC
    if (S2C(session)->txn_global.states != NULL) {
        WT_TXN_STATE *txn_state;
        txn_state = WT_SESSION_TXN_STATE(session);
        WT_ASSERT(session, txn_state->snap_min == WT_TXN_NONE);
    }
#endif

    /*
     * Take care to clean these out in case we are reusing the transaction
     * for eviction.
     */
    txn->mod = NULL;

    txn->isolation = session->isolation;
    return (0);
}
/*
 * __conn_add_data_source --
 *    WT_CONNECTION->add_data_source method.
 */
static int
__conn_add_data_source(WT_CONNECTION *wt_conn,
    const char *prefix, WT_DATA_SOURCE *dsrc, const char *config)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    WT_NAMED_DATA_SOURCE *ndsrc;

    ndsrc = NULL;

    conn = (WT_CONNECTION_IMPL *)wt_conn;
    CONNECTION_API_CALL(conn, session, add_data_source, config, cfg);
    WT_UNUSED(cfg);

    WT_ERR(__wt_calloc_def(session, 1, &ndsrc));
    WT_ERR(__wt_strdup(session, prefix, &ndsrc->prefix));
    ndsrc->dsrc = dsrc;

    /* Link onto the environment's list of data sources. */
    __wt_spin_lock(session, &conn->api_lock);
    TAILQ_INSERT_TAIL(&conn->dsrcqh, ndsrc, q);
    __wt_spin_unlock(session, &conn->api_lock);

    if (0) {
err:        if (ndsrc != NULL)
            __wt_free(session, ndsrc->prefix);
        __wt_free(session, ndsrc);
    }

    API_END_NOTFOUND_MAP(session, ret);
}
/*
 * __wt_txn_global_init --
 *    Initialize the global transaction state.
 */
int
__wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
{
    WT_CONNECTION_IMPL *conn;
    WT_TXN_GLOBAL *txn_global;
    WT_TXN_STATE *s;
    u_int i;

    WT_UNUSED(cfg);
    conn = S2C(session);

    txn_global = &conn->txn_global;
    txn_global->current = txn_global->last_running =
        txn_global->oldest_id = WT_TXN_FIRST;

    WT_RET(__wt_rwlock_alloc(session,
        &txn_global->nsnap_rwlock, "named snapshot lock"));
    txn_global->nsnap_oldest_id = WT_TXN_NONE;
    TAILQ_INIT(&txn_global->nsnaph);

    WT_RET(__wt_calloc_def(
        session, conn->session_size, &txn_global->states));

    for (i = 0, s = txn_global->states; i < conn->session_size; i++, s++)
        s->id = s->snap_min = WT_TXN_NONE;

    return (0);
}
/*
 * __conn_add_collator --
 *    WT_CONNECTION->add_collator method.
 */
static int
__conn_add_collator(WT_CONNECTION *wt_conn,
    const char *name, WT_COLLATOR *collator, const char *config)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;
    WT_NAMED_COLLATOR *ncoll;
    WT_SESSION_IMPL *session;

    conn = (WT_CONNECTION_IMPL *)wt_conn;
    CONNECTION_API_CALL(conn, session, add_collator, config, cfg);
    WT_UNUSED(cfg);

    WT_ERR(__wt_calloc_def(session, 1, &ncoll));
    WT_ERR(__wt_strdup(session, name, &ncoll->name));
    ncoll->collator = collator;

    __wt_spin_lock(session, &conn->api_lock);
    TAILQ_INSERT_TAIL(&conn->collqh, ncoll, q);
    __wt_spin_unlock(session, &conn->api_lock);
    ncoll = NULL;

err:    __wt_free(session, ncoll);

    API_END_NOTFOUND_MAP(session, ret);
}
/*
 * __wt_lsm_tree_switch --
 *    Switch to a new in-memory tree.
 */
int
__wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
    WT_DECL_RET;
    WT_LSM_CHUNK *chunk;
    uint32_t new_id;

    new_id = WT_ATOMIC_ADD(lsm_tree->last, 1);

    WT_VERBOSE_RET(session, lsm, "Tree switch to: %d", new_id);

    if ((lsm_tree->nchunks + 1) * sizeof(*lsm_tree->chunk) >
        lsm_tree->chunk_alloc)
        WT_ERR(__wt_realloc(session, &lsm_tree->chunk_alloc,
            WT_MAX(10 * sizeof(*lsm_tree->chunk),
            2 * lsm_tree->chunk_alloc),
            &lsm_tree->chunk));

    WT_ERR(__wt_calloc_def(session, 1, &chunk));
    chunk->id = new_id;
    lsm_tree->chunk[lsm_tree->nchunks++] = chunk;
    WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));

    ++lsm_tree->dsk_gen;
    WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

err:    /* TODO: mark lsm_tree bad on error(?) */
    return (ret);
}
/*
 * __wt_cache_create --
 *    Create a connection's eviction cache.
 */
int
__wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
{
    WT_CACHE *cache;
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;

    conn = S2C(session);

    WT_RET(__wt_calloc_one(session, &conn->cache));
    cache = conn->cache;

    /* Configure the cache. */
    WT_RET(__wt_cache_config(session, 0, cfg));

    if (cache->eviction_target >= cache->eviction_trigger)
        WT_ERR_MSG(session, EINVAL,
            "eviction target must be lower than the eviction trigger");

    /* Create the eviction condition variables. */
    WT_ERR(__wt_cond_alloc(session,
        "cache eviction server", 0, &cache->evict_cond));
    WT_ERR(__wt_cond_alloc(session,
        "eviction waiters", 0, &cache->evict_waiter_cond));
    WT_ERR(__wt_spin_init(session, &cache->evict_lock, "cache eviction"));
    WT_ERR(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));

    /* Allocate the LRU eviction queue. */
    cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR;
    WT_ERR(__wt_calloc_def(session, cache->evict_slots, &cache->evict));

    /* Initialize the cache statistics. */
    __wt_cache_stats_update(session);
    return 0;

err:    WT_RET(__wt_cache_destroy(session));
    return ret;
}
/*
 * __wt_dlopen --
 *    Open a dynamic library.
 */
int
__wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
{
    WT_DECL_RET;
    WT_DLH *dlh;

    WT_RET(__wt_calloc_def(session, 1, &dlh));
    WT_ERR(__wt_strdup(session, path, &dlh->name));

    /* NULL means load from the current binary */
    if (path == NULL) {
        ret = GetModuleHandleExA(0, NULL, &dlh->handle);
        if (ret == FALSE)
            WT_ERR_MSG(session, __wt_errno(),
                "GetModuleHandleEx(%s): %s", path, 0);
    } else {
        // TODO: load dll here
        DebugBreak();
    }

    /* Windows returns 0 on failure, WT expects 0 on success */
    ret = !ret;

    *dlhp = dlh;

    if (0) {
err:        __wt_free(session, dlh->name);
        __wt_free(session, dlh);
    }
    return (ret);
}
/*
 * __wt_lsm_tree_switch --
 *    Switch to a new in-memory tree.
 */
int
__wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
    WT_DECL_RET;
    WT_LSM_CHUNK *chunk, **cp;
    uint32_t in_memory, new_id;

    new_id = WT_ATOMIC_ADD(lsm_tree->last, 1);

    if ((lsm_tree->nchunks + 1) * sizeof(*lsm_tree->chunk) >
        lsm_tree->chunk_alloc)
        WT_ERR(__wt_realloc(session, &lsm_tree->chunk_alloc,
            WT_MAX(10 * sizeof(*lsm_tree->chunk),
            2 * lsm_tree->chunk_alloc),
            &lsm_tree->chunk));

    /*
     * In the steady state, we expect that the checkpoint worker thread
     * will keep up with inserts. If not, we throttle the insert rate to
     * avoid filling the cache with in-memory chunks. Threads sleep every
     * 100 operations, so take that into account in the calculation.
     */
    for (in_memory = 1, cp = lsm_tree->chunk + lsm_tree->nchunks - 1;
        in_memory < lsm_tree->nchunks &&
        !F_ISSET(*cp, WT_LSM_CHUNK_ONDISK);
        ++in_memory, --cp)
        ;
    if (!F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE) || in_memory <= 2)
        lsm_tree->throttle_sleep = 0;
    else if (in_memory == lsm_tree->nchunks ||
        F_ISSET(*cp, WT_LSM_CHUNK_STABLE)) {
        /*
         * No checkpoint has completed this run. Keep slowing down
         * inserts until one does.
         */
        lsm_tree->throttle_sleep =
            WT_MAX(20, 2 * lsm_tree->throttle_sleep);
    } else {
        chunk = lsm_tree->chunk[lsm_tree->nchunks - 1];
        lsm_tree->throttle_sleep = (long)((in_memory - 2) *
            WT_TIMEDIFF(chunk->create_ts, (*cp)->create_ts) /
            (20 * in_memory * chunk->count));
    }

    WT_VERBOSE_ERR(session, lsm, "Tree switch to: %d, throttle %d",
        new_id, (int)lsm_tree->throttle_sleep);

    WT_ERR(__wt_calloc_def(session, 1, &chunk));
    chunk->id = new_id;
    lsm_tree->chunk[lsm_tree->nchunks++] = chunk;
    WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));

    ++lsm_tree->dsk_gen;

    F_CLR(lsm_tree, WT_LSM_TREE_NEED_SWITCH);
    WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

err:    /* TODO: mark lsm_tree bad on error(?) */
    return (ret);
}
/*
 * __wt_async_op_init --
 *    Initialize all the op handles.
 */
int
__wt_async_op_init(WT_SESSION_IMPL *session)
{
    WT_ASYNC *async;
    WT_ASYNC_OP_IMPL *op;
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;
    uint32_t i;

    conn = S2C(session);
    async = conn->async;

    /*
     * Initialize the flush op structure.
     */
    WT_RET(__async_op_init(conn, &async->flush_op, OPS_INVALID_INDEX));

    /*
     * Allocate and initialize the work queue. This is sized so that
     * the ring buffer is known to be big enough such that the head
     * can never overlap the tail. Include extra for the flush op.
     */
    async->async_qsize = conn->async_size + 2;
    WT_RET(__wt_calloc_def(
        session, async->async_qsize, &async->async_queue));

    /*
     * Allocate and initialize all the user ops.
     */
    WT_ERR(__wt_calloc_def(session, conn->async_size, &async->async_ops));
    for (i = 0; i < conn->async_size; i++) {
        op = &async->async_ops[i];
        WT_ERR(__async_op_init(conn, op, i));
    }
    return (0);

err:    if (async->async_ops != NULL) {
        __wt_free(session, async->async_ops);
        async->async_ops = NULL;
    }
    if (async->async_queue != NULL) {
        __wt_free(session, async->async_queue);
        async->async_queue = NULL;
    }
    return (ret);
}
/*
 * __wt_lsm_tree_truncate --
 *    Truncate an LSM tree.
 */
int
__wt_lsm_tree_truncate(
    WT_SESSION_IMPL *session, const char *name, const char *cfg[])
{
    WT_DECL_RET;
    WT_LSM_CHUNK *chunk;
    WT_LSM_TREE *lsm_tree;
    int locked;

    WT_UNUSED(cfg);
    chunk = NULL;
    locked = 0;

    /* Get the LSM tree. */
    WT_RET(__wt_lsm_tree_get(session, name, 1, &lsm_tree));

    /* Shut down the LSM worker. */
    WT_RET(__lsm_tree_close(session, lsm_tree));

    /* Prevent any new opens. */
    WT_RET(__wt_lsm_tree_lock(session, lsm_tree, 1));
    locked = 1;

    /* Create the new chunk. */
    WT_ERR(__wt_calloc_def(session, 1, &chunk));
    chunk->id = WT_ATOMIC_ADD(lsm_tree->last, 1);
    WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));

    /* Mark all chunks old. */
    WT_ERR(__wt_lsm_merge_update_tree(
        session, lsm_tree, 0, lsm_tree->nchunks, chunk));

    WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

    WT_ERR(__lsm_tree_start_worker(session, lsm_tree));
    locked = 0;
    WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree));
    __wt_lsm_tree_release(session, lsm_tree);

err:    if (locked)
        WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree));
    if (ret != 0) {
        if (chunk != NULL) {
            (void)__wt_schema_drop(session, chunk->uri, NULL);
            __wt_free(session, chunk);
        }
        /*
         * Discard the LSM tree structure on error. This will force the
         * LSM tree to be re-opened the next time it is accessed and
         * the last good version of the metadata will be used, resulting
         * in a valid (not truncated) tree.
         */
        WT_TRET(__lsm_tree_discard(session, lsm_tree));
    }
    return (ret);
}
/*
 * __wt_cache_create --
 *    Create the underlying cache.
 */
int
__wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
{
    WT_CACHE *cache;
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;

    conn = S2C(session);

    WT_ASSERT(session, conn->cache == NULL);
    WT_RET(__wt_calloc_one(session, &conn->cache));

    cache = conn->cache;

    /* Use a common routine for run-time configuration options. */
    WT_RET(__wt_cache_config(session, false, cfg));

    /*
     * The lowest possible page read-generation has a special meaning, it
     * marks a page for forcible eviction; don't let it happen by accident.
     */
    cache->read_gen = WT_READGEN_START_VALUE;

    /*
     * The target size must be lower than the trigger size or we will never
     * get any work done.
     */
    if (cache->eviction_target >= cache->eviction_trigger)
        WT_ERR_MSG(session, EINVAL,
            "eviction target must be lower than the eviction trigger");

    WT_ERR(__wt_cond_auto_alloc(session, "cache eviction server",
        false, 10000, WT_MILLION, &cache->evict_cond));
    WT_ERR(__wt_cond_alloc(session,
        "eviction waiters", false, &cache->evict_waiter_cond));
    WT_ERR(__wt_spin_init(session, &cache->evict_lock, "cache eviction"));
    WT_ERR(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));

    /* Allocate the LRU eviction queue. */
    cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR;
    WT_ERR(__wt_calloc_def(session,
        cache->evict_slots, &cache->evict_queue));

    /*
     * We get/set some values in the cache statistics (rather than have
     * two copies), configure them.
     */
    __wt_cache_stats_update(session);
    return (0);

err:    WT_RET(__wt_cache_destroy(session));
    return (ret);
}
/*
 * __wt_curbackup_open --
 *    WT_SESSION->open_cursor method for the backup cursor type.
 */
int
__wt_curbackup_open(WT_SESSION_IMPL *session,
    const char *uri, const char *cfg[], WT_CURSOR **cursorp)
{
    WT_CURSOR_STATIC_INIT(iface,
        __wt_cursor_get_key,        /* get-key */
        __wt_cursor_notsup,         /* get-value */
        __wt_cursor_notsup,         /* set-key */
        __wt_cursor_notsup,         /* set-value */
        __wt_cursor_notsup,         /* compare */
        __curbackup_next,           /* next */
        __wt_cursor_notsup,         /* prev */
        __curbackup_reset,          /* reset */
        __wt_cursor_notsup,         /* search */
        __wt_cursor_notsup,         /* search-near */
        __wt_cursor_notsup,         /* insert */
        __wt_cursor_notsup,         /* update */
        __wt_cursor_notsup,         /* remove */
        __curbackup_close);         /* close */
    WT_CURSOR *cursor;
    WT_CURSOR_BACKUP *cb;
    WT_DECL_RET;

    WT_STATIC_ASSERT(offsetof(WT_CURSOR_BACKUP, iface) == 0);

    cb = NULL;

    WT_RET(__wt_calloc_def(session, 1, &cb));
    cursor = &cb->iface;
    *cursor = iface;
    cursor->session = &session->iface;
    session->bkp_cursor = cb;

    cursor->key_format = "S";   /* Return the file names as the key. */
    cursor->value_format = "";  /* No value. */

    /*
     * Start the backup and fill in the cursor's list. Acquire the schema
     * lock, we need a consistent view when creating a copy.
     */
    WT_WITH_SCHEMA_LOCK(session, ret = __backup_start(session, cb, cfg));
    WT_ERR(ret);

    /* __wt_cursor_init is last so we don't have to clean up on error. */
    WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));

    if (0) {
err:        __wt_free(session, cb);
    }
    return (ret);
}
/*
 * __lsm_tree_open --
 *    Open an LSM tree structure.
 */
static int
__lsm_tree_open(
    WT_SESSION_IMPL *session, const char *uri, WT_LSM_TREE **treep)
{
    WT_DECL_RET;
    WT_LSM_TREE *lsm_tree;

    WT_ASSERT(session, F_ISSET(session, WT_SESSION_SCHEMA_LOCKED));

    /* Make sure no one beat us to it. */
    TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q)
        if (strcmp(uri, lsm_tree->name) == 0) {
            *treep = lsm_tree;
            return (0);
        }

    /* Try to open the tree. */
    WT_RET(__wt_calloc_def(session, 1, &lsm_tree));
    WT_ERR(__wt_rwlock_alloc(session, "lsm tree", &lsm_tree->rwlock));
    WT_ERR(__wt_cond_alloc(session, "lsm ckpt", 0, &lsm_tree->work_cond));
    WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));

    WT_ERR(__wt_lsm_meta_read(session, lsm_tree));

    /*
     * Sanity check the configuration. Do it now since this is the first
     * time we have the LSM tree configuration.
     */
    WT_ERR(__lsm_tree_open_check(session, lsm_tree));

    if (lsm_tree->nchunks == 0) {
        F_SET(lsm_tree, WT_LSM_TREE_NEED_SWITCH);
        WT_ERR(__wt_lsm_tree_switch(session, lsm_tree));
    }

    /* Set the generation number so cursors are opened on first usage. */
    lsm_tree->dsk_gen = 1;

    /* Now the tree is setup, make it visible to others. */
    lsm_tree->refcnt = 1;
    TAILQ_INSERT_HEAD(&S2C(session)->lsmqh, lsm_tree, q);
    F_SET(lsm_tree, WT_LSM_TREE_OPEN);

    WT_ERR(__lsm_tree_start_worker(session, lsm_tree));
    *treep = lsm_tree;

    if (0) {
err:        WT_TRET(__lsm_tree_discard(session, lsm_tree));
    }
    return (ret);
}
/*
 * __wt_lsm_tree_truncate --
 *    Truncate an LSM tree.
 */
int
__wt_lsm_tree_truncate(
    WT_SESSION_IMPL *session, const char *name, const char *cfg[])
{
    WT_DECL_RET;
    WT_LSM_CHUNK *chunk;
    WT_LSM_TREE *lsm_tree;
    int locked;

    WT_UNUSED(cfg);
    locked = 0;

    /* Get the LSM tree. */
    WT_RET(__wt_lsm_tree_get(session, name, 1, &lsm_tree));

    /* Shut down the LSM worker. */
    WT_RET(__lsm_tree_close(session, lsm_tree));

    /* Prevent any new opens. */
    WT_RET(__wt_try_writelock(session, lsm_tree->rwlock));
    locked = 1;

    /* Create the new chunk. */
    WT_ERR(__wt_calloc_def(session, 1, &chunk));
    chunk->id = WT_ATOMIC_ADD(lsm_tree->last, 1);
    WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));

    /* Mark all chunks old. */
    WT_ERR(__wt_lsm_merge_update_tree(
        session, lsm_tree, 0, lsm_tree->nchunks, chunk));

    WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

    WT_ERR(__lsm_tree_start_worker(session, lsm_tree));
    ret = __wt_rwunlock(session, lsm_tree->rwlock);
    locked = 0;
    if (ret == 0)
        __wt_lsm_tree_release(session, lsm_tree);

err:    if (locked)
        WT_TRET(__wt_rwunlock(session, lsm_tree->rwlock));
    /*
     * Don't discard the LSM tree structure unless there has been an
     * error. The handle remains valid for future operations.
     */
    if (ret != 0)
        WT_TRET(__lsm_tree_discard(session, lsm_tree));
    return (ret);
}
/*
 * __wt_cache_create --
 *    Create the underlying cache.
 */
int
__wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
{
    WT_CACHE *cache;
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;

    conn = S2C(session);

    WT_ASSERT(session, conn->cache == NULL);
    WT_RET(__wt_calloc_one(session, &conn->cache));

    cache = conn->cache;

    /* Use a common routine for run-time configuration options. */
    WT_RET(__wt_cache_config(session, 0, cfg));

    /*
     * The target size must be lower than the trigger size or we will never
     * get any work done.
     */
    if (cache->eviction_target >= cache->eviction_trigger)
        WT_ERR_MSG(session, EINVAL,
            "eviction target must be lower than the eviction trigger");

    WT_ERR(__wt_cond_alloc(session,
        "cache eviction server", 0, &cache->evict_cond));
    WT_ERR(__wt_cond_alloc(session,
        "eviction waiters", 0, &cache->evict_waiter_cond));
    WT_ERR(__wt_spin_init(session, &cache->evict_lock, "cache eviction"));
    WT_ERR(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));

    /* Allocate the LRU eviction queue. */
    cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR;
    WT_ERR(__wt_calloc_def(session, cache->evict_slots, &cache->evict));

    /*
     * We get/set some values in the cache statistics (rather than have
     * two copies), configure them.
     */
    __wt_cache_stats_update(session);
    return (0);

err:    WT_RET(__wt_cache_destroy(session));
    return (ret);
}
/*
 * __wt_lsm_tree_switch --
 *    Switch to a new in-memory tree.
 */
int
__wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
    WT_DECL_RET;
    WT_LSM_CHUNK *chunk;
    uint32_t nchunks, new_id;

    WT_RET(__wt_lsm_tree_lock(session, lsm_tree, 1));

    /*
     * Check if a switch is still needed: we may have raced while waiting
     * for a lock.
     */
    if ((nchunks = lsm_tree->nchunks) != 0 &&
        (chunk = lsm_tree->chunk[nchunks - 1]) != NULL &&
        !F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK) &&
        !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
        goto err;

    /* Update the throttle time. */
    __wt_lsm_tree_throttle(session, lsm_tree);

    new_id = WT_ATOMIC_ADD(lsm_tree->last, 1);

    WT_ERR(__wt_realloc_def(session,
        &lsm_tree->chunk_alloc, nchunks + 1, &lsm_tree->chunk));

    WT_VERBOSE_ERR(session, lsm,
        "Tree switch to: %" PRIu32 ", throttle %ld",
        new_id, lsm_tree->throttle_sleep);

    WT_ERR(__wt_calloc_def(session, 1, &chunk));
    chunk->id = new_id;
    chunk->txnid_max = WT_TXN_NONE;
    lsm_tree->chunk[lsm_tree->nchunks++] = chunk;
    WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));

    WT_ERR(__wt_lsm_meta_write(session, lsm_tree));
    F_CLR(lsm_tree, WT_LSM_TREE_NEED_SWITCH);
    ++lsm_tree->dsk_gen;

    lsm_tree->modified = 1;

err:    /* TODO: mark lsm_tree bad on error(?) */
    WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree));
    return (ret);
}
/*
 * __lsm_tree_start_worker --
 *    Start the worker threads for an LSM tree.
 */
static int
__lsm_tree_start_worker(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
    WT_CONNECTION *wt_conn;
    WT_LSM_WORKER_ARGS *wargs;
    WT_SESSION *wt_session;
    WT_SESSION_IMPL *s;
    uint32_t i;

    wt_conn = &S2C(session)->iface;

    WT_RET(wt_conn->open_session(wt_conn, NULL, NULL, &wt_session));
    lsm_tree->ckpt_session = (WT_SESSION_IMPL *)wt_session;
    F_SET(lsm_tree->ckpt_session, WT_SESSION_INTERNAL);

    F_SET(lsm_tree, WT_LSM_TREE_WORKING);
    /* The new thread will rely on the WORKING value being visible. */
    WT_FULL_BARRIER();

    if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
        for (i = 0; i < lsm_tree->merge_threads; i++) {
            WT_RET(wt_conn->open_session(
                wt_conn, NULL, NULL, &wt_session));
            s = (WT_SESSION_IMPL *)wt_session;
            F_SET(s, WT_SESSION_INTERNAL);
            lsm_tree->worker_sessions[i] = s;

            WT_RET(__wt_calloc_def(session, 1, &wargs));
            wargs->lsm_tree = lsm_tree;
            wargs->id = i;
            WT_RET(__wt_thread_create(session,
                &lsm_tree->worker_tids[i],
                __wt_lsm_merge_worker, wargs));
        }
    if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST)) {
        WT_RET(wt_conn->open_session(wt_conn, NULL, NULL, &wt_session));
        lsm_tree->bloom_session = (WT_SESSION_IMPL *)wt_session;
        F_SET(lsm_tree->bloom_session, WT_SESSION_INTERNAL);

        WT_RET(__wt_thread_create(session,
            &lsm_tree->bloom_tid, __wt_lsm_bloom_worker, lsm_tree));
    }
    WT_RET(__wt_thread_create(session,
        &lsm_tree->ckpt_tid, __wt_lsm_checkpoint_worker, lsm_tree));

    return (0);
}
/*
 * __lsm_tree_start_worker --
 *    Start the worker thread for an LSM tree.
 */
static int
__lsm_tree_start_worker(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
    WT_CONNECTION *wt_conn;
    WT_LSM_WORKER_ARGS *wargs;
    WT_SESSION *wt_session;
    WT_SESSION_IMPL *s;
    uint32_t i;

    wt_conn = &S2C(session)->iface;

    /*
     * All the LSM worker threads do their operations on read-only files.
     * Use read-uncommitted isolation to avoid keeping updates in cache
     * unnecessarily.
     */
    WT_RET(wt_conn->open_session(
        wt_conn, NULL, "isolation=read-uncommitted", &wt_session));
    lsm_tree->ckpt_session = (WT_SESSION_IMPL *)wt_session;
    F_SET(lsm_tree->ckpt_session, WT_SESSION_INTERNAL);

    F_SET(lsm_tree, WT_LSM_TREE_WORKING);
    /* The new thread will rely on the WORKING value being visible. */
    WT_FULL_BARRIER();

    if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
        for (i = 0; i < lsm_tree->merge_threads; i++) {
            WT_RET(wt_conn->open_session(wt_conn,
                NULL, "isolation=read-uncommitted", &wt_session));
            s = (WT_SESSION_IMPL *)wt_session;
            F_SET(s, WT_SESSION_INTERNAL);
            lsm_tree->worker_sessions[i] = s;

            WT_RET(__wt_calloc_def(session, 1, &wargs));
            wargs->lsm_tree = lsm_tree;
            wargs->id = i;
            WT_RET(__wt_thread_create(session,
                &lsm_tree->worker_tids[i],
                __wt_lsm_merge_worker, wargs));
        }
    WT_RET(__wt_thread_create(session,
        &lsm_tree->ckpt_tid, __wt_lsm_checkpoint_worker, lsm_tree));

    return (0);
}
/*
 * __wt_salvage --
 *    Salvage a single file.
 */
int
__wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
{
    WT_CKPT *ckptbase;
    WT_DATA_HANDLE *dhandle;
    WT_DECL_RET;

    dhandle = session->dhandle;

    /*
     * XXX
     * The salvage process reads and discards previous checkpoints, so the
     * underlying block manager has to ignore any previous checkpoint
     * entries when creating a new checkpoint, in other words, we can't use
     * the metadata checkpoint list, it has all of those checkpoints listed
     * and we don't care about them. Build a clean checkpoint list and use
     * it instead.
     *
     * Don't first clear the metadata checkpoint list and call the function
     * to get a list of checkpoints: a crash between clearing the metadata
     * checkpoint list and creating a new checkpoint list would look like a
     * create or open of a file without a checkpoint to roll-forward from,
     * and the contents of the file would be discarded.
     */
    WT_RET(__wt_calloc_def(session, 2, &ckptbase));
    WT_ERR(__wt_strdup(session, WT_CHECKPOINT, &ckptbase[0].name));
    F_SET(&ckptbase[0], WT_CKPT_ADD);

    WT_ERR(__wt_bt_salvage(session, ckptbase, cfg));

    /*
     * If no checkpoint was created, well, it's probably bad news, but there
     * is nothing to do but clear any recorded checkpoints for the file. If
     * a checkpoint was created, life is good, replace any existing list of
     * checkpoints with the single new one.
     */
    if (ckptbase[0].raw.data == NULL)
        WT_ERR(__wt_meta_checkpoint_clear(session, dhandle->name));
    else
        WT_ERR(__wt_meta_ckptlist_set(
            session, dhandle->name, ckptbase, NULL));

err:    __wt_meta_ckptlist_free(session, &ckptbase);
    return (ret);
}
/*
 * __wt_curconfig_open --
 *    WT_SESSION->open_cursor method for config cursors.
 */
int
__wt_curconfig_open(WT_SESSION_IMPL *session,
    const char *uri, const char *cfg[], WT_CURSOR **cursorp)
{
    WT_CURSOR_STATIC_INIT(iface,
        NULL,                   /* get-key */
        NULL,                   /* get-value */
        NULL,                   /* set-key */
        NULL,                   /* set-value */
        NULL,                   /* compare */
        __wt_cursor_notsup,     /* next */
        __wt_cursor_notsup,     /* prev */
        __wt_cursor_notsup,     /* reset */
        __wt_cursor_notsup,     /* search */
        __wt_cursor_notsup,     /* search-near */
        __wt_cursor_notsup,     /* insert */
        __wt_cursor_notsup,     /* update */
        __wt_cursor_notsup,     /* remove */
        __curconfig_close);     /* close */
    WT_CURSOR_CONFIG *cconfig;
    WT_CURSOR *cursor;
    WT_DECL_RET;

    STATIC_ASSERT(offsetof(WT_CURSOR_CONFIG, iface) == 0);

    WT_UNUSED(uri);

    WT_RET(__wt_calloc_def(session, 1, &cconfig));

    cursor = &cconfig->iface;
    *cursor = iface;
    cursor->session = &session->iface;
    cursor->key_format = cursor->value_format = "S";

    /* __wt_cursor_init is last so we don't have to clean up on error. */
    WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));

    if (0) {
err:        __wt_free(session, cconfig);
    }
    return (ret);
}
/*
 * __conn_btree_config_set --
 *    Set up a btree handle's configuration information.
 */
static int
__conn_btree_config_set(WT_SESSION_IMPL *session)
{
    WT_DATA_HANDLE *dhandle;
    WT_DECL_RET;
    char *metaconf;

    dhandle = session->dhandle;

    /*
     * Read the object's entry from the metadata file, we're done if we
     * don't find one.
     */
    if ((ret =
        __wt_metadata_search(session, dhandle->name, &metaconf)) != 0) {
        if (ret == WT_NOTFOUND)
            ret = ENOENT;
        WT_RET(ret);
    }

    /*
     * The defaults are included because underlying objects have persistent
     * configuration information stored in the metadata file. If defaults
     * are included in the configuration, we can add new configuration
     * strings without upgrading the metadata file or writing special code
     * in case a configuration string isn't initialized, as long as the new
     * configuration string has an appropriate default value.
     *
     * The error handling is a little odd, but be careful: we're holding a
     * chunk of allocated memory in metaconf. If we fail before we copy a
     * reference to it into the object's configuration array, we must free
     * it, after the copy, we don't want to free it.
     */
    WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg));
    WT_ERR(__wt_strdup(
        session, WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0]));
    dhandle->cfg[1] = metaconf;
    return (0);

err:    __wt_free(session, metaconf);
    return (ret);
}
/*
 * __wt_conn_config_init --
 *    Initialize the connection's list of configuration entries.
 */
int
__wt_conn_config_init(WT_SESSION_IMPL *session)
{
    WT_CONNECTION_IMPL *conn;
    const WT_CONFIG_ENTRY *ep, **epp;

    conn = S2C(session);

    /* Build a list of pointers to the configuration information. */
    WT_RET(__wt_calloc_def(session, WT_ELEMENTS(config_entries), &epp));
    conn->config_entries = epp;

    /* Fill in the list to reference the default information. */
    for (ep = config_entries;;) {
        *epp++ = ep++;
        if (ep->method == NULL)
            break;
    }
    return (0);
}
/*
 * __wt_txn_global_init --
 *    Initialize the global transaction state.
 */
int
__wt_txn_global_init(WT_CONNECTION_IMPL *conn, const char *cfg[])
{
    WT_SESSION_IMPL *session;
    WT_TXN_GLOBAL *txn_global;
    WT_TXN_STATE *s;
    u_int i;

    WT_UNUSED(cfg);
    session = conn->default_session;

    txn_global = &conn->txn_global;
    txn_global->current = 1;

    WT_RET(__wt_calloc_def(
        session, conn->session_size, &txn_global->states));
    for (i = 0, s = txn_global->states; i < conn->session_size; i++, s++)
        s->id = s->snap_min = WT_TXN_NONE;

    return (0);
}
/*
 * __wt_dlopen --
 *    Open a dynamic library.
 */
int
__wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
{
    WT_DECL_RET;
    WT_DLH *dlh;

    WT_RET(__wt_calloc_def(session, 1, &dlh));
    WT_ERR(__wt_strdup(session, path, &dlh->name));

    if ((dlh->handle = dlopen(path, RTLD_LAZY)) == NULL)
        WT_ERR_MSG(
            session, __wt_errno(), "dlopen(%s): %s", path, dlerror());

    *dlhp = dlh;

    if (0) {
err:        __wt_free(session, dlh->name);
        __wt_free(session, dlh);
    }
    return (ret);
}
/*
 * __wt_txn_init --
 *    Initialize a session's transaction data.
 */
int
__wt_txn_init(WT_SESSION_IMPL *session)
{
    WT_TXN *txn;

    txn = &session->txn;
    txn->id = WT_TXN_NONE;

    WT_RET(__wt_calloc_def(session,
        S2C(session)->session_size, &txn->snapshot));

    /*
     * Take care to clean these out in case we are reusing the transaction
     * for eviction.
     */
    txn->mod = NULL;

    txn->isolation = session->isolation;
    return (0);
}