/* * __rename_file -- * WT_SESSION::rename for a file. */ static int __rename_file( WT_SESSION_IMPL *session, const char *uri, const char *newuri) { WT_DECL_RET; bool exist; const char *filename, *newfile; char *newvalue, *oldvalue; newvalue = oldvalue = NULL; filename = uri; newfile = newuri; if (!WT_PREFIX_SKIP(filename, "file:") || !WT_PREFIX_SKIP(newfile, "file:")) return (EINVAL); /* Close any btree handles in the file. */ WT_WITH_HANDLE_LIST_LOCK(session, ret = __wt_conn_dhandle_close_all(session, uri, false)); WT_ERR(ret); /* * First, check if the file being renamed exists in the system. Doing * this check first matches the table rename behavior because we return * WT_NOTFOUND when the renamed file doesn't exist (subsequently mapped * to ENOENT by the session layer). */ WT_ERR(__wt_metadata_search(session, uri, &oldvalue)); /* * Check to see if the proposed name is already in use, in either the * metadata or the filesystem. */ switch (ret = __wt_metadata_search(session, newuri, &newvalue)) { case 0: WT_ERR_MSG(session, EEXIST, "%s", newuri); /* NOTREACHED */ case WT_NOTFOUND: break; default: WT_ERR(ret); } WT_ERR(__wt_fs_exist(session, newfile, &exist)); if (exist) WT_ERR_MSG(session, EEXIST, "%s", newfile); /* Replace the old file entries with new file entries. */ WT_ERR(__wt_metadata_remove(session, uri)); WT_ERR(__wt_metadata_insert(session, newuri, oldvalue)); /* Rename the underlying file. */ WT_ERR(__wt_fs_rename(session, filename, newfile, false)); if (WT_META_TRACKING(session)) WT_ERR(__wt_meta_track_fileop(session, uri, newuri)); err: __wt_free(session, newvalue); __wt_free(session, oldvalue); return (ret); }
/* * __drop_file -- * Drop a file. */ static int __drop_file( WT_SESSION_IMPL *session, const char *uri, bool force, const char *cfg[]) { WT_CONFIG_ITEM cval; WT_DECL_RET; bool remove_files; const char *filename; WT_RET(__wt_config_gets(session, cfg, "remove_files", &cval)); remove_files = cval.val != 0; filename = uri; if (!WT_PREFIX_SKIP(filename, "file:")) return (EINVAL); /* Close all btree handles associated with this file. */ WT_WITH_HANDLE_LIST_LOCK(session, ret = __wt_conn_dhandle_close_all(session, uri, force)); WT_RET(ret); /* Remove the metadata entry (ignore missing items). */ WT_TRET(__wt_metadata_remove(session, uri)); if (!remove_files) return (ret); /* * Schedule the remove of the underlying physical file when the drop * completes. */ WT_TRET(__wt_meta_track_drop(session, filename)); return (ret); }
/* * __truncate_file -- * WT_SESSION::truncate for a file. */ static int __truncate_file(WT_SESSION_IMPL *session, const char *name) { WT_DECL_RET; const char *filename; uint32_t allocsize; filename = name; if (!WT_PREFIX_SKIP(filename, "file:")) return (EINVAL); /* Open and lock the file. */ WT_RET(__wt_session_get_btree( session, name, NULL, NULL, WT_DHANDLE_EXCLUSIVE)); /* Get the allocation size. */ allocsize = S2BT(session)->allocsize; WT_RET(__wt_session_release_btree(session)); /* Close any btree handles in the file. */ WT_WITH_HANDLE_LIST_LOCK(session, ret = __wt_conn_dhandle_close_all(session, name, false)); WT_RET(ret); /* Delete the root address and truncate the file. */ WT_RET(__wt_meta_checkpoint_clear(session, name)); WT_RET(__wt_block_manager_truncate(session, filename, allocsize)); return (0); }
/* * __backup_all -- * Backup all objects in the database. */ static int __backup_all(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) { WT_CONFIG_ITEM cval; WT_CURSOR *cursor; WT_DECL_RET; const char *key, *value; cursor = NULL; /* * Open a cursor on the metadata file and copy all of the entries to * the hot backup file. */ WT_ERR(__wt_metadata_cursor(session, NULL, &cursor)); while ((ret = cursor->next(cursor)) == 0) { WT_ERR(cursor->get_key(cursor, &key)); WT_ERR(cursor->get_value(cursor, &value)); WT_ERR(__wt_fprintf(cb->bfp, "%s\n%s\n", key, value)); /* * While reading the metadata file, check there are no "sources" * or "types" which can't support hot backup. This checks for * a data source that's non-standard, which can't be backed up, * but is also sanity checking: if there's an entry backed by * anything other than a file or lsm entry, we're confused. */ if ((ret = __wt_config_getones( session, value, "type", &cval)) == 0 && !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "file") && !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "lsm")) WT_ERR_MSG(session, ENOTSUP, "hot backup is not supported for objects of " "type %.*s", (int)cval.len, cval.str); WT_ERR_NOTFOUND_OK(ret); if ((ret =__wt_config_getones( session, value, "source", &cval)) == 0 && !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "file:") && !WT_PREFIX_MATCH_LEN(cval.str, cval.len, "lsm:")) WT_ERR_MSG(session, ENOTSUP, "hot backup is not supported for objects of " "source %.*s", (int)cval.len, cval.str); WT_ERR_NOTFOUND_OK(ret); } WT_ERR_NOTFOUND_OK(ret); /* Build a list of the file objects that need to be copied. */ WT_WITH_HANDLE_LIST_LOCK(session, ret = __wt_meta_btree_apply( session, __backup_list_all_append, NULL)); err: if (cursor != NULL) WT_TRET(cursor->close(cursor)); return (ret); }
/* * __backup_all -- * Backup all objects in the database. */ static int __backup_all(WT_SESSION_IMPL *session) { WT_DECL_RET; /* Build a list of the file objects that need to be copied. */ WT_WITH_HANDLE_LIST_LOCK(session, ret = __wt_meta_apply_all(session, NULL, __backup_list_uri_append, NULL)); return (ret); }
/* * __wt_curstat_cache_walk -- * Initialize the statistics for a cache cache_walk pass. */ void __wt_curstat_cache_walk(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CONNECTION_IMPL *conn; WT_PAGE_INDEX *root_idx; btree = S2BT(session); conn = S2C(session); /* Set statistics that don't require walking the cache. */ WT_STAT_DATA_SET(session, cache_state_gen_current, conn->cache->evict_pass_gen); /* Root page statistics */ root_idx = WT_INTL_INDEX_GET_SAFE(btree->root.page); WT_STAT_DATA_SET(session, cache_state_root_entries, root_idx->entries); WT_STAT_DATA_SET(session, cache_state_root_size, btree->root.page->memory_footprint); WT_WITH_HANDLE_LIST_LOCK(session, __evict_stat_walk(session)); }
/* * __lsm_drop_file -- * Helper function to drop part of an LSM tree. */ static int __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri) { WT_DECL_RET; const char *drop_cfg[] = { WT_CONFIG_BASE( session, WT_SESSION_drop), "remove_files=false", NULL }; /* * We need to grab the schema lock to drop the file, so first try to * make sure there is minimal work to freeing space in the cache. Only * bother trying to discard the checkpoint handle: the in-memory handle * should have been closed already. * * This will fail with EBUSY if the file is still in use. */ WT_WITH_HANDLE_LIST_LOCK(session, ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT)); WT_RET(ret); /* * Take the schema lock for the drop operation. Since __wt_schema_drop * results in the hot backup lock being taken when it updates the * metadata (which would be too late to prevent our drop). */ WT_WITH_SCHEMA_LOCK(session, ret, ret = __wt_schema_drop(session, uri, drop_cfg)); if (ret == 0) ret = __wt_remove(session, uri + strlen("file:")); WT_RET(__wt_verbose(session, WT_VERB_LSM, "Dropped %s", uri)); if (ret == EBUSY || ret == ENOENT) WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker drop of %s failed with %d", uri, ret)); return (ret); }
/* * __wt_lsm_checkpoint_chunk -- * Flush a single LSM chunk to disk. */ int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) { WT_DECL_RET; WT_TXN_ISOLATION saved_isolation; bool flush_set; flush_set = false; /* * If the chunk is already checkpointed, make sure it is also evicted. * Either way, there is no point trying to checkpoint it again. */ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !F_ISSET(chunk, WT_LSM_CHUNK_STABLE) && !chunk->evicted) { WT_WITH_HANDLE_LIST_LOCK(session, ret = __lsm_discard_handle(session, chunk->uri, NULL)); if (ret == 0) chunk->evicted = 1; else if (ret == EBUSY) ret = 0; else WT_RET_MSG(session, ret, "discard handle"); } if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker %s already on disk", chunk->uri)); return (0); } /* Stop if a running transaction needs the chunk. */ __wt_txn_update_oldest(session, true); if (chunk->switch_txn == WT_TXN_NONE || !__wt_txn_visible_all(session, chunk->switch_txn)) { WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker %s: running transaction, return", chunk->uri)); return (0); } if (!__wt_atomic_cas8(&chunk->flushing, 0, 1)) return (0); flush_set = true; WT_ERR(__wt_verbose(session, WT_VERB_LSM, "LSM worker flushing %s", chunk->uri)); /* * Flush the file before checkpointing: this is the expensive part in * terms of I/O. * * !!! * We can wait here for checkpoints and fsyncs to complete, which can * take a long time. */ if ((ret = __wt_session_get_btree( session, chunk->uri, NULL, NULL, 0)) == 0) { /* * Set read-uncommitted: we have already checked that all of the * updates in this chunk are globally visible, use the cheapest * possible check in reconciliation. */ saved_isolation = session->txn.isolation; session->txn.isolation = WT_ISO_READ_UNCOMMITTED; ret = __wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES); session->txn.isolation = saved_isolation; WT_TRET(__wt_session_release_btree(session)); } WT_ERR(ret); WT_ERR(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s", chunk->uri)); /* * Turn on metadata tracking to ensure the checkpoint gets the * necessary handle locks. * * Ensure that we don't race with a running checkpoint: the checkpoint * lock protects against us racing with an application checkpoint in * this chunk. Don't wait for it, though: checkpoints can take a long * time, and our checkpoint operation should be very quick. */ WT_ERR(__wt_meta_track_on(session)); WT_WITH_CHECKPOINT_LOCK(session, ret, WT_WITH_SCHEMA_LOCK(session, ret, ret = __wt_schema_worker( session, chunk->uri, __wt_checkpoint, NULL, NULL, 0))); WT_TRET(__wt_meta_track_off(session, false, ret != 0)); if (ret != 0) WT_ERR_MSG(session, ret, "LSM checkpoint"); /* Now the file is written, get the chunk size. */ WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk)); /* Update the flush timestamp to help track ongoing progress. */ WT_ERR(__wt_epoch(session, &lsm_tree->last_flush_ts)); ++lsm_tree->chunks_flushed; /* Lock the tree, mark the chunk as on disk and update the metadata. */ WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); F_SET(chunk, WT_LSM_CHUNK_ONDISK); ret = __wt_lsm_meta_write(session, lsm_tree); ++lsm_tree->dsk_gen; /* Update the throttle time. */ __wt_lsm_tree_throttle(session, lsm_tree, true); WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); if (ret != 0) WT_ERR_MSG(session, ret, "LSM metadata write"); WT_PUBLISH(chunk->flushing, 0); flush_set = false; /* * Clear the no-eviction flag so the primary can be evicted and * eventually closed. Only do this once the checkpoint has succeeded: * otherwise, accessing the leaf page during the checkpoint can trigger * forced eviction. */ WT_ERR(__wt_session_get_btree(session, chunk->uri, NULL, NULL, 0)); __wt_btree_evictable(session, true); WT_ERR(__wt_session_release_btree(session)); /* Make sure we aren't pinning a transaction ID. */ __wt_txn_release_snapshot(session); WT_ERR(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointed %s", chunk->uri)); /* Schedule a bloom filter create for our newly flushed chunk. */ if (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF)) WT_ERR(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_BLOOM, 0, lsm_tree)); else WT_ERR(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_MERGE, 0, lsm_tree)); err: if (flush_set) WT_PUBLISH(chunk->flushing, 0); return (ret); }