/*
 * __wt_metadata_update --
 *	Store a key/value pair in the metadata, going through the turtle file
 *	for turtle keys and through a metadata cursor otherwise.
 */
int
__wt_metadata_update(
    WT_SESSION_IMPL *session, const char *key, const char *value)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;

	WT_RET(__wt_verbose(session, WT_VERB_METADATA,
	    "Update: key: %s, value: %s, tracking: %s, %s" "turtle",
	    key, value,
	    WT_META_TRACKING(session) ? "true" : "false",
	    __metadata_turtle(key) ? "" : "not "));

	/* Turtle keys never reach the metadata cursor. */
	if (__metadata_turtle(key)) {
		return (__wt_turtle_update(session, key, value));
	}

	/* Register the change with tracking before the row is written. */
	if (WT_META_TRACKING(session)) {
		WT_RET(__wt_meta_track_update(session, key));
	}

	/* The cursor is opened with the "overwrite" configuration. */
	WT_RET(__wt_metadata_cursor(session, "overwrite", &cursor));
	cursor->set_key(cursor, key);
	cursor->set_value(cursor, value);
	ret = cursor->insert(cursor);

	/* Close the cursor on every path, folding its result into ret. */
	WT_TRET(cursor->close(cursor));
	return (ret);
}
/*
 * __wt_metadata_insert --
 *	Add a new key/value pair to the metadata. Turtle keys are rejected
 *	with EINVAL.
 */
int
__wt_metadata_insert(
    WT_SESSION_IMPL *session, const char *key, const char *value)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;

	WT_RET(__wt_verbose(session, WT_VERB_METADATA,
	    "Insert: key: %s, value: %s, tracking: %s, %s" "turtle",
	    key, value,
	    WT_META_TRACKING(session) ? "true" : "false",
	    __metadata_turtle(key) ? "" : "not "));

	if (__metadata_turtle(key)) {
		WT_RET_MSG(session, EINVAL,
		    "%s: insert not supported on the turtle file", key);
	}

	WT_RET(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, key);
	cursor->set_value(cursor, value);

	/* Only a successful insert is reported to metadata tracking. */
	ret = cursor->insert(cursor);
	if (ret == 0 && WT_META_TRACKING(session))
		ret = __wt_meta_track_insert(session, key);

	/* Close the cursor on every path, folding its result into ret. */
	WT_TRET(cursor->close(cursor));
	return (ret);
}
/*
 * __wt_metadata_update --
 *	Store a key/value pair in the metadata. Turtle keys are written under
 *	the turtle lock; everything else goes through the shared metadata
 *	cursor.
 */
int
__wt_metadata_update(
    WT_SESSION_IMPL *session, const char *key, const char *value)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;

	WT_RET(__wt_verbose(session, WT_VERB_METADATA,
	    "Update: key: %s, value: %s, tracking: %s, %s" "turtle",
	    key, value,
	    WT_META_TRACKING(session) ? "true" : "false",
	    __metadata_turtle(key) ? "" : "not "));

	/* Turtle keys: update the turtle file while holding its lock. */
	if (__metadata_turtle(key)) {
		WT_WITH_TURTLE_LOCK(session, ret,
		    ret = __wt_turtle_update(session, key, value));
		return (ret);
	}

	/* Register the change with tracking before the row is written. */
	if (WT_META_TRACKING(session)) {
		WT_RET(__wt_meta_track_update(session, key));
	}

	WT_RET(__wt_metadata_cursor(session, &cursor));

	/* The insert below relies on the cursor having overwrite set. */
	WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_OVERWRITE));

	cursor->set_key(cursor, key);
	cursor->set_value(cursor, value);
	ret = cursor->insert(cursor);

	/* Release the cursor on every path, folding its result into ret. */
	WT_TRET(__wt_metadata_cursor_release(session, &cursor));
	return (ret);
}
/*
 * __wt_metadata_remove --
 *	Delete a key from the metadata. Turtle keys are rejected with EINVAL.
 */
int
__wt_metadata_remove(WT_SESSION_IMPL *session, const char *key)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;

	WT_RET(__wt_verbose(session, WT_VERB_METADATA,
	    "Remove: key: %s, tracking: %s, %s" "turtle",
	    key,
	    WT_META_TRACKING(session) ? "true" : "false",
	    __metadata_turtle(key) ? "" : "not "));

	if (__metadata_turtle(key)) {
		WT_RET_MSG(session, EINVAL,
		    "%s: remove not supported on the turtle file", key);
	}

	WT_RET(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, key);

	/* The row must exist before it is tracked and removed. */
	WT_ERR(cursor->search(cursor));
	if (WT_META_TRACKING(session)) {
		WT_ERR(__wt_meta_track_update(session, key));
	}
	ret = cursor->remove(cursor);

err:
	/* Close the cursor on every path, folding its result into ret. */
	WT_TRET(cursor->close(cursor));
	return (ret);
}
/*
 * __rename_file --
 *	WT_SESSION::rename for a file: move the metadata entry from the old
 *	URI to the new one and rename the underlying file.
 */
static int
__rename_file(
    WT_SESSION_IMPL *session, const char *uri, const char *newuri)
{
	WT_DECL_RET;
	bool exist;
	const char *filename, *newfile;
	char *newvalue, *oldvalue;

	newvalue = oldvalue = NULL;

	/*
	 * Both names must carry the "file:" prefix. The prefix-skipped
	 * pointers are used for filesystem calls, the full URIs for metadata.
	 */
	filename = uri;
	newfile = newuri;
	if (!WT_PREFIX_SKIP(filename, "file:") ||
	    !WT_PREFIX_SKIP(newfile, "file:"))
		return (EINVAL);

	/* Close any btree handles in the file. */
	WT_WITH_HANDLE_LIST_LOCK(session,
	    ret = __wt_conn_dhandle_close_all(session, uri, false));
	WT_ERR(ret);

	/*
	 * Look up the source first: a missing source yields WT_NOTFOUND,
	 * matching table rename (the session layer later maps it to ENOENT).
	 */
	WT_ERR(__wt_metadata_search(session, uri, &oldvalue));

	/*
	 * The destination name must be unused in the metadata: success here
	 * means it already exists, WT_NOTFOUND is the expected result, and
	 * anything else is a real error.
	 */
	ret = __wt_metadata_search(session, newuri, &newvalue);
	if (ret == 0) {
		WT_ERR_MSG(session, EEXIST, "%s", newuri);
		/* NOTREACHED */
	}
	if (ret != WT_NOTFOUND) {
		WT_ERR(ret);
	}

	/* The destination must be unused in the filesystem as well. */
	WT_ERR(__wt_fs_exist(session, newfile, &exist));
	if (exist) {
		WT_ERR_MSG(session, EEXIST, "%s", newfile);
	}

	/* Swap the metadata entry over to the new name. */
	WT_ERR(__wt_metadata_remove(session, uri));
	WT_ERR(__wt_metadata_insert(session, newuri, oldvalue));

	/* Rename the underlying file. */
	WT_ERR(__wt_fs_rename(session, filename, newfile, false));

	/* Report the file operation to metadata tracking when it is on. */
	if (WT_META_TRACKING(session)) {
		WT_ERR(__wt_meta_track_fileop(session, uri, newuri));
	}

err:
	__wt_free(session, newvalue);
	__wt_free(session, oldvalue);
	return (ret);
}
/*
 * __conn_btree_apply_internal --
 *	Run a function against one open btree handle.
 */
static int
__conn_btree_apply_internal(WT_SESSION_IMPL *session,
    WT_DATA_HANDLE *dhandle,
    int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
{
	WT_DECL_RET;

	/*
	 * Bring the handle into the session's handle cache so it stays
	 * referenced: other internal code (e.g. LSM cleaning up obsolete
	 * chunks) could otherwise drop it, and holding the metadata lock is
	 * not enough to prevent that.
	 */
	ret = __wt_session_get_btree(session,
	    dhandle->name, dhandle->checkpoint, NULL, 0);

	/* A busy handle is retried through the single-handle path. */
	if (ret == EBUSY) {
		return (__wt_conn_btree_apply_single(session,
		    dhandle->name, dhandle->checkpoint, func, cfg));
	}
	if (ret != 0) {
		return (ret);
	}

	ret = func(session, cfg);

	/*
	 * With tracking on, the handle is passed to the tracking code;
	 * otherwise it is released here.
	 */
	if (WT_META_TRACKING(session)) {
		WT_TRET(__wt_meta_track_handle_lock(session, 0));
	} else {
		WT_TRET(__wt_session_release_btree(session));
	}
	return (ret);
}
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 *	Decrements the tracking nest count and does nothing more unless it
 *	reaches zero. Otherwise every tracked operation is passed to
 *	__meta_track_apply, newest first, and, if the operation succeeded
 *	and a metadata file is open, the metadata is checkpointed.
 *
 *	Returns 0 on success or the error from applying or checkpointing.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, int unroll)
{
	WT_BTREE *saved_btree;
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;
	const char *ckpt_cfg[] = API_CONF_DEFAULTS(session, checkpoint, NULL);

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	/* Nested call: the outermost caller does the work. */
	if (--session->meta_track_nest != 0)
		return (0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * Walk the entries newest-to-oldest. Compare before decrementing so
	 * the pointer never moves to one element before the start of the
	 * array, which is undefined behavior.
	 */
	while (trk > trk_orig) {
		--trk;
		WT_TRET(__meta_track_apply(session, trk, unroll));
	}

	/* If the operation succeeded, checkpoint the metadata. */
	if (!unroll && ret == 0 && session->metafile != NULL) {
		/* Point the session at the metadata btree for the call. */
		saved_btree = session->btree;
		session->btree = session->metafile;
		ret = __wt_checkpoint(session, ckpt_cfg);
		session->btree = saved_btree;
	}

	return (ret);
}
/*
 * __wt_metadata_search --
 *	Look up a key in the metadata and hand back a copy of its value.
 *	The copy is allocated; the caller must free it.
 */
int
__wt_metadata_search(
    WT_SESSION_IMPL *session, const char *key, char **valuep)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;
	const char *value;

	/* Start from a known state so callers can free unconditionally. */
	*valuep = NULL;

	WT_RET(__wt_verbose(session, WT_VERB_METADATA,
	    "Search: key: %s, tracking: %s, %s" "turtle",
	    key,
	    WT_META_TRACKING(session) ? "true" : "false",
	    __metadata_turtle(key) ? "" : "not "));

	/* Turtle keys are read from the turtle file. */
	if (__metadata_turtle(key)) {
		return (__wt_turtle_read(session, key, valuep));
	}

	WT_RET(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, key);

	/* Search, fetch the value, copy it; stop at the first failure. */
	ret = cursor->search(cursor);
	if (ret == 0)
		ret = cursor->get_value(cursor, &value);
	if (ret == 0)
		ret = __wt_strdup(session, value, valuep);

	/* Close the cursor on every path, folding its result into ret. */
	WT_TRET(cursor->close(cursor));
	return (ret);
}
/*
 * __rename_file --
 *	Rename a file: move the metadata entry from the old URI to the new
 *	one and rename the underlying file.
 */
static int
__rename_file(WT_SESSION_IMPL *session, const char *uri, const char *newuri)
{
	WT_DECL_RET;
	int exist;
	const char *filename, *newfile;
	char *newvalue, *oldvalue;

	newvalue = oldvalue = NULL;

	/*
	 * Both names must carry the "file:" prefix. The prefix-skipped
	 * pointers are used for filesystem calls, the full URIs for metadata.
	 */
	filename = uri;
	newfile = newuri;
	if (!WT_PREFIX_SKIP(filename, "file:") ||
	    !WT_PREFIX_SKIP(newfile, "file:"))
		return (EINVAL);

	/* Close the data source handles that correspond to this file. */
	WT_WITH_DHANDLE_LOCK(session,
	    ret = __wt_conn_dhandle_close_all(session, uri, 0));
	WT_ERR(ret);

	/*
	 * Look up the metadata for the old URI first: a missing source
	 * yields WT_NOTFOUND, matching table rename (the session layer later
	 * maps it to ENOENT).
	 */
	WT_ERR(__wt_metadata_search(session, uri, &oldvalue));

	/*
	 * Then look up the new URI: success means the name is already taken,
	 * WT_NOTFOUND is the expected result, anything else is a real error.
	 */
	ret = __wt_metadata_search(session, newuri, &newvalue);
	if (ret == 0) {
		WT_ERR_MSG(session, EEXIST, "%s", newuri);
		/* NOTREACHED */
	}
	if (ret != WT_NOTFOUND) {
		WT_ERR(ret);
	}

	/* The destination must be unused in the filesystem as well. */
	WT_ERR(__wt_exist(session, newfile, &exist));
	if (exist) {
		WT_ERR_MSG(session, EEXIST, "%s", newfile);
	}

	/* Swap the metadata entry over to the new name. */
	WT_ERR(__wt_metadata_remove(session, uri));
	WT_ERR(__wt_metadata_insert(session, newuri, oldvalue));

	/* Rename the underlying file. */
	WT_ERR(__wt_rename(session, filename, newfile));

	/* Report the file operation to metadata tracking when it is on. */
	if (WT_META_TRACKING(session)) {
		WT_ERR(__wt_meta_track_fileop(session, uri, newuri));
	}

err:
	__wt_free(session, newvalue);
	__wt_free(session, oldvalue);
	return (ret);
}
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 *	need_sync: flush the metadata once the operations have been applied.
 *	unroll: passed to __meta_track_apply for each entry; also suppresses
 *	the metadata flush.
 *
 *	Returns 0 for nested calls and when nothing was tracked; otherwise
 *	the result of applying the entries and, if requested, the flush.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, int need_sync, int unroll)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* If it was a nested transaction, there is nothing to do. */
	if (--session->meta_track_nest != 0)
		return (0);

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * If there were no operations logged, return now and avoid unnecessary
	 * metadata checkpoints. For example, this happens if attempting to
	 * create a data source that already exists (or drop one that doesn't).
	 */
	if (trk == trk_orig)
		return (0);

	/*
	 * Walk the entries newest-to-oldest. Compare before decrementing so
	 * the pointer never moves to one element before the start of the
	 * array, which is undefined behavior.
	 */
	while (trk > trk_orig) {
		--trk;
		WT_TRET(__meta_track_apply(session, trk, unroll));
	}

	/*
	 * Unroll operations don't need to flush the metadata.
	 *
	 * Also, if we don't have the metadata handle (e.g, we're in the
	 * process of creating the metadata), we can't sync it.
	 */
	if (unroll || ret != 0 || !need_sync || session->meta_dhandle == NULL)
		return (ret);

	/* If we're logging, make sure the metadata update was flushed. */
	if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
		/* Skipped when the log is already configured to sync. */
		if (!FLD_ISSET(S2C(session)->txn_logsync,
		    WT_LOG_DSYNC | WT_LOG_FSYNC))
			WT_WITH_DHANDLE(session, session->meta_dhandle,
			    ret = __wt_txn_checkpoint_log(session,
			    0, WT_TXN_LOG_CKPT_SYNC, NULL));
	} else {
		/* Not logging: checkpoint the metadata, then sync it. */
		WT_WITH_DHANDLE(session, session->meta_dhandle,
		    ret = __wt_checkpoint(session, NULL));
		WT_RET(ret);
		WT_WITH_DHANDLE(session, session->meta_dhandle,
		    ret = __wt_checkpoint_sync(session, NULL));
	}

	return (ret);
}
/*
 * __rename_file --
 *	WT_SESSION::rename for a file: move the metadata entry from the old
 *	URI to the new one and rename the underlying file.
 */
static int
__rename_file(
    WT_SESSION_IMPL *session, const char *uri, const char *newuri)
{
	WT_DECL_RET;
	int exist;
	const char *filename, *newfile, *value;

	value = NULL;

	/*
	 * Both names must carry the "file:" prefix. The prefix-skipped
	 * pointers are used for filesystem calls, the full URIs for metadata.
	 */
	filename = uri;
	newfile = newuri;
	if (!WT_PREFIX_SKIP(filename, "file:") ||
	    !WT_PREFIX_SKIP(newfile, "file:"))
		return (EINVAL);

	/* Close any btree handles in the file. */
	WT_RET(__wt_conn_btree_close_all(session, uri));

	/*
	 * The proposed name must be unused in the metadata: success here
	 * means it already exists, WT_NOTFOUND is the expected result, and
	 * anything else is a real error.
	 */
	ret = __wt_metadata_read(session, newuri, &value);
	if (ret == 0) {
		WT_ERR_MSG(session, EEXIST, "%s", newuri);
	}
	if (ret != WT_NOTFOUND) {
		WT_ERR(ret);
	}
	ret = 0;

	/* The proposed name must be unused in the filesystem as well. */
	WT_ERR(__wt_exist(session, newfile, &exist));
	if (exist) {
		WT_ERR_MSG(session, EEXIST, "%s", newfile);
	}

	/* Replace the old file entries with new file entries. */
	WT_ERR(__wt_metadata_read(session, uri, &value));
	WT_ERR(__wt_metadata_remove(session, uri));
	WT_ERR(__wt_metadata_insert(session, newuri, value));

	/* Rename the underlying file. */
	WT_ERR(__wt_rename(session, filename, newfile));

	/* Report the file operation to metadata tracking when it is on. */
	if (WT_META_TRACKING(session)) {
		WT_ERR(__wt_meta_track_fileop(session, uri, newuri));
	}

err:
	__wt_free(session, value);
	return (ret);
}
/*
 * __wt_metadata_search --
 *	Look up a key in the metadata and hand back a copy of its value.
 *	The copy is allocated; the caller must free it. On error, any value
 *	is freed before returning.
 */
int
__wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;
	const char *value;

	*valuep = NULL;

	__wt_verbose(session, WT_VERB_METADATA,
	    "Search: key: %s, tracking: %s, %s" "turtle",
	    key,
	    WT_META_TRACKING(session) ? "true" : "false",
	    __metadata_turtle(key) ? "" : "not ");

	if (__metadata_turtle(key)) {
		/*
		 * The turtle read should only set the value on success, but
		 * Coverity is not convinced. This path is hot enough that the
		 * complaints pile up, so free on error to keep it quiet.
		 */
		ret = __wt_turtle_read(session, key, valuep);
		if (ret != 0)
			__wt_free(session, *valuep);
		return (ret);
	}

	/*
	 * Metadata is always read at read-uncommitted isolation: once a
	 * schema-level operation completes, later operations must see the
	 * current checkpoint metadata or they may try to read blocks that
	 * have already been freed from a file. In-flight metadata updates
	 * are protected by non-transactional means (such as the schema and
	 * metadata locks) instead.
	 */
	WT_RET(__wt_metadata_cursor(session, &cursor));
	cursor->set_key(cursor, key);
	WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
	    ret = cursor->search(cursor));

	/* Fetch and copy the value only if the search found the key. */
	if (ret == 0)
		ret = cursor->get_value(cursor, &value);
	if (ret == 0)
		ret = __wt_strdup(session, value, valuep);

	/* Release the cursor on every path, folding its result into ret. */
	WT_TRET(__wt_metadata_cursor_release(session, &cursor));

	/* Never hand back a value together with an error. */
	if (ret != 0)
		__wt_free(session, *valuep);
	return (ret);
}
/*
 * __wt_meta_btree_apply --
 *	Run a function against every "file:" object listed in the metadata,
 *	skipping the metadata file itself.
 */
int
__wt_meta_btree_apply(WT_SESSION_IMPL *session,
    int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
{
	WT_CURSOR *cursor;
	WT_DATA_HANDLE *saved_dhandle;
	WT_DECL_RET;
	const char *uri;
	int cmp, tret;

	/* The caller's data handle is restored before returning. */
	saved_dhandle = session->dhandle;

	WT_RET(__wt_metadata_cursor(session, NULL, &cursor));

	/*
	 * Position on the first key at or after "file:". If search_near
	 * landed on a smaller key, step forward once.
	 */
	cursor->set_key(cursor, "file:");
	tret = cursor->search_near(cursor, &cmp);
	if (tret == 0 && cmp < 0)
		tret = cursor->next(cursor);

	while (tret == 0) {
		WT_ERR(cursor->get_key(cursor, &uri));

		/* Stop at the first key that is not a "file:" URI. */
		if (!WT_PREFIX_MATCH(uri, "file:"))
			break;

		if (strcmp(uri, WT_METAFILE_URI) != 0) {
			/*
			 * Bring the handle into the session's handle cache so
			 * it stays referenced: other internal code (e.g. LSM
			 * cleaning up obsolete chunks) could otherwise drop
			 * it, and holding the metadata lock is not enough.
			 */
			ret = __wt_session_get_btree(
			    session, uri, NULL, NULL, 0);
			if (ret == 0) {
				WT_SAVE_DHANDLE(session,
				    ret = func(session, cfg));
				if (WT_META_TRACKING(session)) {
					WT_TRET(__wt_meta_track_handle_lock(
					    session, 0));
				} else {
					WT_TRET(__wt_session_release_btree(
					    session));
				}
			} else if (ret == EBUSY) {
				/* Busy handles take the single-handle path. */
				ret = __wt_conn_btree_apply_single(
				    session, uri, NULL, func, cfg);
			}
			WT_ERR(ret);
		}

		tret = cursor->next(cursor);
	}

	/* Running off the end of the metadata is not an error. */
	if (tret != WT_NOTFOUND)
		WT_TRET(tret);

err:
	WT_TRET(cursor->close(cursor));
	session->dhandle = saved_dhandle;
	return (ret);
}
/*
 * __wt_metadata_remove --
 *	Delete a key from the metadata. Turtle keys are rejected with EINVAL.
 */
int
__wt_metadata_remove(WT_SESSION_IMPL *session, const char *key)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;

	if (__metadata_turtle(key)) {
		WT_RET_MSG(session, EINVAL,
		    "%s: remove not supported on the turtle file", key);
	}

	WT_RET(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, key);

	/* The row must exist before it is tracked and removed. */
	WT_ERR(cursor->search(cursor));
	if (WT_META_TRACKING(session)) {
		WT_ERR(__wt_meta_track_update(session, key));
	}
	ret = cursor->remove(cursor);

err:
	/* Close the cursor on every path, folding its result into ret. */
	WT_TRET(cursor->close(cursor));
	return (ret);
}
/*
 * __wt_metadata_insert --
 *	Add a new key/value pair to the metadata. Turtle keys are rejected
 *	with EINVAL.
 */
int
__wt_metadata_insert(
    WT_SESSION_IMPL *session, const char *key, const char *value)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;

	if (__metadata_turtle(key)) {
		WT_RET_MSG(session, EINVAL,
		    "%s: insert not supported on the turtle file", key);
	}

	WT_RET(__wt_metadata_cursor(session, NULL, &cursor));
	cursor->set_key(cursor, key);
	cursor->set_value(cursor, value);

	/* Only a successful insert is reported to metadata tracking. */
	ret = cursor->insert(cursor);
	if (ret == 0 && WT_META_TRACKING(session))
		ret = __wt_meta_track_insert(session, key);

	/* Close the cursor on every path, folding its result into ret. */
	WT_TRET(cursor->close(cursor));
	return (ret);
}
/*
 * __wt_metadata_update --
 *	Store a key/value pair in the metadata, going through the turtle file
 *	for turtle keys and through a metadata cursor otherwise.
 */
int
__wt_metadata_update(
    WT_SESSION_IMPL *session, const char *key, const char *value)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;

	/* Turtle keys never reach the metadata cursor. */
	if (__metadata_turtle(key)) {
		return (__wt_turtle_update(session, key, value));
	}

	/* Register the change with tracking before the row is written. */
	if (WT_META_TRACKING(session)) {
		WT_RET(__wt_meta_track_update(session, key));
	}

	/* The cursor is opened with the "overwrite" configuration. */
	WT_RET(__wt_metadata_cursor(session, "overwrite", &cursor));
	cursor->set_key(cursor, key);
	cursor->set_value(cursor, value);
	ret = cursor->insert(cursor);

	/* Close the cursor on every path, folding its result into ret. */
	WT_TRET(cursor->close(cursor));
	return (ret);
}
/*
 * __wt_meta_track_sub_off --
 *	Commit a group of operations independent of the main transaction.
 *
 *	Does nothing unless tracking is on and a sub-group has been started.
 *	Otherwise applies every entry from the sub-group start to the current
 *	end, newest first, then makes the sub-group start the next free entry.
 *
 *	Returns 0 or the error from applying the entries.
 */
int
__wt_meta_track_sub_off(WT_SESSION_IMPL *session)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;

	if (!WT_META_TRACKING(session) || session->meta_track_sub == NULL)
		return (0);

	trk_orig = session->meta_track_sub;
	trk = session->meta_track_next;

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * Walk the entries newest-to-oldest. Compare before decrementing so
	 * the pointer never moves below the first entry of the group (which
	 * may be the first element of the array): forming such a pointer is
	 * undefined behavior.
	 */
	while (trk > trk_orig) {
		--trk;
		WT_TRET(__meta_track_apply(session, trk));
	}

	/* Tracking resumes at the slot where the sub-group began. */
	session->meta_track_next = trk_orig;
	return (ret);
}
/*
 * __wt_meta_track_find_handle --
 *	Check if we have already seen a handle.
 *
 *	Scans the tracked operations, newest first, for a WT_ST_LOCK entry
 *	whose data handle has the given name and the same checkpoint (both
 *	NULL, or both non-NULL and equal strings).
 *
 *	Returns 0 if such an entry exists, WT_NOTFOUND otherwise.
 */
int
__wt_meta_track_find_handle(
    WT_SESSION_IMPL *session, const char *name, const char *checkpoint)
{
	WT_META_TRACK *trk, *trk_orig;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/*
	 * Compare before decrementing so the pointer never moves to one
	 * element before the start of the array, which is undefined behavior.
	 */
	while (trk > trk_orig) {
		--trk;
		if (trk->op != WT_ST_LOCK)
			continue;
		if (strcmp(trk->dhandle->name, name) != 0)
			continue;

		/* Neither side names a checkpoint: a match. */
		if (trk->dhandle->checkpoint == NULL && checkpoint == NULL)
			return (0);

		/*
		 * Only compare the names when both are present: passing a
		 * NULL checkpoint to strcmp is undefined, and a handle with a
		 * checkpoint never matches a request without one.
		 */
		if (trk->dhandle->checkpoint != NULL && checkpoint != NULL &&
		    strcmp(trk->dhandle->checkpoint, checkpoint) == 0)
			return (0);
	}

	return (WT_NOTFOUND);
}
/*
 * __wt_metadata_search --
 *	Look up a key in the metadata and hand back a copy of its value.
 *	The copy is allocated; the caller must free it.
 */
int
__wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;
	const char *value;

	*valuep = NULL;

	WT_RET(__wt_verbose(session, WT_VERB_METADATA,
	    "Search: key: %s, tracking: %s, %s" "turtle",
	    key,
	    WT_META_TRACKING(session) ? "true" : "false",
	    __metadata_turtle(key) ? "" : "not "));

	/* Turtle keys are read from the turtle file. */
	if (__metadata_turtle(key)) {
		return (__wt_turtle_read(session, key, valuep));
	}

	/*
	 * Metadata is always read at read-uncommitted isolation: once a
	 * schema-level operation completes, later operations must see the
	 * current checkpoint metadata or they may try to read blocks that
	 * have already been freed from a file. In-flight metadata updates
	 * are protected by non-transactional means (such as the schema and
	 * metadata locks) instead.
	 */
	WT_RET(__wt_metadata_cursor(session, &cursor));
	cursor->set_key(cursor, key);
	WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
	    ret = cursor->search(cursor));

	/* Fetch and copy the value only if the search found the key. */
	if (ret == 0)
		ret = cursor->get_value(cursor, &value);
	if (ret == 0)
		ret = __wt_strdup(session, value, valuep);

	/* Release the cursor on every path, folding its result into ret. */
	WT_TRET(__wt_metadata_cursor_release(session, &cursor));
	return (ret);
}
/*
 * __snapshot_worker --
 *	Snapshot the tree.
 *
 *	Builds the list of snapshots to delete and the one to add according
 *	to "op", flushes the cache with that list, and then writes the updated
 *	list back to the metadata.
 *
 *	name: snapshot name; may be NULL for op == SNAPSHOT, in which case
 *	the internal default name is used.
 *	discard: non-zero to flush with WT_SYNC_DISCARD instead of WT_SYNC.
 *	op: which create/drop variant to perform.
 *
 *	The snapshot lock taken on entry is released before returning, unless
 *	the checkpoint was handed to metadata tracking ("tracked" is set).
 *	NOTE(review): presumably the tracking code releases it later; confirm.
 */
static int
__snapshot_worker(
    WT_SESSION_IMPL *session, const char *name, int discard, snapshot_op op)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_SNAPSHOT *deleted, *snap, *snapbase;
	int force, matched, tracked;

	btree = session->btree;
	matched = tracked = 0;
	snap = snapbase = NULL;

	/* Snapshots are single-threaded. */
	__wt_writelock(session, btree->snaplock);

	/*
	 * Set the name to the default, if we aren't provided one.
	 * "force" is cleared only for an unnamed SNAPSHOT; it is passed to the
	 * cache flush and decides below whether a flush that produced no
	 * snapshot is an error.
	 */
	if (op == SNAPSHOT && name == NULL) {
		force = 0;
		name = WT_INTERNAL_SNAPSHOT;
	} else
		force = 1;

	/*
	 * Get the list of snapshots for this file.  If there's no reference,
	 * this file is dead.  Discard it from the cache without bothering to
	 * write any dirty pages.
	 */
	if ((ret =
	    __wt_meta_snaplist_get(session, btree->name, &snapbase)) != 0) {
		if (ret == WT_NOTFOUND)
			ret = __wt_bt_cache_flush(
			    session, NULL, WT_SYNC_DISCARD_NOWRITE, 0);
		goto err;
	}

	/*
	 * In every case below the new entry is written through "snap" right
	 * after the WT_SNAPSHOT_FOREACH loop finishes, i.e. at whatever slot
	 * the loop leaves "snap" on.
	 * NOTE(review): this assumes the loop stops on a spare terminating
	 * slot at the end of the list; confirm against WT_SNAPSHOT_FOREACH.
	 */
	switch (op) {
	case SNAPSHOT:
		/*
		 * Create a new, possibly named, snapshot.  Review existing
		 * snapshots, deleting default snapshots and snapshots with
		 * matching names, add the new snapshot entry at the end of
		 * the list.
		 */
		WT_SNAPSHOT_FOREACH(snapbase, snap)
			if (strcmp(snap->name, name) == 0 ||
			    strcmp(snap->name, WT_INTERNAL_SNAPSHOT) == 0)
				F_SET(snap, WT_SNAP_DELETE);

		WT_ERR(__wt_strdup(session, name, &snap->name));
		F_SET(snap, WT_SNAP_ADD);
		break;
	case SNAPSHOT_DROP:
		/*
		 * Drop all snapshots with matching names.
		 * Drop all snapshots with the default name.
		 * Add a new snapshot with the default name.
		 */
		WT_SNAPSHOT_FOREACH(snapbase, snap) {
			/*
			 * There should be only one snapshot with a matching
			 * name, but it doesn't hurt to check the rest.
			 */
			if (strcmp(snap->name, name) == 0)
				matched = 1;
			else if (strcmp(snap->name, WT_INTERNAL_SNAPSHOT) != 0)
				continue;
			F_SET(snap, WT_SNAP_DELETE);
		}
		/* The shared error message lives in SNAPSHOT_DROP_TO. */
		if (!matched)
			goto nomatch;

		WT_ERR(__wt_strdup(session, WT_INTERNAL_SNAPSHOT, &snap->name));
		F_SET(snap, WT_SNAP_ADD);
		break;
	case SNAPSHOT_DROP_ALL:
		/*
		 * Drop all snapshots.
		 * Add a new snapshot with the default name.
		 */
		WT_SNAPSHOT_FOREACH(snapbase, snap)
			F_SET(snap, WT_SNAP_DELETE);

		WT_ERR(__wt_strdup(session, WT_INTERNAL_SNAPSHOT, &snap->name));
		F_SET(snap, WT_SNAP_ADD);
		break;
	case SNAPSHOT_DROP_FROM:
		/*
		 * Drop all snapshots after, and including, the named snapshot.
		 * Drop all snapshots with the default name.
		 * Add a new snapshot with the default name.
		 */
		WT_SNAPSHOT_FOREACH(snapbase, snap) {
			/* Set "matched" first so the named entry is dropped. */
			if (strcmp(snap->name, name) == 0)
				matched = 1;
			if (matched ||
			    strcmp(snap->name, WT_INTERNAL_SNAPSHOT) == 0)
				F_SET(snap, WT_SNAP_DELETE);
		}
		if (!matched)
			goto nomatch;

		WT_ERR(__wt_strdup(session, WT_INTERNAL_SNAPSHOT, &snap->name));
		F_SET(snap, WT_SNAP_ADD);
		break;
	case SNAPSHOT_DROP_TO:
		/*
		 * Drop all snapshots before, and including, the named snapshot.
		 * Drop all snapshots with the default name.
		 * Add a new snapshot with the default name.
		 */
		WT_SNAPSHOT_FOREACH(snapbase, snap) {
			/* Mark before testing so the named entry is dropped. */
			if (!matched ||
			    strcmp(snap->name, WT_INTERNAL_SNAPSHOT) == 0)
				F_SET(snap, WT_SNAP_DELETE);
			if (strcmp(snap->name, name) == 0)
				matched = 1;
		}
		/*
		 * The label sits inside the if so the other drop cases can
		 * jump straight to the error message.
		 */
		if (!matched)
nomatch:		WT_ERR_MSG(session,
			    EINVAL, "no snapshot named %s was found", name);

		WT_ERR(__wt_strdup(session, WT_INTERNAL_SNAPSHOT, &snap->name));
		F_SET(snap, WT_SNAP_ADD);
		break;
	}

	/*
	 * Lock the snapshots that will be deleted.
	 *
	 * Snapshots are only locked when tracking is enabled, which covers
	 * sync and drop operations, but not close.  The reasoning is that
	 * there should be no access to a snapshot during close, because any
	 * thread accessing a snapshot will also have the current file handle
	 * open.
	 */
	if (WT_META_TRACKING(session))
		WT_SNAPSHOT_FOREACH(snapbase, deleted)
			if (F_ISSET(deleted, WT_SNAP_DELETE))
				WT_ERR(__wt_session_lock_snapshot(session,
				    deleted->name, WT_BTREE_EXCLUSIVE));

	/* Flush the cache, passing the snapshot list built above. */
	WT_ERR(__wt_bt_cache_flush(
	    session, snapbase, discard ? WT_SYNC_DISCARD : WT_SYNC, force));

	/*
	 * If there was a snapshot, update the metadata.  An empty raw.data on
	 * the added entry is treated as "no snapshot was created", which is
	 * an error only when the snapshot was forced.
	 */
	if (snap->raw.data == NULL) {
		if (force)
			WT_ERR_MSG(session,
			    EINVAL, "cache flush failed to create a snapshot");
	} else {
		WT_ERR(__wt_meta_snaplist_set(session, btree->name, snapbase));
		/*
		 * If tracking is enabled, defer making pages available until
		 * the end of the transaction.  The exception is if the handle
		 * is being discarded: in that case, it will be gone by the
		 * time we try to apply or unroll the meta tracking event.
		 */
		if (WT_META_TRACKING(session) && !discard) {
			WT_ERR(__wt_meta_track_checkpoint(session));
			tracked = 1;
		} else
			WT_ERR(__wt_bm_snapshot_resolve(session, snapbase));
	}

err:	__wt_meta_snaplist_free(session, snapbase);
	/* Keep the snapshot lock when the checkpoint was tracked. */
	if (!tracked)
		__wt_rwunlock(session, btree->snaplock);

	return (ret);
}
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 *	need_sync: flush the metadata before the tracked operations are
 *	applied.
 *	unroll: undo the tracked operations instead of applying them.
 *
 *	Returns 0 for nested calls; otherwise the error (if any) from the
 *	commit/flush phase. A failure while applying or unrolling the tracked
 *	operations is reported through WT_PANIC_RET.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;
	WT_SESSION_IMPL *ckpt_session;
	int saved_ret;
	bool did_drop;

	saved_ret = 0;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* If it was a nested transaction, there is nothing to do. */
	if (--session->meta_track_nest != 0)
		return (0);

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * If there were no operations logged, skip unnecessary metadata
	 * checkpoints. For example, this happens if attempting to create a
	 * data source that already exists (or drop one that doesn't).
	 */
	if (trk == trk_orig)
		goto err;

	/* Unrolling doesn't require syncing the metadata. */
	if (unroll)
		goto err;

	if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
		F_CLR(session, WT_SESSION_SCHEMA_TXN);
#ifdef WT_ENABLE_SCHEMA_TXN
		WT_ERR(__wt_txn_commit(session, NULL));
		__wt_errx(session, "TRACK: Commit internal schema txn");
#endif
	}

	/*
	 * If we don't have the metadata cursor (e.g, we're in the process of
	 * creating the metadata), we can't sync it.
	 */
	if (!need_sync || session->meta_cursor == NULL ||
	    F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
		goto err;

	/* If we're logging, make sure the metadata update was flushed. */
	if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
		WT_WITH_DHANDLE(session,
		    WT_SESSION_META_DHANDLE(session),
		    ret = __wt_txn_checkpoint_log(
			session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
	} else {
		WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
		ckpt_session = S2C(session)->meta_ckpt_session;
		/*
		 * If this operation is part of a running transaction, that
		 * should be included in the checkpoint.
		 */
		ckpt_session->txn.id = session->txn.id;
		WT_ASSERT(session,
		    !F_ISSET(session, WT_SESSION_LOCKED_METADATA));
		WT_WITH_DHANDLE(ckpt_session, WT_SESSION_META_DHANDLE(session),
		    WT_WITH_METADATA_LOCK(ckpt_session,
			ret = __wt_checkpoint(ckpt_session, NULL)));
		ckpt_session->txn.id = WT_TXN_NONE;
		if (ret == 0)
			WT_WITH_DHANDLE(session,
			    WT_SESSION_META_DHANDLE(session),
			    ret = __wt_checkpoint_sync(session, NULL));
	}

err:
	/*
	 * Undo any tracked operations on failure.
	 * Apply any tracked operations post-commit.
	 */
	did_drop = false;
	if (unroll || ret != 0) {
		/* Keep the commit/flush error; ret now tracks the unroll. */
		saved_ret = ret;
		ret = 0;
		/*
		 * Unroll newest-to-oldest. Compare before decrementing so the
		 * pointer never moves to one element before the start of the
		 * array, which is undefined behavior.
		 */
		while (trk > trk_orig) {
			--trk;
			did_drop = did_drop || trk->op == WT_ST_DROP_COMMIT;
			WT_TRET(__meta_track_unroll(session, trk));
		}
	} else
		/* Apply oldest-to-newest. */
		for (; trk_orig < trk; trk_orig++) {
			did_drop = did_drop ||
			    trk_orig->op == WT_ST_DROP_COMMIT;
			WT_TRET(__meta_track_apply(session, trk_orig));
		}

	if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
		F_CLR(session, WT_SESSION_SCHEMA_TXN);
		/*
		 * We should have committed above unless we're unrolling, there
		 * was an error or the operation was a noop.
		 */
		WT_ASSERT(session, unroll || saved_ret != 0 ||
		    session->txn.mod_count == 0);
#ifdef WT_ENABLE_SCHEMA_TXN
		__wt_err(session, saved_ret,
		    "TRACK: Abort internal schema txn");
		WT_TRET(__wt_txn_rollback(session, NULL));
#endif
	}

	/*
	 * Wake up the sweep thread: particularly for the in-memory
	 * storage engine, we want to reclaim space immediately.
	 */
	if (did_drop && S2C(session)->sweep_cond != NULL)
		__wt_cond_signal(session, S2C(session)->sweep_cond);

	if (ret != 0)
		WT_PANIC_RET(session, ret,
		    "failed to apply or unroll all tracked operations");
	return (saved_ret == 0 ? 0 : saved_ret);
}
/*
 * __drop_table --
 *	WT_SESSION::drop for a table: drop its column groups and indices,
 *	then remove the table's own metadata entry.
 */
static int
__drop_table(
    WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
	WT_COLGROUP *colgroup;
	WT_DECL_RET;
	WT_INDEX *idx;
	WT_TABLE *table;
	u_int i;
	const char *name;
	bool tracked;

	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));

	name = uri;
	WT_PREFIX_SKIP_REQUIRED(session, name, "table:");

	table = NULL;
	tracked = false;

	/*
	 * The table has to be open to reach its column groups and indices.
	 *
	 * Holding it exclusive for the whole drop would be ideal; for now the
	 * global table lock is what stops the table being reopened while it
	 * is dropped. One catch is that WT_WITHOUT_LOCKS can drop and
	 * reacquire the global table lock to avoid deadlocks while waiting
	 * for LSM operations to quiesce.
	 *
	 * Briefly taking the table exclusive guarantees that cursors already
	 * open on it have been closed before the drop continues.
	 */
	WT_ERR(__wt_schema_get_table_uri(session, uri, true,
	    WT_DHANDLE_EXCLUSIVE, &table));
	WT_ERR(__wt_schema_release_table(session, table));
	WT_ERR(__wt_schema_get_table_uri(session, uri, true, 0, &table));

	/*
	 * Column groups: drop the source before removing the metadata entry
	 * so the table's metadata does not become inconsistent if exclusive
	 * access cannot be obtained.
	 */
	for (i = 0; i < WT_COLGROUPS(table); i++) {
		colgroup = table->cgroups[i];
		if (colgroup != NULL) {
			WT_ERR(__wt_schema_drop(
			    session, colgroup->source, cfg));
			WT_ERR(__wt_metadata_remove(session, colgroup->name));
		}
	}

	/* Indices: same ordering, for the same reason. */
	WT_ERR(__wt_schema_open_indices(session, table));
	for (i = 0; i < table->nindices; i++) {
		idx = table->indices[i];
		if (idx != NULL) {
			WT_ERR(__wt_schema_drop(session, idx->source, cfg));
			WT_ERR(__wt_metadata_remove(session, idx->name));
		}
	}

	/* Make sure the table data handle is closed. */
	WT_TRET(__wt_schema_release_table(session, table));
	WT_ERR(__wt_schema_get_table_uri(
	    session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
	F_SET(&table->iface, WT_DHANDLE_DISCARD);

	/* With tracking on, the handle is passed to the tracking code. */
	if (WT_META_TRACKING(session)) {
		WT_WITH_DHANDLE(session, &table->iface,
		    ret = __wt_meta_track_handle_lock(session, false));
		WT_ERR(ret);
		tracked = true;
	}

	/* Remove the metadata entry (ignore missing items). */
	WT_ERR(__wt_metadata_remove(session, uri));

err:
	/* Release the table here only if tracking did not take it over. */
	if (!tracked && table != NULL) {
		WT_TRET(__wt_schema_release_table(session, table));
	}
	return (ret);
}
/*
 * __create_file --
 *	Create a new 'file:' object: create the underlying file, record its
 *	configuration in the metadata (unless it is the metadata file itself)
 *	and open it once to check it was set up correctly.
 */
static int
__create_file(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_DECL_ITEM(val);
	WT_DECL_RET;
	uint32_t allocsize;
	int is_metadata;
	const char *filename, **p;
	const char *filecfg[] =
	    { WT_CONFIG_BASE(session, file_meta), config, NULL, NULL };
	char *fileconf;

	fileconf = NULL;

	is_metadata = strcmp(uri, WT_METAFILE_URI) == 0;

	filename = uri;
	if (!WT_PREFIX_SKIP(filename, "file:")) {
		WT_RET_MSG(session, EINVAL, "Expected a 'file:' URI: %s", uri);
	}

	/*
	 * Check if the file already exists. Any result other than
	 * WT_NOTFOUND ends the call: an existing file is an error only for an
	 * exclusive create, and a failed lookup is returned as is.
	 */
	if (!is_metadata) {
		ret = __wt_metadata_search(session, uri, &fileconf);
		if (ret != WT_NOTFOUND) {
			if (exclusive)
				WT_TRET(EEXIST);
			goto err;
		}
	}

	/* Sanity check the allocation size. */
	WT_RET(__wt_direct_io_size_check(
	    session, filecfg, "allocation_size", &allocsize));

	/* Create the file. */
	WT_ERR(__wt_block_manager_create(session, filename, allocsize));
	if (WT_META_TRACKING(session)) {
		WT_ERR(__wt_meta_track_fileop(session, NULL, uri));
	}

	/*
	 * For an ordinary file, append the file ID and current version
	 * numbers to the passed-in configuration, collapse the stack into a
	 * single string and insert that into the metadata.
	 */
	if (!is_metadata) {
		WT_ERR(__wt_scr_alloc(session, 0, &val));
		WT_ERR(__wt_buf_fmt(session, val,
		    "id=%" PRIu32 ",version=(major=%d,minor=%d)",
		    ++S2C(session)->next_file_id,
		    WT_BTREE_MAJOR_VERSION_MAX, WT_BTREE_MINOR_VERSION_MAX));

		/* Append to the first free slot of the configuration stack. */
		p = filecfg;
		while (*p != NULL)
			++p;
		*p = val->data;

		WT_ERR(__wt_config_collapse(session, filecfg, &fileconf));
		WT_ERR(__wt_metadata_insert(session, uri, fileconf));
	}

	/*
	 * Open the file to check that it was setup correctly. No configuration
	 * is passed: the collapsed configuration was just written into the
	 * metadata and the underlying functions read it from there.
	 *
	 * Keep the handle exclusive until it is released at the end of the
	 * call, otherwise we could race with a drop.
	 */
	WT_ERR(__wt_session_get_btree(
	    session, uri, NULL, NULL, WT_DHANDLE_EXCLUSIVE));
	if (WT_META_TRACKING(session)) {
		WT_ERR(__wt_meta_track_handle_lock(session, 1));
	} else {
		WT_ERR(__wt_session_release_btree(session));
	}

err:
	__wt_scr_free(session, &val);
	__wt_free(session, fileconf);
	return (ret);
}
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 *	need_sync: flush the metadata before the tracked operations are
 *	applied.
 *	unroll: undo the tracked operations instead of applying them.
 *
 *	Returns 0 for nested calls and when nothing was tracked; otherwise
 *	the result of the unroll, the flush or the apply.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;
	WT_SESSION_IMPL *ckpt_session;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* If it was a nested transaction, there is nothing to do. */
	if (--session->meta_track_nest != 0)
		return (0);

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * If there were no operations logged, return now and avoid unnecessary
	 * metadata checkpoints. For example, this happens if attempting to
	 * create a data source that already exists (or drop one that doesn't).
	 */
	if (trk == trk_orig)
		return (0);

	if (unroll) {
		/*
		 * Unroll newest-to-oldest. Compare before decrementing so the
		 * pointer never moves to one element before the start of the
		 * array, which is undefined behavior.
		 */
		while (trk > trk_orig) {
			--trk;
			WT_TRET(__meta_track_unroll(session, trk));
		}
		/* Unroll operations don't need to flush the metadata. */
		return (ret);
	}

	/*
	 * If we don't have the metadata cursor (e.g, we're in the process of
	 * creating the metadata), we can't sync it.
	 */
	if (!need_sync || session->meta_cursor == NULL ||
	    F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
		goto done;

	/*
	 * NOTE(review): the WT_RET calls below return before the "done" loop,
	 * so a failed flush leaves the tracked operations neither applied nor
	 * unrolled; confirm this is intended.
	 */

	/* If we're logging, make sure the metadata update was flushed. */
	if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
		WT_WITH_DHANDLE(session,
		    WT_SESSION_META_DHANDLE(session),
		    ret = __wt_txn_checkpoint_log(
			session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
		WT_RET(ret);
	} else {
		WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
		ckpt_session = S2C(session)->meta_ckpt_session;
		/*
		 * If this operation is part of a running transaction, that
		 * should be included in the checkpoint.
		 */
		ckpt_session->txn.id = session->txn.id;
		/*
		 * The lock is taken through "session"; the flag is set on the
		 * checkpoint session for the duration of the call.
		 */
		F_SET(ckpt_session, WT_SESSION_LOCKED_METADATA);
		WT_WITH_METADATA_LOCK(session, ret,
		    WT_WITH_DHANDLE(ckpt_session,
			WT_SESSION_META_DHANDLE(session),
			ret = __wt_checkpoint(ckpt_session, NULL)));
		F_CLR(ckpt_session, WT_SESSION_LOCKED_METADATA);
		ckpt_session->txn.id = WT_TXN_NONE;
		WT_RET(ret);
		WT_WITH_DHANDLE(session,
		    WT_SESSION_META_DHANDLE(session),
		    ret = __wt_checkpoint_sync(session, NULL));
		WT_RET(ret);
	}

done:
	/* Apply any tracked operations post-commit, oldest first. */
	for (; trk_orig < trk; trk_orig++)
		WT_TRET(__meta_track_apply(session, trk_orig));
	return (ret);
}