/*
 * __meta_track_apply --
 *	Apply the changes in a metadata tracking record.
 */
static int
__meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_DECL_RET;

	switch (trk->op) {
	case WT_ST_CHECKPOINT:
		/*
		 * A checkpoint was taken on this handle: resolve it through
		 * the handle's block manager.
		 */
		btree = trk->dhandle->handle;
		bm = btree->bm;
		WT_WITH_DHANDLE(session, trk->dhandle,
		    ret = bm->checkpoint_resolve(bm, session));
		break;
	case WT_ST_DROP_COMMIT:
		/* A drop committed: discard the underlying file. */
		ret = __wt_block_manager_drop(session, trk->a, false);
		if (ret != 0)
			__wt_err(session, ret,
			    "metadata remove dropped file %s", trk->a);
		break;
	case WT_ST_LOCK:
		/* Release the handle lock acquired for the operation. */
		WT_WITH_DHANDLE(session, trk->dhandle,
		    ret = __wt_session_release_btree(session));
		break;
	case WT_ST_EMPTY:	/* Unused slot. */
	case WT_ST_FILEOP:	/* Nothing to apply for these records. */
	case WT_ST_REMOVE:
	case WT_ST_SET:
		break;
	}

	/* Reset the slot so it can be reused. */
	__meta_track_clear(session, trk);

	return (ret);
}
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 * need_sync: flush the metadata to stable storage before returning.
 * unroll: undo (rather than commit) the tracked operations.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, int need_sync, int unroll)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* If it was a nested transaction, there is nothing to do. */
	if (--session->meta_track_nest != 0)
		return (0);

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * If there were no operations logged, return now and avoid unnecessary
	 * metadata checkpoints. For example, this happens if attempting to
	 * create a data source that already exists (or drop one that doesn't).
	 */
	if (trk == trk_orig)
		return (0);

	/*
	 * Walk the tracked operations newest-first; the apply function is
	 * passed the unroll flag and decides per-record what to do.
	 * NOTE(review): this requires the 3-argument __meta_track_apply
	 * variant -- confirm against the version compiled into this file.
	 */
	while (--trk >= trk_orig)
		WT_TRET(__meta_track_apply(session, trk, unroll));

	/*
	 * Unroll operations don't need to flush the metadata.
	 *
	 * Also, if we don't have the metadata handle (e.g, we're in the
	 * process of creating the metadata), we can't sync it.
	 */
	if (unroll || ret != 0 || !need_sync || session->meta_dhandle == NULL)
		return (ret);

	/* If we're logging, make sure the metadata update was flushed. */
	if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
		/* Only force a log sync if the txn sync mode doesn't already. */
		if (!FLD_ISSET(S2C(session)->txn_logsync,
		    WT_LOG_DSYNC | WT_LOG_FSYNC))
			WT_WITH_DHANDLE(session, session->meta_dhandle,
			    ret = __wt_txn_checkpoint_log(
			    session, 0, WT_TXN_LOG_CKPT_SYNC, NULL));
	} else {
		/* Not logging: checkpoint the metadata, then sync it. */
		WT_WITH_DHANDLE(session, session->meta_dhandle,
		    ret = __wt_checkpoint(session, NULL));
		WT_RET(ret);
		WT_WITH_DHANDLE(session, session->meta_dhandle,
		    ret = __wt_checkpoint_sync(session, NULL));
	}

	return (ret);
}
/*
 * __compact_worker --
 *	Function to alternate between checkpoints and compaction calls.
 */
static int
__compact_worker(WT_SESSION_IMPL *session)
{
	WT_DECL_RET;
	u_int idx, pass;
	bool progressed;

	/*
	 * Clear every handle's compaction skip flag up front; it's simpler
	 * than resetting flags as each handle finishes.
	 */
	for (idx = 0; idx < session->op_handle_next; ++idx)
		session->op_handle[idx]->compact_skip = false;

	/*
	 * Perform an initial checkpoint (see this file's leading comment for
	 * details).
	 */
	WT_ERR(__compact_checkpoint(session));

	/*
	 * Each pass compacts roughly 10% of a file (the file shrinks between
	 * passes, so it isn't 10% of the original each time). A hundred
	 * passes is far more than enough; stop early once a pass makes no
	 * progress on any file.
	 */
	for (pass = 0; pass < 100; ++pass) {
		progressed = false;

		/* Step through the list of files being compacted. */
		for (idx = 0; idx < session->op_handle_next; ++idx) {
			/* Skip objects where there's no more work. */
			if (session->op_handle[idx]->compact_skip)
				continue;

			session->compact_state = WT_COMPACT_RUNNING;
			WT_WITH_DHANDLE(session, session->op_handle[idx],
			    ret = __wt_compact(session));
			WT_ERR(ret);

			/* No work done means skip this file from now on. */
			if (session->compact_state == WT_COMPACT_SUCCESS)
				progressed = true;
			else
				session->op_handle[idx]->compact_skip = true;
		}
		if (!progressed)
			break;

		/*
		 * Perform two checkpoints (see this file's leading comment
		 * for details).
		 */
		WT_ERR(__compact_checkpoint(session));
		WT_ERR(__compact_checkpoint(session));
	}

err:	session->compact_state = WT_COMPACT_NONE;

	return (ret);
}
/*
 * __meta_track_unroll --
 *	Undo the changes in a metadata tracking record.
 */
static int
__meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
{
	WT_DECL_RET;

	switch (trk->op) {
	case WT_ST_EMPTY:		/* Unused slot, nothing to undo. */
	case WT_ST_CHECKPOINT:		/* Checkpoints aren't undone. */
	case WT_ST_DROP_COMMIT:		/* Drop commit isn't undone. */
		break;
	case WT_ST_LOCK:		/* Handle lock. */
		/* A created handle must be discarded when released. */
		if (trk->created)
			F_SET(trk->dhandle, WT_DHANDLE_DISCARD);
		WT_WITH_DHANDLE(session, trk->dhandle,
		    ret = __wt_session_release_btree(session));
		break;
	case WT_ST_FILEOP:		/* File operation. */
		/*
		 * Renames track both a and b, creates track only b (a is
		 * NULL), removes track only a (b is NULL).
		 */
		if (trk->a != NULL && trk->b != NULL) {
			/* Undo the rename by renaming back. */
			ret = __wt_fs_rename(session,
			    trk->b + strlen("file:"),
			    trk->a + strlen("file:"), true);
			if (ret != 0)
				__wt_err(session, ret,
				    "metadata unroll rename %s to %s",
				    trk->b, trk->a);
		} else if (trk->a == NULL) {
			/* Undo the create by removing the file. */
			ret = __wt_fs_remove(session,
			    trk->b + strlen("file:"), false);
			if (ret != 0)
				__wt_err(session, ret,
				    "metadata unroll create %s", trk->b);
		}

		/*
		 * We can't undo removes yet: that would imply some kind of
		 * temporary rename and remove in roll forward.
		 */
		break;
	case WT_ST_REMOVE:		/* Undo a set by removing trk.a. */
		ret = __wt_metadata_remove(session, trk->a);
		if (ret != 0)
			__wt_err(session, ret,
			    "metadata unroll remove: %s", trk->a);
		break;
	case WT_ST_SET:			/* Restore trk.a to trk.b. */
		ret = __wt_metadata_update(session, trk->a, trk->b);
		if (ret != 0)
			__wt_err(session, ret,
			    "metadata unroll update %s to %s",
			    trk->a, trk->b);
		break;
	}

	/* Reset the slot so it can be reused. */
	__meta_track_clear(session, trk);

	return (ret);
}
/*
 * __wt_schema_release_table --
 *	Release a table handle.
 */
int
__wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE *table)
{
	WT_DECL_RET;

	/* The table is a data handle; release it through the dhandle API. */
	WT_WITH_DHANDLE(session, &table->iface,
	    ret = __wt_session_release_dhandle(session));

	return (ret);
}
/*
 * __wt_schema_release_table --
 *	Release a table handle, clearing the caller's reference.
 */
int
__wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep)
{
	WT_DECL_RET;
	WT_TABLE *table;

	table = *tablep;
	if (table == NULL)
		return (0);

	/* Clear the caller's pointer so the handle can't be released twice. */
	*tablep = NULL;

	WT_WITH_DHANDLE(session, &table->iface,
	    ret = __wt_session_release_dhandle(session));

	return (ret);
}
/*
 * __backup_cleanup_handles --
 *	Release and free all btree handles held by the backup. This is kept
 * separate from __backup_stop because it can be called without the schema
 * lock held.
 */
static int
__backup_cleanup_handles(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
{
	WT_CURSOR_BACKUP_ENTRY *entry;
	WT_DECL_RET;

	/* No list means nothing was accumulated, nothing to clean up. */
	if (cb->list == NULL)
		return (0);

	/* Release each handle and free each name; the list ends at NULL. */
	for (entry = cb->list; entry->name != NULL; ++entry) {
		if (entry->handle != NULL)
			WT_WITH_DHANDLE(session, entry->handle,
			    WT_TRET(__wt_session_release_btree(session)));
		__wt_free(session, entry->name);
	}

	/* Finally, free the list itself. */
	__wt_free(session, cb->list);

	return (ret);
}
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 * need_sync: flush the metadata to stable storage before returning.
 * unroll: undo (rather than commit) the tracked operations.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;
	WT_SESSION_IMPL *ckpt_session;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* If it was a nested transaction, there is nothing to do. */
	if (--session->meta_track_nest != 0)
		return (0);

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * If there were no operations logged, return now and avoid unnecessary
	 * metadata checkpoints. For example, this happens if attempting to
	 * create a data source that already exists (or drop one that doesn't).
	 */
	if (trk == trk_orig)
		return (0);

	if (unroll) {
		/* Undo the tracked operations, newest first. */
		while (--trk >= trk_orig)
			WT_TRET(__meta_track_unroll(session, trk));
		/* Unroll operations don't need to flush the metadata. */
		return (ret);
	}

	/*
	 * If we don't have the metadata cursor (e.g, we're in the process of
	 * creating the metadata), we can't sync it.  Nothing to flush in an
	 * in-memory configuration either.
	 */
	if (!need_sync || session->meta_cursor == NULL ||
	    F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
		goto done;

	/* If we're logging, make sure the metadata update was flushed. */
	if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
		WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session),
		    ret = __wt_txn_checkpoint_log(
		    session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
		WT_RET(ret);
	} else {
		WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
		/*
		 * Not logging: checkpoint the metadata through the dedicated
		 * metadata-checkpoint session.
		 * NOTE(review): ckpt_session is shared connection state; the
		 * schema lock asserted above presumably serializes access to
		 * its txn.id -- confirm.
		 */
		ckpt_session = S2C(session)->meta_ckpt_session;
		/*
		 * If this operation is part of a running transaction, that
		 * should be included in the checkpoint.
		 */
		ckpt_session->txn.id = session->txn.id;
		F_SET(ckpt_session, WT_SESSION_LOCKED_METADATA);
		WT_WITH_METADATA_LOCK(session, ret,
		    WT_WITH_DHANDLE(ckpt_session,
			WT_SESSION_META_DHANDLE(session),
			ret = __wt_checkpoint(ckpt_session, NULL)));
		F_CLR(ckpt_session, WT_SESSION_LOCKED_METADATA);
		ckpt_session->txn.id = WT_TXN_NONE;
		WT_RET(ret);
		/* Sync the metadata checkpoint to stable storage. */
		WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session),
		    ret = __wt_checkpoint_sync(session, NULL));
		WT_RET(ret);
	}

done:	/* Apply any tracked operations post-commit. */
	for (; trk_orig < trk; trk_orig++)
		WT_TRET(__meta_track_apply(session, trk_orig));
	return (ret);
}
/*
 * __sweep --
 *	Close unused dhandles on the connection dhandle list.
 */
static int
__sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle, *dhandle_next;
	WT_DECL_RET;
	time_t now;

	conn = S2C(session);

	/* Don't discard handles that have been open recently. */
	WT_RET(__wt_seconds(session, &now));

	dhandle = SLIST_FIRST(&conn->dhlh);
	for (; dhandle != NULL; dhandle = dhandle_next) {
		/*
		 * Capture the next handle before potentially discarding this
		 * one.  NOTE(review): if discard frees the handle, a
		 * concurrent list change could invalidate dhandle_next --
		 * presumably the sweep is serialized against list mutation;
		 * confirm.
		 */
		dhandle_next = SLIST_NEXT(dhandle, l);
		/* Skip handles still referenced or not dead long enough. */
		if (dhandle->session_ref != 0 ||
		    now - dhandle->timeofdeath <= WT_DHANDLE_SWEEP_WAIT)
			continue;

		/*
		 * We have a candidate for closing; if it's open, acquire an
		 * exclusive lock on the handle and close it (the lock blocks
		 * threads from opening the handle). We might be blocking an
		 * open for a fairly long time (over disk I/O), but the handle
		 * has been quiescent for awhile.
		 *
		 * The close can fail if an update cannot be written (updates in
		 * a no-longer-referenced file might not yet be globally visible
		 * if sessions have disjoint sets of files open). If the handle
		 * is busy, skip it, we'll retry the close the next time, after
		 * the transaction state has progressed.
		 */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
			/*
			 * We don't set WT_DHANDLE_EXCLUSIVE deliberately, we
			 * want opens to block on us rather than returning an
			 * EBUSY error to the application.
			 */
			ret = __wt_try_writelock(session, dhandle->rwlock);
			if (ret == EBUSY) {
				/* Somebody has the lock; try again later. */
				ret = 0;
				continue;
			}
			WT_RET(ret);

			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_conn_btree_sync_and_close(session));
			if (ret == EBUSY)
				ret = 0;

			/* Drop the lock before checking the close result. */
			WT_TRET(__wt_rwunlock(session, dhandle->rwlock));
			WT_RET(ret);
		}

		/*
		 * Attempt to discard the handle (the called function checks the
		 * handle-open flag after acquiring appropriate locks, which is
		 * why we don't do any special handling of EBUSY returns above,
		 * that path never cleared the handle-open flag).
		 */
		ret = __wt_conn_dhandle_discard_single(session, dhandle, 0);
		if (ret == EBUSY)
			ret = 0;
		WT_RET(ret);
	}
	return (0);
}
/*
 * __meta_track_apply --
 *	Apply the changes in a metadata tracking record.
 *
 * unroll: non-zero to undo the operation, zero to commit it.  On commit,
 * only checkpoint resolution and handle-lock release have work to do.
 */
static int
__meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_DECL_RET;
	int tret;

	/*
	 * Unlock handles and complete checkpoints regardless of whether we are
	 * unrolling.
	 */
	if (!unroll && trk->op != WT_ST_CHECKPOINT && trk->op != WT_ST_LOCK)
		goto free;

	switch (trk->op) {
	case WT_ST_EMPTY:	/* Unused slot */
		break;
	case WT_ST_CHECKPOINT:	/* Checkpoint, see above */
		/* On commit, resolve the checkpoint via the block manager. */
		if (!unroll) {
			btree = trk->dhandle->handle;
			bm = btree->bm;
			WT_WITH_DHANDLE(session, trk->dhandle,
			    WT_TRET(bm->checkpoint_resolve(bm, session)));
		}
		break;
	case WT_ST_LOCK:	/* Handle lock, see above */
		/* A handle created in an unrolled txn must be discarded. */
		if (unroll && trk->created)
			F_SET(trk->dhandle, WT_DHANDLE_DISCARD);
		WT_WITH_DHANDLE(session, trk->dhandle,
		    WT_TRET(__wt_session_release_btree(session)));
		break;
	case WT_ST_FILEOP:	/* File operation */
		/*
		 * For renames, both a and b are set.
		 * For creates, a is NULL.
		 * For removes, b is NULL.
		 */
		if (trk->a != NULL && trk->b != NULL &&
		    (tret = __wt_rename(session,
		    trk->b + strlen("file:"),
		    trk->a + strlen("file:"))) != 0) {
			/* Undo a rename by renaming back; report failures. */
			__wt_err(session, tret,
			    "metadata unroll rename %s to %s",
			    trk->b, trk->a);
			WT_TRET(tret);
		} else if (trk->a == NULL) {
			/* Undo a create by removing the file. */
			if ((tret = __wt_remove(session,
			    trk->b + strlen("file:"))) != 0) {
				__wt_err(session, tret,
				    "metadata unroll create %s", trk->b);
				WT_TRET(tret);
			}
		}
		/*
		 * We can't undo removes yet: that would imply
		 * some kind of temporary rename and remove in
		 * roll forward.
		 */
		break;
	case WT_ST_REMOVE:	/* Remove trk.a */
		if ((tret = __wt_metadata_remove(session, trk->a)) != 0) {
			__wt_err(session, tret,
			    "metadata unroll remove: %s", trk->a);
			WT_TRET(tret);
		}
		break;
	case WT_ST_SET:		/* Set trk.a to trk.b */
		if ((tret = __wt_metadata_update(
		    session, trk->a, trk->b)) != 0) {
			__wt_err(session, tret,
			    "metadata unroll update %s to %s",
			    trk->a, trk->b);
			WT_TRET(tret);
		}
		break;
	WT_ILLEGAL_VALUE(session);	/* default: unknown operation */
	}

free:	/* Reset the slot so it can be reused. */
	trk->op = WT_ST_EMPTY;
	__wt_free(session, trk->a);
	__wt_free(session, trk->b);
	trk->dhandle = NULL;

	return (ret);
}
/*
 * __wt_session_compact --
 *	WT_SESSION.compact method.
 *
 * uri: the object to compact; config: the method configuration string.
 * Returns 0 on success or a WiredTiger error code mapped via the
 * API_END_RET_NOTFOUND_MAP convention.
 */
int
__wt_session_compact(
    WT_SESSION *wt_session, const char *uri, const char *config)
{
	WT_COMPACT_STATE compact;
	WT_CONFIG_ITEM cval;
	WT_DATA_SOURCE *dsrc;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	u_int i;
	bool ignore_cache_size_set;

	ignore_cache_size_set = false;

	session = (WT_SESSION_IMPL *)wt_session;
	SESSION_API_CALL(session, compact, config, cfg);

	/*
	 * The compaction thread should not block when the cache is full: it is
	 * holding locks blocking checkpoints and once the cache is full, it can
	 * spend a long time doing eviction.
	 */
	if (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE)) {
		/* Remember whether we set the flag, so we only clear our own. */
		ignore_cache_size_set = true;
		F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
	}

	/* In-memory ignores compaction operations. */
	if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
		goto err;

	/*
	 * Non-LSM object compaction requires checkpoints, which are impossible
	 * in transactional contexts. Disallow in all contexts (there's no
	 * reason for LSM to allow this, possible or not), and check now so the
	 * error message isn't confusing.
	 */
	WT_ERR(__wt_txn_context_check(session, false));

	/* Disallow objects in the WiredTiger name space. */
	WT_ERR(__wt_str_name_check(session, uri));

	/* Unrecognized prefixes are dispatched to custom data sources. */
	if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
	    !WT_PREFIX_MATCH(uri, "file:") &&
	    !WT_PREFIX_MATCH(uri, "index:") &&
	    !WT_PREFIX_MATCH(uri, "lsm:") &&
	    !WT_PREFIX_MATCH(uri, "table:")) {
		if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
			ret = dsrc->compact == NULL ?
			    __wt_object_unsupported(session, uri) :
			    dsrc->compact(
			    dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg);
		else
			ret = __wt_bad_object_type(session, uri);
		goto err;
	}

	/* Setup the session handle's compaction state structure. */
	memset(&compact, 0, sizeof(WT_COMPACT_STATE));
	session->compact = &compact;

	/* Compaction can be time-limited. */
	WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval));
	session->compact->max_time = (uint64_t)cval.val;
	__wt_epoch(session, &session->compact->begin);

	/* Find the types of data sources being compacted. */
	WT_WITH_SCHEMA_LOCK(session,
	    ret = __wt_schema_worker(session, uri,
	    __compact_handle_append, __compact_uri_analyze, cfg, 0));
	WT_ERR(ret);

	/* LSM and file objects are compacted by separate mechanisms. */
	if (session->compact->lsm_count != 0)
		WT_ERR(__wt_schema_worker(
		    session, uri, NULL, __wt_lsm_compact, cfg, 0));
	if (session->compact->file_count != 0)
		WT_ERR(__compact_worker(session));

err:	session->compact = NULL;

	/* End compaction on, then release, every handle we accumulated. */
	for (i = 0; i < session->op_handle_next; ++i) {
		WT_WITH_DHANDLE(session, session->op_handle[i],
		    WT_TRET(__compact_end(session)));
		WT_WITH_DHANDLE(session, session->op_handle[i],
		    WT_TRET(__wt_session_release_dhandle(session)));
	}

	__wt_free(session, session->op_handle);
	session->op_handle_allocated = session->op_handle_next = 0;

	/*
	 * Release common session resources (for example, checkpoint may acquire
	 * significant reconciliation structures/memory).
	 */
	WT_TRET(__wt_session_release_resources(session));

	if (ignore_cache_size_set)
		F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);

	if (ret != 0)
		WT_STAT_CONN_INCR(session, session_table_compact_fail);
	else
		WT_STAT_CONN_INCR(session, session_table_compact_success);
	API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
 * __compact_worker --
 *	Function to alternate between checkpoints and compaction calls.
 */
static int
__compact_worker(WT_SESSION_IMPL *session)
{
	WT_DECL_RET;
	u_int i, loop;
	bool keep_going;

	/*
	 * Clear every handle's compaction skip flag up front; it's simpler
	 * than resetting flags as each handle finishes.
	 */
	for (i = 0; i < session->op_handle_next; ++i)
		session->op_handle[i]->compact_skip = false;

	/*
	 * Perform an initial checkpoint (see this file's leading comment for
	 * details).
	 */
	WT_ERR(__compact_checkpoint(session));

	/*
	 * Each pass compacts roughly 10% of a file (the file shrinks between
	 * passes, so it isn't 10% of the original each time). A hundred
	 * passes is far more than enough; stop early once no file needs
	 * another pass.
	 */
	for (loop = 0; loop < 100; ++loop) {
		keep_going = false;

		/* Step through the list of files being compacted. */
		for (i = 0; i < session->op_handle_next; ++i) {
			/* Skip objects where there's no more work. */
			if (session->op_handle[i]->compact_skip)
				continue;

			session->compact_state = WT_COMPACT_RUNNING;
			WT_WITH_DHANDLE(session, session->op_handle[i],
			    ret = __wt_compact(session));

			switch (ret) {
			case 0:
				/*
				 * Success with work done schedules another
				 * pass; success without work retires the
				 * file.
				 */
				if (session->compact_state ==
				    WT_COMPACT_SUCCESS)
					keep_going = true;
				else
					session->op_handle[i]->compact_skip =
					    true;
				break;
			case EBUSY:
				/*
				 * EBUSY from racing with checkpoint is fine,
				 * move to the next handle and retry later;
				 * we'll eventually block on a checkpoint.
				 * EBUSY from eviction pressure is fatal.
				 */
				if (__wt_cache_stuck(session))
					WT_ERR_MSG(session, EBUSY,
					    "compaction halted by eviction "
					    "pressure");
				ret = 0;
				keep_going = true;
				break;
			}
			WT_ERR(ret);
		}
		if (!keep_going)
			break;

		/*
		 * Perform two checkpoints (see this file's leading comment
		 * for details).
		 */
		WT_ERR(__compact_checkpoint(session));
		WT_ERR(__compact_checkpoint(session));
	}

err:	session->compact_state = WT_COMPACT_NONE;

	return (ret);
}
/*
 * __sweep --
 *	Close unused dhandles on the connection dhandle list.
 */
static int
__sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle, *dhandle_next;
	WT_DECL_RET;
	time_t now;
	int locked;

	conn = S2C(session);

	/* Don't discard handles that have been open recently. */
	WT_RET(__wt_seconds(session, &now));

	WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
	dhandle = SLIST_FIRST(&conn->dhlh);
	for (; dhandle != NULL; dhandle = dhandle_next) {
		/* Capture the next handle before potentially discarding. */
		dhandle_next = SLIST_NEXT(dhandle, l);
		/* The metadata handle is never swept. */
		if (WT_IS_METADATA(dhandle))
			continue;
		/* Skip handles in use or not yet dead long enough. */
		if (dhandle->session_inuse != 0 ||
		    now <= dhandle->timeofdeath + WT_DHANDLE_SWEEP_WAIT)
			continue;
		/* First idle observation: stamp the time of death only. */
		if (dhandle->timeofdeath == 0) {
			dhandle->timeofdeath = now;
			WT_STAT_FAST_CONN_INCR(session, dh_conn_tod);
			continue;
		}

		/*
		 * We have a candidate for closing; if it's open, acquire an
		 * exclusive lock on the handle and close it. We might be
		 * blocking opens for a long time (over disk I/O), but the
		 * handle was quiescent for awhile.
		 *
		 * The close can fail if an update cannot be written (updates
		 * in a no-longer-referenced file might not yet be globally
		 * visible if sessions have disjoint sets of files open). If
		 * the handle is busy, skip it, we'll retry the close the next
		 * time, after the transaction state has progressed.
		 *
		 * We don't set WT_DHANDLE_EXCLUSIVE deliberately, we want
		 * opens to block on us rather than returning an EBUSY error to
		 * the application.
		 */
		if ((ret =
		    __wt_try_writelock(session, dhandle->rwlock)) == EBUSY)
			continue;
		WT_RET(ret);
		locked = 1;

		/* If the handle is open, try to close it. */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_conn_btree_sync_and_close(session, 0));
			if (ret != 0)
				goto unlock;

			/* We closed the btree handle, bump the statistic. */
			WT_STAT_FAST_CONN_INCR(session, dh_conn_handles);
		}

		/*
		 * If there are no longer any references to the handle in any
		 * sessions, attempt to discard it.  The called function
		 * re-checks that the handle is not in use, which is why we
		 * don't do any special handling of EBUSY returns above.
		 */
		if (dhandle->session_inuse == 0 && dhandle->session_ref == 0) {
			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_conn_dhandle_discard_single(session, 0));
			if (ret != 0)
				goto unlock;

			/* If the handle was discarded, it isn't locked. */
			locked = 0;
		} else
			WT_STAT_FAST_CONN_INCR(session, dh_conn_ref);

unlock:		if (locked)
			WT_TRET(__wt_writeunlock(session, dhandle->rwlock));

		/* EBUSY is a retry-next-pass condition, not an error. */
		WT_RET_BUSY_OK(ret);
	}
	return (0);
}
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 * need_sync: flush the metadata to stable storage before returning.
 * unroll: undo (rather than commit) the tracked operations.
 *
 * On failure to apply or unroll the tracked operations the function
 * panics: the metadata can no longer be trusted.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;
	WT_SESSION_IMPL *ckpt_session;
	int saved_ret;
	bool did_drop;

	/* saved_ret preserves the sync error across the unroll pass. */
	saved_ret = 0;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* If it was a nested transaction, there is nothing to do. */
	if (--session->meta_track_nest != 0)
		return (0);

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * If there were no operations logged, skip unnecessary metadata
	 * checkpoints. For example, this happens if attempting to create a
	 * data source that already exists (or drop one that doesn't).
	 */
	if (trk == trk_orig)
		goto err;

	/* Unrolling doesn't require syncing the metadata. */
	if (unroll)
		goto err;

	/* Commit the internal schema transaction, if enabled. */
	if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
		F_CLR(session, WT_SESSION_SCHEMA_TXN);
#ifdef WT_ENABLE_SCHEMA_TXN
		WT_ERR(__wt_txn_commit(session, NULL));
		__wt_errx(session, "TRACK: Commit internal schema txn");
#endif
	}

	/*
	 * If we don't have the metadata cursor (e.g, we're in the process of
	 * creating the metadata), we can't sync it.
	 */
	if (!need_sync || session->meta_cursor == NULL ||
	    F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
		goto err;

	/* If we're logging, make sure the metadata update was flushed. */
	if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
		WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session),
		    ret = __wt_txn_checkpoint_log(
		    session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
	else {
		WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
		/*
		 * Not logging: checkpoint the metadata through the dedicated
		 * metadata-checkpoint session, then sync it.
		 */
		ckpt_session = S2C(session)->meta_ckpt_session;
		/*
		 * If this operation is part of a running transaction, that
		 * should be included in the checkpoint.
		 */
		ckpt_session->txn.id = session->txn.id;
		WT_ASSERT(session,
		    !F_ISSET(session, WT_SESSION_LOCKED_METADATA));
		WT_WITH_DHANDLE(ckpt_session,
		    WT_SESSION_META_DHANDLE(session),
		    WT_WITH_METADATA_LOCK(ckpt_session,
			ret = __wt_checkpoint(ckpt_session, NULL)));
		ckpt_session->txn.id = WT_TXN_NONE;
		if (ret == 0)
			WT_WITH_DHANDLE(session,
			    WT_SESSION_META_DHANDLE(session),
			    ret = __wt_checkpoint_sync(session, NULL));
	}

err:	/*
	 * Undo any tracked operations on failure.
	 * Apply any tracked operations post-commit.
	 */
	did_drop = false;
	if (unroll || ret != 0) {
		/* Preserve the original error; unroll newest first. */
		saved_ret = ret;
		ret = 0;
		while (--trk >= trk_orig) {
			did_drop = did_drop || trk->op == WT_ST_DROP_COMMIT;
			WT_TRET(__meta_track_unroll(session, trk));
		}
	} else
		for (; trk_orig < trk; trk_orig++) {
			did_drop =
			    did_drop || trk_orig->op == WT_ST_DROP_COMMIT;
			WT_TRET(__meta_track_apply(session, trk_orig));
		}

	if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
		F_CLR(session, WT_SESSION_SCHEMA_TXN);
		/*
		 * We should have committed above unless we're unrolling, there
		 * was an error or the operation was a noop.
		 */
		WT_ASSERT(session, unroll || saved_ret != 0 ||
		    session->txn.mod_count == 0);
#ifdef WT_ENABLE_SCHEMA_TXN
		__wt_err(session, saved_ret,
		    "TRACK: Abort internal schema txn");
		WT_TRET(__wt_txn_rollback(session, NULL));
#endif
	}

	/*
	 * Wake up the sweep thread: particularly for the in-memory
	 * storage engine, we want to reclaim space immediately.
	 */
	if (did_drop && S2C(session)->sweep_cond != NULL)
		__wt_cond_signal(session, S2C(session)->sweep_cond);

	if (ret != 0)
		WT_PANIC_RET(session, ret,
		    "failed to apply or unroll all tracked operations");
	return (saved_ret == 0 ? 0 : saved_ret);
}
/*
 * __drop_table --
 *	WT_SESSION::drop for a table.
 *
 * Fix: after releasing the table handle, clear the local pointer before
 * attempting to reacquire it.  Previously, if the reacquisition failed the
 * err path would release the already-released handle through the stale
 * pointer (a double release).
 */
static int
__drop_table(
    WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
	WT_COLGROUP *colgroup;
	WT_DECL_RET;
	WT_INDEX *idx;
	WT_TABLE *table;
	u_int i;
	const char *name;
	bool tracked;

	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));

	name = uri;
	WT_PREFIX_SKIP_REQUIRED(session, name, "table:");

	table = NULL;
	tracked = false;

	/*
	 * Open the table so we can drop its column groups and indexes.
	 *
	 * Ideally we would keep the table locked exclusive across the drop,
	 * but for now we rely on the global table lock to prevent the table
	 * being reopened while it is being dropped. One issue is that the
	 * WT_WITHOUT_LOCKS macro can drop and reacquire the global table lock,
	 * avoiding deadlocks while waiting for LSM operation to quiesce.
	 *
	 * Temporarily getting the table exclusively serves the purpose
	 * of ensuring that cursors on the table that are already open
	 * must at least be closed before this call proceeds.
	 */
	WT_ERR(__wt_schema_get_table_uri(
	    session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
	ret = __wt_schema_release_table(session, table);
	/* The handle is released; don't release it again on the err path. */
	table = NULL;
	WT_ERR(ret);
	WT_ERR(__wt_schema_get_table_uri(session, uri, true, 0, &table));

	/* Drop the column groups. */
	for (i = 0; i < WT_COLGROUPS(table); i++) {
		if ((colgroup = table->cgroups[i]) == NULL)
			continue;
		/*
		 * Drop the column group before updating the metadata to avoid
		 * the metadata for the table becoming inconsistent if we can't
		 * get exclusive access.
		 */
		WT_ERR(__wt_schema_drop(session, colgroup->source, cfg));
		WT_ERR(__wt_metadata_remove(session, colgroup->name));
	}

	/* Drop the indices. */
	WT_ERR(__wt_schema_open_indices(session, table));
	for (i = 0; i < table->nindices; i++) {
		if ((idx = table->indices[i]) == NULL)
			continue;
		/*
		 * Drop the index before updating the metadata to avoid
		 * the metadata for the table becoming inconsistent if we can't
		 * get exclusive access.
		 */
		WT_ERR(__wt_schema_drop(session, idx->source, cfg));
		WT_ERR(__wt_metadata_remove(session, idx->name));
	}

	/* Make sure the table data handle is closed. */
	WT_TRET(__wt_schema_release_table(session, table));
	/* Again, don't release the stale handle if the re-open fails. */
	table = NULL;
	WT_ERR(__wt_schema_get_table_uri(
	    session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
	F_SET(&table->iface, WT_DHANDLE_DISCARD);
	if (WT_META_TRACKING(session)) {
		/* Hand the handle lock to metadata tracking for release. */
		WT_WITH_DHANDLE(session, &table->iface,
		    ret = __wt_meta_track_handle_lock(session, false));
		WT_ERR(ret);
		tracked = true;
	}

	/* Remove the metadata entry (ignore missing items). */
	WT_ERR(__wt_metadata_remove(session, uri));

err:	if (table != NULL && !tracked)
		WT_TRET(__wt_schema_release_table(session, table));
	return (ret);
}