/*
 * __compact_checkpoint --
 *	Run a forced checkpoint on behalf of compaction. If the checkpoint
 *	cannot start because it reports busy, wait for the blocking
 *	checkpoint to finish, giving up if the compaction timeout expires.
 */
static int
__compact_checkpoint(WT_SESSION_IMPL *session)
{
	WT_DECL_RET;
	WT_TXN_GLOBAL *txn_global;
	uint64_t start_gen;

	/*
	 * The checkpoint is always forced: the work compaction depends on is
	 * done in the underlying block manager, so the checkpoint must not be
	 * skipped.
	 */
	const char *checkpoint_cfg[] = {
	    WT_CONFIG_BASE(session, WT_SESSION_checkpoint), "force=1", NULL };

	/* A checkpoint is slow, make sure there's still time before starting. */
	WT_RET(__wt_session_compact_check_timeout(session));

	ret = __wt_txn_checkpoint(session, checkpoint_cfg, false);
	if (ret == 0)
		return (0);

	/* Anything other than a busy return is a real failure. */
	WT_RET_BUSY_OK(ret);

	/*
	 * Some other checkpoint is in the way. Poll until either no checkpoint
	 * is running or the checkpoint generation has moved on from the value
	 * sampled here; either one means the blocking checkpoint is done.
	 *
	 * Only objects declared volatile are examined in the loop condition,
	 * so no barriers are needed.
	 */
	txn_global = &S2C(session)->txn_global;
	start_gen = __wt_gen(session, WT_GEN_CHECKPOINT);
	while (txn_global->checkpoint_running &&
	    start_gen == __wt_gen(session, WT_GEN_CHECKPOINT)) {
		/* Stop waiting once compaction has run out of time. */
		WT_RET(__wt_session_compact_check_timeout(session));
		__wt_sleep(2, 0);
	}

	return (0);
}
/*
 * __wt_delete_page --
 *	If deleting a range, try to delete the page without instantiating it.
 *
 *	On return, *skipp is true only if the page was fast-deleted (the
 *	reference was switched to WT_REF_DELETED); otherwise it is false and
 *	the caller has to process the page itself.
 *
 *	Returns 0 both when the page was fast-deleted and when the fast-delete
 *	simply wasn't possible; returns non-zero only for a real failure (a
 *	non-busy eviction error, or a failure marking the parent dirty,
 *	allocating the page-delete structure or registering the transaction
 *	modification).
 */
int
__wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
	WT_DECL_RET;
	WT_PAGE *parent;

	*skipp = false;

	/* If we have a clean page in memory, attempt to evict it. */
	if (ref->state == WT_REF_MEM &&
	    __wt_atomic_casv32(&ref->state, WT_REF_MEM, WT_REF_LOCKED)) {
		if (__wt_page_is_modified(ref->page)) {
			WT_PUBLISH(ref->state, WT_REF_MEM);
			return (0);
		}

		(void)__wt_atomic_addv32(&S2BT(session)->evict_busy, 1);
		ret = __wt_evict_page(session, ref);
		(void)__wt_atomic_subv32(&S2BT(session)->evict_busy, 1);
		WT_RET_BUSY_OK(ret);

		/*
		 * A busy eviction is tolerated, but the return value may still
		 * hold EBUSY. Clear it: the error path below returns ret, and
		 * failing the fast-delete optimization must not be reported as
		 * an error.
		 */
		ret = 0;
	}

	/*
	 * Atomically switch the page's state to lock it. If the page is not
	 * on-disk, other threads may be using it, no fast delete.
	 *
	 * Possible optimization: if the page is already deleted and the delete
	 * is visible to us (the delete has been committed), we could skip the
	 * page instead of instantiating it and figuring out there are no rows
	 * in the page. While that's a huge amount of work to no purpose, it's
	 * unclear optimizing for overlapping range deletes is worth the effort.
	 */
	if (ref->state != WT_REF_DISK ||
	    !__wt_atomic_casv32(&ref->state, WT_REF_DISK, WT_REF_LOCKED))
		return (0);

	/*
	 * We cannot fast-delete pages that have overflow key/value items as
	 * the overflow blocks have to be discarded. The way we figure that
	 * out is to check the on-page cell type for the page, cells for leaf
	 * pages that have no overflow items are special.
	 *
	 * In some cases, the reference address may not reference an on-page
	 * cell (for example, some combination of page splits), in which case
	 * we can't check the original cell value and we fail.
	 *
	 * To look at an on-page cell, we need to look at the parent page, and
	 * that's dangerous, our parent page could change without warning if
	 * the parent page were to split, deepening the tree. It's safe: the
	 * page's reference will always point to some valid page, and if we find
	 * any problems we simply fail the fast-delete optimization.
	 *
	 * !!!
	 * I doubt it's worth the effort, but we could copy the cell's type into
	 * the reference structure, and then we wouldn't need an on-page cell.
	 */
	parent = ref->home;
	if (__wt_off_page(parent, ref->addr) ||
	    __wt_cell_type_raw(ref->addr) != WT_CELL_ADDR_LEAF_NO)
		goto err;	/* Not an error: ret is 0 here. */

	/*
	 * This action dirties the parent page: mark it dirty now, there's no
	 * future reconciliation of the child leaf page that will dirty it as
	 * we write the tree.
	 */
	WT_ERR(__wt_page_parent_modify_set(session, ref, false));

	/*
	 * Record the change in the transaction structure and set the change's
	 * transaction ID.
	 */
	WT_ERR(__wt_calloc_one(session, &ref->page_del));
	ref->page_del->txnid = session->txn.id;

	WT_ERR(__wt_txn_modify_ref(session, ref));

	*skipp = true;
	WT_PUBLISH(ref->state, WT_REF_DELETED);
	return (0);

err:	__wt_free(session, ref->page_del);

	/*
	 * Restore the page to on-disk status, we'll have to instantiate it.
	 */
	WT_PUBLISH(ref->state, WT_REF_DISK);
	return (ret);
}
/*
 * __sweep --
 *	Walk the connection's data handle list, closing handles that have been
 *	unused for long enough and discarding those no session references.
 */
static int
__sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle, *next;
	WT_DECL_RET;
	time_t now;
	int have_lock;

	conn = S2C(session);

	/* The current time decides which handles are old enough to sweep. */
	WT_RET(__wt_seconds(session, &now));

	WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
	for (dhandle = SLIST_FIRST(&conn->dhlh);
	    dhandle != NULL; dhandle = next) {
		/* Fetch the successor first, this handle may be discarded. */
		next = SLIST_NEXT(dhandle, l);

		/*
		 * Skip the metadata handle, handles currently in use, and
		 * handles that were open too recently.
		 */
		if (WT_IS_METADATA(dhandle) ||
		    dhandle->session_inuse != 0 ||
		    now <= dhandle->timeofdeath + WT_DHANDLE_SWEEP_WAIT)
			continue;

		/* First time seen idle: start its clock and move on. */
		if (dhandle->timeofdeath == 0) {
			dhandle->timeofdeath = now;
			WT_STAT_FAST_CONN_INCR(session, dh_conn_tod);
			continue;
		}

		/*
		 * This handle is a candidate for closing: take the handle's
		 * lock exclusively, then close it if it's open. Opens may be
		 * blocked for a long time (over disk I/O), but the handle has
		 * been quiescent for a while.
		 *
		 * Closing can fail if an update cannot be written (updates in
		 * a no-longer-referenced file might not yet be globally
		 * visible if sessions have disjoint sets of files open). A
		 * busy handle is skipped and retried on the next pass, after
		 * the transaction state has progressed.
		 *
		 * WT_DHANDLE_EXCLUSIVE is deliberately not set, opens should
		 * block on us rather than return an EBUSY error to the
		 * application.
		 */
		ret = __wt_try_writelock(session, dhandle->rwlock);
		if (ret == EBUSY)
			continue;
		WT_RET(ret);
		have_lock = 1;

		/* If the handle is open, try to close it. */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_conn_btree_sync_and_close(session, 0));

			/* Count the btree handles we managed to close. */
			if (ret == 0) {
				WT_STAT_FAST_CONN_INCR(
				    session, dh_conn_handles);
			}
		}

		/*
		 * If the close didn't fail and no session holds a reference
		 * to the handle any longer, attempt to discard it. The
		 * discard function re-checks that the handle is not in use,
		 * which is why EBUSY returns above get no special handling.
		 */
		if (ret == 0) {
			if (dhandle->session_inuse == 0 &&
			    dhandle->session_ref == 0) {
				WT_WITH_DHANDLE(session, dhandle, ret =
				    __wt_conn_dhandle_discard_single(
				    session, 0));

				/* A discarded handle is no longer locked. */
				if (ret == 0)
					have_lock = 0;
			} else {
				WT_STAT_FAST_CONN_INCR(session, dh_conn_ref);
			}
		}

		if (have_lock)
			WT_TRET(__wt_writeunlock(session, dhandle->rwlock));

		/* Busy handles are left for a later sweep. */
		WT_RET_BUSY_OK(ret);
	}

	return (0);
}
/*
 * __wt_delete_page --
 *	If deleting a range, try to delete the page without instantiating it.
 *
 *	On return, *skipp is true only if the page was fast-deleted (the
 *	reference was switched to WT_REF_DELETED); otherwise it is false and
 *	the caller has to process the page itself.
 *
 *	Returns 0 both when the page was fast-deleted and when the fast-delete
 *	simply wasn't possible; returns non-zero only for a real failure (a
 *	non-busy eviction error, or a failure marking the parent dirty,
 *	allocating the page-delete structure or registering the transaction
 *	modification).
 */
int
__wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
	WT_DECL_RET;
	WT_PAGE *parent;

	*skipp = false;

	/* If we have a clean page in memory, attempt to evict it. */
	if (ref->state == WT_REF_MEM &&
	    __wt_atomic_casv32(&ref->state, WT_REF_MEM, WT_REF_LOCKED)) {
		if (__wt_page_is_modified(ref->page)) {
			WT_PUBLISH(ref->state, WT_REF_MEM);
			return (0);
		}

		(void)__wt_atomic_addv32(&S2BT(session)->evict_busy, 1);
		ret = __wt_evict(session, ref, false);
		(void)__wt_atomic_subv32(&S2BT(session)->evict_busy, 1);
		WT_RET_BUSY_OK(ret);

		/*
		 * A busy eviction is tolerated, but the return value may still
		 * hold EBUSY. Clear it: the error path below returns ret, and
		 * failing the fast-delete optimization must not be reported as
		 * an error.
		 */
		ret = 0;
	}

	/*
	 * Atomically switch the page's state to lock it. If the page is not
	 * on-disk, other threads may be using it, no fast delete.
	 */
	if (ref->state != WT_REF_DISK ||
	    !__wt_atomic_casv32(&ref->state, WT_REF_DISK, WT_REF_LOCKED))
		return (0);

	/*
	 * We cannot fast-delete pages that have overflow key/value items as
	 * the overflow blocks have to be discarded. The way we figure that
	 * out is to check the page's cell type, cells for leaf pages without
	 * overflow items are special.
	 *
	 * The address is either an off-page WT_ADDR structure, in which case
	 * its type field is checked, or an on-page cell, in which case the
	 * raw cell type is checked.
	 *
	 * To look at an on-page cell, we need to look at the parent page, and
	 * that's dangerous, our parent page could change without warning if
	 * the parent page were to split, deepening the tree. It's safe: the
	 * page's reference will always point to some valid page, and if we find
	 * any problems we simply fail the fast-delete optimization.
	 */
	parent = ref->home;
	if (__wt_off_page(parent, ref->addr) ?
	    ((WT_ADDR *)ref->addr)->type != WT_ADDR_LEAF_NO :
	    __wt_cell_type_raw(ref->addr) != WT_CELL_ADDR_LEAF_NO)
		goto err;	/* Not an error: ret is 0 here. */

	/*
	 * This action dirties the parent page: mark it dirty now, there's no
	 * future reconciliation of the child leaf page that will dirty it as
	 * we write the tree.
	 */
	WT_ERR(__wt_page_parent_modify_set(session, ref, false));

	/*
	 * Record the change in the transaction structure and set the change's
	 * transaction ID.
	 */
	WT_ERR(__wt_calloc_one(session, &ref->page_del));
	ref->page_del->txnid = session->txn.id;

	WT_ERR(__wt_txn_modify_ref(session, ref));

	*skipp = true;
	WT_STAT_CONN_INCR(session, rec_page_delete_fast);
	WT_STAT_DATA_INCR(session, rec_page_delete_fast);
	WT_PUBLISH(ref->state, WT_REF_DELETED);
	return (0);

err:	__wt_free(session, ref->page_del);

	/*
	 * Restore the page to on-disk status, we'll have to instantiate it.
	 */
	WT_PUBLISH(ref->state, WT_REF_DISK);
	return (ret);
}