/*
 * __wt_compact --
 *	Compact a file.
 *
 * Reads the "trigger" value from the configuration, asks the block manager
 * whether compaction should be skipped, and if not: reviews in-memory pages
 * under the connection's metadata lock, then walks the tree and marks every
 * page the walk returns (and the tree) dirty so it is re-written.
 *
 * Returns 0 on success or when the block manager says to skip; any non-zero
 * value from a called function is passed back to the caller.
 */
int
__wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_BM *bm;
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	WT_PAGE *page;
	int trigger, skip;

	bm = S2BT(session)->bm;

	WT_DSTAT_INCR(session, session_compact);

	WT_RET(__wt_config_gets(session, cfg, "trigger", &cval));
	trigger = (int)cval.val;

	/* Check if compaction might be useful. */
	WT_RET(bm->compact_skip(bm, session, trigger, &skip));
	if (skip)
		return (0);

	/*
	 * Walk the cache reviewing in-memory pages to see if they need to be
	 * re-written.  This requires looking at page reconciliation results,
	 * which means the page cannot be reconciled at the same time as it's
	 * being reviewed for compaction.  The underlying functions ensure we
	 * don't collide with page eviction, but we need to make sure we don't
	 * collide with checkpoints either, they are the other operation that
	 * can reconcile a page.
	 *
	 * Capture the result and release the lock before checking it: an
	 * early return while the lock is held would leave the metadata lock
	 * locked forever.
	 */
	__wt_spin_lock(session, &S2C(session)->metadata_lock);
	ret = __wt_bt_cache_op(session, NULL, WT_SYNC_COMPACT);
	__wt_spin_unlock(session, &S2C(session)->metadata_lock);
	WT_RET(ret);

	/*
	 * Walk the tree, reviewing on-disk pages to see if they need to be
	 * re-written.
	 */
	for (page = NULL;;) {
		WT_RET(__wt_tree_walk(session, &page, WT_TREE_COMPACT));
		if (page == NULL)
			break;

		/*
		 * The only pages returned by the tree walk function are pages
		 * we want to re-write; mark the page and tree dirty.  If the
		 * modify structure can't be set up, release the page before
		 * returning the error.
		 */
		if ((ret = __wt_page_modify_init(session, page)) != 0) {
			WT_TRET(__wt_page_release(session, page));
			WT_RET(ret);
		}
		__wt_page_and_tree_modify_set(session, page);

		WT_DSTAT_INCR(session, btree_compact_rewrite);
	}

	return (0);
}
/*
 * __wt_compact --
 *	Compact a file.
 *
 * Asks the block manager whether compaction is worth attempting; if it is,
 * takes the btree flush lock, starts block-manager compaction and walks the
 * tree, marking dirty every page that __compact_rewrite does not ask us to
 * skip.  The cfg argument is not used.
 *
 * Returns 0 if the block manager says to skip, otherwise the accumulated
 * value of ret after cleanup.
 */
int
__wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_BM *block_mgr;
	WT_BTREE *tree;
	WT_DECL_RET;
	WT_REF *walk_ref;
	bool started, skip_it;

	WT_UNUSED(cfg);

	tree = S2BT(session);
	block_mgr = tree->bm;
	started = false;
	walk_ref = NULL;

	WT_STAT_FAST_DATA_INCR(session, session_compact);

	/*
	 * The API layer stops retrying compaction of the data source when no
	 * progress is made; the block layer tells us here whether compaction
	 * looks possible at all.
	 */
	WT_RET(block_mgr->compact_skip(block_mgr, session, &skip_it));
	if (skip_it)
		return (0);

	/*
	 * For pages in memory we care about where the page is stored now,
	 * not where it was when first read into the cache, so we look at
	 * page reconciliation results.  That means we must not race with
	 * reconciliation while it writes the page modify information.
	 *
	 * Reconciliation is called from three places: checkpoints, threads
	 * writing leaf pages (usually ahead of a checkpoint or when closing
	 * a file), and eviction.
	 *
	 * Checkpoints are serialized by the schema lock, which we hold.
	 */
	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));

	/* Threads writing leaf pages are blocked by the flush lock. */
	__wt_spin_lock(session, &tree->flush_lock);

	/* Tell the block manager compaction is starting. */
	WT_ERR(block_mgr->compact_start(block_mgr, session));
	started = true;

	/* Visit each page and decide whether it should be re-written. */
	for (;;) {
		/*
		 * Reading a page for compaction doesn't make it "useful":
		 * leave the read generation of in-memory pages alone, and
		 * give newly read pages a low generation so they are evicted
		 * quickly.
		 */
		WT_ERR(__wt_tree_walk(session, &walk_ref,
		    WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED));
		if (walk_ref == NULL)
			break;

		WT_ERR(__compact_rewrite(session, walk_ref, &skip_it));
		if (!skip_it) {
			session->compact_state = WT_COMPACT_SUCCESS;

			/* Re-write the page by marking page and tree dirty. */
			WT_ERR(__wt_page_modify_init(session, walk_ref->page));
			__wt_page_modify_set(session, walk_ref->page);

			WT_STAT_FAST_DATA_INCR(session, btree_compact_rewrite);
		}
	}

err:
	/* Release any page reference still held from the walk. */
	if (walk_ref != NULL)
		WT_TRET(__wt_page_release(session, walk_ref, 0));

	/* Only end compaction if it was successfully started. */
	if (started)
		WT_TRET(block_mgr->compact_end(block_mgr, session));

	/* Let threads writing leaf pages proceed again. */
	__wt_spin_unlock(session, &tree->flush_lock);

	return (ret);
}
/*
 * __wt_compact --
 *	Perform a compact operation on a file.
 *
 * Asks the block manager whether the file can be compacted; if so, takes the
 * btree flush lock, flags the in-memory compaction pass on the connection,
 * starts block-manager compaction and walks the tree, marking dirty every
 * page that __compact_rewrite does not ask us to skip.  The cfg argument is
 * not used.
 */
int
__wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_BM *block_mgr;
	WT_BTREE *tree;
	WT_CONNECTION_IMPL *connection;
	WT_DECL_RET;
	WT_REF *walk_ref;
	int started, evict_was_reset, skip_it;

	WT_UNUSED(cfg);

	connection = S2C(session);
	tree = S2BT(session);
	block_mgr = tree->bm;
	started = 0;
	walk_ref = NULL;

	WT_STAT_FAST_DATA_INCR(session, session_compact);

	/*
	 * Ask the block manager whether the blocks can be compacted; if not,
	 * return immediately.
	 */
	WT_RET(block_mgr->compact_skip(block_mgr, session, &skip_it));
	if (skip_it)
		return 0;

	/*
	 * For pages in memory we care about where the page is stored now,
	 * not where it was when first read into the cache, so we look at
	 * page reconciliation results.  That means we must not race with
	 * reconciliation while it writes the page modify information.
	 *
	 * Reconciliation is called from three places: checkpoints, threads
	 * writing leaf pages (usually ahead of a checkpoint or when closing
	 * a file), and eviction.
	 *
	 * Checkpoints are serialized by the schema lock, which we hold.
	 */
	WT_ASSERT(session, F_ISSET(session, WT_SESSION_SCHEMA_LOCKED));

	/*
	 * Take the btree flush lock so other threads cannot flush while the
	 * file's space is being compacted.
	 */
	__wt_spin_lock(session, &tree->flush_lock);

	connection->compact_in_memory_pass = 1;
	WT_ERR(__wt_evict_file_exclusive_on(session, &evict_was_reset));
	if (evict_was_reset)
		__wt_evict_file_exclusive_off(session);

	WT_ERR(block_mgr->compact_start(block_mgr, session));
	started = 1;

	session->compaction = 1;
	for (;;) {
		WT_ERR(__wt_tree_walk(session, &walk_ref, NULL,
		    WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED));
		if (walk_ref == NULL)
			break;

		/* Decide whether this page is to be compacted. */
		WT_ERR(__compact_rewrite(session, walk_ref, &skip_it));
		if (!skip_it) {
			/*
			 * A page that needs compacting is marked dirty;
			 * eviction then writes the compacted result back.
			 */
			WT_ERR(__wt_page_modify_init(session, walk_ref->page));
			__wt_page_modify_set(session, walk_ref->page);

			WT_STAT_FAST_DATA_INCR(session, btree_compact_rewrite);
		}
	}

err:
	/* Release any page reference still held from the walk. */
	if (walk_ref != NULL)
		WT_TRET(__wt_page_release(session, walk_ref, 0));

	/* End the compaction, but only if it was successfully started. */
	if (started)
		WT_TRET(block_mgr->compact_end(block_mgr, session));

	/*
	 * Unlock will be a release barrier, use it to update the compaction
	 * status for reconciliation.
	 */
	connection->compact_in_memory_pass = 0;
	__wt_spin_unlock(session, &tree->flush_lock);

	return ret;
}
/*
 * __wt_compact --
 *	Compact a file.
 *
 * Asks the block manager whether compaction is worth attempting; if it is,
 * walks the tree under the btree flush lock, marking dirty every page that
 * __compact_rewrite does not ask us to skip, and checking the session's
 * compaction timeout every 100 loop iterations.
 *
 * Returns 0 if the block manager says to skip, otherwise the accumulated
 * value of ret after cleanup.
 */
int
__wt_compact(WT_SESSION_IMPL *session)
{
	WT_BM *block_mgr;
	WT_BTREE *tree;
	WT_DECL_RET;
	WT_REF *walk_ref;
	u_int steps;
	bool skip_it;

	tree = S2BT(session);
	block_mgr = tree->bm;
	walk_ref = NULL;

	WT_STAT_DATA_INCR(session, session_compact);

	/*
	 * The API layer stops retrying compaction of the data source when no
	 * progress is made; the block layer tells us here whether compaction
	 * looks possible at all.
	 */
	WT_RET(block_mgr->compact_skip(block_mgr, session, &skip_it));
	if (skip_it)
		return (0);

	/*
	 * For pages in memory we care about where the page is stored now,
	 * not where it was when first read into the cache, so we look at
	 * page reconciliation results.  That means we must not race with
	 * reconciliation while it writes the page modify information.
	 *
	 * Reconciliation is called from two places: checkpoints and
	 * eviction.  The tree's flush lock blocks threads writing pages for
	 * checkpoints.
	 */
	__wt_spin_lock(session, &tree->flush_lock);

	/* Visit each page and decide whether it should be re-written. */
	steps = 0;
	for (;;) {
		/* Every so often, see whether we have run out of time. */
		++steps;
		if (steps > 100) {
			WT_ERR(__wt_session_compact_check_timeout(session));
			steps = 0;
		}

		/*
		 * Reading a page for compaction doesn't make it "useful":
		 * leave the read generation of in-memory pages alone, and
		 * give newly read pages a low generation so they are evicted
		 * quickly.
		 */
		WT_ERR(__wt_tree_walk(session, &walk_ref,
		    WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED));
		if (walk_ref == NULL)
			break;

		WT_ERR(__compact_rewrite(session, walk_ref, &skip_it));
		if (!skip_it) {
			session->compact_state = WT_COMPACT_SUCCESS;

			/* Re-write the page by marking page and tree dirty. */
			WT_ERR(__wt_page_modify_init(session, walk_ref->page));
			__wt_page_modify_set(session, walk_ref->page);

			WT_STAT_DATA_INCR(session, btree_compact_rewrite);
		}
	}

err:
	/* Release any page reference still held from the walk. */
	if (walk_ref != NULL)
		WT_TRET(__wt_page_release(session, walk_ref, 0));

	/* Let threads writing leaf pages proceed again. */
	__wt_spin_unlock(session, &tree->flush_lock);

	return (ret);
}
/*
 * __wt_compact --
 *	Compact a file.
 *
 * Asks the block manager whether compaction is worth attempting; if it is,
 * takes the btree flush lock, flags the in-memory compaction pass on the
 * connection, starts block-manager compaction and walks the tree, marking
 * dirty every page that __compact_rewrite does not ask us to skip.  The cfg
 * argument is not used.
 *
 * Returns 0 if the block manager says to skip, otherwise the accumulated
 * value of ret after cleanup.
 */
int
__wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_BM *blkmgr;
	WT_BTREE *bt;
	WT_CONNECTION_IMPL *connection;
	WT_DECL_RET;
	WT_REF *cur;
	int bm_started, evict_was_reset, skip_page;

	WT_UNUSED(cfg);

	connection = S2C(session);
	bt = S2BT(session);
	blkmgr = bt->bm;
	bm_started = 0;
	cur = NULL;

	WT_STAT_FAST_DATA_INCR(session, session_compact);

	/*
	 * The API layer stops retrying compaction of the data source when no
	 * progress is made; the block layer tells us here whether compaction
	 * looks possible at all.
	 */
	WT_RET(blkmgr->compact_skip(blkmgr, session, &skip_page));
	if (skip_page)
		return (0);

	/*
	 * For pages in memory we care about where the page is stored now,
	 * not where it was when first read into the cache, so we look at
	 * page reconciliation results.  That means we must not race with
	 * reconciliation while it writes the page modify information.
	 *
	 * Reconciliation is called from three places: checkpoints, threads
	 * writing leaf pages (usually ahead of a checkpoint or when closing
	 * a file), and eviction.
	 *
	 * Checkpoints are serialized by the schema lock, which we hold.
	 */
	WT_ASSERT(session, F_ISSET(session, WT_SESSION_SCHEMA_LOCKED));

	/* Threads writing leaf pages are blocked by the flush lock. */
	__wt_spin_lock(session, &bt->flush_lock);

	/*
	 * Eviction is the remaining case, and we don't want to block it.
	 * Instead, set a flag telling reconciliation that compaction is
	 * running: when reconciliation sees the flag it locks the page it is
	 * writing, and we take the same lock to read the page's modify
	 * information, which serializes access.  That page lock also blocks
	 * work on the page, but compaction is an uncommon, heavy-weight
	 * operation; if it ever matters, a separate lock could be used.
	 *
	 * A reconciliation already in progress is also a race: once the flag
	 * is set, wait for eviction of this file to drain and then let
	 * eviction carry on.
	 */
	connection->compact_in_memory_pass = 1;
	WT_ERR(__wt_evict_file_exclusive_on(session, &evict_was_reset));
	if (evict_was_reset)
		__wt_evict_file_exclusive_off(session);

	/* Tell the block manager compaction is starting. */
	WT_ERR(blkmgr->compact_start(blkmgr, session));
	bm_started = 1;

	/* Visit each page and decide whether it should be re-written. */
	session->compaction = 1;
	for (;;) {
		/*
		 * Reading a page for compaction doesn't make it "useful":
		 * leave the read generation of in-memory pages alone, and
		 * give newly read pages a low generation so they are evicted
		 * quickly.
		 */
		WT_ERR(__wt_tree_walk(session, &cur, NULL,
		    WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED));
		if (cur == NULL)
			break;

		WT_ERR(__compact_rewrite(session, cur, &skip_page));
		if (!skip_page) {
			/* Re-write the page by marking page and tree dirty. */
			WT_ERR(__wt_page_modify_init(session, cur->page));
			__wt_page_modify_set(session, cur->page);

			WT_STAT_FAST_DATA_INCR(session, btree_compact_rewrite);
		}
	}

err:
	/* Release any page reference still held from the walk. */
	if (cur != NULL)
		WT_TRET(__wt_page_release(session, cur, 0));

	/* Only end compaction if it was successfully started. */
	if (bm_started)
		WT_TRET(blkmgr->compact_end(blkmgr, session));

	/*
	 * Unlock will be a release barrier, use it to update the compaction
	 * status for reconciliation.
	 */
	connection->compact_in_memory_pass = 0;
	__wt_spin_unlock(session, &bt->flush_lock);

	return (ret);
}