예제 #1
0
파일: bt_compact.c 프로젝트: Arikes/mongo
/*
 * __wt_compact --
 *	Compact a file.
 */
int
__wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
{
    WT_BM *bm;
    WT_BTREE *btree;
    WT_DECL_RET;
    WT_REF *ref;
    bool block_manager_begin, skip;

    WT_UNUSED(cfg);

    btree = S2BT(session);
    bm = btree->bm;
    ref = NULL;
    block_manager_begin = false;

    WT_STAT_FAST_DATA_INCR(session, session_compact);

    /*
     * Check if compaction might be useful -- the API layer will quit trying
     * to compact the data source if we make no progress, set a flag if the
     * block layer thinks compaction is possible.
     */
    WT_RET(bm->compact_skip(bm, session, &skip));
    if (skip)
        return (0);

    /*
     * Reviewing in-memory pages requires looking at page reconciliation
     * results, because we care about where the page is stored now, not
     * where the page was stored when we first read it into the cache.
     * We need to ensure we don't race with page reconciliation as it's
     * writing the page modify information.
     *
     * There are three ways we call reconciliation: checkpoints, threads
     * writing leaf pages (usually in preparation for a checkpoint or if
     * closing a file), and eviction.
     *
     * We're holding the schema lock which serializes with checkpoints.
     */
    WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));

    /*
     * Get the tree handle's flush lock which blocks threads writing leaf
     * pages.
     */
    __wt_spin_lock(session, &btree->flush_lock);

    /* Start compaction. */
    WT_ERR(bm->compact_start(bm, session));
    block_manager_begin = true;

    /* Walk the tree reviewing pages to see if they should be re-written. */
    for (;;) {
        /*
         * Pages read for compaction aren't "useful"; don't update the
         * read generation of pages already in memory, and if a page is
         * read, set its generation to a low value so it is evicted
         * quickly.
         */
        WT_ERR(__wt_tree_walk(session, &ref,
                              WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED));
        if (ref == NULL)
            break;

        WT_ERR(__compact_rewrite(session, ref, &skip));
        if (skip)
            continue;

        session->compact_state = WT_COMPACT_SUCCESS;

        /* Rewrite the page: mark the page and tree dirty. */
        WT_ERR(__wt_page_modify_init(session, ref->page));
        __wt_page_modify_set(session, ref->page);

        WT_STAT_FAST_DATA_INCR(session, btree_compact_rewrite);
    }

err:
    if (ref != NULL)
        WT_TRET(__wt_page_release(session, ref, 0));

    if (block_manager_begin)
        WT_TRET(bm->compact_end(bm, session));

    /* Unblock threads writing leaf pages. */
    __wt_spin_unlock(session, &btree->flush_lock);

    return (ret);
}
예제 #2
0
파일: bt_compact.c 프로젝트: ksuarz/mongo
/*
 * __wt_compact --
 *	Compact a file.
 */
int
__wt_compact(WT_SESSION_IMPL *session)
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_REF *ref;
	u_int i;
	bool skip;

	btree = S2BT(session);
	bm = btree->bm;
	ref = NULL;

	WT_STAT_DATA_INCR(session, session_compact);

	/*
	 * Check if compaction might be useful -- the API layer will quit trying
	 * to compact the data source if we make no progress, set a flag if the
	 * block layer thinks compaction is possible.
	 */
	WT_RET(bm->compact_skip(bm, session, &skip));
	if (skip)
		return (0);

	/*
	 * Reviewing in-memory pages requires looking at page reconciliation
	 * results, because we care about where the page is stored now, not
	 * where the page was stored when we first read it into the cache.
	 * We need to ensure we don't race with page reconciliation as it's
	 * writing the page modify information.
	 *
	 * There are two ways we call reconciliation: checkpoints and eviction.
	 * Get the tree's flush lock which blocks threads writing pages for
	 * checkpoints.
	 */
	__wt_spin_lock(session, &btree->flush_lock);

	/* Walk the tree reviewing pages to see if they should be re-written. */
	for (i = 0;;) {
		/* Periodically check if we've run out of time. */
		if (++i > 100) {
			WT_ERR(__wt_session_compact_check_timeout(session));
			i = 0;
		}

		/*
		 * Pages read for compaction aren't "useful"; don't update the
		 * read generation of pages already in memory, and if a page is
		 * read, set its generation to a low value so it is evicted
		 * quickly.
		 */
		WT_ERR(__wt_tree_walk(session, &ref,
		    WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED));
		if (ref == NULL)
			break;

		WT_ERR(__compact_rewrite(session, ref, &skip));
		if (skip)
			continue;

		session->compact_state = WT_COMPACT_SUCCESS;

		/* Rewrite the page: mark the page and tree dirty. */
		WT_ERR(__wt_page_modify_init(session, ref->page));
		__wt_page_modify_set(session, ref->page);

		WT_STAT_DATA_INCR(session, btree_compact_rewrite);
	}

err:	if (ref != NULL)
		WT_TRET(__wt_page_release(session, ref, 0));

	/* Unblock threads writing leaf pages. */
	__wt_spin_unlock(session, &btree->flush_lock);

	return (ret);
}
예제 #3
0
/*对文件进行compact操作*/
int __wt_compact(WT_SESSION_IMPL* session, const char* cfg[])
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_REF *ref;
	int block_manager_begin, evict_reset, skip;

	WT_UNUSED(cfg);

	conn = S2C(session);
	btree = S2BT(session);
	bm = btree->bm;
	ref = NULL;
	block_manager_begin = 0;

	WT_STAT_FAST_DATA_INCR(session, session_compact);

	/*检查bm对相应的blocks是否可以compact,如果不可以,直接返回*/
	WT_RET(bm->compact_skip(bm, session, &skip));
	if (skip)
		return 0;

	/*
	* Reviewing in-memory pages requires looking at page reconciliation
	* results, because we care about where the page is stored now, not
	* where the page was stored when we first read it into the cache.
	* We need to ensure we don't race with page reconciliation as it's
	* writing the page modify information.
	*
	* There are three ways we call reconciliation: checkpoints, threads
	* writing leaf pages (usually in preparation for a checkpoint or if
	* closing a file), and eviction.
	*
	* We're holding the schema lock which serializes with checkpoints.
	*/
	WT_ASSERT(session, F_ISSET(session, WT_SESSION_SCHEMA_LOCKED));

	/*获得btree flusk_lock,防止在文件空间compact被其他线程flush*/
	__wt_spin_lock(session, &btree->flush_lock);

	conn->compact_in_memory_pass = 1;
	WT_ERR(__wt_evict_file_exclusive_on(session, &evict_reset));
	if (evict_reset)
		__wt_evict_file_exclusive_off(session);

	WT_ERR(bm->compact_start(bm, session));
	block_manager_begin = 1;

	session->compaction = 1;
	for (;;){
		
		WT_ERR(__wt_tree_walk(session, &ref, NULL, WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED));
		if (ref == NULL)
			break;

		/*进行compact标记*/
		WT_ERR(__compact_rewrite(session, ref, &skip));
		if (skip)
			continue;

		/*如果需要compact的page需要标记为脏page,通过内存驱逐来回写compact结果*/
		WT_ERR(__wt_page_modify_init(session, ref->page));
		__wt_page_modify_set(session, ref->page);

		WT_STAT_FAST_DATA_INCR(session, btree_compact_rewrite);
	}

err:
	if (ref != NULL)
		WT_TRET(__wt_page_release(session, ref, 0));

	/*结束compact动作*/
	if (block_manager_begin)
		WT_TRET(bm->compact_end(bm, session));

	/*
	 * Unlock will be a release barrier, use it to update the compaction
	 * status for reconciliation.
	 */
	conn->compact_in_memory_pass = 0;
	__wt_spin_unlock(session, &btree->flush_lock);

	return ret;
}
예제 #4
0
/*
 * __wt_compact --
 *	Compact a file.
 */
int
__wt_compact(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_REF *ref;
	int block_manager_begin, evict_reset, skip;

	WT_UNUSED(cfg);

	conn = S2C(session);
	btree = S2BT(session);
	bm = btree->bm;
	ref = NULL;
	block_manager_begin = 0;

	WT_STAT_FAST_DATA_INCR(session, session_compact);

	/*
	 * Check if compaction might be useful -- the API layer will quit trying
	 * to compact the data source if we make no progress, set a flag if the
	 * block layer thinks compaction is possible.
	 */
	WT_RET(bm->compact_skip(bm, session, &skip));
	if (skip)
		return (0);

	/*
	 * Reviewing in-memory pages requires looking at page reconciliation
	 * results, because we care about where the page is stored now, not
	 * where the page was stored when we first read it into the cache.
	 * We need to ensure we don't race with page reconciliation as it's
	 * writing the page modify information.
	 *
	 * There are three ways we call reconciliation: checkpoints, threads
	 * writing leaf pages (usually in preparation for a checkpoint or if
	 * closing a file), and eviction.
	 *
	 * We're holding the schema lock which serializes with checkpoints.
	 */
	WT_ASSERT(session, F_ISSET(session, WT_SESSION_SCHEMA_LOCKED));

	/*
	 * Get the tree handle's flush lock which blocks threads writing leaf
	 * pages.
	 */
	__wt_spin_lock(session, &btree->flush_lock);

	/*
	 * That leaves eviction, we don't want to block eviction.  Set a flag
	 * so reconciliation knows compaction is running.  If reconciliation
	 * sees the flag it locks the page it's writing, we acquire the same
	 * lock when reading the page's modify information, serializing access.
	 * The same page lock blocks work on the page, but compaction is an
	 * uncommon, heavy-weight operation.  If it's ever a problem, there's
	 * no reason we couldn't use an entirely separate lock than the page
	 * lock.
	 *
	 * We also need to ensure we don't race with an on-going reconciliation.
	 * After we set the flag, wait for eviction of this file to drain, and
	 * then let eviction continue;
	 */
	conn->compact_in_memory_pass = 1;
	WT_ERR(__wt_evict_file_exclusive_on(session, &evict_reset));
	if (evict_reset)
		__wt_evict_file_exclusive_off(session);

	/* Start compaction. */
	WT_ERR(bm->compact_start(bm, session));
	block_manager_begin = 1;

	/* Walk the tree reviewing pages to see if they should be re-written. */
	session->compaction = 1;
	for (;;) {
		/*
		 * Pages read for compaction aren't "useful"; don't update the
		 * read generation of pages already in memory, and if a page is
		 * read, set its generation to a low value so it is evicted
		 * quickly.
		 */
		WT_ERR(__wt_tree_walk(session, &ref, NULL,
		    WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED));
		if (ref == NULL)
			break;

		WT_ERR(__compact_rewrite(session, ref, &skip));
		if (skip)
			continue;

		/* Rewrite the page: mark the page and tree dirty. */
		WT_ERR(__wt_page_modify_init(session, ref->page));
		__wt_page_modify_set(session, ref->page);

		WT_STAT_FAST_DATA_INCR(session, btree_compact_rewrite);
	}

err:	if (ref != NULL)
		WT_TRET(__wt_page_release(session, ref, 0));

	if (block_manager_begin)
		WT_TRET(bm->compact_end(bm, session));

	/*
	 * Unlock will be a release barrier, use it to update the compaction
	 * status for reconciliation.
	 */
	conn->compact_in_memory_pass = 0;
	__wt_spin_unlock(session, &btree->flush_lock);

	return (ret);
}