Exemple #1
0
/*
 * __compact_rewrite --
 *	Return if a page needs to be re-written.
 */
static int
__compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
	WT_BM *bm;
	WT_DECL_RET;
	WT_PAGE *page;
	WT_PAGE_MODIFY *mod;
	size_t addr_size;
	const uint8_t *addr;

	*skipp = true;					/* Default skip. */

	bm = S2BT(session)->bm;
	page = ref->page;
	mod = page->modify;

	/*
	 * Ignore the root: it may not have a replacement address, and besides,
	 * if anything else gets written, so will it.
	 */
	if (__wt_ref_is_root(ref))
		return (0);

	/* Ignore currently dirty pages, they will be written regardless. */
	if (__wt_page_is_modified(page))
		return (0);

	/*
	 * If the page is clean, test the original addresses.
	 * If the page is a 1-to-1 replacement, test the replacement addresses.
	 * Ignore empty pages, they get merged into the parent.
	 */
	if (mod == NULL || mod->rec_result == 0) {
		WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL));
		if (addr == NULL)
			return (0);
		WT_RET(
		    bm->compact_page_skip(bm, session, addr, addr_size, skipp));
	} else if (mod->rec_result == WT_PM_REC_REPLACE) {
		/*
		 * The page's modification information can change underfoot if
		 * the page is being reconciled, serialize with reconciliation.
		 */
		WT_RET(__wt_fair_lock(session, &page->page_lock));

		ret = bm->compact_page_skip(bm, session,
		    mod->mod_replace.addr, mod->mod_replace.size, skipp);

		WT_TRET(__wt_fair_unlock(session, &page->page_lock));
		WT_RET(ret);
	}
	return (0);
}
Exemple #2
0
/*
 * __wt_compact_page_skip --
 *	Return if compaction requires we read this page.
 */
int
__wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
    WT_BM *bm;
    size_t addr_size;
    u_int type;
    const uint8_t *addr;

    *skipp = false;				/* Default to reading. */
    type = 0;				/* Keep compiler quiet. */

    bm = S2BT(session)->bm;

    /*
     * We aren't holding a hazard pointer, so we can't look at the page
     * itself, all we can look at is the WT_REF information.  If there's no
     * address, the page isn't on disk, but we have to read internal pages
     * to walk the tree regardless; throw up our hands and read it.
     */
    __wt_ref_info(ref, &addr, &addr_size, &type);
    if (addr == NULL)
        return (0);

    /*
     * Internal pages must be read to walk the tree; ask the block-manager
     * if it's useful to rewrite leaf pages, don't do the I/O if a rewrite
     * won't help.
     */
    return (type == WT_CELL_ADDR_INT ? 0 :
            bm->compact_page_skip(bm, session, addr, addr_size, skipp));
}
Exemple #3
0
/*
 * __wt_compact_page_skip --
 *	Return if the block-manager wants us to re-write this page.
 */
int
__wt_compact_page_skip(
    WT_SESSION_IMPL *session, WT_PAGE *parent, WT_REF *ref, int *skipp)
{
	WT_BM *bm;
	uint32_t addr_size;
	const uint8_t *addr;

	bm = S2BT(session)->bm;

	/*
	 * There's one compaction test we do before we read the page, to see
	 * if the block-manager thinks it useful to rewrite the page.  If a
	 * rewrite won't help, we don't want to do I/O for nothing.  For that
	 * reason, this check is done in a call from inside the tree-walking
	 * routine.
	 *
	 * Ignore everything but on-disk pages, we've already done a pass over
	 * the in-memory pages.
	 */
	if (ref->state != WT_REF_DISK) {
		*skipp = 1;
		return (0);
	}

	__wt_get_addr(parent, ref, &addr, &addr_size);
	if (addr == NULL) {
		*skipp = 1;
		return (0);
	}

	return (bm->compact_page_skip(bm, session, addr, addr_size, skipp));
}
Exemple #4
0
/*将多余的文件空间compact到合适的位置,如果ref在compact范围内,返回skip = 1,表示文件空间不能进行compact*/
static int __compact_rewrite(WT_SESSION_IMPL* session, WT_REF* ref, int* skipp)
{
	WT_BM *bm;
	WT_DECL_RET;
	WT_PAGE *page;
	WT_PAGE_MODIFY *mod;
	size_t addr_size;
	const uint8_t *addr;

	*skipp = 1;	

	bm = S2BT(session)->bm;
	page = ref->page;
	mod = page->modify;

	/*root page是不能被compact*/
	if (__wt_ref_is_root(ref))
		return 0;

	/*ref指向的是个脏页,不进行compact*/
	if (__wt_page_is_modified(page))
		return (0);

	/*假如page一已经被清空的,直接判断是否可以它的block空间compact*/
	if (mod == NULL || F_ISSET(mod, WT_PM_REC_MASK) == 0) {
		WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL));
		if (addr == NULL)
			return (0);
		WT_RET(bm->compact_page_skip(bm, session, addr, addr_size, skipp));
	}
	else if (F_ISSET(mod, WT_PM_REC_MASK) == WT_PM_REC_REPLACE){ /*如果page空间是替换,那么进行替换block的compact操作判断*/
		WT_PAGE_LOCK(session, page);
		ret = bm->compact_page_skip(bm, session, mod->mod_replace.addr, mod->mod_replace.size, skipp);
		WT_PAGE_UNLOCK(session, page);
		WT_RET(ret);
	}

	return 0;
}
Exemple #5
0
/*在读取ref对应的page时,检查它是否需要compact*/
int __wt_compact_page_skip(WT_SESSION_IMPL* session, WT_REF* ref, int* skipp)
{
	WT_BM *bm;
	size_t addr_size;
	u_int type;
	const uint8_t *addr;

	*skipp = 0;				
	type = 0;

	bm = S2BT(session)->bm;

	/*
	* We aren't holding a hazard pointer, so we can't look at the page
	* itself, all we can look at is the WT_REF information.  If there's no
	* address, the page isn't on disk, but we have to read internal pages
	* to walk the tree regardless; throw up our hands and read it.
	*/
	WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, &type));
	if (addr == NULL)
		return 0;

	return (type == WT_CELL_ADDR_INT ? 0 : bm->compact_page_skip(bm, session, addr, addr_size, skipp));
}
Exemple #6
0
/*
 * __compact_rewrite --
 *	Return if a page needs to be re-written.
 */
static int
__compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
    WT_BM *bm;
    WT_DECL_RET;
    WT_MULTI *multi;
    WT_PAGE *page;
    WT_PAGE_MODIFY *mod;
    size_t addr_size;
    uint32_t i;
    const uint8_t *addr;

    *skipp = true;					/* Default skip. */

    bm = S2BT(session)->bm;
    page = ref->page;
    mod = page->modify;

    /*
     * Ignore the root: it may not have a replacement address, and besides,
     * if anything else gets written, so will it.
     */
    if (__wt_ref_is_root(ref))
        return (0);

    /* Ignore currently dirty pages, they will be written regardless. */
    if (__wt_page_is_modified(page))
        return (0);

    /*
     * If the page is clean, test the original addresses.
     * If the page is a replacement, test the replacement addresses.
     * Ignore empty pages, they get merged into the parent.
     */
    if (mod == NULL || mod->rec_result == 0) {
        __wt_ref_info(ref, &addr, &addr_size, NULL);
        if (addr == NULL)
            return (0);
        return (
                   bm->compact_page_skip(bm, session, addr, addr_size, skipp));
    }

    /*
     * The page's modification information can change underfoot if the page
     * is being reconciled, serialize with reconciliation.
     */
    if (mod->rec_result == WT_PM_REC_REPLACE ||
            mod->rec_result == WT_PM_REC_MULTIBLOCK)
        WT_RET(__wt_fair_lock(session, &page->page_lock));

    if (mod->rec_result == WT_PM_REC_REPLACE)
        ret = bm->compact_page_skip(bm, session,
                                    mod->mod_replace.addr, mod->mod_replace.size, skipp);

    if (mod->rec_result == WT_PM_REC_MULTIBLOCK)
        for (multi = mod->mod_multi,
                i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
            if (multi->disk_image != NULL)
                continue;
            if ((ret = bm->compact_page_skip(bm, session,
                                             multi->addr.addr, multi->addr.size, skipp)) != 0)
                break;
            if (!*skipp)
                break;
        }

    if (mod->rec_result == WT_PM_REC_REPLACE ||
            mod->rec_result == WT_PM_REC_MULTIBLOCK)
        WT_TRET(__wt_fair_unlock(session, &page->page_lock));

    return (ret);
}
Exemple #7
0
/*
 * __wt_compact_evict --
 *	Helper routine to decide if a file's size would benefit from re-writing
 * this page.
 */
int
__wt_compact_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
{
	WT_BM *bm;
	WT_PAGE_MODIFY *mod;
	int skip;
	uint32_t addr_size;
	const uint8_t *addr;

	bm = S2BT(session)->bm;
	mod = page->modify;

	/*
	 * We have to review page reconciliation information as an in-memory
	 * page's original disk addresses might have been fine for compaction
	 * but its replacement addresses might be a problem.  To review page
	 * reconciliation information, we have to lock out both eviction and
	 * checkpoints, as those are the other two operations that can write
	 * a page.
	 *
	 * Ignore the root: it may not have a replacement address, and besides,
	 * if anything else gets written, so will it.
	 */
	if (WT_PAGE_IS_ROOT(page))
		return (0);

	/*
	 * If the page is already dirty, skip some work, it will be written in
	 * any case.
	 */
	if (__wt_page_is_modified(page))
		return (0);

	/*
	 * If the page is clean, test the original addresses.
	 * If the page is a 1-to-1 replacement, test the replacement addresses.
	 * If the page is a split, ignore it, it will be merged into the parent.
	 */
	if (mod == NULL)
		goto disk;

	switch (F_ISSET(mod, WT_PM_REC_MASK)) {
	case 0:
disk:		__wt_get_addr(page->parent, page->ref, &addr, &addr_size);
		if (addr == NULL)
			return (0);
		WT_RET(
		    bm->compact_page_skip(bm, session, addr, addr_size, &skip));
		if (skip)
			return (0);
		break;
	case WT_PM_REC_EMPTY:
		return (0);
	case WT_PM_REC_REPLACE:
		WT_RET(bm->compact_page_skip(bm,
		    session, mod->u.replace.addr, mod->u.replace.size, &skip));
		if (skip)
			return (0);
		break;
	case WT_PM_REC_SPLIT:
	case WT_PM_REC_SPLIT_MERGE:
		return (0);
	}

	/* Mark the page and tree dirty, we want to write this page. */
	WT_RET(__wt_page_modify_init(session, page));
	__wt_page_and_tree_modify_set(session, page);

	WT_DSTAT_INCR(session, btree_compact_rewrite);
	return (0);
}
Exemple #8
0
/*
 * __wt_compact_page_skip --
 *	Return if compaction requires we read this page.
 */
int
__wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
	WT_BM *bm;
	WT_DECL_RET;
	size_t addr_size;
	u_int type;
	const uint8_t *addr;

	/*
	 * Skip deleted pages, rewriting them doesn't seem useful; in a better
	 * world we'd write the parent to delete the page.
	 */
	if (ref->state == WT_REF_DELETED) {
		*skipp = true;
		return (0);
	}

	*skipp = false;				/* Default to reading */

	/*
	 * If the page is in-memory, we want to look at it (it may have been
	 * modified and written, and the current location is the interesting
	 * one in terms of compaction, not the original location).
	 *
	 * This test could be combined with the next one, but this is a cheap
	 * test and the next one is expensive.
	 */
	if (ref->state != WT_REF_DISK)
		return (0);

	/*
	 * There's nothing to prevent the WT_REF state from changing underfoot,
	 * which can change its address. For example, the WT_REF address might
	 * reference an on-page cell, and page eviction can free that memory.
	 * Lock the WT_REF so we can look at its address.
	 */
	if (!__wt_atomic_casv32(&ref->state, WT_REF_DISK, WT_REF_LOCKED))
		return (0);

	/*
	 * The page is on disk, so there had better be an address; assert that
	 * fact, test at run-time to avoid the core dump.
	 *
	 * Internal pages must be read to walk the tree; ask the block-manager
	 * if it's useful to rewrite leaf pages, don't do the I/O if a rewrite
	 * won't help.
	 */
	__wt_ref_info(ref, &addr, &addr_size, &type);
	WT_ASSERT(session, addr != NULL);
	if (addr != NULL && type != WT_CELL_ADDR_INT) {
		bm = S2BT(session)->bm;
		ret = bm->compact_page_skip(
		    bm, session, addr, addr_size, skipp);
	}

	/*
	 * Reset the WT_REF state and push the change. The full-barrier isn't
	 * necessary, but it's better to keep pages in circulation than not.
	 */
	ref->state = WT_REF_DISK;
	WT_FULL_BARRIER();

	return (ret);
}