예제 #1
0
파일: bt_read.c 프로젝트: qihsh/mongo
/*
 * __evict_force_check --
 *	Check if a page matches the criteria for forced eviction.
 */
static int
__evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page)
{
	WT_BTREE *btree;

	btree = S2BT(session);

	/* Pages are usually small enough, check that first. */
	if (page->memory_footprint < btree->maxmempage)
		return (0);

	/* Leaf pages only. */
	if (WT_PAGE_IS_INTERNAL(page))
		return (0);

	/*
	 * It's hard to imagine a page with a huge memory footprint that has
	 * never been modified, but check to be sure.
	 */
	if (page->modify == NULL)
		return (0);

	/* Trigger eviction on the next page release. */
	__wt_page_evict_soon(page);

	/* Bump the oldest ID, we're about to do some visibility checks. */
	__wt_txn_update_oldest(session, 0);

	/* If eviction cannot succeed, don't try. */
	return (__wt_page_can_evict(session, page, 1, NULL));
}
예제 #2
0
/*
 * __evict_force_check --
 *	Check if a page matches the criteria for forced eviction.
 */
static int
__evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
{
	WT_BTREE *btree;

	btree = S2BT(session);

	/* Pages are usually small enough, check that first. */
	if (page->memory_footprint < btree->maxmempage)
		return (0);

	/* Leaf pages only. */
	if (WT_PAGE_IS_INTERNAL(page))
		return (0);

	/* Eviction may be turned off. */
	if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(btree, WT_BTREE_NO_EVICTION))
		return (0);

	/*
	 * It's hard to imagine a page with a huge memory footprint that has
	 * never been modified, but check to be sure.
	 */
	if (page->modify == NULL)
		return (0);

	/* Trigger eviction on the next page release. */
	__wt_page_evict_soon(page);

	/* If eviction cannot succeed, don't try. */
	return (__wt_page_can_evict(session, page, 1));
}
예제 #3
0
/*
 * __evict_force_check --
 *	Check if a page matches the criteria for forced eviction.
 */
static bool
__evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
{
	WT_BTREE *btree;
	WT_PAGE *page;

	btree = S2BT(session);
	page = ref->page;

	/* Leaf pages only. */
	if (WT_PAGE_IS_INTERNAL(page))
		return (false);

	/*
	 * It's hard to imagine a page with a huge memory footprint that has
	 * never been modified, but check to be sure.
	 */
	if (page->modify == NULL)
		return (false);

	/* Pages are usually small enough, check that first. */
	if (page->memory_footprint < btree->splitmempage)
		return (false);

	/*
	 * If this session has more than one hazard pointer, eviction will fail
	 * and there is no point trying.
	 */
	if (__wt_hazard_count(session, page) > 1)
		return (false);

	/*
	 * If we have already tried and the transaction state has not moved on,
	 * eviction is highly likely to fail.
	 */
	if (page->modify->last_eviction_id == __wt_txn_oldest_id(session))
		return (false);

	if (page->memory_footprint < btree->maxmempage)
		return (__wt_leaf_page_can_split(session, page));

	/* Trigger eviction on the next page release. */
	__wt_page_evict_soon(session, ref);

	/* Bump the oldest ID, we're about to do some visibility checks. */
	WT_IGNORE_RET(__wt_txn_update_oldest(session, 0));

	/* If eviction cannot succeed, don't try. */
	return (__wt_page_can_evict(session, ref, NULL));
}
예제 #4
0
/*
 * __wt_evict_file --
 *	Discard pages for a specific file.
 */
int
__wt_evict_file(WT_SESSION_IMPL *session, int syncop)
{
	WT_DECL_RET;
	WT_PAGE *page;
	WT_REF *next_ref, *ref;
	bool evict_reset;

	/*
	 * We need exclusive access to the file -- disable ordinary eviction
	 * and drain any blocks already queued.
	 */
	WT_RET(__wt_evict_file_exclusive_on(session, &evict_reset));

	/* Make sure the oldest transaction ID is up-to-date. */
	__wt_txn_update_oldest(session, true);

	/* Walk the tree, discarding pages. */
	next_ref = NULL;
	WT_ERR(__wt_tree_walk(session, &next_ref, NULL,
	    WT_READ_CACHE | WT_READ_NO_EVICT));
	while ((ref = next_ref) != NULL) {
		page = ref->page;

		/*
		 * Eviction can fail when a page in the evicted page's subtree
		 * switches state.  For example, if we don't evict a page marked
		 * empty, because we expect it to be merged into its parent, it
		 * might no longer be empty after it's reconciled, in which case
		 * eviction of its parent would fail.  We can either walk the
		 * tree multiple times (until it's finally empty), or reconcile
		 * each page to get it to its final state before considering if
		 * it's an eviction target or will be merged into its parent.
		 *
		 * Don't limit this test to any particular page type, that tends
		 * to introduce bugs when the reconciliation of other page types
		 * changes, and there's no advantage to doing so.
		 *
		 * Eviction can also fail because an update cannot be written.
		 * If sessions have disjoint sets of files open, updates in a
		 * no-longer-referenced file may not yet be globally visible,
		 * and the write will fail with EBUSY.  Our caller handles that
		 * error, retrying later.
		 */
		if (syncop == WT_SYNC_CLOSE && __wt_page_is_modified(page))
			WT_ERR(__wt_reconcile(session, ref, NULL, WT_EVICTING));

		/*
		 * We can't evict the page just returned to us (it marks our
		 * place in the tree), so move the walk to one page ahead of
		 * the page being evicted.  Note, we reconciled the returned
		 * page first: if reconciliation of that page were to change
		 * the shape of the tree, and we did the next walk call before
		 * the reconciliation, the next walk call could miss a page in
		 * the tree.
		 */
		WT_ERR(__wt_tree_walk(session, &next_ref, NULL,
		    WT_READ_CACHE | WT_READ_NO_EVICT));

		switch (syncop) {
		case WT_SYNC_CLOSE:
			/*
			 * Evict the page.
			 */
			WT_ERR(__wt_evict(session, ref, 1));
			break;
		case WT_SYNC_DISCARD:
			/*
			 * Dead handles may reference dirty pages; clean the
			 * page, both to keep statistics correct, and to let
			 * the page-discard function assert no dirty page is
			 * ever discarded.
			 */
			if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
				__wt_page_modify_clear(session, page);

			WT_ASSERT(session,
			    F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
			    __wt_page_can_evict(session, ref, false, NULL));
			__wt_evict_page_clean_update(session, ref, 1);
			break;
		WT_ILLEGAL_VALUE_ERR(session);
		}
	}

	if (0) {
err:		/* On error, clear any left-over tree walk. */
		if (next_ref != NULL)
			WT_TRET(__wt_page_release(
			    session, next_ref, WT_READ_NO_EVICT));
	}

	if (evict_reset)
		__wt_evict_file_exclusive_off(session);

	return (ret);
}
예제 #5
0
파일: evict_stat.c 프로젝트: ajdavis/mongo
/*
 * __evict_stat_walk --
 *	Walk all the pages in cache for a dhandle gathering stats information
 */
static void
__evict_stat_walk(WT_SESSION_IMPL *session)
{
	WT_BTREE *btree;
	WT_CACHE *cache;
	WT_PAGE *page;
	WT_REF *next_walk;
	uint64_t dsk_size, gen_gap, gen_gap_max, gen_gap_sum, max_pagesize;
	uint64_t min_written_size, num_memory, num_not_queueable, num_queued;
	uint64_t num_smaller_allocsz, pages_clean, pages_dirty, pages_internal;
	uint64_t pages_leaf, seen_count, size, visited_count;
	uint64_t visited_age_gap_sum, unvisited_count, unvisited_age_gap_sum;
	uint64_t walk_count, written_size_cnt, written_size_sum;

	btree = S2BT(session);
	cache = S2C(session)->cache;
	next_walk = NULL;
	gen_gap_max = gen_gap_sum = max_pagesize = 0;
	num_memory = num_not_queueable = num_queued = 0;
	num_smaller_allocsz = pages_clean = pages_dirty = pages_internal = 0;
	pages_leaf = seen_count = size = visited_count = 0;
	visited_age_gap_sum = unvisited_count = unvisited_age_gap_sum = 0;
	walk_count = written_size_cnt = written_size_sum = 0;
	min_written_size = UINT64_MAX;

	while (__wt_tree_walk_count(session, &next_walk, &walk_count,
	    WT_READ_CACHE | WT_READ_NO_EVICT |
	    WT_READ_NO_GEN | WT_READ_NO_WAIT) == 0 &&
	    next_walk != NULL) {
		++seen_count;
		page = next_walk->page;
		size = page->memory_footprint;

		if (__wt_page_is_modified(page))
			++pages_dirty;
		else
			++pages_clean;

		if (!__wt_ref_is_root(next_walk) &&
		    !__wt_page_can_evict(session, next_walk, NULL))
			++num_not_queueable;

		if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
			++num_queued;

		if (size > max_pagesize)
			max_pagesize = size;

		dsk_size = page->dsk != NULL ? page->dsk->mem_size : 0;
		if (dsk_size != 0) {
			if (dsk_size < btree->allocsize)
				++num_smaller_allocsz;
			if (dsk_size < min_written_size)
				min_written_size = dsk_size;
			++written_size_cnt;
			written_size_sum += dsk_size;
		} else
			++num_memory;

		if (WT_PAGE_IS_INTERNAL(page))
			++pages_internal;
		else
			++pages_leaf;

		/* Skip root pages since they are never considered */
		if (__wt_ref_is_root(next_walk))
			continue;

		if (page->evict_pass_gen == 0) {
			unvisited_age_gap_sum +=
			    (cache->evict_pass_gen - page->cache_create_gen);
			++unvisited_count;
		} else {
			visited_age_gap_sum +=
			    (cache->evict_pass_gen - page->cache_create_gen);
			gen_gap = cache->evict_pass_gen - page->evict_pass_gen;
			if (gen_gap > gen_gap_max)
				gen_gap_max = gen_gap;
			gen_gap_sum += gen_gap;
			++visited_count;
		}
	}

	WT_STAT_DATA_SET(session, cache_state_gen_avg_gap,
	    visited_count == 0 ? 0 : gen_gap_sum / visited_count);
	WT_STAT_DATA_SET(session, cache_state_avg_unvisited_age,
	    unvisited_count == 0 ? 0 : unvisited_age_gap_sum / unvisited_count);
	WT_STAT_DATA_SET(session, cache_state_avg_visited_age,
	    visited_count == 0 ? 0 : visited_age_gap_sum / visited_count);
	WT_STAT_DATA_SET(session, cache_state_avg_written_size,
	    written_size_cnt == 0 ? 0 : written_size_sum / written_size_cnt);
	WT_STAT_DATA_SET(session, cache_state_gen_max_gap, gen_gap_max);
	WT_STAT_DATA_SET(session, cache_state_max_pagesize, max_pagesize);
	WT_STAT_DATA_SET(session,
	    cache_state_min_written_size, min_written_size);
	WT_STAT_DATA_SET(session, cache_state_memory, num_memory);
	WT_STAT_DATA_SET(session, cache_state_queued, num_queued);
	WT_STAT_DATA_SET(session, cache_state_not_queueable, num_not_queueable);
	WT_STAT_DATA_SET(session, cache_state_pages, walk_count);
	WT_STAT_DATA_SET(session, cache_state_pages_clean, pages_clean);
	WT_STAT_DATA_SET(session, cache_state_pages_dirty, pages_dirty);
	WT_STAT_DATA_SET(session, cache_state_pages_internal, pages_internal);
	WT_STAT_DATA_SET(session, cache_state_pages_leaf, pages_leaf);
	WT_STAT_DATA_SET(session,
	    cache_state_refs_skipped, walk_count - seen_count);
	WT_STAT_DATA_SET(session,
	    cache_state_smaller_alloc_size, num_smaller_allocsz);
	WT_STAT_DATA_SET(session,
	    cache_state_unvisited_count, unvisited_count);
}