示例#1
0
/*
 * __wt_block_compact_page_skip --
 *	Report whether compaction should skip a page.
 *
 *	The address cookie is cracked into an offset, size and checksum.  On
 *	return *skipp is 0 if the block starts in the last half of the file
 *	(the caller should rewrite it) and 1 otherwise (skip it).  addr_size is
 *	unused.
 *
 *	Returns 0 on success, or the error from cracking the cookie, in which
 *	case *skipp is left at 1 (skip).
 */
int
__wt_block_compact_page_skip(WT_SESSION_IMPL *session,
    WT_BLOCK *block, const uint8_t *addr, uint32_t addr_size, int *skipp)
{
	WT_FH *fh;
	off_t offset;
	uint32_t size, cksum;

	WT_UNUSED(addr_size);

	/*
	 * Paranoia: skip on error.  A non-zero value means "skip" (see the
	 * computation below), so the default must be 1, not 0.
	 */
	*skipp = 1;

	fh = block->fh;

	/* Crack the cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum));

	/*
	 * If this block appears in the last half of the file, rewrite it.
	 *
	 * It's unclear we need to lock: the chances of a smashed read are close
	 * to non-existent and the worst thing that can happen is we rewrite a
	 * block we didn't want to rewrite.   On the other hand, compaction is
	 * not expected to be a common operation in WiredTiger, we shouldn't be
	 * here a lot.
	 */
	__wt_spin_lock(session, &block->live_lock);
	*skipp = offset > fh->size / 2 ? 0 : 1;
	__wt_spin_unlock(session, &block->live_lock);

	return (0);
}
示例#2
0
/*
 * __wt_block_salvage_valid --
 *	Record salvage's verdict on a block and advance the salvage offset.
 *
 *	If the upper layer took the block (valid), the salvage offset moves
 *	past the whole block.  If it rejected the block, an allocation-size
 *	chunk at the block's offset is freed and the salvage offset moves past
 *	just that chunk.  addr_size is unused.
 *
 *	Returns 0 on success, or the first error from cracking the cookie or
 *	freeing the chunk.
 */
int
__wt_block_salvage_valid(WT_SESSION_IMPL *session,
    WT_BLOCK *block, uint8_t *addr, size_t addr_size, bool valid)
{
	wt_off_t off;
	uint32_t cksum, len;

	WT_UNUSED(addr_size);

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &cksum));

	/* Accepted block: step over all of it. */
	if (valid) {
		block->slvg_off = off + len;
		return (0);
	}

	/* Rejected block: free one allocation unit and step over it. */
	WT_RET(__wt_block_off_free(
	    session, block, off, (wt_off_t)block->allocsize));
	block->slvg_off = off + block->allocsize;

	return (0);
}
示例#3
0
/*
 * __wt_block_addr_invalid --
 *	Check an address cookie, returning an error code if it is invalid.
 *
 *	Returns EINVAL if the block described by the cookie would extend past
 *	block->size, 0 if it fits, or the error from cracking the cookie.  In
 *	diagnostic builds an error from __wt_block_misplaced is returned too.
 *	addr_size is unused; session and live are only used in diagnostic
 *	builds.
 */
int
__wt_block_addr_invalid(WT_SESSION_IMPL *session,
    WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool live)
{
	wt_off_t off;
	uint32_t cksum, len;

	WT_UNUSED(session);
	WT_UNUSED(addr_size);
	WT_UNUSED(live);

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &cksum));

#ifdef HAVE_DIAGNOSTIC
	/*
	 * Diagnostic builds additionally confirm the address isn't on the
	 * available list or, for live systems, the discard list.
	 */
	WT_RET(__wt_block_misplaced(
	    session, block, "addr-valid", off, len, live));
#endif

	/* A block ending beyond the end of the file is invalid. */
	if (off + len > block->size)
		return (EINVAL);
	return (0);
}
示例#4
0
/*
 * __wt_bm_read --
 *	Fill a buffer with the block an address cookie references, either by
 *	pointing the buffer into the mapped file or by reading the block.
 *
 *	Returns 0 on success or the first error encountered.  addr_size is
 *	unused.
 */
int
__wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
    WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
{
	WT_BLOCK *block;
	wt_off_t off;
	uint32_t checksum, len;

	WT_UNUSED(addr_size);
	block = bm->block;

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &checksum));

	/*
	 * When a mapping exists and the block lies entirely inside it, hand
	 * back a reference into the mapping rather than reading.
	 */
	if (bm->map != NULL && off + len <= (wt_off_t)bm->maplen) {
		buf->data = (uint8_t *)bm->map + off;
		buf->size = len;
		WT_RET(__wt_mmap_preload(session, buf->data, buf->size));

		WT_STAT_FAST_CONN_INCR(session, block_map_read);
		WT_STAT_FAST_CONN_INCRV(session, block_byte_map_read, len);
		return (0);
	}

#ifdef HAVE_DIAGNOSTIC
	/*
	 * Diagnostic builds confirm the block being read isn't on the
	 * available list or, for live systems, the discard list.
	 */
	WT_RET(__wt_block_misplaced(
	    session, block, "read", off, len, bm->is_live));
#endif
	/* Not mapped: read the block from the file. */
	WT_RET(__wt_block_read_off(session, block, buf, off, len, checksum));

#ifdef HAVE_POSIX_FADVISE
	/*
	 * If a cache limit is configured, count the bytes read and, once the
	 * count exceeds the limit, reset it and advise the system to drop the
	 * file's blocks from its buffer cache.
	 */
	if (block->os_cache_max != 0 &&
	    (block->os_cache += len) > block->os_cache_max) {
		WT_DECL_RET;

		block->os_cache = 0;
		ret = posix_fadvise(block->fh->fd,
		    (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED);
		/* Ignore EINVAL - some file systems don't support the flag. */
		if (ret != 0 && ret != EINVAL)
			WT_RET_MSG(
			    session, ret, "%s: posix_fadvise", block->name);
	}
#endif
	return (0);
}
示例#5
0
/*
 * __wt_bm_read --
 *	Fill a buffer with the block an address cookie references, either by
 *	pointing the buffer into the mapped file or by reading the block.
 *
 *	Returns 0 on success or the first error encountered; on the mapped
 *	path the result of the handle's map-preload method is returned.
 *	addr_size is unused.
 */
int
__wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
    WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
{
	WT_BLOCK *block;
	WT_DECL_RET;
	WT_FILE_HANDLE *fhandle;
	wt_off_t off;
	uint32_t cksum, len;
	bool in_map;

	WT_UNUSED(addr_size);
	block = bm->block;

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &cksum));

	/*
	 * The mapped path is taken only when the block lies entirely inside
	 * the mapping and the file handle provides a map-preload method.
	 */
	fhandle = block->fh->handle;
	in_map = bm->map != NULL && off + len <= (wt_off_t)bm->maplen;
	if (in_map && fhandle->fh_map_preload != NULL) {
		buf->data = (uint8_t *)bm->map + off;
		buf->size = len;
		ret = fhandle->fh_map_preload(fhandle, (WT_SESSION *)session,
		    buf->data, buf->size, bm->mapped_cookie);

		/* Statistics are updated whether or not the preload failed. */
		WT_STAT_CONN_INCR(session, block_map_read);
		WT_STAT_CONN_INCRV(session, block_byte_map_read, len);
		return (ret);
	}

#ifdef HAVE_DIAGNOSTIC
	/*
	 * Diagnostic builds confirm the block being read isn't on the
	 * available list or, for live systems, the discard list.
	 */
	WT_RET(__wt_block_misplaced(session,
	    block, "read", off, len, bm->is_live, __func__, __LINE__));
#endif
	/* Apply read throttling, then read the block from the file. */
	__wt_capacity_throttle(session, len, WT_THROTTLE_READ);
	WT_RET(__wt_block_read_off(session, block, buf, off, len, cksum));

	/* Optionally discard blocks from the system's buffer cache. */
	return (__wt_block_discard(session, block, (size_t)len));
}
示例#6
0
/*
 * __wt_block_addr_valid --
 *	Report whether an address cookie is valid: 1 if the block it describes
 *	ends at or before the end of the file, 0 if it extends past it.
 *
 *	session and addr_size are unused.
 *
 *	NOTE(review): a failure to crack the cookie is returned as that error
 *	code, which shares the return value with the 0/1 answer -- confirm how
 *	callers interpret it.
 */
int
__wt_block_addr_valid(WT_SESSION_IMPL *session,
    WT_BLOCK *block, const uint8_t *addr, size_t addr_size)
{
	off_t off;
	uint32_t checksum, len;

	WT_UNUSED(session);
	WT_UNUSED(addr_size);

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &checksum));

	/* The only test is whether the block runs past the end of the file. */
	if (off + len > block->fh->size)
		return (0);
	return (1);
}
示例#7
0
/*
 * __wt_block_addr_string --
 *	Format an address cookie into buf as a printable string of the form
 *	"[start-end, size, checksum]".
 *
 *	Returns 0 on success, or the error from cracking the cookie or from
 *	formatting the buffer.  addr_size is unused.
 */
int
__wt_block_addr_string(WT_SESSION_IMPL *session,
    WT_BLOCK *block, WT_ITEM *buf, const uint8_t *addr, size_t addr_size)
{
	off_t off;
	uint32_t checksum, len;
	uintmax_t first, last;

	WT_UNUSED(addr_size);

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &checksum));

	/* The printed range runs from the block's offset to offset + size. */
	first = (uintmax_t)off;
	last = (uintmax_t)off + len;

	return (__wt_buf_fmt(session, buf,
	    "[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
	    first, last, len, checksum));
}
示例#8
0
/*
 * __wt_bm_preload --
 *	Pre-load the block an address cookie references.
 *
 *	Does nothing when direct I/O is configured for the file.  Otherwise a
 *	block inside the mapping is preloaded through the mapping, and any
 *	other block is either advised to the system or, failing that, read
 *	into a scratch buffer that is immediately released.
 *
 *	Returns 0 on success or the first error encountered.  addr_size is
 *	unused.
 */
int
__wt_bm_preload(
    WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
	WT_BLOCK *block;
	WT_DECL_ITEM(scratch);
	WT_DECL_RET;
	wt_off_t off;
	uint32_t checksum, len;

	WT_UNUSED(addr_size);
	block = bm->block;

	/*
	 * Pre-loading is pointless with direct I/O: the kernel cache isn't
	 * interesting.
	 */
	if (block->fh->direct_io)
		return (0);

	WT_STAT_FAST_CONN_INCR(session, block_preload);

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &checksum));

	/* A block entirely inside the mapping is preloaded through it. */
	if (bm->map != NULL && off + len <= (wt_off_t)bm->maplen)
		return (__wt_mmap_preload(
		    session, (uint8_t *)bm->map + off, len));

#ifdef HAVE_POSIX_FADVISE
	/* If the advisory call succeeds, there's nothing more to do. */
	if (posix_fadvise(block->fh->fd,
	    (wt_off_t)off, (wt_off_t)len, POSIX_FADV_WILLNEED) == 0)
		return (0);
#endif

	/* Fall back to reading the block into a throw-away buffer. */
	WT_RET(__wt_scr_alloc(session, len, &scratch));
	ret = __wt_block_read_off(session, block, scratch, off, len, checksum);
	__wt_scr_free(session, &scratch);
	return (ret);
}
示例#9
0
/*
 * __wt_block_salvage_valid --
 *	Note that salvage accepted a block by advancing the salvage offset to
 *	the end of that block.
 *
 *	Returns 0 on success or the error from cracking the cookie.  session
 *	and addr_size are unused.
 */
int
__wt_block_salvage_valid(WT_SESSION_IMPL *session,
    WT_BLOCK *block, uint8_t *addr, uint32_t addr_size)
{
	off_t off;
	uint32_t checksum, len;

	WT_UNUSED(session);
	WT_UNUSED(addr_size);

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &checksum));

	/* The upper layer took the block we gave it: move past it. */
	block->slvg_off = off + len;

	return (0);
}
示例#10
0
/*
 * __wt_block_read --
 *	Read the block an address cookie references into a buffer, then
 *	verify it if verification is enabled on the block handle.
 *
 *	Returns 0 on success or the first error from cracking the cookie,
 *	reading the block or verifying it.
 */
int
__wt_block_read(WT_SESSION_IMPL *session, WT_BLOCK *block,
    WT_ITEM *buf, const uint8_t *addr, uint32_t addr_size)
{
	off_t off;
	uint32_t checksum, len;

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &checksum));

	/* Read the block from the file. */
	WT_RET(__wt_block_read_off(session, block, buf, off, len, checksum));

	/* Without verification enabled, the read is all there is to do. */
	if (!block->verify)
		return (0);

	return (__wt_block_verify(
	    session, block, buf, addr, addr_size, off, len));
}
示例#11
0
/*
 * __wt_bm_preload --
 *	Pre-load the block an address cookie references.
 *
 *	A block inside the mapping is preloaded through the mapping.  Any
 *	other block is advised to the system when posix_fadvise is available;
 *	if that is unavailable or fails, the block is read into a scratch
 *	buffer that is immediately released.
 *
 *	Returns 0 on success or the first error encountered; the preload
 *	statistic is only incremented on success.  addr_size is unused.
 */
int
__wt_bm_preload(WT_BM *bm,
    WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
	WT_BLOCK *block;
	WT_DECL_ITEM(scratch);
	WT_DECL_RET;
	off_t off;
	uint32_t checksum, len;
	int in_map;

	WT_UNUSED(addr_size);
	block = bm->block;

	/*
	 * Start from a failure value so that, when posix_fadvise isn't
	 * compiled in, the read fallback below is taken.
	 */
	ret = EINVAL;

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &checksum));

	in_map = bm->map != NULL && off + len <= (off_t)bm->maplen;
	if (!in_map) {
#ifdef HAVE_POSIX_FADVISE
		ret = posix_fadvise(block->fh->fd,
		    (off_t)off, (off_t)len, POSIX_FADV_WILLNEED);
#endif
		/* No usable advice: read the block and discard the result. */
		if (ret != 0) {
			WT_RET(__wt_scr_alloc(session, len, &scratch));
			ret = __wt_block_read_off(
			    session, block, scratch, off, len, checksum);
			__wt_scr_free(&scratch);
			WT_RET(ret);
		}
	} else {
		/* The block is inside the mapping: preload it from there. */
		WT_RET(__wt_mmap_preload(
		    session, (uint8_t *)bm->map + off, len));
	}

	WT_STAT_FAST_CONN_INCR(session, block_preload);

	return (0);
}
示例#12
0
/*
 * __wt_bm_corrupt --
 *	Report a corrupted block (external API): read the block the address
 *	cookie references into a scratch buffer and dump it.
 *
 *	Returns 0 on success or the first error encountered.  The scratch
 *	buffer is released on every path after it has been allocated.
 */
int
__wt_bm_corrupt(WT_BM *bm,
    WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
	WT_DECL_ITEM(scratch);
	WT_DECL_RET;
	wt_off_t off;
	uint32_t cksum, len;

	/* Get a scratch buffer and read the block into it. */
	WT_RET(__wt_scr_alloc(session, 0, &scratch));
	WT_ERR(__wt_bm_read(bm, session, scratch, addr, addr_size));

	/* Decode the address cookie for the dump's offset/size/checksum. */
	WT_ERR(__wt_block_buffer_to_addr(bm->block, addr, &off, &len, &cksum));

	/* Dump the block's contents. */
	WT_ERR(__wt_bm_corrupt_dump(session, scratch, off, len, cksum));

err:	__wt_scr_free(session, &scratch);
	return (ret);
}
示例#13
0
文件: block_read.c 项目: mikety/mongo
/*
 * __wt_bm_preload --
 *	Pre-load the block an address cookie references.
 *
 *	A block inside the mapping uses the file handle's map-preload method,
 *	any other block uses the handle's advise method; either method may be
 *	absent.  If the method called reports EBUSY or ENOTSUP, the block is
 *	instead read into a scratch buffer that is immediately released.
 *
 *	Returns 0 on success or the first error encountered.
 *
 *	NOTE(review): when the relevant handle method is absent, ret stays 0
 *	and the function returns without taking the slow path -- confirm this
 *	is intended.
 */
int
__wt_bm_preload(
    WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
	WT_BLOCK *block;
	WT_DECL_ITEM(scratch);
	WT_DECL_RET;
	WT_FILE_HANDLE *fhandle;
	wt_off_t off;
	uint32_t checksum, len;
	bool in_map;

	WT_UNUSED(addr_size);

	block = bm->block;

	WT_STAT_FAST_CONN_INCR(session, block_preload);

	/* Decode the address cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &off, &len, &checksum));

	/* Pick the preload method by whether the block is in the mapping. */
	fhandle = block->fh->handle;
	in_map = bm->map != NULL && off + len <= (wt_off_t)bm->maplen;
	if (in_map) {
		if (fhandle->fh_map_preload != NULL)
			ret = fhandle->fh_map_preload(
			    fhandle, (WT_SESSION *)session,
			    (uint8_t *)bm->map + off, len, bm->mapped_cookie);
	} else if (fhandle->fh_advise != NULL)
		ret = fhandle->fh_advise(fhandle, (WT_SESSION *)session,
		    (wt_off_t)off, (wt_off_t)len, WT_FILE_HANDLE_WILLNEED);

	/* If preload isn't supported, do it the slow way. */
	if (ret == EBUSY || ret == ENOTSUP) {
		WT_RET(__wt_scr_alloc(session, 0, &scratch));
		ret = __wt_bm_read(bm, session, scratch, addr, addr_size);
		__wt_scr_free(session, &scratch);
	}

	return (ret);
}
示例#14
0
/*
 * __wt_block_compact_page_skip --
 *	Return if writing a particular page will shrink the file.
 *
 *	*skipp defaults to 1 (skip) and is set to 0 (rewrite) only when the
 *	block lies in the last 10% of the file and the live available list
 *	holds an extent in the first 90% that is large enough to hold it.
 *	addr_size is unused.
 *
 *	NOTE(review): this excerpt ends before the live lock is released and
 *	before the function returns; the remainder is not visible here.
 */
int
__wt_block_compact_page_skip(WT_SESSION_IMPL *session,
    WT_BLOCK *block, const uint8_t *addr, size_t addr_size, int *skipp)
{
	WT_DECL_RET;
	WT_EXT *ext;
	WT_EXTLIST *el;
	WT_FH *fh;
	off_t ninety, offset;
	uint32_t size, cksum;

	WT_UNUSED(addr_size);
	*skipp = 1;				/* Return a default skip. */

	fh = block->fh;

	/* Crack the cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum));

	/* The file size and the available list are examined under the lock. */
	__wt_spin_lock(session, &block->live_lock);

	/*
	 * If this block is in the last 10% of the file and there's a block on
	 * the available list that's in the first 90% of the file, rewrite the
	 * block.  Checking the available list is necessary (otherwise writing
	 * the block would extend the file), but there's an obvious race if the
	 * file is sufficiently busy.
	 */
	/* The offset marking the start of the last 10% of the file. */
	ninety = fh->size - fh->size / 10;
	if (offset > ninety) {
		el = &block->live.avail;
		/* Stop at the first extent that is early and big enough. */
		WT_EXT_FOREACH(ext, el->off)
			if (ext->off < ninety && ext->size >= size) {
				*skipp = 0;
				break;
			}
	}
示例#15
0
/*
 * __wt_bm_read --
 *	Map or read address cookie referenced block into a buffer.
 *
 *	When the block lies entirely inside the mapped region, the buffer is
 *	pointed at the mapped memory and flagged WT_ITEM_MAPPED; otherwise the
 *	block is read from the file into the buffer.
 *
 *	Returns 0 on success or the first error encountered.  addr_size is
 *	unused.
 */
int
__wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
    WT_ITEM *buf, const uint8_t *addr, uint32_t addr_size)
{
	WT_BLOCK *block;
	off_t offset;
	uint32_t size, cksum;
	int mapped;

	WT_UNUSED(addr_size);
	block = bm->block;

	/* Crack the cookie. */
	WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum));

	/*
	 * Clear buffers previously used for mapped memory, we may be forced
	 * to read into this buffer.
	 */
	if (F_ISSET(buf, WT_ITEM_MAPPED))
		__wt_buf_free(session, buf);

	/*
	 * If we're going to be able to return mapped memory and the buffer
	 * has allocated memory, discard it.
	 */
	mapped = bm->map != NULL && offset + size <= (off_t)bm->maplen;
	if (buf->mem != NULL && mapped)
		__wt_buf_free(session, buf);

	/* Map the block if it's possible. */
	if (mapped) {
		buf->mem = (uint8_t *)bm->map + offset;
		buf->memsize = size;
		buf->data = buf->mem;
		buf->size = size;
		F_SET(buf, WT_ITEM_MAPPED);

		WT_RET(__wt_mmap_preload(session, buf->mem, buf->size));

		WT_CSTAT_INCR(session, block_map_read);
		WT_CSTAT_INCRV(session, block_byte_map_read, size);
		return (0);
	}

	/* Read the block. */
	WT_RET(__wt_block_read_off(session, block, buf, offset, size, cksum));

#ifdef HAVE_POSIX_FADVISE
	/*
	 * Optionally discard blocks from the system's buffer cache: count the
	 * bytes read and, once the configured maximum is exceeded, reset the
	 * count and advise the system the file's pages aren't needed.
	 */
	if (block->os_cache_max != 0 &&
	    (block->os_cache += size) > block->os_cache_max) {
		WT_DECL_RET;

		block->os_cache = 0;
		/*
		 * Ignore EINVAL - some file systems don't support the flag,
		 * and the advice is only a hint: it must not fail a read that
		 * has already succeeded.
		 */
		if ((ret = posix_fadvise(block->fh->fd,
		    (off_t)0, (off_t)0, POSIX_FADV_DONTNEED)) != 0 &&
		    ret != EINVAL)
			WT_RET_MSG(
			    session, ret, "%s: posix_fadvise", block->name);
	}
#endif
	return (0);
}