Exemple #1
0
/*
 * __wt_block_ckpt_to_buffer --
 *	Convert the components into its checkpoint cookie.
 */
int
__wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session,
    WT_BLOCK *block, uint8_t **pp, WT_BLOCK_CKPT *ci)
{
	uint64_t a;

	if (ci->version != WT_BM_CHECKPOINT_VERSION)
		WT_RET_MSG(session, WT_ERROR, "unsupported checkpoint version");

	(*pp)[0] = ci->version;
	(*pp)++;

	WT_RET(__wt_block_addr_to_buffer(block, pp,
	    ci->root_offset, ci->root_size, ci->root_cksum));
	WT_RET(__wt_block_addr_to_buffer(block, pp,
	    ci->alloc.offset, ci->alloc.size, ci->alloc.cksum));
	WT_RET(__wt_block_addr_to_buffer(block, pp,
	    ci->avail.offset, ci->avail.size, ci->avail.cksum));
	WT_RET(__wt_block_addr_to_buffer(block, pp,
	    ci->discard.offset, ci->discard.size, ci->discard.cksum));
	a = (uint64_t)ci->file_size;
	WT_RET(__wt_vpack_uint(pp, 0, a));
	a = (uint64_t)ci->ckpt_size;
	WT_RET(__wt_vpack_uint(pp, 0, a));

	return (0);
}
Exemple #2
0
/*
 * __wt_debug_offset --
 *	Read and dump a disk page in debugging mode, using a file
 * offset/size/checksum triplet.
 */
int
__wt_debug_offset(WT_SESSION_IMPL *session,
     wt_off_t offset, uint32_t size, uint32_t cksum, const char *ofile)
{
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE], *endp;

	WT_ASSERT(session, S2BT_SAFE(session) != NULL);

	/*
	 * This routine depends on the default block manager's view of files,
	 * where an address consists of a file offset, length, and checksum.
	 * This is for debugging only: other block managers might not see a
	 * file or address the same way, that's why there's no block manager
	 * method.
	 *
	 * Convert the triplet into an address structure.
	 */
	endp = addr;
	WT_RET(__wt_block_addr_to_buffer(
	    S2BT(session)->bm->block, &endp, offset, size, cksum));

	/*
	 * Read the address through the btree I/O functions (so the block is
	 * decompressed as necessary).
	 */
	WT_RET(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_bt_read(session, buf, addr, WT_PTRDIFF(endp, addr)));
	ret = __wt_debug_disk(session, buf->mem, ofile);

err:	__wt_scr_free(session, &buf);
	return (ret);
}
Exemple #3
0
/*将buffer的数据写入到block对应的文件中*/
int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, int data_cksum)
{
	wt_off_t offset;
	uint32_t size, cksum;
	uint8_t *endp;

	WT_RET(__wt_block_write_off(session, block, buf, &offset, &size, &cksum, data_cksum, 0));

	endp = addr;
	/*将block的checksum/长度对齐个数/偏移位置写入addr中*/
	WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum));
	*addr_sizep = WT_PTRDIFF(endp, addr);

	return 0;
}
Exemple #4
0
/*
 * __wt_block_write --
 *	Write a buffer into a block, returning the block's address cookie.
 */
int
__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block,
    WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum)
{
	wt_off_t offset;
	uint32_t size, cksum;
	uint8_t *endp;

	WT_RET(__wt_block_write_off(
	    session, block, buf, &offset, &size, &cksum, data_cksum, false));

	endp = addr;
	WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum));
	*addr_sizep = WT_PTRDIFF(endp, addr);

	return (0);
}
Exemple #5
0
/*
 * __wt_block_write --
 *	Write a buffer into a block, returning the block's address cookie.
 */
int
__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block,
    WT_ITEM *buf, uint8_t *addr, uint32_t *addr_size, int data_cksum)
{
	off_t offset;
	uint32_t size, cksum;
	uint8_t *endp;

	WT_UNUSED(addr_size);

	WT_RET(__wt_block_write_off(
	    session, block, buf, &offset, &size, &cksum, data_cksum, 0));

	endp = addr;
	WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum));
	*addr_size = WT_PTRDIFF32(endp, addr);

	return (0);
}
Exemple #6
0
/*
 * __wt_block_checkpoint_load --
 *	Load a checkpoint.
 */
int
__wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
    const uint8_t *addr, size_t addr_size,
    uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint)
{
	WT_BLOCK_CKPT *ci, _ci;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	uint8_t *endp;

	ci = NULL;

	/*
	 * Sometimes we don't find a root page (we weren't given a checkpoint,
	 * or the checkpoint was empty).  In that case we return an empty root
	 * address, set that up now.
	 */
	*root_addr_sizep = 0;

#ifdef HAVE_VERBOSE
	if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) {
		if (addr != NULL) {
			WT_ERR(__wt_scr_alloc(session, 0, &tmp));
			WT_ERR(__ckpt_string(session, block, addr, tmp));
		}
		__wt_verbose(session, WT_VERB_CHECKPOINT,
		    "%s: load-checkpoint: %s", block->name,
		    addr == NULL ? "[Empty]" : (const char *)tmp->data);
	}
#endif

	/*
	 * There's a single checkpoint in the file that can be written, all of
	 * the others are read-only.  We use the same initialization calls for
	 * readonly checkpoints, but the information doesn't persist.
	 */
	if (checkpoint) {
		ci = &_ci;
		WT_ERR(__wt_block_ckpt_init(session, ci, "checkpoint"));
	} else {
		/*
		 * We depend on the btree level for locking: things will go bad
		 * fast if we open the live system in two handles, or salvage,
		 * truncate or verify the live/running file.
		 */
#ifdef HAVE_DIAGNOSTIC
		__wt_spin_lock(session, &block->live_lock);
		WT_ASSERT(session, block->live_open == false);
		block->live_open = true;
		__wt_spin_unlock(session, &block->live_lock);
#endif
		ci = &block->live;
		WT_ERR(__wt_block_ckpt_init(session, ci, "live"));
	}

	/*
	 * If the checkpoint has an on-disk root page, load it.  Otherwise, size
	 * the file past the description information.
	 */
	if (addr == NULL || addr_size == 0)
		ci->file_size = block->allocsize;
	else {
		/* Crack the checkpoint cookie. */
		WT_ERR(__wt_block_buffer_to_ckpt(session, block, addr, ci));

		/* Verify sets up next. */
		if (block->verify)
			WT_ERR(__wt_verify_ckpt_load(session, block, ci));

		/* Read any root page. */
		if (ci->root_offset != WT_BLOCK_INVALID_OFFSET) {
			endp = root_addr;
			WT_ERR(__wt_block_addr_to_buffer(block, &endp,
			    ci->root_offset, ci->root_size, ci->root_checksum));
			*root_addr_sizep = WT_PTRDIFF(endp, root_addr);
		}

		/*
		 * Rolling a checkpoint forward requires the avail list, the
		 * blocks from which we can allocate.
		 */
		if (!checkpoint)
			WT_ERR(__wt_block_extlist_read_avail(
			    session, block, &ci->avail, ci->file_size));
	}

	/*
	 * If the checkpoint can be written, that means anything written after
	 * the checkpoint is no longer interesting, truncate the file.  Don't
	 * bother checking the avail list for a block at the end of the file,
	 * that was done when the checkpoint was first written (re-writing the
	 * checkpoint might possibly make it relevant here, but it's unlikely
	 * enough I don't bother).
	 */
	if (!checkpoint)
		WT_ERR(__wt_block_truncate(session, block, ci->file_size));

	if (0) {
err:		/*
		 * Don't call checkpoint-unload: unload does real work including
		 * file truncation.  If we fail early enough that the checkpoint
		 * information isn't correct, bad things would happen.  The only
		 * allocated memory was in the service of verify, clean that up.
		 */
		if (block->verify)
			WT_TRET(__wt_verify_ckpt_unload(session, block));
	}

	/* Checkpoints don't need the original information, discard it. */
	if (checkpoint && ci != NULL)
		__wt_block_ckpt_destroy(session, ci);

	__wt_scr_free(session, &tmp);
	return (ret);
}
Exemple #7
0
/*
 * __wt_block_salvage_next --
 *	Return the address for the next potential block from the file.
 */
int
__wt_block_salvage_next(WT_SESSION_IMPL *session,
    WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *eofp)
{
	WT_BLOCK_HEADER *blk;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	WT_FH *fh;
	wt_off_t max, offset;
	uint32_t allocsize, checksum, size;
	uint8_t *endp;

	*eofp = 0;

	fh = block->fh;
	allocsize = block->allocsize;
	WT_ERR(__wt_scr_alloc(session, allocsize, &tmp));

	/* Read through the file, looking for pages. */
	for (max = block->size;;) {
		offset = block->slvg_off;
		if (offset >= max) {			/* Check eof. */
			*eofp = 1;
			goto done;
		}

		/*
		 * Read the start of a possible page (an allocation-size block),
		 * and get a page length from it.  Move to the next allocation
		 * sized boundary, we'll never consider this one again.
		 */
		WT_ERR(__wt_read(
		    session, fh, offset, (size_t)allocsize, tmp->mem));
		blk = WT_BLOCK_HEADER_REF(tmp->mem);
		__wt_block_header_byteswap(blk);
		size = blk->disk_size;
		checksum = blk->checksum;

		/*
		 * Check the block size: if it's not insane, read the block.
		 * Reading the block validates any checksum; if reading the
		 * block succeeds, return its address as a possible page,
		 * otherwise, move past it.
		 */
		if (!__wt_block_offset_invalid(block, offset, size) &&
		    __wt_block_read_off(
		    session, block, tmp, offset, size, checksum) == 0)
			break;

		/* Free the allocation-size block. */
		__wt_verbose(session, WT_VERB_SALVAGE,
		    "skipping %" PRIu32 "B at file offset %" PRIuMAX,
		    allocsize, (uintmax_t)offset);
		WT_ERR(__wt_block_off_free(
		    session, block, offset, (wt_off_t)allocsize));
		block->slvg_off += allocsize;
	}

	/* Re-create the address cookie that should reference this block. */
	endp = addr;
	WT_ERR(__wt_block_addr_to_buffer(block, &endp, offset, size, checksum));
	*addr_sizep = WT_PTRDIFF(endp, addr);

done:
err:	__wt_scr_free(session, &tmp);
	return (ret);
}
Exemple #8
0
/*
 * __wt_block_salvage_next --
 *	Return the next block from the file.
 */
int
__wt_block_salvage_next(
    WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf,
    uint8_t *addr, uint32_t *addr_sizep, uint64_t *write_genp, int *eofp)
{
	WT_BLOCK_HEADER *blk;
	WT_FH *fh;
	off_t max, offset;
	uint32_t allocsize, cksum, size;
	uint8_t *endp;

	*eofp = 0;

	offset = block->slvg_off;
	fh = block->fh;
	allocsize = block->allocsize;
	WT_RET(__wt_buf_initsize(session, buf, allocsize));

	/* Read through the file, looking for pages with valid checksums. */
	for (max = fh->file_size;;) {
		if (offset >= max) {			/* Check eof. */
			*eofp = 1;
			return (0);
		}

		/*
		 * Read the start of a possible page (an allocation-size block),
		 * and get a page length from it.
		 */
		WT_RET(__wt_read(session, fh, offset, allocsize, buf->mem));
		blk = WT_BLOCK_HEADER_REF(buf->mem);

		/*
		 * The page can't be more than the min/max page size, or past
		 * the end of the file.
		 */
		size = blk->disk_size;
		cksum = blk->cksum;
		if (size == 0 ||
		    size % allocsize != 0 ||
		    size > WT_BTREE_PAGE_SIZE_MAX ||
		    offset + (off_t)size > max)
			goto skip;

		/*
		 * The page size isn't insane, read the entire page: reading the
		 * page validates the checksum and then decompresses the page as
		 * needed.  If reading the page fails, it's probably corruption,
		 * we ignore this block.
		 */
		if (__wt_block_read_off(
		    session, block, buf, offset, size, cksum)) {
skip:			WT_VERBOSE_RET(session, salvage,
			    "skipping %" PRIu32 "B at file offset %" PRIuMAX,
			    allocsize, (uintmax_t)offset);

			/*
			 * Free the block and make sure we don't return it more
			 * than once.
			 */
			WT_RET(__wt_block_off_free(
			    session, block, offset, (off_t)allocsize));
			block->slvg_off = offset += allocsize;
			continue;
		}

		/*
		 * Valid block, return to our caller.
		 *
		 * The buffer may have grown: make sure we read from the full
		 * page image.
		 */
		blk = WT_BLOCK_HEADER_REF(buf->mem);
		break;
	}

	/*
	 * Track the largest write-generation we've seen in the file so future
	 * writes, done after salvage completes, are preferred to these blocks.
	 */
	*write_genp = blk->write_gen;
	if (block->live.write_gen < blk->write_gen)
		block->live.write_gen = blk->write_gen;

	/* Re-create the address cookie that should reference this block. */
	endp = addr;
	WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum));
	*addr_sizep = WT_PTRDIFF32(endp, addr);

	/* We're successfully returning the page, move past it. */
	block->slvg_off = offset + size;

	return (0);
}
Exemple #9
0
/*
 * __wt_block_salvage_next --
 *	Return the address for the next potential block from the file.
 */
int
__wt_block_salvage_next(WT_SESSION_IMPL *session,
    WT_BLOCK *block, uint8_t *addr, uint32_t *addr_sizep, int *eofp)
{
	WT_BLOCK_HEADER *blk;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	WT_FH *fh;
	off_t max, offset;
	uint32_t allocsize, cksum, size;
	uint8_t *endp;

	*eofp = 0;

	fh = block->fh;
	allocsize = block->allocsize;
	WT_ERR(__wt_scr_alloc(session, allocsize, &tmp));

	/* Read through the file, looking for pages. */
	for (max = fh->size;;) {
		offset = block->slvg_off;
		if (offset >= max) {			/* Check eof. */
			*eofp = 1;
			goto done;
		}

		/*
		 * Read the start of a possible page (an allocation-size block),
		 * and get a page length from it.  Move to the next allocation
		 * sized boundary, we'll never consider this one again.
		 */
		WT_ERR(__wt_read(session, fh, offset, allocsize, tmp->mem));
		blk = WT_BLOCK_HEADER_REF(tmp->mem);
		block->slvg_off += allocsize;

		/*
		 * The page can't be more than the min/max page size, or past
		 * the end of the file.
		 */
		size = blk->disk_size;
		cksum = blk->cksum;
		if (size == 0 ||
		    size % allocsize != 0 ||
		    size > WT_BTREE_PAGE_SIZE_MAX ||
		    offset + (off_t)size > max)
			goto skip;

		/*
		 * The block size isn't insane, read the entire block.  Reading
		 * the block validates the checksum; if reading the block fails,
		 * ignore it.  If reading the block succeeds, return its address
		 * as a possible page.
		 */
		if (__wt_block_read_off(
		    session, block, tmp, offset, size, cksum) == 0)
			break;

skip:		WT_VERBOSE_ERR(session, salvage,
		    "skipping %" PRIu32 "B at file offset %" PRIuMAX,
		    allocsize, (uintmax_t)offset);

		/* Free the allocation-size block. */
		WT_ERR(__wt_block_off_free(
		    session, block, offset, (off_t)allocsize));
	}

	/* Re-create the address cookie that should reference this block. */
	endp = addr;
	WT_ERR(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum));
	*addr_sizep = WT_PTRDIFF32(endp, addr);

done:
err:	__wt_scr_free(&tmp);
	return (ret);
}