/*
 * __ckpt_extlist_read --
 *     Read a checkpoint's alloc and discard extent lists into a newly allocated block-manager
 *     checkpoint structure, which is stored in ckpt->bpriv. Returns 0 on success; a failing step's
 *     error is returned through WT_RET.
 */
static int
__ckpt_extlist_read(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt)
{
    WT_BLOCK_CKPT *ckpt_info;

    /*
     * Build the in-memory checkpoint: allocate it, initialize it under the checkpoint's name and
     * crack the address cookie held in ckpt->raw.data.
     *
     * NOTE(review): nothing in this function releases ckpt->bpriv when a later step fails; the
     * structure stays attached to the WT_CKPT, presumably for the caller to discard -- confirm.
     */
    WT_RET(__wt_calloc(session, 1, sizeof(WT_BLOCK_CKPT), &ckpt->bpriv));
    ckpt_info = ckpt->bpriv;
    WT_RET(__wt_block_ckpt_init(session, ckpt_info, ckpt->name));
    WT_RET(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ckpt_info));

    /*
     * Only the alloc and discard lists are read. The avail list is ignored: checkpoint avail lists
     * are only useful when rolling forward from that particular checkpoint, where they represent
     * our best understanding of which blocks can be allocated. If we are not operating on the live
     * checkpoint, subsequent checkpoints might have allocated those blocks and the avail list is
     * useless. It isn't discarded, because it is useful as part of verification, but it isn't
     * re-written either.
     */
    WT_RET(__wt_block_extlist_read(session, block, &ckpt_info->alloc, ckpt_info->file_size));
    WT_RET(__wt_block_extlist_read(session, block, &ckpt_info->discard, ckpt_info->file_size));

    return (0);
}
/* * __ckpt_string -- * Return a printable string representation of a checkpoint address cookie. */ static int __ckpt_string(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, WT_ITEM *buf) { WT_BLOCK_CKPT *ci, _ci; /* Initialize the checkpoint, crack the cookie. */ ci = &_ci; WT_RET(__wt_block_ckpt_init(session, block, ci, "string", 0)); WT_RET(__wt_block_buffer_to_ckpt(session, block, addr, ci)); WT_RET(__wt_buf_fmt(session, buf, "version=%d", ci->version)); if (ci->root_offset == WT_BLOCK_INVALID_OFFSET) WT_RET(__wt_buf_catfmt(session, buf, ", root=[Empty]")); else WT_RET(__wt_buf_catfmt(session, buf, ", root=[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]", (uintmax_t)ci->root_offset, (uintmax_t)(ci->root_offset + ci->root_size), ci->root_size, ci->root_cksum)); if (ci->alloc.offset == WT_BLOCK_INVALID_OFFSET) WT_RET(__wt_buf_catfmt(session, buf, ", alloc=[Empty]")); else WT_RET(__wt_buf_catfmt(session, buf, ", alloc=[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]", (uintmax_t)ci->alloc.offset, (uintmax_t)(ci->alloc.offset + ci->alloc.size), ci->alloc.size, ci->alloc.cksum)); if (ci->avail.offset == WT_BLOCK_INVALID_OFFSET) WT_RET(__wt_buf_catfmt(session, buf, ", avail=[Empty]")); else WT_RET(__wt_buf_catfmt(session, buf, ", avail=[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]", (uintmax_t)ci->avail.offset, (uintmax_t)(ci->avail.offset + ci->avail.size), ci->avail.size, ci->avail.cksum)); if (ci->discard.offset == WT_BLOCK_INVALID_OFFSET) WT_RET(__wt_buf_catfmt(session, buf, ", discard=[Empty]")); else WT_RET(__wt_buf_catfmt(session, buf, ", discard=[%" PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]", (uintmax_t)ci->discard.offset, (uintmax_t)(ci->discard.offset + ci->discard.size), ci->discard.size, ci->discard.cksum)); WT_RET(__wt_buf_catfmt(session, buf, ", file size=%" PRIuMAX ", write generation=%" PRIu64, (uintmax_t)ci->file_size, ci->write_gen)); __wt_block_ckpt_destroy(session, ci); return (0); }
/*
 * __wt_block_salvage_start --
 *     Start a file salvage: rewrite the description block, reset the live checkpoint, trim the
 *     file to a whole number of allocation units and seed the live allocation list with
 *     everything past the first block.
 */
int
__wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
    wt_off_t trunc_len;
    uint32_t unit;

    unit = block->allocsize;

    /* Reset the description information in the first block. */
    WT_RET(__wt_desc_write(session, block->fh, unit));

    /*
     * Salvage creates a new checkpoint when it's finished, set up for rolling an empty file
     * forward.
     */
    WT_RET(__wt_block_ckpt_init(session, &block->live, "live"));

    /*
     * Truncate the file to an allocation-size multiple of blocks (bytes trailing the last block
     * must be garbage, by definition). A file no larger than one allocation unit is sized to
     * exactly one unit.
     */
    trunc_len = block->size > unit ? (block->size / unit) * unit : unit;
    WT_RET(__wt_block_truncate(session, block, trunc_len));

    /*
     * The file's first allocation-sized block is description information, skip it when reading
     * through the file.
     */
    block->slvg_off = unit;

    /*
     * The only checkpoint extent we care about is the allocation list. Start with the entire
     * file (minus the description block) on the allocation list, we'll "free" any blocks we
     * don't want as we process the file.
     */
    WT_RET(__wt_block_insert_ext(session, block, &block->live.alloc, unit, trunc_len - unit));

    /* Salvage performs a checkpoint but doesn't start or resolve it. */
    WT_ASSERT(session, block->ckpt_state == WT_CKPT_NONE);
    block->ckpt_state = WT_CKPT_SALVAGE;

    return (0);
}
/*
 * __wt_block_salvage_start --
 *     Start a file salvage: re-initialize the description sector, reset the live checkpoint, trim
 *     trailing partial allocation units from the file and seed the live allocation list with
 *     everything past the description sector.
 */
int
__wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
    off_t len;
    uint32_t unit;

    /* Reset the description sector. */
    WT_RET(__wt_desc_init(session, block->fh));

    /*
     * Salvage creates a new checkpoint when it's finished, set up for rolling an empty file
     * forward.
     */
    WT_RET(__wt_block_ckpt_init(session, &block->live, "live"));

    /*
     * Truncate the file to an initial sector plus N allocation size units (bytes trailing the
     * last multiple of an allocation size unit must be garbage, by definition). A file that holds
     * nothing beyond the description sector is left alone.
     */
    if (block->fh->file_size <= WT_BLOCK_DESC_SECTOR)
        len = WT_BLOCK_DESC_SECTOR;
    else {
        unit = block->allocsize;

        /* Round the payload (everything after the sector) down to a whole number of units. */
        len = block->fh->file_size - WT_BLOCK_DESC_SECTOR;
        len -= len % unit;
        len += WT_BLOCK_DESC_SECTOR;

        /* Only touch the file if there really is a partial trailing unit. */
        if (len != block->fh->file_size) {
            WT_RET(__wt_ftruncate(session, block->fh, len));
        }
    }

    /* The first sector of the file is the description record, skip it as we read the file. */
    block->slvg_off = WT_BLOCK_DESC_SECTOR;

    /*
     * The only checkpoint extent we care about is the allocation list. Start with the entire
     * file on the allocation list, we'll "free" any blocks we don't want as we process the file.
     */
    WT_RET(__wt_block_insert_ext(
      session, &block->live.alloc, WT_BLOCK_DESC_SECTOR, len - WT_BLOCK_DESC_SECTOR));

    return (0);
}
/*
 * __wt_block_checkpoint_load --
 *     Load a checkpoint.
 *
 *     addr/addr_size describe the checkpoint cookie (may be NULL/0 for "no checkpoint"). On
 *     return, root_addr holds the packed root page address and *root_addr_sizep its length, or 0
 *     if there is no root page. When "checkpoint" is true a temporary, read-only checkpoint is
 *     used and discarded before returning; otherwise the block's live checkpoint is loaded and the
 *     file is truncated to the checkpoint's size.
 */
int
__wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr,
  size_t addr_size, uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint)
{
    WT_BLOCK_CKPT *ci, readonly_ci;
    WT_DECL_ITEM(tmp);
    WT_DECL_RET;
    uint8_t *cursor;

    /* The cleanup code tests this pointer, it must be valid even if we fail immediately. */
    ci = NULL;

    /*
     * Sometimes we don't find a root page (we weren't given a checkpoint, or the checkpoint was
     * empty). In that case we return an empty root address, set that up now.
     */
    *root_addr_sizep = 0;

#ifdef HAVE_VERBOSE
    if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) {
        if (addr != NULL) {
            WT_ERR(__wt_scr_alloc(session, 0, &tmp));
            WT_ERR(__ckpt_string(session, block, addr, tmp));
        }
        __wt_verbose(session, WT_VERB_CHECKPOINT, "%s: load-checkpoint: %s", block->name,
          addr != NULL ? (const char *)tmp->data : "[Empty]");
    }
#endif

    /*
     * There's a single checkpoint in the file that can be written, all of the others are
     * read-only. We use the same initialization calls for read-only checkpoints, but the
     * information doesn't persist.
     */
    if (!checkpoint) {
        /*
         * We depend on the btree level for locking: things will go bad fast if we open the live
         * system in two handles, or salvage, truncate or verify the live/running file.
         */
#ifdef HAVE_DIAGNOSTIC
        __wt_spin_lock(session, &block->live_lock);
        WT_ASSERT(session, block->live_open == false);
        block->live_open = true;
        __wt_spin_unlock(session, &block->live_lock);
#endif
        ci = &block->live;
        WT_ERR(__wt_block_ckpt_init(session, ci, "live"));
    } else {
        ci = &readonly_ci;
        WT_ERR(__wt_block_ckpt_init(session, ci, "checkpoint"));
    }

    /*
     * If the checkpoint has an on-disk root page, load it. Otherwise, size the file past the
     * description information.
     */
    if (addr != NULL && addr_size != 0) {
        /* Crack the checkpoint cookie. */
        WT_ERR(__wt_block_buffer_to_ckpt(session, block, addr, ci));

        /* Verify sets up next. */
        if (block->verify) {
            WT_ERR(__wt_verify_ckpt_load(session, block, ci));
        }

        /* Pack the address of any root page into the caller's buffer and report its length. */
        if (ci->root_offset != WT_BLOCK_INVALID_OFFSET) {
            cursor = root_addr;
            WT_ERR(__wt_block_addr_to_buffer(
              block, &cursor, ci->root_offset, ci->root_size, ci->root_checksum));
            *root_addr_sizep = WT_PTRDIFF(cursor, root_addr);
        }

        /*
         * Rolling a checkpoint forward requires the avail list, the blocks from which we can
         * allocate.
         */
        if (!checkpoint) {
            WT_ERR(__wt_block_extlist_read_avail(session, block, &ci->avail, ci->file_size));
        }
    } else
        ci->file_size = block->allocsize;

    /*
     * If the checkpoint can be written, that means anything written after the checkpoint is no
     * longer interesting, truncate the file. Don't bother checking the avail list for a block at
     * the end of the file, that was done when the checkpoint was first written (re-writing the
     * checkpoint might possibly make it relevant here, but it's unlikely enough I don't bother).
     */
    if (!checkpoint) {
        WT_ERR(__wt_block_truncate(session, block, ci->file_size));
    }

    if (0) {
err:
        /*
         * Don't call checkpoint-unload: unload does real work including file truncation. If we
         * fail early enough that the checkpoint information isn't correct, bad things would
         * happen. The only allocated memory was in the service of verify, clean that up.
         */
        if (block->verify)
            WT_TRET(__wt_verify_ckpt_unload(session, block));
    }

    /* Checkpoints don't need the original information, discard it. */
    if (ci != NULL && checkpoint)
        __wt_block_ckpt_destroy(session, ci);

    __wt_scr_free(session, &tmp);
    return (ret);
}
/*
 * __wt_block_checkpoint_load --
 *     Load a checkpoint.
 *
 *     session:   the session handle passed through to every helper.
 *     block:     the block handle whose live checkpoint is (re)initialized and loaded.
 *     dsk:       receives the checkpoint's root page; dsk->size is 0 if there is no root page.
 *     addr:      the checkpoint address cookie, or NULL when there is no checkpoint to load.
 *     addr_size: the cookie's length; 0 is treated the same as a NULL cookie.
 *     readonly:  if zero, the avail list is read and the file is truncated to the checkpoint's
 *                file size so the checkpoint can be rolled forward.
 *
 *     Returns 0 on success. On failure after the live checkpoint was initialized, the checkpoint
 *     is unloaded and the error is returned. The scratch buffer used for the printable cookie is
 *     released on every exit path once it may have been allocated.
 */
int
__wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *dsk,
  const uint8_t *addr, uint32_t addr_size, int readonly)
{
    WT_BLOCK_CKPT *ci;
    WT_DECL_ITEM(tmp);
    WT_DECL_RET;

    /*
     * Sometimes we don't find a root page (we weren't given a checkpoint, or the referenced
     * checkpoint was empty). In that case we return a root page size of 0. Set that up now.
     */
    dsk->size = 0;

    ci = &block->live;
    WT_RET(__wt_block_ckpt_init(session, block, ci, "live", 1));

    if (WT_VERBOSE_ISSET(session, ckpt)) {
        if (addr != NULL) {
            WT_ERR(__wt_scr_alloc(session, 0, &tmp));
            WT_ERR(__ckpt_string(session, block, addr, tmp));
        }
        WT_VERBOSE_ERR(session, ckpt, "%s: load-checkpoint: %s", block->name,
          addr == NULL ? "[Empty]" : (char *)tmp->data);
    }

    /*
     * If not loading a checkpoint from disk, we're done. The verbose code above may already have
     * allocated the scratch buffer (a non-NULL cookie with a zero size), release it before
     * returning rather than leaking it.
     */
    if (addr == NULL || addr_size == 0) {
        __wt_scr_free(&tmp);
        return (0);
    }

    /* Crack the checkpoint cookie (the cookie is known to be non-NULL at this point). */
    WT_ERR(__wt_block_buffer_to_ckpt(session, block, addr, ci));

    /* Verify sets up next. */
    if (block->verify) {
        WT_ERR(__wt_verify_ckpt_load(session, block, ci));
    }

    /* Read, and optionally verify, any root page. */
    if (ci->root_offset != WT_BLOCK_INVALID_OFFSET) {
        WT_ERR(__wt_block_read_off(
          session, block, dsk, ci->root_offset, ci->root_size, ci->root_cksum));

        if (block->verify) {
            /* The printable cookie names the page being verified; build it if verbose didn't. */
            if (tmp == NULL) {
                WT_ERR(__wt_scr_alloc(session, 0, &tmp));
                WT_ERR(__ckpt_string(session, block, addr, tmp));
            }
            WT_ERR(__wt_verify_dsk(session, (char *)tmp->data, dsk));
        }
    }

    /*
     * Rolling a checkpoint forward requires the avail list, the blocks from which we can
     * allocate.
     */
    if (!readonly) {
        WT_ERR(__wt_block_extlist_read_avail(session, block, &ci->avail));
    }

    /*
     * If the checkpoint can be written, that means anything written after the checkpoint is no
     * longer interesting, truncate the file. Don't bother checking the avail list for a block at
     * the end of the file, that was done when the checkpoint was first written (re-writing the
     * checkpoint might possibly make it relevant here, but it's unlikely enough that I'm not
     * bothering).
     */
    if (!readonly) {
        WT_VERBOSE_ERR(session, ckpt, "truncate file to %" PRIuMAX, (uintmax_t)ci->file_size);
        WT_ERR(__wt_ftruncate(session, block->fh, ci->file_size));
    }

    if (0) {
err:
        /* Best-effort unload of the partially loaded checkpoint; keep the original error. */
        (void)__wt_block_checkpoint_unload(session, block);
    }

    __wt_scr_free(&tmp);
    return (ret);
}