/*
 * XLogReadBuffer
 *		Fetch a page on behalf of XLOG replay.
 *
 * The result is equivalent to calling ReadBuffer and then
 * LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE): on success the caller receives
 * a buffer that is both pinned and exclusively locked.  (Replay is expected
 * to be single-process, so the lock is not strictly needed, but routines
 * such as MarkBufferDirty complain when it is missing.)
 *
 * init = true: the caller is going to rebuild the whole page from the XLOG
 * record.  The relation is extended as far as necessary so that the page
 * exists, and an all-zeroes ("new") page is acceptable.  For a page that is
 * already within the file, ReadOrZeroBuffer is used, so that a page with
 * corrupt on-disk headers need not cause a failure.
 *
 * init = false: the caller requires an already-valid page.  When the block
 * lies beyond the end of the relation, or the page is all zeroes, the
 * condition is reported through log_invalid_page and InvalidBuffer is
 * returned; the caller should then silently skip its update.  (The page is
 * expected to have been dropped or truncated later in the WAL sequence; if
 * no evidence of that turns up, a complaint is raised at the end of replay.)
 */
Buffer
XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
{
	BlockNumber nblocks = RelationGetNumberOfBlocks(reln);
	Buffer		buf;
	Page		pg;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	Assert(blkno != P_NEW);

	if (blkno >= nblocks)
	{
		/* Target block is past the current end of the file. */
		if (!init)
		{
			log_invalid_page(reln->rd_node, blkno, false);
			return InvalidBuffer;
		}

		/*
		 * Extend the file one block at a time until the target block has
		 * been added.  This happens in recovery only, so no relation
		 * extension lock is taken.
		 */
		Assert(InRecovery);
		buf = InvalidBuffer;
		for (; nblocks <= blkno; nblocks++)
		{
			/* Drop the pin on the previously added block, if any. */
			if (buf != InvalidBuffer)
				ReleaseBuffer(buf);
			buf = ReadBuffer(reln, P_NEW);
		}
		Assert(BufferGetBlockNumber(buf) == blkno);
	}
	else if (init)
	{
		/* Page is in the file, but its contents are about to be replaced. */
		buf = ReadOrZeroBuffer(reln, blkno);
	}
	else
	{
		/* Page is in the file and its contents matter. */
		buf = ReadBuffer(reln, blkno);
	}

	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	/* A page that will be re-initialized needs no validity check. */
	if (init)
		return buf;

	/* Otherwise insist that the page has been initialized. */
	pg = (Page) BufferGetPage(buf);
	if (PageIsNew((PageHeader) pg))
	{
		UnlockReleaseBuffer(buf);
		log_invalid_page(reln->rd_node, blkno, true);
		return InvalidBuffer;
	}

	return buf;
}
/* * XLogReadBufferExtended * Read a page during XLOG replay * * This is functionally comparable to ReadBufferExtended. There's some * differences in the behavior wrt. the "mode" argument: * * In RBM_NORMAL mode, if the page doesn't exist, or contains all-zeroes, we * return InvalidBuffer. In this case the caller should silently skip the * update on this page. (In this situation, we expect that the page was later * dropped or truncated. If we don't see evidence of that later in the WAL * sequence, we'll complain at the end of WAL replay.) * * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended * with all-zeroes pages up to the given block number. * * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't * exist, and we don't check for all-zeroes. Thus, no log entry is made * to imply that the page should be dropped or truncated later. * * NB: A redo function should normally not call this directly. To get a page * to modify, use XLogReplayBuffer instead. It is important that all pages * modified by a WAL record are registered in the WAL records, or they will be * invisible to tools that that need to know which pages are modified. */ Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode) { BlockNumber lastblock; Buffer buffer; SMgrRelation smgr; Assert(blkno != P_NEW); /* Open the relation at smgr level */ smgr = smgropen(rnode, InvalidBackendId); /* * Create the target file if it doesn't already exist. This lets us cope * if the replay sequence contains writes to a relation that is later * deleted. (The original coding of this routine would instead suppress * the writes, but that seems like it risks losing valuable data if the * filesystem loses an inode during a crash. Better to write the data * until we are actually told to delete the file.) 
*/ smgrcreate(smgr, forknum, true); lastblock = smgrnblocks(smgr, forknum); if (blkno < lastblock) { /* page exists in file */ buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno, mode, NULL); } else { /* hm, page doesn't exist in file */ if (mode == RBM_NORMAL) { log_invalid_page(rnode, forknum, blkno, false); return InvalidBuffer; } if (mode == RBM_NORMAL_NO_LOG) return InvalidBuffer; /* OK to extend the file */ /* we do this in recovery only - no rel-extension lock needed */ Assert(InRecovery); buffer = InvalidBuffer; do { if (buffer != InvalidBuffer) { if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); } buffer = ReadBufferWithoutRelcache(rnode, forknum, P_NEW, mode, NULL); } while (BufferGetBlockNumber(buffer) < blkno); /* Handle the corner case that P_NEW returns non-consecutive pages */ if (BufferGetBlockNumber(buffer) != blkno) { if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno, mode, NULL); } } if (mode == RBM_NORMAL) { /* check that page has been initialized */ Page page = (Page) BufferGetPage(buffer); /* * We assume that PageIsNew is safe without a lock. During recovery, * there should be no other backends that could modify the buffer at * the same time. */ if (PageIsNew(page)) { ReleaseBuffer(buffer); log_invalid_page(rnode, forknum, blkno, true); return InvalidBuffer; } } return buffer; }
/*
 * xl_read_buf_ext
 *	Read a page during XLOG replay
 *
 * This is functionally comparable to ReadBufferExtended. There are some
 * differences in the behavior wrt. the "mode" argument:
 *
 * In RBM_NORMAL mode, if the page doesn't exist, or contains all-zeroes, we
 * return INVALID_BUF. In this case the caller should silently skip the
 * update on this page. (In this situation, we expect that the page was later
 * dropped or truncated. If we don't see evidence of that later in the WAL
 * sequence, we'll complain at the end of WAL replay.)
 *
 * In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the
 * relation is extended with all-zeroes pages up to the given block number.
 *
 * On success the returned buffer always refers to block "blkno"; this is
 * enforced at run time rather than only by an assertion.
 */
buf_id_t
xl_read_buf_ext(struct fnode rnode, enum fork forknum, block_t blkno,
		enum readbuf_mode mode)
{
	block_t lastblock;
	buf_id_t buffer;
	struct smgr *smgr;

	ASSERT(blkno != P_NEW);

	/* Open the relation at smgr level */
	smgr = smgr_open(rnode, INVALID_BKNID);

	/*
	 * Create the target file if it doesn't already exist. This lets us
	 * cope if the replay sequence contains writes to a relation that is
	 * later deleted.
	 * The original coding of this routine would instead suppress
	 * the writes, but that seems like it risks losing valuable data if the
	 * filesystem loses an inode during a crash. Better to write the data
	 * until we are actually told to delete the file.
	 */
	smgr_create(smgr, forknum, true);

	lastblock = smgr_nr_blk(smgr, forknum);
	if (blkno < lastblock) {
		/* page exists in file */
		buffer = read_buf_no_cache(rnode, forknum, blkno, mode, NULL);
	} else {
		/* hm, page doesn't exist in file */
		if (mode == RBM_NORMAL) {
			log_invalid_page(rnode, forknum, blkno, false);
			return INVALID_BUF;
		}

		/* OK to extend the file */
		/* we do this in recovery only - no rel-extension lock needed */
		ASSERT(in_recovery);

		/*
		 * Keep adding pages until the one just added is at or beyond the
		 * target. The loop is driven by the block number actually handed
		 * back, not by a locally incremented counter, so it cannot drift
		 * out of sync with the file.
		 */
		buffer = INVALID_BUF;
		do {
			if (buffer != INVALID_BUF)
				release_buf(buffer);
			buffer = read_buf_no_cache(rnode, forknum, P_NEW, mode,
						   NULL);
		} while (buf_block_nr(buffer) < blkno);

		/*
		 * Handle the corner case that P_NEW returns non-consecutive
		 * pages: if we overshot, the target block now exists, so drop the
		 * page we got and read the requested one explicitly. Without
		 * this, a non-assert build would return a buffer for the wrong
		 * block.
		 */
		if (buf_block_nr(buffer) != blkno) {
			release_buf(buffer);
			buffer = read_buf_no_cache(rnode, forknum, blkno, mode,
						   NULL);
		}
	}

	if (mode == RBM_NORMAL) {
		/* check that page has been initialized */
		page_p page = (page_p) BUF_PAGE(buffer);

		/*
		 * We assume that PAGE_NEW is safe without a lock. During
		 * recovery, there should be no other backends that could modify
		 * the buffer at the same time.
		 */
		if (PAGE_NEW(page)) {
			release_buf(buffer);
			log_invalid_page(rnode, forknum, blkno, true);
			return INVALID_BUF;
		}
	}

	return buffer;
}