Example #1
/*
 * Set checksum for a page in private memory.
 *
 * This must only be used when we know that no other process can be modifying
 * the page buffer.
 */
void
PageSetChecksumInplace(Page page, BlockNumber blkno)
{
	/* If we don't need a checksum, just return */
	if (PageIsNew(page) || !DataChecksumsEnabled())
		return;

	((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
}
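
To make the contract concrete, here is a minimal sketch of a call site: a page assembled in backend-local memory is checksummed in place and handed to the storage manager. The helper name write_local_page and its context are assumptions for illustration; smgrextend is the PostgreSQL storage manager call for appending a block.

/*
 * Hypothetical call site: a page built in backend-local memory is
 * checksummed in place right before it is written, which is safe because
 * no other process can see or modify local_page.
 */
static void
write_local_page(SMgrRelation reln, BlockNumber blkno, Page local_page)
{
	PageSetChecksumInplace(local_page, blkno);	/* no-op if checksums are off */
	smgrextend(reln, MAIN_FORKNUM, blkno, (char *) local_page, false);
}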
Example #2
/*
 * Set checksum for a page in shared buffers.
 *
 * If checksums are disabled, or if the page is not initialized, just return
 * the input.  Otherwise, we must make a copy of the page before calculating
 * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
 * from making the final checksum invalid.  It doesn't matter if we include or
 * exclude hints during the copy, as long as we write a valid page and
 * associated checksum.
 *
 * Returns a pointer to the block-sized data that needs to be written. Uses
 * statically-allocated memory, so the caller must immediately write the
 * returned page and not refer to it again.
 */
char *
PageSetChecksumCopy(Page page, BlockNumber blkno)
{
	static char *pageCopy = NULL;

	/* If we don't need a checksum, just return the passed-in data */
	if (PageIsNew(page) || !DataChecksumsEnabled())
		return (char *) page;

	/*
	 * We allocate the copy space once and use it over on each subsequent
	 * call.  The point of palloc'ing here, rather than having a static char
	 * array, is first to ensure adequate alignment for the checksumming code
	 * and second to avoid wasting space in processes that never call this.
	 */
	if (pageCopy == NULL)
		pageCopy = MemoryContextAlloc(TopMemoryContext, BLCKSZ);

	memcpy(pageCopy, (char *) page, BLCKSZ);
	((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
	return pageCopy;
}
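
The copy variant is what a buffer manager write path would use, because other backends may still be setting hint bits on the shared page while it is being written out. Below is a condensed sketch, loosely modeled on FlushBuffer in bufmgr.c; the helper name flush_shared_page and the reduced argument list are assumptions.

/*
 * Condensed shared-buffer write path: the checksum is computed on a
 * private copy, and that copy is what reaches disk, so concurrent
 * hint-bit updates cannot invalidate the checksum being written.
 */
static void
flush_shared_page(SMgrRelation reln, BlockNumber blkno, Page shared_page)
{
	char	   *bufToWrite = PageSetChecksumCopy(shared_page, blkno);

	smgrwrite(reln, MAIN_FORKNUM, blkno, bufToWrite, false);
}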
Example #3
/*
 * PageIsVerified
 *		Check that the page header and checksum (if any) appear valid.
 *
 * This is called when a page has just been read in from disk.  The idea is
 * to cheaply detect trashed pages before we go nuts following bogus item
 * pointers, testing invalid transaction identifiers, etc.
 *
 * It turns out to be necessary to allow zeroed pages here too.  Even though
 * this routine is *not* called when deliberately adding a page to a relation,
 * there are scenarios in which a zeroed page might be found in a table.
 * (Example: a backend extends a relation, then crashes before it can write
 * any WAL entry about the new page.  The kernel will already have the
 * zeroed page in the file, and it will stay that way after restart.)  So we
 * allow zeroed pages here, and are careful that the page access macros
 * treat such a page as empty and without free space.  Eventually, VACUUM
 * will clean up such a page and make it usable.
 */
bool
PageIsVerified(Page page, BlockNumber blkno)
{
	PageHeader	p = (PageHeader) page;
	size_t	   *pagebytes;
	int			i;
	bool		checksum_failure = false;
	bool		header_sane = false;
	bool		all_zeroes = false;
	uint16		checksum = 0;

	/*
	 * Don't verify page data unless the page passes basic non-zero test
	 */
	if (!PageIsNew(page))
	{
		if (DataChecksumsEnabled())
		{
			checksum = pg_checksum_page((char *) page, blkno);

			if (checksum != p->pd_checksum)
				checksum_failure = true;
		}

		/*
		 * The following checks don't prove the header is correct, only that
		 * it looks sane enough to allow into the buffer pool. Later usage of
		 * the block can still reveal problems, which is why we offer the
		 * checksum option.
		 */
		if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
			p->pd_lower <= p->pd_upper &&
			p->pd_upper <= p->pd_special &&
			p->pd_special <= BLCKSZ &&
			p->pd_special == MAXALIGN(p->pd_special))
			header_sane = true;

		if (header_sane && !checksum_failure)
			return true;
	}

	/*
	 * Check all-zeroes case.  Luckily BLCKSZ is guaranteed to always be a
	 * multiple of sizeof(size_t), and it's much faster to compare memory
	 * using the native word size.
	 */
	StaticAssertStmt(BLCKSZ == (BLCKSZ / sizeof(size_t)) * sizeof(size_t),
					 "BLCKSZ has to be a multiple of sizeof(size_t)");

	all_zeroes = true;
	pagebytes = (size_t *) page;
	for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
	{
		if (pagebytes[i] != 0)
		{
			all_zeroes = false;
			break;
		}
	}

	if (all_zeroes)
		return true;

	/*
	 * Throw a WARNING if the checksum fails, but only after we've checked for
	 * the all-zeroes case.
	 */
	if (checksum_failure)
	{
		ereport(WARNING,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("page verification failed, calculated checksum %u but expected %u",
						checksum, p->pd_checksum)));

		if (header_sane && ignore_checksum_failure)
			return true;
	}

	return false;
}
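
On the read side, PageIsVerified is the gatekeeper between disk and the buffer pool. Here is a reduced sketch of that call site, loosely modeled on ReadBuffer_common in bufmgr.c; the helper name read_and_verify is an assumption, and the zero_damaged_pages escape hatch is omitted.

/*
 * Reduced read-side check: a block is read from disk and rejected with
 * an ERROR before it can enter the buffer pool and be trusted by the
 * rest of the system.
 */
static void
read_and_verify(SMgrRelation reln, ForkNumber forkNum,
				BlockNumber blkno, Page page)
{
	smgrread(reln, forkNum, blkno, (char *) page);

	if (!PageIsVerified(page, blkno))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("invalid page in block %u of relation %s",
						blkno, relpath(reln->smgr_rnode, forkNum))));
}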
Example #4
/*
 *	visibilitymap_set - set a bit on a previously pinned page
 *
 * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
 * or InvalidXLogRecPtr in normal running.  The page LSN is advanced to the
 * one provided; in normal running, we generate a new XLOG record and set the
 * page LSN to that value.  cutoff_xid is the largest xmin on the page being
 * marked all-visible; it is needed for Hot Standby, and can be
 * InvalidTransactionId if the page contains no tuples.
 *
 * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
 * this function. Except in recovery, caller should also pass the heap
 * buffer. When checksums are enabled and we're not in recovery, we must add
 * the heap buffer to the WAL chain to protect it from being torn.
 *
 * You must pass a buffer containing the correct map page to this function.
 * Call visibilitymap_pin first to pin the right one. This function doesn't do
 * any I/O.
 */
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
				  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid)
{
	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
	uint8		mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
	Page		page;
	char	   *map;

#ifdef TRACE_VISIBILITYMAP
	elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
#endif

	Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
	Assert(InRecovery || BufferIsValid(heapBuf));

	/* Check that we have the right heap page pinned, if present */
	if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
		elog(ERROR, "wrong heap buffer passed to visibilitymap_set");

	/* Check that we have the right VM page pinned */
	if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
		elog(ERROR, "wrong VM buffer passed to visibilitymap_set");

	page = BufferGetPage(vmBuf);
	map = PageGetContents(page);
	LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);

	if (!(map[mapByte] & (1 << mapBit)))
	{
		START_CRIT_SECTION();

		map[mapByte] |= (1 << mapBit);
		MarkBufferDirty(vmBuf);

		if (RelationNeedsWAL(rel))
		{
			if (XLogRecPtrIsInvalid(recptr))
			{
				Assert(!InRecovery);
				recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf,
										  cutoff_xid);

				/*
				 * If data checksums are enabled, we need to protect the heap
				 * page from being torn.
				 */
				if (DataChecksumsEnabled())
				{
					Page heapPage = BufferGetPage(heapBuf);

					/* caller is expected to set PD_ALL_VISIBLE first */
					Assert(PageIsAllVisible(heapPage));
					PageSetLSN(heapPage, recptr);
				}
			}
			PageSetLSN(page, recptr);
		}

		END_CRIT_SECTION();
	}

	LockBuffer(vmBuf, BUFFER_LOCK_UNLOCK);
}
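
The calling protocol spelled out in the header comment is easiest to see as a sequence. The following is a hedged sketch of a non-recovery caller, loosely modeled on VACUUM's lazy_scan_heap; the helper name mark_page_all_visible and the simplified locking are assumptions (in the real caller the VM page is pinned before the heap page is locked, since visibilitymap_pin may do I/O).

/*
 * Hypothetical non-recovery caller: pin the right VM page, set
 * PD_ALL_VISIBLE on the heap page first (as the contract requires),
 * then let visibilitymap_set flip the bit and WAL-log the change.
 */
static void
mark_page_all_visible(Relation rel, Buffer heapBuf, Buffer *vmbuffer,
					  TransactionId cutoff_xid)
{
	BlockNumber blkno = BufferGetBlockNumber(heapBuf);
	Page		heapPage = BufferGetPage(heapBuf);

	visibilitymap_pin(rel, blkno, vmbuffer);

	PageSetAllVisible(heapPage);
	MarkBufferDirty(heapBuf);
	visibilitymap_set(rel, blkno, heapBuf, InvalidXLogRecPtr,
					  *vmbuffer, cutoff_xid);
}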
Example #5
/**
 * @brief Write out the contents of the block buffer.  The number of blocks
 * to write is computed from the loader's current block position.
 *
 * Flow:
 * <ol>
 *	 <li>If no more space is available in the data file, switch to a new one.</li>
 *	 <li>Compute the number of blocks that can be written to the current file.</li>
 *	 <li>Save the last block number in the load status file.</li>
 *	 <li>Write to the current file.</li>
 *	 <li>If more data remains, repeat for the next segment file.</li>
 * </ol>
 *
 * @param loader [in] Direct Writer.
 */
static void
flush_pages(DirectWriter *loader)
{
	int			i;
	int			num;
	LoadStatus *ls = &loader->ls;

	num = loader->curblk;
	if (!PageIsEmpty(GetCurrentPage(loader)))
		num += 1;

	if (num <= 0)
		return;		/* no work */

	/*
	 * Add a WAL entry (for the first page only) to ensure that the current
	 * xid will be recorded in the xlog.  We must flush some xlog records
	 * with XLogFlush() before writing any data blocks, to follow the WAL
	 * protocol.
	 *
	 * If a postgres process, such as the loader or COPY, is killed by
	 * "kill -9", the database will be rewound to the last checkpoint and
	 * recovery will be performed using WAL.
	 *
	 * After recovery, any xids that were never recorded in WAL may be
	 * reused.  However, the loader and COPY have already written those xids
	 * into the data files, so such xids must not be reused.
	 *
	 * A WAL entry carrying such an xid can be added with XLogInsert(), but
	 * such entries are not necessarily written to disk immediately.  WAL
	 * entries are flushed to disk by XLogFlush(), typically when a
	 * transaction is committed.  COPY prevents xid reuse by this method.
	 */
#if PG_VERSION_NUM >= 90100
	if (ls->ls.create_cnt == 0 && !RELATION_IS_LOCAL(loader->base.rel)
			&& !(loader->base.rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) )
	{
		XLogRecPtr	recptr;

		recptr = log_newpage(&ls->ls.rnode, MAIN_FORKNUM,
			ls->ls.exist_cnt, loader->blocks);
		XLogFlush(recptr);
	}
#else
	if (ls->ls.create_cnt == 0 && !RELATION_IS_LOCAL(loader->base.rel) )
	{
		XLogRecPtr	recptr;

		recptr = log_newpage(&ls->ls.rnode, MAIN_FORKNUM,
			ls->ls.exist_cnt, loader->blocks);
		XLogFlush(recptr);
	}
#endif
	/*
	 * Write blocks. We might need to write multiple files on boundary of
	 * relation segments.
	 */
	for (i = 0; i < num;)
	{
		char	   *buffer;
		int			total;
		int			written;
		int			flush_num;
		BlockNumber	relblks = LS_TOTAL_CNT(ls);

		/* Switch to the next file if the current file has been filled up. */
		if (relblks % RELSEG_SIZE == 0)
			close_data_file(loader);
		if (loader->datafd == -1)
			loader->datafd = open_data_file(ls->ls.rnode,
											RELATION_IS_LOCAL(loader->base.rel),
											relblks);

		/* Number of blocks to be added to the current file. */
		flush_num = Min(num - i, RELSEG_SIZE - relblks % RELSEG_SIZE);
		Assert(flush_num > 0);

		/* Write the last block number to the load status file. */
		UpdateLSF(loader, flush_num);

#if PG_VERSION_NUM >= 90300
		/* If we need a checksum, add it */
		if (DataChecksumsEnabled())
		{
			int			j;

			for (j = 0; j < flush_num; j++)
			{
				Page		contained_page = GetTargetPage(loader, j);

				((PageHeader) contained_page)->pd_checksum =
					pg_checksum_page((char *) contained_page,
									 LS_TOTAL_CNT(ls) - 1 - j);
			}
		}
#endif

		/*
		 * Flush flush_num data blocks to the current file, so the file
		 * never grows beyond RELSEG_SIZE blocks.
		 */
		buffer = loader->blocks + BLCKSZ * i;
		total = BLCKSZ * flush_num;
		written = 0;
		while (total > 0)
		{
			int	len = write(loader->datafd, buffer + written, total);
			if (len == -1)
			{
				/* fatal error, do not want to write blocks anymore */
				ereport(ERROR, (errcode_for_file_access(),
								errmsg("could not write to data file: %m")));
			}
			written += len;
			total -= len;
		}

		i += flush_num;
	}

	/*
	 * NOTICE: Be sure reset curblk to 0 and reinitialize recycled page
	 * if you will continue to use blocks.
	 */
}
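
A natural counterpart to writing checksummed blocks directly is verifying them offline. Below is a minimal verification sketch under stated assumptions: the cluster is stopped, fd points at an open heap segment file, first_blkno is the segment's starting block number, and the usual PostgreSQL headers (postgres.h, storage/bufpage.h, storage/checksum.h, plus unistd.h for read) are available. This is not pg_bulkload API, just an illustration of the check such tools perform.

/*
 * Offline verification sketch: walk a segment file page by page and
 * recompute each checksum.  New (all-zero) pages carry no checksum and
 * are skipped, mirroring the PageIsNew() check in PageIsVerified().
 */
static int
verify_segment(int fd, BlockNumber first_blkno)
{
	char		buf[BLCKSZ];
	BlockNumber blkno = first_blkno;
	int			bad = 0;

	while (read(fd, buf, BLCKSZ) == BLCKSZ)
	{
		PageHeader	p = (PageHeader) buf;

		if (!PageIsNew((Page) buf) &&
			pg_checksum_page(buf, blkno) != p->pd_checksum)
			bad++;
		blkno++;
	}
	return bad;			/* number of pages with a mismatched checksum */
}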