Example #1
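/*
 * Replay creation of an SP-GiST index: re-initialize the meta page and the
 * head (root) leaf page, stamp both with the record's LSN, and mark them
 * dirty.
 */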
static void
spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
	RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
	Buffer		buffer;
	Page		page;

	buffer = XLogReadBuffer(*node, SPGIST_METAPAGE_BLKNO, true);
	Assert(BufferIsValid(buffer));
	page = (Page) BufferGetPage(buffer);
	SpGistInitMetapage(page);
	PageSetLSN(page, lsn);
	PageSetTLI(page, ThisTimeLineID);
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);

	buffer = XLogReadBuffer(*node, SPGIST_HEAD_BLKNO, true);
	Assert(BufferIsValid(buffer));
	SpGistInitBuffer(buffer, SPGIST_LEAF);
	page = (Page) BufferGetPage(buffer);
	PageSetLSN(page, lsn);
	PageSetTLI(page, ThisTimeLineID);
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);
}
Example #2
/*
 * _bitmap_log_bitmap_lastwords() -- log the last two words in a bitmap.
 */
void
_bitmap_log_bitmap_lastwords(Relation rel, Buffer lovBuffer, 
							 OffsetNumber lovOffset, BMLOVItem lovItem)
{
	xl_bm_bitmap_lastwords	xlLastwords;
	XLogRecPtr				recptr;
	XLogRecData				rdata[1];

	/*
	 * Fetch gp_persistent_relation_node information that will be added to
	 * the XLOG record.
	 */
	RelationFetchGpRelationNodeForXLog(rel);

	xlLastwords.bm_node = rel->rd_node;
	xlLastwords.bm_persistentTid = rel->rd_relationnodeinfo.persistentTid;
	xlLastwords.bm_persistentSerialNum = rel->rd_relationnodeinfo.persistentSerialNum;
	xlLastwords.bm_last_compword = lovItem->bm_last_compword;
	xlLastwords.bm_last_word = lovItem->bm_last_word;
	xlLastwords.lov_words_header = lovItem->lov_words_header;
	xlLastwords.bm_last_setbit = lovItem->bm_last_setbit;
	xlLastwords.bm_last_tid_location = lovItem->bm_last_tid_location;
	xlLastwords.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
	xlLastwords.bm_lov_offset = lovOffset;

	rdata[0].buffer = InvalidBuffer;
	rdata[0].data = (char*)&xlLastwords;
	rdata[0].len = sizeof(xl_bm_bitmap_lastwords);
	rdata[0].next = NULL;

	recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_BITMAP_LASTWORDS, 
						rdata);

	PageSetLSN(BufferGetPage(lovBuffer), recptr);
	PageSetTLI(BufferGetPage(lovBuffer), ThisTimeLineID);
}
Example #3
/*
 * _bitmap_log_metapage() -- log the changes to the metapage
 */
void
_bitmap_log_metapage(Relation rel, BMMetaPage metapage)
{
	/* XLOG stuff */
	START_CRIT_SECTION();

	if (!(rel->rd_istemp))
	{
		xl_bm_metapage*		xlMeta;
		XLogRecPtr			recptr;
		XLogRecData			rdata[1];

#ifdef BM_DEBUG
		elog(LOG, "call _bitmap_log_metapage.");
#endif

		xlMeta = (xl_bm_metapage*)
			palloc(MAXALIGN(sizeof(xl_bm_metapage)));
		xlMeta->bm_node = rel->rd_node;

		rdata[0].buffer = InvalidBuffer;
		rdata[0].data = (char*)xlMeta;
		rdata[0].len = MAXALIGN(sizeof(xl_bm_metapage));
		rdata[0].next = NULL;
			
		recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_META, rdata);

		PageSetLSN(metapage, recptr);
		PageSetTLI(metapage, ThisTimeLineID);
		pfree(xlMeta);
	}

	END_CRIT_SECTION();
}
Example #4
/*
 * Replay the clearing of F_FOLLOW_RIGHT flag.
 */
static void
gistRedoClearFollowRight(RelFileNode node, XLogRecPtr lsn,
						 BlockNumber leftblkno)
{
	Buffer		buffer;

	buffer = XLogReadBuffer(node, leftblkno, false);
	if (BufferIsValid(buffer))
	{
		Page		page = (Page) BufferGetPage(buffer);

		/*
		 * Note that we still update the page even if page LSN is equal to the
		 * LSN of this record, because the updated NSN is not included in the
		 * full page image.
		 */
		if (!XLByteLT(lsn, PageGetLSN(page)))
		{
			GistPageGetOpaque(page)->nsn = lsn;
			GistClearFollowRight(page);

			PageSetLSN(page, lsn);
			PageSetTLI(page, ThisTimeLineID);
			MarkBufferDirty(buffer);
		}
		UnlockReleaseBuffer(buffer);
	}
}
Example #5
/*
 * Replay the clearing of F_FOLLOW_RIGHT flag on a child page.
 *
 * Even if the WAL record includes a full-page image, we have to update the
 * follow-right flag, because that change is not included in the full-page
 * image.  To be sure that the intermediate state with the wrong flag value is
 * not visible to concurrent Hot Standby queries, this function handles
 * restoring the full-page image as well as updating the flag.  (Note that
 * we never need to do anything else to the child page in the current WAL
 * action.)
 */
static void
gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
						 RelFileNode node, BlockNumber childblkno)
{
	Buffer		buffer;
	Page		page;

	if (record->xl_info & XLR_BKP_BLOCK(block_index))
		buffer = RestoreBackupBlock(lsn, record, block_index, false, true);
	else
	{
		buffer = XLogReadBuffer(node, childblkno, false);
		if (!BufferIsValid(buffer))
			return;				/* page was deleted, nothing to do */
	}
	page = (Page) BufferGetPage(buffer);

	/*
	 * Note that we still update the page even if page LSN is equal to the LSN
	 * of this record, because the updated NSN is not included in the full
	 * page image.
	 */
	if (lsn >= PageGetLSN(page))
	{
		GistPageSetNSN(page, lsn);
		GistClearFollowRight(page);

		PageSetLSN(page, lsn);
		PageSetTLI(page, ThisTimeLineID);
		MarkBufferDirty(buffer);
	}
	UnlockReleaseBuffer(buffer);
}
Example #6
/*
 *	visibilitymap_set - set a bit on a previously pinned page
 *
 * recptr is the LSN of the heap page. The LSN of the visibility map page is
 * advanced to that, to make sure that the visibility map doesn't get flushed
 * to disk before the update to the heap page that made all tuples visible.
 *
 * This is an opportunistic function. It does nothing, unless *buf
 * contains the bit for heapBlk. Call visibilitymap_pin first to pin
 * the right map page. This function doesn't do any I/O.
 */
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
				  Buffer *buf)
{
	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
	uint8		mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
	Page		page;
	char	   *map;

#ifdef TRACE_VISIBILITYMAP
	elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
#endif

	/* Check that we have the right page pinned */
	if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != mapBlock)
		return;

	page = BufferGetPage(*buf);
	map = PageGetContents(page);
	LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);

	if (!(map[mapByte] & (1 << mapBit)))
	{
		map[mapByte] |= (1 << mapBit);

		if (XLByteLT(PageGetLSN(page), recptr))
			PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
		MarkBufferDirty(*buf);
	}

	LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
}
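A minimal calling sketch for this variant of visibilitymap_set(), modeled on how Example #30 uses it during a heap scan; the helper name and its parameters (rel, blkno, heapBuf) are illustrative and not part of the original source:

static void
mark_block_all_visible(Relation rel, BlockNumber blkno, Buffer heapBuf)
{
	Buffer		vmbuffer = InvalidBuffer;
	Page		heapPage = BufferGetPage(heapBuf);

	/* Pin the map page covering blkno before locking the heap page. */
	visibilitymap_pin(rel, blkno, &vmbuffer);

	/* With the heap page share-locked, copy its LSN onto the map page. */
	LockBuffer(heapBuf, BUFFER_LOCK_SHARE);
	if (PageIsAllVisible(heapPage))
		visibilitymap_set(rel, blkno, PageGetLSN(heapPage), &vmbuffer);
	LockBuffer(heapBuf, BUFFER_LOCK_UNLOCK);

	/* Release the pin on the visibility map page when done with this block. */
	if (BufferIsValid(vmbuffer))
		ReleaseBuffer(vmbuffer);
}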
Example #7
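/*
 * Replay removal of dead tuples from the SP-GiST head (root) page during
 * VACUUM: unless a full-page image was restored, delete the listed offsets
 * and stamp the page with the record's LSN.
 */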
static void
spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
{
	char	   *ptr = XLogRecGetData(record);
	spgxlogVacuumRoot *xldata = (spgxlogVacuumRoot *) ptr;
	OffsetNumber *toDelete;
	Buffer		buffer;
	Page		page;

	ptr += sizeof(spgxlogVacuumRoot);
	toDelete = (OffsetNumber *) ptr;

	if (!(record->xl_info & XLR_BKP_BLOCK_1))
	{
		buffer = XLogReadBuffer(xldata->node, SPGIST_HEAD_BLKNO, false);
		if (BufferIsValid(buffer))
		{
			page = BufferGetPage(buffer);
			if (!XLByteLE(lsn, PageGetLSN(page)))
			{
				/* The tuple numbers are in order */
				PageIndexMultiDelete(page, toDelete, xldata->nDelete);

				PageSetLSN(page, lsn);
				PageSetTLI(page, ThisTimeLineID);
				MarkBufferDirty(buffer);
			}
			UnlockReleaseBuffer(buffer);
		}
	}
}
Example #8
/*
 * buffer must be pinned and locked by caller
 */
void
gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer key)
{
	Page		page;

	Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
	page = BufferGetPage(buffer);

	START_CRIT_SECTION();

	GISTInitBuffer(buffer, 0);
	gistfillbuffer(r, page, itup, len, FirstOffsetNumber);

	MarkBufferDirty(buffer);

	if (!r->rd_istemp)
	{
		XLogRecPtr	recptr;
		XLogRecData *rdata;

		rdata = formUpdateRdata(r, buffer,
								NULL, 0,
								itup, len, key);

		recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_NEW_ROOT, rdata);
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}
	else
		PageSetLSN(page, XLogRecPtrForTemp);

	END_CRIT_SECTION();
}
Example #9
/*
 * _bitmap_log_newpage() -- log a new page.
 *
 * This function is called before writing a new buffer.
 */
void
_bitmap_log_newpage(Relation rel, uint8 info, Buffer buf)
{
	Page page;

	xl_bm_newpage		xlNewPage;
	XLogRecPtr			recptr;
	XLogRecData			rdata[1];

	page = BufferGetPage(buf);

	/*
	 * Fetch gp_persistent_relation_node information that will be added to
	 * the XLOG record.
	 */
	RelationFetchGpRelationNodeForXLog(rel);

	xlNewPage.bm_node = rel->rd_node;
	xlNewPage.bm_persistentTid = rel->rd_relationnodeinfo.persistentTid;
	xlNewPage.bm_persistentSerialNum = rel->rd_relationnodeinfo.persistentSerialNum;
	xlNewPage.bm_new_blkno = BufferGetBlockNumber(buf);

	elog(DEBUG1, "_bitmap_log_newpage: blkno=%d", xlNewPage.bm_new_blkno);

	rdata[0].buffer = InvalidBuffer;
	rdata[0].data = (char *)&xlNewPage;
	rdata[0].len = sizeof(xl_bm_newpage);
	rdata[0].next = NULL;
			
	recptr = XLogInsert(RM_BITMAP_ID, info, rdata);

	PageSetLSN(page, recptr);
	PageSetTLI(page, ThisTimeLineID);
}
Example #10
/*
 * _bitmap_log_metapage() -- log the changes to the metapage
 */
void
_bitmap_log_metapage(Relation rel, Page page)
{
	BMMetaPage metapage = (BMMetaPage) PageGetContents(page);

	xl_bm_metapage*		xlMeta;
	XLogRecPtr			recptr;
	XLogRecData			rdata[1];

	/*
	 * Fetch gp_persistent_relation_node information that will be added to
	 * the XLOG record.
	 */
	RelationFetchGpRelationNodeForXLog(rel);

	xlMeta = (xl_bm_metapage *)
		palloc(MAXALIGN(sizeof(xl_bm_metapage)));
	xlMeta->bm_node = rel->rd_node;
	xlMeta->bm_persistentTid = rel->rd_relationnodeinfo.persistentTid;
	xlMeta->bm_persistentSerialNum = rel->rd_relationnodeinfo.persistentSerialNum;
	xlMeta->bm_lov_heapId = metapage->bm_lov_heapId;
	xlMeta->bm_lov_indexId = metapage->bm_lov_indexId;
	xlMeta->bm_lov_lastpage = metapage->bm_lov_lastpage;

	rdata[0].buffer = InvalidBuffer;
	rdata[0].data = (char*)xlMeta;
	rdata[0].len = MAXALIGN(sizeof(xl_bm_metapage));
	rdata[0].next = NULL;
			
	recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_META, rdata);

	PageSetLSN(page, recptr);
	PageSetTLI(page, ThisTimeLineID);
	pfree(xlMeta);
}
Example #11
/*
 * _bt_lognewpage() -- create an XLOG entry for a new page of the btree.
 */
void
_bt_lognewpage(Relation index,
			   Page newPage,
			   BlockNumber blockNo)
{
	/* We use the heap NEWPAGE record type for this */
	xl_heap_newpage xlrec;
	XLogRecPtr	recptr;
	XLogRecData rdata[2];
	
	/* NO ELOG(ERROR) from here till newpage op is logged */
	START_CRIT_SECTION();
	
	xl_heapnode_set(&xlrec.heapnode, index);
	xlrec.blkno = blockNo;
	
	rdata[0].data = (char *) &xlrec;
	rdata[0].len = SizeOfHeapNewpage;
	rdata[0].buffer = InvalidBuffer;
	rdata[0].next = &(rdata[1]);
	
	rdata[1].data = (char *) newPage;
	rdata[1].len = BLCKSZ;
	rdata[1].buffer = InvalidBuffer;
	rdata[1].next = NULL;
	
	recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
	
	PageSetLSN(newPage, recptr);
	PageSetTLI(newPage, ThisTimeLineID);
	
	END_CRIT_SECTION();
}
Example #12
/*
 * _bitmap_log_lovitem() -- log adding a new lov item to a lov page.
 */
void
_bitmap_log_lovitem(Relation rel, Buffer lovBuffer, OffsetNumber offset,
					BMLOVItem lovItem, Buffer metabuf, bool is_new_lov_blkno)
{
	Page lovPage = BufferGetPage(lovBuffer);

	xl_bm_lovitem	xlLovItem;
	XLogRecPtr		recptr;
	XLogRecData		rdata[1];

	/*
	 * Fetch gp_persistent_relation_node information that will be added to
	 * the XLOG record.
	 */
	RelationFetchGpRelationNodeForXLog(rel);

	Assert(BufferGetBlockNumber(lovBuffer) > 0);

	xlLovItem.bm_node = rel->rd_node;
	xlLovItem.bm_persistentTid = rel->rd_relationnodeinfo.persistentTid;
	xlLovItem.bm_persistentSerialNum = rel->rd_relationnodeinfo.persistentSerialNum;
	xlLovItem.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
	xlLovItem.bm_lov_offset = offset;
	memcpy(&(xlLovItem.bm_lovItem), lovItem, sizeof(BMLOVItemData));
	xlLovItem.bm_is_new_lov_blkno = is_new_lov_blkno;

	rdata[0].buffer = InvalidBuffer;
	rdata[0].data = (char*)&xlLovItem;
	rdata[0].len = sizeof(xl_bm_lovitem);
	rdata[0].next = NULL;

	recptr = XLogInsert(RM_BITMAP_ID, 
						XLOG_BITMAP_INSERT_LOVITEM, rdata);

	if (is_new_lov_blkno)
	{
		Page metapage = BufferGetPage(metabuf);

		PageSetLSN(metapage, recptr);
		PageSetTLI(metapage, ThisTimeLineID);
	}

	PageSetLSN(lovPage, recptr);
	PageSetTLI(lovPage, ThisTimeLineID);

	elog(DEBUG1, "Insert a new lovItem at (blockno, offset): (%d,%d)",
		 BufferGetBlockNumber(lovBuffer), offset);
}
Example #13
/*
 * emit a completed btree page, and release the working storage.
 */
static void
_bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
{
	/* Ensure rd_smgr is open (could have been closed by relcache flush!) */
	RelationOpenSmgr(wstate->index);

	/* XLOG stuff */
	if (wstate->btws_use_wal)
	{
		/* We use the heap NEWPAGE record type for this */
		log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page);
	}
	else
	{
		/* Leave the page LSN zero if not WAL-logged, but set TLI anyway */
		PageSetTLI(page, ThisTimeLineID);
	}

	/*
	 * If we have to write pages nonsequentially, fill in the space with
	 * zeroes until we come back and overwrite.  This is not logically
	 * necessary on standard Unix filesystems (unwritten space will read as
	 * zeroes anyway), but it should help to avoid fragmentation. The dummy
	 * pages aren't WAL-logged though.
	 */
	while (blkno > wstate->btws_pages_written)
	{
		if (!wstate->btws_zeropage)
			wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
		smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM,
				   wstate->btws_pages_written++,
				   (char *) wstate->btws_zeropage,
				   true);
	}

	/*
	 * Now write the page.	We say isTemp = true even if it's not a temp
	 * index, because there's no need for smgr to schedule an fsync for this
	 * write; we'll do it ourselves before ending the build.
	 */
	if (blkno == wstate->btws_pages_written)
	{
		/* extending the file... */
		smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
				   (char *) page, true);
		wstate->btws_pages_written++;
	}
	else
	{
		/* overwriting a block we zero-filled before */
		smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
				  (char *) page, true);
	}

	pfree(page);
}
Example #14
/*
 * Delete item(s) from a btree page.
 *
 * This must only be used for deleting leaf items.	Deleting an item on a
 * non-leaf page has to be done as part of an atomic action that includes
 * deleting the page it points to.
 *
 * This routine assumes that the caller has pinned and locked the buffer,
 * and will write the buffer afterwards.  Also, the given itemnos *must*
 * appear in increasing order in the array.
 */
void
_bt_delitems(Relation rel, Buffer buf,
			 OffsetNumber *itemnos, int nitems)
{
	Page		page = BufferGetPage(buf);

	/* No ereport(ERROR) until changes are logged */
	START_CRIT_SECTION();

	/* Fix the page */
	PageIndexMultiDelete(page, itemnos, nitems);

	/* XLOG stuff */
	if (!rel->rd_istemp)
	{
		xl_btree_delete xlrec;
		XLogRecPtr	recptr;
		XLogRecData rdata[2];

		xlrec.node = rel->rd_node;
		xlrec.block = BufferGetBlockNumber(buf);

		rdata[0].data = (char *) &xlrec;
		rdata[0].len = SizeOfBtreeDelete;
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = &(rdata[1]);

		/*
		 * The target-offsets array is not in the buffer, but pretend that it
		 * is.	When XLogInsert stores the whole buffer, the offsets array
		 * need not be stored too.
		 */
		if (nitems > 0)
		{
			rdata[1].data = (char *) itemnos;
			rdata[1].len = nitems * sizeof(OffsetNumber);
		}
		else
		{
			rdata[1].data = NULL;
			rdata[1].len = 0;
		}
		rdata[1].buffer = buf;
		rdata[1].buffer_std = true;
		rdata[1].next = NULL;

		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);

		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	END_CRIT_SECTION();
}
Example #15
/*
 * _bitmap_log_lovmetapage() -- log the lov meta page.
 */
void
_bitmap_log_lovmetapage(Relation rel, Buffer lovMetaBuffer, uint8 numOfAttrs)
{
	Page			lovMetapage;
	BMLOVMetaItem	metaItems;

	lovMetapage = BufferGetPage(lovMetaBuffer);
	metaItems = (BMLOVMetaItem)PageGetContents(lovMetapage);

	/* XLOG stuff */
	START_CRIT_SECTION();

	if (!(rel->rd_istemp))
	{
		BMLOVMetaItem	copyMetaItems;
		XLogRecPtr		recptr;
		XLogRecData		rdata[1];
		xl_bm_lovmetapage *xlLovMeta;

#ifdef BM_DEBUG
		elog(LOG, "call _bitmap_log_lovmetapage: numOfAttrs=%d", numOfAttrs);
#endif

		xlLovMeta = (xl_bm_lovmetapage*)
			palloc(sizeof(xl_bm_lovmetapage)+
					numOfAttrs*sizeof(BMLOVMetaItemData));

		xlLovMeta->bm_node = rel->rd_node;
		xlLovMeta->bm_num_of_attrs = numOfAttrs;

		copyMetaItems = (BMLOVMetaItem)
			(((char*)xlLovMeta) + sizeof(xl_bm_lovmetapage)); 
		memcpy(copyMetaItems, metaItems, numOfAttrs*sizeof(BMLOVMetaItemData));

		rdata[0].buffer = InvalidBuffer;
		rdata[0].data = (char*)xlLovMeta;
		rdata[0].len = 
			sizeof(xl_bm_lovmetapage) + numOfAttrs*sizeof(BMLOVMetaItemData);
		rdata[0].next = NULL;

		recptr = XLogInsert(RM_BITMAP_ID, 
							XLOG_BITMAP_INSERT_LOVMETA, rdata);

		PageSetLSN(lovMetapage, recptr);
		PageSetTLI(lovMetapage, ThisTimeLineID);
		pfree(xlLovMeta);
	}

	END_CRIT_SECTION();
}
Example #16
/*
 * Creates a posting tree with a single page.  The function assumes
 * that items[] fits on one page.
 */
static BlockNumber
createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
{
	BlockNumber blkno;
	Buffer		buffer = GinNewBuffer(index);
	Page		page;

	START_CRIT_SECTION();

	GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
	page = BufferGetPage(buffer);
	blkno = BufferGetBlockNumber(buffer);

	memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems);
	GinPageGetOpaque(page)->maxoff = nitems;

	MarkBufferDirty(buffer);

	if (!index->rd_istemp)
	{
		XLogRecPtr	recptr;
		XLogRecData rdata[2];
		ginxlogCreatePostingTree data;

		data.node = index->rd_node;
		data.blkno = blkno;
		data.nitem = nitems;

		rdata[0].buffer = InvalidBuffer;
		rdata[0].data = (char *) &data;
		rdata[0].len = sizeof(ginxlogCreatePostingTree);
		rdata[0].next = &rdata[1];

		rdata[1].buffer = InvalidBuffer;
		rdata[1].data = (char *) items;
		rdata[1].len = sizeof(ItemPointerData) * nitems;
		rdata[1].next = NULL;

		recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	UnlockReleaseBuffer(buffer);

	END_CRIT_SECTION();

	return blkno;
}
Example #17
/*
 *	_bt_metapinit() -- Initialize the metadata page of a new btree.
 *
 * Note: this is actually not used for standard btree index building;
 * nbtsort.c prefers not to make the metadata page valid until completion
 * of build.
 *
 * Note: there's no real need for any locking here.  Since the transaction
 * creating the index hasn't committed yet, no one else can even see the index
 * much less be trying to use it.  (In a REINDEX-in-place scenario, that's
 * not true, but we assume the caller holds sufficient locks on the index.)
 */
void
_bt_metapinit(Relation rel)
{
	Buffer		buf;
	Page		pg;
	BTMetaPageData *metad;

	if (RelationGetNumberOfBlocks(rel) != 0)
		elog(ERROR, "cannot initialize non-empty btree index \"%s\"",
			 RelationGetRelationName(rel));

	buf = ReadBuffer(rel, P_NEW);
	Assert(BufferGetBlockNumber(buf) == BTREE_METAPAGE);
	pg = BufferGetPage(buf);

	_bt_initmetapage(pg, P_NONE, 0);
	metad = BTPageGetMeta(pg);

	/* NO ELOG(ERROR) from here till newmeta op is logged */
	START_CRIT_SECTION();

	/* XLOG stuff */
	if (!rel->rd_istemp)
	{
		xl_btree_newmeta xlrec;
		XLogRecPtr	recptr;
		XLogRecData rdata[1];

		xlrec.node = rel->rd_node;
		xlrec.meta.root = metad->btm_root;
		xlrec.meta.level = metad->btm_level;
		xlrec.meta.fastroot = metad->btm_fastroot;
		xlrec.meta.fastlevel = metad->btm_fastlevel;

		rdata[0].data = (char *) &xlrec;
		rdata[0].len = SizeOfBtreeNewmeta;
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = NULL;

		recptr = XLogInsert(RM_BTREE_ID,
							XLOG_BTREE_NEWMETA,
							rdata);

		PageSetLSN(pg, recptr);
		PageSetTLI(pg, ThisTimeLineID);
	}

	END_CRIT_SECTION();

	WriteBuffer(buf);
}
Example #18
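/*
 * Replay the record written by _bitmap_log_bitmap_lastwords(): restore the
 * last two words and the header bits of the LOV item identified by
 * (bm_lov_blkno, bm_lov_offset), if the page is older than this record.
 */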
static void
bitmap_xlog_insert_bitmap_lastwords(bool redo, XLogRecPtr lsn, XLogRecord* record)
{
	xl_bm_bitmap_lastwords	*xlrec = 
		(xl_bm_bitmap_lastwords*) XLogRecGetData(record);
	Relation reln;

	reln = XLogOpenRelation(xlrec->bm_node);
	if (!RelationIsValid(reln))
		return;

	if (redo)
	{
		Buffer		lovBuffer;
		Page		lovPage;
		BMLOVItem	lovItem;

#ifdef BM_DEBUG
		ereport(LOG, (errcode(LOG), 
			errmsg("call bitmap_xlog_insert_bitmap_lastwords: redo=%d\n", 
					redo)));
#endif

		lovBuffer = XLogReadBuffer(false, reln, xlrec->bm_lov_blkno);
		if (!BufferIsValid(lovBuffer))
			elog(PANIC, "bm_insert_redo: block unfound: %d",
				 xlrec->bm_lov_blkno);

		lovPage = BufferGetPage(lovBuffer);

		if (XLByteLT(PageGetLSN(lovPage), lsn))
		{
			lovItem = (BMLOVItem)
				PageGetItem(lovPage, PageGetItemId(lovPage, xlrec->bm_lov_offset));

			lovItem->bm_last_compword = xlrec->bm_last_compword;
			lovItem->bm_last_word = xlrec->bm_last_word;
			lovItem->bm_last_two_headerbits = xlrec->bm_last_two_headerbits;

			PageSetLSN(lovPage, lsn);
			PageSetTLI(lovPage, ThisTimeLineID);
			_bitmap_wrtbuf(lovBuffer);
		}
		else
			_bitmap_relbuf(lovBuffer);
	}
	else
		elog(PANIC, "bm_insert_undo: not implemented.");
}
Example #19
/*
 *	lazy_vacuum_page() -- free dead tuples on a page
 *					 and repair its fragmentation.
 *
 * Caller must hold pin and buffer cleanup lock on the buffer.
 *
 * tupindex is the index in vacrelstats->dead_tuples of the first dead
 * tuple for this page.  We assume the rest follow sequentially.
 * The return value is the first tupindex after the tuples of this page.
 */
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
				 int tupindex, LVRelStats *vacrelstats)
{
	OffsetNumber unused[MaxOffsetNumber];
	int			uncnt;
	Page		page = BufferGetPage(buffer);
	ItemId		itemid;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	START_CRIT_SECTION();

	for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
	{
		BlockNumber tblk;
		OffsetNumber toff;

		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
		if (tblk != blkno)
			break;				/* past end of tuples for this block */
		toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
		itemid = PageGetItemId(page, toff);
		itemid->lp_flags &= ~LP_USED;
	}

	uncnt = PageRepairFragmentation(page, unused);

	MarkBufferDirty(buffer);

	/* XLOG stuff */
	if (!onerel->rd_istemp)
	{
		XLogRecPtr	recptr;

		recptr = log_heap_clean(onerel, buffer, unused, uncnt);
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}
	else
	{
		/* No XLOG record, but still need to flag that XID exists on disk */
		MyXactMadeTempRelUpdate = true;
	}

	END_CRIT_SECTION();

	return tupindex;
}
Example #20
/*
 *	lazy_vacuum_page() -- free dead tuples on a page
 *					 and repair its fragmentation.
 *
 * Caller must hold pin and buffer cleanup lock on the buffer.
 *
 * tupindex is the index in vacrelstats->dead_tuples of the first dead
 * tuple for this page.  We assume the rest follow sequentially.
 * The return value is the first tupindex after the tuples of this page.
 */
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
				 int tupindex, LVRelStats *vacrelstats)
{
	Page		page = BufferGetPage(buffer);
	OffsetNumber unused[MaxOffsetNumber];
	int			uncnt = 0;

	MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

	START_CRIT_SECTION();

	for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
	{
		BlockNumber tblk;
		OffsetNumber toff;
		ItemId		itemid;

		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
		if (tblk != blkno)
			break;				/* past end of tuples for this block */
		toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
		itemid = PageGetItemId(page, toff);
		ItemIdSetUnused(itemid);
		unused[uncnt++] = toff;
	}

	PageRepairFragmentation(page);

	MarkBufferDirty(buffer);

	/* XLOG stuff */
	if (!onerel->rd_istemp)
	{
		XLogRecPtr	recptr;

		recptr = log_heap_clean(onerel, buffer,
								NULL, 0, NULL, 0,
								unused, uncnt,
								false);
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	END_CRIT_SECTION();

	return tupindex;
}
Example #21
/*
 * Write the given statistics to the index's metapage
 *
 * Note: nPendingPages and ginVersion are *not* copied over
 */
void
ginUpdateStats(Relation index, const GinStatsData *stats)
{
	Buffer			metabuffer;
	Page			metapage;
	GinMetaPageData	*metadata;

	metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
	LockBuffer(metabuffer, GIN_EXCLUSIVE);
	metapage = BufferGetPage(metabuffer);
	metadata = GinPageGetMeta(metapage);

	START_CRIT_SECTION();

	metadata->nTotalPages = stats->nTotalPages;
	metadata->nEntryPages = stats->nEntryPages;
	metadata->nDataPages = stats->nDataPages;
	metadata->nEntries = stats->nEntries;

	MarkBufferDirty(metabuffer);

	if (RelationNeedsWAL(index))
	{
		XLogRecPtr			recptr;
		ginxlogUpdateMeta	data;
		XLogRecData			rdata;

		data.node = index->rd_node;
		data.ntuples = 0;
		data.newRightlink = data.prevTail = InvalidBlockNumber;
		memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));

		rdata.buffer = InvalidBuffer;
		rdata.data = (char *) &data;
		rdata.len = sizeof(ginxlogUpdateMeta);
		rdata.next = NULL;

		recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
		PageSetLSN(metapage, recptr);
		PageSetTLI(metapage, ThisTimeLineID);
	}

	UnlockReleaseBuffer(metabuffer);

	END_CRIT_SECTION();
}
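A minimal sketch of a caller reporting page counts through ginUpdateStats(); the helper name and its parameters are illustrative, and nPendingPages and ginVersion are zeroed because, per the comment above, they are not copied over:

static void
report_gin_build_stats(Relation index, BlockNumber entryPages,
					   BlockNumber dataPages, int64 entryCount)
{
	GinStatsData stats;

	/* Not copied over by ginUpdateStats; values here are irrelevant. */
	stats.nPendingPages = 0;
	stats.ginVersion = 0;

	stats.nTotalPages = RelationGetNumberOfBlocks(index);
	stats.nEntryPages = entryPages;
	stats.nDataPages = dataPages;
	stats.nEntries = entryCount;

	ginUpdateStats(index, &stats);
}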
Example #22
/*
 *	visibilitymap_set - set a bit on a previously pinned page
 *
 * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
 * or InvalidXLogRecPtr in normal running.  The page LSN is advanced to the
 * one provided; in normal running, we generate a new XLOG record and set the
 * page LSN to that value.  cutoff_xid is the largest xmin on the page being
 * marked all-visible; it is needed for Hot Standby, and can be
 * InvalidTransactionId if the page contains no tuples.
 *
 * You must pass a buffer containing the correct map page to this function.
 * Call visibilitymap_pin first to pin the right one. This function doesn't do
 * any I/O.
 */
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
				  Buffer buf, TransactionId cutoff_xid)
{
	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
	uint8		mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
	Page		page;
	char	   *map;

#ifdef TRACE_VISIBILITYMAP
	elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
#endif

	Assert(InRecovery || XLogRecPtrIsInvalid(recptr));

	/* Check that we have the right page pinned */
	if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock)
		elog(ERROR, "wrong buffer passed to visibilitymap_set");

	page = BufferGetPage(buf);
	map = PageGetContents(page);
	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	if (!(map[mapByte] & (1 << mapBit)))
	{
		START_CRIT_SECTION();

		map[mapByte] |= (1 << mapBit);
		MarkBufferDirty(buf);

		if (RelationNeedsWAL(rel))
		{
			if (XLogRecPtrIsInvalid(recptr))
				recptr = log_heap_visible(rel->rd_node, heapBlk, buf,
										  cutoff_xid);
			PageSetLSN(page, recptr);
			PageSetTLI(page, ThisTimeLineID);
		}

		END_CRIT_SECTION();
	}

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
Example #23
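/*
 * Replay creation of a GiST index: initialize the root block as an empty
 * leaf page and stamp it with the record's LSN.
 */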
static void
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
	RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
	Buffer		buffer;
	Page		page;

	buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
	Assert(BufferIsValid(buffer));
	page = (Page) BufferGetPage(buffer);

	GISTInitBuffer(buffer, F_LEAF);

	PageSetLSN(page, lsn);
	PageSetTLI(page, ThisTimeLineID);

	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);
}
Example #24
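/*
 * Replay the record written by _bitmap_log_metapage(): if the on-disk
 * metapage (block BM_METAPAGE) is older than this record, stamp it with the
 * record's LSN and write it out; otherwise just release the buffer.
 */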
static void
bitmap_xlog_insert_meta(bool redo, XLogRecPtr lsn, XLogRecord* record)
{
	xl_bm_metapage	*xlrec = (xl_bm_metapage*) XLogRecGetData(record);
	Relation		reln;

	reln = XLogOpenRelation(xlrec->bm_node);
	
	if (!RelationIsValid(reln))
		return;

	if (redo)
	{
		Buffer			metabuf;
		BMMetaPage			metapage;

#ifdef BM_DEBUG
		ereport(LOG, (errcode(LOG), 
			errmsg("call bitmap_xlog_insert_meta: redo=%d\n", redo)));
#endif

		metabuf = XLogReadBuffer(false, reln, BM_METAPAGE);
		if (!BufferIsValid(metabuf))
			elog(PANIC, "bm_insert_redo: block unfound: %d", BM_METAPAGE);

		/* restore the page */
		metapage = (BMMetaPage)BufferGetPage(metabuf);

		if (XLByteLT(PageGetLSN(metapage), lsn))
		{
			PageSetLSN(metapage, lsn);
			PageSetTLI(metapage, ThisTimeLineID);
			_bitmap_wrtbuf(metabuf);
		}
		else
			_bitmap_relbuf(metabuf);
	}
	else
		elog(PANIC, "bm_insert_undo: not implemented.");
}
Example #25
/*
 * _bitmap_log_lovitem() -- log adding a new lov item to a lov page.
 */
void
_bitmap_log_lovitem(Relation rel, Buffer lovBuffer, bool isNewItem,
					OffsetNumber offset, BMLOVItem lovItem)
{
	Page lovPage = BufferGetPage(lovBuffer);

	/* XLOG stuff */
	START_CRIT_SECTION();

	if (!(rel->rd_istemp))
	{
		xl_bm_lovitem	xlLovItem;
		XLogRecPtr		recptr;
		XLogRecData		rdata[1];

#ifdef BM_DEBUG
		elog(LOG, "call _bitmap_log_lovitem: blkno=%d, offset=%d, isNew=%d", 
			 BufferGetBlockNumber(lovBuffer), offset, isNewItem);
#endif

		xlLovItem.bm_node = rel->rd_node;
		xlLovItem.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
		xlLovItem.bm_isNewItem = isNewItem;
		xlLovItem.bm_lov_offset = offset;
		memcpy(&(xlLovItem.bm_lovItem), lovItem,
				sizeof(BMLOVItemData));

		rdata[0].buffer = InvalidBuffer;
		rdata[0].data = (char*)&xlLovItem;
		rdata[0].len = sizeof(xl_bm_lovitem);
		rdata[0].next = NULL;

		recptr = XLogInsert(RM_BITMAP_ID, 
							XLOG_BITMAP_INSERT_LOVITEM, rdata);

		PageSetLSN(lovPage, recptr);
		PageSetTLI(lovPage, ThisTimeLineID);
	}

	END_CRIT_SECTION();
}
Example #26
/*
 * _bitmap_log_bitmap_lastwords() -- log the last two words in a bitmap.
 */
void
_bitmap_log_bitmap_lastwords(Relation rel, Buffer lovBuffer, 
							 OffsetNumber lovOffset, BMLOVItem lovItem)
{
	/* XLOG stuff */
	START_CRIT_SECTION();

	if (!(rel->rd_istemp))
	{
		xl_bm_bitmap_lastwords	xlLastwords;
		XLogRecPtr				recptr;
		XLogRecData				rdata[1];

#ifdef BM_DEBUG
		elog(LOG, 
		"call _bitmap_log_bitmap_lastwords: lov_blkno=%d, last_compword=%x, last_word=%x", 
		BufferGetBlockNumber(lovBuffer), lovItem->bm_last_compword, 
		lovItem->bm_last_word);
#endif

		xlLastwords.bm_node = rel->rd_node;
		xlLastwords.bm_last_compword = lovItem->bm_last_compword;
		xlLastwords.bm_last_word = lovItem->bm_last_word;
		xlLastwords.bm_last_two_headerbits = lovItem->bm_last_two_headerbits;
		xlLastwords.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
		xlLastwords.bm_lov_offset = lovOffset;

		rdata[0].buffer = InvalidBuffer;
		rdata[0].data = (char*)&xlLastwords;
		rdata[0].len = sizeof(xl_bm_bitmap_lastwords);
		rdata[0].next = NULL;

		recptr = 
			XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_BITMAP_LASTWORDS, rdata);

		PageSetLSN(BufferGetPage(lovBuffer), recptr);
		PageSetTLI(BufferGetPage(lovBuffer), ThisTimeLineID);
	}

	END_CRIT_SECTION();
}
Example #27
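/*
 * Replay creation of a GiST index: initialize the root block as an empty
 * leaf page and stamp it with the record's LSN.  (Create-index records
 * never carry backup blocks, as the Assert below checks.)
 */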
static void
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
	RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
	Buffer		buffer;
	Page		page;

	/* Backup blocks are not used in create_index records */
	Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));

	buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
	Assert(BufferIsValid(buffer));
	page = (Page) BufferGetPage(buffer);

	GISTInitBuffer(buffer, F_LEAF);

	PageSetLSN(page, lsn);
	PageSetTLI(page, ThisTimeLineID);

	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);
}
Example #28
/*
 * _bitmap_log_updateword() -- log updating a single word in a given
 * 	bitmap page.
 */
void
_bitmap_log_updateword(Relation rel, Buffer bitmapBuffer, int word_no)
{
	Page				bitmapPage;
	BMBitmap			bitmap;
	xl_bm_updateword	xlBitmapWord;
	XLogRecPtr			recptr;
	XLogRecData			rdata[1];

	/*
	 * Fetch gp_persistent_relation_node information that will be added to
	 * the XLOG record.
	 */
	RelationFetchGpRelationNodeForXLog(rel);

	bitmapPage = BufferGetPage(bitmapBuffer);
	bitmap = (BMBitmap) PageGetContentsMaxAligned(bitmapPage);

	xlBitmapWord.bm_node = rel->rd_node;
	xlBitmapWord.bm_persistentTid = rel->rd_relationnodeinfo.persistentTid;
	xlBitmapWord.bm_persistentSerialNum = rel->rd_relationnodeinfo.persistentSerialNum;
	xlBitmapWord.bm_blkno = BufferGetBlockNumber(bitmapBuffer);
	xlBitmapWord.bm_word_no = word_no;
	xlBitmapWord.bm_cword = bitmap->cwords[word_no];
	xlBitmapWord.bm_hword = bitmap->hwords[word_no/BM_HRL_WORD_SIZE];

	elog(DEBUG1, "_bitmap_log_updateword: (blkno, word_no, cword, hword)="
		 "(%d, %d, " INT64_FORMAT ", " INT64_FORMAT ")", xlBitmapWord.bm_blkno,
		 xlBitmapWord.bm_word_no, xlBitmapWord.bm_cword,
		 xlBitmapWord.bm_hword);

	rdata[0].buffer = InvalidBuffer;
	rdata[0].data = (char*)&xlBitmapWord;
	rdata[0].len = sizeof(xl_bm_updateword);
	rdata[0].next = NULL;

	recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_UPDATEWORD, rdata);

	PageSetLSN(bitmapPage, recptr);
	PageSetTLI(bitmapPage, ThisTimeLineID);
}
Example #29
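/*
 * Replay deletion of a GiST page: mark the target block as deleted and stamp
 * it with the record's LSN.  If the block was restored from a full-page
 * image, or no longer exists, there is nothing to do.
 */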
static void
gistRedoPageDeleteRecord(XLogRecPtr lsn, XLogRecord *record)
{
	gistxlogPageDelete *xldata = (gistxlogPageDelete *) XLogRecGetData(record);
	Buffer		buffer;
	Page		page;

	/* nothing else to do if page was backed up (and no info to do it with) */
	if (record->xl_info & XLR_BKP_BLOCK_1)
		return;

	buffer = XLogReadBuffer(xldata->node, xldata->blkno, false);
	if (!BufferIsValid(buffer))
		return;

	page = (Page) BufferGetPage(buffer);
	GistPageSetDeleted(page);

	PageSetLSN(page, lsn);
	PageSetTLI(page, ThisTimeLineID);
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);
}
Example #30
/*
 *	lazy_scan_heap() -- scan an open heap relation
 *
 *		This routine sets commit status bits, builds lists of dead tuples
 *		and pages with free space, and calculates statistics on the number
 *		of live tuples in the heap.  When done, or when we run low on space
 *		for dead-tuple TIDs, invoke vacuuming of indexes and heap.
 *
 *		If there are no indexes then we just vacuum each dirty page as we
 *		process it, since there's no point in gathering many tuples.
 */
static void
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
			   Relation *Irel, int nindexes, bool scan_all)
{
	BlockNumber nblocks,
				blkno;
	HeapTupleData tuple;
	char	   *relname;
	BlockNumber empty_pages,
				vacuumed_pages;
	double		num_tuples,
				tups_vacuumed,
				nkeep,
				nunused;
	IndexBulkDeleteResult **indstats;
	int			i;
	PGRUsage	ru0;
	Buffer		vmbuffer = InvalidBuffer;
	BlockNumber next_not_all_visible_block;
	bool		skipping_all_visible_blocks;

	pg_rusage_init(&ru0);

	relname = RelationGetRelationName(onerel);
	ereport(elevel,
			(errmsg("vacuuming \"%s.%s\"",
					get_namespace_name(RelationGetNamespace(onerel)),
					relname)));

	empty_pages = vacuumed_pages = 0;
	num_tuples = tups_vacuumed = nkeep = nunused = 0;

	indstats = (IndexBulkDeleteResult **)
		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));

	nblocks = RelationGetNumberOfBlocks(onerel);
	vacrelstats->rel_pages = nblocks;
	vacrelstats->scanned_pages = 0;
	vacrelstats->nonempty_pages = 0;
	vacrelstats->latestRemovedXid = InvalidTransactionId;

	lazy_space_alloc(vacrelstats, nblocks);

	/*
	 * We want to skip pages that don't require vacuuming according to the
	 * visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD
	 * consecutive pages.  Since we're reading sequentially, the OS should be
	 * doing readahead for us, so there's no gain in skipping a page now and
	 * then; that's likely to disable readahead and so be counterproductive.
	 * Also, skipping even a single page means that we can't update
	 * relfrozenxid, so we only want to do it if we can skip a goodly number
	 * of pages.
	 *
	 * Before entering the main loop, establish the invariant that
	 * next_not_all_visible_block is the next block number >= blkno that's not
	 * all-visible according to the visibility map, or nblocks if there's no
	 * such block.	Also, we set up the skipping_all_visible_blocks flag,
	 * which is needed because we need hysteresis in the decision: once we've
	 * started skipping blocks, we may as well skip everything up to the next
	 * not-all-visible block.
	 *
	 * Note: if scan_all is true, we won't actually skip any pages; but we
	 * maintain next_not_all_visible_block anyway, so as to set up the
	 * all_visible_according_to_vm flag correctly for each page.
	 */
	for (next_not_all_visible_block = 0;
		 next_not_all_visible_block < nblocks;
		 next_not_all_visible_block++)
	{
		if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
			break;
		vacuum_delay_point();
	}
	if (next_not_all_visible_block >= SKIP_PAGES_THRESHOLD)
		skipping_all_visible_blocks = true;
	else
		skipping_all_visible_blocks = false;

	for (blkno = 0; blkno < nblocks; blkno++)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		bool		tupgone,
					hastup;
		int			prev_dead_count;
		OffsetNumber frozen[MaxOffsetNumber];
		int			nfrozen;
		Size		freespace;
		bool		all_visible_according_to_vm;
		bool		all_visible;
		bool		has_dead_tuples;

		if (blkno == next_not_all_visible_block)
		{
			/* Time to advance next_not_all_visible_block */
			for (next_not_all_visible_block++;
				 next_not_all_visible_block < nblocks;
				 next_not_all_visible_block++)
			{
				if (!visibilitymap_test(onerel, next_not_all_visible_block,
										&vmbuffer))
					break;
				vacuum_delay_point();
			}

			/*
			 * We know we can't skip the current block.  But set up
			 * skipping_all_visible_blocks to do the right thing at the
			 * following blocks.
			 */
			if (next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD)
				skipping_all_visible_blocks = true;
			else
				skipping_all_visible_blocks = false;
			all_visible_according_to_vm = false;
		}
		else
		{
			/* Current block is all-visible */
			if (skipping_all_visible_blocks && !scan_all)
				continue;
			all_visible_according_to_vm = true;
		}

		vacuum_delay_point();

		vacrelstats->scanned_pages++;

		/*
		 * If we are close to overrunning the available space for dead-tuple
		 * TIDs, pause and do a cycle of vacuuming before we tackle this page.
		 */
		if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Log cleanup info before we touch indexes */
			vacuum_log_cleanup_info(onerel, vacrelstats);

			/* Remove index entries */
			for (i = 0; i < nindexes; i++)
				lazy_vacuum_index(Irel[i],
								  &indstats[i],
								  vacrelstats);
			/* Remove tuples from heap */
			lazy_vacuum_heap(onerel, vacrelstats);

			/*
			 * Forget the now-vacuumed tuples, and press on, but be careful
			 * not to reset latestRemovedXid since we want that value to be
			 * valid.
			 */
			vacrelstats->num_dead_tuples = 0;
			vacrelstats->num_index_scans++;
		}

		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
								 RBM_NORMAL, vac_strategy);

		/* We need buffer cleanup lock so that we can prune HOT chains. */
		LockBufferForCleanup(buf);

		page = BufferGetPage(buf);

		if (PageIsNew(page))
		{
			/*
			 * An all-zeroes page could be left over if a backend extends the
			 * relation but crashes before initializing the page. Reclaim such
			 * pages for use.
			 *
			 * We have to be careful here because we could be looking at a
			 * page that someone has just added to the relation and not yet
			 * been able to initialize (see RelationGetBufferForTuple). To
			 * protect against that, release the buffer lock, grab the
			 * relation extension lock momentarily, and re-lock the buffer. If
			 * the page is still uninitialized by then, it must be left over
			 * from a crashed backend, and we can initialize it.
			 *
			 * We don't really need the relation lock when this is a new or
			 * temp relation, but it's probably not worth the code space to
			 * check that, since this surely isn't a critical path.
			 *
			 * Note: the comparable code in vacuum.c need not worry because
			 * it's got exclusive lock on the whole relation.
			 */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			LockRelationForExtension(onerel, ExclusiveLock);
			UnlockRelationForExtension(onerel, ExclusiveLock);
			LockBufferForCleanup(buf);
			if (PageIsNew(page))
			{
				ereport(WARNING,
				(errmsg("relation \"%s\" page %u is uninitialized --- fixing",
						relname, blkno)));
				PageInit(page, BufferGetPageSize(buf), 0);
				empty_pages++;
			}
			freespace = PageGetHeapFreeSpace(page);
			MarkBufferDirty(buf);
			UnlockReleaseBuffer(buf);

			RecordPageWithFreeSpace(onerel, blkno, freespace);
			continue;
		}

		if (PageIsEmpty(page))
		{
			empty_pages++;
			freespace = PageGetHeapFreeSpace(page);

			if (!PageIsAllVisible(page))
			{
				PageSetAllVisible(page);
				SetBufferCommitInfoNeedsSave(buf);
			}

			LockBuffer(buf, BUFFER_LOCK_UNLOCK);

			/* Update the visibility map */
			if (!all_visible_according_to_vm)
			{
				visibilitymap_pin(onerel, blkno, &vmbuffer);
				LockBuffer(buf, BUFFER_LOCK_SHARE);
				if (PageIsAllVisible(page))
					visibilitymap_set(onerel, blkno, PageGetLSN(page), &vmbuffer);
				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			}

			ReleaseBuffer(buf);
			RecordPageWithFreeSpace(onerel, blkno, freespace);
			continue;
		}

		/*
		 * Prune all HOT-update chains in this page.
		 *
		 * We count tuples removed by the pruning step as removed by VACUUM.
		 */
		tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
										 &vacrelstats->latestRemovedXid);

		/*
		 * Now scan the page to collect vacuumable items and check for tuples
		 * requiring freezing.
		 */
		all_visible = true;
		has_dead_tuples = false;
		nfrozen = 0;
		hastup = false;
		prev_dead_count = vacrelstats->num_dead_tuples;
		maxoff = PageGetMaxOffsetNumber(page);
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;

			itemid = PageGetItemId(page, offnum);

			/* Unused items require no processing, but we count 'em */
			if (!ItemIdIsUsed(itemid))
			{
				nunused += 1;
				continue;
			}

			/* Redirect items mustn't be touched */
			if (ItemIdIsRedirected(itemid))
			{
				hastup = true;	/* this page won't be truncatable */
				continue;
			}

			ItemPointerSet(&(tuple.t_self), blkno, offnum);

			/*
			 * DEAD item pointers are to be vacuumed normally; but we don't
			 * count them in tups_vacuumed, else we'd be double-counting (at
			 * least in the common case where heap_page_prune() just freed up
			 * a non-HOT tuple).
			 */
			if (ItemIdIsDead(itemid))
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				all_visible = false;
				continue;
			}

			Assert(ItemIdIsNormal(itemid));

			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple.t_len = ItemIdGetLength(itemid);

			tupgone = false;

			switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
			{
				case HEAPTUPLE_DEAD:

					/*
					 * Ordinarily, DEAD tuples would have been removed by
					 * heap_page_prune(), but it's possible that the tuple
					 * state changed since heap_page_prune() looked.  In
					 * particular an INSERT_IN_PROGRESS tuple could have
					 * changed to DEAD if the inserter aborted.  So this
					 * cannot be considered an error condition.
					 *
					 * If the tuple is HOT-updated then it must only be
					 * removed by a prune operation; so we keep it just as if
					 * it were RECENTLY_DEAD.  Also, if it's a heap-only
					 * tuple, we choose to keep it, because it'll be a lot
					 * cheaper to get rid of it in the next pruning pass than
					 * to treat it like an indexed tuple.
					 */
					if (HeapTupleIsHotUpdated(&tuple) ||
						HeapTupleIsHeapOnly(&tuple))
						nkeep += 1;
					else
						tupgone = true; /* we can delete the tuple */
					all_visible = false;
					break;
				case HEAPTUPLE_LIVE:
					/* Tuple is good --- but let's do some validity checks */
					if (onerel->rd_rel->relhasoids &&
						!OidIsValid(HeapTupleGetOid(&tuple)))
						elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
							 relname, blkno, offnum);

					/*
					 * Is the tuple definitely visible to all transactions?
					 *
					 * NB: Like with per-tuple hint bits, we can't set the
					 * PD_ALL_VISIBLE flag if the inserter committed
					 * asynchronously. See SetHintBits for more info. Check
					 * that the HEAP_XMIN_COMMITTED hint bit is set because of
					 * that.
					 */
					if (all_visible)
					{
						TransactionId xmin;

						if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
						{
							all_visible = false;
							break;
						}

						/*
						 * The inserter definitely committed. But is it old
						 * enough that everyone sees it as committed?
						 */
						xmin = HeapTupleHeaderGetXmin(tuple.t_data);
						if (!TransactionIdPrecedes(xmin, OldestXmin))
						{
							all_visible = false;
							break;
						}
					}
					break;
				case HEAPTUPLE_RECENTLY_DEAD:

					/*
					 * If tuple is recently deleted then we must not remove it
					 * from relation.
					 */
					nkeep += 1;
					all_visible = false;
					break;
				case HEAPTUPLE_INSERT_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					all_visible = false;
					break;
				case HEAPTUPLE_DELETE_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					all_visible = false;
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					break;
			}

			if (tupgone)
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
											 &vacrelstats->latestRemovedXid);
				tups_vacuumed += 1;
				has_dead_tuples = true;
			}
			else
			{
				num_tuples += 1;
				hastup = true;

				/*
				 * Each non-removable tuple must be checked to see if it needs
				 * freezing.  Note we already have exclusive buffer lock.
				 */
				if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
									  InvalidBuffer))
					frozen[nfrozen++] = offnum;
			}
		}						/* scan along page */

		/*
		 * If we froze any tuples, mark the buffer dirty, and write a WAL
		 * record recording the changes.  We must log the changes to be
		 * crash-safe against future truncation of CLOG.
		 */
		if (nfrozen > 0)
		{
			MarkBufferDirty(buf);
			if (RelationNeedsWAL(onerel))
			{
				XLogRecPtr	recptr;

				recptr = log_heap_freeze(onerel, buf, FreezeLimit,
										 frozen, nfrozen);
				PageSetLSN(page, recptr);
				PageSetTLI(page, ThisTimeLineID);
			}
		}

		/*
		 * If there are no indexes then we can vacuum the page right now
		 * instead of doing a second scan.
		 */
		if (nindexes == 0 &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Remove tuples from heap */
			lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);

			/*
			 * Forget the now-vacuumed tuples, and press on, but be careful
			 * not to reset latestRemovedXid since we want that value to be
			 * valid.
			 */
			vacrelstats->num_dead_tuples = 0;
			vacuumed_pages++;
		}

		freespace = PageGetHeapFreeSpace(page);

		/* Update the all-visible flag on the page */
		if (!PageIsAllVisible(page) && all_visible)
		{
			PageSetAllVisible(page);
			SetBufferCommitInfoNeedsSave(buf);
		}

		/*
		 * It's possible for the value returned by GetOldestXmin() to move
		 * backwards, so it's not wrong for us to see tuples that appear to
		 * not be visible to everyone yet, while PD_ALL_VISIBLE is already
		 * set. The real safe xmin value never moves backwards, but
		 * GetOldestXmin() is conservative and sometimes returns a value
		 * that's unnecessarily small, so if we see that contradiction it just
		 * means that the tuples that we think are not visible to everyone yet
		 * actually are, and the PD_ALL_VISIBLE flag is correct.
		 *
		 * There should never be dead tuples on a page with PD_ALL_VISIBLE
		 * set, however.
		 */
		else if (PageIsAllVisible(page) && has_dead_tuples)
		{
			elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
				 relname, blkno);
			PageClearAllVisible(page);
			SetBufferCommitInfoNeedsSave(buf);

			/*
			 * Normally, we would drop the lock on the heap page before
			 * updating the visibility map, but since this case shouldn't
			 * happen anyway, don't worry about that.
			 */
			visibilitymap_clear(onerel, blkno);
		}

		LockBuffer(buf, BUFFER_LOCK_UNLOCK);

		/* Update the visibility map */
		if (!all_visible_according_to_vm && all_visible)
		{
			visibilitymap_pin(onerel, blkno, &vmbuffer);
			LockBuffer(buf, BUFFER_LOCK_SHARE);
			if (PageIsAllVisible(page))
				visibilitymap_set(onerel, blkno, PageGetLSN(page), &vmbuffer);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}

		ReleaseBuffer(buf);

		/* Remember the location of the last page with nonremovable tuples */
		if (hastup)
			vacrelstats->nonempty_pages = blkno + 1;

		/*
		 * If we remembered any tuples for deletion, then the page will be
		 * visited again by lazy_vacuum_heap, which will compute and record
		 * its post-compaction free space.	If not, then we're done with this
		 * page, so remember its free space as-is.	(This path will always be
		 * taken if there are no indexes.)
		 */
		if (vacrelstats->num_dead_tuples == prev_dead_count)
			RecordPageWithFreeSpace(onerel, blkno, freespace);
	}

	/* save stats for use later */
	vacrelstats->scanned_tuples = num_tuples;
	vacrelstats->tuples_deleted = tups_vacuumed;

	/* now we can compute the new value for pg_class.reltuples */
	vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false,
														 nblocks,
												  vacrelstats->scanned_pages,
														 num_tuples);

	/* If any tuples need to be deleted, perform final vacuum cycle */
	/* XXX put a threshold on min number of tuples here? */
	if (vacrelstats->num_dead_tuples > 0)
	{
		/* Log cleanup info before we touch indexes */
		vacuum_log_cleanup_info(onerel, vacrelstats);

		/* Remove index entries */
		for (i = 0; i < nindexes; i++)
			lazy_vacuum_index(Irel[i],
							  &indstats[i],
							  vacrelstats);
		/* Remove tuples from heap */
		lazy_vacuum_heap(onerel, vacrelstats);
		vacrelstats->num_index_scans++;
	}

	/* Release the pin on the visibility map page */
	if (BufferIsValid(vmbuffer))
	{
		ReleaseBuffer(vmbuffer);
		vmbuffer = InvalidBuffer;
	}

	/* Do post-vacuum cleanup and statistics update for each index */
	for (i = 0; i < nindexes; i++)
		lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);

	/* If no indexes, make log report that lazy_vacuum_heap would've made */
	if (vacuumed_pages)
		ereport(elevel,
				(errmsg("\"%s\": removed %.0f row versions in %u pages",
						RelationGetRelationName(onerel),
						tups_vacuumed, vacuumed_pages)));

	ereport(elevel,
			(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
					RelationGetRelationName(onerel),
					tups_vacuumed, num_tuples,
					vacrelstats->scanned_pages, nblocks),
			 errdetail("%.0f dead row versions cannot be removed yet.\n"
					   "There were %.0f unused item pointers.\n"
					   "%u pages are entirely empty.\n"
					   "%s.",
					   nkeep,
					   nunused,
					   empty_pages,
					   pg_rusage_show(&ru0))));
}