Example #1
0
/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve next tuple from the BitmapHeapScan node's currentRelation
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	HeapScanDesc scandesc;
	Index		scanrelid;
	TIDBitmap  *tbm;
	TBMIterateResult *tbmres;
	OffsetNumber targoffset;
	TupleTableSlot *slot;

	OnDiskBitmapWords *odbm;
	ODBMIterateResult *odbmres;
	bool inmem = false;

	/*
	 * extract necessary information from index scan node
	 */
	estate = node->ss.ps.state;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scandesc = node->ss.ss_currentScanDesc;
	scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
	tbm = node->tbm;
	tbmres = node->tbmres;
	odbm = node->odbm;
	odbmres = node->odbmres;

	/*
	 * Clear any reference to the previously returned tuple.  The idea here is
	 * to not have the tuple slot be the last holder of a pin on that tuple's
	 * buffer; if it is, we'll need a separate visit to the bufmgr to release
	 * the buffer.	By clearing here, we get to have the release done by
	 * ReleaseAndReadBuffer, below.
	 */
	ExecClearTuple(slot);

	/*
	 * Check if we are evaluating PlanQual for tuple of this relation.
	 * Additional checking is not good, but no other way for now. We could
	 * introduce new nodes for this case and handle IndexScan --> NewNode
	 * switching in Init/ReScan plan...
	 */
	if (estate->es_evTuple != NULL &&
		estate->es_evTuple[scanrelid - 1] != NULL)
	{
		if (estate->es_evTupleNull[scanrelid - 1])
			return slot;		/* return empty slot */

		ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
					   slot, InvalidBuffer, false);

		/* Does the tuple meet the original qual conditions? */
		econtext->ecxt_scantuple = slot;

		ResetExprContext(econtext);

		if (!ExecQual(node->bitmapqualorig, econtext, false))
			ExecClearTuple(slot);		/* would not be returned by scan */

		/* Flag for the next call that no more tuples */
		estate->es_evTupleNull[scanrelid - 1] = true;

		return slot;
	}

	/* check if this requires in-mem bitmap scan or on-disk bitmap index. */
	inmem = ((BitmapHeapScan*)(((PlanState*)node)->plan))->inmem;

	/*
	 * If the underline indexes are on disk bitmap indexes
	 */
	if (!inmem) {
		uint64	nextTid = 0;

		if (odbm == NULL)
		{
			odbm = odbm_create(ODBM_MAX_WORDS);
			node->odbm = odbm;
		}

		if (odbmres == NULL)
		{
			odbmres = odbm_res_create(odbm);
			node->odbmres = odbmres;
		}

		for (;;)
		{
			/* If we have used up the words from previous scan, or 
				we haven't scan the underlying index scan for wrods yet,
				then do it.
			 */
			if (odbm->numOfWords == 0 &&
				odbmres->nextTidLoc >= odbmres->numOfTids)
			{

				Plan* outerPlan = (((PlanState*)node)->lefttree)->plan;
				odbm_set_bitmaptype(outerPlan, false);

				odbm->firstTid = odbmres->nextTid;
				odbm->startNo = 0;
				odbm_set_child_resultnode(((PlanState*)node)->lefttree,
										  odbm);
				odbm = (OnDiskBitmapWords *)
					MultiExecProcNode(outerPlanState(node));

				if (!odbm || !IsA(odbm, OnDiskBitmapWords))
					elog(ERROR, "unrecognized result from subplan");

				odbm_begin_iterate(node->odbm, node->odbmres);
			}

			/* If we can not find more words, then this scan is over. */
			if (odbm == NULL ||
				(odbm->numOfWords == 0 && 
				 odbmres->nextTidLoc >= odbmres->numOfTids))
				return ExecClearTuple(slot);

			nextTid = odbm_findnexttid(odbm, odbmres);

			if (nextTid == 0)
				continue;

			ItemPointerSet(&scandesc->rs_ctup.t_self,
							(nextTid-1)/MaxNumHeapTuples,
							((nextTid-1)%MaxNumHeapTuples)+1);
			/* fetch the heap tuple and see if it matches the snapshot. */
			if (heap_release_fetch(scandesc->rs_rd,
								   scandesc->rs_snapshot,
								   &scandesc->rs_ctup,
								   &scandesc->rs_cbuf,
								   true,
								   &scandesc->rs_pgstat_info))
			{
				/*
			 	* Set up the result slot to point to this tuple.
				 * Note that the slot acquires a pin on the buffer.
				 */
				ExecStoreTuple(&scandesc->rs_ctup,
							   slot,
							   scandesc->rs_cbuf,
							   false);

			
				/* return this tuple */
				return slot;
			}
		}
	}

	/*
	 * If we haven't yet performed the underlying index scan, do it, and
	 * prepare the bitmap to be iterated over.
	 */
	if (tbm == NULL)
	{
		tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

		if (!tbm || !IsA(tbm, TIDBitmap))
			elog(ERROR, "unrecognized result from subplan");

		node->tbm = tbm;
		node->tbmres = tbmres = NULL;

		tbm_begin_iterate(tbm);
	}

	for (;;)
	{
		/*
		 * Get next page of results if needed
		 */
		if (tbmres == NULL)
		{
			node->tbmres = tbmres = tbm_iterate(tbm);
			if (tbmres == NULL)
			{
				/* no more entries in the bitmap */
				break;
			}

			/*
			 * Ignore any claimed entries past what we think is the end of the
			 * relation.  (This is probably not necessary given that we got
			 * AccessShareLock before performing any of the indexscans, but
			 * let's be safe.)
			 */
			if (tbmres->blockno >= scandesc->rs_nblocks)
			{
				node->tbmres = tbmres = NULL;
				continue;
			}

			/*
			 * Acquire pin on the current heap page.  We'll hold the pin until
			 * done looking at the page.  We trade in any pin we held before.
			 */
			scandesc->rs_cbuf = ReleaseAndReadBuffer(scandesc->rs_cbuf,
													 scandesc->rs_rd,
													 tbmres->blockno);

			/*
			 * Determine how many entries we need to look at on this page. If
			 * the bitmap is lossy then we need to look at each physical item
			 * pointer; otherwise we just look through the offsets listed in
			 * tbmres.
			 */
			if (tbmres->ntuples >= 0)
			{
				/* non-lossy case */
				node->minslot = 0;
				node->maxslot = tbmres->ntuples - 1;
			}
			else
			{
				/* lossy case */
				Page		dp;

				LockBuffer(scandesc->rs_cbuf, BUFFER_LOCK_SHARE);
				dp = (Page) BufferGetPage(scandesc->rs_cbuf);

				node->minslot = FirstOffsetNumber;
				node->maxslot = PageGetMaxOffsetNumber(dp);

				LockBuffer(scandesc->rs_cbuf, BUFFER_LOCK_UNLOCK);
			}

			/*
			 * Set curslot to first slot to examine
			 */
			node->curslot = node->minslot;
		}
		else
		{
			/*
			 * Continuing in previously obtained page; advance curslot
			 */
			node->curslot++;
		}

		/*
		 * Out of range?  If so, nothing more to look at on this page
		 */
		if (node->curslot < node->minslot || node->curslot > node->maxslot)
		{
			node->tbmres = tbmres = NULL;
			continue;
		}

		/*
		 * Okay to try to fetch the tuple
		 */
		if (tbmres->ntuples >= 0)
		{
			/* non-lossy case */
			targoffset = tbmres->offsets[node->curslot];
		}
		else
		{
			/* lossy case */
			targoffset = (OffsetNumber) node->curslot;
		}

		ItemPointerSet(&scandesc->rs_ctup.t_self, tbmres->blockno, targoffset);

		/*
		 * Fetch the heap tuple and see if it matches the snapshot. We use
		 * heap_release_fetch to avoid useless bufmgr traffic.
		 */
		if (heap_release_fetch(scandesc->rs_rd,
							   scandesc->rs_snapshot,
							   &scandesc->rs_ctup,
							   &scandesc->rs_cbuf,
							   true,
							   &scandesc->rs_pgstat_info))
		{
			/*
			 * Set up the result slot to point to this tuple. Note that the
			 * slot acquires a pin on the buffer.
			 */
			ExecStoreTuple(&scandesc->rs_ctup,
						   slot,
						   scandesc->rs_cbuf,
						   false);

			/*
			 * If we are using lossy info, we have to recheck the qual
			 * conditions at every tuple.
			 */
			if (tbmres->ntuples < 0)
			{
				econtext->ecxt_scantuple = slot;
				ResetExprContext(econtext);

				if (!ExecQual(node->bitmapqualorig, econtext, false))
				{
					/* Fails recheck, so drop it and loop back for another */
					ExecClearTuple(slot);
					continue;
				}
			}

			/* OK to return this tuple */
			return slot;
		}

		/*
		 * Failed the snap, so loop back and try again.
		 */
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	return ExecClearTuple(slot);
}
Example #2
0
File: nbtree.c Project: LJoNe/gpdb
/*
 * Post vacuum, iterate over all entries in index, check if the h_tid
 * of each entry exists and is not dead.  For specific system tables,
 * also ensure that the key in index entry matches the corresponding
 * attribute in the heap tuple.
 */
void
_bt_validate_vacuum(Relation irel, Relation hrel, TransactionId oldest_xmin)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	BlockNumber blkno;
	BlockNumber num_pages;
	Buffer ibuf = InvalidBuffer;
	Buffer hbuf = InvalidBuffer;
	Page ipage;
	BTPageOpaque opaque;
	IndexTuple itup;
	HeapTupleData htup;
	OffsetNumber maxoff,
			minoff,
			offnum;
	Oid ioid,
		hoid;
	bool isnull;

	blkno = BTREE_METAPAGE + 1;
	num_pages = RelationGetNumberOfBlocks(irel);

	elog(LOG, "btvalidatevacuum: index %s, heap %s",
		 RelationGetRelationName(irel), RelationGetRelationName(hrel));

	MIRROREDLOCK_BUFMGR_LOCK;
	for (; blkno < num_pages; blkno++)
	{
		ibuf = ReadBuffer(irel, blkno);
		ipage = BufferGetPage(ibuf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(ipage);
		if (!PageIsNew(ipage))
			_bt_checkpage(irel, ibuf);
		if (P_ISLEAF(opaque))
		{
			minoff = P_FIRSTDATAKEY(opaque);
			maxoff = PageGetMaxOffsetNumber(ipage);
			for (offnum = minoff;
				 offnum <= maxoff;
				 offnum = OffsetNumberNext(offnum))
			{
				itup = (IndexTuple) PageGetItem(ipage,
												PageGetItemId(ipage, offnum));
				ItemPointerCopy(&itup->t_tid, &htup.t_self);
				/*
				 * TODO: construct a tid bitmap based on index tids
				 * and fetch heap tids in order afterwards.  That will
				 * also allow validating if a heap tid appears twice
				 * in a unique index.
				 */
				if (!heap_release_fetch(hrel, SnapshotAny, &htup,
										&hbuf, true, NULL))
				{
					elog(ERROR, "btvalidatevacuum: tid (%d,%d) from index %s "
						 "not found in heap %s",
						 ItemPointerGetBlockNumber(&itup->t_tid),
						 ItemPointerGetOffsetNumber(&itup->t_tid),
						 RelationGetRelationName(irel),
						 RelationGetRelationName(hrel));
				}
				switch (HeapTupleSatisfiesVacuum(hrel, htup.t_data, oldest_xmin, hbuf))
				{
					case HEAPTUPLE_RECENTLY_DEAD:
					case HEAPTUPLE_LIVE:
					case HEAPTUPLE_INSERT_IN_PROGRESS:
					case HEAPTUPLE_DELETE_IN_PROGRESS:
						/* these tuples are considered alive by vacuum */
						break;
					case HEAPTUPLE_DEAD:
						elog(ERROR, "btvalidatevacuum: vacuum did not remove "
							 "dead tuple (%d,%d) from heap %s and index %s",
							 ItemPointerGetBlockNumber(&itup->t_tid),
							 ItemPointerGetOffsetNumber(&itup->t_tid),
							 RelationGetRelationName(hrel),
							 RelationGetRelationName(irel));
						break;
					default:
						elog(ERROR, "btvalidatevacuum: invalid visibility");
						break;
				}
				switch(RelationGetRelid(irel))
				{
					case DatabaseOidIndexId:
					case TypeOidIndexId:
					case ClassOidIndexId:
					case ConstraintOidIndexId:
						hoid = HeapTupleGetOid(&htup);
						ioid = index_getattr(itup, 1, RelationGetDescr(irel), &isnull);
						if (hoid != ioid)
						{
							elog(ERROR,
								 "btvalidatevacuum: index oid(%d) != heap oid(%d)"
								 " tuple (%d,%d) index %s", ioid, hoid,
								 ItemPointerGetBlockNumber(&itup->t_tid),
								 ItemPointerGetOffsetNumber(&itup->t_tid),
								 RelationGetRelationName(irel));
						}
						break;
					case GpRelationNodeOidIndexId:
						hoid = heap_getattr(&htup, 1, RelationGetDescr(hrel), &isnull);
						ioid = index_getattr(itup, 1, RelationGetDescr(irel), &isnull);
						if (hoid != ioid)
						{
							elog(ERROR,
								 "btvalidatevacuum: index oid(%d) != heap oid(%d)"
								 " tuple (%d,%d) index %s", ioid, hoid,
								 ItemPointerGetBlockNumber(&itup->t_tid),
								 ItemPointerGetOffsetNumber(&itup->t_tid),
								 RelationGetRelationName(irel));
						}
						int4 hsegno = heap_getattr(&htup, 2, RelationGetDescr(hrel), &isnull);
						int4 isegno = index_getattr(itup, 2, RelationGetDescr(irel), &isnull);
						if (isegno != hsegno)
						{
							elog(ERROR,
								 "btvalidatevacuum: index segno(%d) != heap segno(%d)"
								 " tuple (%d,%d) index %s", isegno, hsegno,
								 ItemPointerGetBlockNumber(&itup->t_tid),
								 ItemPointerGetOffsetNumber(&itup->t_tid),
								 RelationGetRelationName(irel));
						}
						break;
					default:
						break;
				}
				if (RelationGetNamespace(irel) == PG_AOSEGMENT_NAMESPACE)
				{
					int4 isegno = index_getattr(itup, 1, RelationGetDescr(irel), &isnull);
					int4 hsegno = heap_getattr(&htup, 1, RelationGetDescr(hrel), &isnull);
					if (isegno != hsegno)
					{
						elog(ERROR,
							 "btvalidatevacuum: index segno(%d) != heap segno(%d)"
							 " tuple (%d,%d) index %s", isegno, hsegno,
							 ItemPointerGetBlockNumber(&itup->t_tid),
							 ItemPointerGetOffsetNumber(&itup->t_tid),
							 RelationGetRelationName(irel));
					}
				}
			}
		}
		if (BufferIsValid(ibuf))
			ReleaseBuffer(ibuf);
	}
	if (BufferIsValid(hbuf))
		ReleaseBuffer(hbuf);
	MIRROREDLOCK_BUFMGR_UNLOCK;
}
Example #3
0
/* ----------------
 *		index_getnext - get the next heap tuple from a scan
 *
 * The result is the next heap tuple satisfying the scan keys and the
 * snapshot, or NULL if no more matching tuples exist.	On success,
 * the buffer containing the heap tuple is pinned (the pin will be dropped
 * at the next index_getnext or index_endscan).
 * ----------------
 */
HeapTuple
index_getnext(IndexScanDesc scan, ScanDirection direction)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	HeapTuple	heapTuple = &scan->xs_ctup;
	FmgrInfo   *procedure;

	SCAN_CHECKS;
	GET_SCAN_PROCEDURE(amgettuple);

	/* just make sure this is false... */
	scan->kill_prior_tuple = false;

	for (;;)
	{
		bool		found;

		/*
		 * The AM's gettuple proc finds the next tuple matching the scan keys.
		 */
		found = DatumGetBool(FunctionCall2(procedure,
										   PointerGetDatum(scan),
										   Int32GetDatum(direction)));

		/* Reset kill flag immediately for safety */
		scan->kill_prior_tuple = false;

		if (!found)
		{
			/* Release any held pin on a heap page */
			if (BufferIsValid(scan->xs_cbuf))
			{
				ReleaseBuffer(scan->xs_cbuf);
				scan->xs_cbuf = InvalidBuffer;
			}
			return NULL;		/* failure exit */
		}

		pgstat_count_index_tuples(scan->indexRelation, 1);

		/*
		 * Fetch the heap tuple and see if it matches the snapshot.
		 */
		if (heap_release_fetch(scan->heapRelation, scan->xs_snapshot,
							   heapTuple, &scan->xs_cbuf, true,
							   scan->indexRelation))
			break;

		/* Skip if no undeleted tuple at this location */
		if (heapTuple->t_data == NULL)
			continue;

		/*
		 * If we can't see it, maybe no one else can either.  Check to see if
		 * the tuple is dead to all transactions.  If so, signal the index AM
		 * to not return it on future indexscans.
		 *
		 * We told heap_release_fetch to keep a pin on the buffer, so we can
		 * re-access the tuple here.  But we must re-lock the buffer first.
		 */

		// -------- MirroredLock ----------
		MIRROREDLOCK_BUFMGR_LOCK;

		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);

		if (HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
									 scan->xs_cbuf, true) == HEAPTUPLE_DEAD)
			scan->kill_prior_tuple = true;

		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
		
		MIRROREDLOCK_BUFMGR_UNLOCK;
		// -------- MirroredLock ----------
		
	}

	/* Success exit */
	return heapTuple;
}