Пример #1
0
/*
 * Initializes the BitmapTableScanState, including creation of the
 * scan description and the bitmapqualorig.
 */
BitmapTableScanState *
ExecInitBitmapTableScan(BitmapTableScan *node, EState *estate, int eflags)
{
	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
	/*
	 * Assert caller didn't ask for an unsafe snapshot --- see comments at
	 * head of file.
	 *
	 * MPP-4703: the MVCC-snapshot restriction is required for correct results.
	 * our test-mode may deliberately return incorrect results, but that's OK.
	 */
	Assert(IsMVCCSnapshot(estate->es_snapshot) || gp_select_invisible);

	BitmapTableScanState *state = makeNode(BitmapTableScanState);

	BitmapTableScanBegin(state, (Plan *) node, estate, eflags);

	/*
	 * initialize child nodes
	 *
	 * We do this last because the child nodes will open indexscans on our
	 * relation's indexes, and we want to be sure we have acquired a lock on
	 * the relation first.
	 */
	outerPlanState(state) = ExecInitNode(outerPlan(node), estate, eflags);

	initGpmonPktForBitmapTableScan((Plan *)node, &state->ss.ps.gpmon_pkt, estate);

	return state;
}
Пример #2
0
/* ----------------
 *		index_fetch_heap - get the scan's next heap tuple
 *
 * The result is a visible heap tuple associated with the index TID most
 * recently fetched by index_getnext_tid, or NULL if no more matching tuples
 * exist.  (There can be more than one matching tuple because of HOT chains,
 * although when using an MVCC snapshot it should be impossible for more than
 * one such tuple to exist.)
 *
 * On success, the buffer containing the heap tup is pinned (the pin will be
 * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
 * call).
 *
 * Note: caller must check scan->xs_recheck, and perform rechecking of the
 * scan keys if required.  We do not do that here because we don't have
 * enough information to do it efficiently in the general case.
 * ----------------
 */
HeapTuple
index_fetch_heap(IndexScanDesc scan)
{
	ItemPointer tid = &scan->xs_ctup.t_self;
	bool		all_dead = false;
	bool		got_heap_tuple;

	/* We can skip the buffer-switching logic if we're in mid-HOT chain. */
	if (!scan->xs_continue_hot)
	{
		/* Switch to correct buffer if we don't have it already */
		Buffer		prev_buf = scan->xs_cbuf;

		scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
											 scan->heapRelation,
											 ItemPointerGetBlockNumber(tid));

		/*
		 * Prune page, but only if we weren't already on this page
		 */
		if (prev_buf != scan->xs_cbuf)
			heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf);
	}

	/* Obtain share-lock on the buffer so we can examine visibility */
	LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
	got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
											scan->xs_cbuf,
											scan->xs_snapshot,
											&scan->xs_ctup,
											&all_dead,
											!scan->xs_continue_hot);
	LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

	if (got_heap_tuple)
	{
		/*
		 * Only in a non-MVCC snapshot can more than one member of the HOT
		 * chain be visible.
		 */
		scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
		pgstat_count_heap_fetch(scan->indexRelation);
		return &scan->xs_ctup;
	}

	/* We've reached the end of the HOT chain. */
	scan->xs_continue_hot = false;

	/*
	 * If we scanned a whole HOT chain and found only dead tuples, tell index
	 * AM to kill its entry for that TID (this will take effect in the next
	 * amgettuple call, in index_getnext_tid).  We do not do this when in
	 * recovery because it may violate MVCC to do so.  See comments in
	 * RelationGetIndexScan().
	 */
	if (!scan->xactStartedInRecovery)
		scan->kill_prior_tuple = all_dead;

	return NULL;
}
Пример #3
0
void
table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
{
	Assert(IsMVCCSnapshot(snapshot));

	RegisterSnapshot(snapshot);
	scan->rs_snapshot = snapshot;
	scan->rs_temp_snap = true;
}
Пример #4
0
/*
 * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
 *
 * In particular, determine if this tuple would be visible to a catalog scan
 * that started now.  We don't handle the case of a non-MVCC scan snapshot,
 * because no caller needs that yet.
 *
 * This is useful to test whether an object was deleted while we waited to
 * acquire lock on it.
 *
 * Note: we don't actually *need* the tuple to be passed in, but it's a
 * good crosscheck that the caller is interested in the right tuple.
 */
bool
systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
{
	Snapshot	freshsnap;
	bool		result;

	/*
	 * Trust that LockBuffer() and HeapTupleSatisfiesMVCC() do not themselves
	 * acquire snapshots, so we need not register the snapshot.  Those
	 * facilities are too low-level to have any business scanning tables.
	 */
	freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));

	if (sysscan->irel)
	{
		IndexScanDesc scan = sysscan->iscan;

		Assert(IsMVCCSnapshot(scan->xs_snapshot));
		Assert(tup == &scan->xs_ctup);
		Assert(BufferIsValid(scan->xs_cbuf));
		/* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
		result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->xs_cbuf);
		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
	}
	else
	{
		HeapScanDesc scan = sysscan->scan;

		Assert(IsMVCCSnapshot(scan->rs_snapshot));
		Assert(tup == &scan->rs_ctup);
		Assert(BufferIsValid(scan->rs_cbuf));
		/* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
		result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->rs_cbuf);
		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
	}
	return result;
}
Пример #5
0
Size
table_parallelscan_estimate(Relation rel, Snapshot snapshot)
{
	Size		sz = 0;

	if (IsMVCCSnapshot(snapshot))
		sz = add_size(sz, EstimateSnapshotSpace(snapshot));
	else
		Assert(snapshot == SnapshotAny);

	sz = add_size(sz, rel->rd_tableam->parallelscan_estimate(rel));

	return sz;
}
Пример #6
0
/* ----------------
 *		index_restrpos	- restore a scan position
 *
 * NOTE: this only restores the internal scan state of the index AM.
 * The current result tuple (scan->xs_ctup) doesn't change.  See comments
 * for ExecRestrPos().
 *
 * NOTE: in the presence of HOT chains, mark/restore only works correctly
 * if the scan's snapshot is MVCC-safe; that ensures that there's at most one
 * returnable tuple in each HOT chain, and so restoring the prior state at the
 * granularity of the index AM is sufficient.  Since the only current user
 * of mark/restore functionality is nodeMergejoin.c, this effectively means
 * that merge-join plans only work for MVCC snapshots.  This could be fixed
 * if necessary, but for now it seems unimportant.
 * ----------------
 */
void
index_restrpos(IndexScanDesc scan)
{
	Assert(IsMVCCSnapshot(scan->xs_snapshot));

	SCAN_CHECKS;
	CHECK_SCAN_PROCEDURE(amrestrpos);

	scan->xs_continue_hot = false;

	scan->kill_prior_tuple = false; /* for safety */

	scan->indexRelation->rd_amroutine->amrestrpos(scan);
}
Пример #7
0
/* ----------------
 *		index_restrpos	- restore a scan position
 *
 * NOTE: this only restores the internal scan state of the index AM.
 * The current result tuple (scan->xs_ctup) doesn't change.  See comments
 * for ExecRestrPos().
 *
 * NOTE: in the presence of HOT chains, mark/restore only works correctly
 * if the scan's snapshot is MVCC-safe; that ensures that there's at most one
 * returnable tuple in each HOT chain, and so restoring the prior state at the
 * granularity of the index AM is sufficient.  Since the only current user
 * of mark/restore functionality is nodeMergejoin.c, this effectively means
 * that merge-join plans only work for MVCC snapshots.  This could be fixed
 * if necessary, but for now it seems unimportant.
 * ----------------
 */
void
index_restrpos(IndexScanDesc scan)
{
	FmgrInfo   *procedure;

	Assert(IsMVCCSnapshot(scan->xs_snapshot));

	SCAN_CHECKS;
	GET_SCAN_PROCEDURE(amrestrpos);

	scan->xs_continue_hot = false;

	scan->kill_prior_tuple = false;		/* for safety */

	FunctionCall1(procedure, PointerGetDatum(scan));
}
Пример #8
0
void
table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan,
							  Snapshot snapshot)
{
	Size		snapshot_off = rel->rd_tableam->parallelscan_initialize(rel, pscan);

	pscan->phs_snapshot_off = snapshot_off;

	if (IsMVCCSnapshot(snapshot))
	{
		SerializeSnapshot(snapshot, (char *) pscan + pscan->phs_snapshot_off);
		pscan->phs_snapshot_any = false;
	}
	else
	{
		Assert(snapshot == SnapshotAny);
		pscan->phs_snapshot_any = true;
	}
}
Пример #9
0
/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
	BitmapHeapScanState *scanstate;
	Relation	currentRelation;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * Assert caller didn't ask for an unsafe snapshot --- see comments at
	 * head of file.
	 */
	Assert(IsMVCCSnapshot(estate->es_snapshot));

	/*
	 * create state structure
	 */
	scanstate = makeNode(BitmapHeapScanState);
	scanstate->ss.ps.plan = (Plan *) node;
	scanstate->ss.ps.state = estate;

	scanstate->tbm = NULL;
	scanstate->tbmiterator = NULL;
	scanstate->tbmres = NULL;
	scanstate->prefetch_iterator = NULL;
	scanstate->prefetch_pages = 0;
	scanstate->prefetch_target = 0;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &scanstate->ss.ps);

	scanstate->ss.ps.ps_TupFromTlist = false;

	/*
	 * initialize child expressions
	 */
	scanstate->ss.ps.targetlist = (List *)
		ExecInitExpr((Expr *) node->scan.plan.targetlist,
					 (PlanState *) scanstate);
	scanstate->ss.ps.qual = (List *)
		ExecInitExpr((Expr *) node->scan.plan.qual,
					 (PlanState *) scanstate);
	scanstate->bitmapqualorig = (List *)
		ExecInitExpr((Expr *) node->bitmapqualorig,
					 (PlanState *) scanstate);

	/*
	 * tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
	ExecInitScanTupleSlot(estate, &scanstate->ss);

	/*
	 * open the base relation and acquire appropriate lock on it.
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);

	scanstate->ss.ss_currentRelation = currentRelation;

	/*
	 * Even though we aren't going to do a conventional seqscan, it is useful
	 * to create a HeapScanDesc --- most of the fields in it are usable.
	 */
	scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
														 estate->es_snapshot,
														 0,
														 NULL);

	/*
	 * get the scan type from the relation descriptor.
	 */
	ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));

	/*
	 * Initialize result tuple type and projection info.
	 */
	ExecAssignResultTypeFromTL(&scanstate->ss.ps);
	ExecAssignScanProjectionInfo(&scanstate->ss);

	/*
	 * initialize child nodes
	 *
	 * We do this last because the child nodes will open indexscans on our
	 * relation's indexes, and we want to be sure we have acquired a lock on
	 * the relation first.
	 */
	outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * all done.
	 */
	return scanstate;
}
Пример #10
0
/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
	BitmapHeapScanState *scanstate;
	Relation	currentRelation;
	int			io_concurrency;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * Assert caller didn't ask for an unsafe snapshot --- see comments at
	 * head of file.
	 */
	Assert(IsMVCCSnapshot(estate->es_snapshot));

	/*
	 * create state structure
	 */
	scanstate = makeNode(BitmapHeapScanState);
	scanstate->ss.ps.plan = (Plan *) node;
	scanstate->ss.ps.state = estate;

	scanstate->tbm = NULL;
	scanstate->tbmiterator = NULL;
	scanstate->tbmres = NULL;
	scanstate->exact_pages = 0;
	scanstate->lossy_pages = 0;
	scanstate->prefetch_iterator = NULL;
	scanstate->prefetch_pages = 0;
	scanstate->prefetch_target = 0;
	/* may be updated below */
	scanstate->prefetch_maximum = target_prefetch_pages;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &scanstate->ss.ps);

	scanstate->ss.ps.ps_TupFromTlist = false;

	/*
	 * initialize child expressions
	 */
	scanstate->ss.ps.targetlist = (List *)
		ExecInitExpr((Expr *) node->scan.plan.targetlist,
					 (PlanState *) scanstate);
	scanstate->ss.ps.qual = (List *)
		ExecInitExpr((Expr *) node->scan.plan.qual,
					 (PlanState *) scanstate);
	scanstate->bitmapqualorig = (List *)
		ExecInitExpr((Expr *) node->bitmapqualorig,
					 (PlanState *) scanstate);

	/*
	 * tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
	ExecInitScanTupleSlot(estate, &scanstate->ss);

	/*
	 * open the base relation and acquire appropriate lock on it.
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

	/*
	 * Determine the maximum for prefetch_target.  If the tablespace has a
	 * specific IO concurrency set, use that to compute the corresponding
	 * maximum value; otherwise, we already initialized to the value computed
	 * by the GUC machinery.
	 */
	io_concurrency =
		get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
	if (io_concurrency != effective_io_concurrency)
	{
		double		maximum;

		if (ComputeIoConcurrency(io_concurrency, &maximum))
			scanstate->prefetch_maximum = rint(maximum);
	}

	scanstate->ss.ss_currentRelation = currentRelation;

	/*
	 * Even though we aren't going to do a conventional seqscan, it is useful
	 * to create a HeapScanDesc --- most of the fields in it are usable.
	 */
	scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
														 estate->es_snapshot,
														 0,
														 NULL);

	/*
	 * get the scan type from the relation descriptor.
	 */
	ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));

	/*
	 * Initialize result tuple type and projection info.
	 */
	ExecAssignResultTypeFromTL(&scanstate->ss.ps);
	ExecAssignScanProjectionInfo(&scanstate->ss);

	/*
	 * initialize child nodes
	 *
	 * We do this last because the child nodes will open indexscans on our
	 * relation's indexes, and we want to be sure we have acquired a lock on
	 * the relation first.
	 */
	outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * all done.
	 */
	return scanstate;
}
Пример #11
0
/* ----------------
 *		index_getnext - get the next heap tuple from a scan
 *
 * The result is the next heap tuple satisfying the scan keys and the
 * snapshot, or NULL if no more matching tuples exist.	On success,
 * the buffer containing the heap tuple is pinned (the pin will be dropped
 * at the next index_getnext or index_endscan).
 *
 * Note: caller must check scan->xs_recheck, and perform rechecking of the
 * scan keys if required.  We do not do that here because we don't have
 * enough information to do it efficiently in the general case.
 * ----------------
 */
HeapTuple
index_getnext(IndexScanDesc scan, ScanDirection direction)
{
	HeapTuple	heapTuple = &scan->xs_ctup;
	ItemPointer tid = &heapTuple->t_self;
	FmgrInfo   *procedure;

	SCAN_CHECKS;
	GET_SCAN_PROCEDURE(amgettuple);

	Assert(TransactionIdIsValid(RecentGlobalXmin));

	/*
	 * We always reset xs_hot_dead; if we are here then either we are just
	 * starting the scan, or we previously returned a visible tuple, and in
	 * either case it's inappropriate to kill the prior index entry.
	 */
	scan->xs_hot_dead = false;

	for (;;)
	{
		OffsetNumber offnum;
		bool		at_chain_start;
		Page		dp;

		if (scan->xs_next_hot != InvalidOffsetNumber)
		{
			/*
			 * We are resuming scan of a HOT chain after having returned an
			 * earlier member.	Must still hold pin on current heap page.
			 */
			Assert(BufferIsValid(scan->xs_cbuf));
			Assert(ItemPointerGetBlockNumber(tid) ==
				   BufferGetBlockNumber(scan->xs_cbuf));
			Assert(TransactionIdIsValid(scan->xs_prev_xmax));
			offnum = scan->xs_next_hot;
			at_chain_start = false;
			scan->xs_next_hot = InvalidOffsetNumber;
		}
		else
		{
			bool		found;
			Buffer		prev_buf;

			/*
			 * If we scanned a whole HOT chain and found only dead tuples,
			 * tell index AM to kill its entry for that TID. We do not do this
			 * when in recovery because it may violate MVCC to do so. see
			 * comments in RelationGetIndexScan().
			 */
			if (!scan->xactStartedInRecovery)
				scan->kill_prior_tuple = scan->xs_hot_dead;

			/*
			 * The AM's gettuple proc finds the next index entry matching the
			 * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid). It
			 * should also set scan->xs_recheck, though we pay no attention to
			 * that here.
			 */
			found = DatumGetBool(FunctionCall2(procedure,
											   PointerGetDatum(scan),
											   Int32GetDatum(direction)));

			/* Reset kill flag immediately for safety */
			scan->kill_prior_tuple = false;

			/* If we're out of index entries, break out of outer loop */
			if (!found)
				break;

			pgstat_count_index_tuples(scan->indexRelation, 1);

			/* Switch to correct buffer if we don't have it already */
			prev_buf = scan->xs_cbuf;
			scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
												 scan->heapRelation,
											 ItemPointerGetBlockNumber(tid));

			/*
			 * Prune page, but only if we weren't already on this page
			 */
			if (prev_buf != scan->xs_cbuf)
				heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
									RecentGlobalXmin);

			/* Prepare to scan HOT chain starting at index-referenced offnum */
			offnum = ItemPointerGetOffsetNumber(tid);
			at_chain_start = true;

			/* We don't know what the first tuple's xmin should be */
			scan->xs_prev_xmax = InvalidTransactionId;

			/* Initialize flag to detect if all entries are dead */
			scan->xs_hot_dead = true;
		}

		/* Obtain share-lock on the buffer so we can examine visibility */
		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);

		dp = (Page) BufferGetPage(scan->xs_cbuf);

		/* Scan through possible multiple members of HOT-chain */
		for (;;)
		{
			ItemId		lp;
			ItemPointer ctid;
			bool		valid;

			/* check for bogus TID */
			if (offnum < FirstOffsetNumber ||
				offnum > PageGetMaxOffsetNumber(dp))
				break;

			lp = PageGetItemId(dp, offnum);

			/* check for unused, dead, or redirected items */
			if (!ItemIdIsNormal(lp))
			{
				/* We should only see a redirect at start of chain */
				if (ItemIdIsRedirected(lp) && at_chain_start)
				{
					/* Follow the redirect */
					offnum = ItemIdGetRedirect(lp);
					at_chain_start = false;
					continue;
				}
				/* else must be end of chain */
				break;
			}

			/*
			 * We must initialize all of *heapTuple (ie, scan->xs_ctup) since
			 * it is returned to the executor on success.
			 */
			heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
			heapTuple->t_len = ItemIdGetLength(lp);
			ItemPointerSetOffsetNumber(tid, offnum);
			heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation);
			ctid = &heapTuple->t_data->t_ctid;

			/*
			 * Shouldn't see a HEAP_ONLY tuple at chain start.  (This test
			 * should be unnecessary, since the chain root can't be removed
			 * while we have pin on the index entry, but let's make it
			 * anyway.)
			 */
			if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
				break;

			/*
			 * The xmin should match the previous xmax value, else chain is
			 * broken.	(Note: this test is not optional because it protects
			 * us against the case where the prior chain member's xmax aborted
			 * since we looked at it.)
			 */
			if (TransactionIdIsValid(scan->xs_prev_xmax) &&
				!TransactionIdEquals(scan->xs_prev_xmax,
								  HeapTupleHeaderGetXmin(heapTuple->t_data)))
				break;

			/* If it's visible per the snapshot, we must return it */
			valid = HeapTupleSatisfiesVisibility(heapTuple, scan->xs_snapshot,
												 scan->xs_cbuf);

			CheckForSerializableConflictOut(valid, scan->heapRelation,
											heapTuple, scan->xs_cbuf);

			if (valid)
			{
				/*
				 * If the snapshot is MVCC, we know that it could accept at
				 * most one member of the HOT chain, so we can skip examining
				 * any more members.  Otherwise, check for continuation of the
				 * HOT-chain, and set state for next time.
				 */
				if (IsMVCCSnapshot(scan->xs_snapshot)
					&& !IsolationIsSerializable())
					scan->xs_next_hot = InvalidOffsetNumber;
				else if (HeapTupleIsHotUpdated(heapTuple))
				{
					Assert(ItemPointerGetBlockNumber(ctid) ==
						   ItemPointerGetBlockNumber(tid));
					scan->xs_next_hot = ItemPointerGetOffsetNumber(ctid);
					scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
				}
				else
					scan->xs_next_hot = InvalidOffsetNumber;

				PredicateLockTuple(scan->heapRelation, heapTuple);

				LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

				pgstat_count_heap_fetch(scan->indexRelation);

				return heapTuple;
			}

			/*
			 * If we can't see it, maybe no one else can either.  Check to see
			 * if the tuple is dead to all transactions.  If we find that all
			 * the tuples in the HOT chain are dead, we'll signal the index AM
			 * to not return that TID on future indexscans.
			 */
			if (scan->xs_hot_dead &&
				HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
										 scan->xs_cbuf) != HEAPTUPLE_DEAD)
				scan->xs_hot_dead = false;

			/*
			 * Check to see if HOT chain continues past this tuple; if so
			 * fetch the next offnum (we don't bother storing it into
			 * xs_next_hot, but must store xs_prev_xmax), and loop around.
			 */
			if (HeapTupleIsHotUpdated(heapTuple))
			{
				Assert(ItemPointerGetBlockNumber(ctid) ==
					   ItemPointerGetBlockNumber(tid));
				offnum = ItemPointerGetOffsetNumber(ctid);
				at_chain_start = false;
				scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
			}
			else
				break;			/* end of chain */
		}						/* loop over a single HOT chain */

		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

		/* Loop around to ask index AM for another TID */
		scan->xs_next_hot = InvalidOffsetNumber;
	}

	/* Release any held pin on a heap page */
	if (BufferIsValid(scan->xs_cbuf))
	{
		ReleaseBuffer(scan->xs_cbuf);
		scan->xs_cbuf = InvalidBuffer;
	}

	return NULL;				/* failure exit */
}
Пример #12
0
/* ----------------
 *		index_getnext - get the next heap tuple from a scan
 *
 * The result is the next heap tuple satisfying the scan keys and the
 * snapshot, or NULL if no more matching tuples exist.	On success,
 * the buffer containing the heap tuple is pinned (the pin will be dropped
 * at the next index_getnext or index_endscan).
 *
 * Note: caller must check scan->xs_recheck, and perform rechecking of the
 * scan keys if required.  We do not do that here because we don't have
 * enough information to do it efficiently in the general case.
 * ----------------
 */
HeapTuple
index_getnext(IndexScanDesc scan, ScanDirection direction)
{
	HeapTuple	heapTuple = &scan->xs_ctup;
	ItemPointer tid = &heapTuple->t_self;
	FmgrInfo   *procedure;
	bool		all_dead = false;

	SCAN_CHECKS;
	GET_SCAN_PROCEDURE(amgettuple);

	Assert(TransactionIdIsValid(RecentGlobalXmin));

	for (;;)
	{
		bool	got_heap_tuple;

		if (scan->xs_continue_hot)
		{
			/*
			 * We are resuming scan of a HOT chain after having returned an
			 * earlier member.	Must still hold pin on current heap page.
			 */
			Assert(BufferIsValid(scan->xs_cbuf));
			Assert(ItemPointerGetBlockNumber(tid) ==
				   BufferGetBlockNumber(scan->xs_cbuf));
		}
		else
		{
			bool		found;
			Buffer		prev_buf;

			/*
			 * If we scanned a whole HOT chain and found only dead tuples,
			 * tell index AM to kill its entry for that TID. We do not do this
			 * when in recovery because it may violate MVCC to do so. see
			 * comments in RelationGetIndexScan().
			 */
			if (!scan->xactStartedInRecovery)
				scan->kill_prior_tuple = all_dead;

			/*
			 * The AM's gettuple proc finds the next index entry matching the
			 * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid). It
			 * should also set scan->xs_recheck, though we pay no attention to
			 * that here.
			 */
			found = DatumGetBool(FunctionCall2(procedure,
											   PointerGetDatum(scan),
											   Int32GetDatum(direction)));

			/* Reset kill flag immediately for safety */
			scan->kill_prior_tuple = false;

			/* If we're out of index entries, break out of outer loop */
			if (!found)
				break;

			pgstat_count_index_tuples(scan->indexRelation, 1);

			/* Switch to correct buffer if we don't have it already */
			prev_buf = scan->xs_cbuf;
			scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
												 scan->heapRelation,
											 ItemPointerGetBlockNumber(tid));

			/*
			 * Prune page, but only if we weren't already on this page
			 */
			if (prev_buf != scan->xs_cbuf)
				heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
									RecentGlobalXmin);
		}

		/* Obtain share-lock on the buffer so we can examine visibility */
		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
		got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
												scan->xs_cbuf,
												scan->xs_snapshot,
												&scan->xs_ctup,
												&all_dead,
												!scan->xs_continue_hot);
		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

		if (got_heap_tuple)
		{
			/*
			 * Only in a non-MVCC snapshot can more than one member of the
			 * HOT chain be visible.
			 */
			scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
			pgstat_count_heap_fetch(scan->indexRelation);
			return heapTuple;
		}

		/* Loop around to ask index AM for another TID */
		scan->xs_continue_hot = false;
	}

	/* Release any held pin on a heap page */
	if (BufferIsValid(scan->xs_cbuf))
	{
		ReleaseBuffer(scan->xs_cbuf);
		scan->xs_cbuf = InvalidBuffer;
	}

	return NULL;				/* failure exit */
}
/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
	BitmapHeapScanState *scanstate;
	Relation	currentRelation;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * Assert caller didn't ask for an unsafe snapshot --- see comments at
	 * head of file.
	 */
	Assert(IsMVCCSnapshot(estate->es_snapshot));

	/*
	 * create state structure
	 */
	scanstate = makeNode(BitmapHeapScanState);
	scanstate->ss.ps.plan = (Plan *) node;
	scanstate->ss.ps.state = estate;

	scanstate->tbm = NULL;
	scanstate->tbmres = NULL;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &scanstate->ss.ps);

	scanstate->ss.ps.ps_TupFromTlist = false;

	/*
	 * initialize child expressions
	 */
	scanstate->ss.ps.targetlist = (List *)
		ExecInitExpr((Expr *) node->scan.plan.targetlist,
					 (PlanState *) scanstate);
	scanstate->ss.ps.qual = (List *)
		ExecInitExpr((Expr *) node->scan.plan.qual,
					 (PlanState *) scanstate);
	scanstate->bitmapqualorig = (List *)
		ExecInitExpr((Expr *) node->bitmapqualorig,
					 (PlanState *) scanstate);

#define BITMAPHEAPSCAN_NSLOTS 2

	/*
	 * tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
	ExecInitScanTupleSlot(estate, &scanstate->ss);

	/*
	 * open the base relation and acquire appropriate lock on it.
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);

	scanstate->ss.ss_currentRelation = currentRelation;

	/*
	 * Even though we aren't going to do a conventional seqscan, it is useful
	 * to create a HeapScanDesc --- this checks the relation size and sets up
	 * statistical infrastructure for us.
	 */
	scanstate->ss.ss_currentScanDesc = heap_beginscan(currentRelation,
													  estate->es_snapshot,
													  0,
													  NULL);

	/*
	 * One problem is that heap_beginscan counts a "sequential scan" start,
	 * when we actually aren't doing any such thing.  Reverse out the added
	 * scan count.	(Eventually we may want to count bitmap scans separately.)
	 */
	pgstat_discount_heap_scan(&scanstate->ss.ss_currentScanDesc->rs_pgstat_info);

	/*
	 * get the scan type from the relation descriptor.
	 */
	ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));

	/*
	 * Initialize result tuple type and projection info.
	 */
	ExecAssignResultTypeFromTL(&scanstate->ss.ps);
	ExecAssignScanProjectionInfo(&scanstate->ss);

	/*
	 * initialize child nodes
	 *
	 * We do this last because the child nodes will open indexscans on our
	 * relation's indexes, and we want to be sure we have acquired a lock on
	 * the relation first.
	 */
	outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * all done.
	 */
	return scanstate;
}
Пример #14
0
/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
	BitmapHeapScanState *scanstate;
	Relation	currentRelation;
	int			io_concurrency;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * Assert caller didn't ask for an unsafe snapshot --- see comments at
	 * head of file.
	 */
	Assert(IsMVCCSnapshot(estate->es_snapshot));

	/*
	 * create state structure
	 */
	scanstate = makeNode(BitmapHeapScanState);
	scanstate->ss.ps.plan = (Plan *) node;
	scanstate->ss.ps.state = estate;
	scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;

	scanstate->tbm = NULL;
	scanstate->tbmiterator = NULL;
	scanstate->tbmres = NULL;
	scanstate->skip_fetch = false;
	scanstate->vmbuffer = InvalidBuffer;
	scanstate->pvmbuffer = InvalidBuffer;
	scanstate->exact_pages = 0;
	scanstate->lossy_pages = 0;
	scanstate->prefetch_iterator = NULL;
	scanstate->prefetch_pages = 0;
	scanstate->prefetch_target = 0;
	/* may be updated below */
	scanstate->prefetch_maximum = target_prefetch_pages;
	scanstate->pscan_len = 0;
	scanstate->initialized = false;
	scanstate->shared_tbmiterator = NULL;
	scanstate->shared_prefetch_iterator = NULL;
	scanstate->pstate = NULL;

	/*
	 * We can potentially skip fetching heap pages if we do not need any
	 * columns of the table, either for checking non-indexable quals or for
	 * returning data.  This test is a bit simplistic, as it checks the
	 * stronger condition that there's no qual or return tlist at all.  But in
	 * most cases it's probably not worth working harder than that.
	 */
	scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
								 node->scan.plan.targetlist == NIL);

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &scanstate->ss.ps);

	/*
	 * open the scan relation
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

	/*
	 * initialize child nodes
	 */
	outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * get the scan type from the relation descriptor.
	 */
	ExecInitScanTupleSlot(estate, &scanstate->ss,
						  RelationGetDescr(currentRelation),
						  &TTSOpsBufferHeapTuple);


	/*
	 * Initialize result type and projection.
	 */
	ExecInitResultTypeTL(&scanstate->ss.ps);
	ExecAssignScanProjectionInfo(&scanstate->ss);

	/*
	 * initialize child expressions
	 */
	scanstate->ss.ps.qual =
		ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
	scanstate->bitmapqualorig =
		ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);

	/*
	 * Determine the maximum for prefetch_target.  If the tablespace has a
	 * specific IO concurrency set, use that to compute the corresponding
	 * maximum value; otherwise, we already initialized to the value computed
	 * by the GUC machinery.
	 */
	io_concurrency =
		get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
	if (io_concurrency != effective_io_concurrency)
	{
		double		maximum;

		if (ComputeIoConcurrency(io_concurrency, &maximum))
			scanstate->prefetch_maximum = rint(maximum);
	}

	scanstate->ss.ss_currentRelation = currentRelation;

	/*
	 * Even though we aren't going to do a conventional seqscan, it is useful
	 * to create a HeapScanDesc --- most of the fields in it are usable.
	 */
	scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
														 estate->es_snapshot,
														 0,
														 NULL);

	/*
	 * all done.
	 */
	return scanstate;
}