/*
 * Initializes the BitmapTableScanState, including creation of the
 * scan description and the bitmapqualorig.
 */
BitmapTableScanState *
ExecInitBitmapTableScan(BitmapTableScan *node, EState *estate, int eflags)
{
    BitmapTableScanState *state;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * Assert caller didn't ask for an unsafe snapshot --- see comments at
     * head of file.
     *
     * MPP-4703: the MVCC-snapshot restriction is required for correct
     * results.  Our test mode (gp_select_invisible) may deliberately return
     * incorrect results, but that's OK.
     */
    Assert(IsMVCCSnapshot(estate->es_snapshot) || gp_select_invisible);

    state = makeNode(BitmapTableScanState);
    BitmapTableScanBegin(state, (Plan *) node, estate, eflags);

    /*
     * initialize child nodes
     *
     * We do this last because the child nodes will open indexscans on our
     * relation's indexes, and we want to be sure we have acquired a lock on
     * the relation first.
     */
    outerPlanState(state) = ExecInitNode(outerPlan(node), estate, eflags);

    initGpmonPktForBitmapTableScan((Plan *) node, &state->ss.ps.gpmon_pkt, estate);

    return state;
}
/* ----------------
 *		index_fetch_heap - get the scan's next heap tuple
 *
 * The result is a visible heap tuple associated with the index TID most
 * recently fetched by index_getnext_tid, or NULL if no more matching tuples
 * exist.  (There can be more than one matching tuple because of HOT chains,
 * although when using an MVCC snapshot it should be impossible for more than
 * one such tuple to exist.)
 *
 * On success, the buffer containing the heap tuple is pinned (the pin will
 * be dropped in a future index_getnext_tid, index_fetch_heap or
 * index_endscan call).
 *
 * Note: caller must check scan->xs_recheck, and perform rechecking of the
 * scan keys if required.  We do not do that here because we don't have
 * enough information to do it efficiently in the general case.
 * ----------------
 */
HeapTuple
index_fetch_heap(IndexScanDesc scan)
{
    ItemPointer tid = &scan->xs_ctup.t_self;
    bool        all_dead = false;
    bool        got_heap_tuple;

    /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
    if (!scan->xs_continue_hot)
    {
        /* Switch to correct buffer if we don't have it already */
        Buffer      prev_buf = scan->xs_cbuf;

        scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
                                             scan->heapRelation,
                                             ItemPointerGetBlockNumber(tid));

        /*
         * Prune page, but only if we weren't already on this page
         */
        if (prev_buf != scan->xs_cbuf)
            heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf);
    }

    /* Obtain share-lock on the buffer so we can examine visibility */
    LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
    got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
                                            scan->xs_cbuf,
                                            scan->xs_snapshot,
                                            &scan->xs_ctup,
                                            &all_dead,
                                            !scan->xs_continue_hot);
    LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

    if (got_heap_tuple)
    {
        /*
         * Only in a non-MVCC snapshot can more than one member of the HOT
         * chain be visible.
         */
        scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
        pgstat_count_heap_fetch(scan->indexRelation);
        return &scan->xs_ctup;
    }

    /* We've reached the end of the HOT chain. */
    scan->xs_continue_hot = false;

    /*
     * If we scanned a whole HOT chain and found only dead tuples, tell index
     * AM to kill its entry for that TID (this will take effect in the next
     * amgettuple call, in index_getnext_tid).  We do not do this when in
     * recovery because it may violate MVCC to do so.  See comments in
     * RelationGetIndexScan().
     */
    if (!scan->xactStartedInRecovery)
        scan->kill_prior_tuple = all_dead;

    return NULL;
}
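/*
 * A minimal caller sketch (not part of this file): pairing
 * index_getnext_tid with index_fetch_heap and honoring the xs_recheck
 * contract from the header comment above.  The recheck_scan_keys()
 * helper is hypothetical, named here only for illustration.
 */
static void
example_index_scan_loop(IndexScanDesc scan)
{
    while (index_getnext_tid(scan, ForwardScanDirection) != NULL)
    {
        HeapTuple   tup = index_fetch_heap(scan);

        if (tup == NULL)
            continue;           /* no visible member of this HOT chain */

        /* caller-side recheck, as the contract above requires */
        if (scan->xs_recheck && !recheck_scan_keys(scan, tup))
            continue;

        /* ... process tup; its buffer pin is held until the next fetch ... */
    }
}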
/*
 * table_scan_update_snapshot --- update the snapshot used by an
 * already-started scan, registering the new snapshot so it stays valid
 * for the life of the scan.
 */
void
table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
{
    Assert(IsMVCCSnapshot(snapshot));

    RegisterSnapshot(snapshot);
    scan->rs_snapshot = snapshot;
    scan->rs_temp_snap = true;
}
/*
 * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
 *
 * In particular, determine if this tuple would be visible to a catalog scan
 * that started now.  We don't handle the case of a non-MVCC scan snapshot,
 * because no caller needs that yet.
 *
 * This is useful to test whether an object was deleted while we waited to
 * acquire lock on it.
 *
 * Note: we don't actually *need* the tuple to be passed in, but it's a
 * good crosscheck that the caller is interested in the right tuple.
 */
bool
systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
{
    Snapshot    freshsnap;
    bool        result;

    /*
     * Trust that LockBuffer() and HeapTupleSatisfiesMVCC() do not themselves
     * acquire snapshots, so we need not register the snapshot.  Those
     * facilities are too low-level to have any business scanning tables.
     */
    freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));

    if (sysscan->irel)
    {
        IndexScanDesc scan = sysscan->iscan;

        Assert(IsMVCCSnapshot(scan->xs_snapshot));
        Assert(tup == &scan->xs_ctup);
        Assert(BufferIsValid(scan->xs_cbuf));
        /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
        LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
        result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->xs_cbuf);
        LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
    }
    else
    {
        HeapScanDesc scan = sysscan->scan;

        Assert(IsMVCCSnapshot(scan->rs_snapshot));
        Assert(tup == &scan->rs_ctup);
        Assert(BufferIsValid(scan->rs_cbuf));
        /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
        result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->rs_cbuf);
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    }
    return result;
}
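/*
 * A hedged sketch of the wait-then-recheck pattern the header comment
 * describes: fetch a tuple, block on a lock, then verify the object was
 * not deleted while we slept.  The lock handling here is an illustrative
 * assumption; only systable_getnext and systable_recheck_tuple are the
 * API shown above.
 */
static bool
example_lock_and_recheck(SysScanDesc sysscan, LOCKTAG *locktag)
{
    HeapTuple   tuple = systable_getnext(sysscan);

    if (!HeapTupleIsValid(tuple))
        return false;

    /* may block for a long time while another backend holds the lock */
    (void) LockAcquire(locktag, AccessExclusiveLock, false, false);

    /* was the object deleted while we waited? */
    return systable_recheck_tuple(sysscan, tuple);
}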
/*
 * table_parallelscan_estimate --- estimate the shared-memory space needed
 * for a parallel table scan's shared state, including any serialized
 * snapshot.
 */
Size
table_parallelscan_estimate(Relation rel, Snapshot snapshot)
{
    Size        sz = 0;

    if (IsMVCCSnapshot(snapshot))
        sz = add_size(sz, EstimateSnapshotSpace(snapshot));
    else
        Assert(snapshot == SnapshotAny);

    sz = add_size(sz, rel->rd_tableam->parallelscan_estimate(rel));

    return sz;
}
/* ----------------
 *		index_restrpos - restore a scan position
 *
 * NOTE: this only restores the internal scan state of the index AM.
 * The current result tuple (scan->xs_ctup) doesn't change.  See comments
 * for ExecRestrPos().
 *
 * NOTE: in the presence of HOT chains, mark/restore only works correctly
 * if the scan's snapshot is MVCC-safe; that ensures that there's at most
 * one returnable tuple in each HOT chain, and so restoring the prior state
 * at the granularity of the index AM is sufficient.  Since the only current
 * user of mark/restore functionality is nodeMergejoin.c, this effectively
 * means that merge-join plans only work for MVCC snapshots.  This could be
 * fixed if necessary, but for now it seems unimportant.
 * ----------------
 */
void
index_restrpos(IndexScanDesc scan)
{
    Assert(IsMVCCSnapshot(scan->xs_snapshot));

    SCAN_CHECKS;
    CHECK_SCAN_PROCEDURE(amrestrpos);

    scan->xs_continue_hot = false;

    scan->kill_prior_tuple = false; /* for safety */

    scan->indexRelation->rd_amroutine->amrestrpos(scan);
}
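/*
 * Hedged usage sketch: mark/restore as a merge-join style caller might
 * drive it.  The surrounding control flow is illustrative; index_markpos,
 * index_getnext_tid and index_restrpos are real calls from this API.
 */
static void
example_mark_restore(IndexScanDesc scan)
{
    index_markpos(scan);        /* remember the current position */

    /* scan ahead, e.g. probe the next matching TID */
    (void) index_getnext_tid(scan, ForwardScanDirection);

    index_restrpos(scan);       /* subsequent fetches resume at the mark */
}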
/* ----------------
 *		index_restrpos - restore a scan position
 *
 * NOTE: this only restores the internal scan state of the index AM.
 * The current result tuple (scan->xs_ctup) doesn't change.  See comments
 * for ExecRestrPos().
 *
 * NOTE: in the presence of HOT chains, mark/restore only works correctly
 * if the scan's snapshot is MVCC-safe; that ensures that there's at most
 * one returnable tuple in each HOT chain, and so restoring the prior state
 * at the granularity of the index AM is sufficient.  Since the only current
 * user of mark/restore functionality is nodeMergejoin.c, this effectively
 * means that merge-join plans only work for MVCC snapshots.  This could be
 * fixed if necessary, but for now it seems unimportant.
 * ----------------
 */
void
index_restrpos(IndexScanDesc scan)
{
    FmgrInfo   *procedure;

    Assert(IsMVCCSnapshot(scan->xs_snapshot));

    SCAN_CHECKS;
    GET_SCAN_PROCEDURE(amrestrpos);

    scan->xs_continue_hot = false;

    scan->kill_prior_tuple = false; /* for safety */

    FunctionCall1(procedure, PointerGetDatum(scan));
}
/*
 * table_parallelscan_initialize --- initialize a ParallelTableScanDesc,
 * serializing the snapshot into the shared area when it is an MVCC one.
 */
void
table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan,
                              Snapshot snapshot)
{
    Size        snapshot_off = rel->rd_tableam->parallelscan_initialize(rel, pscan);

    pscan->phs_snapshot_off = snapshot_off;

    if (IsMVCCSnapshot(snapshot))
    {
        SerializeSnapshot(snapshot, (char *) pscan + pscan->phs_snapshot_off);
        pscan->phs_snapshot_any = false;
    }
    else
    {
        Assert(snapshot == SnapshotAny);
        pscan->phs_snapshot_any = true;
    }
}
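/*
 * Hedged sketch of how a parallel-scan leader might pair
 * table_parallelscan_estimate with table_parallelscan_initialize: size
 * the shared state, allocate it in dynamic shared memory, then fill it
 * in.  The shm_toc handling is one common pattern, not a fixed recipe;
 * the key value is an assumption supplied by the caller.
 */
static ParallelTableScanDesc
example_setup_parallel_scan(shm_toc *toc, uint64 key,
                            Relation rel, Snapshot snapshot)
{
    Size        sz = table_parallelscan_estimate(rel, snapshot);
    ParallelTableScanDesc pscan;

    pscan = (ParallelTableScanDesc) shm_toc_allocate(toc, sz);
    table_parallelscan_initialize(rel, pscan, snapshot);
    shm_toc_insert(toc, key, pscan);    /* make it visible to workers */

    return pscan;
}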
/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
    BitmapHeapScanState *scanstate;
    Relation    currentRelation;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * Assert caller didn't ask for an unsafe snapshot --- see comments at
     * head of file.
     */
    Assert(IsMVCCSnapshot(estate->es_snapshot));

    /*
     * create state structure
     */
    scanstate = makeNode(BitmapHeapScanState);
    scanstate->ss.ps.plan = (Plan *) node;
    scanstate->ss.ps.state = estate;

    scanstate->tbm = NULL;
    scanstate->tbmiterator = NULL;
    scanstate->tbmres = NULL;
    scanstate->prefetch_iterator = NULL;
    scanstate->prefetch_pages = 0;
    scanstate->prefetch_target = 0;

    /*
     * Miscellaneous initialization
     *
     * create expression context for node
     */
    ExecAssignExprContext(estate, &scanstate->ss.ps);

    scanstate->ss.ps.ps_TupFromTlist = false;

    /*
     * initialize child expressions
     */
    scanstate->ss.ps.targetlist = (List *)
        ExecInitExpr((Expr *) node->scan.plan.targetlist,
                     (PlanState *) scanstate);
    scanstate->ss.ps.qual = (List *)
        ExecInitExpr((Expr *) node->scan.plan.qual,
                     (PlanState *) scanstate);
    scanstate->bitmapqualorig = (List *)
        ExecInitExpr((Expr *) node->bitmapqualorig,
                     (PlanState *) scanstate);

    /*
     * tuple table initialization
     */
    ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
    ExecInitScanTupleSlot(estate, &scanstate->ss);

    /*
     * open the base relation and acquire appropriate lock on it.
     */
    currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);

    scanstate->ss.ss_currentRelation = currentRelation;

    /*
     * Even though we aren't going to do a conventional seqscan, it is useful
     * to create a HeapScanDesc --- most of the fields in it are usable.
     */
    scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
                                                         estate->es_snapshot,
                                                         0,
                                                         NULL);

    /*
     * get the scan type from the relation descriptor.
     */
    ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));

    /*
     * Initialize result tuple type and projection info.
     */
    ExecAssignResultTypeFromTL(&scanstate->ss.ps);
    ExecAssignScanProjectionInfo(&scanstate->ss);

    /*
     * initialize child nodes
     *
     * We do this last because the child nodes will open indexscans on our
     * relation's indexes, and we want to be sure we have acquired a lock on
     * the relation first.
     */
    outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

    /*
     * all done.
     */
    return scanstate;
}
/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
    BitmapHeapScanState *scanstate;
    Relation    currentRelation;
    int         io_concurrency;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * Assert caller didn't ask for an unsafe snapshot --- see comments at
     * head of file.
     */
    Assert(IsMVCCSnapshot(estate->es_snapshot));

    /*
     * create state structure
     */
    scanstate = makeNode(BitmapHeapScanState);
    scanstate->ss.ps.plan = (Plan *) node;
    scanstate->ss.ps.state = estate;

    scanstate->tbm = NULL;
    scanstate->tbmiterator = NULL;
    scanstate->tbmres = NULL;
    scanstate->exact_pages = 0;
    scanstate->lossy_pages = 0;
    scanstate->prefetch_iterator = NULL;
    scanstate->prefetch_pages = 0;
    scanstate->prefetch_target = 0;
    /* may be updated below */
    scanstate->prefetch_maximum = target_prefetch_pages;

    /*
     * Miscellaneous initialization
     *
     * create expression context for node
     */
    ExecAssignExprContext(estate, &scanstate->ss.ps);

    scanstate->ss.ps.ps_TupFromTlist = false;

    /*
     * initialize child expressions
     */
    scanstate->ss.ps.targetlist = (List *)
        ExecInitExpr((Expr *) node->scan.plan.targetlist,
                     (PlanState *) scanstate);
    scanstate->ss.ps.qual = (List *)
        ExecInitExpr((Expr *) node->scan.plan.qual,
                     (PlanState *) scanstate);
    scanstate->bitmapqualorig = (List *)
        ExecInitExpr((Expr *) node->bitmapqualorig,
                     (PlanState *) scanstate);

    /*
     * tuple table initialization
     */
    ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
    ExecInitScanTupleSlot(estate, &scanstate->ss);

    /*
     * open the base relation and acquire appropriate lock on it.
     */
    currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

    /*
     * Determine the maximum for prefetch_target.  If the tablespace has a
     * specific IO concurrency set, use that to compute the corresponding
     * maximum value; otherwise, we already initialized to the value computed
     * by the GUC machinery.
     */
    io_concurrency =
        get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
    if (io_concurrency != effective_io_concurrency)
    {
        double      maximum;

        if (ComputeIoConcurrency(io_concurrency, &maximum))
            scanstate->prefetch_maximum = rint(maximum);
    }

    scanstate->ss.ss_currentRelation = currentRelation;

    /*
     * Even though we aren't going to do a conventional seqscan, it is useful
     * to create a HeapScanDesc --- most of the fields in it are usable.
     */
    scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
                                                         estate->es_snapshot,
                                                         0,
                                                         NULL);

    /*
     * get the scan type from the relation descriptor.
     */
    ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));

    /*
     * Initialize result tuple type and projection info.
     */
    ExecAssignResultTypeFromTL(&scanstate->ss.ps);
    ExecAssignScanProjectionInfo(&scanstate->ss);

    /*
     * initialize child nodes
     *
     * We do this last because the child nodes will open indexscans on our
     * relation's indexes, and we want to be sure we have acquired a lock on
     * the relation first.
     */
    outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

    /*
     * all done.
     */
    return scanstate;
}
/* ----------------
 *		index_getnext - get the next heap tuple from a scan
 *
 * The result is the next heap tuple satisfying the scan keys and the
 * snapshot, or NULL if no more matching tuples exist.  On success,
 * the buffer containing the heap tuple is pinned (the pin will be dropped
 * at the next index_getnext or index_endscan).
 *
 * Note: caller must check scan->xs_recheck, and perform rechecking of the
 * scan keys if required.  We do not do that here because we don't have
 * enough information to do it efficiently in the general case.
 * ----------------
 */
HeapTuple
index_getnext(IndexScanDesc scan, ScanDirection direction)
{
    HeapTuple   heapTuple = &scan->xs_ctup;
    ItemPointer tid = &heapTuple->t_self;
    FmgrInfo   *procedure;

    SCAN_CHECKS;
    GET_SCAN_PROCEDURE(amgettuple);

    Assert(TransactionIdIsValid(RecentGlobalXmin));

    /*
     * We always reset xs_hot_dead; if we are here then either we are just
     * starting the scan, or we previously returned a visible tuple, and in
     * either case it's inappropriate to kill the prior index entry.
     */
    scan->xs_hot_dead = false;

    for (;;)
    {
        OffsetNumber offnum;
        bool        at_chain_start;
        Page        dp;

        if (scan->xs_next_hot != InvalidOffsetNumber)
        {
            /*
             * We are resuming scan of a HOT chain after having returned an
             * earlier member.  Must still hold pin on current heap page.
             */
            Assert(BufferIsValid(scan->xs_cbuf));
            Assert(ItemPointerGetBlockNumber(tid) ==
                   BufferGetBlockNumber(scan->xs_cbuf));
            Assert(TransactionIdIsValid(scan->xs_prev_xmax));
            offnum = scan->xs_next_hot;
            at_chain_start = false;
            scan->xs_next_hot = InvalidOffsetNumber;
        }
        else
        {
            bool        found;
            Buffer      prev_buf;

            /*
             * If we scanned a whole HOT chain and found only dead tuples,
             * tell index AM to kill its entry for that TID.  We do not do
             * this when in recovery because it may violate MVCC to do so.
             * See comments in RelationGetIndexScan().
             */
            if (!scan->xactStartedInRecovery)
                scan->kill_prior_tuple = scan->xs_hot_dead;

            /*
             * The AM's gettuple proc finds the next index entry matching the
             * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid).  It
             * should also set scan->xs_recheck, though we pay no attention
             * to that here.
             */
            found = DatumGetBool(FunctionCall2(procedure,
                                               PointerGetDatum(scan),
                                               Int32GetDatum(direction)));

            /* Reset kill flag immediately for safety */
            scan->kill_prior_tuple = false;

            /* If we're out of index entries, break out of outer loop */
            if (!found)
                break;

            pgstat_count_index_tuples(scan->indexRelation, 1);

            /* Switch to correct buffer if we don't have it already */
            prev_buf = scan->xs_cbuf;
            scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
                                                 scan->heapRelation,
                                                 ItemPointerGetBlockNumber(tid));

            /*
             * Prune page, but only if we weren't already on this page
             */
            if (prev_buf != scan->xs_cbuf)
                heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
                                    RecentGlobalXmin);

            /* Prepare to scan HOT chain starting at index-referenced offnum */
            offnum = ItemPointerGetOffsetNumber(tid);
            at_chain_start = true;

            /* We don't know what the first tuple's xmin should be */
            scan->xs_prev_xmax = InvalidTransactionId;

            /* Initialize flag to detect if all entries are dead */
            scan->xs_hot_dead = true;
        }

        /* Obtain share-lock on the buffer so we can examine visibility */
        LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);

        dp = (Page) BufferGetPage(scan->xs_cbuf);

        /* Scan through possible multiple members of HOT-chain */
        for (;;)
        {
            ItemId      lp;
            ItemPointer ctid;
            bool        valid;

            /* check for bogus TID */
            if (offnum < FirstOffsetNumber ||
                offnum > PageGetMaxOffsetNumber(dp))
                break;

            lp = PageGetItemId(dp, offnum);

            /* check for unused, dead, or redirected items */
            if (!ItemIdIsNormal(lp))
            {
                /* We should only see a redirect at start of chain */
                if (ItemIdIsRedirected(lp) && at_chain_start)
                {
                    /* Follow the redirect */
                    offnum = ItemIdGetRedirect(lp);
                    at_chain_start = false;
                    continue;
                }
                /* else must be end of chain */
                break;
            }

            /*
             * We must initialize all of *heapTuple (ie, scan->xs_ctup) since
             * it is returned to the executor on success.
             */
            heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
            heapTuple->t_len = ItemIdGetLength(lp);
            ItemPointerSetOffsetNumber(tid, offnum);
            heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation);
            ctid = &heapTuple->t_data->t_ctid;

            /*
             * Shouldn't see a HEAP_ONLY tuple at chain start.  (This test
             * should be unnecessary, since the chain root can't be removed
             * while we have pin on the index entry, but let's make it
             * anyway.)
             */
            if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
                break;

            /*
             * The xmin should match the previous xmax value, else chain is
             * broken.  (Note: this test is not optional because it protects
             * us against the case where the prior chain member's xmax
             * aborted since we looked at it.)
             */
            if (TransactionIdIsValid(scan->xs_prev_xmax) &&
                !TransactionIdEquals(scan->xs_prev_xmax,
                                     HeapTupleHeaderGetXmin(heapTuple->t_data)))
                break;

            /* If it's visible per the snapshot, we must return it */
            valid = HeapTupleSatisfiesVisibility(heapTuple, scan->xs_snapshot,
                                                 scan->xs_cbuf);

            CheckForSerializableConflictOut(valid, scan->heapRelation,
                                            heapTuple, scan->xs_cbuf);

            if (valid)
            {
                /*
                 * If the snapshot is MVCC, we know that it could accept at
                 * most one member of the HOT chain, so we can skip examining
                 * any more members.  Otherwise, check for continuation of
                 * the HOT-chain, and set state for next time.
                 */
                if (IsMVCCSnapshot(scan->xs_snapshot) &&
                    !IsolationIsSerializable())
                    scan->xs_next_hot = InvalidOffsetNumber;
                else if (HeapTupleIsHotUpdated(heapTuple))
                {
                    Assert(ItemPointerGetBlockNumber(ctid) ==
                           ItemPointerGetBlockNumber(tid));
                    scan->xs_next_hot = ItemPointerGetOffsetNumber(ctid);
                    scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
                }
                else
                    scan->xs_next_hot = InvalidOffsetNumber;

                PredicateLockTuple(scan->heapRelation, heapTuple);

                LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

                pgstat_count_heap_fetch(scan->indexRelation);

                return heapTuple;
            }

            /*
             * If we can't see it, maybe no one else can either.  Check to
             * see if the tuple is dead to all transactions.  If we find that
             * all the tuples in the HOT chain are dead, we'll signal the
             * index AM to not return that TID on future indexscans.
             */
            if (scan->xs_hot_dead &&
                HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
                                         scan->xs_cbuf) != HEAPTUPLE_DEAD)
                scan->xs_hot_dead = false;

            /*
             * Check to see if HOT chain continues past this tuple; if so
             * fetch the next offnum (we don't bother storing it into
             * xs_next_hot, but must store xs_prev_xmax), and loop around.
             */
            if (HeapTupleIsHotUpdated(heapTuple))
            {
                Assert(ItemPointerGetBlockNumber(ctid) ==
                       ItemPointerGetBlockNumber(tid));
                offnum = ItemPointerGetOffsetNumber(ctid);
                at_chain_start = false;
                scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
            }
            else
                break;          /* end of chain */
        }                       /* loop over a single HOT chain */

        LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

        /* Loop around to ask index AM for another TID */
        scan->xs_next_hot = InvalidOffsetNumber;
    }

    /* Release any held pin on a heap page */
    if (BufferIsValid(scan->xs_cbuf))
    {
        ReleaseBuffer(scan->xs_cbuf);
        scan->xs_cbuf = InvalidBuffer;
    }

    return NULL;                /* failure exit */
}
/* ----------------
 *		index_getnext - get the next heap tuple from a scan
 *
 * The result is the next heap tuple satisfying the scan keys and the
 * snapshot, or NULL if no more matching tuples exist.  On success,
 * the buffer containing the heap tuple is pinned (the pin will be dropped
 * at the next index_getnext or index_endscan).
 *
 * Note: caller must check scan->xs_recheck, and perform rechecking of the
 * scan keys if required.  We do not do that here because we don't have
 * enough information to do it efficiently in the general case.
 * ----------------
 */
HeapTuple
index_getnext(IndexScanDesc scan, ScanDirection direction)
{
    HeapTuple   heapTuple = &scan->xs_ctup;
    ItemPointer tid = &heapTuple->t_self;
    FmgrInfo   *procedure;
    bool        all_dead = false;

    SCAN_CHECKS;
    GET_SCAN_PROCEDURE(amgettuple);

    Assert(TransactionIdIsValid(RecentGlobalXmin));

    for (;;)
    {
        bool        got_heap_tuple;

        if (scan->xs_continue_hot)
        {
            /*
             * We are resuming scan of a HOT chain after having returned an
             * earlier member.  Must still hold pin on current heap page.
             */
            Assert(BufferIsValid(scan->xs_cbuf));
            Assert(ItemPointerGetBlockNumber(tid) ==
                   BufferGetBlockNumber(scan->xs_cbuf));
        }
        else
        {
            bool        found;
            Buffer      prev_buf;

            /*
             * If we scanned a whole HOT chain and found only dead tuples,
             * tell index AM to kill its entry for that TID.  We do not do
             * this when in recovery because it may violate MVCC to do so.
             * See comments in RelationGetIndexScan().
             */
            if (!scan->xactStartedInRecovery)
                scan->kill_prior_tuple = all_dead;

            /*
             * The AM's gettuple proc finds the next index entry matching the
             * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid).  It
             * should also set scan->xs_recheck, though we pay no attention
             * to that here.
             */
            found = DatumGetBool(FunctionCall2(procedure,
                                               PointerGetDatum(scan),
                                               Int32GetDatum(direction)));

            /* Reset kill flag immediately for safety */
            scan->kill_prior_tuple = false;

            /* If we're out of index entries, break out of outer loop */
            if (!found)
                break;

            pgstat_count_index_tuples(scan->indexRelation, 1);

            /* Switch to correct buffer if we don't have it already */
            prev_buf = scan->xs_cbuf;
            scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
                                                 scan->heapRelation,
                                                 ItemPointerGetBlockNumber(tid));

            /*
             * Prune page, but only if we weren't already on this page
             */
            if (prev_buf != scan->xs_cbuf)
                heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
                                    RecentGlobalXmin);
        }

        /* Obtain share-lock on the buffer so we can examine visibility */
        LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
        got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
                                                scan->xs_cbuf,
                                                scan->xs_snapshot,
                                                &scan->xs_ctup,
                                                &all_dead,
                                                !scan->xs_continue_hot);
        LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

        if (got_heap_tuple)
        {
            /*
             * Only in a non-MVCC snapshot can more than one member of the
             * HOT chain be visible.
             */
            scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
            pgstat_count_heap_fetch(scan->indexRelation);
            return heapTuple;
        }

        /* Loop around to ask index AM for another TID */
        scan->xs_continue_hot = false;
    }

    /* Release any held pin on a heap page */
    if (BufferIsValid(scan->xs_cbuf))
    {
        ReleaseBuffer(scan->xs_cbuf);
        scan->xs_cbuf = InvalidBuffer;
    }

    return NULL;                /* failure exit */
}
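/*
 * Hedged caller sketch for the one-call interface above: unlike the
 * index_getnext_tid/index_fetch_heap pair shown earlier, index_getnext
 * returns visible heap tuples directly.  Counting matches illustrates
 * the loop; a real caller must re-verify the scan keys on xs_recheck,
 * which this sketch only conservatively skips.
 */
static long
example_count_matches(IndexScanDesc scan)
{
    long        nmatches = 0;

    while (index_getnext(scan, ForwardScanDirection) != NULL)
    {
        if (scan->xs_recheck)
            continue;           /* illustration only; real code rechecks */
        nmatches++;
    }
    return nmatches;
}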
/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
    BitmapHeapScanState *scanstate;
    Relation    currentRelation;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * Assert caller didn't ask for an unsafe snapshot --- see comments at
     * head of file.
     */
    Assert(IsMVCCSnapshot(estate->es_snapshot));

    /*
     * create state structure
     */
    scanstate = makeNode(BitmapHeapScanState);
    scanstate->ss.ps.plan = (Plan *) node;
    scanstate->ss.ps.state = estate;

    scanstate->tbm = NULL;
    scanstate->tbmres = NULL;

    /*
     * Miscellaneous initialization
     *
     * create expression context for node
     */
    ExecAssignExprContext(estate, &scanstate->ss.ps);

    scanstate->ss.ps.ps_TupFromTlist = false;

    /*
     * initialize child expressions
     */
    scanstate->ss.ps.targetlist = (List *)
        ExecInitExpr((Expr *) node->scan.plan.targetlist,
                     (PlanState *) scanstate);
    scanstate->ss.ps.qual = (List *)
        ExecInitExpr((Expr *) node->scan.plan.qual,
                     (PlanState *) scanstate);
    scanstate->bitmapqualorig = (List *)
        ExecInitExpr((Expr *) node->bitmapqualorig,
                     (PlanState *) scanstate);

#define BITMAPHEAPSCAN_NSLOTS 2

    /*
     * tuple table initialization
     */
    ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
    ExecInitScanTupleSlot(estate, &scanstate->ss);

    /*
     * open the base relation and acquire appropriate lock on it.
     */
    currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);

    scanstate->ss.ss_currentRelation = currentRelation;

    /*
     * Even though we aren't going to do a conventional seqscan, it is useful
     * to create a HeapScanDesc --- this checks the relation size and sets up
     * statistical infrastructure for us.
     */
    scanstate->ss.ss_currentScanDesc = heap_beginscan(currentRelation,
                                                      estate->es_snapshot,
                                                      0,
                                                      NULL);

    /*
     * One problem is that heap_beginscan counts a "sequential scan" start,
     * when we actually aren't doing any such thing.  Reverse out the added
     * scan count.  (Eventually we may want to count bitmap scans
     * separately.)
     */
    pgstat_discount_heap_scan(&scanstate->ss.ss_currentScanDesc->rs_pgstat_info);

    /*
     * get the scan type from the relation descriptor.
     */
    ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));

    /*
     * Initialize result tuple type and projection info.
     */
    ExecAssignResultTypeFromTL(&scanstate->ss.ps);
    ExecAssignScanProjectionInfo(&scanstate->ss);

    /*
     * initialize child nodes
     *
     * We do this last because the child nodes will open indexscans on our
     * relation's indexes, and we want to be sure we have acquired a lock on
     * the relation first.
     */
    outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

    /*
     * all done.
     */
    return scanstate;
}
/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
    BitmapHeapScanState *scanstate;
    Relation    currentRelation;
    int         io_concurrency;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * Assert caller didn't ask for an unsafe snapshot --- see comments at
     * head of file.
     */
    Assert(IsMVCCSnapshot(estate->es_snapshot));

    /*
     * create state structure
     */
    scanstate = makeNode(BitmapHeapScanState);
    scanstate->ss.ps.plan = (Plan *) node;
    scanstate->ss.ps.state = estate;
    scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;

    scanstate->tbm = NULL;
    scanstate->tbmiterator = NULL;
    scanstate->tbmres = NULL;
    scanstate->skip_fetch = false;
    scanstate->vmbuffer = InvalidBuffer;
    scanstate->pvmbuffer = InvalidBuffer;
    scanstate->exact_pages = 0;
    scanstate->lossy_pages = 0;
    scanstate->prefetch_iterator = NULL;
    scanstate->prefetch_pages = 0;
    scanstate->prefetch_target = 0;
    /* may be updated below */
    scanstate->prefetch_maximum = target_prefetch_pages;
    scanstate->pscan_len = 0;
    scanstate->initialized = false;
    scanstate->shared_tbmiterator = NULL;
    scanstate->shared_prefetch_iterator = NULL;
    scanstate->pstate = NULL;

    /*
     * We can potentially skip fetching heap pages if we do not need any
     * columns of the table, either for checking non-indexable quals or for
     * returning data.  This test is a bit simplistic, as it checks the
     * stronger condition that there's no qual or return tlist at all.  But
     * in most cases it's probably not worth working harder than that.
     */
    scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
                                 node->scan.plan.targetlist == NIL);

    /*
     * Miscellaneous initialization
     *
     * create expression context for node
     */
    ExecAssignExprContext(estate, &scanstate->ss.ps);

    /*
     * open the scan relation
     */
    currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

    /*
     * initialize child nodes
     */
    outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

    /*
     * get the scan type from the relation descriptor.
     */
    ExecInitScanTupleSlot(estate, &scanstate->ss,
                          RelationGetDescr(currentRelation),
                          &TTSOpsBufferHeapTuple);

    /*
     * Initialize result type and projection.
     */
    ExecInitResultTypeTL(&scanstate->ss.ps);
    ExecAssignScanProjectionInfo(&scanstate->ss);

    /*
     * initialize child expressions
     */
    scanstate->ss.ps.qual =
        ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
    scanstate->bitmapqualorig =
        ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);

    /*
     * Determine the maximum for prefetch_target.  If the tablespace has a
     * specific IO concurrency set, use that to compute the corresponding
     * maximum value; otherwise, we already initialized to the value computed
     * by the GUC machinery.
     */
    io_concurrency =
        get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
    if (io_concurrency != effective_io_concurrency)
    {
        double      maximum;

        if (ComputeIoConcurrency(io_concurrency, &maximum))
            scanstate->prefetch_maximum = rint(maximum);
    }

    scanstate->ss.ss_currentRelation = currentRelation;

    /*
     * Even though we aren't going to do a conventional seqscan, it is useful
     * to create a HeapScanDesc --- most of the fields in it are usable.
     */
    scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
                                                         estate->es_snapshot,
                                                         0,
                                                         NULL);

    /*
     * all done.
     */
    return scanstate;
}