/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve next tuple from the BitmapHeapScan node's currentRelation
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	ExprContext *econtext;
	HeapScanDesc scan;
	TIDBitmap  *tbm;
	TBMIterator *tbmiterator;
	TBMIterateResult *tbmres;
#ifdef USE_PREFETCH
	TBMIterator *prefetch_iterator;
#endif
	OffsetNumber targoffset;
	TupleTableSlot *slot;

	/*
	 * extract necessary information from index scan node
	 */
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scan = node->ss.ss_currentScanDesc;
	tbm = node->tbm;
	tbmiterator = node->tbmiterator;
	tbmres = node->tbmres;
#ifdef USE_PREFETCH
	prefetch_iterator = node->prefetch_iterator;
#endif

	/*
	 * If we haven't yet performed the underlying index scan, do it, and
	 * begin the iteration over the bitmap.
	 *
	 * For prefetching, we use *two* iterators, one for the pages we are
	 * actually scanning and another that runs ahead of the first for
	 * prefetching.  node->prefetch_pages tracks exactly how many pages ahead
	 * the prefetch iterator is.  Also, node->prefetch_target tracks the
	 * desired prefetch distance, which starts small and increases up to the
	 * node->prefetch_maximum.  This is to avoid doing a lot of prefetching
	 * in a scan that stops after a few tuples because of a LIMIT.
	 */
	if (tbm == NULL)
	{
		tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

		if (!tbm || !IsA(tbm, TIDBitmap))
			elog(ERROR, "unrecognized result from subplan");

		node->tbm = tbm;
		node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
		node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
		if (node->prefetch_maximum > 0)
		{
			node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm);
			node->prefetch_pages = 0;
			node->prefetch_target = -1;
		}
#endif							/* USE_PREFETCH */
	}

	for (;;)
	{
		Page		dp;
		ItemId		lp;

		/*
		 * Get next page of results if needed
		 */
		if (tbmres == NULL)
		{
			node->tbmres = tbmres = tbm_iterate(tbmiterator);
			if (tbmres == NULL)
			{
				/* no more entries in the bitmap */
				break;
			}

#ifdef USE_PREFETCH
			if (node->prefetch_pages > 0)
			{
				/* The main iterator has closed the distance by one page */
				node->prefetch_pages--;
			}
			else if (prefetch_iterator)
			{
				/* Do not let the prefetch iterator get behind the main one */
				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

				if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
					elog(ERROR, "prefetch and main iterators are out of sync");
			}
#endif							/* USE_PREFETCH */

			/*
			 * Ignore any claimed entries past what we think is the end of
			 * the relation.  (This is probably not necessary given that we
			 * got at least AccessShareLock on the table before performing
			 * any of the indexscans, but let's be safe.)
			 */
			if (tbmres->blockno >= scan->rs_nblocks)
			{
				node->tbmres = tbmres = NULL;
				continue;
			}

			/*
			 * Fetch the current heap page and identify candidate tuples.
			 */
			bitgetpage(scan, tbmres);

			if (tbmres->ntuples >= 0)
				node->exact_pages++;
			else
				node->lossy_pages++;

			/*
			 * Set rs_cindex to first slot to examine
			 */
			scan->rs_cindex = 0;

#ifdef USE_PREFETCH

			/*
			 * Increase prefetch target if it's not yet at the max.  Note
			 * that we will increase it to zero after fetching the very
			 * first page/tuple, then to one after the second tuple is
			 * fetched, then it doubles as later pages are fetched.
			 */
			if (node->prefetch_target >= node->prefetch_maximum)
				 /* don't increase any further */ ;
			else if (node->prefetch_target >= node->prefetch_maximum / 2)
				node->prefetch_target = node->prefetch_maximum;
			else if (node->prefetch_target > 0)
				node->prefetch_target *= 2;
			else
				node->prefetch_target++;
#endif							/* USE_PREFETCH */
		}
		else
		{
			/*
			 * Continuing in previously obtained page; advance rs_cindex
			 */
			scan->rs_cindex++;

#ifdef USE_PREFETCH

			/*
			 * Try to prefetch at least a few pages even before we get to the
			 * second page if we don't stop reading after the first tuple.
			 */
			if (node->prefetch_target < node->prefetch_maximum)
				node->prefetch_target++;
#endif							/* USE_PREFETCH */
		}

		/*
		 * Out of range?  If so, nothing more to look at on this page
		 */
		if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
		{
			node->tbmres = tbmres = NULL;
			continue;
		}

#ifdef USE_PREFETCH

		/*
		 * We issue prefetch requests *after* fetching the current page to
		 * try to avoid having prefetching interfere with the main I/O.
		 * Also, this should happen only when we have determined there is
		 * still something to do on the current page, else we may uselessly
		 * prefetch the same page we are just about to request for real.
		 */
		if (prefetch_iterator)
		{
			while (node->prefetch_pages < node->prefetch_target)
			{
				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_iterate(prefetch_iterator);
					node->prefetch_iterator = prefetch_iterator = NULL;
					break;
				}
				node->prefetch_pages++;
				PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}
#endif							/* USE_PREFETCH */

		/*
		 * Okay to fetch the tuple
		 */
		targoffset = scan->rs_vistuples[scan->rs_cindex];
		dp = BufferGetPage(scan->rs_cbuf, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
		lp = PageGetItemId(dp, targoffset);
		Assert(ItemIdIsNormal(lp));

		scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
		scan->rs_ctup.t_len = ItemIdGetLength(lp);
		scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
		ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);

		pgstat_count_heap_fetch(scan->rs_rd);

		/*
		 * Set up the result slot to point to this tuple.  Note that the
		 * slot acquires a pin on the buffer.
		 */
		ExecStoreTuple(&scan->rs_ctup,
					   slot,
					   scan->rs_cbuf,
					   false);

		/*
		 * If we are using lossy info, we have to recheck the qual
		 * conditions at every tuple.
		 */
		if (tbmres->recheck)
		{
			econtext->ecxt_scantuple = slot;
			ResetExprContext(econtext);

			if (!ExecQual(node->bitmapqualorig, econtext, false))
			{
				/* Fails recheck, so drop it and loop back for another */
				InstrCountFiltered2(node, 1);
				ExecClearTuple(slot);
				continue;
			}
		}

		/* OK to return this tuple */
		return slot;
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	return ExecClearTuple(slot);
}
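/*
 * Illustrative sketch (not part of the PostgreSQL source; the function name
 * and the cap value are hypothetical): the ramp-up rule above grows
 * prefetch_target from its initial -1 up to prefetch_maximum, jumping
 * straight to the cap once it passes half of it.  In the real executor the
 * cap is derived from effective_io_concurrency; here a standalone model of
 * just that rule:
 */
#include <stdio.h>

static int
advance_prefetch_target(int target, int maximum)
{
	if (target >= maximum)
		return target;			/* don't increase any further */
	else if (target >= maximum / 2)
		return maximum;			/* close enough: jump to the cap */
	else if (target > 0)
		return target * 2;		/* exponential ramp */
	else
		return target + 1;		/* -1 -> 0 -> 1 */
}

int
main(void)
{
	int			target = -1;	/* initial value, as in BitmapHeapNext */
	int			maximum = 64;	/* hypothetical prefetch cap */

	for (int i = 0; i < 10; i++)
	{
		printf("%d ", target);
		target = advance_prefetch_target(target, maximum);
	}
	printf("\n");				/* prints: -1 0 1 2 4 8 16 32 64 64 */
	return 0;
}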
/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve next tuple from the BitmapHeapScan node's currentRelation
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	HeapScanDesc scan;
	Index		scanrelid;
	TIDBitmap  *tbm;
	TBMIterateResult *tbmres;
	OffsetNumber targoffset;
	TupleTableSlot *slot;

	/*
	 * extract necessary information from index scan node
	 */
	estate = node->ss.ps.state;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scan = node->ss.ss_currentScanDesc;
	scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
	tbm = node->tbm;
	tbmres = node->tbmres;

	/*
	 * Check if we are evaluating PlanQual for tuple of this relation.
	 * Additional checking is not good, but no other way for now.  We could
	 * introduce new nodes for this case and handle IndexScan --> NewNode
	 * switching in Init/ReScan plan...
	 */
	if (estate->es_evTuple != NULL &&
		estate->es_evTuple[scanrelid - 1] != NULL)
	{
		if (estate->es_evTupleNull[scanrelid - 1])
			return ExecClearTuple(slot);

		ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
					   slot, InvalidBuffer, false);

		/* Does the tuple meet the original qual conditions? */
		econtext->ecxt_scantuple = slot;

		ResetExprContext(econtext);

		if (!ExecQual(node->bitmapqualorig, econtext, false))
			ExecClearTuple(slot);	/* would not be returned by scan */

		/* Flag for the next call that no more tuples */
		estate->es_evTupleNull[scanrelid - 1] = true;

		return slot;
	}

	/*
	 * If we haven't yet performed the underlying index scan, do it, and
	 * prepare the bitmap to be iterated over.
	 */
	if (tbm == NULL)
	{
		tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

		if (!tbm || !IsA(tbm, TIDBitmap))
			elog(ERROR, "unrecognized result from subplan");

		node->tbm = tbm;
		node->tbmres = tbmres = NULL;

		tbm_begin_iterate(tbm);
	}

	for (;;)
	{
		Page		dp;
		ItemId		lp;

		/*
		 * Get next page of results if needed
		 */
		if (tbmres == NULL)
		{
			node->tbmres = tbmres = tbm_iterate(tbm);
			if (tbmres == NULL)
			{
				/* no more entries in the bitmap */
				break;
			}

			/*
			 * Ignore any claimed entries past what we think is the end of
			 * the relation.  (This is probably not necessary given that we
			 * got at least AccessShareLock on the table before performing
			 * any of the indexscans, but let's be safe.)
			 */
			if (tbmres->blockno >= scan->rs_nblocks)
			{
				node->tbmres = tbmres = NULL;
				continue;
			}

			/*
			 * Fetch the current heap page and identify candidate tuples.
			 */
			bitgetpage(scan, tbmres);

			/*
			 * Set rs_cindex to first slot to examine
			 */
			scan->rs_cindex = 0;
		}
		else
		{
			/*
			 * Continuing in previously obtained page; advance rs_cindex
			 */
			scan->rs_cindex++;
		}

		/*
		 * Out of range?  If so, nothing more to look at on this page
		 */
		if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
		{
			node->tbmres = tbmres = NULL;
			continue;
		}

		/*
		 * Okay to fetch the tuple
		 */
		targoffset = scan->rs_vistuples[scan->rs_cindex];
		dp = (Page) BufferGetPage(scan->rs_cbuf);
		lp = PageGetItemId(dp, targoffset);
		Assert(ItemIdIsUsed(lp));

		scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
		scan->rs_ctup.t_len = ItemIdGetLength(lp);
		ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);

		pgstat_count_heap_fetch(&scan->rs_pgstat_info);

		/*
		 * Set up the result slot to point to this tuple.  Note that the
		 * slot acquires a pin on the buffer.
		 */
		ExecStoreTuple(&scan->rs_ctup,
					   slot,
					   scan->rs_cbuf,
					   false);

		/*
		 * If we are using lossy info, we have to recheck the qual
		 * conditions at every tuple.
		 */
		if (tbmres->ntuples < 0)
		{
			econtext->ecxt_scantuple = slot;
			ResetExprContext(econtext);

			if (!ExecQual(node->bitmapqualorig, econtext, false))
			{
				/* Fails recheck, so drop it and loop back for another */
				ExecClearTuple(slot);
				continue;
			}
		}

		/* OK to return this tuple */
		return slot;
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	return ExecClearTuple(slot);
}
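/*
 * Illustrative sketch (not PostgreSQL code; the struct and names are
 * simplified stand-ins for TBMIterateResult in tidbitmap.h): in this older
 * version the lossy-page marker is tbmres->ntuples < 0, where later versions
 * also carry an explicit recheck flag.  A minimal standalone consumer
 * showing the two cases:
 */
#include <stdio.h>

typedef unsigned int DemoBlockNumber;
typedef unsigned short DemoOffsetNumber;

/* simplified model of a bitmap iterate result */
typedef struct
{
	DemoBlockNumber blockno;	/* heap page number */
	int			ntuples;		/* -1 means "lossy": offsets[] not valid */
	DemoOffsetNumber offsets[8];	/* exact tuple offsets when ntuples >= 0 */
} DemoIterateResult;

static void
consume_page(const DemoIterateResult *res)
{
	if (res->ntuples >= 0)
	{
		/* exact page: visit only the listed offsets */
		for (int i = 0; i < res->ntuples; i++)
			printf("block %u offset %u\n",
				   res->blockno, (unsigned) res->offsets[i]);
	}
	else
	{
		/*
		 * lossy page: every item on the page is a candidate, and the caller
		 * must recheck the original quals per tuple, as BitmapHeapNext does
		 */
		printf("block %u: lossy, scan whole page and recheck quals\n",
			   res->blockno);
	}
}

int
main(void)
{
	DemoIterateResult exact = {.blockno = 7, .ntuples = 2, .offsets = {1, 5}};
	DemoIterateResult lossy = {.blockno = 8, .ntuples = -1};

	consume_page(&exact);
	consume_page(&lossy);
	return 0;
}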
/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve next tuple from the BitmapHeapScan node's currentRelation
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	ExprContext *econtext;
	HeapScanDesc scan;
	TIDBitmap  *tbm;
	TBMIterator *tbmiterator = NULL;
	TBMSharedIterator *shared_tbmiterator = NULL;
	TBMIterateResult *tbmres;
	OffsetNumber targoffset;
	TupleTableSlot *slot;
	ParallelBitmapHeapState *pstate = node->pstate;
	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

	/*
	 * extract necessary information from index scan node
	 */
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scan = node->ss.ss_currentScanDesc;
	tbm = node->tbm;
	if (pstate == NULL)
		tbmiterator = node->tbmiterator;
	else
		shared_tbmiterator = node->shared_tbmiterator;
	tbmres = node->tbmres;

	/*
	 * If we haven't yet performed the underlying index scan, do it, and
	 * begin the iteration over the bitmap.
	 *
	 * For prefetching, we use *two* iterators, one for the pages we are
	 * actually scanning and another that runs ahead of the first for
	 * prefetching.  node->prefetch_pages tracks exactly how many pages ahead
	 * the prefetch iterator is.  Also, node->prefetch_target tracks the
	 * desired prefetch distance, which starts small and increases up to the
	 * node->prefetch_maximum.  This is to avoid doing a lot of prefetching
	 * in a scan that stops after a few tuples because of a LIMIT.
	 */
	if (!node->initialized)
	{
		if (!pstate)
		{
			tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

			if (!tbm || !IsA(tbm, TIDBitmap))
				elog(ERROR, "unrecognized result from subplan");

			node->tbm = tbm;
			node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
			node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
			if (node->prefetch_maximum > 0)
			{
				node->prefetch_iterator = tbm_begin_iterate(tbm);
				node->prefetch_pages = 0;
				node->prefetch_target = -1;
			}
#endif							/* USE_PREFETCH */
		}
		else
		{
			/*
			 * The leader will immediately come out of the function, but
			 * others will be blocked until leader populates the TBM and
			 * wakes them up.
			 */
			if (BitmapShouldInitializeSharedState(pstate))
			{
				tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
				if (!tbm || !IsA(tbm, TIDBitmap))
					elog(ERROR, "unrecognized result from subplan");

				node->tbm = tbm;

				/*
				 * Prepare to iterate over the TBM.  This will return the
				 * dsa_pointer of the iterator state which will be used by
				 * multiple processes to iterate jointly.
				 */
				pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
#ifdef USE_PREFETCH
				if (node->prefetch_maximum > 0)
				{
					pstate->prefetch_iterator =
						tbm_prepare_shared_iterate(tbm);

					/*
					 * We don't need the mutex here as we haven't yet woken
					 * up others.
					 */
					pstate->prefetch_pages = 0;
					pstate->prefetch_target = -1;
				}
#endif

				/* We have initialized the shared state so wake up others. */
				BitmapDoneInitializingSharedState(pstate);
			}

			/* Allocate a private iterator and attach the shared state to it */
			node->shared_tbmiterator = shared_tbmiterator =
				tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
			node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
			if (node->prefetch_maximum > 0)
			{
				node->shared_prefetch_iterator =
					tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
			}
#endif							/* USE_PREFETCH */
		}
		node->initialized = true;
	}

	for (;;)
	{
		Page		dp;
		ItemId		lp;

		CHECK_FOR_INTERRUPTS();

		/*
		 * Get next page of results if needed
		 */
		if (tbmres == NULL)
		{
			if (!pstate)
				node->tbmres = tbmres = tbm_iterate(tbmiterator);
			else
				node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
			if (tbmres == NULL)
			{
				/* no more entries in the bitmap */
				break;
			}

			BitmapAdjustPrefetchIterator(node, tbmres);

			/*
			 * Ignore any claimed entries past what we think is the end of
			 * the relation.  (This is probably not necessary given that we
			 * got at least AccessShareLock on the table before performing
			 * any of the indexscans, but let's be safe.)
			 */
			if (tbmres->blockno >= scan->rs_nblocks)
			{
				node->tbmres = tbmres = NULL;
				continue;
			}

			/*
			 * Fetch the current heap page and identify candidate tuples.
			 */
			bitgetpage(scan, tbmres);

			if (tbmres->ntuples >= 0)
				node->exact_pages++;
			else
				node->lossy_pages++;

			/*
			 * Set rs_cindex to first slot to examine
			 */
			scan->rs_cindex = 0;

			/* Adjust the prefetch target */
			BitmapAdjustPrefetchTarget(node);
		}
		else
		{
			/*
			 * Continuing in previously obtained page; advance rs_cindex
			 */
			scan->rs_cindex++;

#ifdef USE_PREFETCH

			/*
			 * Try to prefetch at least a few pages even before we get to the
			 * second page if we don't stop reading after the first tuple.
			 */
			if (!pstate)
			{
				if (node->prefetch_target < node->prefetch_maximum)
					node->prefetch_target++;
			}
			else if (pstate->prefetch_target < node->prefetch_maximum)
			{
				/* take spinlock while updating shared state */
				SpinLockAcquire(&pstate->mutex);
				if (pstate->prefetch_target < node->prefetch_maximum)
					pstate->prefetch_target++;
				SpinLockRelease(&pstate->mutex);
			}
#endif							/* USE_PREFETCH */
		}

		/*
		 * Out of range?  If so, nothing more to look at on this page
		 */
		if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
		{
			node->tbmres = tbmres = NULL;
			continue;
		}

		/*
		 * We issue prefetch requests *after* fetching the current page to
		 * try to avoid having prefetching interfere with the main I/O.
		 * Also, this should happen only when we have determined there is
		 * still something to do on the current page, else we may uselessly
		 * prefetch the same page we are just about to request for real.
		 */
		BitmapPrefetch(node, scan);

		/*
		 * Okay to fetch the tuple
		 */
		targoffset = scan->rs_vistuples[scan->rs_cindex];
		dp = (Page) BufferGetPage(scan->rs_cbuf);
		lp = PageGetItemId(dp, targoffset);
		Assert(ItemIdIsNormal(lp));

		scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
		scan->rs_ctup.t_len = ItemIdGetLength(lp);
		scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
		ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);

		pgstat_count_heap_fetch(scan->rs_rd);

		/*
		 * Set up the result slot to point to this tuple.  Note that the
		 * slot acquires a pin on the buffer.
		 */
		ExecStoreTuple(&scan->rs_ctup,
					   slot,
					   scan->rs_cbuf,
					   false);

		/*
		 * If we are using lossy info, we have to recheck the qual
		 * conditions at every tuple.
		 */
		if (tbmres->recheck)
		{
			econtext->ecxt_scantuple = slot;
			ResetExprContext(econtext);

			if (!ExecQual(node->bitmapqualorig, econtext))
			{
				/* Fails recheck, so drop it and loop back for another */
				InstrCountFiltered2(node, 1);
				ExecClearTuple(slot);
				continue;
			}
		}

		/* OK to return this tuple */
		return slot;
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	return ExecClearTuple(slot);
}
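/*
 * Illustrative sketch (not PostgreSQL code; names are hypothetical): the
 * BitmapShouldInitializeSharedState()/BitmapDoneInitializingSharedState()
 * pair used above implements a "first arrival builds, everyone else waits"
 * handshake.  A minimal pthread-based model of that protocol, assuming a
 * three-phase state machine like the one the executor keeps in shared
 * memory:
 */
#include <pthread.h>
#include <stdbool.h>

typedef enum
{
	DEMO_BM_INITIAL,			/* nobody has started building yet */
	DEMO_BM_INPROGRESS,			/* one process is building the bitmap */
	DEMO_BM_FINISHED			/* bitmap is ready for everyone */
} DemoBitmapPhase;

typedef struct
{
	pthread_mutex_t mutex;
	pthread_cond_t cv;
	DemoBitmapPhase phase;
} DemoSharedState;

/*
 * Returns true in exactly one caller, which must build the bitmap and then
 * call demo_done_initializing(); every other caller blocks here until the
 * bitmap is ready and then returns false.
 */
static bool
demo_should_initialize(DemoSharedState *st)
{
	bool		leader = false;

	pthread_mutex_lock(&st->mutex);
	if (st->phase == DEMO_BM_INITIAL)
	{
		st->phase = DEMO_BM_INPROGRESS;
		leader = true;
	}
	else
	{
		while (st->phase != DEMO_BM_FINISHED)
			pthread_cond_wait(&st->cv, &st->mutex);
	}
	pthread_mutex_unlock(&st->mutex);

	return leader;
}

static void
demo_done_initializing(DemoSharedState *st)
{
	pthread_mutex_lock(&st->mutex);
	st->phase = DEMO_BM_FINISHED;
	pthread_cond_broadcast(&st->cv);	/* wake all waiting workers */
	pthread_mutex_unlock(&st->mutex);
}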
/*
 *	BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
 */
static inline void
BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;

	if (pstate == NULL)
	{
		TBMIterator *prefetch_iterator = node->prefetch_iterator;

		if (prefetch_iterator)
		{
			while (node->prefetch_pages < node->prefetch_target)
			{
				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_iterate(prefetch_iterator);
					node->prefetch_iterator = NULL;
					break;
				}
				node->prefetch_pages++;
				PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}

		return;
	}

	if (pstate->prefetch_pages < pstate->prefetch_target)
	{
		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

		if (prefetch_iterator)
		{
			while (1)
			{
				TBMIterateResult *tbmpre;
				bool		do_prefetch = false;

				/*
				 * Recheck under the mutex.  If some other process has
				 * already done enough prefetching then we need not do
				 * anything.
				 */
				SpinLockAcquire(&pstate->mutex);
				if (pstate->prefetch_pages < pstate->prefetch_target)
				{
					pstate->prefetch_pages++;
					do_prefetch = true;
				}
				SpinLockRelease(&pstate->mutex);

				if (!do_prefetch)
					return;

				tbmpre = tbm_shared_iterate(prefetch_iterator);
				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_shared_iterate(prefetch_iterator);
					node->shared_prefetch_iterator = NULL;
					break;
				}

				PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}
	}
#endif							/* USE_PREFETCH */
}
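/*
 * Illustrative sketch (not PostgreSQL code; names are hypothetical): the
 * parallel branch above uses a classic optimistic-check/recheck-under-lock
 * claiming pattern.  The unlocked test of pstate->prefetch_pages against
 * pstate->prefetch_target is a cheap filter whose staleness is harmless,
 * because the claim is only made after retesting under the lock.  Reduced
 * to its core, with a pthread mutex standing in for the spinlock:
 */
#include <pthread.h>
#include <stdbool.h>

typedef struct
{
	pthread_mutex_t mutex;		/* stands in for pstate->mutex */
	int			prefetch_pages; /* pages claimed for prefetch so far */
	int			prefetch_target;	/* current desired prefetch distance */
} DemoPrefetchState;

/*
 * Try to claim one page to prefetch.  Returns true if the caller should
 * issue the prefetch, false if other workers have already done enough.
 */
static bool
demo_try_claim_prefetch(DemoPrefetchState *ps)
{
	bool		claimed = false;

	/* cheap unlocked pretest; a stale read just means we take the lock */
	if (ps->prefetch_pages >= ps->prefetch_target)
		return false;

	/* retest under the lock before actually claiming */
	pthread_mutex_lock(&ps->mutex);
	if (ps->prefetch_pages < ps->prefetch_target)
	{
		ps->prefetch_pages++;	/* claim the slot before unlocking */
		claimed = true;
	}
	pthread_mutex_unlock(&ps->mutex);

	return claimed;
}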
/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve next tuple from the BitmapHeapScan node's currentRelation
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	HeapScanDesc scandesc;
	Index		scanrelid;
	TIDBitmap  *tbm;
	TBMIterateResult *tbmres;
	OffsetNumber targoffset;
	TupleTableSlot *slot;
	OnDiskBitmapWords *odbm;
	ODBMIterateResult *odbmres;
	bool		inmem = false;

	/*
	 * extract necessary information from index scan node
	 */
	estate = node->ss.ps.state;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scandesc = node->ss.ss_currentScanDesc;
	scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
	tbm = node->tbm;
	tbmres = node->tbmres;
	odbm = node->odbm;
	odbmres = node->odbmres;

	/*
	 * Clear any reference to the previously returned tuple.  The idea here
	 * is to not have the tuple slot be the last holder of a pin on that
	 * tuple's buffer; if it is, we'll need a separate visit to the bufmgr
	 * to release the buffer.  By clearing here, we get to have the release
	 * done by ReleaseAndReadBuffer, below.
	 */
	ExecClearTuple(slot);

	/*
	 * Check if we are evaluating PlanQual for tuple of this relation.
	 * Additional checking is not good, but no other way for now.  We could
	 * introduce new nodes for this case and handle IndexScan --> NewNode
	 * switching in Init/ReScan plan...
	 */
	if (estate->es_evTuple != NULL &&
		estate->es_evTuple[scanrelid - 1] != NULL)
	{
		if (estate->es_evTupleNull[scanrelid - 1])
			return slot;		/* return empty slot */

		ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
					   slot, InvalidBuffer, false);

		/* Does the tuple meet the original qual conditions? */
		econtext->ecxt_scantuple = slot;

		ResetExprContext(econtext);

		if (!ExecQual(node->bitmapqualorig, econtext, false))
			ExecClearTuple(slot);	/* would not be returned by scan */

		/* Flag for the next call that no more tuples */
		estate->es_evTupleNull[scanrelid - 1] = true;

		return slot;
	}

	/* check if this requires in-mem bitmap scan or on-disk bitmap index. */
	inmem = ((BitmapHeapScan *) (((PlanState *) node)->plan))->inmem;

	/*
	 * If the underlying indexes are on-disk bitmap indexes
	 */
	if (!inmem)
	{
		uint64		nextTid = 0;

		if (odbm == NULL)
		{
			odbm = odbm_create(ODBM_MAX_WORDS);
			node->odbm = odbm;
		}

		if (odbmres == NULL)
		{
			odbmres = odbm_res_create(odbm);
			node->odbmres = odbmres;
		}

		for (;;)
		{
			/*
			 * If we have used up the words from the previous scan, or we
			 * haven't yet run the underlying index scan for words, then do
			 * it.
			 */
			if (odbm->numOfWords == 0 &&
				odbmres->nextTidLoc >= odbmres->numOfTids)
			{
				Plan	   *outerPlan = (((PlanState *) node)->lefttree)->plan;

				odbm_set_bitmaptype(outerPlan, false);

				odbm->firstTid = odbmres->nextTid;
				odbm->startNo = 0;
				odbm_set_child_resultnode(((PlanState *) node)->lefttree,
										  odbm);
				odbm = (OnDiskBitmapWords *)
					MultiExecProcNode(outerPlanState(node));

				if (!odbm || !IsA(odbm, OnDiskBitmapWords))
					elog(ERROR, "unrecognized result from subplan");

				odbm_begin_iterate(node->odbm, node->odbmres);
			}

			/* If we cannot find more words, then this scan is over. */
			if (odbm == NULL ||
				(odbm->numOfWords == 0 &&
				 odbmres->nextTidLoc >= odbmres->numOfTids))
				return ExecClearTuple(slot);

			nextTid = odbm_findnexttid(odbm, odbmres);

			if (nextTid == 0)
				continue;

			ItemPointerSet(&scandesc->rs_ctup.t_self,
						   (nextTid - 1) / MaxNumHeapTuples,
						   ((nextTid - 1) % MaxNumHeapTuples) + 1);

			/* fetch the heap tuple and see if it matches the snapshot. */
			if (heap_release_fetch(scandesc->rs_rd,
								   scandesc->rs_snapshot,
								   &scandesc->rs_ctup,
								   &scandesc->rs_cbuf,
								   true,
								   &scandesc->rs_pgstat_info))
			{
				/*
				 * Set up the result slot to point to this tuple.  Note that
				 * the slot acquires a pin on the buffer.
				 */
				ExecStoreTuple(&scandesc->rs_ctup,
							   slot,
							   scandesc->rs_cbuf,
							   false);

				/* return this tuple */
				return slot;
			}
		}
	}

	/*
	 * If we haven't yet performed the underlying index scan, do it, and
	 * prepare the bitmap to be iterated over.
	 */
	if (tbm == NULL)
	{
		tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

		if (!tbm || !IsA(tbm, TIDBitmap))
			elog(ERROR, "unrecognized result from subplan");

		node->tbm = tbm;
		node->tbmres = tbmres = NULL;

		tbm_begin_iterate(tbm);
	}

	for (;;)
	{
		/*
		 * Get next page of results if needed
		 */
		if (tbmres == NULL)
		{
			node->tbmres = tbmres = tbm_iterate(tbm);
			if (tbmres == NULL)
			{
				/* no more entries in the bitmap */
				break;
			}

			/*
			 * Ignore any claimed entries past what we think is the end of
			 * the relation.  (This is probably not necessary given that we
			 * got AccessShareLock before performing any of the indexscans,
			 * but let's be safe.)
			 */
			if (tbmres->blockno >= scandesc->rs_nblocks)
			{
				node->tbmres = tbmres = NULL;
				continue;
			}

			/*
			 * Acquire pin on the current heap page.  We'll hold the pin
			 * until done looking at the page.  We trade in any pin we held
			 * before.
			 */
			scandesc->rs_cbuf = ReleaseAndReadBuffer(scandesc->rs_cbuf,
													 scandesc->rs_rd,
													 tbmres->blockno);

			/*
			 * Determine how many entries we need to look at on this page.
			 * If the bitmap is lossy then we need to look at each physical
			 * item pointer; otherwise we just look through the offsets
			 * listed in tbmres.
			 */
			if (tbmres->ntuples >= 0)
			{
				/* non-lossy case */
				node->minslot = 0;
				node->maxslot = tbmres->ntuples - 1;
			}
			else
			{
				/* lossy case */
				Page		dp;

				LockBuffer(scandesc->rs_cbuf, BUFFER_LOCK_SHARE);
				dp = (Page) BufferGetPage(scandesc->rs_cbuf);

				node->minslot = FirstOffsetNumber;
				node->maxslot = PageGetMaxOffsetNumber(dp);

				LockBuffer(scandesc->rs_cbuf, BUFFER_LOCK_UNLOCK);
			}

			/*
			 * Set curslot to first slot to examine
			 */
			node->curslot = node->minslot;
		}
		else
		{
			/*
			 * Continuing in previously obtained page; advance curslot
			 */
			node->curslot++;
		}

		/*
		 * Out of range?  If so, nothing more to look at on this page
		 */
		if (node->curslot < node->minslot || node->curslot > node->maxslot)
		{
			node->tbmres = tbmres = NULL;
			continue;
		}

		/*
		 * Okay to try to fetch the tuple
		 */
		if (tbmres->ntuples >= 0)
		{
			/* non-lossy case */
			targoffset = tbmres->offsets[node->curslot];
		}
		else
		{
			/* lossy case */
			targoffset = (OffsetNumber) node->curslot;
		}

		ItemPointerSet(&scandesc->rs_ctup.t_self, tbmres->blockno, targoffset);

		/*
		 * Fetch the heap tuple and see if it matches the snapshot.  We use
		 * heap_release_fetch to avoid useless bufmgr traffic.
		 */
		if (heap_release_fetch(scandesc->rs_rd,
							   scandesc->rs_snapshot,
							   &scandesc->rs_ctup,
							   &scandesc->rs_cbuf,
							   true,
							   &scandesc->rs_pgstat_info))
		{
			/*
			 * Set up the result slot to point to this tuple.  Note that
			 * the slot acquires a pin on the buffer.
			 */
			ExecStoreTuple(&scandesc->rs_ctup,
						   slot,
						   scandesc->rs_cbuf,
						   false);

			/*
			 * If we are using lossy info, we have to recheck the qual
			 * conditions at every tuple.
			 */
			if (tbmres->ntuples < 0)
			{
				econtext->ecxt_scantuple = slot;
				ResetExprContext(econtext);

				if (!ExecQual(node->bitmapqualorig, econtext, false))
				{
					/* Fails recheck, so drop it and loop back for another */
					ExecClearTuple(slot);
					continue;
				}
			}

			/* OK to return this tuple */
			return slot;
		}

		/*
		 * Failed the snap, so loop back and try again.
		 */
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	return ExecClearTuple(slot);
}
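/*
 * Illustrative sketch (not part of the original source; DEMO_* names are
 * hypothetical, and the per-page capacity is a stand-in for whatever
 * MaxNumHeapTuples was defined as): the on-disk bitmap variant above
 * linearizes each heap TID into a single 1-based integer so it can be stored
 * as a bit position, and decodes it back with exactly the arithmetic used in
 * the ItemPointerSet call.  A worked round trip:
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_MAX_TUPLES_PER_PAGE 291	/* hypothetical MaxNumHeapTuples */

static uint64_t
demo_tid_encode(uint32_t block, uint16_t offset)	/* offset is 1-based */
{
	/* block 0, offset 1 maps to 1; 0 is reserved as a "no TID" sentinel */
	return (uint64_t) block * DEMO_MAX_TUPLES_PER_PAGE + offset;
}

static void
demo_tid_decode(uint64_t tid, uint32_t *block, uint16_t *offset)
{
	/* mirrors: (nextTid-1)/MaxNumHeapTuples, ((nextTid-1)%MaxNumHeapTuples)+1 */
	*block = (uint32_t) ((tid - 1) / DEMO_MAX_TUPLES_PER_PAGE);
	*offset = (uint16_t) ((tid - 1) % DEMO_MAX_TUPLES_PER_PAGE) + 1;
}

int
main(void)
{
	uint64_t	tid = demo_tid_encode(3, 17);	/* 3 * 291 + 17 = 890 */
	uint32_t	block;
	uint16_t	offset;

	demo_tid_decode(tid, &block, &offset);
	printf("tid=%llu -> block=%u offset=%u\n",
		   (unsigned long long) tid, block, (unsigned) offset);
	/* prints: tid=890 -> block=3 offset=17 */
	return 0;
}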
/*
 *	BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
 */
static inline void
BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;

	if (pstate == NULL)
	{
		TBMIterator *prefetch_iterator = node->prefetch_iterator;

		if (prefetch_iterator)
		{
			while (node->prefetch_pages < node->prefetch_target)
			{
				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
				bool		skip_fetch;

				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_iterate(prefetch_iterator);
					node->prefetch_iterator = NULL;
					break;
				}
				node->prefetch_pages++;

				/*
				 * If we expect not to have to actually read this heap page,
				 * skip this prefetch call, but continue to run the prefetch
				 * logic normally.  (Would it be better not to increment
				 * prefetch_pages?)
				 *
				 * This depends on the assumption that the index AM will
				 * report the same recheck flag for this future heap page as
				 * it did for the current heap page; which is not a certainty
				 * but is true in many cases.
				 */
				skip_fetch = (node->can_skip_fetch &&
							  (node->tbmres ? !node->tbmres->recheck : false) &&
							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
											 tbmpre->blockno,
											 &node->pvmbuffer));

				if (!skip_fetch)
					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}

		return;
	}

	if (pstate->prefetch_pages < pstate->prefetch_target)
	{
		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

		if (prefetch_iterator)
		{
			while (1)
			{
				TBMIterateResult *tbmpre;
				bool		do_prefetch = false;
				bool		skip_fetch;

				/*
				 * Recheck under the mutex.  If some other process has
				 * already done enough prefetching then we need not do
				 * anything.
				 */
				SpinLockAcquire(&pstate->mutex);
				if (pstate->prefetch_pages < pstate->prefetch_target)
				{
					pstate->prefetch_pages++;
					do_prefetch = true;
				}
				SpinLockRelease(&pstate->mutex);

				if (!do_prefetch)
					return;

				tbmpre = tbm_shared_iterate(prefetch_iterator);
				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_shared_iterate(prefetch_iterator);
					node->shared_prefetch_iterator = NULL;
					break;
				}

				/* As above, skip prefetch if we expect not to need page */
				skip_fetch = (node->can_skip_fetch &&
							  (node->tbmres ? !node->tbmres->recheck : false) &&
							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
											 tbmpre->blockno,
											 &node->pvmbuffer));

				if (!skip_fetch)
					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}
	}
#endif							/* USE_PREFETCH */
}
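/*
 * Illustrative sketch (not PostgreSQL code; the helper name is
 * hypothetical): the skip_fetch computation above combines three independent
 * conditions.  Pulled out as a predicate for clarity; note that in the real
 * code the recheck input is a guess taken from the *current* page's
 * TBMIterateResult, as the comment above explains:
 */
#include <stdbool.h>

static bool
demo_can_skip_heap_fetch(bool can_skip_fetch,	/* plan needs no tuple data */
						 bool recheck,	/* bitmap page expected to need recheck */
						 bool all_visible)	/* VM bit set for the block */
{
	/*
	 * Skip only when (a) the plan never examines tuple contents, (b) the
	 * bitmap identified qualifying tuples exactly, so no per-tuple recheck
	 * of the quals is needed, and (c) the visibility map says every tuple
	 * on the page is visible to all transactions, so the page isn't needed
	 * for visibility checks either.
	 */
	return can_skip_fetch && !recheck && all_visible;
}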