/*
 * BitmapHeapNextOnDisk
 *
 *		Next-tuple logic for the case where the child indexes are on-disk
 *		bitmap indexes (the plan's inmem flag is false).
 *
 *		Bitmap words are pulled from the child plan in batches; each TID found
 *		in a batch is fetched from the heap and returned if it is visible to
 *		the scan snapshot.  Returns the filled slot, or the cleared slot once
 *		no more words can be obtained.
 */
static TupleTableSlot *
BitmapHeapNextOnDisk(BitmapHeapScanState *node, TupleTableSlot *slot)
{
	HeapScanDesc heapscan = node->ss.ss_currentScanDesc;
	OnDiskBitmapWords *words = node->odbm;
	ODBMIterateResult *wordsres = node->odbmres;
	uint64		tid = 0;

	/* Create the word buffer and its iteration state on first use. */
	if (words == NULL)
	{
		words = odbm_create(ODBM_MAX_WORDS);
		node->odbm = words;
	}

	if (wordsres == NULL)
	{
		wordsres = odbm_res_create(words);
		node->odbmres = wordsres;
	}

	for (;;)
	{
		/*
		 * If the words from the previous batch are used up, or the underlying
		 * index scan has not been asked for words yet, fetch a new batch.
		 */
		if (words->numOfWords == 0 &&
			wordsres->nextTidLoc >= wordsres->numOfTids)
		{
			PlanState  *child = ((PlanState *) node)->lefttree;

			odbm_set_bitmaptype(child->plan, false);

			words->firstTid = wordsres->nextTid;
			words->startNo = 0;
			odbm_set_child_resultnode(child, words);

			words = (OnDiskBitmapWords *) MultiExecProcNode(outerPlanState(node));
			if (!words || !IsA(words, OnDiskBitmapWords))
				elog(ERROR, "unrecognized result from subplan");

			odbm_begin_iterate(node->odbm, node->odbmres);
		}

		/* Still nothing to look at: the scan is finished. */
		if (words == NULL ||
			(words->numOfWords == 0 &&
			 wordsres->nextTidLoc >= wordsres->numOfTids))
			return ExecClearTuple(slot);

		tid = odbm_findnexttid(words, wordsres);
		if (tid == 0)
			continue;

		/* TIDs are 1-based; split into heap block number and offset. */
		ItemPointerSet(&heapscan->rs_ctup.t_self,
					   (tid - 1) / MaxNumHeapTuples,
					   ((tid - 1) % MaxNumHeapTuples) + 1);

		/* Fetch the heap tuple; try the next TID if the snapshot rejects it. */
		if (!heap_release_fetch(heapscan->rs_rd,
								heapscan->rs_snapshot,
								&heapscan->rs_ctup,
								&heapscan->rs_cbuf,
								true,
								&heapscan->rs_pgstat_info))
			continue;

		/*
		 * Point the result slot at this tuple.  The slot acquires a pin on
		 * the buffer.
		 */
		ExecStoreTuple(&heapscan->rs_ctup, slot, heapscan->rs_cbuf, false);
		return slot;
	}
}

/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve the next tuple from the BitmapHeapScan node's
 *		currentRelation.
 *
 *		The scan tuple slot is returned in every case: it holds a tuple
 *		when one was found and is empty when the scan is exhausted.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	/* Pull everything we need out of the scan state node. */
	EState	   *estate = node->ss.ps.state;
	ExprContext *econtext = node->ss.ps.ps_ExprContext;
	TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
	HeapScanDesc heapscan = node->ss.ss_currentScanDesc;
	Index		evalidx = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid - 1;
	TIDBitmap  *bitmap = node->tbm;
	TBMIterateResult *pageres = node->tbmres;
	OffsetNumber offnum;

	/*
	 * Drop any reference to the previously returned tuple so that the slot is
	 * not the last holder of a pin on that tuple's buffer; otherwise a
	 * separate trip to the bufmgr would be needed to release it.  Clearing
	 * now lets ReleaseAndReadBuffer, below, do the release.
	 */
	ExecClearTuple(slot);

	/*
	 * Are we evaluating PlanQual for a tuple of this relation?  The extra
	 * check is not pretty, but there is no other way for now.  (New node
	 * types could be introduced for this case, with the switch handled in
	 * Init/ReScan plan.)
	 */
	if (estate->es_evTuple != NULL &&
		estate->es_evTuple[evalidx] != NULL)
	{
		if (estate->es_evTupleNull[evalidx])
			return slot;		/* return empty slot */

		ExecStoreTuple(estate->es_evTuple[evalidx], slot, InvalidBuffer, false);

		/* Does the tuple satisfy the original qual conditions? */
		econtext->ecxt_scantuple = slot;
		ResetExprContext(econtext);

		if (!ExecQual(node->bitmapqualorig, econtext, false))
			ExecClearTuple(slot);	/* would not be returned by scan */

		/* Tell the next call that there are no more tuples. */
		estate->es_evTupleNull[evalidx] = true;

		return slot;
	}

	/*
	 * Decide between the in-memory bitmap scan and the on-disk bitmap index
	 * path; the latter is handled entirely by the helper.
	 */
	if (!((BitmapHeapScan *) (((PlanState *) node)->plan))->inmem)
		return BitmapHeapNextOnDisk(node, slot);

	/*
	 * If the underlying index scan has not been run yet, run it now and get
	 * the bitmap ready for iteration.
	 */
	if (bitmap == NULL)
	{
		bitmap = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

		if (!bitmap || !IsA(bitmap, TIDBitmap))
			elog(ERROR, "unrecognized result from subplan");

		node->tbm = bitmap;
		node->tbmres = pageres = NULL;

		tbm_begin_iterate(bitmap);
	}

	for (;;)
	{
		bool		lossy;

		if (pageres != NULL)
		{
			/* Still on a previously obtained page: step to the next slot. */
			node->curslot++;
		}
		else
		{
			/* Need the next page of results. */
			node->tbmres = pageres = tbm_iterate(bitmap);
			if (pageres == NULL)
				break;			/* no more entries in the bitmap */

			/*
			 * Skip any claimed entries beyond what we believe is the end of
			 * the relation.  (Probably unnecessary since AccessShareLock was
			 * taken before any of the indexscans ran, but be safe.)
			 */
			if (pageres->blockno >= heapscan->rs_nblocks)
			{
				node->tbmres = pageres = NULL;
				continue;
			}

			/*
			 * Pin the current heap page, trading in any pin held before.  The
			 * pin is kept until we are done with the page.
			 */
			heapscan->rs_cbuf = ReleaseAndReadBuffer(heapscan->rs_cbuf,
													 heapscan->rs_rd,
													 pageres->blockno);

			/*
			 * Work out how many entries to examine on this page: every
			 * physical item pointer when the bitmap is lossy, otherwise just
			 * the offsets listed in the iterate result.
			 */
			if (pageres->ntuples < 0)
			{
				/* lossy case */
				Page		dp;

				LockBuffer(heapscan->rs_cbuf, BUFFER_LOCK_SHARE);
				dp = (Page) BufferGetPage(heapscan->rs_cbuf);

				node->minslot = FirstOffsetNumber;
				node->maxslot = PageGetMaxOffsetNumber(dp);

				LockBuffer(heapscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
			}
			else
			{
				/* non-lossy case */
				node->minslot = 0;
				node->maxslot = pageres->ntuples - 1;
			}

			/* Start at the first slot to examine. */
			node->curslot = node->minslot;
		}

		/* Out of range?  Then this page has nothing more for us. */
		if (node->curslot < node->minslot || node->curslot > node->maxslot)
		{
			node->tbmres = pageres = NULL;
			continue;
		}

		/*
		 * Okay to try to fetch the tuple.  In the lossy case curslot is itself
		 * the offset; otherwise it indexes the offsets array.
		 */
		lossy = (pageres->ntuples < 0);
		offnum = lossy ? (OffsetNumber) node->curslot
			: pageres->offsets[node->curslot];

		ItemPointerSet(&heapscan->rs_ctup.t_self, pageres->blockno, offnum);

		/*
		 * Fetch the heap tuple and test it against the snapshot, using
		 * heap_release_fetch to avoid useless bufmgr traffic.  On failure
		 * simply go around again.
		 */
		if (!heap_release_fetch(heapscan->rs_rd,
								heapscan->rs_snapshot,
								&heapscan->rs_ctup,
								&heapscan->rs_cbuf,
								true,
								&heapscan->rs_pgstat_info))
			continue;

		/*
		 * Point the result slot at this tuple.  The slot acquires a pin on
		 * the buffer.
		 */
		ExecStoreTuple(&heapscan->rs_ctup, slot, heapscan->rs_cbuf, false);

		/* With lossy info the qual must be rechecked for every tuple. */
		if (lossy)
		{
			econtext->ecxt_scantuple = slot;
			ResetExprContext(econtext);

			if (!ExecQual(node->bitmapqualorig, econtext, false))
			{
				/* Fails recheck, so drop it and loop back for another */
				ExecClearTuple(slot);
				continue;
			}
		}

		/* OK to return this tuple */
		return slot;
	}

	/* Falling out of the loop means the scan has reached its end. */
	return ExecClearTuple(slot);
}
/*
 * _bt_validate_vacuum
 *
 * Post vacuum, iterate over all entries in index, check if the h_tid
 * of each entry exists and is not dead.  For specific system tables,
 * also ensure that the key in index entry matches the corresponding
 * attribute in the heap tuple.
 *
 * irel is the btree index to validate, hrel is the heap it indexes, and
 * oldest_xmin is the cutoff handed to HeapTupleSatisfiesVacuum.
 *
 * Nothing is returned: every inconsistency is reported via elog(ERROR).
 *
 * Oids, block numbers and offset numbers are reported with %u since they
 * are unsigned quantities; %d would print large values as negative numbers.
 */
void
_bt_validate_vacuum(Relation irel, Relation hrel, TransactionId oldest_xmin)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	BlockNumber blkno;
	BlockNumber num_pages;
	Buffer		ibuf = InvalidBuffer;
	Buffer		hbuf = InvalidBuffer;
	Page		ipage;
	BTPageOpaque opaque;
	IndexTuple	itup;
	HeapTupleData htup;
	OffsetNumber maxoff,
				minoff,
				offnum;
	Oid			ioid,
				hoid;
	bool		isnull;

	/* Start right after the metapage. */
	blkno = BTREE_METAPAGE + 1;
	num_pages = RelationGetNumberOfBlocks(irel);

	elog(LOG, "btvalidatevacuum: index %s, heap %s",
		 RelationGetRelationName(irel), RelationGetRelationName(hrel));

	MIRROREDLOCK_BUFMGR_LOCK;

	for (; blkno < num_pages; blkno++)
	{
		ibuf = ReadBuffer(irel, blkno);
		ipage = BufferGetPage(ibuf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(ipage);

		if (!PageIsNew(ipage))
			_bt_checkpage(irel, ibuf);

		/* Only leaf pages carry heap TIDs worth validating. */
		if (P_ISLEAF(opaque))
		{
			minoff = P_FIRSTDATAKEY(opaque);
			maxoff = PageGetMaxOffsetNumber(ipage);

			for (offnum = minoff;
				 offnum <= maxoff;
				 offnum = OffsetNumberNext(offnum))
			{
				itup = (IndexTuple) PageGetItem(ipage,
												PageGetItemId(ipage, offnum));
				ItemPointerCopy(&itup->t_tid, &htup.t_self);

				/*
				 * TODO: construct a tid bitmap based on index tids
				 * and fetch heap tids in order afterwards.  That will
				 * also allow validating if a heap tid appears twice
				 * in a unique index.
				 */
				if (!heap_release_fetch(hrel, SnapshotAny, &htup,
										&hbuf, true, NULL))
				{
					elog(ERROR, "btvalidatevacuum: tid (%u,%u) from index %s "
						 "not found in heap %s",
						 ItemPointerGetBlockNumber(&itup->t_tid),
						 ItemPointerGetOffsetNumber(&itup->t_tid),
						 RelationGetRelationName(irel),
						 RelationGetRelationName(hrel));
				}

				/* The heap tuple must not be one vacuum should have removed. */
				switch (HeapTupleSatisfiesVacuum(hrel, htup.t_data,
												 oldest_xmin, hbuf))
				{
					case HEAPTUPLE_RECENTLY_DEAD:
					case HEAPTUPLE_LIVE:
					case HEAPTUPLE_INSERT_IN_PROGRESS:
					case HEAPTUPLE_DELETE_IN_PROGRESS:
						/* these tuples are considered alive by vacuum */
						break;

					case HEAPTUPLE_DEAD:
						elog(ERROR, "btvalidatevacuum: vacuum did not remove "
							 "dead tuple (%u,%u) from heap %s and index %s",
							 ItemPointerGetBlockNumber(&itup->t_tid),
							 ItemPointerGetOffsetNumber(&itup->t_tid),
							 RelationGetRelationName(hrel),
							 RelationGetRelationName(irel));
						break;

					default:
						elog(ERROR, "btvalidatevacuum: invalid visibility");
						break;
				}

				/*
				 * For selected catalog indexes, cross-check the index key
				 * against the heap tuple.
				 */
				switch (RelationGetRelid(irel))
				{
					case DatabaseOidIndexId:
					case TypeOidIndexId:
					case ClassOidIndexId:
					case ConstraintOidIndexId:
						/* first index column must equal the heap tuple's oid */
						hoid = HeapTupleGetOid(&htup);
						ioid = index_getattr(itup, 1,
											 RelationGetDescr(irel), &isnull);
						if (hoid != ioid)
						{
							elog(ERROR,
								 "btvalidatevacuum: index oid(%u) != heap oid(%u)"
								 " tuple (%u,%u) index %s", ioid, hoid,
								 ItemPointerGetBlockNumber(&itup->t_tid),
								 ItemPointerGetOffsetNumber(&itup->t_tid),
								 RelationGetRelationName(irel));
						}
						break;

					case GpRelationNodeOidIndexId:
						{
							int4		hsegno;
							int4		isegno;

							/* column 1 of index and heap must agree ... */
							hoid = heap_getattr(&htup, 1,
												RelationGetDescr(hrel), &isnull);
							ioid = index_getattr(itup, 1,
												 RelationGetDescr(irel), &isnull);
							if (hoid != ioid)
							{
								elog(ERROR,
									 "btvalidatevacuum: index oid(%u) != heap oid(%u)"
									 " tuple (%u,%u) index %s", ioid, hoid,
									 ItemPointerGetBlockNumber(&itup->t_tid),
									 ItemPointerGetOffsetNumber(&itup->t_tid),
									 RelationGetRelationName(irel));
							}

							/* ... and so must column 2, the segment number */
							hsegno = heap_getattr(&htup, 2,
												  RelationGetDescr(hrel), &isnull);
							isegno = index_getattr(itup, 2,
												   RelationGetDescr(irel), &isnull);
							if (isegno != hsegno)
							{
								elog(ERROR,
									 "btvalidatevacuum: index segno(%d) != heap segno(%d)"
									 " tuple (%u,%u) index %s", isegno, hsegno,
									 ItemPointerGetBlockNumber(&itup->t_tid),
									 ItemPointerGetOffsetNumber(&itup->t_tid),
									 RelationGetRelationName(irel));
							}
						}
						break;

					default:
						break;
				}

				/*
				 * Indexes in the AO segment namespace: column 1 of the index
				 * must match column 1 of the heap tuple.
				 */
				if (RelationGetNamespace(irel) == PG_AOSEGMENT_NAMESPACE)
				{
					int4		isegno = index_getattr(itup, 1,
													   RelationGetDescr(irel),
													   &isnull);
					int4		hsegno = heap_getattr(&htup, 1,
													  RelationGetDescr(hrel),
													  &isnull);

					if (isegno != hsegno)
					{
						elog(ERROR,
							 "btvalidatevacuum: index segno(%d) != heap segno(%d)"
							 " tuple (%u,%u) index %s", isegno, hsegno,
							 ItemPointerGetBlockNumber(&itup->t_tid),
							 ItemPointerGetOffsetNumber(&itup->t_tid),
							 RelationGetRelationName(irel));
					}
				}
			}
		}

		/* Done with this index page. */
		if (BufferIsValid(ibuf))
			ReleaseBuffer(ibuf);
	}

	/* Release whatever heap buffer the last fetch left in hbuf. */
	if (BufferIsValid(hbuf))
		ReleaseBuffer(hbuf);

	MIRROREDLOCK_BUFMGR_UNLOCK;
}
/* ----------------
 *		index_getnext - get the next heap tuple from a scan
 *
 * Returns the next heap tuple that satisfies both the scan keys and the
 * snapshot, or NULL when no further matching tuples exist.  On success the
 * buffer holding the heap tuple stays pinned; that pin is dropped by the
 * next index_getnext or by index_endscan.
 * ----------------
 */
HeapTuple
index_getnext(IndexScanDesc scan, ScanDirection direction)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	HeapTuple	tuple = &scan->xs_ctup;
	FmgrInfo   *procedure;		/* presumably assigned by GET_SCAN_PROCEDURE */

	SCAN_CHECKS;
	GET_SCAN_PROCEDURE(amgettuple);

	/* Make sure the kill flag starts out false. */
	scan->kill_prior_tuple = false;

	for (;;)
	{
		/* Ask the AM's gettuple proc for the next entry matching the keys. */
		bool		matched = DatumGetBool(FunctionCall2(procedure,
														 PointerGetDatum(scan),
														 Int32GetDatum(direction)));

		/* Reset kill flag immediately for safety */
		scan->kill_prior_tuple = false;

		if (!matched)
		{
			/* Failure exit: give back any pin still held on a heap page. */
			if (BufferIsValid(scan->xs_cbuf))
			{
				ReleaseBuffer(scan->xs_cbuf);
				scan->xs_cbuf = InvalidBuffer;
			}
			return NULL;
		}

		pgstat_count_index_tuples(scan->indexRelation, 1);

		/* Fetch the heap tuple; if the snapshot accepts it we are done. */
		if (heap_release_fetch(scan->heapRelation, scan->xs_snapshot,
							   tuple, &scan->xs_cbuf, true,
							   scan->indexRelation))
			return tuple;		/* success exit */

		/*
		 * Only when an undeleted tuple exists at this location is there
		 * anything further to examine; otherwise just move on.
		 *
		 * The tuple is invisible to us, and possibly to everyone else as
		 * well.  If it is dead to all transactions, signal the index AM not
		 * to return it on future indexscans.
		 *
		 * heap_release_fetch was told to keep the buffer pinned, so the tuple
		 * can be re-examined here, but the buffer must be re-locked first.
		 */
		if (tuple->t_data != NULL)
		{
			bool		deadToAll;

			/* -------- MirroredLock ---------- */
			MIRROREDLOCK_BUFMGR_LOCK;

			LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);

			deadToAll = (HeapTupleSatisfiesVacuum(tuple->t_data,
												  RecentGlobalXmin,
												  scan->xs_cbuf,
												  true) == HEAPTUPLE_DEAD);
			if (deadToAll)
				scan->kill_prior_tuple = true;

			LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);

			MIRROREDLOCK_BUFMGR_UNLOCK;
			/* -------- MirroredLock ---------- */
		}
	}
}