/* * Start remote transaction or subtransaction, if needed. * * Note that we always use at least REPEATABLE READ in the remote session. * This is so that, if a query initiates multiple scans of the same or * different foreign tables, we will get snapshot-consistent results from * those scans. A disadvantage is that we can't provide sane emulation of * READ COMMITTED behavior --- it would be nice if we had some other way to * control which remote queries share a snapshot. */ static void begin_remote_xact(ConnCacheEntry *entry) { int curlevel = GetCurrentTransactionNestLevel(); /* Start main transaction if we haven't yet */ if (entry->xact_depth <= 0) { const char *sql; elog(DEBUG3, "starting remote transaction on connection %p", entry->conn); if (IsolationIsSerializable()) sql = "START TRANSACTION ISOLATION LEVEL SERIALIZABLE"; else sql = "START TRANSACTION ISOLATION LEVEL REPEATABLE READ"; do_sql_command(entry->conn, sql); entry->xact_depth = 1; } /* * If we're in a subtransaction, stack up savepoints to match our level. * This ensures we can rollback just the desired effects when a * subtransaction aborts. */ while (entry->xact_depth < curlevel) { char sql[64]; snprintf(sql, sizeof(sql), "SAVEPOINT s%d", entry->xact_depth + 1); do_sql_command(entry->conn, sql); entry->xact_depth++; } }
/* * GetTransactionSnapshot * Get the appropriate snapshot for a new query in a transaction. * * Note that the return value may point at static storage that will be modified * by future calls and by CommandCounterIncrement(). Callers should call * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be * used very long. */ Snapshot GetTransactionSnapshot(void) { /* First call in transaction? */ if (!FirstSnapshotSet) { Assert(RegisteredSnapshots == 0); Assert(FirstXactSnapshot == NULL); /* * In transaction-snapshot mode, the first snapshot must live until * end of xact regardless of what the caller does with it, so we must * make a copy of it rather than returning CurrentSnapshotData * directly. Furthermore, if we're running in serializable mode, * predicate.c needs to wrap the snapshot fetch in its own processing. */ if (IsolationUsesXactSnapshot()) { /* First, create the snapshot in CurrentSnapshotData */ if (IsolationIsSerializable()) CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData); else CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); /* Make a saved copy */ CurrentSnapshot = CopySnapshot(CurrentSnapshot); FirstXactSnapshot = CurrentSnapshot; /* Mark it as "registered" in FirstXactSnapshot */ FirstXactSnapshot->regd_count++; RegisteredSnapshots++; } else CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); FirstSnapshotSet = true; return CurrentSnapshot; } if (IsolationUsesXactSnapshot()) return CurrentSnapshot; CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); return CurrentSnapshot; }
/* * GetTransactionSnapshot * Get the appropriate snapshot for a new query in a transaction. * * Note that the return value may point at static storage that will be modified * by future calls and by CommandCounterIncrement(). Callers should call * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be * used very long. */ Snapshot GetTransactionSnapshot(void) { /* First call in transaction? */ if (!FirstSnapshotSet) { Assert(RegisteredSnapshots == 0); /* * In transaction-snapshot mode, the first snapshot must live until * end of xact regardless of what the caller does with it, so we must * register it internally here and unregister it at end of xact. */ if (IsolationUsesXactSnapshot()) { if (IsolationIsSerializable()) CurrentSnapshot = RegisterSerializableTransaction(&CurrentSnapshotData); else { CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); CurrentSnapshot = RegisterSnapshotOnOwner(CurrentSnapshot, TopTransactionResourceOwner); } registered_xact_snapshot = true; } else CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); FirstSnapshotSet = true; return CurrentSnapshot; } if (IsolationUsesXactSnapshot()) return CurrentSnapshot; CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); return CurrentSnapshot; }
/* * ImportSnapshot * Import a previously exported snapshot. The argument should be a * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file. * This is called by "SET TRANSACTION SNAPSHOT 'foo'". */ void ImportSnapshot(const char *idstr) { char path[MAXPGPATH]; FILE *f; struct stat stat_buf; char *filebuf; int xcnt; int i; TransactionId src_xid; Oid src_dbid; int src_isolevel; bool src_readonly; SnapshotData snapshot; /* * Must be at top level of a fresh transaction. Note in particular that * we check we haven't acquired an XID --- if we have, it's conceivable * that the snapshot would show it as not running, making for very * screwy behavior. */ if (FirstSnapshotSet || GetTopTransactionIdIfAny() != InvalidTransactionId || IsSubTransaction()) ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), errmsg("SET TRANSACTION SNAPSHOT must be called before any query"))); /* * If we are in read committed mode then the next query would execute * with a new snapshot thus making this function call quite useless. */ if (!IsolationUsesXactSnapshot()) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ"))); /* * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do * this mainly to prevent reading arbitrary files. */ if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid snapshot identifier \"%s\"", idstr))); /* OK, read the file */ snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr); f = AllocateFile(path, PG_BINARY_R); if (!f) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid snapshot identifier \"%s\"", idstr))); /* get the size of the file so that we know how much memory we need */ if (fstat(fileno(f), &stat_buf)) elog(ERROR, "could not stat file \"%s\": %m", path); /* and read the file into a palloc'd string */ filebuf = (char *) palloc(stat_buf.st_size + 1); if (fread(filebuf, stat_buf.st_size, 1, f) != 1) elog(ERROR, "could not read file \"%s\": %m", path); filebuf[stat_buf.st_size] = '\0'; FreeFile(f); /* * Construct a snapshot struct by parsing the file content. */ memset(&snapshot, 0, sizeof(snapshot)); src_xid = parseXidFromText("xid:", &filebuf, path); /* we abuse parseXidFromText a bit here ... */ src_dbid = parseXidFromText("dbid:", &filebuf, path); src_isolevel = parseIntFromText("iso:", &filebuf, path); src_readonly = parseIntFromText("ro:", &filebuf, path); snapshot.xmin = parseXidFromText("xmin:", &filebuf, path); snapshot.xmax = parseXidFromText("xmax:", &filebuf, path); snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path); /* sanity-check the xid count before palloc */ if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount()) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", path))); snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId)); for (i = 0; i < xcnt; i++) snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path); snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path); if (!snapshot.suboverflowed) { snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path); /* sanity-check the xid count before palloc */ if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount()) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", path))); snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId)); for (i = 0; i < xcnt; i++) snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path); } else { snapshot.subxcnt = 0; snapshot.subxip = NULL; } snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path); /* * Do some additional sanity checking, just to protect ourselves. We * don't trouble to check the array elements, just the most critical * fields. */ if (!TransactionIdIsNormal(src_xid) || !OidIsValid(src_dbid) || !TransactionIdIsNormal(snapshot.xmin) || !TransactionIdIsNormal(snapshot.xmax)) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", path))); /* * If we're serializable, the source transaction must be too, otherwise * predicate.c has problems (SxactGlobalXmin could go backwards). Also, * a non-read-only transaction can't adopt a snapshot from a read-only * transaction, as predicate.c handles the cases very differently. */ if (IsolationIsSerializable()) { if (src_isolevel != XACT_SERIALIZABLE) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction"))); if (src_readonly && !XactReadOnly) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction"))); } /* * We cannot import a snapshot that was taken in a different database, * because vacuum calculates OldestXmin on a per-database basis; so the * source transaction's xmin doesn't protect us from data loss. This * restriction could be removed if the source transaction were to mark * its xmin as being globally applicable. But that would require some * additional syntax, since that has to be known when the snapshot is * initially taken. (See pgsql-hackers discussion of 2011-10-21.) */ if (src_dbid != MyDatabaseId) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot import a snapshot from a different database"))); /* OK, install the snapshot */ SetTransactionSnapshot(&snapshot, src_xid); }
/* * SetTransactionSnapshot * Set the transaction's snapshot from an imported MVCC snapshot. * * Note that this is very closely tied to GetTransactionSnapshot --- it * must take care of all the same considerations as the first-snapshot case * in GetTransactionSnapshot. */ static void SetTransactionSnapshot(Snapshot sourcesnap, TransactionId sourcexid) { /* Caller should have checked this already */ Assert(!FirstSnapshotSet); Assert(RegisteredSnapshots == 0); Assert(FirstXactSnapshot == NULL); /* * Even though we are not going to use the snapshot it computes, we must * call GetSnapshotData, for two reasons: (1) to be sure that * CurrentSnapshotData's XID arrays have been allocated, and (2) to update * RecentXmin and RecentGlobalXmin. (We could alternatively include those * two variables in exported snapshot files, but it seems better to have * snapshot importers compute reasonably up-to-date values for them.) */ CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); /* * Now copy appropriate fields from the source snapshot. */ CurrentSnapshot->xmin = sourcesnap->xmin; CurrentSnapshot->xmax = sourcesnap->xmax; CurrentSnapshot->xcnt = sourcesnap->xcnt; Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount()); memcpy(CurrentSnapshot->xip, sourcesnap->xip, sourcesnap->xcnt * sizeof(TransactionId)); CurrentSnapshot->subxcnt = sourcesnap->subxcnt; Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount()); memcpy(CurrentSnapshot->subxip, sourcesnap->subxip, sourcesnap->subxcnt * sizeof(TransactionId)); CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed; CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery; /* NB: curcid should NOT be copied, it's a local matter */ /* * Now we have to fix what GetSnapshotData did with MyProc->xmin and * TransactionXmin. There is a race condition: to make sure we are not * causing the global xmin to go backwards, we have to test that the * source transaction is still running, and that has to be done atomically. * So let procarray.c do it. * * Note: in serializable mode, predicate.c will do this a second time. * It doesn't seem worth contorting the logic here to avoid two calls, * especially since it's not clear that predicate.c *must* do this. */ if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcexid)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("could not import the requested snapshot"), errdetail("The source transaction %u is not running anymore.", sourcexid))); /* * In transaction-snapshot mode, the first snapshot must live until end of * xact, so we must make a copy of it. Furthermore, if we're running in * serializable mode, predicate.c needs to do its own processing. */ if (IsolationUsesXactSnapshot()) { if (IsolationIsSerializable()) SetSerializableTransactionSnapshot(CurrentSnapshot, sourcexid); /* Make a saved copy */ CurrentSnapshot = CopySnapshot(CurrentSnapshot); FirstXactSnapshot = CurrentSnapshot; /* Mark it as "registered" in FirstXactSnapshot */ FirstXactSnapshot->regd_count++; RegisteredSnapshots++; } FirstSnapshotSet = true; }
/* * GetTransactionSnapshot * Get the appropriate snapshot for a new query in a transaction. * * Note that the return value may point at static storage that will be modified * by future calls and by CommandCounterIncrement(). Callers should call * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be * used very long. */ Snapshot GetTransactionSnapshot(void) { /* * Return historic snapshot if doing logical decoding. We'll never need a * non-historic transaction snapshot in this (sub-)transaction, so there's * no need to be careful to set one up for later calls to * GetTransactionSnapshot(). */ if (HistoricSnapshotActive()) { Assert(!FirstSnapshotSet); return HistoricSnapshot; } /* First call in transaction? */ if (!FirstSnapshotSet) { Assert(pairingheap_is_empty(&RegisteredSnapshots)); Assert(FirstXactSnapshot == NULL); if (IsInParallelMode()) elog(ERROR, "cannot take query snapshot during a parallel operation"); /* * In transaction-snapshot mode, the first snapshot must live until * end of xact regardless of what the caller does with it, so we must * make a copy of it rather than returning CurrentSnapshotData * directly. Furthermore, if we're running in serializable mode, * predicate.c needs to wrap the snapshot fetch in its own processing. */ if (IsolationUsesXactSnapshot()) { /* First, create the snapshot in CurrentSnapshotData */ if (IsolationIsSerializable()) CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData); else CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); /* Make a saved copy */ CurrentSnapshot = CopySnapshot(CurrentSnapshot); FirstXactSnapshot = CurrentSnapshot; /* Mark it as "registered" in FirstXactSnapshot */ FirstXactSnapshot->regd_count++; pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node); } else CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); /* Don't allow catalog snapshot to be older than xact snapshot. */ CatalogSnapshotStale = true; FirstSnapshotSet = true; return CurrentSnapshot; } if (IsolationUsesXactSnapshot()) return CurrentSnapshot; /* Don't allow catalog snapshot to be older than xact snapshot. */ CatalogSnapshotStale = true; CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); return CurrentSnapshot; }
/* * GetTransactionSnapshot * Get the appropriate snapshot for a new query in a transaction. * * Note that the return value may point at static storage that will be modified * by future calls and by CommandCounterIncrement(). Callers should call * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be * used very long. */ Snapshot GetTransactionSnapshot(void) { /* First call in transaction? */ if (!FirstSnapshotSet) { Assert(RegisteredSnapshots == 0); Assert(FirstXactSnapshot == NULL); /* * In transaction-snapshot mode, the first snapshot must live until * end of xact regardless of what the caller does with it, so we must * make a copy of it rather than returning CurrentSnapshotData * directly. Furthermore, if we're running in serializable mode, * predicate.c needs to wrap the snapshot fetch in its own processing. */ if (IsolationUsesXactSnapshot()) { /* First, create the snapshot in CurrentSnapshotData */ if (IsolationIsSerializable()) CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData); else CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); /* Make a saved copy */ CurrentSnapshot = CopySnapshot(CurrentSnapshot); FirstXactSnapshot = CurrentSnapshot; /* Mark it as "registered" in FirstXactSnapshot */ FirstXactSnapshot->regd_count++; RegisteredSnapshots++; } else CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); FirstSnapshotSet = true; return CurrentSnapshot; } if (IsolationUsesXactSnapshot()) { #ifdef PGXC /* * Consider this test case taken from portals.sql * * CREATE TABLE cursor (a int, b int) distribute by replication; * INSERT INTO cursor VALUES (10); * BEGIN; * SET TRANSACTION ISOLATION LEVEL SERIALIZABLE; * DECLARE c1 NO SCROLL CURSOR FOR SELECT * FROM cursor FOR UPDATE; * INSERT INTO cursor VALUES (2); * FETCH ALL FROM c1; * would result in * ERROR: attempted to lock invisible tuple * because FETCH would be sent as a select to the remote nodes * with command id 0, whereas the command id would be 2 * in the current snapshot. * (1 sent by Coordinator due to declare cursor & * 2 because of the insert inside the transaction) * The command id should therefore be updated in the * current snapshot. */ if (IsConnFromCoord()) SnapshotSetCommandId(GetCurrentCommandId(false)); #endif return CurrentSnapshot; } CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); return CurrentSnapshot; }
/* ---------------- * index_getnext - get the next heap tuple from a scan * * The result is the next heap tuple satisfying the scan keys and the * snapshot, or NULL if no more matching tuples exist. On success, * the buffer containing the heap tuple is pinned (the pin will be dropped * at the next index_getnext or index_endscan). * * Note: caller must check scan->xs_recheck, and perform rechecking of the * scan keys if required. We do not do that here because we don't have * enough information to do it efficiently in the general case. * ---------------- */ HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction) { HeapTuple heapTuple = &scan->xs_ctup; ItemPointer tid = &heapTuple->t_self; FmgrInfo *procedure; SCAN_CHECKS; GET_SCAN_PROCEDURE(amgettuple); Assert(TransactionIdIsValid(RecentGlobalXmin)); /* * We always reset xs_hot_dead; if we are here then either we are just * starting the scan, or we previously returned a visible tuple, and in * either case it's inappropriate to kill the prior index entry. */ scan->xs_hot_dead = false; for (;;) { OffsetNumber offnum; bool at_chain_start; Page dp; if (scan->xs_next_hot != InvalidOffsetNumber) { /* * We are resuming scan of a HOT chain after having returned an * earlier member. Must still hold pin on current heap page. */ Assert(BufferIsValid(scan->xs_cbuf)); Assert(ItemPointerGetBlockNumber(tid) == BufferGetBlockNumber(scan->xs_cbuf)); Assert(TransactionIdIsValid(scan->xs_prev_xmax)); offnum = scan->xs_next_hot; at_chain_start = false; scan->xs_next_hot = InvalidOffsetNumber; } else { bool found; Buffer prev_buf; /* * If we scanned a whole HOT chain and found only dead tuples, * tell index AM to kill its entry for that TID. We do not do this * when in recovery because it may violate MVCC to do so. see * comments in RelationGetIndexScan(). */ if (!scan->xactStartedInRecovery) scan->kill_prior_tuple = scan->xs_hot_dead; /* * The AM's gettuple proc finds the next index entry matching the * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid). It * should also set scan->xs_recheck, though we pay no attention to * that here. */ found = DatumGetBool(FunctionCall2(procedure, PointerGetDatum(scan), Int32GetDatum(direction))); /* Reset kill flag immediately for safety */ scan->kill_prior_tuple = false; /* If we're out of index entries, break out of outer loop */ if (!found) break; pgstat_count_index_tuples(scan->indexRelation, 1); /* Switch to correct buffer if we don't have it already */ prev_buf = scan->xs_cbuf; scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf, scan->heapRelation, ItemPointerGetBlockNumber(tid)); /* * Prune page, but only if we weren't already on this page */ if (prev_buf != scan->xs_cbuf) heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf, RecentGlobalXmin); /* Prepare to scan HOT chain starting at index-referenced offnum */ offnum = ItemPointerGetOffsetNumber(tid); at_chain_start = true; /* We don't know what the first tuple's xmin should be */ scan->xs_prev_xmax = InvalidTransactionId; /* Initialize flag to detect if all entries are dead */ scan->xs_hot_dead = true; } /* Obtain share-lock on the buffer so we can examine visibility */ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); dp = (Page) BufferGetPage(scan->xs_cbuf); /* Scan through possible multiple members of HOT-chain */ for (;;) { ItemId lp; ItemPointer ctid; bool valid; /* check for bogus TID */ if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp)) break; lp = PageGetItemId(dp, offnum); /* check for unused, dead, or redirected items */ if (!ItemIdIsNormal(lp)) { /* We should only see a redirect at start of chain */ if (ItemIdIsRedirected(lp) && at_chain_start) { /* Follow the redirect */ offnum = ItemIdGetRedirect(lp); at_chain_start = false; continue; } /* else must be end of chain */ break; } /* * We must initialize all of *heapTuple (ie, scan->xs_ctup) since * it is returned to the executor on success. */ heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp); heapTuple->t_len = ItemIdGetLength(lp); ItemPointerSetOffsetNumber(tid, offnum); heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation); ctid = &heapTuple->t_data->t_ctid; /* * Shouldn't see a HEAP_ONLY tuple at chain start. (This test * should be unnecessary, since the chain root can't be removed * while we have pin on the index entry, but let's make it * anyway.) */ if (at_chain_start && HeapTupleIsHeapOnly(heapTuple)) break; /* * The xmin should match the previous xmax value, else chain is * broken. (Note: this test is not optional because it protects * us against the case where the prior chain member's xmax aborted * since we looked at it.) */ if (TransactionIdIsValid(scan->xs_prev_xmax) && !TransactionIdEquals(scan->xs_prev_xmax, HeapTupleHeaderGetXmin(heapTuple->t_data))) break; /* If it's visible per the snapshot, we must return it */ valid = HeapTupleSatisfiesVisibility(heapTuple, scan->xs_snapshot, scan->xs_cbuf); CheckForSerializableConflictOut(valid, scan->heapRelation, heapTuple, scan->xs_cbuf); if (valid) { /* * If the snapshot is MVCC, we know that it could accept at * most one member of the HOT chain, so we can skip examining * any more members. Otherwise, check for continuation of the * HOT-chain, and set state for next time. */ if (IsMVCCSnapshot(scan->xs_snapshot) && !IsolationIsSerializable()) scan->xs_next_hot = InvalidOffsetNumber; else if (HeapTupleIsHotUpdated(heapTuple)) { Assert(ItemPointerGetBlockNumber(ctid) == ItemPointerGetBlockNumber(tid)); scan->xs_next_hot = ItemPointerGetOffsetNumber(ctid); scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data); } else scan->xs_next_hot = InvalidOffsetNumber; PredicateLockTuple(scan->heapRelation, heapTuple); LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); pgstat_count_heap_fetch(scan->indexRelation); return heapTuple; } /* * If we can't see it, maybe no one else can either. Check to see * if the tuple is dead to all transactions. If we find that all * the tuples in the HOT chain are dead, we'll signal the index AM * to not return that TID on future indexscans. */ if (scan->xs_hot_dead && HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin, scan->xs_cbuf) != HEAPTUPLE_DEAD) scan->xs_hot_dead = false; /* * Check to see if HOT chain continues past this tuple; if so * fetch the next offnum (we don't bother storing it into * xs_next_hot, but must store xs_prev_xmax), and loop around. */ if (HeapTupleIsHotUpdated(heapTuple)) { Assert(ItemPointerGetBlockNumber(ctid) == ItemPointerGetBlockNumber(tid)); offnum = ItemPointerGetOffsetNumber(ctid); at_chain_start = false; scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data); } else break; /* end of chain */ } /* loop over a single HOT chain */ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); /* Loop around to ask index AM for another TID */ scan->xs_next_hot = InvalidOffsetNumber; } /* Release any held pin on a heap page */ if (BufferIsValid(scan->xs_cbuf)) { ReleaseBuffer(scan->xs_cbuf); scan->xs_cbuf = InvalidBuffer; } return NULL; /* failure exit */ }