/*
 * Decide whether a cached PLyProcedure struct is still valid
 */
static bool
PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup)
{
    int         i;
    bool        valid;

    Assert(proc != NULL);

    /* If the pg_proc tuple has changed, it's not valid */
    if (!(proc->fn_xmin == HeapTupleHeaderGetXmin(procTup->t_data) &&
          ItemPointerEquals(&proc->fn_tid, &procTup->t_self)))
        return false;

    /* Else check the input argument datatypes */
    valid = true;
    for (i = 0; i < proc->nargs; i++)
    {
        valid = PLy_procedure_argument_valid(&proc->args[i]);

        /* Short-circuit on first changed argument */
        if (!valid)
            break;
    }

    /* if the output type is composite, it might have changed */
    if (valid)
        valid = PLy_procedure_argument_valid(&proc->result);

    return valid;
}
/*
 * Check if our cached information about a datatype is still valid
 */
static bool
PLy_procedure_argument_valid(PLyTypeInfo *arg)
{
    HeapTuple   relTup;
    bool        valid;

    /* Nothing to cache unless type is composite */
    if (arg->is_rowtype != 1)
        return true;

    /*
     * Zero typ_relid means that we got called on an output argument of a
     * function returning an unnamed record type; the info for it can't
     * change.
     */
    if (!OidIsValid(arg->typ_relid))
        return true;

    /* Else we should have some cached data */
    Assert(TransactionIdIsValid(arg->typrel_xmin));
    Assert(ItemPointerIsValid(&arg->typrel_tid));

    /* Get the pg_class tuple for the data type */
    relTup = SearchSysCache1(RELOID, ObjectIdGetDatum(arg->typ_relid));
    if (!HeapTupleIsValid(relTup))
        elog(ERROR, "cache lookup failed for relation %u", arg->typ_relid);

    /* If it has changed, the cached data is not valid */
    valid = (arg->typrel_xmin == HeapTupleHeaderGetXmin(relTup->t_data) &&
             ItemPointerEquals(&arg->typrel_tid, &relTup->t_self));

    ReleaseSysCache(relTup);

    return valid;
}
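/*
 * Editor's sketch (hypothetical helper, not part of the original source):
 * the validity tests above compare against fn_xmin/fn_tid values that must
 * have been captured when the cache entry was built, roughly like this:
 */
static void
PLy_procedure_remember_version(PLyProcedure *proc, HeapTuple procTup)
{
    /* Record the pg_proc tuple's visibility info as the cache key */
    proc->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data);
    proc->fn_tid = procTup->t_self;
}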
/*
 * MUST BE CALLED ONLY ON RECOVERY.
 *
 * Check whether a valid heap tuple (one inserted by a transaction that has
 * not aborted) exists at the given item pointer.
 */
bool
XLogIsValidTuple(RelFileNode hnode, ItemPointer iptr)
{
    Relation    reln;
    Buffer      buffer;
    Page        page;
    ItemId      lp;
    HeapTupleHeader htup;

    reln = XLogOpenRelation(false, RM_HEAP_ID, hnode);
    if (!RelationIsValid(reln))
        return (false);

    buffer = ReadBuffer(reln, ItemPointerGetBlockNumber(iptr));
    if (!BufferIsValid(buffer))
        return (false);

    LockBuffer(buffer, BUFFER_LOCK_SHARE);
    page = (Page) BufferGetPage(buffer);
    if (PageIsNew((PageHeader) page) ||
        ItemPointerGetOffsetNumber(iptr) > PageGetMaxOffsetNumber(page))
    {
        UnlockAndReleaseBuffer(buffer);
        return (false);
    }

    if (PageGetSUI(page) != ThisStartUpID)
    {
        Assert(PageGetSUI(page) < ThisStartUpID);
        UnlockAndReleaseBuffer(buffer);
        return (true);
    }

    lp = PageGetItemId(page, ItemPointerGetOffsetNumber(iptr));
    if (!ItemIdIsUsed(lp) || ItemIdDeleted(lp))
    {
        UnlockAndReleaseBuffer(buffer);
        return (false);
    }

    htup = (HeapTupleHeader) PageGetItem(page, lp);

    /* MUST CHECK THAT THE TUPLE WAS NOT INSERTED IN A PREVIOUS STARTUP */
    if (!(htup->t_infomask & HEAP_XMIN_COMMITTED))
    {
        if (htup->t_infomask & HEAP_XMIN_INVALID ||
            (htup->t_infomask & HEAP_MOVED_IN &&
             TransactionIdDidAbort(HeapTupleHeaderGetXvac(htup))) ||
            TransactionIdDidAbort(HeapTupleHeaderGetXmin(htup)))
        {
            UnlockAndReleaseBuffer(buffer);
            return (false);
        }
    }

    UnlockAndReleaseBuffer(buffer);
    return (true);
}
/*
 * This function performs checks for certain system tables to validate that a
 * tuple fetched from a table actually matches the key by which it was
 * fetched from the index.
 */
static void
CrossCheckTuple(int cacheId,
                Datum key1, Datum key2, Datum key3, Datum key4,
                HeapTuple tuple)
{
    Form_pg_class rd_rel;

    switch (cacheId)
    {
        case RELOID:
            if (HeapTupleGetOid(tuple) != DatumGetObjectId(key1))
            {
                /* Oids are unsigned, so print them with %u, not %d */
                elog(ERROR, "pg_class_oid_index is broken, oid=%u is pointing to tuple with oid=%u (xmin:%u xmax:%u)",
                     DatumGetObjectId(key1), HeapTupleGetOid(tuple),
                     HeapTupleHeaderGetXmin((tuple)->t_data),
                     HeapTupleHeaderGetXmax((tuple)->t_data));
            }
            break;
        case RELNAMENSP:
            rd_rel = (Form_pg_class) GETSTRUCT(tuple);
            if (strncmp(rd_rel->relname.data, DatumGetCString(key1),
                        NAMEDATALEN) != 0)
            {
                elog(ERROR, "pg_class_relname_nsp_index is broken, intended tuple with name \"%s\" fetched \"%s\""
                     " (xmin:%u xmax:%u)",
                     DatumGetCString(key1), rd_rel->relname.data,
                     HeapTupleHeaderGetXmin((tuple)->t_data),
                     HeapTupleHeaderGetXmax((tuple)->t_data));
            }
            break;
        case TYPEOID:
            if (HeapTupleGetOid(tuple) != DatumGetObjectId(key1))
            {
                elog(ERROR, "pg_type_oid_index is broken, oid=%u is pointing to tuple with oid=%u (xmin:%u xmax:%u)",
                     DatumGetObjectId(key1), HeapTupleGetOid(tuple),
                     HeapTupleHeaderGetXmin((tuple)->t_data),
                     HeapTupleHeaderGetXmax((tuple)->t_data));
            }
            break;
    }
}
/* ----------------
 *      heap_getsysattr
 *
 *      Fetch the value of a system attribute for a tuple.
 *
 * This is a support routine for the heap_getattr macro.  The macro
 * has already determined that the attnum refers to a system attribute.
 * ----------------
 */
Datum
heap_getsysattr(HeapTuple tup, int attnum, bool *isnull)
{
    Datum       result;

    Assert(tup);
    Assert(!is_heaptuple_memtuple(tup));

    /* Currently, no sys attribute ever reads as NULL. */
    if (isnull)
        *isnull = false;

    switch (attnum)
    {
        case SelfItemPointerAttributeNumber:
            /* pass-by-reference datatype */
            result = PointerGetDatum(&(tup->t_self));
            break;
        case ObjectIdAttributeNumber:
            result = ObjectIdGetDatum(HeapTupleGetOid(tup));
            break;
        case MinTransactionIdAttributeNumber:
            result = TransactionIdGetDatum(HeapTupleHeaderGetXmin(tup->t_data));
            break;
        case MaxTransactionIdAttributeNumber:
            result = TransactionIdGetDatum(HeapTupleHeaderGetXmax(tup->t_data));
            break;
        case MinCommandIdAttributeNumber:
        case MaxCommandIdAttributeNumber:

            /*
             * cmin and cmax are now both aliases for the same field, which
             * can in fact also be a combo command id.  XXX perhaps we should
             * return the "real" cmin or cmax if possible, that is if we are
             * inside the originating transaction?
             */
            result = CommandIdGetDatum(HeapTupleHeaderGetRawCommandId(tup->t_data));
            break;
        case TableOidAttributeNumber:
            /* CDB: Must now use a TupleTableSlot to access the 'tableoid'. */
            result = ObjectIdGetDatum(InvalidOid);
            elog(ERROR, "Invalid reference to \"tableoid\" system attribute");
            break;
        case GpSegmentIdAttributeNumber:    /* CDB */
            result = Int32GetDatum(Gp_segment);
            break;
        default:
            elog(ERROR, "invalid attnum: %d", attnum);
            result = 0;         /* keep compiler quiet */
            break;
    }
    return result;
}
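/*
 * Editor's sketch (assumed caller, not from the original source): system
 * attributes are normally reached through the heap_getattr macro, which
 * routes negative attribute numbers to heap_getsysattr, e.g.:
 */
static ItemPointer
example_get_tid(HeapTuple tuple, TupleDesc tupdesc)
{
    bool        isnull;
    Datum       d;

    /* SelfItemPointerAttributeNumber is negative, so this call lands here */
    d = heap_getattr(tuple, SelfItemPointerAttributeNumber, tupdesc, &isnull);
    return (ItemPointer) DatumGetPointer(d);
}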
CommandId
HeapTupleHeaderGetCmin(HeapTupleHeader tup)
{
    CommandId   cid = HeapTupleHeaderGetRawCommandId(tup);

    Assert(!(tup->t_infomask & HEAP_MOVED));

    if (tup->t_infomask & HEAP_COMBOCID)
        return GetRealCmin(HeapTupleHeaderGetXmin(tup), cid);
    else
        return cid;
}
/*
 * Given a tuple we are about to delete, determine the correct value to store
 * into its t_cid field.
 *
 * If we don't need a combo CID, *cmax is unchanged and *iscombo is set to
 * FALSE.  If we do need one, *cmax is replaced by a combo CID and *iscombo
 * is set to TRUE.
 *
 * The reason this is separate from the actual HeapTupleHeaderSetCmax()
 * operation is that this could fail due to out-of-memory conditions.  Hence
 * we need to do this before entering the critical section that actually
 * changes the tuple in shared buffers.
 */
void
HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
                          CommandId *cmax,
                          bool *iscombo)
{
    /*
     * If we're marking a tuple deleted that was inserted by (any
     * subtransaction of) our transaction, we need to use a combo command id.
     * Test for HEAP_XMIN_COMMITTED first, because it's cheaper than a
     * TransactionIdIsCurrentTransactionId call.
     */
    if (!(tup->t_infomask & HEAP_XMIN_COMMITTED) &&
        TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tup)))
    {
        CommandId   cmin = HeapTupleHeaderGetRawCommandId(tup);

        *cmax = GetComboCommandId(HeapTupleHeaderGetXmin(tup), cmin, *cmax);
        *iscombo = true;
    }
    else
    {
        *iscombo = false;
    }
}
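/*
 * Editor's sketch of the intended call pattern (loosely modeled on
 * heap_delete; names per the 8.3-era API, and simplified): resolve the combo
 * command id while it is still safe to fail, then apply it inside the
 * critical section.
 */
static void
example_mark_deleted(HeapTupleHeader tup, TransactionId xid)
{
    CommandId   cmax = GetCurrentCommandId(true);
    bool        iscombo;

    /* May allocate memory or error out, so do it outside the critsection */
    HeapTupleHeaderAdjustCmax(tup, &cmax, &iscombo);

    START_CRIT_SECTION();
    HeapTupleHeaderSetXmax(tup, xid);
    HeapTupleHeaderSetCmax(tup, cmax, iscombo);
    END_CRIT_SECTION();
}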
/*
 * Check if the specified heap tuple was inserted by the given
 * xaction/command and return
 *
 *  - -1 if not
 *  - 0 if there is no tuple at all
 *  - 1 if yes
 */
int
XLogIsOwnerOfTuple(RelFileNode hnode, ItemPointer iptr,
                   TransactionId xid, CommandId cid)
{
    Relation    reln;
    Buffer      buffer;
    Page        page;
    ItemId      lp;
    HeapTupleHeader htup;

    reln = XLogOpenRelation(false, RM_HEAP_ID, hnode);
    if (!RelationIsValid(reln))
        return (0);

    buffer = ReadBuffer(reln, ItemPointerGetBlockNumber(iptr));
    if (!BufferIsValid(buffer))
        return (0);

    LockBuffer(buffer, BUFFER_LOCK_SHARE);
    page = (Page) BufferGetPage(buffer);
    if (PageIsNew((PageHeader) page) ||
        ItemPointerGetOffsetNumber(iptr) > PageGetMaxOffsetNumber(page))
    {
        UnlockAndReleaseBuffer(buffer);
        return (0);
    }

    lp = PageGetItemId(page, ItemPointerGetOffsetNumber(iptr));
    if (!ItemIdIsUsed(lp) || ItemIdDeleted(lp))
    {
        UnlockAndReleaseBuffer(buffer);
        return (0);
    }

    htup = (HeapTupleHeader) PageGetItem(page, lp);

    Assert(PageGetSUI(page) == ThisStartUpID);

    if (!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), xid) ||
        HeapTupleHeaderGetCmin(htup) != cid)
    {
        UnlockAndReleaseBuffer(buffer);
        return (-1);
    }

    UnlockAndReleaseBuffer(buffer);
    return (1);
}
CommandId
HeapTupleHeaderGetCmax(HeapTupleHeader tup)
{
    CommandId   cid = HeapTupleHeaderGetRawCommandId(tup);

    /* We do not store cmax when locking a tuple */
    Assert(!(tup->t_infomask & (HEAP_MOVED | HEAP_IS_LOCKED)));

    /*
     * MPP-8317: cursors can't always *tell* that this is the current
     * transaction.
     */
    Assert(QEDtxContextInfo.cursorContext ||
           TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tup)));

    if (tup->t_infomask & HEAP_COMBOCID)
        return GetRealCmax(HeapTupleHeaderGetXmin(tup), cid);
    else
        return cid;
}
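/*
 * Editor's sketch (simplified, hypothetical types; the real machinery lives
 * in combocid.c): a "combo" command id is an index into a backend-local
 * array of (cmin, cmax) pairs, which is how the single t_cid field decoded
 * by the two accessors above can stand for both the inserting and the
 * deleting command.  In this tree the lookups are additionally keyed by the
 * tuple's xmin.
 */
typedef struct ExampleComboCidPair
{
    CommandId   cmin;           /* inserting command of this xact */
    CommandId   cmax;           /* deleting command of this xact */
} ExampleComboCidPair;

/* GetRealCmin/GetRealCmax then amount to an array lookup, roughly: */
static CommandId
example_get_real_cmin(ExampleComboCidPair *combocids, CommandId combocid)
{
    return combocids[combocid].cmin;
}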
/*
 * Register a dead tuple with an ongoing rewrite.  Dead tuples are not
 * copied to the new table, but we still make note of them so that we
 * can release some resources earlier.
 *
 * Returns true if a tuple was removed from the unresolved_tups table.
 * This indicates that the tuple, previously thought to be "recently dead",
 * is now known really dead and won't be written to the output.
 */
bool
rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
{
    /*
     * If we have already seen an earlier tuple in the update chain that
     * points to this tuple, let's forget about that earlier tuple.  It's in
     * fact dead as well, our simple xmax < OldestXmin test in
     * HeapTupleSatisfiesVacuum just wasn't enough to detect it.  It happens
     * when xmin of a tuple is greater than xmax, which sounds
     * counter-intuitive but is perfectly valid.
     *
     * We don't bother to try to detect the situation the other way round,
     * when we encounter the dead tuple first and then the recently dead one
     * that points to it.  If that happens, we'll have some unmatched entries
     * in the UnresolvedTups hash table at the end.  That can happen anyway,
     * because a vacuum might have removed the dead tuple in the chain before
     * us.
     */
    UnresolvedTup unresolved;
    TidHashKey  hashkey;
    bool        found;

    memset(&hashkey, 0, sizeof(hashkey));
    hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data);
    hashkey.tid = old_tuple->t_self;

    unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
                             HASH_FIND, NULL);

    if (unresolved != NULL)
    {
        /* Need to free the contained tuple as well as the hashtable entry */
        heap_freetuple(unresolved->tuple);
        hash_search(state->rs_unresolved_tups, &hashkey,
                    HASH_REMOVE, &found);
        Assert(found);
        return true;
    }

    return false;
}
/*
 * PyPgFunction_IsCurrent - determine if the current pg_proc entry is newer
 * than 'func'
 */
bool
PyPgFunction_IsCurrent(PyObj func)
{
    HeapTuple   ht;
    ItemPointerData fn_tid;
    TransactionId fn_xmin,
                last_fn_xmin;

    last_fn_xmin = PyPgFunction_GetXMin(func);
    if (last_fn_xmin == InvalidTransactionId)
    {
        /* pseudo-function */
        return (true);
    }

    ht = SearchSysCache(PROCOID, PyPgFunction_GetOid(func), 0, 0, 0);
    if (!HeapTupleIsValid(ht))
        return (false);

    fn_xmin = HeapTupleHeaderGetXmin(ht->t_data);
    fn_tid = ht->t_self;
    ReleaseSysCache(ht);

    if (last_fn_xmin != fn_xmin ||
        !ItemPointerEquals(PyPgFunction_GetItemPointer(func), &fn_tid))
    {
        return (false);
    }

    if (!PyPgTupleDesc_IsCurrent(PyPgFunction_GetInput(func)))
    {
        return (false);
    }

    if (!PyPgType_IsCurrent(PyPgFunction_GetOutput(func)))
        return (false);

    return (true);
}
/*
 * Check whether a tuple is all-visible relative to a given OldestXmin value.
 * The buffer should contain the tuple and should be locked and pinned.
 */
static bool
tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
{
    HTSV_Result state;
    TransactionId xmin;

    state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
    if (state != HEAPTUPLE_LIVE)
        return false;           /* all-visible implies live */

    /*
     * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
     * all-visible unless every tuple is hinted committed.  However, those
     * hint bits could be lost after a crash, so we can't be certain that
     * they'll be set here.  So just check the xmin.
     */
    xmin = HeapTupleHeaderGetXmin(tup->t_data);
    if (!TransactionIdPrecedes(xmin, OldestXmin))
        return false;           /* xmin not old enough for all to see */

    return true;
}
/*
 * HeapTupleSatisfiesItself
 *      True iff heap tuple is valid "for itself".
 *
 *  Here, we consider the effects of:
 *      all committed transactions (as of the current instant)
 *      previous commands of this transaction
 *      changes made by the current command
 *
 * Note:
 *      Assumes heap tuple is valid.
 *
 * The satisfaction of "itself" requires the following:
 *
 * ((Xmin == my-transaction &&              the row was updated by the current transaction, and
 *      (Xmax is null                       it was not deleted
 *       [|| Xmax != my-transaction)])      [or it was deleted by another transaction]
 * ||
 *
 * (Xmin is committed &&                    the row was modified by a committed transaction, and
 *      (Xmax is null ||                    the row has not been deleted, or
 *          (Xmax != my-transaction &&      the row was deleted by another transaction
 *           Xmax is not committed)))       that has not been committed
 */
bool
HeapTupleSatisfiesItself(HeapTupleHeader tuple)
{
    if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
    {
        if (tuple->t_infomask & HEAP_XMIN_INVALID)
            return false;

        if (tuple->t_infomask & HEAP_MOVED_OFF)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (TransactionIdIsCurrentTransactionId(xvac))
                return false;
            if (!TransactionIdIsInProgress(xvac))
            {
                if (TransactionIdDidCommit(xvac))
                {
                    tuple->t_infomask |= HEAP_XMIN_INVALID;
                    return false;
                }
                tuple->t_infomask |= HEAP_XMIN_COMMITTED;
            }
        }
        else if (tuple->t_infomask & HEAP_MOVED_IN)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (!TransactionIdIsCurrentTransactionId(xvac))
            {
                if (TransactionIdIsInProgress(xvac))
                    return false;
                if (TransactionIdDidCommit(xvac))
                    tuple->t_infomask |= HEAP_XMIN_COMMITTED;
                else
                {
                    tuple->t_infomask |= HEAP_XMIN_INVALID;
                    return false;
                }
            }
        }
        else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple)))
        {
            if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid */
                return true;

            Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)));

            if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
                return true;

            return false;
        }
        else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple)))
            return false;
        else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
            tuple->t_infomask |= HEAP_XMIN_COMMITTED;
        else
        {
            /* it must have aborted or crashed */
            tuple->t_infomask |= HEAP_XMIN_INVALID;
            return false;
        }
    }

    /* by here, the inserting transaction has committed */

    if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid or aborted */
        return true;

    if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
    {
        if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
            return true;
        return false;           /* updated by other */
    }

    if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
    {
        if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
            return true;
        return false;
    }

    if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
        return true;

    if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
    {
        /* it must have aborted or crashed */
        tuple->t_infomask |= HEAP_XMAX_INVALID;
        return true;
    }

    /* xmax transaction committed */

    if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE)
    {
        tuple->t_infomask |= HEAP_XMAX_INVALID;
        return true;
    }

    tuple->t_infomask |= HEAP_XMAX_COMMITTED;
    return false;
}
/*
 * Add a tuple to the new heap.
 *
 * Visibility information is copied from the original tuple, except that
 * we "freeze" very-old tuples.  Note that since we scribble on new_tuple,
 * it had better be temp storage not a pointer to the original tuple.
 *
 * state        opaque state as returned by begin_heap_rewrite
 * old_tuple    original tuple in the old heap
 * new_tuple    new, rewritten tuple to be inserted to new heap
 */
void
rewrite_heap_tuple(RewriteState state,
                   HeapTuple old_tuple, HeapTuple new_tuple)
{
    MemoryContext old_cxt;
    ItemPointerData old_tid;
    TidHashKey  hashkey;
    bool        found;
    bool        free_new;

    old_cxt = MemoryContextSwitchTo(state->rs_cxt);

    /*
     * Copy the original tuple's visibility information into new_tuple.
     *
     * XXX we might later need to copy some t_infomask2 bits, too? Right now,
     * we intentionally clear the HOT status bits.
     */
    memcpy(&new_tuple->t_data->t_choice.t_heap,
           &old_tuple->t_data->t_choice.t_heap,
           sizeof(HeapTupleFields));

    new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
    new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
    new_tuple->t_data->t_infomask |=
        old_tuple->t_data->t_infomask & HEAP_XACT_MASK;

    /*
     * While we have our hands on the tuple, we may as well freeze any
     * eligible xmin or xmax, so that future VACUUM effort can be saved.
     */
    heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid,
                      state->rs_cutoff_multi);

    /*
     * Invalid ctid means that ctid should point to the tuple itself.  We'll
     * override it later if the tuple is part of an update chain.
     */
    ItemPointerSetInvalid(&new_tuple->t_data->t_ctid);

    /*
     * If the tuple has been updated, check the old-to-new mapping hash table.
     */
    if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
          HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
        !(ItemPointerEquals(&(old_tuple->t_self),
                            &(old_tuple->t_data->t_ctid))))
    {
        OldToNewMapping mapping;

        memset(&hashkey, 0, sizeof(hashkey));
        hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data);
        hashkey.tid = old_tuple->t_data->t_ctid;

        mapping = (OldToNewMapping)
            hash_search(state->rs_old_new_tid_map, &hashkey,
                        HASH_FIND, NULL);

        if (mapping != NULL)
        {
            /*
             * We've already copied the tuple that t_ctid points to, so we
             * can set the ctid of this tuple to point to the new location,
             * and insert it right away.
             */
            new_tuple->t_data->t_ctid = mapping->new_tid;

            /* We don't need the mapping entry anymore */
            hash_search(state->rs_old_new_tid_map, &hashkey,
                        HASH_REMOVE, &found);
            Assert(found);
        }
        else
        {
            /*
             * We haven't seen the tuple t_ctid points to yet.  Stash this
             * tuple into unresolved_tups to be written later.
             */
            UnresolvedTup unresolved;

            unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
                                     HASH_ENTER, &found);
            Assert(!found);

            unresolved->old_tid = old_tuple->t_self;
            unresolved->tuple = heap_copytuple(new_tuple);

            /*
             * We can't do anything more now, since we don't know where the
             * tuple will be written.
             */
            MemoryContextSwitchTo(old_cxt);
            return;
        }
    }

    /*
     * Now we will write the tuple, and then check to see if it is the B
     * tuple in any new or known pair.  When we resolve a known pair, we will
     * be able to write that pair's A tuple, and then we have to check if it
     * resolves some other pair.  Hence, we need a loop here.
     */
    old_tid = old_tuple->t_self;
    free_new = false;

    for (;;)
    {
        ItemPointerData new_tid;

        /* Insert the tuple and find out where it's put in new_heap */
        raw_heap_insert(state, new_tuple);
        new_tid = new_tuple->t_self;

        /*
         * If the tuple is the updated version of a row, and the prior
         * version wouldn't be DEAD yet, then we need to either resolve the
         * prior version (if it's waiting in rs_unresolved_tups), or make an
         * entry in rs_old_new_tid_map (so we can resolve it when we do see
         * it).  The previous tuple's xmax would equal this one's xmin, so
         * it's RECENTLY_DEAD if and only if the xmin is not before
         * OldestXmin.
         */
        if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) &&
            !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data),
                                   state->rs_oldest_xmin))
        {
            /*
             * Okay, this is B in an update pair.  See if we've seen A.
             */
            UnresolvedTup unresolved;

            memset(&hashkey, 0, sizeof(hashkey));
            hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);
            hashkey.tid = old_tid;

            unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
                                     HASH_FIND, NULL);

            if (unresolved != NULL)
            {
                /*
                 * We have seen and memorized the previous tuple already.
                 * Now that we know where we inserted the tuple its t_ctid
                 * points to, fix its t_ctid and insert it to the new heap.
                 */
                if (free_new)
                    heap_freetuple(new_tuple);
                new_tuple = unresolved->tuple;
                free_new = true;
                old_tid = unresolved->old_tid;
                new_tuple->t_data->t_ctid = new_tid;

                /*
                 * We don't need the hash entry anymore, but don't free its
                 * tuple just yet.
                 */
                hash_search(state->rs_unresolved_tups, &hashkey,
                            HASH_REMOVE, &found);
                Assert(found);

                /* loop back to insert the previous tuple in the chain */
                continue;
            }
            else
            {
                /*
                 * Remember the new tid of this tuple.  We'll use it to set
                 * the ctid when we find the previous tuple in the chain.
                 */
                OldToNewMapping mapping;

                mapping = hash_search(state->rs_old_new_tid_map, &hashkey,
                                      HASH_ENTER, &found);
                Assert(!found);

                mapping->new_tid = new_tid;
            }
        }

        /* Done with this (chain of) tuples, for now */
        if (free_new)
            heap_freetuple(new_tuple);
        break;
    }

    MemoryContextSwitchTo(old_cxt);
}
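/*
 * Editor's sketch (hypothetical driver, loosely modeled on CLUSTER's copy
 * loop): how the rewrite API above fits together.  Signatures are
 * approximate for this code's vintage, and HeapTupleSatisfiesVacuum requires
 * the scan's current buffer to be locked, which is elided here.
 */
static void
example_rewrite_all(Relation new_heap, HeapScanDesc scan,
                    TransactionId oldest_xmin, TransactionId freeze_xid,
                    MultiXactId cutoff_multi, bool use_wal)
{
    RewriteState state;
    HeapTuple   tuple;

    state = begin_heap_rewrite(new_heap, oldest_xmin, freeze_xid,
                               cutoff_multi, use_wal);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        if (HeapTupleSatisfiesVacuum(tuple->t_data, oldest_xmin,
                                     scan->rs_cbuf) == HEAPTUPLE_DEAD)
            rewrite_heap_dead_tuple(state, tuple);  /* note it, don't copy */
        else
        {
            /* rewrite_heap_tuple scribbles on new_tuple, so pass a copy */
            HeapTuple   copied = heap_copytuple(tuple);

            rewrite_heap_tuple(state, tuple, copied);
        }
    }

    end_heap_rewrite(state);
}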
/*
 * See the comments for HeapTupleSatisfiesMVCC for the semantics this
 * function obeys.
 *
 * Only usable on tuples from catalog tables!
 *
 * We don't need to support HEAP_MOVED_(IN|OFF) for now because we only
 * support reading catalog pages which couldn't have been created in an older
 * version.
 *
 * We don't set any hint bits in here as it seems unlikely to be beneficial
 * as those should already be set by normal access and it seems to be too
 * dangerous to do so as the semantics of doing so during timetravel are more
 * complicated than when dealing "only" with the present.
 */
bool
HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
                               Buffer buffer)
{
    HeapTupleHeader tuple = htup->t_data;
    TransactionId xmin = HeapTupleHeaderGetXmin(tuple);
    TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple);

    Assert(ItemPointerIsValid(&htup->t_self));
    Assert(htup->t_tableOid != InvalidOid);

    /* inserting transaction aborted */
    if (HeapTupleHeaderXminInvalid(tuple))
    {
        Assert(!TransactionIdDidCommit(xmin));
        return false;
    }
    /* check if it's one of our txids, toplevel is also in there */
    else if (TransactionIdInArray(xmin, snapshot->subxip, snapshot->subxcnt))
    {
        bool        resolved;
        CommandId   cmin = HeapTupleHeaderGetRawCommandId(tuple);
        CommandId   cmax = InvalidCommandId;

        /*
         * Another transaction might have (tried to) delete this tuple or
         * cmin/cmax was stored in a combocid.  So we need to look up the
         * actual values externally.
         */
        resolved = ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(),
                                                 snapshot,
                                                 htup, buffer,
                                                 &cmin, &cmax);

        if (!resolved)
            elog(ERROR, "could not resolve cmin/cmax of catalog tuple");

        Assert(cmin != InvalidCommandId);

        if (cmin >= snapshot->curcid)
            return false;       /* inserted after scan started */
        /* fall through */
    }
    /* committed before our xmin horizon.  Do a normal visibility check. */
    else if (TransactionIdPrecedes(xmin, snapshot->xmin))
    {
        Assert(!(HeapTupleHeaderXminCommitted(tuple) &&
                 !TransactionIdDidCommit(xmin)));

        /* check for hint bit first, consult clog afterwards */
        if (!HeapTupleHeaderXminCommitted(tuple) &&
            !TransactionIdDidCommit(xmin))
            return false;
        /* fall through */
    }
    /* beyond our xmax horizon, i.e. invisible */
    else if (TransactionIdFollowsOrEquals(xmin, snapshot->xmax))
    {
        return false;
    }
    /* check if it's a committed transaction in [xmin, xmax) */
    else if (TransactionIdInArray(xmin, snapshot->xip, snapshot->xcnt))
    {
        /* fall through */
    }

    /*
     * none of the above, i.e. between [xmin, xmax) but hasn't committed.
     * I.e. invisible.
     */
    else
    {
        return false;
    }

    /* at this point we know xmin is visible, go on to check xmax */

    /* xid invalid or aborted */
    if (tuple->t_infomask & HEAP_XMAX_INVALID)
        return true;
    /* locked tuples are always visible */
    else if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
        return true;

    /*
     * We can see multis here if we're looking at user tables or if somebody
     * SELECT ... FOR SHARE/UPDATE a system table.
     */
    else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
    {
        xmax = HeapTupleGetUpdateXid(tuple);
    }

    /* check if it's one of our txids, toplevel is also in there */
    if (TransactionIdInArray(xmax, snapshot->subxip, snapshot->subxcnt))
    {
        bool        resolved;
        CommandId   cmin;
        CommandId   cmax = HeapTupleHeaderGetRawCommandId(tuple);

        /* Lookup actual cmin/cmax values */
        resolved = ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(),
                                                 snapshot,
                                                 htup, buffer,
                                                 &cmin, &cmax);

        if (!resolved)
            elog(ERROR, "could not resolve combocid to cmax");

        Assert(cmax != InvalidCommandId);

        if (cmax >= snapshot->curcid)
            return true;        /* deleted after scan started */
        else
            return false;       /* deleted before scan started */
    }
    /* below xmin horizon, normal transaction state is valid */
    else if (TransactionIdPrecedes(xmax, snapshot->xmin))
    {
        Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED &&
                 !TransactionIdDidCommit(xmax)));

        /* check hint bit first */
        if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
            return false;

        /* check clog */
        return !TransactionIdDidCommit(xmax);
    }
    /* above xmax horizon, we cannot possibly see the deleting transaction */
    else if (TransactionIdFollowsOrEquals(xmax, snapshot->xmax))
        return true;
    /* xmax is between [xmin, xmax), check known committed array */
    else if (TransactionIdInArray(xmax, snapshot->xip, snapshot->xcnt))
        return false;
    /* xmax is between [xmin, xmax), but known not to have committed yet */
    else
        return true;
}
/*
 * HeapTupleSatisfiesMVCC
 *      True iff heap tuple is valid for the given MVCC snapshot.
 *
 *  Here, we consider the effects of:
 *      all transactions committed as of the time of the given snapshot
 *      previous commands of this transaction
 *
 *  Does _not_ include:
 *      transactions shown as in-progress by the snapshot
 *      transactions started after the snapshot was taken
 *      changes made by the current command
 *
 * This is the same as HeapTupleSatisfiesNow, except that transactions that
 * were in progress or as yet unstarted when the snapshot was taken will
 * be treated as uncommitted, even if they have committed by now.
 *
 * (Notice, however, that the tuple status hint bits will be updated on the
 * basis of the true state of the transaction, even if we then pretend we
 * can't see it.)
 */
bool
HeapTupleSatisfiesMVCC(HeapTupleHeader tuple, Snapshot snapshot,
                       Buffer buffer)
{
    if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
    {
        if (tuple->t_infomask & HEAP_XMIN_INVALID)
            return false;

        if (tuple->t_infomask & HEAP_MOVED_OFF)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (TransactionIdIsCurrentTransactionId(xvac))
                return false;
            if (!TransactionIdIsInProgress(xvac))
            {
                if (TransactionIdDidCommit(xvac))
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return false;
                }
                SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                            InvalidTransactionId);
            }
        }
        else if (tuple->t_infomask & HEAP_MOVED_IN)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (!TransactionIdIsCurrentTransactionId(xvac))
            {
                if (TransactionIdIsInProgress(xvac))
                    return false;
                if (TransactionIdDidCommit(xvac))
                    SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                                InvalidTransactionId);
                else
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return false;
                }
            }
        }
        else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple)))
        {
            if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid)
                return false;   /* inserted after scan started */

            if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid */
                return true;

            if (tuple->t_infomask & HEAP_IS_LOCKED)     /* not deleter */
                return true;

            Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));

            if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
            {
                /* deleting subtransaction must have aborted */
                SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                            InvalidTransactionId);
                return true;
            }

            if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
                return true;    /* deleted after scan started */
            else
                return false;   /* deleted before scan started */
        }
        else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple)))
            return false;
        else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
            SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                        HeapTupleHeaderGetXmin(tuple));
        else
        {
            /* it must have aborted or crashed */
            SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                        InvalidTransactionId);
            return false;
        }
    }

    /*
     * By here, the inserting transaction has committed - have to check
     * when...
     */
    if (XidInMVCCSnapshot(HeapTupleHeaderGetXmin(tuple), snapshot))
        return false;           /* treat as still in progress */

    if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid or aborted */
        return true;

    if (tuple->t_infomask & HEAP_IS_LOCKED)
        return true;

    if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
    {
        /* MultiXacts are currently only allowed to lock tuples */
        Assert(tuple->t_infomask & HEAP_IS_LOCKED);
        return true;
    }

    if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
    {
        if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
        {
            if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
                return true;    /* deleted after scan started */
            else
                return false;   /* deleted before scan started */
        }

        if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
            return true;

        if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
        {
            /* it must have aborted or crashed */
            SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                        InvalidTransactionId);
            return true;
        }

        /* xmax transaction committed */
        SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
                    HeapTupleHeaderGetXmax(tuple));
    }

    /*
     * OK, the deleting transaction committed too ... but when?
     */
    if (XidInMVCCSnapshot(HeapTupleHeaderGetXmax(tuple), snapshot))
        return true;            /* treat as still in progress */

    return false;
}
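/*
 * Editor's sketch of XidInMVCCSnapshot(), which the MVCC check above leans
 * on (simplified: the real routine also searches the subxip array and
 * handles snapshot suboverflow):
 */
static bool
example_xid_in_mvcc_snapshot(TransactionId xid, Snapshot snapshot)
{
    uint32      i;

    if (TransactionIdPrecedes(xid, snapshot->xmin))
        return false;           /* finished before the snapshot was taken */
    if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
        return true;            /* started after the snapshot was taken */
    for (i = 0; i < snapshot->xcnt; i++)
    {
        if (TransactionIdEquals(xid, snapshot->xip[i]))
            return true;        /* was in progress at snapshot time */
    }
    return false;
}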
/*
 * HeapTupleSatisfiesDirty
 *      True iff heap tuple is valid including effects of open transactions.
 *
 *  Here, we consider the effects of:
 *      all committed and in-progress transactions (as of the current instant)
 *      previous commands of this transaction
 *      changes made by the current command
 *
 * This is essentially like HeapTupleSatisfiesSelf as far as effects of
 * the current transaction and committed/aborted xacts are concerned.
 * However, we also include the effects of other xacts still in progress.
 *
 * A special hack is that the passed-in snapshot struct is used as an
 * output argument to return the xids of concurrent xacts that affected the
 * tuple.  snapshot->xmin is set to the tuple's xmin if that is another
 * transaction that's still in progress; or to InvalidTransactionId if the
 * tuple's xmin is committed good, committed dead, or my own xact.
 * Similarly for snapshot->xmax and the tuple's xmax.
 */
bool
HeapTupleSatisfiesDirty(HeapTupleHeader tuple, Snapshot snapshot,
                        Buffer buffer)
{
    snapshot->xmin = snapshot->xmax = InvalidTransactionId;

    if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
    {
        if (tuple->t_infomask & HEAP_XMIN_INVALID)
            return false;

        if (tuple->t_infomask & HEAP_MOVED_OFF)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (TransactionIdIsCurrentTransactionId(xvac))
                return false;
            if (!TransactionIdIsInProgress(xvac))
            {
                if (TransactionIdDidCommit(xvac))
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return false;
                }
                SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                            InvalidTransactionId);
            }
        }
        else if (tuple->t_infomask & HEAP_MOVED_IN)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (!TransactionIdIsCurrentTransactionId(xvac))
            {
                if (TransactionIdIsInProgress(xvac))
                    return false;
                if (TransactionIdDidCommit(xvac))
                    SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                                InvalidTransactionId);
                else
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return false;
                }
            }
        }
        else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple)))
        {
            if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid */
                return true;

            if (tuple->t_infomask & HEAP_IS_LOCKED)     /* not deleter */
                return true;

            Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));

            if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
            {
                /* deleting subtransaction must have aborted */
                SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                            InvalidTransactionId);
                return true;
            }

            return false;
        }
        else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple)))
        {
            snapshot->xmin = HeapTupleHeaderGetXmin(tuple);
            /* XXX shouldn't we fall through to look at xmax? */
            return true;        /* in insertion by other */
        }
        else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
            SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                        HeapTupleHeaderGetXmin(tuple));
        else
        {
            /* it must have aborted or crashed */
            SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                        InvalidTransactionId);
            return false;
        }
    }

    /* by here, the inserting transaction has committed */

    if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid or aborted */
        return true;

    if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
    {
        if (tuple->t_infomask & HEAP_IS_LOCKED)
            return true;
        return false;           /* updated by other */
    }

    if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
    {
        /* MultiXacts are currently only allowed to lock tuples */
        Assert(tuple->t_infomask & HEAP_IS_LOCKED);
        return true;
    }

    if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
    {
        if (tuple->t_infomask & HEAP_IS_LOCKED)
            return true;
        return false;
    }

    if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
    {
        snapshot->xmax = HeapTupleHeaderGetXmax(tuple);
        return true;
    }

    if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
    {
        /* it must have aborted or crashed */
        SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                    InvalidTransactionId);
        return true;
    }

    /* xmax transaction committed */

    if (tuple->t_infomask & HEAP_IS_LOCKED)
    {
        SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                    InvalidTransactionId);
        return true;
    }

    SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
                HeapTupleHeaderGetXmax(tuple));
    return false;               /* updated by other */
}
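/*
 * Editor's sketch of a typical dirty-snapshot caller (loosely modeled on the
 * duplicate-key wait logic in _bt_check_unique; simplified, and using the
 * single-argument XactLockTableWait of older releases): the xmin/xmax output
 * fields documented above tell the caller whom to wait for.  The buffer is
 * assumed share-locked, as the visibility routines require.
 */
static bool
example_check_or_wait(HeapTupleHeader tuple, Buffer buffer)
{
    SnapshotData snap;

    memset(&snap, 0, sizeof(snap));
    /* (real callers initialize the snapshot's satisfies method here) */

    if (HeapTupleSatisfiesDirty(tuple, &snap, buffer))
        return true;            /* live, possibly uncommitted, tuple */

    if (TransactionIdIsValid(snap.xmin))
        XactLockTableWait(snap.xmin);   /* inserter still in progress */
    else if (TransactionIdIsValid(snap.xmax))
        XactLockTableWait(snap.xmax);   /* deleter still in progress */

    return false;
}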
/*
 * HeapTupleSatisfiesUpdate
 *
 *  Same logic as HeapTupleSatisfiesNow, but returns a more detailed result
 *  code, since UPDATE needs to know more than "is it visible?".  Also,
 *  tuples of my own xact are tested against the passed CommandId not
 *  CurrentCommandId.
 *
 *  The possible return codes are:
 *
 *  HeapTupleInvisible: the tuple didn't exist at all when the scan started,
 *  e.g. it was created by a later CommandId.
 *
 *  HeapTupleMayBeUpdated: The tuple is valid and visible, so it may be
 *  updated.
 *
 *  HeapTupleSelfUpdated: The tuple was updated by the current transaction,
 *  after the current scan started.
 *
 *  HeapTupleUpdated: The tuple was updated by a committed transaction.
 *
 *  HeapTupleBeingUpdated: The tuple is being updated by an in-progress
 *  transaction other than the current transaction.  (Note: this includes
 *  the case where the tuple is share-locked by a MultiXact, even if the
 *  MultiXact includes the current transaction.  Callers that want to
 *  distinguish that case must test for it themselves.)
 */
HTSU_Result
HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid,
                         Buffer buffer)
{
    if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
    {
        if (tuple->t_infomask & HEAP_XMIN_INVALID)
            return HeapTupleInvisible;

        if (tuple->t_infomask & HEAP_MOVED_OFF)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (TransactionIdIsCurrentTransactionId(xvac))
                return HeapTupleInvisible;
            if (!TransactionIdIsInProgress(xvac))
            {
                if (TransactionIdDidCommit(xvac))
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return HeapTupleInvisible;
                }
                SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                            InvalidTransactionId);
            }
        }
        else if (tuple->t_infomask & HEAP_MOVED_IN)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (!TransactionIdIsCurrentTransactionId(xvac))
            {
                if (TransactionIdIsInProgress(xvac))
                    return HeapTupleInvisible;
                if (TransactionIdDidCommit(xvac))
                    SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                                InvalidTransactionId);
                else
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return HeapTupleInvisible;
                }
            }
        }
        else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple)))
        {
            if (HeapTupleHeaderGetCmin(tuple) >= curcid)
                return HeapTupleInvisible;      /* inserted after scan started */

            if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid */
                return HeapTupleMayBeUpdated;

            if (tuple->t_infomask & HEAP_IS_LOCKED)     /* not deleter */
                return HeapTupleMayBeUpdated;

            Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));

            if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
            {
                /* deleting subtransaction must have aborted */
                SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                            InvalidTransactionId);
                return HeapTupleMayBeUpdated;
            }

            if (HeapTupleHeaderGetCmax(tuple) >= curcid)
                return HeapTupleSelfUpdated;    /* updated after scan started */
            else
                return HeapTupleInvisible;      /* updated before scan started */
        }
        else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple)))
            return HeapTupleInvisible;
        else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
            SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                        HeapTupleHeaderGetXmin(tuple));
        else
        {
            /* it must have aborted or crashed */
            SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                        InvalidTransactionId);
            return HeapTupleInvisible;
        }
    }

    /* by here, the inserting transaction has committed */

    if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid or aborted */
        return HeapTupleMayBeUpdated;

    if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
    {
        if (tuple->t_infomask & HEAP_IS_LOCKED)
            return HeapTupleMayBeUpdated;
        return HeapTupleUpdated;    /* updated by other */
    }

    if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
    {
        /* MultiXacts are currently only allowed to lock tuples */
        Assert(tuple->t_infomask & HEAP_IS_LOCKED);

        if (MultiXactIdIsRunning(HeapTupleHeaderGetXmax(tuple)))
            return HeapTupleBeingUpdated;
        SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                    InvalidTransactionId);
        return HeapTupleMayBeUpdated;
    }

    if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
    {
        if (tuple->t_infomask & HEAP_IS_LOCKED)
            return HeapTupleMayBeUpdated;
        if (HeapTupleHeaderGetCmax(tuple) >= curcid)
            return HeapTupleSelfUpdated;    /* updated after scan started */
        else
            return HeapTupleInvisible;      /* updated before scan started */
    }

    if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
        return HeapTupleBeingUpdated;

    if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
    {
        /* it must have aborted or crashed */
        SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                    InvalidTransactionId);
        return HeapTupleMayBeUpdated;
    }

    /* xmax transaction committed */

    if (tuple->t_infomask & HEAP_IS_LOCKED)
    {
        SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                    InvalidTransactionId);
        return HeapTupleMayBeUpdated;
    }

    SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
                HeapTupleHeaderGetXmax(tuple));
    return HeapTupleUpdated;    /* updated by other */
}
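/*
 * Editor's sketch of a caller dispatching on the result codes documented
 * above (loosely modeled on heap_update; error texts are illustrative, not
 * the actual backend messages):
 */
static void
example_handle_htsu(HTSU_Result result)
{
    switch (result)
    {
        case HeapTupleMayBeUpdated:
            break;              /* proceed with the update */
        case HeapTupleSelfUpdated:
            /* already updated by a later command in our own xact: skip it */
            break;
        case HeapTupleBeingUpdated:
            /* a real caller would wait for the concurrent updater, then
             * recheck visibility */
            break;
        case HeapTupleUpdated:
            ereport(ERROR,
                    (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                     errmsg("could not serialize access due to concurrent update")));
            break;
        case HeapTupleInvisible:
            elog(ERROR, "attempted to update invisible tuple");
            break;
    }
}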
/*
 * HeapTupleSatisfiesVacuum
 *
 *  Determine the status of tuples for VACUUM purposes.  Here, what
 *  we mainly want to know is if a tuple is potentially visible to *any*
 *  running transaction.  If so, it can't be removed yet by VACUUM.
 *
 * OldestXmin is a cutoff XID (obtained from GetOldestXmin()).  Tuples
 * deleted by XIDs >= OldestXmin are deemed "recently dead"; they might
 * still be visible to some open transaction, so we can't remove them,
 * even if we see that the deleting transaction has committed.
 */
HTSV_Result
HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin,
                         Buffer buffer)
{
    /*
     * Has inserting transaction committed?
     *
     * If the inserting transaction aborted, then the tuple was never visible
     * to any other transaction, so we can delete it immediately.
     */
    if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
    {
        if (tuple->t_infomask & HEAP_XMIN_INVALID)
            return HEAPTUPLE_DEAD;
        else if (tuple->t_infomask & HEAP_MOVED_OFF)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (TransactionIdIsCurrentTransactionId(xvac))
                return HEAPTUPLE_DELETE_IN_PROGRESS;
            if (TransactionIdIsInProgress(xvac))
                return HEAPTUPLE_DELETE_IN_PROGRESS;
            if (TransactionIdDidCommit(xvac))
            {
                SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                            InvalidTransactionId);
                return HEAPTUPLE_DEAD;
            }
            SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                        InvalidTransactionId);
        }
        else if (tuple->t_infomask & HEAP_MOVED_IN)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (TransactionIdIsCurrentTransactionId(xvac))
                return HEAPTUPLE_INSERT_IN_PROGRESS;
            if (TransactionIdIsInProgress(xvac))
                return HEAPTUPLE_INSERT_IN_PROGRESS;
            if (TransactionIdDidCommit(xvac))
                SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                            InvalidTransactionId);
            else
            {
                SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                            InvalidTransactionId);
                return HEAPTUPLE_DEAD;
            }
        }
        else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple)))
        {
            if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid */
                return HEAPTUPLE_INSERT_IN_PROGRESS;
            if (tuple->t_infomask & HEAP_IS_LOCKED)
                return HEAPTUPLE_INSERT_IN_PROGRESS;
            /* inserted and then deleted by same xact */
            return HEAPTUPLE_DELETE_IN_PROGRESS;
        }
        else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
            SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                        HeapTupleHeaderGetXmin(tuple));
        else
        {
            /*
             * Not in Progress, Not Committed, so either Aborted or crashed
             */
            SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                        InvalidTransactionId);
            return HEAPTUPLE_DEAD;
        }

        /*
         * At this point the xmin is known committed, but we might not have
         * been able to set the hint bit yet; so we can no longer Assert that
         * it's set.
         */
    }

    /*
     * Okay, the inserter committed, so it was good at some point.  Now what
     * about the deleting transaction?
     */
    if (tuple->t_infomask & HEAP_XMAX_INVALID)
        return HEAPTUPLE_LIVE;

    if (tuple->t_infomask & HEAP_IS_LOCKED)
    {
        /*
         * "Deleting" xact really only locked it, so the tuple is live in any
         * case.  However, we should make sure that either XMAX_COMMITTED or
         * XMAX_INVALID gets set once the xact is gone, to reduce the costs
         * of examining the tuple for future xacts.  Also, marking dead
         * MultiXacts as invalid here provides defense against MultiXactId
         * wraparound (see also comments in heap_freeze_tuple()).
         */
        if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
        {
            if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
            {
                if (MultiXactIdIsRunning(HeapTupleHeaderGetXmax(tuple)))
                    return HEAPTUPLE_LIVE;
            }
            else
            {
                if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
                    return HEAPTUPLE_LIVE;
            }

            /*
             * We don't really care whether xmax did commit, abort or crash.
             * We know that xmax did lock the tuple, but it did not and will
             * never actually update it.
             */
            SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                        InvalidTransactionId);
        }
        return HEAPTUPLE_LIVE;
    }

    if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
    {
        /* MultiXacts are currently only allowed to lock tuples */
        Assert(tuple->t_infomask & HEAP_IS_LOCKED);
        return HEAPTUPLE_LIVE;
    }

    if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
    {
        if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
            return HEAPTUPLE_DELETE_IN_PROGRESS;
        else if (TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
            SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
                        HeapTupleHeaderGetXmax(tuple));
        else
        {
            /*
             * Not in Progress, Not Committed, so either Aborted or crashed
             */
            SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                        InvalidTransactionId);
            return HEAPTUPLE_LIVE;
        }

        /*
         * At this point the xmax is known committed, but we might not have
         * been able to set the hint bit yet; so we can no longer Assert that
         * it's set.
         */
    }

    /*
     * Deleter committed, but check special cases.
     */
    if (TransactionIdEquals(HeapTupleHeaderGetXmin(tuple),
                            HeapTupleHeaderGetXmax(tuple)))
    {
        /*
         * Inserter also deleted it, so it was never visible to anyone else.
         * However, we can only remove it early if it's not an updated tuple;
         * else its parent tuple is linking to it via t_ctid, and this tuple
         * mustn't go away before the parent does.
         */
        if (!(tuple->t_infomask & HEAP_UPDATED))
            return HEAPTUPLE_DEAD;
    }

    if (!TransactionIdPrecedes(HeapTupleHeaderGetXmax(tuple), OldestXmin))
    {
        /* deleting xact is too recent, tuple could still be visible */
        return HEAPTUPLE_RECENTLY_DEAD;
    }

    /* Otherwise, it's dead and removable */
    return HEAPTUPLE_DEAD;
}
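/*
 * Editor's sketch of a vacuum-style scan using the classification above
 * (simplified from lazy_scan_heap; the buffer is assumed pinned and locked,
 * as the function's contract requires):
 */
static int
example_count_removable(Page page, Buffer buffer, TransactionId OldestXmin)
{
    OffsetNumber offnum,
                maxoff = PageGetMaxOffsetNumber(page);
    int         ndead = 0;

    for (offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid = PageGetItemId(page, offnum);
        HeapTupleHeader tuple;

        if (!ItemIdIsUsed(itemid))
            continue;
        tuple = (HeapTupleHeader) PageGetItem(page, itemid);
        if (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buffer) == HEAPTUPLE_DEAD)
            ndead++;            /* removable right away */
        /* RECENTLY_DEAD tuples must be kept for concurrent snapshots */
    }
    return ndead;
}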
Datum
heap_page_items(PG_FUNCTION_ARGS)
{
    bytea      *raw_page = PG_GETARG_BYTEA_P(0);
    heap_page_items_state *inter_call_data = NULL;
    FuncCallContext *fctx;
    int         raw_page_size;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 (errmsg("must be superuser to use raw page functions"))));

    raw_page_size = VARSIZE(raw_page) - VARHDRSZ;

    if (SRF_IS_FIRSTCALL())
    {
        TupleDesc   tupdesc;
        MemoryContext mctx;

        if (raw_page_size < SizeOfPageHeaderData)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("input page too small (%d bytes)", raw_page_size)));

        fctx = SRF_FIRSTCALL_INIT();
        mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

        inter_call_data = palloc(sizeof(heap_page_items_state));

        /* Build a tuple descriptor for our result type */
        if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
            elog(ERROR, "return type must be a row type");

        inter_call_data->tupd = tupdesc;

        inter_call_data->offset = FirstOffsetNumber;
        inter_call_data->page = VARDATA(raw_page);

        fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
        fctx->user_fctx = inter_call_data;

        MemoryContextSwitchTo(mctx);
    }

    fctx = SRF_PERCALL_SETUP();
    inter_call_data = fctx->user_fctx;

    if (fctx->call_cntr < fctx->max_calls)
    {
        Page        page = inter_call_data->page;
        HeapTuple   resultTuple;
        Datum       result;
        ItemId      id;
        Datum       values[13];
        bool        nulls[13];
        uint16      lp_offset;
        uint16      lp_flags;
        uint16      lp_len;

        memset(nulls, 0, sizeof(nulls));

        /* Extract information from the line pointer */
        id = PageGetItemId(page, inter_call_data->offset);

        lp_offset = ItemIdGetOffset(id);
        lp_flags = ItemIdGetFlags(id);
        lp_len = ItemIdGetLength(id);

        values[0] = UInt16GetDatum(inter_call_data->offset);
        values[1] = UInt16GetDatum(lp_offset);
        values[2] = UInt16GetDatum(lp_flags);
        values[3] = UInt16GetDatum(lp_len);

        /*
         * We do just enough validity checking to make sure we don't
         * reference data outside the page passed to us.  The page could be
         * corrupt in many other ways, but at least we won't crash.
         */
        if (ItemIdHasStorage(id) &&
            lp_len >= sizeof(HeapTupleHeader) &&
            lp_offset == MAXALIGN(lp_offset) &&
            lp_offset + lp_len <= raw_page_size)
        {
            HeapTupleHeader tuphdr;
            int         bits_len;

            /* Extract information from the tuple header */
            tuphdr = (HeapTupleHeader) PageGetItem(page, id);

            values[4] = UInt32GetDatum(HeapTupleHeaderGetXmin(tuphdr));
            values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
            /* shared with xvac */
            values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
            values[7] = PointerGetDatum(&tuphdr->t_ctid);
            values[8] = UInt32GetDatum(tuphdr->t_infomask2);
            values[9] = UInt32GetDatum(tuphdr->t_infomask);
            values[10] = UInt8GetDatum(tuphdr->t_hoff);

            /*
             * We already checked that the item is completely within the raw
             * page passed to us, with the length given in the line pointer.
             * Let's check that t_hoff doesn't point over lp_len, before
             * using it to access t_bits and oid.
             */
            if (tuphdr->t_hoff >= sizeof(HeapTupleHeader) &&
                tuphdr->t_hoff <= lp_len)
            {
                if (tuphdr->t_infomask & HEAP_HASNULL)
                {
                    bits_len = tuphdr->t_hoff -
                        (((char *) tuphdr->t_bits) - ((char *) tuphdr));

                    values[11] = CStringGetTextDatum(
                                bits_to_text(tuphdr->t_bits, bits_len * 8));
                }
                else
                    nulls[11] = true;

                if (tuphdr->t_infomask & HEAP_HASOID)
                    values[12] = ObjectIdGetDatum(HeapTupleHeaderGetOid(tuphdr));
                else
                    nulls[12] = true;
            }
            else
            {
                nulls[11] = true;
                nulls[12] = true;
            }
        }
        else
        {
            /*
             * The line pointer is not used, or it's invalid.  Set the rest
             * of the fields to NULL.
             */
            int         i;

            for (i = 4; i <= 12; i++)
                nulls[i] = true;
        }

        /* Build and return the result tuple. */
        resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
        result = HeapTupleGetDatum(resultTuple);

        inter_call_data->offset++;

        SRF_RETURN_NEXT(fctx, result);
    }
    else
        SRF_RETURN_DONE(fctx);
}
/*
 * Disallow use of an uncommitted pg_enum tuple.
 *
 * We need to make sure that uncommitted enum values don't get into indexes.
 * If they did, and if we then rolled back the pg_enum addition, we'd have
 * broken the index because value comparisons will not work reliably without
 * an underlying pg_enum entry.  (Note that removal of the heap entry
 * containing an enum value is not sufficient to ensure that it doesn't
 * appear in upper levels of indexes.)  To do this we prevent an uncommitted
 * row from being used for any SQL-level purpose.  This is stronger than
 * necessary, since the value might not be getting inserted into a table or
 * there might be no index on its column, but it's easy to enforce centrally.
 *
 * However, it's okay to allow use of uncommitted values belonging to enum
 * types that were themselves created in the same transaction, because then
 * any such index would also be new and would go away altogether on rollback.
 * (This case is required by pg_upgrade.)
 *
 * This function needs to be called (directly or indirectly) in any of the
 * functions below that could return an enum value to SQL operations.
 */
static void
check_safe_enum_use(HeapTuple enumval_tup)
{
    TransactionId xmin;
    Form_pg_enum en;
    HeapTuple   enumtyp_tup;

    /*
     * If the row is hinted as committed, it's surely safe.  This provides a
     * fast path for all normal use-cases.
     */
    if (HeapTupleHeaderXminCommitted(enumval_tup->t_data))
        return;

    /*
     * Usually, a row would get hinted as committed when it's read or loaded
     * into syscache; but just in case not, let's check the xmin directly.
     */
    xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data);
    if (!TransactionIdIsInProgress(xmin) &&
        TransactionIdDidCommit(xmin))
        return;

    /* It is a new enum value, so check to see if the whole enum is new */
    en = (Form_pg_enum) GETSTRUCT(enumval_tup);
    enumtyp_tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(en->enumtypid));
    if (!HeapTupleIsValid(enumtyp_tup))
        elog(ERROR, "cache lookup failed for type %u", en->enumtypid);

    /*
     * We insist that the type have been created in the same (sub)transaction
     * as the enum value.  It would be safe to allow the type's originating
     * xact to be a subcommitted child of the enum value's xact, but not vice
     * versa (since we might now be in a subxact of the type's originating
     * xact, which could roll back along with the enum value's subxact).  The
     * former case seems a sufficiently weird usage pattern as to not be
     * worth spending code for, so we're left with a simple equality check.
     *
     * We also insist that the type's pg_type row not be HEAP_UPDATED.  If it
     * is, we can't tell whether the row was created or only modified in the
     * apparent originating xact, so it might be older than that xact.  (We
     * do not worry whether the enum value is HEAP_UPDATED; if it is, we
     * might think it's too new and throw an unnecessary error, but we won't
     * allow an unsafe case.)
     */
    if (xmin == HeapTupleHeaderGetXmin(enumtyp_tup->t_data) &&
        !(enumtyp_tup->t_data->t_infomask & HEAP_UPDATED))
    {
        /* same (sub)transaction, so safe */
        ReleaseSysCache(enumtyp_tup);
        return;
    }

    /*
     * There might well be other tests we could do here to narrow down the
     * unsafe conditions, but for now just raise an exception.
     */
    ereport(ERROR,
            (errcode(ERRCODE_UNSAFE_NEW_ENUM_VALUE_USAGE),
             errmsg("unsafe use of new value \"%s\" of enum type %s",
                    NameStr(en->enumlabel),
                    format_type_be(en->enumtypid)),
             errhint("New enum values must be committed before they can be used.")));
}
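/*
 * Editor's sketch of a caller, modeled on enum_in(): any code path that can
 * hand an enum value to SQL looks the pg_enum tuple up and runs it through
 * check_safe_enum_use() before using its OID.  (The error message is
 * illustrative only.)
 */
static Oid
example_enum_lookup(Oid enumtypoid, const char *name)
{
    HeapTuple   tup;
    Oid         enumoid;

    tup = SearchSysCache2(ENUMTYPOIDNAME,
                          ObjectIdGetDatum(enumtypoid),
                          CStringGetDatum(name));
    if (!HeapTupleIsValid(tup))
        elog(ERROR, "invalid input value for enum: \"%s\"", name);

    /* reject the value if it was added by an uncommitted transaction */
    check_safe_enum_use(tup);

    enumoid = HeapTupleGetOid(tup);
    ReleaseSysCache(tup);
    return enumoid;
}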
static plperl_proc_desc *
compile_plperl_function(Oid fn_oid, bool is_trigger)
{
    HeapTuple   procTup;
    Form_pg_proc procStruct;
    char        internal_proname[64];
    int         proname_len;
    plperl_proc_desc *prodesc = NULL;
    int         i;
    SV        **svp;

    /* We'll need the pg_proc tuple in any case... */
    procTup = SearchSysCache(PROCOID,
                             ObjectIdGetDatum(fn_oid),
                             0, 0, 0);
    if (!HeapTupleIsValid(procTup))
        elog(ERROR, "cache lookup failed for function %u", fn_oid);
    procStruct = (Form_pg_proc) GETSTRUCT(procTup);

    /************************************************************
     * Build our internal proc name from the function's Oid
     ************************************************************/
    if (!is_trigger)
        sprintf(internal_proname, "__PLPerl_proc_%u", fn_oid);
    else
        sprintf(internal_proname, "__PLPerl_proc_%u_trigger", fn_oid);

    proname_len = strlen(internal_proname);

    /************************************************************
     * Lookup the internal proc name in the hashtable
     ************************************************************/
    svp = hv_fetch(plperl_proc_hash, internal_proname, proname_len, FALSE);
    if (svp)
    {
        bool        uptodate;

        prodesc = (plperl_proc_desc *) SvIV(*svp);

        /************************************************************
         * If it's present, must check whether it's still up to date.
         * This is needed because CREATE OR REPLACE FUNCTION can modify the
         * function's pg_proc entry without changing its OID.
         ************************************************************/
        uptodate = (prodesc->fn_xmin == HeapTupleHeaderGetXmin(procTup->t_data) &&
                    prodesc->fn_cmin == HeapTupleHeaderGetCmin(procTup->t_data));

        if (!uptodate)
        {
            /* need we delete old entry? */
            prodesc = NULL;
        }
    }

    /************************************************************
     * If we haven't found it in the hashtable, we analyze
     * the function's arguments and return type and store
     * the in-/out-functions in the prodesc block and create
     * a new hashtable entry for it.
     *
     * Then we load the procedure into the Perl interpreter.
     ************************************************************/
    if (prodesc == NULL)
    {
        HeapTuple   langTup;
        HeapTuple   typeTup;
        Form_pg_language langStruct;
        Form_pg_type typeStruct;
        Datum       prosrcdatum;
        bool        isnull;
        char       *proc_source;

        /************************************************************
         * Allocate a new procedure description block
         ************************************************************/
        prodesc = (plperl_proc_desc *) malloc(sizeof(plperl_proc_desc));
        if (prodesc == NULL)
            ereport(ERROR,
                    (errcode(ERRCODE_OUT_OF_MEMORY),
                     errmsg("out of memory")));
        MemSet(prodesc, 0, sizeof(plperl_proc_desc));
        prodesc->proname = strdup(internal_proname);
        prodesc->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data);
        prodesc->fn_cmin = HeapTupleHeaderGetCmin(procTup->t_data);

        /* Remember if function is STABLE/IMMUTABLE */
        prodesc->fn_readonly =
            (procStruct->provolatile != PROVOLATILE_VOLATILE);

        /************************************************************
         * Lookup the pg_language tuple by Oid
         ************************************************************/
        langTup = SearchSysCache(LANGOID,
                                 ObjectIdGetDatum(procStruct->prolang),
                                 0, 0, 0);
        if (!HeapTupleIsValid(langTup))
        {
            free(prodesc->proname);
            free(prodesc);
            elog(ERROR, "cache lookup failed for language %u",
                 procStruct->prolang);
        }
        langStruct = (Form_pg_language) GETSTRUCT(langTup);
        prodesc->lanpltrusted = langStruct->lanpltrusted;
        ReleaseSysCache(langTup);

        /************************************************************
         * Get the required information for input conversion of the
         * return value.
         ************************************************************/
        if (!is_trigger)
        {
            typeTup = SearchSysCache(TYPEOID,
                                     ObjectIdGetDatum(procStruct->prorettype),
                                     0, 0, 0);
            if (!HeapTupleIsValid(typeTup))
            {
                free(prodesc->proname);
                free(prodesc);
                elog(ERROR, "cache lookup failed for type %u",
                     procStruct->prorettype);
            }
            typeStruct = (Form_pg_type) GETSTRUCT(typeTup);

            /* Disallow pseudotype result, except VOID or RECORD */
            if (typeStruct->typtype == 'p')
            {
                if (procStruct->prorettype == VOIDOID ||
                    procStruct->prorettype == RECORDOID)
                     /* okay */ ;
                else if (procStruct->prorettype == TRIGGEROID)
                {
                    free(prodesc->proname);
                    free(prodesc);
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg("trigger functions may only be called "
                                    "as triggers")));
                }
                else
                {
                    free(prodesc->proname);
                    free(prodesc);
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg("plperl functions cannot return type %s",
                                    format_type_be(procStruct->prorettype))));
                }
            }

            prodesc->result_oid = procStruct->prorettype;
            prodesc->fn_retisset = procStruct->proretset;
            prodesc->fn_retistuple = (typeStruct->typtype == 'c' ||
                                      procStruct->prorettype == RECORDOID);

            prodesc->fn_retisarray =
                (typeStruct->typlen == -1 && typeStruct->typelem);

            perm_fmgr_info(typeStruct->typinput, &(prodesc->result_in_func));
            prodesc->result_typioparam = getTypeIOParam(typeTup);

            ReleaseSysCache(typeTup);
        }

        /************************************************************
         * Get the required information for output conversion
         * of all procedure arguments
         ************************************************************/
        if (!is_trigger)
        {
            prodesc->nargs = procStruct->pronargs;
            for (i = 0; i < prodesc->nargs; i++)
            {
                typeTup = SearchSysCache(TYPEOID,
                        ObjectIdGetDatum(procStruct->proargtypes.values[i]),
                                         0, 0, 0);
                if (!HeapTupleIsValid(typeTup))
                {
                    free(prodesc->proname);
                    free(prodesc);
                    elog(ERROR, "cache lookup failed for type %u",
                         procStruct->proargtypes.values[i]);
                }
                typeStruct = (Form_pg_type) GETSTRUCT(typeTup);

                /* Disallow pseudotype argument */
                if (typeStruct->typtype == 'p')
                {
                    free(prodesc->proname);
                    free(prodesc);
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg("plperl functions cannot take type %s",
                        format_type_be(procStruct->proargtypes.values[i]))));
                }

                if (typeStruct->typtype == 'c')
                    prodesc->arg_is_rowtype[i] = true;
                else
                {
                    prodesc->arg_is_rowtype[i] = false;
                    perm_fmgr_info(typeStruct->typoutput,
                                   &(prodesc->arg_out_func[i]));
                }

                ReleaseSysCache(typeTup);
            }
        }

        /************************************************************
         * Create the text of the anonymous subroutine.
         * We do not use a named subroutine so that we can call directly
         * through the reference.
         ************************************************************/
        prosrcdatum = SysCacheGetAttr(PROCOID, procTup,
                                      Anum_pg_proc_prosrc, &isnull);
        if (isnull)
            elog(ERROR, "null prosrc");
        proc_source = DatumGetCString(DirectFunctionCall1(textout,
                                                          prosrcdatum));

        /************************************************************
         * Create the procedure in the interpreter
         ************************************************************/
        prodesc->reference = plperl_create_sub(proc_source,
                                               prodesc->lanpltrusted);
        pfree(proc_source);
        if (!prodesc->reference)    /* can this happen? */
        {
            free(prodesc->proname);
            free(prodesc);
            elog(ERROR, "could not create internal procedure \"%s\"",
                 internal_proname);
        }

        hv_store(plperl_proc_hash, internal_proname, proname_len,
                 newSViv((IV) prodesc), 0);
    }

    ReleaseSysCache(procTup);

    return prodesc;
}
/* * get_relation_info - * Retrieves catalog information for a given relation. * * Given the Oid of the relation, return the following info into fields * of the RelOptInfo struct: * * min_attr lowest valid AttrNumber * max_attr highest valid AttrNumber * indexlist list of IndexOptInfos for relation's indexes * pages number of pages * tuples number of tuples * * Also, initialize the attr_needed[] and attr_widths[] arrays. In most * cases these are left as zeroes, but sometimes we need to compute attr * widths here, and we may as well cache the results for costsize.c. * * If inhparent is true, all we need to do is set up the attr arrays: * the RelOptInfo actually represents the appendrel formed by an inheritance * tree, and so the parent rel's physical size and index information isn't * important for it. */ void get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel) { Index varno = rel->relid; Relation relation; bool hasindex; List *indexinfos = NIL; /* * We need not lock the relation since it was already locked, either by * the rewriter or when expand_inherited_rtentry() added it to the query's * rangetable. */ relation = heap_open(relationObjectId, NoLock); /* Temporary and unlogged relations are inaccessible during recovery. */ if (!RelationNeedsWAL(relation) && RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary or unlogged relations during recovery"))); rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1; rel->max_attr = RelationGetNumberOfAttributes(relation); rel->reltablespace = RelationGetForm(relation)->reltablespace; Assert(rel->max_attr >= rel->min_attr); rel->attr_needed = (Relids *) palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids)); rel->attr_widths = (int32 *) palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32)); /* * Estimate relation size --- unless it's an inheritance parent, in which * case the size will be computed later in set_append_rel_pathlist, and we * must leave it zero for now to avoid bollixing the total_table_pages * calculation. */ if (!inhparent) estimate_rel_size(relation, rel->attr_widths - rel->min_attr, &rel->pages, &rel->tuples, &rel->allvisfrac); /* * Make list of indexes. Ignore indexes on system catalogs if told to. * Don't bother with indexes for an inheritance parent, either. */ if (inhparent || (IgnoreSystemIndexes && IsSystemClass(relation->rd_rel))) hasindex = false; else hasindex = relation->rd_rel->relhasindex; if (hasindex) { List *indexoidlist; ListCell *l; LOCKMODE lmode; indexoidlist = RelationGetIndexList(relation); /* * For each index, we get the same type of lock that the executor will * need, and do not release it. This saves a couple of trips to the * shared lock manager while not creating any real loss of * concurrency, because no schema changes could be happening on the * index while we hold lock on the parent rel, and neither lock type * blocks any other kind of index operation. */ if (rel->relid == root->parse->resultRelation) lmode = RowExclusiveLock; else lmode = AccessShareLock; foreach(l, indexoidlist) { Oid indexoid = lfirst_oid(l); Relation indexRelation; Form_pg_index index; IndexOptInfo *info; int ncolumns; int i; /* * Extract info from the relation descriptor for the index. */ indexRelation = index_open(indexoid, lmode); index = indexRelation->rd_index; /* * Ignore invalid indexes, since they can't safely be used for * queries. 
Note that this is OK because the data structure we * are constructing is only used by the planner --- the executor * still needs to insert into "invalid" indexes! */ if (!index->indisvalid) { index_close(indexRelation, NoLock); continue; } /* * If the index is valid, but cannot yet be used, ignore it; but * mark the plan we are generating as transient. See * src/backend/access/heap/README.HOT for discussion. */ if (index->indcheckxmin && !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data), TransactionXmin)) { root->glob->transientPlan = true; index_close(indexRelation, NoLock); continue; } info = makeNode(IndexOptInfo); info->indexoid = index->indexrelid; info->reltablespace = RelationGetForm(indexRelation)->reltablespace; info->rel = rel; info->ncolumns = ncolumns = index->indnatts; info->indexkeys = (int *) palloc(sizeof(int) * ncolumns); info->indexcollations = (Oid *) palloc(sizeof(Oid) * ncolumns); info->opfamily = (Oid *) palloc(sizeof(Oid) * ncolumns); info->opcintype = (Oid *) palloc(sizeof(Oid) * ncolumns); for (i = 0; i < ncolumns; i++) { info->indexkeys[i] = index->indkey.values[i]; info->indexcollations[i] = indexRelation->rd_indcollation[i]; info->opfamily[i] = indexRelation->rd_opfamily[i]; info->opcintype[i] = indexRelation->rd_opcintype[i]; } info->relam = indexRelation->rd_rel->relam; info->amcostestimate = indexRelation->rd_am->amcostestimate; info->canreturn = index_can_return(indexRelation); info->amcanorderbyop = indexRelation->rd_am->amcanorderbyop; info->amoptionalkey = indexRelation->rd_am->amoptionalkey; info->amsearcharray = indexRelation->rd_am->amsearcharray; info->amsearchnulls = indexRelation->rd_am->amsearchnulls; info->amhasgettuple = OidIsValid(indexRelation->rd_am->amgettuple); info->amhasgetbitmap = OidIsValid(indexRelation->rd_am->amgetbitmap); /* * Fetch the ordering information for the index, if any. */ if (info->relam == BTREE_AM_OID) { /* * If it's a btree index, we can use its opfamily OIDs * directly as the sort ordering opfamily OIDs. */ Assert(indexRelation->rd_am->amcanorder); info->sortopfamily = info->opfamily; info->reverse_sort = (bool *) palloc(sizeof(bool) * ncolumns); info->nulls_first = (bool *) palloc(sizeof(bool) * ncolumns); for (i = 0; i < ncolumns; i++) { int16 opt = indexRelation->rd_indoption[i]; info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0; info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0; } } else if (indexRelation->rd_am->amcanorder) { /* * Otherwise, identify the corresponding btree opfamilies by * trying to map this index's "<" operators into btree. Since * "<" uniquely defines the behavior of a sort order, this is * a sufficient test. * * XXX This method is rather slow and also requires the * undesirable assumption that the other index AM numbers its * strategies the same as btree. It'd be better to have a way * to explicitly declare the corresponding btree opfamily for * each opfamily of the other index type. But given the lack * of current or foreseeable amcanorder index types, it's not * worth expending more effort on now. 
*/ info->sortopfamily = (Oid *) palloc(sizeof(Oid) * ncolumns); info->reverse_sort = (bool *) palloc(sizeof(bool) * ncolumns); info->nulls_first = (bool *) palloc(sizeof(bool) * ncolumns); for (i = 0; i < ncolumns; i++) { int16 opt = indexRelation->rd_indoption[i]; Oid ltopr; Oid btopfamily; Oid btopcintype; int16 btstrategy; info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0; info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0; ltopr = get_opfamily_member(info->opfamily[i], info->opcintype[i], info->opcintype[i], BTLessStrategyNumber); if (OidIsValid(ltopr) && get_ordering_op_properties(ltopr, &btopfamily, &btopcintype, &btstrategy) && btopcintype == info->opcintype[i] && btstrategy == BTLessStrategyNumber) { /* Successful mapping */ info->sortopfamily[i] = btopfamily; } else { /* Fail ... quietly treat index as unordered */ info->sortopfamily = NULL; info->reverse_sort = NULL; info->nulls_first = NULL; break; } } } else { info->sortopfamily = NULL; info->reverse_sort = NULL; info->nulls_first = NULL; } /* * Fetch the index expressions and predicate, if any. We must * modify the copies we obtain from the relcache to have the * correct varno for the parent relation, so that they match up * correctly against qual clauses. */ info->indexprs = RelationGetIndexExpressions(indexRelation); info->indpred = RelationGetIndexPredicate(indexRelation); if (info->indexprs && varno != 1) ChangeVarNodes((Node *) info->indexprs, 1, varno, 0); if (info->indpred && varno != 1) ChangeVarNodes((Node *) info->indpred, 1, varno, 0); /* Build targetlist using the completed indexprs data */ info->indextlist = build_index_tlist(root, info, relation); info->predOK = false; /* set later in indxpath.c */ info->unique = index->indisunique; info->immediate = index->indimmediate; info->hypothetical = false; /* * Estimate the index size. If it's not a partial index, we lock * the number-of-tuples estimate to equal the parent table; if it * is partial then we have to use the same methods as we would for * a table, except we can be sure that the index is not larger * than the table. */ if (info->indpred == NIL) { info->pages = RelationGetNumberOfBlocks(indexRelation); info->tuples = rel->tuples; } else { double allvisfrac; /* dummy */ estimate_rel_size(indexRelation, NULL, &info->pages, &info->tuples, &allvisfrac); if (info->tuples > rel->tuples) info->tuples = rel->tuples; } index_close(indexRelation, NoLock); indexinfos = lcons(info, indexinfos); } list_free(indexoidlist); }
rel->indexlist = indexinfos; heap_close(relation, NoLock); /* * Allow a plugin to editorialize on the info we obtained from the * catalogs. Actions might include altering the assumed relation size, * removing an index, or adding a hypothetical index to the indexlist. */ if (get_relation_info_hook) (*get_relation_info_hook) (root, relationObjectId, inhparent, rel); }
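/*
 * Sketch (added example, standalone): the lock-mode choice above in
 * miniature.  The planner deliberately takes the same lock on each index
 * that the executor will take later, so the executor's lock request is a
 * cheap re-grant rather than a fresh trip to the shared lock manager.  The
 * enum and function here are mock stand-ins, not the real LOCKMODE API.
 */
#include <stdio.h>

typedef enum mock_lockmode
{
	MOCK_ACCESS_SHARE,			/* plain SELECT */
	MOCK_ROW_EXCLUSIVE			/* INSERT/UPDATE/DELETE target */
} mock_lockmode;

static mock_lockmode
index_lock_mode(int relid, int result_relation)
{
	/*
	 * Indexes of the target relation will be written to, so take the same
	 * write-intent lock the executor uses; anything else is read-only.
	 */
	return (relid == result_relation) ? MOCK_ROW_EXCLUSIVE : MOCK_ACCESS_SHARE;
}

int
main(void)
{
	printf("%d %d\n",
		   index_lock_mode(1, 1),	/* target rel: MOCK_ROW_EXCLUSIVE */
		   index_lock_mode(2, 1));	/* other rel:  MOCK_ACCESS_SHARE */
	return 0;
}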
/* * HeapTupleSatisfiesVacuum * * Determine the status of tuples for VACUUM purposes. Here, what * we mainly want to know is if a tuple is potentially visible to *any* * running transaction. If so, it can't be removed yet by VACUUM. * * OldestXmin is a cutoff XID (obtained from GetOldestXmin()). Tuples * deleted by XIDs >= OldestXmin are deemed "recently dead"; they might * still be visible to some open transaction, so we can't remove them, * even if we see that the deleting transaction has committed. */ HTSV_Result HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin) { /* * Has inserting transaction committed? * * If the inserting transaction aborted, then the tuple was never visible * to any other transaction, so we can delete it immediately. */ if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) { if (tuple->t_infomask & HEAP_XMIN_INVALID) return HEAPTUPLE_DEAD; else if (tuple->t_infomask & HEAP_MOVED_OFF) { TransactionId xvac = HeapTupleHeaderGetXvac(tuple); if (TransactionIdIsCurrentTransactionId(xvac)) return HEAPTUPLE_DELETE_IN_PROGRESS; if (TransactionIdIsInProgress(xvac)) return HEAPTUPLE_DELETE_IN_PROGRESS; if (TransactionIdDidCommit(xvac)) { tuple->t_infomask |= HEAP_XMIN_INVALID; return HEAPTUPLE_DEAD; } tuple->t_infomask |= HEAP_XMIN_COMMITTED; } else if (tuple->t_infomask & HEAP_MOVED_IN) { TransactionId xvac = HeapTupleHeaderGetXvac(tuple); if (TransactionIdIsCurrentTransactionId(xvac)) return HEAPTUPLE_INSERT_IN_PROGRESS; if (TransactionIdIsInProgress(xvac)) return HEAPTUPLE_INSERT_IN_PROGRESS; if (TransactionIdDidCommit(xvac)) tuple->t_infomask |= HEAP_XMIN_COMMITTED; else { tuple->t_infomask |= HEAP_XMIN_INVALID; return HEAPTUPLE_DEAD; } } else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return HEAPTUPLE_INSERT_IN_PROGRESS; Assert(HeapTupleHeaderGetXmin(tuple) == HeapTupleHeaderGetXmax(tuple)); if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) return HEAPTUPLE_INSERT_IN_PROGRESS; /* inserted and then deleted by same xact */ return HEAPTUPLE_DELETE_IN_PROGRESS; } else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple))) tuple->t_infomask |= HEAP_XMIN_COMMITTED; else { /* * Not in Progress, Not Committed, so either Aborted or * crashed */ tuple->t_infomask |= HEAP_XMIN_INVALID; return HEAPTUPLE_DEAD; } /* Should only get here if we set XMIN_COMMITTED */ Assert(tuple->t_infomask & HEAP_XMIN_COMMITTED); } /* * Okay, the inserter committed, so it was good at some point. Now * what about the deleting transaction? */ if (tuple->t_infomask & HEAP_XMAX_INVALID) return HEAPTUPLE_LIVE; if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) { /* * "Deleting" xact really only marked it for update, so the tuple * is live in any case. However, we must make sure that either * XMAX_COMMITTED or XMAX_INVALID gets set once the xact is gone; * otherwise it is unsafe to recycle CLOG status after vacuuming. */ if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple))) return HEAPTUPLE_LIVE; /* * We don't really care whether xmax did commit, abort or * crash. We know that xmax did mark the tuple for update, but * it did not and will never actually update it. 
*/ tuple->t_infomask |= HEAP_XMAX_INVALID; } return HEAPTUPLE_LIVE; } if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple))) return HEAPTUPLE_DELETE_IN_PROGRESS; else if (TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple))) tuple->t_infomask |= HEAP_XMAX_COMMITTED; else { /* * Not in Progress, Not Committed, so either Aborted or * crashed */ tuple->t_infomask |= HEAP_XMAX_INVALID; return HEAPTUPLE_LIVE; } /* Should only get here if we set XMAX_COMMITTED */ Assert(tuple->t_infomask & HEAP_XMAX_COMMITTED); } /* * Deleter committed, but check special cases. */ if (TransactionIdEquals(HeapTupleHeaderGetXmin(tuple), HeapTupleHeaderGetXmax(tuple))) { /* * Inserter also deleted it, so it was never visible to anyone * else. However, we can only remove it early if it's not an * updated tuple; else its parent tuple is linking to it via t_ctid, * and this tuple mustn't go away before the parent does. */ if (!(tuple->t_infomask & HEAP_UPDATED)) return HEAPTUPLE_DEAD; } if (!TransactionIdPrecedes(HeapTupleHeaderGetXmax(tuple), OldestXmin)) { /* deleting xact is too recent, tuple could still be visible */ return HEAPTUPLE_RECENTLY_DEAD; } /* Otherwise, it's dead and removable */ return HEAPTUPLE_DEAD; }
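/*
 * Standalone model (added example, not backend code): the core shape of the
 * visibility decision above.  Transaction status is reduced to a three-state
 * enum and XIDs to plain integers; hint bits, the MOVED_OFF/MOVED_IN cases,
 * and lock-only xmax handling are deliberately omitted.
 */
#include <stdio.h>

typedef enum { XS_IN_PROGRESS, XS_COMMITTED, XS_ABORTED } xact_status;
typedef enum { TV_DEAD, TV_LIVE, TV_RECENTLY_DEAD,
			   TV_INSERT_IN_PROGRESS, TV_DELETE_IN_PROGRESS } tuple_vacstat;

static tuple_vacstat
mock_satisfies_vacuum(xact_status xmin_stat,
					  int has_xmax, xact_status xmax_stat,
					  unsigned xmax, unsigned oldest_xmin)
{
	if (xmin_stat == XS_ABORTED)
		return TV_DEAD;			/* inserter aborted: never visible to anyone */
	if (xmin_stat == XS_IN_PROGRESS)
		return TV_INSERT_IN_PROGRESS;
	if (!has_xmax)
		return TV_LIVE;			/* inserter committed, nobody deleted it */
	if (xmax_stat == XS_IN_PROGRESS)
		return TV_DELETE_IN_PROGRESS;
	if (xmax_stat == XS_ABORTED)
		return TV_LIVE;			/* the deletion never happened */
	/* deleter committed: removable only if no open xact can still see it */
	if (xmax >= oldest_xmin)
		return TV_RECENTLY_DEAD;
	return TV_DEAD;
}

int
main(void)
{
	/* committed insert, delete committed at xid 90, cutoff 100 => DEAD */
	printf("%d\n", mock_satisfies_vacuum(XS_COMMITTED, 1, XS_COMMITTED, 90, 100));
	/* same but deleted at xid 150 => RECENTLY_DEAD, must be kept */
	printf("%d\n", mock_satisfies_vacuum(XS_COMMITTED, 1, XS_COMMITTED, 150, 100));
	return 0;
}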
/* * HeapTupleSatisfiesSnapshot * True iff heap tuple is valid for the given snapshot. * * Here, we consider the effects of: * all transactions committed as of the time of the given snapshot * previous commands of this transaction * * Does _not_ include: * transactions shown as in-progress by the snapshot * transactions started after the snapshot was taken * changes made by the current command * * This is the same as HeapTupleSatisfiesNow, except that transactions that * were in progress or as yet unstarted when the snapshot was taken will * be treated as uncommitted, even if they have committed by now. * * (Notice, however, that the tuple status hint bits will be updated on the * basis of the true state of the transaction, even if we then pretend we * can't see it.) */ bool HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot) { if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) { if (tuple->t_infomask & HEAP_XMIN_INVALID) return false; if (tuple->t_infomask & HEAP_MOVED_OFF) { TransactionId xvac = HeapTupleHeaderGetXvac(tuple); if (TransactionIdIsCurrentTransactionId(xvac)) return false; if (!TransactionIdIsInProgress(xvac)) { if (TransactionIdDidCommit(xvac)) { tuple->t_infomask |= HEAP_XMIN_INVALID; return false; } tuple->t_infomask |= HEAP_XMIN_COMMITTED; } } else if (tuple->t_infomask & HEAP_MOVED_IN) { TransactionId xvac = HeapTupleHeaderGetXvac(tuple); if (!TransactionIdIsCurrentTransactionId(xvac)) { if (TransactionIdIsInProgress(xvac)) return false; if (TransactionIdDidCommit(xvac)) tuple->t_infomask |= HEAP_XMIN_COMMITTED; else { tuple->t_infomask |= HEAP_XMIN_INVALID; return false; } } } else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple))) { if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid) return false; /* inserted after scan started */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) return true; if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ } else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple))) return false; else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple))) tuple->t_infomask |= HEAP_XMIN_COMMITTED; else { /* it must have aborted or crashed */ tuple->t_infomask |= HEAP_XMIN_INVALID; return false; } } /* * By here, the inserting transaction has committed - have to check * when... 
*/ if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmin(tuple), snapshot->xmin)) { uint32 i; if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmin(tuple), snapshot->xmax)) return false; for (i = 0; i < snapshot->xcnt; i++) { if (TransactionIdEquals(HeapTupleHeaderGetXmin(tuple), snapshot->xip[i])) return false; } } if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */ return true; if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) return true; if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))) { if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ } if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple))) return true; if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple))) { /* it must have aborted or crashed */ tuple->t_infomask |= HEAP_XMAX_INVALID; return true; } /* xmax transaction committed */ tuple->t_infomask |= HEAP_XMAX_COMMITTED; } /* * OK, the deleting transaction committed too ... but when? */ if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmax(tuple), snapshot->xmin)) { uint32 i; if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmax(tuple), snapshot->xmax)) return true; for (i = 0; i < snapshot->xcnt; i++) { if (TransactionIdEquals(HeapTupleHeaderGetXmax(tuple), snapshot->xip[i])) return true; } } return false; }
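/*
 * Standalone sketch (added example) of the snapshot membership test that the
 * function above applies twice, once to xmin and once to xmax.  Real XIDs
 * wrap around and are compared with TransactionIdFollowsOrEquals; here they
 * are plain integers, which is the same logic without wraparound.  An XID is
 * "in progress per the snapshot" if it is >= the snapshot's xmax or appears
 * in its xip[] array, and only such XIDs have their effects hidden.
 */
#include <stdio.h>

typedef struct mock_snapshot
{
	unsigned	xmin;			/* all XIDs < xmin were finished */
	unsigned	xmax;			/* all XIDs >= xmax had not started */
	unsigned	xip[8];			/* XIDs in [xmin, xmax) still running */
	unsigned	xcnt;
} mock_snapshot;

/* true if a *committed* xid's effects are visible to this snapshot */
static int
xid_visible_in_snapshot(unsigned xid, const mock_snapshot *snap)
{
	unsigned	i;

	if (xid < snap->xmin)
		return 1;				/* completed before the snapshot was taken */
	if (xid >= snap->xmax)
		return 0;				/* started after the snapshot was taken */
	for (i = 0; i < snap->xcnt; i++)
		if (snap->xip[i] == xid)
			return 0;			/* was still running at snapshot time */
	return 1;
}

int
main(void)
{
	mock_snapshot snap = {.xmin = 100, .xmax = 110, .xip = {103, 107}, .xcnt = 2};

	printf("%d %d %d %d\n",
		   xid_visible_in_snapshot(95, &snap),	/* 1: finished long ago */
		   xid_visible_in_snapshot(103, &snap),	/* 0: in progress at snap */
		   xid_visible_in_snapshot(105, &snap),	/* 1: committed before snap */
		   xid_visible_in_snapshot(112, &snap));	/* 0: too new */
	return 0;
}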
/* * HeapTupleSatisfiesDirty * True iff heap tuple is valid including effects of open transactions. * * Here, we consider the effects of: * all committed and in-progress transactions (as of the current instant) * previous commands of this transaction * changes made by the current command * * This is essentially like HeapTupleSatisfiesItself as far as effects of * the current transaction and committed/aborted xacts are concerned. * However, we also include the effects of other xacts still in progress. * * Returns extra information in the global variable SnapshotDirty, namely * xids of concurrent xacts that affected the tuple. Also, the tuple's * t_ctid (forward link) is returned if it's being updated. */ bool HeapTupleSatisfiesDirty(HeapTupleHeader tuple) { SnapshotDirty->xmin = SnapshotDirty->xmax = InvalidTransactionId; ItemPointerSetInvalid(&(SnapshotDirty->tid)); if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) { if (tuple->t_infomask & HEAP_XMIN_INVALID) return false; if (tuple->t_infomask & HEAP_MOVED_OFF) { TransactionId xvac = HeapTupleHeaderGetXvac(tuple); if (TransactionIdIsCurrentTransactionId(xvac)) return false; if (!TransactionIdIsInProgress(xvac)) { if (TransactionIdDidCommit(xvac)) { tuple->t_infomask |= HEAP_XMIN_INVALID; return false; } tuple->t_infomask |= HEAP_XMIN_COMMITTED; } } else if (tuple->t_infomask & HEAP_MOVED_IN) { TransactionId xvac = HeapTupleHeaderGetXvac(tuple); if (!TransactionIdIsCurrentTransactionId(xvac)) { if (TransactionIdIsInProgress(xvac)) return false; if (TransactionIdDidCommit(xvac)) tuple->t_infomask |= HEAP_XMIN_COMMITTED; else { tuple->t_infomask |= HEAP_XMIN_INVALID; return false; } } } else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))); if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) return true; return false; } else if (TransactionIdIsInProgress(HeapTupleHeaderGetXmin(tuple))) { SnapshotDirty->xmin = HeapTupleHeaderGetXmin(tuple); /* XXX shouldn't we fall through to look at xmax? */ return true; /* in insertion by other */ } else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple))) tuple->t_infomask |= HEAP_XMIN_COMMITTED; else { /* it must have aborted or crashed */ tuple->t_infomask |= HEAP_XMIN_INVALID; return false; } } /* by here, the inserting transaction has committed */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */ return true; if (tuple->t_infomask & HEAP_XMAX_COMMITTED) { if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) return true; SnapshotDirty->tid = tuple->t_ctid; return false; /* updated by other */ } if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple))) { if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) return true; return false; } if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple))) { SnapshotDirty->xmax = HeapTupleHeaderGetXmax(tuple); return true; } if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple))) { /* it must have aborted or crashed */ tuple->t_infomask |= HEAP_XMAX_INVALID; return true; } /* xmax transaction committed */ if (tuple->t_infomask & HEAP_MARKED_FOR_UPDATE) { tuple->t_infomask |= HEAP_XMAX_INVALID; return true; } tuple->t_infomask |= HEAP_XMAX_COMMITTED; SnapshotDirty->tid = tuple->t_ctid; return false; /* updated by other */ }
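/*
 * Usage sketch (added example, hedged): the point of SnapshotDirty is that a
 * caller who misses a tuple because of a concurrent insert or update learns
 * *which* transaction to wait for.  The helper below is hypothetical glue,
 * not a real backend function; heap_fetch() and XactLockTableWait() are real
 * primitives of this era, though their signatures vary across versions.  The
 * caller is assumed to have set tuple->t_self to the TID to fetch.
 */
static bool
fetch_waiting_for_concurrent_xacts(Relation rel, HeapTuple tuple, Buffer *buf)
{
	for (;;)
	{
		if (heap_fetch(rel, SnapshotDirty, tuple, buf, false, NULL))
			return true;		/* found a visible version */

		if (TransactionIdIsValid(SnapshotDirty->xmin))
			XactLockTableWait(SnapshotDirty->xmin);	/* insert in progress */
		else if (TransactionIdIsValid(SnapshotDirty->xmax))
			XactLockTableWait(SnapshotDirty->xmax);	/* delete in progress */
		else
			return false;		/* no tuple and no concurrent xact involved */
		/* loop to retry once the blocking transaction has ended */
	}
}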
/* * get_relation_info - * Retrieves catalog information for a given relation. * * Given the Oid of the relation, return the following info into fields * of the RelOptInfo struct: * * min_attr lowest valid AttrNumber * max_attr highest valid AttrNumber * indexlist list of IndexOptInfos for relation's indexes * pages number of pages * tuples number of tuples * * Also, initialize the attr_needed[] and attr_widths[] arrays. In most * cases these are left as zeroes, but sometimes we need to compute attr * widths here, and we may as well cache the results for costsize.c. * * If inhparent is true, all we need to do is set up the attr arrays: * the RelOptInfo actually represents the appendrel formed by an inheritance * tree, and so the parent rel's physical size and index information isn't * important for it. */ void get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel) { Index varno = rel->relid; Relation relation; bool hasindex; List *indexinfos = NIL; /* * We need not lock the relation since it was already locked, either by * the rewriter or when expand_inherited_rtentry() added it to the query's * rangetable. */ relation = heap_open(relationObjectId, NoLock); rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1; rel->max_attr = RelationGetNumberOfAttributes(relation); rel->reltablespace = RelationGetForm(relation)->reltablespace; Assert(rel->max_attr >= rel->min_attr); rel->attr_needed = (Relids *) palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids)); rel->attr_widths = (int32 *) palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32)); /* * Estimate relation size --- unless it's an inheritance parent, in which * case the size will be computed later in set_append_rel_pathlist, and we * must leave it zero for now to avoid bollixing the total_table_pages * calculation. */ if (!inhparent) estimate_rel_size(relation, rel->attr_widths - rel->min_attr, &rel->pages, &rel->tuples); /* * Make list of indexes. Ignore indexes on system catalogs if told to. * Don't bother with indexes for an inheritance parent, either. */ if (inhparent || (IgnoreSystemIndexes && IsSystemClass(relation->rd_rel))) hasindex = false; else hasindex = relation->rd_rel->relhasindex; if (hasindex) { List *indexoidlist; ListCell *l; LOCKMODE lmode; indexoidlist = RelationGetIndexList(relation); /* * For each index, we get the same type of lock that the executor will * need, and do not release it. This saves a couple of trips to the * shared lock manager while not creating any real loss of * concurrency, because no schema changes could be happening on the * index while we hold lock on the parent rel, and neither lock type * blocks any other kind of index operation. */ if (rel->relid == root->parse->resultRelation) lmode = RowExclusiveLock; else lmode = AccessShareLock; foreach(l, indexoidlist) { Oid indexoid = lfirst_oid(l); Relation indexRelation; Form_pg_index index; IndexOptInfo *info; int ncolumns; int i; /* * Extract info from the relation descriptor for the index. */ indexRelation = index_open(indexoid, lmode); index = indexRelation->rd_index; /* * Ignore invalid indexes, since they can't safely be used for * queries. Note that this is OK because the data structure we * are constructing is only used by the planner --- the executor * still needs to insert into "invalid" indexes! */ if (!index->indisvalid) { index_close(indexRelation, NoLock); continue; } /* * If the index is valid, but cannot yet be used, ignore it; but * mark the plan we are generating as transient. 
See * src/backend/access/heap/README.HOT for discussion. */ if (index->indcheckxmin && !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data), TransactionXmin)) { root->glob->transientPlan = true; index_close(indexRelation, NoLock); continue; } info = makeNode(IndexOptInfo); info->indexoid = index->indexrelid; info->reltablespace = RelationGetForm(indexRelation)->reltablespace; info->rel = rel; info->ncolumns = ncolumns = index->indnatts; /* * Allocate per-column info arrays. To save a few palloc cycles * we allocate all the Oid-type arrays in one request. Note that * the opfamily array needs an extra, terminating zero at the end. * We pre-zero the ordering info in case the index is unordered. */ info->indexkeys = (int *) palloc(sizeof(int) * ncolumns); info->opfamily = (Oid *) palloc0(sizeof(Oid) * (4 * ncolumns + 1)); info->opcintype = info->opfamily + (ncolumns + 1); info->fwdsortop = info->opcintype + ncolumns; info->revsortop = info->fwdsortop + ncolumns; info->nulls_first = (bool *) palloc0(sizeof(bool) * ncolumns); for (i = 0; i < ncolumns; i++) { info->indexkeys[i] = index->indkey.values[i]; info->opfamily[i] = indexRelation->rd_opfamily[i]; info->opcintype[i] = indexRelation->rd_opcintype[i]; } info->relam = indexRelation->rd_rel->relam; info->amcostestimate = indexRelation->rd_am->amcostestimate; info->amoptionalkey = indexRelation->rd_am->amoptionalkey; info->amsearchnulls = indexRelation->rd_am->amsearchnulls; info->amhasgettuple = OidIsValid(indexRelation->rd_am->amgettuple); info->amhasgetbitmap = OidIsValid(indexRelation->rd_am->amgetbitmap); /* * Fetch the ordering operators associated with the index, if any. * We expect that all ordering-capable indexes use btree's * strategy numbers for the ordering operators. */ if (indexRelation->rd_am->amcanorder) { int nstrat = indexRelation->rd_am->amstrategies; for (i = 0; i < ncolumns; i++) { int16 opt = indexRelation->rd_indoption[i]; int fwdstrat; int revstrat; if (opt & INDOPTION_DESC) { fwdstrat = BTGreaterStrategyNumber; revstrat = BTLessStrategyNumber; } else { fwdstrat = BTLessStrategyNumber; revstrat = BTGreaterStrategyNumber; } /* * Index AM must have a fixed set of strategies for it to * make sense to specify amcanorder, so we need not allow * the case amstrategies == 0. */ if (fwdstrat > 0) { Assert(fwdstrat <= nstrat); info->fwdsortop[i] = indexRelation->rd_operator[i * nstrat + fwdstrat - 1]; } if (revstrat > 0) { Assert(revstrat <= nstrat); info->revsortop[i] = indexRelation->rd_operator[i * nstrat + revstrat - 1]; } info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0; } } /* * Fetch the index expressions and predicate, if any. We must * modify the copies we obtain from the relcache to have the * correct varno for the parent relation, so that they match up * correctly against qual clauses. */ info->indexprs = RelationGetIndexExpressions(indexRelation); info->indpred = RelationGetIndexPredicate(indexRelation); if (info->indexprs && varno != 1) ChangeVarNodes((Node *) info->indexprs, 1, varno, 0); if (info->indpred && varno != 1) ChangeVarNodes((Node *) info->indpred, 1, varno, 0); info->predOK = false; /* set later in indxpath.c */ info->unique = index->indisunique; /* * Estimate the index size. If it's not a partial index, we lock * the number-of-tuples estimate to equal the parent table; if it * is partial then we have to use the same methods as we would for * a table, except we can be sure that the index is not larger * than the table. 
*/ if (info->indpred == NIL) { info->pages = RelationGetNumberOfBlocks(indexRelation); info->tuples = rel->tuples; } else { estimate_rel_size(indexRelation, NULL, &info->pages, &info->tuples); if (info->tuples > rel->tuples) info->tuples = rel->tuples; } index_close(indexRelation, NoLock); indexinfos = lcons(info, indexinfos); } list_free(indexoidlist); }
rel->indexlist = indexinfos; heap_close(relation, NoLock); /* * Allow a plugin to editorialize on the info we obtained from the * catalogs. Actions might include altering the assumed relation size, * removing an index, or adding a hypothetical index to the indexlist. */ if (get_relation_info_hook) (*get_relation_info_hook) (root, relationObjectId, inhparent, rel); }
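/*
 * Standalone sketch (added example) of the ordering-operator selection in
 * the function above: for a DESC column the "forward" sort operator is btree
 * strategy 5 (">") and the reverse is strategy 1 ("<"); for an ASC column it
 * is the other way around.  The flat "column * nstrat + strategy - 1"
 * arithmetic mirrors the rd_operator[] indexing; the operator OIDs here are
 * fake placeholders.
 */
#include <stdio.h>

#define BT_LESS			1	/* BTLessStrategyNumber */
#define BT_GREATER		5	/* BTGreaterStrategyNumber */
#define INDOPT_DESC		0x0001
#define NSTRAT			5	/* btree has five strategies */

/* operator OID for (column col, strategy strat), as in rd_operator[] */
static unsigned
op_for(const unsigned *operators, int col, int strat)
{
	return operators[col * NSTRAT + (strat - 1)];
}

int
main(void)
{
	/* two columns x five strategies; fake OIDs 10x = col 0, 20x = col 1 */
	unsigned	operators[2 * NSTRAT] =
		{101, 102, 103, 104, 105, 201, 202, 203, 204, 205};
	int			indoption[2] = {0, INDOPT_DESC};	/* column 1 is DESC */
	int			i;

	for (i = 0; i < 2; i++)
	{
		int			fwd = (indoption[i] & INDOPT_DESC) ? BT_GREATER : BT_LESS;
		int			rev = (indoption[i] & INDOPT_DESC) ? BT_LESS : BT_GREATER;

		printf("col %d: fwdsortop=%u revsortop=%u\n",
			   i, op_for(operators, i, fwd), op_for(operators, i, rev));
	}
	return 0;
}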
/* * Prune specified item pointer or a HOT chain originating at that item. * * If the item is an index-referenced tuple (i.e. not a heap-only tuple), * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT * chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple. * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really * DEAD, the OldestXmin test is just too coarse to detect it. * * The root line pointer is redirected to the tuple immediately after the * latest DEAD tuple. If all tuples in the chain are DEAD, the root line * pointer is marked LP_DEAD. (This includes the case of a DEAD simple * tuple, which we treat as a chain of length 1.) * * OldestXmin is the cutoff XID used to identify dead tuples. * * We don't actually change the page here, except perhaps for hint-bit updates * caused by HeapTupleSatisfiesVacuum. We just add entries to the arrays in * prstate showing the changes to be made. Items to be redirected are added * to the redirected[] array (two entries per redirection); items to be set to * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED * state are added to nowunused[]. * * If redirect_move is true, we intend to get rid of redirecting line pointers, * not just make redirection entries. * * Returns the number of tuples (to be) deleted from the page. */ static int heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, TransactionId OldestXmin, PruneState *prstate, bool redirect_move) { int ndeleted = 0; Page dp = (Page) BufferGetPage(buffer); TransactionId priorXmax = InvalidTransactionId; ItemId rootlp; HeapTupleHeader htup; OffsetNumber latestdead = InvalidOffsetNumber, redirect_target = InvalidOffsetNumber, maxoff = PageGetMaxOffsetNumber(dp), offnum; OffsetNumber chainitems[MaxHeapTuplesPerPage]; int nchain = 0, i; rootlp = PageGetItemId(dp, rootoffnum); /* * If it's a heap-only tuple, then it is not the start of a HOT chain. */ if (ItemIdIsNormal(rootlp)) { htup = (HeapTupleHeader) PageGetItem(dp, rootlp); if (HeapTupleHeaderIsHeapOnly(htup)) { /* * If the tuple is DEAD and doesn't chain to anything else, mark * it unused immediately. (If it does chain, we can only remove * it as part of pruning its chain.) * * We need this primarily to handle aborted HOT updates, that is, * XMIN_INVALID heap-only tuples. Those might not be linked to by * any chain, since the parent tuple might be re-updated before * any pruning occurs. So we have to be able to reap them * separately from chain-pruning. (Note that * HeapTupleHeaderIsHotUpdated will never return true for an * XMIN_INVALID tuple, so this code will work even when there were * sequential updates within the aborted transaction.) * * Note that we might first arrive at a dead heap-only tuple * either here or while following a chain below. Whichever path * gets there first will mark the tuple unused. 
*/ if (HeapTupleSatisfiesVacuum(relation, htup, OldestXmin, buffer) == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup)) { heap_prune_record_unused(prstate, rootoffnum); ndeleted++; } /* Nothing more to do */ return ndeleted; } } /* Start from the root tuple */ offnum = rootoffnum; /* while not end of the chain */ for (;;) { ItemId lp; bool tupdead, recent_dead; /* Some sanity checks */ if (offnum < FirstOffsetNumber || offnum > maxoff) break; /* If item is already processed, stop --- it must not be same chain */ if (prstate->marked[offnum]) break; lp = PageGetItemId(dp, offnum); /* Unused item obviously isn't part of the chain */ if (!ItemIdIsUsed(lp)) break; /* * If we are looking at the redirected root line pointer, jump to the * first normal tuple in the chain. If we find a redirect somewhere * else, stop --- it must not be same chain. */ if (ItemIdIsRedirected(lp)) { if (nchain > 0) break; /* not at start of chain */ chainitems[nchain++] = offnum; offnum = ItemIdGetRedirect(rootlp); continue; } /* * Likewise, a dead item pointer can't be part of the chain. (We * already eliminated the case of dead root tuple outside this * function.) */ if (ItemIdIsDead(lp)) break; Assert(ItemIdIsNormal(lp)); htup = (HeapTupleHeader) PageGetItem(dp, lp); /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) break; /* * OK, this tuple is indeed a member of the chain. */ chainitems[nchain++] = offnum; /* * Check tuple's visibility status. */ tupdead = recent_dead = false; switch (HeapTupleSatisfiesVacuum(relation, htup, OldestXmin, buffer)) { case HEAPTUPLE_DEAD: tupdead = true; break; case HEAPTUPLE_RECENTLY_DEAD: recent_dead = true; /* * This tuple may soon become DEAD. Update the hint field so * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, HeapTupleHeaderGetXmax(htup)); break; case HEAPTUPLE_DELETE_IN_PROGRESS: /* * This tuple may soon become DEAD. Update the hint field so * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, HeapTupleHeaderGetXmax(htup)); break; case HEAPTUPLE_LIVE: case HEAPTUPLE_INSERT_IN_PROGRESS: /* * If we wanted to optimize for aborts, we might consider * marking the page prunable when we see INSERT_IN_PROGRESS. * But we don't. See related decisions about when to mark the * page prunable in heapam.c. */ break; default: elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); break; } /* * Remember the last DEAD tuple seen. We will advance past * RECENTLY_DEAD tuples just in case there's a DEAD one after them; * but we can't advance past anything else. (XXX is it really worth * continuing to scan beyond RECENTLY_DEAD? The case where we will * find another DEAD tuple is a fairly unusual corner case.) */ if (tupdead) latestdead = offnum; else if (!recent_dead) break; /* * If the tuple is not HOT-updated, then we are at the end of this * HOT-update chain. */ if (!HeapTupleHeaderIsHotUpdated(htup)) break; /* * Advance to next chain member. */ Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == BufferGetBlockNumber(buffer)); offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); priorXmax = HeapTupleHeaderGetXmax(htup); } /* * If we found a DEAD tuple in the chain, adjust the HOT chain so that all * the DEAD tuples at the start of the chain are removed and the root line * pointer is appropriately redirected. 
*/ if (OffsetNumberIsValid(latestdead)) { /* * Mark as unused each intermediate item that we are able to remove * from the chain. * * When the previous item is the last dead tuple seen, we are at the * right candidate for redirection. */ for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++) { heap_prune_record_unused(prstate, chainitems[i]); ndeleted++; } /* * If the root entry had been a normal tuple, we are deleting it, so * count it in the result. But changing a redirect (even to DEAD * state) doesn't count. */ if (ItemIdIsNormal(rootlp)) ndeleted++; /* * If the DEAD tuple is at the end of the chain, the entire chain is * dead and the root line pointer can be marked dead. Otherwise just * redirect the root to the correct chain member. */ if (i >= nchain) heap_prune_record_dead(prstate, rootoffnum); else { heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]); /* If the redirection will be a move, need more processing */ if (redirect_move) redirect_target = chainitems[i]; } } else if (nchain < 2 && ItemIdIsRedirected(rootlp)) { /* * We found a redirect item that doesn't point to a valid follow-on * item. This can happen if the loop in heap_page_prune caused us to * visit the dead successor of a redirect item before visiting the * redirect item. We can clean up by setting the redirect item to * DEAD state. */ heap_prune_record_dead(prstate, rootoffnum); } else if (redirect_move && ItemIdIsRedirected(rootlp)) { /* * If we desire to eliminate LP_REDIRECT items by moving tuples, make * a redirection entry for each redirected root item; this will cause * heap_page_prune_execute to actually do the move. (We get here only * when there are no DEAD tuples in the chain; otherwise the * redirection entry was made above.) */ heap_prune_record_redirect(prstate, rootoffnum, chainitems[1]); redirect_target = chainitems[1]; } /* * If we are going to implement a redirect by moving tuples, we have to * issue a cache invalidation against the redirection target tuple, * because its CTID will be effectively changed by the move. Note that * CacheInvalidateHeapTuple only queues the request, it doesn't send it; * if we fail before reaching EndNonTransactionalInvalidation, nothing * happens and no harm is done. */ if (OffsetNumberIsValid(redirect_target)) { ItemId firstlp = PageGetItemId(dp, redirect_target); HeapTupleData firsttup; Assert(ItemIdIsNormal(firstlp)); /* Set up firsttup to reference the tuple at its existing CTID */ firsttup.t_data = (HeapTupleHeader) PageGetItem(dp, firstlp); firsttup.t_len = ItemIdGetLength(firstlp); ItemPointerSet(&firsttup.t_self, BufferGetBlockNumber(buffer), redirect_target); CacheInvalidateHeapTuple(relation, &firsttup); } return ndeleted; }
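/*
 * Standalone model (added example) of the chain-pruning decision above: walk
 * a chain of tuple states, remember the last DEAD member (scanning past
 * RECENTLY_DEAD members in case a DEAD one follows), then either kill the
 * whole chain or report the member the root should be redirected to.
 * Visibility is reduced to a precomputed state array; the real code derives
 * it per tuple with HeapTupleSatisfiesVacuum and also validates xmin/xmax
 * linkage, which this sketch omits.
 */
#include <stdio.h>

typedef enum { CH_DEAD, CH_RECENTLY_DEAD, CH_LIVE } chain_state;

/*
 * Returns -1 if there is no leading DEAD run (nothing to prune), n if the
 * entire chain is dead (root becomes LP_DEAD), else the index of the first
 * member to keep (root is redirected there).
 */
static int
prune_decision(const chain_state *chain, int n)
{
	int			latestdead = -1;
	int			i;

	for (i = 0; i < n; i++)
	{
		if (chain[i] == CH_DEAD)
			latestdead = i;
		else if (chain[i] != CH_RECENTLY_DEAD)
			break;				/* a LIVE member ends the prunable prefix */
	}
	if (latestdead < 0)
		return -1;
	return latestdead + 1;		/* == n means the whole chain is dead */
}

int
main(void)
{
	chain_state a[] = {CH_DEAD, CH_RECENTLY_DEAD, CH_DEAD, CH_LIVE};
	chain_state b[] = {CH_DEAD, CH_DEAD};
	chain_state c[] = {CH_RECENTLY_DEAD, CH_LIVE};

	printf("%d\n", prune_decision(a, 4));	/* 3: redirect root to item 3 */
	printf("%d\n", prune_decision(b, 2));	/* 2: whole chain dead */
	printf("%d\n", prune_decision(c, 2));	/* -1: nothing prunable */
	return 0;
}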
/* * lazy_scan_heap() -- scan an open heap relation * * This routine sets commit status bits, builds lists of dead tuples * and pages with free space, and calculates statistics on the number * of live tuples in the heap. When done, or when we run low on space * for dead-tuple TIDs, invoke vacuuming of indexes and heap. * * If there are no indexes then we just vacuum each dirty page as we * process it, since there's no point in gathering many tuples. */ static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool scan_all) { BlockNumber nblocks, blkno; HeapTupleData tuple; char *relname; BlockNumber empty_pages, vacuumed_pages; double num_tuples, tups_vacuumed, nkeep, nunused; IndexBulkDeleteResult **indstats; int i; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; BlockNumber next_not_all_visible_block; bool skipping_all_visible_blocks; pg_rusage_init(&ru0); relname = RelationGetRelationName(onerel); ereport(elevel, (errmsg("vacuuming \"%s.%s\"", get_namespace_name(RelationGetNamespace(onerel)), relname))); empty_pages = vacuumed_pages = 0; num_tuples = tups_vacuumed = nkeep = nunused = 0; indstats = (IndexBulkDeleteResult **) palloc0(nindexes * sizeof(IndexBulkDeleteResult *)); nblocks = RelationGetNumberOfBlocks(onerel); vacrelstats->rel_pages = nblocks; vacrelstats->scanned_pages = 0; vacrelstats->nonempty_pages = 0; vacrelstats->latestRemovedXid = InvalidTransactionId; lazy_space_alloc(vacrelstats, nblocks); /* * We want to skip pages that don't require vacuuming according to the * visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD * consecutive pages. Since we're reading sequentially, the OS should be * doing readahead for us, so there's no gain in skipping a page now and * then; that's likely to disable readahead and so be counterproductive. * Also, skipping even a single page means that we can't update * relfrozenxid, so we only want to do it if we can skip a goodly number * of pages. * * Before entering the main loop, establish the invariant that * next_not_all_visible_block is the next block number >= blkno that's not * all-visible according to the visibility map, or nblocks if there's no * such block. Also, we set up the skipping_all_visible_blocks flag, * which is needed because we need hysteresis in the decision: once we've * started skipping blocks, we may as well skip everything up to the next * not-all-visible block. * * Note: if scan_all is true, we won't actually skip any pages; but we * maintain next_not_all_visible_block anyway, so as to set up the * all_visible_according_to_vm flag correctly for each page. 
*/ for (next_not_all_visible_block = 0; next_not_all_visible_block < nblocks; next_not_all_visible_block++) { if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer)) break; vacuum_delay_point(); } if (next_not_all_visible_block >= SKIP_PAGES_THRESHOLD) skipping_all_visible_blocks = true; else skipping_all_visible_blocks = false; for (blkno = 0; blkno < nblocks; blkno++) { Buffer buf; Page page; OffsetNumber offnum, maxoff; bool tupgone, hastup; int prev_dead_count; OffsetNumber frozen[MaxOffsetNumber]; int nfrozen; Size freespace; bool all_visible_according_to_vm; bool all_visible; bool has_dead_tuples; if (blkno == next_not_all_visible_block) { /* Time to advance next_not_all_visible_block */ for (next_not_all_visible_block++; next_not_all_visible_block < nblocks; next_not_all_visible_block++) { if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer)) break; vacuum_delay_point(); } /* * We know we can't skip the current block. But set up * skipping_all_visible_blocks to do the right thing at the * following blocks. */ if (next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD) skipping_all_visible_blocks = true; else skipping_all_visible_blocks = false; all_visible_according_to_vm = false; } else { /* Current block is all-visible */ if (skipping_all_visible_blocks && !scan_all) continue; all_visible_according_to_vm = true; } vacuum_delay_point(); vacrelstats->scanned_pages++; /* * If we are close to overrunning the available space for dead-tuple * TIDs, pause and do a cycle of vacuuming before we tackle this page. */ if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage && vacrelstats->num_dead_tuples > 0) { /* Log cleanup info before we touch indexes */ vacuum_log_cleanup_info(onerel, vacrelstats); /* Remove index entries */ for (i = 0; i < nindexes; i++) lazy_vacuum_index(Irel[i], &indstats[i], vacrelstats); /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); /* * Forget the now-vacuumed tuples, and press on, but be careful * not to reset latestRemovedXid since we want that value to be * valid. */ vacrelstats->num_dead_tuples = 0; vacrelstats->num_index_scans++; } buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno, RBM_NORMAL, vac_strategy); /* We need buffer cleanup lock so that we can prune HOT chains. */ LockBufferForCleanup(buf); page = BufferGetPage(buf); if (PageIsNew(page)) { /* * An all-zeroes page could be left over if a backend extends the * relation but crashes before initializing the page. Reclaim such * pages for use. * * We have to be careful here because we could be looking at a * page that someone has just added to the relation and not yet * been able to initialize (see RelationGetBufferForTuple). To * protect against that, release the buffer lock, grab the * relation extension lock momentarily, and re-lock the buffer. If * the page is still uninitialized by then, it must be left over * from a crashed backend, and we can initialize it. * * We don't really need the relation lock when this is a new or * temp relation, but it's probably not worth the code space to * check that, since this surely isn't a critical path. * * Note: the comparable code in vacuum.c need not worry because * it's got exclusive lock on the whole relation. 
*/ LockBuffer(buf, BUFFER_LOCK_UNLOCK); LockRelationForExtension(onerel, ExclusiveLock); UnlockRelationForExtension(onerel, ExclusiveLock); LockBufferForCleanup(buf); if (PageIsNew(page)) { ereport(WARNING, (errmsg("relation \"%s\" page %u is uninitialized --- fixing", relname, blkno))); PageInit(page, BufferGetPageSize(buf), 0); empty_pages++; } freespace = PageGetHeapFreeSpace(page); MarkBufferDirty(buf); UnlockReleaseBuffer(buf); RecordPageWithFreeSpace(onerel, blkno, freespace); continue; } if (PageIsEmpty(page)) { empty_pages++; freespace = PageGetHeapFreeSpace(page); if (!PageIsAllVisible(page)) { PageSetAllVisible(page); SetBufferCommitInfoNeedsSave(buf); } LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* Update the visibility map */ if (!all_visible_according_to_vm) { visibilitymap_pin(onerel, blkno, &vmbuffer); LockBuffer(buf, BUFFER_LOCK_SHARE); if (PageIsAllVisible(page)) visibilitymap_set(onerel, blkno, PageGetLSN(page), &vmbuffer); LockBuffer(buf, BUFFER_LOCK_UNLOCK); } ReleaseBuffer(buf); RecordPageWithFreeSpace(onerel, blkno, freespace); continue; } /* * Prune all HOT-update chains in this page. * * We count tuples removed by the pruning step as removed by VACUUM. */ tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false, &vacrelstats->latestRemovedXid); /* * Now scan the page to collect vacuumable items and check for tuples * requiring freezing. */ all_visible = true; has_dead_tuples = false; nfrozen = 0; hastup = false; prev_dead_count = vacrelstats->num_dead_tuples; maxoff = PageGetMaxOffsetNumber(page); for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) { ItemId itemid; itemid = PageGetItemId(page, offnum); /* Unused items require no processing, but we count 'em */ if (!ItemIdIsUsed(itemid)) { nunused += 1; continue; } /* Redirect items mustn't be touched */ if (ItemIdIsRedirected(itemid)) { hastup = true; /* this page won't be truncatable */ continue; } ItemPointerSet(&(tuple.t_self), blkno, offnum); /* * DEAD item pointers are to be vacuumed normally; but we don't * count them in tups_vacuumed, else we'd be double-counting (at * least in the common case where heap_page_prune() just freed up * a non-HOT tuple). */ if (ItemIdIsDead(itemid)) { lazy_record_dead_tuple(vacrelstats, &(tuple.t_self)); all_visible = false; continue; } Assert(ItemIdIsNormal(itemid)); tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tupgone = false; switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf)) { case HEAPTUPLE_DEAD: /* * Ordinarily, DEAD tuples would have been removed by * heap_page_prune(), but it's possible that the tuple * state changed since heap_page_prune() looked. In * particular an INSERT_IN_PROGRESS tuple could have * changed to DEAD if the inserter aborted. So this * cannot be considered an error condition. * * If the tuple is HOT-updated then it must only be * removed by a prune operation; so we keep it just as if * it were RECENTLY_DEAD. Also, if it's a heap-only * tuple, we choose to keep it, because it'll be a lot * cheaper to get rid of it in the next pruning pass than * to treat it like an indexed tuple. 
*/ if (HeapTupleIsHotUpdated(&tuple) || HeapTupleIsHeapOnly(&tuple)) nkeep += 1; else tupgone = true; /* we can delete the tuple */ all_visible = false; break; case HEAPTUPLE_LIVE: /* Tuple is good --- but let's do some validity checks */ if (onerel->rd_rel->relhasoids && !OidIsValid(HeapTupleGetOid(&tuple))) elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid", relname, blkno, offnum); /* * Is the tuple definitely visible to all transactions? * * NB: Like with per-tuple hint bits, we can't set the * PD_ALL_VISIBLE flag if the inserter committed * asynchronously. See SetHintBits for more info. Check * that the HEAP_XMIN_COMMITTED hint bit is set because of * that. */ if (all_visible) { TransactionId xmin; if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED)) { all_visible = false; break; } /* * The inserter definitely committed. But is it old * enough that everyone sees it as committed? */ xmin = HeapTupleHeaderGetXmin(tuple.t_data); if (!TransactionIdPrecedes(xmin, OldestXmin)) { all_visible = false; break; } } break; case HEAPTUPLE_RECENTLY_DEAD: /* * If tuple is recently deleted then we must not remove it * from relation. */ nkeep += 1; all_visible = false; break; case HEAPTUPLE_INSERT_IN_PROGRESS: /* This is an expected case during concurrent vacuum */ all_visible = false; break; case HEAPTUPLE_DELETE_IN_PROGRESS: /* This is an expected case during concurrent vacuum */ all_visible = false; break; default: elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); break; } if (tupgone) { lazy_record_dead_tuple(vacrelstats, &(tuple.t_self)); HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data, &vacrelstats->latestRemovedXid); tups_vacuumed += 1; has_dead_tuples = true; } else { num_tuples += 1; hastup = true; /* * Each non-removable tuple must be checked to see if it needs * freezing. Note we already have exclusive buffer lock. */ if (heap_freeze_tuple(tuple.t_data, FreezeLimit, InvalidBuffer)) frozen[nfrozen++] = offnum; } } /* scan along page */ /* * If we froze any tuples, mark the buffer dirty, and write a WAL * record recording the changes. We must log the changes to be * crash-safe against future truncation of CLOG. */ if (nfrozen > 0) { MarkBufferDirty(buf); if (RelationNeedsWAL(onerel)) { XLogRecPtr recptr; recptr = log_heap_freeze(onerel, buf, FreezeLimit, frozen, nfrozen); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } } /* * If there are no indexes then we can vacuum the page right now * instead of doing a second scan. */ if (nindexes == 0 && vacrelstats->num_dead_tuples > 0) { /* Remove tuples from heap */ lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats); /* * Forget the now-vacuumed tuples, and press on, but be careful * not to reset latestRemovedXid since we want that value to be * valid. */ vacrelstats->num_dead_tuples = 0; vacuumed_pages++; } freespace = PageGetHeapFreeSpace(page); /* Update the all-visible flag on the page */ if (!PageIsAllVisible(page) && all_visible) { PageSetAllVisible(page); SetBufferCommitInfoNeedsSave(buf); } /* * It's possible for the value returned by GetOldestXmin() to move * backwards, so it's not wrong for us to see tuples that appear to * not be visible to everyone yet, while PD_ALL_VISIBLE is already * set. The real safe xmin value never moves backwards, but * GetOldestXmin() is conservative and sometimes returns a value * that's unnecessarily small, so if we see that contradiction it just * means that the tuples that we think are not visible to everyone yet * actually are, and the PD_ALL_VISIBLE flag is correct. 
* * There should never be dead tuples on a page with PD_ALL_VISIBLE * set, however. */ else if (PageIsAllVisible(page) && has_dead_tuples) { elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u", relname, blkno); PageClearAllVisible(page); SetBufferCommitInfoNeedsSave(buf); /* * Normally, we would drop the lock on the heap page before * updating the visibility map, but since this case shouldn't * happen anyway, don't worry about that. */ visibilitymap_clear(onerel, blkno); } LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* Update the visibility map */ if (!all_visible_according_to_vm && all_visible) { visibilitymap_pin(onerel, blkno, &vmbuffer); LockBuffer(buf, BUFFER_LOCK_SHARE); if (PageIsAllVisible(page)) visibilitymap_set(onerel, blkno, PageGetLSN(page), &vmbuffer); LockBuffer(buf, BUFFER_LOCK_UNLOCK); } ReleaseBuffer(buf); /* Remember the location of the last page with nonremovable tuples */ if (hastup) vacrelstats->nonempty_pages = blkno + 1; /* * If we remembered any tuples for deletion, then the page will be * visited again by lazy_vacuum_heap, which will compute and record * its post-compaction free space. If not, then we're done with this * page, so remember its free space as-is. (This path will always be * taken if there are no indexes.) */ if (vacrelstats->num_dead_tuples == prev_dead_count) RecordPageWithFreeSpace(onerel, blkno, freespace); } /* save stats for use later */ vacrelstats->scanned_tuples = num_tuples; vacrelstats->tuples_deleted = tups_vacuumed; /* now we can compute the new value for pg_class.reltuples */ vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false, nblocks, vacrelstats->scanned_pages, num_tuples); /* If any tuples need to be deleted, perform final vacuum cycle */ /* XXX put a threshold on min number of tuples here? */ if (vacrelstats->num_dead_tuples > 0) { /* Log cleanup info before we touch indexes */ vacuum_log_cleanup_info(onerel, vacrelstats); /* Remove index entries */ for (i = 0; i < nindexes; i++) lazy_vacuum_index(Irel[i], &indstats[i], vacrelstats); /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); vacrelstats->num_index_scans++; } /* Release the pin on the visibility map page */ if (BufferIsValid(vmbuffer)) { ReleaseBuffer(vmbuffer); vmbuffer = InvalidBuffer; } /* Do post-vacuum cleanup and statistics update for each index */ for (i = 0; i < nindexes; i++) lazy_cleanup_index(Irel[i], indstats[i], vacrelstats); /* If no indexes, make log report that lazy_vacuum_heap would've made */ if (vacuumed_pages) ereport(elevel, (errmsg("\"%s\": removed %.0f row versions in %u pages", RelationGetRelationName(onerel), tups_vacuumed, vacuumed_pages))); ereport(elevel, (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages", RelationGetRelationName(onerel), tups_vacuumed, num_tuples, vacrelstats->scanned_pages, nblocks), errdetail("%.0f dead row versions cannot be removed yet.\n" "There were %.0f unused item pointers.\n" "%u pages are entirely empty.\n" "%s.", nkeep, nunused, empty_pages, pg_rusage_show(&ru0)))); }
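/*
 * Standalone model (added example) of the visibility-map skipping logic at
 * the top of the scan loop above: blocks are skipped only when at least
 * SKIP_PAGES_THRESHOLD consecutive all-visible blocks lie ahead, and once
 * skipping starts it continues up to the next not-all-visible block (the
 * hysteresis described in the comments).  The visibility map is reduced to
 * a plain int array, and the scan_all case is omitted.
 */
#include <stdio.h>

#define SKIP_PAGES_THRESHOLD 4	/* a small value for the demo; real code uses 32 */

static void
scan_blocks(const int *all_visible, int nblocks)
{
	int			next_nav;		/* next not-all-visible block */
	int			skipping;
	int			blkno;

	for (next_nav = 0; next_nav < nblocks; next_nav++)
		if (!all_visible[next_nav])
			break;
	skipping = (next_nav >= SKIP_PAGES_THRESHOLD);

	for (blkno = 0; blkno < nblocks; blkno++)
	{
		if (blkno == next_nav)
		{
			/* advance next_nav past the current block */
			for (next_nav++; next_nav < nblocks; next_nav++)
				if (!all_visible[next_nav])
					break;
			skipping = (next_nav - blkno > SKIP_PAGES_THRESHOLD);
		}
		else if (skipping)
		{
			continue;			/* all-visible, and the run is long enough */
		}
		printf("scan block %d\n", blkno);
	}
}

int
main(void)
{
	/* a short all-visible run (still scanned) and a long one (skipped) */
	int			map[12] = {1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1};

	scan_blocks(map, 12);		/* scans blocks 0, 1, 2, 9, 10, 11 */
	return 0;
}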
/* * For all items in this page, find their respective root line pointers. * If item k is part of a HOT-chain with root at item j, then we set * root_offsets[k - 1] = j. * * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries. * We zero out all unused entries. * * The function must be called with at least share lock on the buffer, to * prevent concurrent prune operations. * * Note: The information collected here is valid only as long as the caller * holds a pin on the buffer. Once pin is released, a tuple might be pruned * and reused by a completely unrelated tuple. */ void heap_get_root_tuples(Page page, OffsetNumber *root_offsets) { OffsetNumber offnum, maxoff; MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber)); maxoff = PageGetMaxOffsetNumber(page); for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) { ItemId lp = PageGetItemId(page, offnum); HeapTupleHeader htup; OffsetNumber nextoffnum; TransactionId priorXmax; /* skip unused and dead items */ if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp)) continue; if (ItemIdIsNormal(lp)) { htup = (HeapTupleHeader) PageGetItem(page, lp); /* * Check if this tuple is part of a HOT-chain rooted at some other * tuple. If so, skip it for now; we'll process it when we find * its root. */ if (HeapTupleHeaderIsHeapOnly(htup)) continue; /* * This is either a plain tuple or the root of a HOT-chain. * Remember it in the mapping. */ root_offsets[offnum - 1] = offnum; /* If it's not the start of a HOT-chain, we're done with it */ if (!HeapTupleHeaderIsHotUpdated(htup)) continue; /* Set up to scan the HOT-chain */ nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); priorXmax = HeapTupleHeaderGetXmax(htup); } else { /* Must be a redirect item. We do not set its root_offsets entry */ Assert(ItemIdIsRedirected(lp)); /* Set up to scan the HOT-chain */ nextoffnum = ItemIdGetRedirect(lp); priorXmax = InvalidTransactionId; } /* * Now follow the HOT-chain and collect other tuples in the chain. * * Note: Even though this is a nested loop, the complexity of the * function is O(N) because a tuple in the page should be visited not * more than twice, once in the outer loop and once in HOT-chain * chases. */ for (;;) { lp = PageGetItemId(page, nextoffnum); /* Check for broken chains */ if (!ItemIdIsNormal(lp)) break; htup = (HeapTupleHeader) PageGetItem(page, lp); if (TransactionIdIsValid(priorXmax) && !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup))) break; /* Remember the root line pointer for this item */ root_offsets[nextoffnum - 1] = offnum; /* Advance to next chain member, if any */ if (!HeapTupleHeaderIsHotUpdated(htup)) break; nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); priorXmax = HeapTupleHeaderGetXmax(htup); } } }
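/*
 * Standalone model (added example) of the root-mapping pass above: each item
 * either starts a chain (it is not heap-only) or is reached by following
 * ctid-style successor links from its root.  Offsets are 1-based as on a
 * heap page; 0 in next[] means "no successor" and 0 in root[] means "no root
 * found".  The real code additionally checks HotUpdated flags and xmin/xmax
 * linkage to detect broken chains, which this sketch omits.
 */
#include <stdio.h>
#include <string.h>

#define NITEMS 6

int
main(void)
{
	/* item i is heap-only iff heap_only[i-1]; next[i-1] is its successor */
	int			heap_only[NITEMS] = {0, 1, 1, 0, 0, 1};
	int			next[NITEMS] = {2, 3, 0, 0, 6, 0};
	int			root[NITEMS];
	int			off;

	memset(root, 0, sizeof(root));

	for (off = 1; off <= NITEMS; off++)
	{
		int			cur;

		if (heap_only[off - 1])
			continue;			/* will be reached from its chain's root */
		/* plain tuple or chain root: walk the chain and record the root */
		for (cur = off; cur != 0; cur = next[cur - 1])
			root[cur - 1] = off;
	}

	for (off = 1; off <= NITEMS; off++)
		printf("item %d -> root %d\n", off, root[off - 1]);
	/* prints roots 1, 1, 1, 4, 5, 5 for the sample chains 1->2->3 and 5->6 */
	return 0;
}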