/* ----------------
 *		set relhasindex of relation's pg_class entry
 *
 * If isprimary is TRUE, we are defining a primary index, so also set
 * relhaspkey to TRUE.  Otherwise, leave relhaspkey alone.
 *
 * If reltoastidxid is not InvalidOid, also set reltoastidxid to that value.
 * This is only used for TOAST relations.
 *
 * NOTE: an important side-effect of this operation is that an SI invalidation
 * message is sent out to all backends --- including me --- causing relcache
 * entries to be flushed or updated with the new hasindex data.  This must
 * happen even if we find that no change is needed in the pg_class row.
 * ----------------
 */
void
setRelhasindex(Oid relid, bool hasindex, bool isprimary, Oid reltoastidxid)
{
    Relation    pg_class;
    HeapTuple   tuple;
    Form_pg_class classtuple;
    bool        dirty = false;
    HeapScanDesc pg_class_scan = NULL;

    /*
     * Find the tuple to update in pg_class.  In bootstrap mode we can't
     * use heap_update, so cheat and overwrite the tuple in-place.  In
     * normal processing, make a copy to scribble on.
     */
    pg_class = heap_openr(RelationRelationName, RowExclusiveLock);

    if (!IsBootstrapProcessingMode())
    {
        tuple = SearchSysCacheCopy(RELOID,
                                   ObjectIdGetDatum(relid),
                                   0, 0, 0);
    }
    else
    {
        ScanKeyData key[1];

        ScanKeyEntryInitialize(&key[0], 0,
                               ObjectIdAttributeNumber,
                               F_OIDEQ,
                               ObjectIdGetDatum(relid));

        pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
        tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
    }

    if (!HeapTupleIsValid(tuple))
        elog(ERROR, "could not find tuple for relation %u", relid);
    classtuple = (Form_pg_class) GETSTRUCT(tuple);

    /* Apply required updates */

    if (pg_class_scan)
        LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_EXCLUSIVE);

    if (classtuple->relhasindex != hasindex)
    {
        classtuple->relhasindex = hasindex;
        dirty = true;
    }
    if (isprimary)
    {
        if (!classtuple->relhaspkey)
        {
            classtuple->relhaspkey = true;
            dirty = true;
        }
    }
    if (OidIsValid(reltoastidxid))
    {
        Assert(classtuple->relkind == RELKIND_TOASTVALUE);
        if (classtuple->reltoastidxid != reltoastidxid)
        {
            classtuple->reltoastidxid = reltoastidxid;
            dirty = true;
        }
    }

    if (pg_class_scan)
        LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

    if (pg_class_scan)
    {
        /* Write the modified tuple in-place */
        WriteNoReleaseBuffer(pg_class_scan->rs_cbuf);
        /* Send out shared cache inval if necessary */
        if (!IsBootstrapProcessingMode())
            CacheInvalidateHeapTuple(pg_class, tuple);
        BufferSync();
    }
    else if (dirty)
    {
        simple_heap_update(pg_class, &tuple->t_self, tuple);

        /* Keep the catalog indexes up to date */
        CatalogUpdateIndexes(pg_class, tuple);
    }
    else
    {
        /* no need to change tuple, but force relcache rebuild anyway */
        CacheInvalidateRelcache(relid);
    }

    if (!pg_class_scan)
        heap_freetuple(tuple);
    else
        heap_endscan(pg_class_scan);

    heap_close(pg_class, RowExclusiveLock);
}
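/*
 * Usage sketch (an illustrative addition, not part of the original sources):
 * how a hypothetical caller might flag a newly built index.  The OID
 * parameters here ("heap_relid", "toast_relid", "toast_index_oid") are
 * placeholder names, not identifiers from this file.
 */
#ifdef NOT_USED
static void
example_mark_indexed(Oid heap_relid, Oid toast_relid, Oid toast_index_oid)
{
    /* An ordinary table that just got its primary-key index */
    setRelhasindex(heap_relid, true, true, InvalidOid);

    /* A TOAST table also records the OID of its (sole) index */
    setRelhasindex(toast_relid, true, false, toast_index_oid);
}
#endif   /* NOT_USED */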
/*
 * Prune specified item pointer or a HOT chain originating at that item.
 *
 * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
 * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
 * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
 * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
 * DEAD, the OldestXmin test is just too coarse to detect it.
 *
 * The root line pointer is redirected to the tuple immediately after the
 * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
 * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
 * tuple, which we treat as a chain of length 1.)
 *
 * OldestXmin is the cutoff XID used to identify dead tuples.
 *
 * We don't actually change the page here, except perhaps for hint-bit updates
 * caused by HeapTupleSatisfiesVacuum.  We just add entries to the arrays in
 * prstate showing the changes to be made.  Items to be redirected are added
 * to the redirected[] array (two entries per redirection); items to be set to
 * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
 * state are added to nowunused[].
 *
 * If redirect_move is true, we intend to get rid of redirecting line pointers,
 * not just make redirection entries.
 *
 * Returns the number of tuples (to be) deleted from the page.
 */
static int
heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
                 TransactionId OldestXmin,
                 PruneState *prstate,
                 bool redirect_move)
{
    int         ndeleted = 0;
    Page        dp = (Page) BufferGetPage(buffer);
    TransactionId priorXmax = InvalidTransactionId;
    ItemId      rootlp;
    HeapTupleHeader htup;
    OffsetNumber latestdead = InvalidOffsetNumber,
                redirect_target = InvalidOffsetNumber,
                maxoff = PageGetMaxOffsetNumber(dp),
                offnum;
    OffsetNumber chainitems[MaxHeapTuplesPerPage];
    int         nchain = 0,
                i;

    rootlp = PageGetItemId(dp, rootoffnum);

    /*
     * If it's a heap-only tuple, then it is not the start of a HOT chain.
     */
    if (ItemIdIsNormal(rootlp))
    {
        htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
        if (HeapTupleHeaderIsHeapOnly(htup))
        {
            /*
             * If the tuple is DEAD and doesn't chain to anything else, mark
             * it unused immediately.  (If it does chain, we can only remove
             * it as part of pruning its chain.)
             *
             * We need this primarily to handle aborted HOT updates, that is,
             * XMIN_INVALID heap-only tuples.  Those might not be linked to by
             * any chain, since the parent tuple might be re-updated before
             * any pruning occurs.  So we have to be able to reap them
             * separately from chain-pruning.  (Note that
             * HeapTupleHeaderIsHotUpdated will never return true for an
             * XMIN_INVALID tuple, so this code will work even when there were
             * sequential updates within the aborted transaction.)
             *
             * Note that we might first arrive at a dead heap-only tuple
             * either here or while following a chain below.  Whichever path
             * gets there first will mark the tuple unused.
             */
            if (HeapTupleSatisfiesVacuum(relation, htup, OldestXmin, buffer)
                == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
            {
                heap_prune_record_unused(prstate, rootoffnum);
                ndeleted++;
            }

            /* Nothing more to do */
            return ndeleted;
        }
    }

    /* Start from the root tuple */
    offnum = rootoffnum;

    /* while not end of the chain */
    for (;;)
    {
        ItemId      lp;
        bool        tupdead,
                    recent_dead;

        /* Some sanity checks */
        if (offnum < FirstOffsetNumber || offnum > maxoff)
            break;

        /* If item is already processed, stop --- it must not be same chain */
        if (prstate->marked[offnum])
            break;

        lp = PageGetItemId(dp, offnum);

        /* Unused item obviously isn't part of the chain */
        if (!ItemIdIsUsed(lp))
            break;

        /*
         * If we are looking at the redirected root line pointer, jump to the
         * first normal tuple in the chain.  If we find a redirect somewhere
         * else, stop --- it must not be same chain.
         */
        if (ItemIdIsRedirected(lp))
        {
            if (nchain > 0)
                break;          /* not at start of chain */
            chainitems[nchain++] = offnum;
            offnum = ItemIdGetRedirect(rootlp);
            continue;
        }

        /*
         * Likewise, a dead item pointer can't be part of the chain.  (We
         * already eliminated the case of dead root tuple outside this
         * function.)
         */
        if (ItemIdIsDead(lp))
            break;

        Assert(ItemIdIsNormal(lp));
        htup = (HeapTupleHeader) PageGetItem(dp, lp);

        /*
         * Check the tuple XMIN against prior XMAX, if any
         */
        if (TransactionIdIsValid(priorXmax) &&
            !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
            break;

        /*
         * OK, this tuple is indeed a member of the chain.
         */
        chainitems[nchain++] = offnum;

        /*
         * Check tuple's visibility status.
         */
        tupdead = recent_dead = false;

        switch (HeapTupleSatisfiesVacuum(relation, htup, OldestXmin, buffer))
        {
            case HEAPTUPLE_DEAD:
                tupdead = true;
                break;

            case HEAPTUPLE_RECENTLY_DEAD:
                recent_dead = true;

                /*
                 * This tuple may soon become DEAD.  Update the hint field so
                 * that the page is reconsidered for pruning in future.
                 */
                heap_prune_record_prunable(prstate,
                                           HeapTupleHeaderGetXmax(htup));
                break;

            case HEAPTUPLE_DELETE_IN_PROGRESS:

                /*
                 * This tuple may soon become DEAD.  Update the hint field so
                 * that the page is reconsidered for pruning in future.
                 */
                heap_prune_record_prunable(prstate,
                                           HeapTupleHeaderGetXmax(htup));
                break;

            case HEAPTUPLE_LIVE:
            case HEAPTUPLE_INSERT_IN_PROGRESS:

                /*
                 * If we wanted to optimize for aborts, we might consider
                 * marking the page prunable when we see INSERT_IN_PROGRESS.
                 * But we don't.  See related decisions about when to mark the
                 * page prunable in heapam.c.
                 */
                break;

            default:
                elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                break;
        }

        /*
         * Remember the last DEAD tuple seen.  We will advance past
         * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
         * but we can't advance past anything else.  (XXX is it really worth
         * continuing to scan beyond RECENTLY_DEAD?  The case where we will
         * find another DEAD tuple is a fairly unusual corner case.)
         */
        if (tupdead)
            latestdead = offnum;
        else if (!recent_dead)
            break;

        /*
         * If the tuple is not HOT-updated, then we are at the end of this
         * HOT-update chain.
         */
        if (!HeapTupleHeaderIsHotUpdated(htup))
            break;

        /*
         * Advance to next chain member.
         */
        Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
               BufferGetBlockNumber(buffer));
        offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
        priorXmax = HeapTupleHeaderGetXmax(htup);
    }

    /*
     * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
     * the DEAD tuples at the start of the chain are removed and the root line
     * pointer is appropriately redirected.
     */
    if (OffsetNumberIsValid(latestdead))
    {
        /*
         * Mark as unused each intermediate item that we are able to remove
         * from the chain.
         *
         * When the previous item is the last dead tuple seen, we are at the
         * right candidate for redirection.
         */
        for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
        {
            heap_prune_record_unused(prstate, chainitems[i]);
            ndeleted++;
        }

        /*
         * If the root entry had been a normal tuple, we are deleting it, so
         * count it in the result.  But changing a redirect (even to DEAD
         * state) doesn't count.
         */
        if (ItemIdIsNormal(rootlp))
            ndeleted++;

        /*
         * If the DEAD tuple is at the end of the chain, the entire chain is
         * dead and the root line pointer can be marked dead.  Otherwise just
         * redirect the root to the correct chain member.
         */
        if (i >= nchain)
            heap_prune_record_dead(prstate, rootoffnum);
        else
        {
            heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
            /* If the redirection will be a move, need more processing */
            if (redirect_move)
                redirect_target = chainitems[i];
        }
    }
    else if (nchain < 2 && ItemIdIsRedirected(rootlp))
    {
        /*
         * We found a redirect item that doesn't point to a valid follow-on
         * item.  This can happen if the loop in heap_page_prune caused us to
         * visit the dead successor of a redirect item before visiting the
         * redirect item.  We can clean up by setting the redirect item to
         * DEAD state.
         */
        heap_prune_record_dead(prstate, rootoffnum);
    }
    else if (redirect_move && ItemIdIsRedirected(rootlp))
    {
        /*
         * If we desire to eliminate LP_REDIRECT items by moving tuples, make
         * a redirection entry for each redirected root item; this will cause
         * heap_page_prune_execute to actually do the move.  (We get here only
         * when there are no DEAD tuples in the chain; otherwise the
         * redirection entry was made above.)
         */
        heap_prune_record_redirect(prstate, rootoffnum, chainitems[1]);
        redirect_target = chainitems[1];
    }

    /*
     * If we are going to implement a redirect by moving tuples, we have to
     * issue a cache invalidation against the redirection target tuple,
     * because its CTID will be effectively changed by the move.  Note that
     * CacheInvalidateHeapTuple only queues the request, it doesn't send it;
     * if we fail before reaching EndNonTransactionalInvalidation, nothing
     * happens and no harm is done.
     */
    if (OffsetNumberIsValid(redirect_target))
    {
        ItemId      firstlp = PageGetItemId(dp, redirect_target);
        HeapTupleData firsttup;

        Assert(ItemIdIsNormal(firstlp));
        /* Set up firsttup to reference the tuple at its existing CTID */
        firsttup.t_data = (HeapTupleHeader) PageGetItem(dp, firstlp);
        firsttup.t_len = ItemIdGetLength(firstlp);
        ItemPointerSet(&firsttup.t_self,
                       BufferGetBlockNumber(buffer),
                       redirect_target);
        CacheInvalidateHeapTuple(relation, &firsttup);
    }

    return ndeleted;
}
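/*
 * Usage sketch (an illustrative addition, not part of the original sources):
 * the real caller, heap_page_prune, walks every line pointer on the page
 * roughly like this, accumulating the requested changes in prstate before
 * applying them all at once via heap_page_prune_execute.  Setup of prstate,
 * WAL logging, and the page-defragment step are omitted here.
 */
#ifdef NOT_USED
static int
example_prune_all_chains(Relation relation, Buffer buffer,
                         TransactionId OldestXmin, PruneState *prstate,
                         bool redirect_move)
{
    Page        page = BufferGetPage(buffer);
    OffsetNumber offnum,
                maxoff = PageGetMaxOffsetNumber(page);
    int         ndeleted = 0;

    for (offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid = PageGetItemId(page, offnum);

        /* Skip items already visited as part of an earlier chain */
        if (prstate->marked[offnum])
            continue;

        /* Nothing to prune from an empty or already-dead slot */
        if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
            continue;

        /* Process this item or the HOT chain rooted at it */
        ndeleted += heap_prune_chain(relation, buffer, offnum,
                                     OldestXmin, prstate, redirect_move);
    }

    return ndeleted;
}
#endif   /* NOT_USED */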
/* ----------------
 *		UpdateStats
 *
 * Update pg_class' relpages and reltuples statistics for the given relation
 * (which can be either a table or an index).  Note that this is not used
 * in the context of VACUUM.
 * ----------------
 */
void
UpdateStats(Oid relid, double reltuples)
{
    Relation    whichRel;
    Relation    pg_class;
    HeapTuple   tuple;
    BlockNumber relpages;
    Form_pg_class rd_rel;
    HeapScanDesc pg_class_scan = NULL;
    bool        in_place_upd;

    /*
     * This routine handles updates for both the heap and index relation
     * statistics.  In order to guarantee that we're able to *see* the
     * index relation tuple, we bump the command counter id here.  The
     * index relation tuple was created in the current transaction.
     */
    CommandCounterIncrement();

    /*
     * CommandCounterIncrement() flushes invalid cache entries, including
     * those for the heap and index relations for which we're updating
     * statistics.  Now that the cache is flushed, it's safe to open the
     * relation again.  We need the relation open in order to figure out
     * how many blocks it contains.
     */

    /*
     * Grabbing lock here is probably redundant ...
     */
    whichRel = relation_open(relid, ShareLock);

    /*
     * Find the tuple to update in pg_class.  Normally we make a copy of
     * the tuple using the syscache, modify it, and apply heap_update.
     * But in bootstrap mode we can't use heap_update, so we cheat and
     * overwrite the tuple in-place.
     *
     * We also must cheat if reindexing pg_class itself, because the
     * target index may presently not be part of the set of indexes that
     * CatalogUpdateIndexes would update (see reindex_relation).  In this
     * case the stats updates will not be WAL-logged and so could be lost
     * in a crash.  This seems OK considering VACUUM does the same thing.
     */
    pg_class = heap_openr(RelationRelationName, RowExclusiveLock);

    in_place_upd = IsBootstrapProcessingMode() ||
        ReindexIsProcessingHeap(RelationGetRelid(pg_class));

    if (!in_place_upd)
    {
        tuple = SearchSysCacheCopy(RELOID,
                                   ObjectIdGetDatum(relid),
                                   0, 0, 0);
    }
    else
    {
        ScanKeyData key[1];

        ScanKeyEntryInitialize(&key[0], 0,
                               ObjectIdAttributeNumber,
                               F_OIDEQ,
                               ObjectIdGetDatum(relid));

        pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
        tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
    }

    if (!HeapTupleIsValid(tuple))
        elog(ERROR, "could not find tuple for relation %u", relid);
    rd_rel = (Form_pg_class) GETSTRUCT(tuple);

    /*
     * Figure values to insert.
     *
     * If we found zero tuples in the scan, do NOT believe it; instead put a
     * bogus estimate into the statistics fields.  Otherwise, the common
     * pattern "CREATE TABLE; CREATE INDEX; insert data" leaves the table
     * with zero size statistics until a VACUUM is done.  The optimizer
     * will generate very bad plans if the stats claim the table is empty
     * when it is actually sizable.  See also CREATE TABLE in heap.c.
     *
     * Note: this path is also taken during bootstrap, because bootstrap.c
     * passes reltuples = 0 after loading a table.  We have to estimate
     * some number for reltuples based on the actual number of pages.
     */
    relpages = RelationGetNumberOfBlocks(whichRel);

    if (reltuples == 0)
    {
        if (relpages == 0)
        {
            /* Bogus defaults for a virgin table, same as heap.c */
            reltuples = 1000;
            relpages = 10;
        }
        else if (whichRel->rd_rel->relkind == RELKIND_INDEX && relpages <= 2)
        {
            /* Empty index, leave bogus defaults in place */
            reltuples = 1000;
        }
        else
            reltuples = ((double) relpages) *
                NTUPLES_PER_PAGE(whichRel->rd_rel->relnatts);
    }

    /*
     * Update statistics in pg_class, if they changed.  (Avoiding an
     * unnecessary update is not just a tiny performance improvement; it
     * also reduces the window wherein concurrent CREATE INDEX commands
     * may conflict.)
     */
    if (rd_rel->relpages != (int32) relpages ||
        rd_rel->reltuples != (float4) reltuples)
    {
        if (in_place_upd)
        {
            /* Bootstrap or reindex case: overwrite fields in place. */
            LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_EXCLUSIVE);
            rd_rel->relpages = (int32) relpages;
            rd_rel->reltuples = (float4) reltuples;
            LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
            WriteNoReleaseBuffer(pg_class_scan->rs_cbuf);
            if (!IsBootstrapProcessingMode())
                CacheInvalidateHeapTuple(pg_class, tuple);
        }
        else
        {
            /* During normal processing, must work harder. */
            rd_rel->relpages = (int32) relpages;
            rd_rel->reltuples = (float4) reltuples;
            simple_heap_update(pg_class, &tuple->t_self, tuple);
            CatalogUpdateIndexes(pg_class, tuple);
        }
    }

    if (!pg_class_scan)
        heap_freetuple(tuple);
    else
        heap_endscan(pg_class_scan);

    /*
     * We shouldn't have to do this, but we do...  Modify the reldesc in
     * place with the new values so that the cache contains the latest
     * copy.  (XXX is this really still necessary?  The relcache will get
     * fixed at next CommandCounterIncrement, so why bother here?)
     */
    whichRel->rd_rel->relpages = (int32) relpages;
    whichRel->rd_rel->reltuples = (float4) reltuples;

    heap_close(pg_class, RowExclusiveLock);
    relation_close(whichRel, NoLock);
}
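/*
 * Usage sketch (an illustrative addition, not part of the original sources):
 * after building an index, a caller would typically refresh pg_class
 * statistics for both the heap and the new index using tuple counts gathered
 * during the build.  "heap_relid", "index_relid", "heapTuples", and
 * "indexTuples" are placeholder names.
 */
#ifdef NOT_USED
static void
example_update_build_stats(Oid heap_relid, Oid index_relid,
                           double heapTuples, double indexTuples)
{
    UpdateStats(heap_relid, heapTuples);    /* table's relpages/reltuples */
    UpdateStats(index_relid, indexTuples);  /* index's relpages/reltuples */
}
#endif   /* NOT_USED */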