Datum gistvacuumcleanup(PG_FUNCTION_ARGS) { IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); GistBulkDeleteResult *stats = (GistBulkDeleteResult *) PG_GETARG_POINTER(1); Relation rel = info->index; BlockNumber npages, blkno; BlockNumber totFreePages, nFreePages, *freePages, maxFreePages; BlockNumber lastBlock = GIST_ROOT_BLKNO, lastFilledBlock = GIST_ROOT_BLKNO; bool needLock; /* Set up all-zero stats if gistbulkdelete wasn't called */ if (stats == NULL) { stats = (GistBulkDeleteResult *) palloc0(sizeof(GistBulkDeleteResult)); /* use heap's tuple count */ Assert(info->num_heap_tuples >= 0); stats->std.num_index_tuples = info->num_heap_tuples; /* * XXX the above is wrong if index is partial. Would it be OK to just * return NULL, or is there work we must do below? */ } /* gistVacuumUpdate may cause hard work */ if (info->vacuum_full) { GistVacuum gv; ArrayTuple res; /* note: vacuum.c already acquired AccessExclusiveLock on index */ gv.index = rel; initGISTstate(&(gv.giststate), rel); gv.opCtx = createTempGistContext(); gv.result = stats; gv.strategy = info->strategy; /* walk through the entire index for update tuples */ res = gistVacuumUpdate(&gv, GIST_ROOT_BLKNO, false); /* cleanup */ if (res.itup) { int i; for (i = 0; i < res.ituplen; i++) pfree(res.itup[i]); pfree(res.itup); } freeGISTstate(&(gv.giststate)); MemoryContextDelete(gv.opCtx); } else if (stats->needFullVacuum) ereport(NOTICE, (errmsg("index \"%s\" needs VACUUM FULL or REINDEX to finish crash recovery", RelationGetRelationName(rel)))); /* * If vacuum full, we already have exclusive lock on the index. Otherwise, * need lock unless it's local to this backend. 
*/ if (info->vacuum_full) needLock = false; else needLock = !RELATION_IS_LOCAL(rel); /* try to find deleted pages */ if (needLock) LockRelationForExtension(rel, ExclusiveLock); npages = RelationGetNumberOfBlocks(rel); if (needLock) UnlockRelationForExtension(rel, ExclusiveLock); maxFreePages = npages; if (maxFreePages > MaxFSMPages) maxFreePages = MaxFSMPages; totFreePages = nFreePages = 0; freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages); for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++) { Buffer buffer; Page page; vacuum_delay_point(); buffer = ReadBufferWithStrategy(rel, blkno, info->strategy); LockBuffer(buffer, GIST_SHARE); page = (Page) BufferGetPage(buffer); if (PageIsNew(page) || GistPageIsDeleted(page)) { if (nFreePages < maxFreePages) freePages[nFreePages++] = blkno; totFreePages++; } else lastFilledBlock = blkno; UnlockReleaseBuffer(buffer); } lastBlock = npages - 1; if (info->vacuum_full && nFreePages > 0) { /* try to truncate index */ int i; for (i = 0; i < nFreePages; i++) if (freePages[i] >= lastFilledBlock) { totFreePages = nFreePages = i; break; } if (lastBlock > lastFilledBlock) RelationTruncate(rel, lastFilledBlock + 1); stats->std.pages_removed = lastBlock - lastFilledBlock; } RecordIndexFreeSpace(&rel->rd_node, totFreePages, nFreePages, freePages); pfree(freePages); /* return statistics */ stats->std.pages_free = totFreePages; if (needLock) LockRelationForExtension(rel, ExclusiveLock); stats->std.num_pages = RelationGetNumberOfBlocks(rel); if (needLock) UnlockRelationForExtension(rel, ExclusiveLock); PG_RETURN_POINTER(stats); }
/*
 * btvacuumscan --- one physical-order pass over a btree index for VACUUM
 *
 * A single scan serves three purposes: removing leaf tuples the vacuum
 * callback declares dead, spotting empty pages that can be deleted, and
 * spotting previously deleted pages that may now be recycled.  It is the
 * common worker behind btbulkdelete and btvacuumcleanup (the latter uses it
 * only when btbulkdelete never ran).
 *
 * The caller must hand in an already allocated and zeroed stats struct, and
 * must have obtained a vacuum cycle ID beforehand if one is needed.
 */
static void
btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
			 IndexBulkDeleteCallback callback, void *callback_state,
			 BTCycleId cycleid)
{
	MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE;

	Relation	rel = info->index;
	BTVacState	vstate;
	BlockNumber relPages;
	BlockNumber nextBlkno;
	bool		lockForExtend;

	MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER;

	/* State handed down to btvacuumpage for every page visited */
	vstate.info = info;
	vstate.stats = stats;
	vstate.callback = callback;
	vstate.callback_state = callback_state;
	vstate.cycleid = cycleid;

	/* The free-page array is sized later, once the index length is known */
	vstate.freePages = NULL;
	vstate.nFreePages = 0;
	vstate.maxFreePages = 0;
	vstate.totFreePages = 0;

	/* _bt_pagedel runs inside its own short-lived memory context */
	vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
												  "_bt_pagedel",
												  ALLOCSET_DEFAULT_MINSIZE,
												  ALLOCSET_DEFAULT_INITSIZE,
												  ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * These counters accumulate during the scan, so start them from zero;
	 * one VACUUM command may run this scan more than once.
	 */
	stats->num_index_tuples = 0;
	stats->pages_deleted = 0;

	/*
	 * Walk every page except the metapage in physical order (hoping the
	 * kernel rewards us with read-ahead).  Missing a leaf page, even one
	 * appended after the scan began, could leave deletable tuples behind,
	 * so the relation length is re-read until no unvisited pages remain.
	 *
	 * The length is read under the relation-extension lock.  Without it
	 * there is a window in which bufmgr/smgr have created a new all-zero
	 * page that _bt_getbuf() has not yet write-locked; scanning such a page
	 * would make it look recyclable.  With the lock, either the new page is
	 * not included in the length we see, or _bt_getbuf already holds the
	 * write lock and the page is fully initialized before we can look at
	 * it.  (vacuumlazy.c has the same issue.)  A page added right after we
	 * look is harmless: page splitting write-locks the left page before
	 * adding the right one, so any tuples destined for the new page have
	 * already been processed.
	 *
	 * New or temp relations need no such lock, since nobody else can be
	 * accessing them.
	 */
	lockForExtend = !RELATION_IS_LOCAL(rel);

	nextBlkno = BTREE_METAPAGE + 1;
	for (;;)
	{
		/* Fetch the current length of the relation */
		if (lockForExtend)
			LockRelationForExtension(rel, ExclusiveLock);
		relPages = RelationGetNumberOfBlocks(rel);
		if (lockForExtend)
			UnlockRelationForExtension(rel, ExclusiveLock);

		/* First time through: size the free-page array from that length */
		if (vstate.freePages == NULL)
		{
			/* Remembering more than MaxFSMPages pages would be pointless */
			vstate.maxFreePages = MaxFSMPages;
			if ((BlockNumber) vstate.maxFreePages > relPages)
				vstate.maxFreePages = (int) relPages;
			vstate.freePages = (BlockNumber *)
				palloc(vstate.maxFreePages * sizeof(BlockNumber));
		}

		/* Nothing left that we have not already visited? */
		if (nextBlkno >= relPages)
			break;

		/* Process the pages known so far, then go recheck the length */
		while (nextBlkno < relPages)
		{
			btvacuumpage(&vstate, nextBlkno, nextBlkno);
			nextBlkno++;
		}
	}

	/*
	 * VACUUM FULL only: chop recyclable pages off the end of the index.  A
	 * plain vacuum could do this safely only by taking exclusive lock on
	 * the index and rechecking every page, which doesn't seem worth it.
	 */
	if (info->vacuum_full && vstate.nFreePages > 0)
	{
		BlockNumber keepPages = relPages;

		for (;;)
		{
			if (vstate.nFreePages <= 0)
				break;
			if (vstate.freePages[vstate.nFreePages - 1] != keepPages - 1)
				break;

			/* the last remaining page is free: peel it off */
			keepPages--;
			stats->pages_deleted--;
			vstate.nFreePages--;
			vstate.totFreePages = vstate.nFreePages;	/* can't be more */
		}

		if (keepPages != relPages)
		{
			/* Okay to truncate. */
			RelationTruncate(rel,
							 keepPages,
							 /* markPersistentAsPhysicallyTruncated */ true);

			/* update statistics */
			stats->pages_removed += relPages - keepPages;
			relPages = keepPages;
		}
	}

	/*
	 * Replace whatever the shared Free Space Map previously knew about this
	 * index with what we just learned.  The page numbers were collected in
	 * ascending order, so no sort is needed.
	 */
	RecordIndexFreeSpace(&rel->rd_node, vstate.totFreePages,
						 vstate.nFreePages, vstate.freePages);

	pfree(vstate.freePages);

	MemoryContextDelete(vstate.pagedelcontext);

	/* update statistics */
	stats->pages_free = vstate.totFreePages;
	stats->num_pages = relPages;

	MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT;
}
Datum ginvacuumcleanup(PG_FUNCTION_ARGS) { MIRROREDLOCK_BUFMGR_DECLARE; IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); Relation index = info->index; bool needLock; BlockNumber npages, blkno; BlockNumber totFreePages, nFreePages, *freePages, maxFreePages; BlockNumber lastBlock = GIN_ROOT_BLKNO, lastFilledBlock = GIN_ROOT_BLKNO; /* Set up all-zero stats if ginbulkdelete wasn't called */ if (stats == NULL) stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); /* * XXX we always report the heap tuple count as the number of index * entries. This is bogus if the index is partial, but it's real hard to * tell how many distinct heap entries are referenced by a GIN index. */ stats->num_index_tuples = info->num_heap_tuples; /* * If vacuum full, we already have exclusive lock on the index. Otherwise, * need lock unless it's local to this backend. */ if (info->vacuum_full) needLock = false; else needLock = !RELATION_IS_LOCAL(index); if (needLock) LockRelationForExtension(index, ExclusiveLock); npages = RelationGetNumberOfBlocks(index); if (needLock) UnlockRelationForExtension(index, ExclusiveLock); maxFreePages = npages; if (maxFreePages > MaxFSMPages) maxFreePages = MaxFSMPages; totFreePages = nFreePages = 0; freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages); for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++) { Buffer buffer; Page page; vacuum_delay_point(); // -------- MirroredLock ---------- MIRROREDLOCK_BUFMGR_LOCK; buffer = ReadBuffer(index, blkno); LockBuffer(buffer, GIN_SHARE); page = (Page) BufferGetPage(buffer); if (GinPageIsDeleted(page)) { if (nFreePages < maxFreePages) freePages[nFreePages++] = blkno; totFreePages++; } else lastFilledBlock = blkno; UnlockReleaseBuffer(buffer); MIRROREDLOCK_BUFMGR_UNLOCK; // -------- MirroredLock ---------- } lastBlock = npages - 1; if (info->vacuum_full && nFreePages > 0) { /* try to 
truncate index */ int i; for (i = 0; i < nFreePages; i++) if (freePages[i] >= lastFilledBlock) { totFreePages = nFreePages = i; break; } if (lastBlock > lastFilledBlock) RelationTruncate( index, lastFilledBlock + 1, /* markPersistentAsPhysicallyTruncated */ true); stats->pages_removed = lastBlock - lastFilledBlock; } RecordIndexFreeSpace(&index->rd_node, totFreePages, nFreePages, freePages); stats->pages_free = totFreePages; if (needLock) LockRelationForExtension(index, ExclusiveLock); stats->num_pages = RelationGetNumberOfBlocks(index); if (needLock) UnlockRelationForExtension(index, ExclusiveLock); PG_RETURN_POINTER(stats); }