/*
 * SQL-callable function to clean the insert pending list
 */
Datum
gin_clean_pending_list(PG_FUNCTION_ARGS)
{
    Oid         indexoid = PG_GETARG_OID(0);
    Relation    indexRel = index_open(indexoid, AccessShareLock);
    IndexBulkDeleteResult stats;
    GinState    ginstate;

    if (RecoveryInProgress())
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("recovery is in progress"),
                 errhint("GIN pending list cannot be cleaned up during recovery.")));

    /* Must be a GIN index */
    if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
        indexRel->rd_rel->relam != GIN_AM_OID)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a GIN index",
                        RelationGetRelationName(indexRel))));

    /*
     * Reject attempts to read non-local temporary relations; we would be
     * likely to get wrong data since we have no visibility into the owning
     * session's local buffers.
     */
    if (RELATION_IS_OTHER_TEMP(indexRel))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot access temporary indexes of other sessions")));

    /* User must own the index (comparable to privileges needed for VACUUM) */
    if (!pg_class_ownercheck(indexoid, GetUserId()))
        aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
                       RelationGetRelationName(indexRel));

    memset(&stats, 0, sizeof(stats));
    initGinState(&ginstate, indexRel);
    ginInsertCleanup(&ginstate, true, true, &stats);

    index_close(indexRel, AccessShareLock);

    PG_RETURN_INT64((int64) stats.pages_deleted);
}
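/*
 * Illustrative usage (not part of the original source): gin_clean_pending_list()
 * is exposed at the SQL level, so the pending list of a GIN index can be
 * flushed manually.  For a hypothetical index named "idx_docs_gin":
 *
 *     SELECT gin_clean_pending_list('idx_docs_gin'::regclass);
 *
 * The bigint result is the number of pending-list pages removed
 * (stats.pages_deleted above).
 */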
Datum
ginvacuumcleanup(PG_FUNCTION_ARGS)
{
    IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
    IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
    Relation    index = info->index;
    bool        needLock;
    BlockNumber npages,
                blkno;
    BlockNumber totFreePages;
    GinState    ginstate;
    GinStatsData idxStat;

    /*
     * In an autovacuum analyze, we want to clean up pending insertions.
     * Otherwise, an ANALYZE-only call is a no-op.
     */
    if (info->analyze_only)
    {
        if (IsAutoVacuumWorkerProcess())
        {
            initGinState(&ginstate, index);
            ginInsertCleanup(&ginstate, true, stats);
        }
        PG_RETURN_POINTER(stats);
    }

    /*
     * Set up all-zero stats and cleanup pending inserts if ginbulkdelete
     * wasn't called
     */
    if (stats == NULL)
    {
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
        initGinState(&ginstate, index);
        ginInsertCleanup(&ginstate, true, stats);
    }

    memset(&idxStat, 0, sizeof(idxStat));

    /*
     * XXX we always report the heap tuple count as the number of index
     * entries.  This is bogus if the index is partial, but it's real hard to
     * tell how many distinct heap entries are referenced by a GIN index.
     */
    stats->num_index_tuples = info->num_heap_tuples;
    stats->estimated_count = info->estimated_count;

    /*
     * Need lock unless it's local to this backend.
     */
    needLock = !RELATION_IS_LOCAL(index);

    if (needLock)
        LockRelationForExtension(index, ExclusiveLock);
    npages = RelationGetNumberOfBlocks(index);
    if (needLock)
        UnlockRelationForExtension(index, ExclusiveLock);

    totFreePages = 0;

    for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++)
    {
        Buffer      buffer;
        Page        page;

        vacuum_delay_point();

        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, info->strategy);
        LockBuffer(buffer, GIN_SHARE);
        page = (Page) BufferGetPage(buffer);

        if (GinPageIsDeleted(page))
        {
            Assert(blkno != GIN_ROOT_BLKNO);
            RecordFreeIndexPage(index, blkno);
            totFreePages++;
        }
        else if (GinPageIsData(page))
        {
            idxStat.nDataPages++;
        }
        else if (!GinPageIsList(page))
        {
            idxStat.nEntryPages++;

            if (GinPageIsLeaf(page))
                idxStat.nEntries += PageGetMaxOffsetNumber(page);
        }

        UnlockReleaseBuffer(buffer);
    }

    /* Update the metapage with accurate page and entry counts */
    idxStat.nTotalPages = npages;
    ginUpdateStats(info->index, &idxStat);

    /* Finally, vacuum the FSM */
    IndexFreeSpaceMapVacuum(info->index);

    stats->pages_free = totFreePages;

    if (needLock)
        LockRelationForExtension(index, ExclusiveLock);
    stats->num_pages = RelationGetNumberOfBlocks(index);
    if (needLock)
        UnlockRelationForExtension(index, ExclusiveLock);

    PG_RETURN_POINTER(stats);
}
Datum
ginbulkdelete(PG_FUNCTION_ARGS)
{
    IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
    IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
    IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
    void       *callback_state = (void *) PG_GETARG_POINTER(3);
    Relation    index = info->index;
    BlockNumber blkno = GIN_ROOT_BLKNO;
    GinVacuumState gvs;
    Buffer      buffer;
    BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
    uint32      nRoot;

    gvs.index = index;
    gvs.callback = callback;
    gvs.callback_state = callback_state;
    gvs.strategy = info->strategy;
    initGinState(&gvs.ginstate, index);

    /* first time through? */
    if (stats == NULL)
    {
        /* Yes, so initialize stats to zeroes */
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
        /* and clean up any pending inserts */
        ginInsertCleanup(&gvs.ginstate, true, stats);
    }

    /* we'll re-count the tuples each time */
    stats->num_index_tuples = 0;

    gvs.result = stats;

    buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                RBM_NORMAL, info->strategy);

    /* find leaf page */
    for (;;)
    {
        Page        page = BufferGetPage(buffer);
        IndexTuple  itup;

        LockBuffer(buffer, GIN_SHARE);

        Assert(!GinPageIsData(page));

        if (GinPageIsLeaf(page))
        {
            LockBuffer(buffer, GIN_UNLOCK);
            LockBuffer(buffer, GIN_EXCLUSIVE);

            if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page))
            {
                LockBuffer(buffer, GIN_UNLOCK);
                continue;       /* check it once more */
            }
            break;
        }

        Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);

        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
        blkno = GinGetDownlink(itup);
        Assert(blkno != InvalidBlockNumber);

        UnlockReleaseBuffer(buffer);
        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, info->strategy);
    }

    /* We have now found the leftmost page in the entry B-tree */
    for (;;)
    {
        Page        page = BufferGetPage(buffer);
        Page        resPage;
        uint32      i;

        Assert(!GinPageIsData(page));

        resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot);

        blkno = GinPageGetOpaque(page)->rightlink;

        if (resPage)
        {
            START_CRIT_SECTION();
            PageRestoreTempPage(resPage, page);
            MarkBufferDirty(buffer);
            xlogVacuumPage(gvs.index, buffer);
            UnlockReleaseBuffer(buffer);
            END_CRIT_SECTION();
        }
        else
        {
            UnlockReleaseBuffer(buffer);
        }

        vacuum_delay_point();

        for (i = 0; i < nRoot; i++)
        {
            ginVacuumPostingTree(&gvs, rootOfPostingTree[i]);
            vacuum_delay_point();
        }

        if (blkno == InvalidBlockNumber)    /* rightmost page */
            break;

        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, info->strategy);
        LockBuffer(buffer, GIN_EXCLUSIVE);
    }

    PG_RETURN_POINTER(gvs.result);
}
/*
 * Write the index tuples contained in *collector into the index's
 * pending list.
 *
 * Function guarantees that all these tuples will be inserted consecutively,
 * preserving order.
 */
void
ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
{
    Relation    index = ginstate->index;
    Buffer      metabuffer;
    Page        metapage;
    GinMetaPageData *metadata = NULL;
    XLogRecData rdata[2];
    Buffer      buffer = InvalidBuffer;
    Page        page = NULL;
    ginxlogUpdateMeta data;
    bool        separateList = false;
    bool        needCleanup = false;

    if (collector->ntuples == 0)
        return;

    data.node = index->rd_node;
    data.ntuples = 0;
    data.newRightlink = data.prevTail = InvalidBlockNumber;

    rdata[0].buffer = InvalidBuffer;
    rdata[0].data = (char *) &data;
    rdata[0].len = sizeof(ginxlogUpdateMeta);
    rdata[0].next = NULL;

    metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
    metapage = BufferGetPage(metabuffer);

    if (collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GinListPageSize)
    {
        /*
         * Total size is greater than one page => make sublist
         */
        separateList = true;
    }
    else
    {
        LockBuffer(metabuffer, GIN_EXCLUSIVE);
        metadata = GinPageGetMeta(metapage);

        if (metadata->head == InvalidBlockNumber ||
            collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize)
        {
            /*
             * Pending list is empty or total size is greater than freespace
             * on tail page => make sublist
             *
             * We unlock metabuffer to keep high concurrency
             */
            separateList = true;
            LockBuffer(metabuffer, GIN_UNLOCK);
        }
    }

    if (separateList)
    {
        /*
         * We should make sublist separately and append it to the tail
         */
        GinMetaPageData sublist;

        memset(&sublist, 0, sizeof(GinMetaPageData));
        makeSublist(index, collector->tuples, collector->ntuples, &sublist);

        /*
         * metapage was unlocked, see above
         */
        LockBuffer(metabuffer, GIN_EXCLUSIVE);
        metadata = GinPageGetMeta(metapage);

        if (metadata->head == InvalidBlockNumber)
        {
            /*
             * Main list is empty, so just insert sublist as main list
             */
            START_CRIT_SECTION();

            metadata->head = sublist.head;
            metadata->tail = sublist.tail;
            metadata->tailFreeSize = sublist.tailFreeSize;

            metadata->nPendingPages = sublist.nPendingPages;
            metadata->nPendingHeapTuples = sublist.nPendingHeapTuples;
        }
        else
        {
            /*
             * Merge lists
             */
            data.prevTail = metadata->tail;
            data.newRightlink = sublist.head;

            buffer = ReadBuffer(index, metadata->tail);
            LockBuffer(buffer, GIN_EXCLUSIVE);
            page = BufferGetPage(buffer);

            rdata[0].next = rdata + 1;

            rdata[1].buffer = buffer;
            rdata[1].buffer_std = true;
            rdata[1].data = NULL;
            rdata[1].len = 0;
            rdata[1].next = NULL;

            Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);

            START_CRIT_SECTION();

            GinPageGetOpaque(page)->rightlink = sublist.head;

            MarkBufferDirty(buffer);

            metadata->tail = sublist.tail;
            metadata->tailFreeSize = sublist.tailFreeSize;

            metadata->nPendingPages += sublist.nPendingPages;
            metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
        }
    }
    else
    {
        /*
         * Insert into tail page.  Metapage is already locked
         */
        OffsetNumber l,
                    off;
        int         i,
                    tupsize;
        char       *ptr;

        buffer = ReadBuffer(index, metadata->tail);
        LockBuffer(buffer, GIN_EXCLUSIVE);
        page = BufferGetPage(buffer);
        off = (PageIsEmpty(page)) ? FirstOffsetNumber :
            OffsetNumberNext(PageGetMaxOffsetNumber(page));

        rdata[0].next = rdata + 1;

        rdata[1].buffer = buffer;
        rdata[1].buffer_std = true;
        ptr = rdata[1].data = (char *) palloc(collector->sumsize);
        rdata[1].len = collector->sumsize;
        rdata[1].next = NULL;

        data.ntuples = collector->ntuples;

        START_CRIT_SECTION();

        /*
         * Increase counter of heap tuples
         */
        Assert(GinPageGetOpaque(page)->maxoff <= metadata->nPendingHeapTuples);
        GinPageGetOpaque(page)->maxoff++;
        metadata->nPendingHeapTuples++;

        for (i = 0; i < collector->ntuples; i++)
        {
            tupsize = IndexTupleSize(collector->tuples[i]);
            l = PageAddItem(page, (Item) collector->tuples[i], tupsize, off, false, false);

            if (l == InvalidOffsetNumber)
                elog(ERROR, "failed to add item to index page in \"%s\"",
                     RelationGetRelationName(index));

            memcpy(ptr, collector->tuples[i], tupsize);
            ptr += tupsize;

            off++;
        }

        Assert((ptr - rdata[1].data) <= collector->sumsize);

        metadata->tailFreeSize = PageGetExactFreeSpace(page);

        MarkBufferDirty(buffer);
    }

    /*
     * Write metabuffer, make xlog entry
     */
    MarkBufferDirty(metabuffer);

    if (RelationNeedsWAL(index))
    {
        XLogRecPtr  recptr;

        memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));

        recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
        PageSetLSN(metapage, recptr);
        PageSetTLI(metapage, ThisTimeLineID);

        if (buffer != InvalidBuffer)
        {
            PageSetLSN(page, recptr);
            PageSetTLI(page, ThisTimeLineID);
        }
    }

    if (buffer != InvalidBuffer)
        UnlockReleaseBuffer(buffer);

    /*
     * Force pending list cleanup when it becomes too long.  And,
     * ginInsertCleanup could take significant amount of time, so we prefer
     * to call it when it can do all the work in a single collection cycle.
     * In non-vacuum mode, it shouldn't require maintenance_work_mem, so fire
     * it while pending list is still small enough to fit into work_mem.
     *
     * ginInsertCleanup() should not be called inside our CRIT_SECTION.
     */
    if (metadata->nPendingPages * GIN_PAGE_FREESIZE > work_mem * 1024L)
        needCleanup = true;

    UnlockReleaseBuffer(metabuffer);

    END_CRIT_SECTION();

    if (needCleanup)
        ginInsertCleanup(ginstate, false, NULL);
}
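/*
 * Worked example (an assumption-based sketch, not part of the original
 * source): with the default 8 kB block size, GIN_PAGE_FREESIZE is a little
 * under 8 kB, so with the default work_mem of 4 MB the test above,
 *
 *     nPendingPages * GIN_PAGE_FREESIZE > work_mem * 1024L
 *
 * fires once the pending list exceeds roughly 500 pages, i.e. about 4 MB of
 * pending-list data.  Raising work_mem therefore delays this backend-side
 * cleanup trigger in this older version of the function.
 */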
/*
 * Write the index tuples contained in *collector into the index's
 * pending list.
 *
 * Function guarantees that all these tuples will be inserted consecutively,
 * preserving order.
 */
void
ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
{
    Relation    index = ginstate->index;
    Buffer      metabuffer;
    Page        metapage;
    GinMetaPageData *metadata = NULL;
    Buffer      buffer = InvalidBuffer;
    Page        page = NULL;
    ginxlogUpdateMeta data;
    bool        separateList = false;
    bool        needCleanup = false;
    int         cleanupSize;
    bool        needWal;

    if (collector->ntuples == 0)
        return;

    needWal = RelationNeedsWAL(index);

    data.node = index->rd_node;
    data.ntuples = 0;
    data.newRightlink = data.prevTail = InvalidBlockNumber;

    metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
    metapage = BufferGetPage(metabuffer);

    if (collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GinListPageSize)
    {
        /*
         * Total size is greater than one page => make sublist
         */
        separateList = true;
    }
    else
    {
        LockBuffer(metabuffer, GIN_EXCLUSIVE);
        metadata = GinPageGetMeta(metapage);

        if (metadata->head == InvalidBlockNumber ||
            collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize)
        {
            /*
             * Pending list is empty or total size is greater than freespace
             * on tail page => make sublist
             *
             * We unlock metabuffer to keep high concurrency
             */
            separateList = true;
            LockBuffer(metabuffer, GIN_UNLOCK);
        }
    }

    if (separateList)
    {
        /*
         * We should make sublist separately and append it to the tail
         */
        GinMetaPageData sublist;

        memset(&sublist, 0, sizeof(GinMetaPageData));
        makeSublist(index, collector->tuples, collector->ntuples, &sublist);

        if (needWal)
            XLogBeginInsert();

        /*
         * metapage was unlocked, see above
         */
        LockBuffer(metabuffer, GIN_EXCLUSIVE);
        metadata = GinPageGetMeta(metapage);

        if (metadata->head == InvalidBlockNumber)
        {
            /*
             * Main list is empty, so just insert sublist as main list
             */
            START_CRIT_SECTION();

            metadata->head = sublist.head;
            metadata->tail = sublist.tail;
            metadata->tailFreeSize = sublist.tailFreeSize;

            metadata->nPendingPages = sublist.nPendingPages;
            metadata->nPendingHeapTuples = sublist.nPendingHeapTuples;
        }
        else
        {
            /*
             * Merge lists
             */
            data.prevTail = metadata->tail;
            data.newRightlink = sublist.head;

            buffer = ReadBuffer(index, metadata->tail);
            LockBuffer(buffer, GIN_EXCLUSIVE);
            page = BufferGetPage(buffer);

            Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);

            START_CRIT_SECTION();

            GinPageGetOpaque(page)->rightlink = sublist.head;

            MarkBufferDirty(buffer);

            metadata->tail = sublist.tail;
            metadata->tailFreeSize = sublist.tailFreeSize;

            metadata->nPendingPages += sublist.nPendingPages;
            metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;

            if (needWal)
                XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
        }
    }
    else
    {
        /*
         * Insert into tail page.  Metapage is already locked
         */
        OffsetNumber l,
                    off;
        int         i,
                    tupsize;
        char       *ptr;
        char       *collectordata;

        buffer = ReadBuffer(index, metadata->tail);
        LockBuffer(buffer, GIN_EXCLUSIVE);
        page = BufferGetPage(buffer);
        off = (PageIsEmpty(page)) ? FirstOffsetNumber :
            OffsetNumberNext(PageGetMaxOffsetNumber(page));

        collectordata = ptr = (char *) palloc(collector->sumsize);

        data.ntuples = collector->ntuples;

        if (needWal)
            XLogBeginInsert();

        START_CRIT_SECTION();

        /*
         * Increase counter of heap tuples
         */
        Assert(GinPageGetOpaque(page)->maxoff <= metadata->nPendingHeapTuples);
        GinPageGetOpaque(page)->maxoff++;
        metadata->nPendingHeapTuples++;

        for (i = 0; i < collector->ntuples; i++)
        {
            tupsize = IndexTupleSize(collector->tuples[i]);
            l = PageAddItem(page, (Item) collector->tuples[i], tupsize, off, false, false);

            if (l == InvalidOffsetNumber)
                elog(ERROR, "failed to add item to index page in \"%s\"",
                     RelationGetRelationName(index));

            memcpy(ptr, collector->tuples[i], tupsize);
            ptr += tupsize;

            off++;
        }

        Assert((ptr - collectordata) <= collector->sumsize);
        if (needWal)
        {
            XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
            XLogRegisterBufData(1, collectordata, collector->sumsize);
        }

        metadata->tailFreeSize = PageGetExactFreeSpace(page);

        MarkBufferDirty(buffer);
    }

    /*
     * Set pd_lower just past the end of the metadata.  This is essential,
     * because without doing so, metadata will be lost if xlog.c compresses
     * the page.  (We must do this here because pre-v11 versions of PG did
     * not set the metapage's pd_lower correctly, so a pg_upgraded index
     * might contain the wrong value.)
     */
    ((PageHeader) metapage)->pd_lower =
        ((char *) metadata + sizeof(GinMetaPageData)) - (char *) metapage;

    /*
     * Write metabuffer, make xlog entry
     */
    MarkBufferDirty(metabuffer);

    if (needWal)
    {
        XLogRecPtr  recptr;

        memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));

        XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
        XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));

        recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
        PageSetLSN(metapage, recptr);

        if (buffer != InvalidBuffer)
        {
            PageSetLSN(page, recptr);
        }
    }

    if (buffer != InvalidBuffer)
        UnlockReleaseBuffer(buffer);

    /*
     * Force pending list cleanup when it becomes too long.  And,
     * ginInsertCleanup could take significant amount of time, so we prefer
     * to call it when it can do all the work in a single collection cycle.
     * In non-vacuum mode, it shouldn't require maintenance_work_mem, so fire
     * it while pending list is still small enough to fit into
     * gin_pending_list_limit.
     *
     * ginInsertCleanup() should not be called inside our CRIT_SECTION.
     */
    cleanupSize = GinGetPendingListCleanupSize(index);
    if (metadata->nPendingPages * GIN_PAGE_FREESIZE > cleanupSize * 1024L)
        needCleanup = true;

    UnlockReleaseBuffer(metabuffer);

    END_CRIT_SECTION();

    /*
     * Since it could contend with a concurrent cleanup process, we do not
     * force the pending-list cleanup here.
     */
    if (needCleanup)
        ginInsertCleanup(ginstate, false, true, false, NULL);
}
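/*
 * Illustrative usage (not part of the original source): the threshold read by
 * GinGetPendingListCleanupSize() comes from the gin_pending_list_limit
 * setting, which can be supplied per index as a storage parameter (in
 * kilobytes) or session-wide as a GUC.  For a hypothetical index:
 *
 *     CREATE INDEX idx_docs_gin ON docs USING gin (body_tsv)
 *         WITH (gin_pending_list_limit = 2048);    -- 2 MB per-index limit
 *
 *     SET gin_pending_list_limit = '4MB';          -- session-wide default
 */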