/* * Step right from current page. * * The next page is locked first, before releasing the current page. This is * crucial to protect from concurrent page deletion (see comment in * ginDeletePage). */ Buffer ginStepRight(Buffer buffer, Relation index, int lockmode) { Buffer nextbuffer; Page page = BufferGetPage(buffer); bool isLeaf = GinPageIsLeaf(page); bool isData = GinPageIsData(page); BlockNumber blkno = GinPageGetOpaque(page)->rightlink; nextbuffer = ReadBuffer(index, blkno); LockBuffer(nextbuffer, lockmode); UnlockReleaseBuffer(buffer); /* Sanity check that the page we stepped to is of similar kind. */ page = BufferGetPage(nextbuffer); if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page)) elog(ERROR, "right sibling of GIN page is of different type"); /* * Given the proper lock sequence above, we should never land on a deleted * page. */ if (GinPageIsDeleted(page)) elog(ERROR, "right sibling of GIN page was deleted"); return nextbuffer; }
static void ginRedoInsert(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record); Buffer buffer; #ifdef NOT_USED BlockNumber leftChildBlkno = InvalidBlockNumber; #endif BlockNumber rightChildBlkno = InvalidBlockNumber; bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0; /* * First clear incomplete-split flag on child page if this finishes a * split. */ if (!isLeaf) { char *payload = XLogRecGetData(record) + sizeof(ginxlogInsert); #ifdef NOT_USED leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload); #endif payload += sizeof(BlockIdData); rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload); payload += sizeof(BlockIdData); ginRedoClearIncompleteSplit(record, 1); } if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); Size len; char *payload = XLogRecGetBlockData(record, 0, &len); /* How to insert the payload is tree-type specific */ if (data->flags & GIN_INSERT_ISDATA) { Assert(GinPageIsData(page)); ginRedoInsertData(buffer, isLeaf, rightChildBlkno, payload); } else { Assert(!GinPageIsData(page)); ginRedoInsertEntry(buffer, isLeaf, rightChildBlkno, payload); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void ginRedoDeletePage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record); Buffer dbuffer; Buffer pbuffer; Buffer lbuffer; Page page; /* * Lock left page first in order to prevent possible deadlock with * ginStepRight(). */ if (XLogReadBufferForRedo(record, 2, &lbuffer) == BLK_NEEDS_REDO) { page = BufferGetPage(lbuffer); Assert(GinPageIsData(page)); GinPageGetOpaque(page)->rightlink = data->rightLink; PageSetLSN(page, lsn); MarkBufferDirty(lbuffer); } if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO) { page = BufferGetPage(dbuffer); Assert(GinPageIsData(page)); GinPageGetOpaque(page)->flags = GIN_DELETED; GinPageSetDeleteXid(page, data->deleteXid); PageSetLSN(page, lsn); MarkBufferDirty(dbuffer); } if (XLogReadBufferForRedo(record, 1, &pbuffer) == BLK_NEEDS_REDO) { page = BufferGetPage(pbuffer); Assert(GinPageIsData(page)); Assert(!GinPageIsLeaf(page)); GinPageDeletePostingItem(page, data->parentOffset); PageSetLSN(page, lsn); MarkBufferDirty(pbuffer); } if (BufferIsValid(lbuffer)) UnlockReleaseBuffer(lbuffer); if (BufferIsValid(pbuffer)) UnlockReleaseBuffer(pbuffer); if (BufferIsValid(dbuffer)) UnlockReleaseBuffer(dbuffer); }
static void ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record) { ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetData(record); Buffer buffer; Page page; /* If we have a full-page image, restore it and we're done */ if (record->xl_info & XLR_BKP_BLOCK(0)) { (void) RestoreBackupBlock(lsn, record, 0, false, false); return; } buffer = XLogReadBuffer(xlrec->node, xlrec->blkno, false); if (!BufferIsValid(buffer)) return; page = (Page) BufferGetPage(buffer); Assert(GinPageIsLeaf(page)); Assert(GinPageIsData(page)); if (lsn > PageGetLSN(page)) { ginRedoRecompress(page, &xlrec->data); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); }
static bool entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off, GinBtreeEntryInsertData *insertData) { Size releasedsz = 0; Size addedsz; Page page = BufferGetPage(buf); Assert(insertData->entry); Assert(!GinPageIsData(page)); if (insertData->isDelete) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); releasedsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } addedsz = MAXALIGN(IndexTupleSize(insertData->entry)) + sizeof(ItemIdData); if (PageGetFreeSpace(page) + releasedsz >= addedsz) return true; return false; }
/* * Delete tuple on leaf page if tuples was existed and we * should update it, update old child blkno to new right page * if child split is occured */ static BlockNumber entryPreparePage(GinBtree btree, Page page, OffsetNumber off) { BlockNumber ret = InvalidBlockNumber; Assert(btree->entry); Assert(!GinPageIsData(page)); if (btree->isDelete) { Assert(GinPageIsLeaf(page)); PageIndexTupleDelete(page, off); } if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber); ret = btree->rightblkno; } btree->rightblkno = InvalidBlockNumber; return ret; }
/* * Searches correct position for value on leaf page. * Page should be correctly chosen. * Returns true if value found on page. */ static bool entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) { Page page = BufferGetPage(stack->buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); OffsetNumber low, high; Assert(GinPageIsLeaf(page)); Assert(!GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; return TRUE; } low = FirstOffsetNumber; high = PageGetMaxOffsetNumber(page); if (high < low) { stack->off = FirstOffsetNumber; return false; } high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); IndexTuple itup; OffsetNumber attnum; Datum key; GinNullCategory category; int result; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); attnum = gintuple_get_attrnum(btree->ginstate, itup); key = gintuple_get_key(btree->ginstate, itup, &category); result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, btree->entryKey, btree->entryCategory, attnum, key, category); if (result == 0) { stack->off = mid; return true; } else if (result > 0) low = mid + 1; else high = mid; } stack->off = high; return false; }
/* * returns blkno of leftmost child */ static BlockNumber dataGetLeftMostPage(GinBtree btree, Page page) { PostingItem *pitem; Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber); pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber); return PostingItemGetBlockNumber(pitem); }
/* * Searches correct position for value on leaf page. * Page should be corrrectly choosen. * Returns true if value found on page. */ static bool entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) { Page page = BufferGetPage(stack->buffer); OffsetNumber low, high; IndexTuple itup; Assert(GinPageIsLeaf(page)); Assert(!GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; return TRUE; } low = FirstOffsetNumber; high = PageGetMaxOffsetNumber(page); if (high < low) { stack->off = FirstOffsetNumber; return false; } high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); int result; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, btree->entryValue, gintuple_get_attrnum(btree->ginstate, itup), gin_index_getattr(btree->ginstate, itup)); if (result == 0) { stack->off = mid; return true; } else if (result > 0) low = mid + 1; else high = mid; } stack->off = high; return false; }
static BlockNumber entryGetLeftMostPage(GinBtree btree, Page page) { IndexTuple itup; Assert(!GinPageIsLeaf(page)); Assert(!GinPageIsData(page)); Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); return GinGetDownlink(itup); }
/* * Searches correct position for value on leaf page. * Page should be correctly chosen. * Returns true if value found on page. */ static bool dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack) { Page page = BufferGetPage(stack->buffer); OffsetNumber low, high; int result; Assert(GinPageIsLeaf(page)); Assert(GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; return TRUE; } low = FirstOffsetNumber; high = GinPageGetOpaque(page)->maxoff; if (high < low) { stack->off = FirstOffsetNumber; return false; } high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); result = ginCompareItemPointers(&btree->itemptr, GinDataPageGetItemPointer(page, mid)); if (result == 0) { stack->off = mid; return true; } else if (result > 0) low = mid + 1; else high = mid; } stack->off = high; return false; }
/* * In case of previous split update old child blkno to * new right page * item pointer never deletes! */ static BlockNumber dataPrepareData(GinBtree btree, Page page, OffsetNumber off) { BlockNumber ret = InvalidBlockNumber; Assert(GinPageIsData(page)); if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off); PostingItemSetBlockNumber(pitem, btree->rightblkno); ret = btree->rightblkno; } btree->rightblkno = InvalidBlockNumber; return ret; }
/* * Delete tuple on leaf page if tuples existed and we * should update it, update old child blkno to new right page * if child split occurred */ static void entryPreparePage(GinBtree btree, Page page, OffsetNumber off, GinBtreeEntryInsertData *insertData, BlockNumber updateblkno) { Assert(insertData->entry); Assert(!GinPageIsData(page)); if (insertData->isDelete) { Assert(GinPageIsLeaf(page)); PageIndexTupleDelete(page, off); } if (!GinPageIsLeaf(page) && updateblkno != InvalidBlockNumber) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); GinSetDownlink(itup, updateblkno); } }
/* * Finds links to blkno on non-leaf page, returns * offset of PostingItem */ static OffsetNumber dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) { OffsetNumber i, maxoff = GinPageGetOpaque(page)->maxoff; PostingItem *pitem; Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); /* if page isn't changed, we return storedOff */ if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) { pitem = (PostingItem *) GinDataPageGetItem(page, storedOff); if (PostingItemGetBlockNumber(pitem) == blkno) return storedOff; /* * we hope, that needed pointer goes to right. It's true if there * wasn't a deletion */ for (i = storedOff + 1; i <= maxoff; i++) { pitem = (PostingItem *) GinDataPageGetItem(page, i); if (PostingItemGetBlockNumber(pitem) == blkno) return i; } maxoff = storedOff - 1; } /* last chance */ for (i = FirstOffsetNumber; i <= maxoff; i++) { pitem = (PostingItem *) GinDataPageGetItem(page, i); if (PostingItemGetBlockNumber(pitem) == blkno) return i; } return InvalidOffsetNumber; }
static bool entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off) { Size itupsz = 0; Page page = BufferGetPage(buf); Assert(btree->entry); Assert(!GinPageIsData(page)); if (btree->isDelete) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData)) return true; return false; }
static OffsetNumber entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) { OffsetNumber i, maxoff = PageGetMaxOffsetNumber(page); IndexTuple itup; Assert(!GinPageIsLeaf(page)); Assert(!GinPageIsData(page)); /* if page isn't changed, we returns storedOff */ if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff)); if (GinGetDownlink(itup) == blkno) return storedOff; /* * we hope, that needed pointer goes to right. It's true if there * wasn't a deletion */ for (i = storedOff + 1; i <= maxoff; i++) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); if (GinGetDownlink(itup) == blkno) return i; } maxoff = storedOff - 1; } /* last chance */ for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); if (GinGetDownlink(itup) == blkno) return i; } return InvalidOffsetNumber; }
/* * checks space to install new value, * item pointer never deletes! */ static bool dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off) { Page page = BufferGetPage(buf); Assert(GinPageIsData(page)); Assert(!btree->isDelete); if (GinPageIsLeaf(page)) { if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff) { if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page)) return true; } else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page)) return true; } else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page)) return true; return false; }
/* * Create a WAL record for vacuuming entry tree leaf page. */ static void xlogVacuumPage(Relation index, Buffer buffer) { Page page = BufferGetPage(buffer); XLogRecPtr recptr; /* This is only used for entry tree leaf pages. */ Assert(!GinPageIsData(page)); Assert(GinPageIsLeaf(page)); if (!RelationNeedsWAL(index)) return; /* * Always create a full image, we don't track the changes on the page at * any more fine-grained level. This could obviously be improved... */ XLogBeginInsert(); XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE); PageSetLSN(page, recptr); }
static void ginRedoVacuumDataLeafPage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; Buffer buffer; if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); Size len; ginxlogVacuumDataLeafPage *xlrec; xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len); Assert(GinPageIsLeaf(page)); Assert(GinPageIsData(page)); ginRedoRecompress(page, &xlrec->data); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static bool ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer) { Buffer buffer; Page page; bool hasVoidPage = FALSE; MemoryContext oldCxt; buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); /* * We should be sure that we don't concurrent with inserts, insert process * never release root page until end (but it can unlock it and lock * again). New scan can't start but previously started ones work * concurrently. */ if (isRoot) LockBufferForCleanup(buffer); else LockBuffer(buffer, GIN_EXCLUSIVE); Assert(GinPageIsData(page)); if (GinPageIsLeaf(page)) { oldCxt = MemoryContextSwitchTo(gvs->tmpCxt); ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs); MemoryContextSwitchTo(oldCxt); MemoryContextReset(gvs->tmpCxt); /* if root is a leaf page, we don't desire further processing */ if (!isRoot && !hasVoidPage && GinDataLeafPageIsEmpty(page)) hasVoidPage = TRUE; } else { OffsetNumber i; bool isChildHasVoid = FALSE; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = GinDataPageGetPostingItem(page, i); if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL)) isChildHasVoid = TRUE; } if (isChildHasVoid) hasVoidPage = TRUE; } /* * if we have root and there are empty pages in tree, then we don't * release lock to go further processing and guarantee that tree is unused */ if (!(isRoot && hasVoidPage)) { UnlockReleaseBuffer(buffer); } else { Assert(rootBuffer); *rootBuffer = buffer; } return hasVoidPage; }
static void ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) { ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record); Buffer buffer; Page page; char *payload; BlockNumber leftChildBlkno = InvalidBlockNumber; BlockNumber rightChildBlkno = InvalidBlockNumber; bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0; payload = XLogRecGetData(record) + sizeof(ginxlogInsert); /* * First clear incomplete-split flag on child page if this finishes a * split. */ if (!isLeaf) { leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload); payload += sizeof(BlockIdData); rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload); payload += sizeof(BlockIdData); if (record->xl_info & XLR_BKP_BLOCK(0)) (void) RestoreBackupBlock(lsn, record, 0, false, false); else ginRedoClearIncompleteSplit(lsn, data->node, leftChildBlkno); } /* If we have a full-page image, restore it and we're done */ if (record->xl_info & XLR_BKP_BLOCK(isLeaf ? 0 : 1)) { (void) RestoreBackupBlock(lsn, record, isLeaf ? 0 : 1, false, false); return; } buffer = XLogReadBuffer(data->node, data->blkno, false); if (!BufferIsValid(buffer)) return; /* page was deleted, nothing to do */ page = (Page) BufferGetPage(buffer); if (lsn > PageGetLSN(page)) { /* How to insert the payload is tree-type specific */ if (data->flags & GIN_INSERT_ISDATA) { Assert(GinPageIsData(page)); ginRedoInsertData(buffer, isLeaf, rightChildBlkno, payload); } else { Assert(!GinPageIsData(page)); ginRedoInsertEntry(buffer, isLeaf, rightChildBlkno, payload); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); }
/* * scans posting tree and deletes empty pages */ static bool ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff) { DataPageDeleteStack *me; Buffer buffer; Page page; bool meDelete = FALSE; if (isRoot) { me = parent; } else { if (!parent->child) { me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack)); me->parent = parent; parent->child = me; me->leftBlkno = InvalidBlockNumber; } else me = parent->child; } buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); Assert(GinPageIsData(page)); if (!GinPageIsLeaf(page)) { OffsetNumber i; me->blkno = blkno; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i); if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i)) i--; } } if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { if (!(me->leftBlkno == InvalidBlockNumber && GinPageRightMost(page))) { /* we never delete right most branch */ Assert(!isRoot); if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot); meDelete = TRUE; } } } ReleaseBuffer(buffer); if (!meDelete) me->leftBlkno = blkno; return meDelete; }
Datum ginvacuumcleanup(PG_FUNCTION_ARGS) { IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); Relation index = info->index; bool needLock; BlockNumber npages, blkno; BlockNumber totFreePages; GinState ginstate; GinStatsData idxStat; /* * In an autovacuum analyze, we want to clean up pending insertions. * Otherwise, an ANALYZE-only call is a no-op. */ if (info->analyze_only) { if (IsAutoVacuumWorkerProcess()) { initGinState(&ginstate, index); ginInsertCleanup(&ginstate, true, stats); } PG_RETURN_POINTER(stats); } /* * Set up all-zero stats and cleanup pending inserts if ginbulkdelete * wasn't called */ if (stats == NULL) { stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); initGinState(&ginstate, index); ginInsertCleanup(&ginstate, true, stats); } memset(&idxStat, 0, sizeof(idxStat)); /* * XXX we always report the heap tuple count as the number of index * entries. This is bogus if the index is partial, but it's real hard to * tell how many distinct heap entries are referenced by a GIN index. */ stats->num_index_tuples = info->num_heap_tuples; stats->estimated_count = info->estimated_count; /* * Need lock unless it's local to this backend. */ needLock = !RELATION_IS_LOCAL(index); if (needLock) LockRelationForExtension(index, ExclusiveLock); npages = RelationGetNumberOfBlocks(index); if (needLock) UnlockRelationForExtension(index, ExclusiveLock); totFreePages = 0; for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++) { Buffer buffer; Page page; vacuum_delay_point(); buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); LockBuffer(buffer, GIN_SHARE); page = (Page) BufferGetPage(buffer); if (GinPageIsDeleted(page)) { Assert(blkno != GIN_ROOT_BLKNO); RecordFreeIndexPage(index, blkno); totFreePages++; } else if (GinPageIsData(page)) { idxStat.nDataPages++; } else if (!GinPageIsList(page)) { idxStat.nEntryPages++; if (GinPageIsLeaf(page)) idxStat.nEntries += PageGetMaxOffsetNumber(page); } UnlockReleaseBuffer(buffer); } /* Update the metapage with accurate page and entry counts */ idxStat.nTotalPages = npages; ginUpdateStats(info->index, &idxStat); /* Finally, vacuum the FSM */ IndexFreeSpaceMapVacuum(info->index); stats->pages_free = totFreePages; if (needLock) LockRelationForExtension(index, ExclusiveLock); stats->num_pages = RelationGetNumberOfBlocks(index); if (needLock) UnlockRelationForExtension(index, ExclusiveLock); PG_RETURN_POINTER(stats); }
static void ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) { ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record); Buffer dbuffer; Buffer pbuffer; Buffer lbuffer; Page page; if (record->xl_info & XLR_BKP_BLOCK(0)) dbuffer = RestoreBackupBlock(lsn, record, 0, false, true); else { dbuffer = XLogReadBuffer(data->node, data->blkno, false); if (BufferIsValid(dbuffer)) { page = BufferGetPage(dbuffer); if (lsn > PageGetLSN(page)) { Assert(GinPageIsData(page)); GinPageGetOpaque(page)->flags = GIN_DELETED; PageSetLSN(page, lsn); MarkBufferDirty(dbuffer); } } } if (record->xl_info & XLR_BKP_BLOCK(1)) pbuffer = RestoreBackupBlock(lsn, record, 1, false, true); else { pbuffer = XLogReadBuffer(data->node, data->parentBlkno, false); if (BufferIsValid(pbuffer)) { page = BufferGetPage(pbuffer); if (lsn > PageGetLSN(page)) { Assert(GinPageIsData(page)); Assert(!GinPageIsLeaf(page)); GinPageDeletePostingItem(page, data->parentOffset); PageSetLSN(page, lsn); MarkBufferDirty(pbuffer); } } } if (record->xl_info & XLR_BKP_BLOCK(2)) (void) RestoreBackupBlock(lsn, record, 2, false, false); else if (data->leftBlkno != InvalidBlockNumber) { lbuffer = XLogReadBuffer(data->node, data->leftBlkno, false); if (BufferIsValid(lbuffer)) { page = BufferGetPage(lbuffer); if (lsn > PageGetLSN(page)) { Assert(GinPageIsData(page)); GinPageGetOpaque(page)->rightlink = data->rightLink; PageSetLSN(page, lsn); MarkBufferDirty(lbuffer); } UnlockReleaseBuffer(lbuffer); } } if (BufferIsValid(pbuffer)) UnlockReleaseBuffer(pbuffer); if (BufferIsValid(dbuffer)) UnlockReleaseBuffer(dbuffer); }
static bool ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer) { Buffer buffer; Page page; bool hasVoidPage = FALSE; buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); /* * We should be sure that we don't concurrent with inserts, insert process * never release root page until end (but it can unlock it and lock * again). New scan can't start but previously started ones work * concurrently. */ if (isRoot) LockBufferForCleanup(buffer); else LockBuffer(buffer, GIN_EXCLUSIVE); Assert(GinPageIsData(page)); if (GinPageIsLeaf(page)) { OffsetNumber newMaxOff, oldMaxOff = GinPageGetOpaque(page)->maxoff; ItemPointerData *cleaned = NULL; newMaxOff = ginVacuumPostingList(gvs, (ItemPointer) GinDataPageGetData(page), oldMaxOff, &cleaned); /* saves changes about deleted tuple ... */ if (oldMaxOff != newMaxOff) { START_CRIT_SECTION(); if (newMaxOff > 0) memcpy(GinDataPageGetData(page), cleaned, sizeof(ItemPointerData) * newMaxOff); pfree(cleaned); GinPageGetOpaque(page)->maxoff = newMaxOff; MarkBufferDirty(buffer); xlogVacuumPage(gvs->index, buffer); END_CRIT_SECTION(); /* if root is a leaf page, we don't desire further processing */ if (!isRoot && GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) hasVoidPage = TRUE; } } else { OffsetNumber i; bool isChildHasVoid = FALSE; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i); if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL)) isChildHasVoid = TRUE; } if (isChildHasVoid) hasVoidPage = TRUE; } /* * if we have root and theres void pages in tree, then we don't release * lock to go further processing and guarantee that tree is unused */ if (!(isRoot && hasVoidPage)) { UnlockReleaseBuffer(buffer); } else { Assert(rootBuffer); *rootBuffer = buffer; } return hasVoidPage; }
/* * Find correct tuple in non-leaf page. It supposed that * page correctly chosen and searching value SHOULD be on page */ static BlockNumber entryLocateEntry(GinBtree btree, GinBtreeStack *stack) { OffsetNumber low, high, maxoff; IndexTuple itup = NULL; int result; Page page = BufferGetPage(stack->buffer); Assert(!GinPageIsLeaf(page)); Assert(!GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; stack->predictNumber *= PageGetMaxOffsetNumber(page); return btree->getLeftMostChild(btree, page); } low = FirstOffsetNumber; maxoff = high = PageGetMaxOffsetNumber(page); Assert(high >= low); high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); if (mid == maxoff && GinPageRightMost(page)) { /* Right infinity */ result = -1; } else { OffsetNumber attnum; Datum key; GinNullCategory category; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); attnum = gintuple_get_attrnum(btree->ginstate, itup); key = gintuple_get_key(btree->ginstate, itup, &category); result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, btree->entryKey, btree->entryCategory, attnum, key, category); } if (result == 0) { stack->off = mid; Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO); return GinGetDownlink(itup); } else if (result > 0) low = mid + 1; else high = mid; } Assert(high >= FirstOffsetNumber && high <= maxoff); stack->off = high; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high)); Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO); return GinGetDownlink(itup); }
Datum ginbulkdelete(PG_FUNCTION_ARGS) { IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2); void *callback_state = (void *) PG_GETARG_POINTER(3); Relation index = info->index; BlockNumber blkno = GIN_ROOT_BLKNO; GinVacuumState gvs; Buffer buffer; BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))]; uint32 nRoot; gvs.index = index; gvs.callback = callback; gvs.callback_state = callback_state; gvs.strategy = info->strategy; initGinState(&gvs.ginstate, index); /* first time through? */ if (stats == NULL) { /* Yes, so initialize stats to zeroes */ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); /* and cleanup any pending inserts */ ginInsertCleanup(&gvs.ginstate, true, stats); } /* we'll re-count the tuples each time */ stats->num_index_tuples = 0; gvs.result = stats; buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); /* find leaf page */ for (;;) { Page page = BufferGetPage(buffer); IndexTuple itup; LockBuffer(buffer, GIN_SHARE); Assert(!GinPageIsData(page)); if (GinPageIsLeaf(page)) { LockBuffer(buffer, GIN_UNLOCK); LockBuffer(buffer, GIN_EXCLUSIVE); if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page)) { LockBuffer(buffer, GIN_UNLOCK); continue; /* check it one more */ } break; } Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); blkno = GinGetDownlink(itup); Assert(blkno != InvalidBlockNumber); UnlockReleaseBuffer(buffer); buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); } /* right now we found leftmost page in entry's BTree */ for (;;) { Page page = BufferGetPage(buffer); Page resPage; uint32 i; Assert(!GinPageIsData(page)); resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot); blkno = GinPageGetOpaque(page)->rightlink; if (resPage) { START_CRIT_SECTION(); PageRestoreTempPage(resPage, page); MarkBufferDirty(buffer); xlogVacuumPage(gvs.index, buffer); UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); } else { UnlockReleaseBuffer(buffer); } vacuum_delay_point(); for (i = 0; i < nRoot; i++) { ginVacuumPostingTree(&gvs, rootOfPostingTree[i]); vacuum_delay_point(); } if (blkno == InvalidBlockNumber) /* rightmost page */ break; buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); LockBuffer(buffer, GIN_EXCLUSIVE); } PG_RETURN_POINTER(gvs.result); }
/* * Find correct PostingItem in non-leaf page. It supposed that page * correctly chosen and searching value SHOULD be on page */ static BlockNumber dataLocateItem(GinBtree btree, GinBtreeStack *stack) { OffsetNumber low, high, maxoff; PostingItem *pitem = NULL; int result; Page page = BufferGetPage(stack->buffer); Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; stack->predictNumber *= GinPageGetOpaque(page)->maxoff; return btree->getLeftMostPage(btree, page); } low = FirstOffsetNumber; maxoff = high = GinPageGetOpaque(page)->maxoff; Assert(high >= low); high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); pitem = (PostingItem *) GinDataPageGetItem(page, mid); if (mid == maxoff) { /* * Right infinity, page already correctly chosen with a help of * dataIsMoveRight */ result = -1; } else { pitem = (PostingItem *) GinDataPageGetItem(page, mid); result = ginCompareItemPointers(btree->items + btree->curitem, &(pitem->key)); } if (result == 0) { stack->off = mid; return PostingItemGetBlockNumber(pitem); } else if (result > 0) low = mid + 1; else high = mid; } Assert(high >= FirstOffsetNumber && high <= maxoff); stack->off = high; pitem = (PostingItem *) GinDataPageGetItem(page, high); return PostingItemGetBlockNumber(pitem); }
/* * fills WAL record for vacuum leaf page */ static void xlogVacuumPage(Relation index, Buffer buffer) { Page page = BufferGetPage(buffer); XLogRecPtr recptr; XLogRecData rdata[3]; ginxlogVacuumPage data; char *backup; char itups[BLCKSZ]; uint32 len = 0; Assert(GinPageIsLeaf(page)); if (!RelationNeedsWAL(index)) return; data.node = index->rd_node; data.blkno = BufferGetBlockNumber(buffer); if (GinPageIsData(page)) { backup = GinDataPageGetData(page); data.nitem = GinPageGetOpaque(page)->maxoff; if (data.nitem) len = MAXALIGN(sizeof(ItemPointerData) * data.nitem); } else { char *ptr; OffsetNumber i; ptr = backup = itups; for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i++) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); memcpy(ptr, itup, IndexTupleSize(itup)); ptr += MAXALIGN(IndexTupleSize(itup)); } data.nitem = PageGetMaxOffsetNumber(page); len = ptr - backup; } rdata[0].buffer = buffer; rdata[0].buffer_std = (GinPageIsData(page)) ? FALSE : TRUE; rdata[0].len = 0; rdata[0].data = NULL; rdata[0].next = rdata + 1; rdata[1].buffer = InvalidBuffer; rdata[1].len = sizeof(ginxlogVacuumPage); rdata[1].data = (char *) &data; if (len == 0) { rdata[1].next = NULL; } else { rdata[1].next = rdata + 2; rdata[2].buffer = InvalidBuffer; rdata[2].len = len; rdata[2].data = backup; rdata[2].next = NULL; } recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); }
/* * Insert a new item to a page. * * Returns true if the insertion was finished. On false, the page was split and * the parent needs to be updated. (a root split returns true as it doesn't * need any further action by the caller to complete) * * When inserting a downlink to a internal page, 'childbuf' contains the * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared * atomically with the insert. Also, the existing item at the given location * is updated to point to 'updateblkno'. * * stack->buffer is locked on entry, and is kept locked. */ static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, void *insertdata, BlockNumber updateblkno, Buffer childbuf, GinStatsData *buildStats) { Page page = BufferGetPage(stack->buffer); XLogRecData *payloadrdata; bool fit; uint16 xlflags = 0; Page childpage = NULL; if (GinPageIsData(page)) xlflags |= GIN_INSERT_ISDATA; if (GinPageIsLeaf(page)) { xlflags |= GIN_INSERT_ISLEAF; Assert(!BufferIsValid(childbuf)); Assert(updateblkno == InvalidBlockNumber); } else { Assert(BufferIsValid(childbuf)); Assert(updateblkno != InvalidBlockNumber); childpage = BufferGetPage(childbuf); } /* * Try to put the incoming tuple on the page. If it doesn't fit, * placeToPage method will return false and leave the page unmodified, and * we'll have to split the page. */ START_CRIT_SECTION(); fit = btree->placeToPage(btree, stack->buffer, stack->off, insertdata, updateblkno, &payloadrdata); if (fit) { MarkBufferDirty(stack->buffer); /* An insert to an internal page finishes the split of the child. */ if (childbuf != InvalidBuffer) { GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; MarkBufferDirty(childbuf); } if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; XLogRecData rdata[3]; ginxlogInsert xlrec; BlockIdData childblknos[2]; xlrec.node = btree->index->rd_node; xlrec.blkno = BufferGetBlockNumber(stack->buffer); xlrec.offset = stack->off; xlrec.flags = xlflags; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(ginxlogInsert); /* * Log information about child if this was an insertion of a * downlink. */ if (childbuf != InvalidBuffer) { rdata[0].next = &rdata[1]; BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf)); BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink); rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) childblknos; rdata[1].len = sizeof(BlockIdData) * 2; rdata[1].next = &rdata[2]; rdata[2].buffer = childbuf; rdata[2].buffer_std = false; rdata[2].data = NULL; rdata[2].len = 0; rdata[2].next = payloadrdata; } else rdata[0].next = payloadrdata; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata); PageSetLSN(page, recptr); if (childbuf != InvalidBuffer) PageSetLSN(childpage, recptr); } END_CRIT_SECTION(); return true; } else { /* Didn't fit, have to split */ Buffer rbuffer; Page newlpage; BlockNumber savedRightLink; Page rpage; XLogRecData rdata[2]; ginxlogSplit data; Buffer lbuffer = InvalidBuffer; Page newrootpg = NULL; END_CRIT_SECTION(); rbuffer = GinNewBuffer(btree->index); /* During index build, count the new page */ if (buildStats) { if (btree->isData) buildStats->nDataPages++; else buildStats->nEntryPages++; } savedRightLink = GinPageGetOpaque(page)->rightlink; /* * newlpage is a pointer to memory page, it is not associated with a * buffer. stack->buffer is not touched yet. */ newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, insertdata, updateblkno, &payloadrdata); data.node = btree->index->rd_node; data.rblkno = BufferGetBlockNumber(rbuffer); data.flags = xlflags; if (childbuf != InvalidBuffer) { Page childpage = BufferGetPage(childbuf); GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; data.leftChildBlkno = BufferGetBlockNumber(childbuf); data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink; } else data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &data; rdata[0].len = sizeof(ginxlogSplit); if (childbuf != InvalidBuffer) { rdata[0].next = &rdata[1]; rdata[1].buffer = childbuf; rdata[1].buffer_std = false; rdata[1].data = NULL; rdata[1].len = 0; rdata[1].next = payloadrdata; } else rdata[0].next = payloadrdata; rpage = BufferGetPage(rbuffer); if (stack->parent == NULL) { /* * split root, so we need to allocate new left page and place * pointer on root to left and right page */ lbuffer = GinNewBuffer(btree->index); /* During index build, count the newly-added root page */ if (buildStats) { if (btree->isData) buildStats->nDataPages++; else buildStats->nEntryPages++; } /* * root never has a right-link, so we borrow the rrlink field to * store the root block number. */ data.rrlink = BufferGetBlockNumber(stack->buffer); data.lblkno = BufferGetBlockNumber(lbuffer); data.flags |= GIN_SPLIT_ROOT; GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber; GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); /* * Construct a new root page containing downlinks to the new left * and right pages. (do this in a temporary copy first rather * than overwriting the original page directly, so that we can still * abort gracefully if this fails.) */ newrootpg = PageGetTempPage(rpage); GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF, BLCKSZ); btree->fillRoot(btree, newrootpg, BufferGetBlockNumber(lbuffer), newlpage, BufferGetBlockNumber(rbuffer), rpage); } else { /* split non-root page */ data.rrlink = savedRightLink; data.lblkno = BufferGetBlockNumber(stack->buffer); GinPageGetOpaque(rpage)->rightlink = savedRightLink; GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT; GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); } /* * Ok, we have the new contents of the left page in a temporary copy * now (newlpage), and the newly-allocated right block has been filled * in. The original page is still unchanged. * * If this is a root split, we also have a temporary page containing * the new contents of the root. Copy the new left page to a * newly-allocated block, and initialize the (original) root page the * new copy. Otherwise, copy over the temporary copy of the new left * page over the old left page. */ START_CRIT_SECTION(); MarkBufferDirty(rbuffer); if (stack->parent == NULL) { PageRestoreTempPage(newlpage, BufferGetPage(lbuffer)); MarkBufferDirty(lbuffer); newlpage = newrootpg; } PageRestoreTempPage(newlpage, BufferGetPage(stack->buffer)); MarkBufferDirty(stack->buffer); /* write WAL record */ if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); PageSetLSN(BufferGetPage(stack->buffer), recptr); PageSetLSN(rpage, recptr); if (stack->parent == NULL) PageSetLSN(BufferGetPage(lbuffer), recptr); } END_CRIT_SECTION(); /* * We can release the lock on the right page now, but keep the * original buffer locked. */ UnlockReleaseBuffer(rbuffer); if (stack->parent == NULL) UnlockReleaseBuffer(lbuffer); /* * If we split the root, we're done. Otherwise the split is not * complete until the downlink for the new page has been inserted to * the parent. */ if (stack->parent == NULL) return true; else return false; } }