/* * Delete tuple on leaf page if tuples was existed and we * should update it, update old child blkno to new right page * if child split is occured */ static BlockNumber entryPreparePage(GinBtree btree, Page page, OffsetNumber off) { BlockNumber ret = InvalidBlockNumber; Assert(btree->entry); Assert(!GinPageIsData(page)); if (btree->isDelete) { Assert(GinPageIsLeaf(page)); PageIndexTupleDelete(page, off); } if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber); ret = btree->rightblkno; } btree->rightblkno = InvalidBlockNumber; return ret; }
/* * Locks buffer by needed method for search. */ static int ginTraverseLock(Buffer buffer, bool searchMode) { Page page; int access = GIN_SHARE; LockBuffer(buffer, GIN_SHARE); page = BufferGetPage(buffer); if (GinPageIsLeaf(page)) { if (searchMode == FALSE) { /* we should relock our page */ LockBuffer(buffer, GIN_UNLOCK); LockBuffer(buffer, GIN_EXCLUSIVE); /* But root can become non-leaf during relock */ if (!GinPageIsLeaf(page)) { /* restore old lock type (very rare) */ LockBuffer(buffer, GIN_UNLOCK); LockBuffer(buffer, GIN_SHARE); } else access = GIN_EXCLUSIVE; } } return access; }
/* * Step right from current page. * * The next page is locked first, before releasing the current page. This is * crucial to protect from concurrent page deletion (see comment in * ginDeletePage). */ Buffer ginStepRight(Buffer buffer, Relation index, int lockmode) { Buffer nextbuffer; Page page = BufferGetPage(buffer); bool isLeaf = GinPageIsLeaf(page); bool isData = GinPageIsData(page); BlockNumber blkno = GinPageGetOpaque(page)->rightlink; nextbuffer = ReadBuffer(index, blkno); LockBuffer(nextbuffer, lockmode); UnlockReleaseBuffer(buffer); /* Sanity check that the page we stepped to is of similar kind. */ page = BufferGetPage(nextbuffer); if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page)) elog(ERROR, "right sibling of GIN page is of different type"); /* * Given the proper lock sequence above, we should never land on a deleted * page. */ if (GinPageIsDeleted(page)) elog(ERROR, "right sibling of GIN page was deleted"); return nextbuffer; }
static void ginRedoInsertData(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdata) { Page page = BufferGetPage(buffer); if (isLeaf) { ginxlogRecompressDataLeaf *data = (ginxlogRecompressDataLeaf *) rdata; Assert(GinPageIsLeaf(page)); ginRedoRecompress(page, data); } else { ginxlogInsertDataInternal *data = (ginxlogInsertDataInternal *) rdata; PostingItem *oldpitem; Assert(!GinPageIsLeaf(page)); /* update link to right page after split */ oldpitem = GinDataPageGetPostingItem(page, data->offset); PostingItemSetBlockNumber(oldpitem, rightblkno); GinDataPageAddPostingItem(page, &data->newitem, data->offset); } }
/* * add ItemPointer to a leaf page. */ void GinDataPageAddItemPointer(Page page, ItemPointer data, OffsetNumber offset) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; char *ptr; Assert(ItemPointerIsValid(data)); Assert(GinPageIsLeaf(page)); if (offset == InvalidOffsetNumber) { ptr = (char *) GinDataPageGetItemPointer(page, maxoff + 1); } else { ptr = (char *) GinDataPageGetItemPointer(page, offset); if (maxoff + 1 - offset != 0) memmove(ptr + sizeof(ItemPointerData), ptr, (maxoff - offset + 1) * sizeof(ItemPointerData)); } memcpy(ptr, data, sizeof(ItemPointerData)); GinPageGetOpaque(page)->maxoff++; }
/* * add PostingItem to a non-leaf page. */ void GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; char *ptr; Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber); Assert(!GinPageIsLeaf(page)); if (offset == InvalidOffsetNumber) { ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1); } else { ptr = (char *) GinDataPageGetPostingItem(page, offset); if (maxoff + 1 - offset != 0) memmove(ptr + sizeof(PostingItem), ptr, (maxoff - offset + 1) * sizeof(PostingItem)); } memcpy(ptr, data, sizeof(PostingItem)); GinPageGetOpaque(page)->maxoff++; }
/* * Form a non-leaf entry tuple by copying the key data from the given tuple, * which can be either a leaf or non-leaf entry tuple. * * Any posting list in the source tuple is not copied. The specified child * block number is inserted into t_tid. */ static IndexTuple GinFormInteriorTuple(IndexTuple itup, Page page, BlockNumber childblk) { IndexTuple nitup; if (GinPageIsLeaf(page) && !GinIsPostingTree(itup)) { /* Tuple contains a posting list, just copy stuff before that */ uint32 origsize = GinGetPostingOffset(itup); origsize = MAXALIGN(origsize); nitup = (IndexTuple) palloc(origsize); memcpy(nitup, itup, origsize); /* ... be sure to fix the size header field ... */ nitup->t_info &= ~INDEX_SIZE_MASK; nitup->t_info |= origsize; } else { /* Copy the tuple as-is */ nitup = (IndexTuple) palloc(IndexTupleSize(itup)); memcpy(nitup, itup, IndexTupleSize(itup)); } /* Now insert the correct downlink */ GinSetDownlink(nitup, childblk); return nitup; }
static void ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record) { ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetData(record); Buffer buffer; Page page; /* If we have a full-page image, restore it and we're done */ if (record->xl_info & XLR_BKP_BLOCK(0)) { (void) RestoreBackupBlock(lsn, record, 0, false, false); return; } buffer = XLogReadBuffer(xlrec->node, xlrec->blkno, false); if (!BufferIsValid(buffer)) return; page = (Page) BufferGetPage(buffer); Assert(GinPageIsLeaf(page)); Assert(GinPageIsData(page)); if (lsn > PageGetLSN(page)) { ginRedoRecompress(page, &xlrec->data); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); }
/* * Searches correct position for value on leaf page. * Page should be correctly chosen. * Returns true if value found on page. */ static bool entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) { Page page = BufferGetPage(stack->buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST); OffsetNumber low, high; Assert(GinPageIsLeaf(page)); Assert(!GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; return TRUE; } low = FirstOffsetNumber; high = PageGetMaxOffsetNumber(page); if (high < low) { stack->off = FirstOffsetNumber; return false; } high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); IndexTuple itup; OffsetNumber attnum; Datum key; GinNullCategory category; int result; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); attnum = gintuple_get_attrnum(btree->ginstate, itup); key = gintuple_get_key(btree->ginstate, itup, &category); result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, btree->entryKey, btree->entryCategory, attnum, key, category); if (result == 0) { stack->off = mid; return true; } else if (result > 0) low = mid + 1; else high = mid; } stack->off = high; return false; }
/* * Place tuple on page and fills WAL record */ static void entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) { Page page = BufferGetPage(buf); static XLogRecData rdata[3]; OffsetNumber placed; static ginxlogInsert data; int cnt = 0; *prdata = rdata; data.updateBlkno = entryPreparePage(btree, page, off); placed = PageAddItem(page, (Item) btree->entry, IndexTupleSize(btree->entry), off, false, false); if (placed != off) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); data.node = btree->index->rd_node; data.blkno = BufferGetBlockNumber(buf); data.offset = off; data.nitem = 1; data.isDelete = btree->isDelete; data.isData = false; data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE; /* * Prevent full page write if child's split occurs. That is needed to * remove incomplete splits while replaying WAL * * data.updateBlkno contains new block number (of newly created right * page) for recently splited page. */ if (data.updateBlkno == InvalidBlockNumber) { rdata[0].buffer = buf; rdata[0].buffer_std = TRUE; rdata[0].data = NULL; rdata[0].len = 0; rdata[0].next = &rdata[1]; cnt++; } rdata[cnt].buffer = InvalidBuffer; rdata[cnt].data = (char *) &data; rdata[cnt].len = sizeof(ginxlogInsert); rdata[cnt].next = &rdata[cnt + 1]; cnt++; rdata[cnt].buffer = InvalidBuffer; rdata[cnt].data = (char *) btree->entry; rdata[cnt].len = IndexTupleSize(btree->entry); rdata[cnt].next = NULL; btree->entry = NULL; }
/* * Delete tuple on leaf page if tuples existed and we * should update it, update old child blkno to new right page * if child split occurred */ static void entryPreparePage(GinBtree btree, Page page, OffsetNumber off, GinBtreeEntryInsertData *insertData, BlockNumber updateblkno) { Assert(insertData->entry); Assert(!GinPageIsData(page)); if (insertData->isDelete) { Assert(GinPageIsLeaf(page)); PageIndexTupleDelete(page, off); } if (!GinPageIsLeaf(page) && updateblkno != InvalidBlockNumber) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); GinSetDownlink(itup, updateblkno); } }
/* * returns blkno of leftmost child */ static BlockNumber dataGetLeftMostPage(GinBtree btree, Page page) { PostingItem *pitem; Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber); pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber); return PostingItemGetBlockNumber(pitem); }
/* * Searches correct position for value on leaf page. * Page should be corrrectly choosen. * Returns true if value found on page. */ static bool entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) { Page page = BufferGetPage(stack->buffer); OffsetNumber low, high; IndexTuple itup; Assert(GinPageIsLeaf(page)); Assert(!GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; return TRUE; } low = FirstOffsetNumber; high = PageGetMaxOffsetNumber(page); if (high < low) { stack->off = FirstOffsetNumber; return false; } high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); int result; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, btree->entryValue, gintuple_get_attrnum(btree->ginstate, itup), gin_index_getattr(btree->ginstate, itup)); if (result == 0) { stack->off = mid; return true; } else if (result > 0) low = mid + 1; else high = mid; } stack->off = high; return false; }
static BlockNumber entryGetLeftMostPage(GinBtree btree, Page page) { IndexTuple itup; Assert(!GinPageIsLeaf(page)); Assert(!GinPageIsData(page)); Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); return GinGetDownlink(itup); }
static void ginRedoDeletePage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record); Buffer dbuffer; Buffer pbuffer; Buffer lbuffer; Page page; /* * Lock left page first in order to prevent possible deadlock with * ginStepRight(). */ if (XLogReadBufferForRedo(record, 2, &lbuffer) == BLK_NEEDS_REDO) { page = BufferGetPage(lbuffer); Assert(GinPageIsData(page)); GinPageGetOpaque(page)->rightlink = data->rightLink; PageSetLSN(page, lsn); MarkBufferDirty(lbuffer); } if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO) { page = BufferGetPage(dbuffer); Assert(GinPageIsData(page)); GinPageGetOpaque(page)->flags = GIN_DELETED; GinPageSetDeleteXid(page, data->deleteXid); PageSetLSN(page, lsn); MarkBufferDirty(dbuffer); } if (XLogReadBufferForRedo(record, 1, &pbuffer) == BLK_NEEDS_REDO) { page = BufferGetPage(pbuffer); Assert(GinPageIsData(page)); Assert(!GinPageIsLeaf(page)); GinPageDeletePostingItem(page, data->parentOffset); PageSetLSN(page, lsn); MarkBufferDirty(pbuffer); } if (BufferIsValid(lbuffer)) UnlockReleaseBuffer(lbuffer); if (BufferIsValid(pbuffer)) UnlockReleaseBuffer(pbuffer); if (BufferIsValid(dbuffer)) UnlockReleaseBuffer(dbuffer); }
/* * Deletes posting item from non-leaf page */ void GinPageDeletePostingItem(Page page, OffsetNumber offset) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; Assert(!GinPageIsLeaf(page)); Assert(offset >= FirstOffsetNumber && offset <= maxoff); if (offset != maxoff) memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1), sizeof(PostingItem) * (maxoff - offset)); GinPageGetOpaque(page)->maxoff--; }
static void ginRedoInsertEntry(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdata) { Page page = BufferGetPage(buffer); ginxlogInsertEntry *data = (ginxlogInsertEntry *) rdata; OffsetNumber offset = data->offset; IndexTuple itup; if (rightblkno != InvalidBlockNumber) { /* update link to right page after split */ Assert(!GinPageIsLeaf(page)); Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page)); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offset)); GinSetDownlink(itup, rightblkno); } if (data->isDelete) { Assert(GinPageIsLeaf(page)); Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page)); PageIndexTupleDelete(page, offset); } itup = &data->tuple; if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), offset, false, false) == InvalidOffsetNumber) { RelFileNode node; ForkNumber forknum; BlockNumber blknum; BufferGetTag(buffer, &node, &forknum, &blknum); elog(ERROR, "failed to add item to index page in %u/%u/%u", node.spcNode, node.dbNode, node.relNode); } }
/* * Searches correct position for value on leaf page. * Page should be correctly chosen. * Returns true if value found on page. */ static bool dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack) { Page page = BufferGetPage(stack->buffer); OffsetNumber low, high; int result; Assert(GinPageIsLeaf(page)); Assert(GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; return TRUE; } low = FirstOffsetNumber; high = GinPageGetOpaque(page)->maxoff; if (high < low) { stack->off = FirstOffsetNumber; return false; } high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); result = ginCompareItemPointers(&btree->itemptr, GinDataPageGetItemPointer(page, mid)); if (result == 0) { stack->off = mid; return true; } else if (result > 0) low = mid + 1; else high = mid; } stack->off = high; return false; }
/* * In case of previous split update old child blkno to * new right page * item pointer never deletes! */ static BlockNumber dataPrepareData(GinBtree btree, Page page, OffsetNumber off) { BlockNumber ret = InvalidBlockNumber; Assert(GinPageIsData(page)); if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off); PostingItemSetBlockNumber(pitem, btree->rightblkno); ret = btree->rightblkno; } btree->rightblkno = InvalidBlockNumber; return ret; }
/* * Returns new tuple with copied value from source tuple. * New tuple will not store posting list */ static IndexTuple copyIndexTuple(IndexTuple itup, Page page) { IndexTuple nitup; if (GinPageIsLeaf(page) && !GinIsPostingTree(itup)) { nitup = (IndexTuple) palloc(MAXALIGN(GinGetOrigSizePosting(itup))); memcpy(nitup, itup, GinGetOrigSizePosting(itup)); nitup->t_info &= ~INDEX_SIZE_MASK; nitup->t_info |= GinGetOrigSizePosting(itup); } else { nitup = (IndexTuple) palloc(MAXALIGN(IndexTupleSize(itup))); memcpy(nitup, itup, IndexTupleSize(itup)); } return nitup; }
/* * Finds links to blkno on non-leaf page, returns * offset of PostingItem */ static OffsetNumber dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) { OffsetNumber i, maxoff = GinPageGetOpaque(page)->maxoff; PostingItem *pitem; Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); /* if page isn't changed, we return storedOff */ if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) { pitem = (PostingItem *) GinDataPageGetItem(page, storedOff); if (PostingItemGetBlockNumber(pitem) == blkno) return storedOff; /* * we hope, that needed pointer goes to right. It's true if there * wasn't a deletion */ for (i = storedOff + 1; i <= maxoff; i++) { pitem = (PostingItem *) GinDataPageGetItem(page, i); if (PostingItemGetBlockNumber(pitem) == blkno) return i; } maxoff = storedOff - 1; } /* last chance */ for (i = FirstOffsetNumber; i <= maxoff; i++) { pitem = (PostingItem *) GinDataPageGetItem(page, i); if (PostingItemGetBlockNumber(pitem) == blkno) return i; } return InvalidOffsetNumber; }
static OffsetNumber entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) { OffsetNumber i, maxoff = PageGetMaxOffsetNumber(page); IndexTuple itup; Assert(!GinPageIsLeaf(page)); Assert(!GinPageIsData(page)); /* if page isn't changed, we returns storedOff */ if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff)); if (GinGetDownlink(itup) == blkno) return storedOff; /* * we hope, that needed pointer goes to right. It's true if there * wasn't a deletion */ for (i = storedOff + 1; i <= maxoff; i++) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); if (GinGetDownlink(itup) == blkno) return i; } maxoff = storedOff - 1; } /* last chance */ for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); if (GinGetDownlink(itup) == blkno) return i; } return InvalidOffsetNumber; }
static void ginRedoSplitData(Page lpage, Page rpage, void *rdata) { bool isleaf = GinPageIsLeaf(lpage); if (isleaf) { ginxlogSplitDataLeaf *data = (ginxlogSplitDataLeaf *) rdata; Pointer lptr = (Pointer) rdata + sizeof(ginxlogSplitDataLeaf); Pointer rptr = lptr + data->lsize; Assert(data->lsize > 0 && data->lsize <= GinDataPageMaxDataSize); Assert(data->rsize > 0 && data->rsize <= GinDataPageMaxDataSize); memcpy(GinDataLeafPageGetPostingList(lpage), lptr, data->lsize); memcpy(GinDataLeafPageGetPostingList(rpage), rptr, data->rsize); GinDataPageSetDataSize(lpage, data->lsize); GinDataPageSetDataSize(rpage, data->rsize); *GinDataPageGetRightBound(lpage) = data->lrightbound; *GinDataPageGetRightBound(rpage) = data->rrightbound; } else { ginxlogSplitDataInternal *data = (ginxlogSplitDataInternal *) rdata; PostingItem *items = (PostingItem *) ((char *) rdata + sizeof(ginxlogSplitDataInternal)); OffsetNumber i; OffsetNumber maxoff; for (i = 0; i < data->separator; i++) GinDataPageAddPostingItem(lpage, &items[i], InvalidOffsetNumber); for (i = data->separator; i < data->nitem; i++) GinDataPageAddPostingItem(rpage, &items[i], InvalidOffsetNumber); /* set up right key */ maxoff = GinPageGetOpaque(lpage)->maxoff; *GinDataPageGetRightBound(lpage) = GinDataPageGetPostingItem(lpage, maxoff)->key; *GinDataPageGetRightBound(rpage) = data->rightbound; } }
/* * Create a WAL record for vacuuming entry tree leaf page. */ static void xlogVacuumPage(Relation index, Buffer buffer) { Page page = BufferGetPage(buffer); XLogRecPtr recptr; /* This is only used for entry tree leaf pages. */ Assert(!GinPageIsData(page)); Assert(GinPageIsLeaf(page)); if (!RelationNeedsWAL(index)) return; /* * Always create a full image, we don't track the changes on the page at * any more fine-grained level. This could obviously be improved... */ XLogBeginInsert(); XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE); PageSetLSN(page, recptr); }
/* * checks space to install new value, * item pointer never deletes! */ static bool dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off) { Page page = BufferGetPage(buf); Assert(GinPageIsData(page)); Assert(!btree->isDelete); if (GinPageIsLeaf(page)) { if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff) { if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page)) return true; } else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page)) return true; } else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page)) return true; return false; }
static void ginRedoVacuumDataLeafPage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; Buffer buffer; if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); Size len; ginxlogVacuumDataLeafPage *xlrec; xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len); Assert(GinPageIsLeaf(page)); Assert(GinPageIsData(page)); ginRedoRecompress(page, &xlrec->data); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
/* * fills WAL record for vacuum leaf page */ static void xlogVacuumPage(Relation index, Buffer buffer) { Page page = BufferGetPage(buffer); XLogRecPtr recptr; XLogRecData rdata[3]; ginxlogVacuumPage data; char *backup; char itups[BLCKSZ]; uint32 len = 0; Assert(GinPageIsLeaf(page)); if (!RelationNeedsWAL(index)) return; data.node = index->rd_node; data.blkno = BufferGetBlockNumber(buffer); if (GinPageIsData(page)) { backup = GinDataPageGetData(page); data.nitem = GinPageGetOpaque(page)->maxoff; if (data.nitem) len = MAXALIGN(sizeof(ItemPointerData) * data.nitem); } else { char *ptr; OffsetNumber i; ptr = backup = itups; for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i++) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); memcpy(ptr, itup, IndexTupleSize(itup)); ptr += MAXALIGN(IndexTupleSize(itup)); } data.nitem = PageGetMaxOffsetNumber(page); len = ptr - backup; } rdata[0].buffer = buffer; rdata[0].buffer_std = (GinPageIsData(page)) ? FALSE : TRUE; rdata[0].len = 0; rdata[0].data = NULL; rdata[0].next = rdata + 1; rdata[1].buffer = InvalidBuffer; rdata[1].len = sizeof(ginxlogVacuumPage); rdata[1].data = (char *) &data; if (len == 0) { rdata[1].next = NULL; } else { rdata[1].next = rdata + 2; rdata[2].buffer = InvalidBuffer; rdata[2].len = len; rdata[2].data = backup; rdata[2].next = NULL; } recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); }
Datum ginvacuumcleanup(PG_FUNCTION_ARGS) { IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); Relation index = info->index; bool needLock; BlockNumber npages, blkno; BlockNumber totFreePages; GinState ginstate; GinStatsData idxStat; /* * In an autovacuum analyze, we want to clean up pending insertions. * Otherwise, an ANALYZE-only call is a no-op. */ if (info->analyze_only) { if (IsAutoVacuumWorkerProcess()) { initGinState(&ginstate, index); ginInsertCleanup(&ginstate, true, stats); } PG_RETURN_POINTER(stats); } /* * Set up all-zero stats and cleanup pending inserts if ginbulkdelete * wasn't called */ if (stats == NULL) { stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); initGinState(&ginstate, index); ginInsertCleanup(&ginstate, true, stats); } memset(&idxStat, 0, sizeof(idxStat)); /* * XXX we always report the heap tuple count as the number of index * entries. This is bogus if the index is partial, but it's real hard to * tell how many distinct heap entries are referenced by a GIN index. */ stats->num_index_tuples = info->num_heap_tuples; stats->estimated_count = info->estimated_count; /* * Need lock unless it's local to this backend. */ needLock = !RELATION_IS_LOCAL(index); if (needLock) LockRelationForExtension(index, ExclusiveLock); npages = RelationGetNumberOfBlocks(index); if (needLock) UnlockRelationForExtension(index, ExclusiveLock); totFreePages = 0; for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++) { Buffer buffer; Page page; vacuum_delay_point(); buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); LockBuffer(buffer, GIN_SHARE); page = (Page) BufferGetPage(buffer); if (GinPageIsDeleted(page)) { Assert(blkno != GIN_ROOT_BLKNO); RecordFreeIndexPage(index, blkno); totFreePages++; } else if (GinPageIsData(page)) { idxStat.nDataPages++; } else if (!GinPageIsList(page)) { idxStat.nEntryPages++; if (GinPageIsLeaf(page)) idxStat.nEntries += PageGetMaxOffsetNumber(page); } UnlockReleaseBuffer(buffer); } /* Update the metapage with accurate page and entry counts */ idxStat.nTotalPages = npages; ginUpdateStats(info->index, &idxStat); /* Finally, vacuum the FSM */ IndexFreeSpaceMapVacuum(info->index); stats->pages_free = totFreePages; if (needLock) LockRelationForExtension(index, ExclusiveLock); stats->num_pages = RelationGetNumberOfBlocks(index); if (needLock) UnlockRelationForExtension(index, ExclusiveLock); PG_RETURN_POINTER(stats); }
Datum ginbulkdelete(PG_FUNCTION_ARGS) { IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2); void *callback_state = (void *) PG_GETARG_POINTER(3); Relation index = info->index; BlockNumber blkno = GIN_ROOT_BLKNO; GinVacuumState gvs; Buffer buffer; BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))]; uint32 nRoot; gvs.index = index; gvs.callback = callback; gvs.callback_state = callback_state; gvs.strategy = info->strategy; initGinState(&gvs.ginstate, index); /* first time through? */ if (stats == NULL) { /* Yes, so initialize stats to zeroes */ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); /* and cleanup any pending inserts */ ginInsertCleanup(&gvs.ginstate, true, stats); } /* we'll re-count the tuples each time */ stats->num_index_tuples = 0; gvs.result = stats; buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); /* find leaf page */ for (;;) { Page page = BufferGetPage(buffer); IndexTuple itup; LockBuffer(buffer, GIN_SHARE); Assert(!GinPageIsData(page)); if (GinPageIsLeaf(page)) { LockBuffer(buffer, GIN_UNLOCK); LockBuffer(buffer, GIN_EXCLUSIVE); if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page)) { LockBuffer(buffer, GIN_UNLOCK); continue; /* check it one more */ } break; } Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); blkno = GinGetDownlink(itup); Assert(blkno != InvalidBlockNumber); UnlockReleaseBuffer(buffer); buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); } /* right now we found leftmost page in entry's BTree */ for (;;) { Page page = BufferGetPage(buffer); Page resPage; uint32 i; Assert(!GinPageIsData(page)); resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot); blkno = GinPageGetOpaque(page)->rightlink; if (resPage) { START_CRIT_SECTION(); PageRestoreTempPage(resPage, page); MarkBufferDirty(buffer); xlogVacuumPage(gvs.index, buffer); UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); } else { UnlockReleaseBuffer(buffer); } vacuum_delay_point(); for (i = 0; i < nRoot; i++) { ginVacuumPostingTree(&gvs, rootOfPostingTree[i]); vacuum_delay_point(); } if (blkno == InvalidBlockNumber) /* rightmost page */ break; buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); LockBuffer(buffer, GIN_EXCLUSIVE); } PG_RETURN_POINTER(gvs.result); }
/* * scans posting tree and deletes empty pages */ static bool ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff) { DataPageDeleteStack *me; Buffer buffer; Page page; bool meDelete = FALSE; if (isRoot) { me = parent; } else { if (!parent->child) { me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack)); me->parent = parent; parent->child = me; me->leftBlkno = InvalidBlockNumber; } else me = parent->child; } buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); Assert(GinPageIsData(page)); if (!GinPageIsLeaf(page)) { OffsetNumber i; me->blkno = blkno; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i); if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i)) i--; } } if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { if (!(me->leftBlkno == InvalidBlockNumber && GinPageRightMost(page))) { /* we never delete right most branch */ Assert(!isRoot); if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot); meDelete = TRUE; } } } ReleaseBuffer(buffer); if (!meDelete) me->leftBlkno = blkno; return meDelete; }