/* * add PostingItem to a non-leaf page. */ void GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; char *ptr; Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber); Assert(!GinPageIsLeaf(page)); if (offset == InvalidOffsetNumber) { ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1); } else { ptr = (char *) GinDataPageGetPostingItem(page, offset); if (maxoff + 1 - offset != 0) memmove(ptr + sizeof(PostingItem), ptr, (maxoff - offset + 1) * sizeof(PostingItem)); } memcpy(ptr, data, sizeof(PostingItem)); GinPageGetOpaque(page)->maxoff++; }
/* * add ItemPointer to a leaf page. */ void GinDataPageAddItemPointer(Page page, ItemPointer data, OffsetNumber offset) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; char *ptr; Assert(ItemPointerIsValid(data)); Assert(GinPageIsLeaf(page)); if (offset == InvalidOffsetNumber) { ptr = (char *) GinDataPageGetItemPointer(page, maxoff + 1); } else { ptr = (char *) GinDataPageGetItemPointer(page, offset); if (maxoff + 1 - offset != 0) memmove(ptr + sizeof(ItemPointerData), ptr, (maxoff - offset + 1) * sizeof(ItemPointerData)); } memcpy(ptr, data, sizeof(ItemPointerData)); GinPageGetOpaque(page)->maxoff++; }
static void ginRedoInsertListPage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record); Buffer buffer; Page page; OffsetNumber l, off = FirstOffsetNumber; int i, tupsize; char *payload; IndexTuple tuples; Size totaltupsize; /* We always re-initialize the page. */ buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); GinInitBuffer(buffer, GIN_LIST); GinPageGetOpaque(page)->rightlink = data->rightlink; if (data->rightlink == InvalidBlockNumber) { /* tail of sublist */ GinPageSetFullRow(page); GinPageGetOpaque(page)->maxoff = 1; } else { GinPageGetOpaque(page)->maxoff = 0; } payload = XLogRecGetBlockData(record, 0, &totaltupsize); tuples = (IndexTuple) payload; for (i = 0; i < data->ntuples; i++) { tupsize = IndexTupleSize(tuples); l = PageAddItem(page, (Item) tuples, tupsize, off, false, false); if (l == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page"); tuples = (IndexTuple) (((char *) tuples) + tupsize); off++; } Assert((char *) tuples == payload + totaltupsize); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
static void ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record) { ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record); Buffer buffer; Page page; OffsetNumber l, off = FirstOffsetNumber; int i, tupsize; IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage)); /* If we have a full-page image, restore it and we're done */ if (record->xl_info & XLR_BKP_BLOCK(0)) { (void) RestoreBackupBlock(lsn, record, 0, false, false); return; } buffer = XLogReadBuffer(data->node, data->blkno, true); Assert(BufferIsValid(buffer)); page = BufferGetPage(buffer); GinInitBuffer(buffer, GIN_LIST); GinPageGetOpaque(page)->rightlink = data->rightlink; if (data->rightlink == InvalidBlockNumber) { /* tail of sublist */ GinPageSetFullRow(page); GinPageGetOpaque(page)->maxoff = 1; } else { GinPageGetOpaque(page)->maxoff = 0; } for (i = 0; i < data->ntuples; i++) { tupsize = IndexTupleSize(tuples); l = PageAddItem(page, (Item) tuples, tupsize, off, false, false); if (l == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page"); tuples = (IndexTuple) (((char *) tuples) + tupsize); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
static void ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record) { ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record); Buffer buffer; Page page; OffsetNumber l, off = FirstOffsetNumber; int i, tupsize; IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage)); /* * Backup blocks are not used, we always re-initialize the page. */ Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); buffer = XLogReadBuffer(data->node, data->blkno, true); Assert(BufferIsValid(buffer)); page = BufferGetPage(buffer); GinInitBuffer(buffer, GIN_LIST); GinPageGetOpaque(page)->rightlink = data->rightlink; if (data->rightlink == InvalidBlockNumber) { /* tail of sublist */ GinPageSetFullRow(page); GinPageGetOpaque(page)->maxoff = 1; } else { GinPageGetOpaque(page)->maxoff = 0; } for (i = 0; i < data->ntuples; i++) { tupsize = IndexTupleSize(tuples); l = PageAddItem(page, (Item) tuples, tupsize, off, false, false); if (l == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page"); tuples = (IndexTuple) (((char *) tuples) + tupsize); off++; } PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
static void ginRedoDeletePage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record); Buffer dbuffer; Buffer pbuffer; Buffer lbuffer; Page page; /* * Lock left page first in order to prevent possible deadlock with * ginStepRight(). */ if (XLogReadBufferForRedo(record, 2, &lbuffer) == BLK_NEEDS_REDO) { page = BufferGetPage(lbuffer); Assert(GinPageIsData(page)); GinPageGetOpaque(page)->rightlink = data->rightLink; PageSetLSN(page, lsn); MarkBufferDirty(lbuffer); } if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO) { page = BufferGetPage(dbuffer); Assert(GinPageIsData(page)); GinPageGetOpaque(page)->flags = GIN_DELETED; GinPageSetDeleteXid(page, data->deleteXid); PageSetLSN(page, lsn); MarkBufferDirty(dbuffer); } if (XLogReadBufferForRedo(record, 1, &pbuffer) == BLK_NEEDS_REDO) { page = BufferGetPage(pbuffer); Assert(GinPageIsData(page)); Assert(!GinPageIsLeaf(page)); GinPageDeletePostingItem(page, data->parentOffset); PageSetLSN(page, lsn); MarkBufferDirty(pbuffer); } if (BufferIsValid(lbuffer)) UnlockReleaseBuffer(lbuffer); if (BufferIsValid(pbuffer)) UnlockReleaseBuffer(pbuffer); if (BufferIsValid(dbuffer)) UnlockReleaseBuffer(dbuffer); }
/* * Deletes posting item from non-leaf page */ void GinPageDeletePostingItem(Page page, OffsetNumber offset) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; Assert(!GinPageIsLeaf(page)); Assert(offset >= FirstOffsetNumber && offset <= maxoff); if (offset != maxoff) memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1), sizeof(PostingItem) * (maxoff - offset)); GinPageGetOpaque(page)->maxoff--; }
/* * Step right from current page. * * The next page is locked first, before releasing the current page. This is * crucial to protect from concurrent page deletion (see comment in * ginDeletePage). */ Buffer ginStepRight(Buffer buffer, Relation index, int lockmode) { Buffer nextbuffer; Page page = BufferGetPage(buffer); bool isLeaf = GinPageIsLeaf(page); bool isData = GinPageIsData(page); BlockNumber blkno = GinPageGetOpaque(page)->rightlink; nextbuffer = ReadBuffer(index, blkno); LockBuffer(nextbuffer, lockmode); UnlockReleaseBuffer(buffer); /* Sanity check that the page we stepped to is of similar kind. */ page = BufferGetPage(nextbuffer); if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page)) elog(ERROR, "right sibling of GIN page is of different type"); /* * Given the proper lock sequence above, we should never land on a deleted * page. */ if (GinPageIsDeleted(page)) elog(ERROR, "right sibling of GIN page was deleted"); return nextbuffer; }
/* * Mask a GIN page before running consistency checks on it. */ void gin_mask(char *pagedata, BlockNumber blkno) { Page page = (Page) pagedata; GinPageOpaque opaque; mask_page_lsn_and_checksum(page); opaque = GinPageGetOpaque(page); mask_page_hint_bits(page); /* * GIN metapage doesn't use pd_lower/pd_upper. Other page types do. Hence, * we need to apply masking for those pages. */ if (opaque->flags != GIN_META) { /* * For GIN_DELETED page, the page is initialized to empty. Hence, mask * the page content. */ if (opaque->flags & GIN_DELETED) mask_page_content(page); else mask_unused_space(page); } }
/* * add ItemPointer or PostingItem to page. data should point to * correct value! depending on leaf or non-leaf page */ void GinDataPageAddItem(Page page, void *data, OffsetNumber offset) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; char *ptr; if (offset == InvalidOffsetNumber) { ptr = GinDataPageGetItem(page, maxoff + 1); } else { ptr = GinDataPageGetItem(page, offset); if (maxoff + 1 - offset != 0) memmove(ptr + GinSizeOfItem(page), ptr, (maxoff - offset + 1) * GinSizeOfItem(page)); } memcpy(ptr, data, GinSizeOfItem(page)); GinPageGetOpaque(page)->maxoff++; }
void GinInitPage(Page page, uint32 f, Size pageSize) { GinPageOpaque opaque; PageInit(page, pageSize, sizeof(GinPageOpaqueData)); opaque = GinPageGetOpaque(page); memset(opaque, 0, sizeof(GinPageOpaqueData)); opaque->flags = f; opaque->rightlink = InvalidBlockNumber; }
/* * returns blkno of leftmost child */ static BlockNumber dataGetLeftMostPage(GinBtree btree, Page page) { PostingItem *pitem; Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber); pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber); return PostingItemGetBlockNumber(pitem); }
/* * Creates posting tree with one page. Function * suppose that items[] fits to page */ static BlockNumber createPostingTree(Relation index, ItemPointerData *items, uint32 nitems) { BlockNumber blkno; Buffer buffer = GinNewBuffer(index); Page page; START_CRIT_SECTION(); GinInitBuffer(buffer, GIN_DATA | GIN_LEAF); page = BufferGetPage(buffer); blkno = BufferGetBlockNumber(buffer); memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems); GinPageGetOpaque(page)->maxoff = nitems; MarkBufferDirty(buffer); if (!index->rd_istemp) { XLogRecPtr recptr; XLogRecData rdata[2]; ginxlogCreatePostingTree data; data.node = index->rd_node; data.blkno = blkno; data.nitem = nitems; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &data; rdata[0].len = sizeof(ginxlogCreatePostingTree); rdata[0].next = &rdata[1]; rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) items; rdata[1].len = sizeof(ItemPointerData) * nitems; rdata[1].next = NULL; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); return blkno; }
static void ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record) { ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record); Buffer metabuffer; Page metapage; int i; /* Backup blocks are not used in delete_listpage records */ Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false); if (!BufferIsValid(metabuffer)) return; /* assume index was deleted, nothing to do */ metapage = BufferGetPage(metabuffer); memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); PageSetLSN(metapage, lsn); MarkBufferDirty(metabuffer); /* * In normal operation, shiftList() takes exclusive lock on all the * pages-to-be-deleted simultaneously. During replay, however, it should * be all right to lock them one at a time. This is dependent on the fact * that we are deleting pages from the head of the list, and that readers * share-lock the next page before releasing the one they are on. So we * cannot get past a reader that is on, or due to visit, any page we are * going to delete. New incoming readers will block behind our metapage * lock and then see a fully updated page list. */ for (i = 0; i < data->ndeleted; i++) { Buffer buffer = XLogReadBuffer(data->node, data->toDelete[i], false); if (BufferIsValid(buffer)) { Page page = BufferGetPage(buffer); if (lsn > PageGetLSN(page)) { GinPageGetOpaque(page)->flags = GIN_DELETED; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); } } UnlockReleaseBuffer(metabuffer); }
/* * Searches correct position for value on leaf page. * Page should be correctly chosen. * Returns true if value found on page. */ static bool dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack) { Page page = BufferGetPage(stack->buffer); OffsetNumber low, high; int result; Assert(GinPageIsLeaf(page)); Assert(GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; return TRUE; } low = FirstOffsetNumber; high = GinPageGetOpaque(page)->maxoff; if (high < low) { stack->off = FirstOffsetNumber; return false; } high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); result = ginCompareItemPointers(&btree->itemptr, GinDataPageGetItemPointer(page, mid)); if (result == 0) { stack->off = mid; return true; } else if (result > 0) low = mid + 1; else high = mid; } stack->off = high; return false; }
static bool findItemInPage(Page page, ItemPointer item, OffsetNumber *off) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; int res; if ( GinPageGetOpaque(page)->flags & GIN_DELETED ) /* page was deleted by concurrent vacuum */ return false; /* * scan page to find equal or first greater value */ for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++) { res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); if (res <= 0) return true; } return false; }
static void ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id) { XLogRecPtr lsn = record->EndRecPtr; Buffer buffer; Page page; if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO) { page = (Page) BufferGetPage(buffer); GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
/* * Finds links to blkno on non-leaf page, returns * offset of PostingItem */ static OffsetNumber dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) { OffsetNumber i, maxoff = GinPageGetOpaque(page)->maxoff; PostingItem *pitem; Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); /* if page isn't changed, we return storedOff */ if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) { pitem = (PostingItem *) GinDataPageGetItem(page, storedOff); if (PostingItemGetBlockNumber(pitem) == blkno) return storedOff; /* * we hope, that needed pointer goes to right. It's true if there * wasn't a deletion */ for (i = storedOff + 1; i <= maxoff; i++) { pitem = (PostingItem *) GinDataPageGetItem(page, i); if (PostingItemGetBlockNumber(pitem) == blkno) return i; } maxoff = storedOff - 1; } /* last chance */ for (i = FirstOffsetNumber; i <= maxoff; i++) { pitem = (PostingItem *) GinDataPageGetItem(page, i); if (PostingItemGetBlockNumber(pitem) == blkno) return i; } return InvalidOffsetNumber; }
static void ginRedoClearIncompleteSplit(XLogRecPtr lsn, RelFileNode node, BlockNumber blkno) { Buffer buffer; Page page; buffer = XLogReadBuffer(node, blkno, false); if (!BufferIsValid(buffer)) return; /* page was deleted, nothing to do */ page = (Page) BufferGetPage(buffer); if (lsn > PageGetLSN(page)) { GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); }
static void ginRedoSplitData(Page lpage, Page rpage, void *rdata) { bool isleaf = GinPageIsLeaf(lpage); if (isleaf) { ginxlogSplitDataLeaf *data = (ginxlogSplitDataLeaf *) rdata; Pointer lptr = (Pointer) rdata + sizeof(ginxlogSplitDataLeaf); Pointer rptr = lptr + data->lsize; Assert(data->lsize > 0 && data->lsize <= GinDataPageMaxDataSize); Assert(data->rsize > 0 && data->rsize <= GinDataPageMaxDataSize); memcpy(GinDataLeafPageGetPostingList(lpage), lptr, data->lsize); memcpy(GinDataLeafPageGetPostingList(rpage), rptr, data->rsize); GinDataPageSetDataSize(lpage, data->lsize); GinDataPageSetDataSize(rpage, data->rsize); *GinDataPageGetRightBound(lpage) = data->lrightbound; *GinDataPageGetRightBound(rpage) = data->rrightbound; } else { ginxlogSplitDataInternal *data = (ginxlogSplitDataInternal *) rdata; PostingItem *items = (PostingItem *) ((char *) rdata + sizeof(ginxlogSplitDataInternal)); OffsetNumber i; OffsetNumber maxoff; for (i = 0; i < data->separator; i++) GinDataPageAddPostingItem(lpage, &items[i], InvalidOffsetNumber); for (i = data->separator; i < data->nitem; i++) GinDataPageAddPostingItem(rpage, &items[i], InvalidOffsetNumber); /* set up right key */ maxoff = GinPageGetOpaque(lpage)->maxoff; *GinDataPageGetRightBound(lpage) = GinDataPageGetPostingItem(lpage, maxoff)->key; *GinDataPageGetRightBound(rpage) = data->rightbound; } }
/* * checks space to install new value, * item pointer never deletes! */ static bool dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off) { Page page = BufferGetPage(buf); Assert(GinPageIsData(page)); Assert(!btree->isDelete); if (GinPageIsLeaf(page)) { if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff) { if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page)) return true; } else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page)) return true; } else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page)) return true; return false; }
/* * fills WAL record for vacuum leaf page */ static void xlogVacuumPage(Relation index, Buffer buffer) { Page page = BufferGetPage(buffer); XLogRecPtr recptr; XLogRecData rdata[3]; ginxlogVacuumPage data; char *backup; char itups[BLCKSZ]; uint32 len = 0; Assert(GinPageIsLeaf(page)); if (!RelationNeedsWAL(index)) return; data.node = index->rd_node; data.blkno = BufferGetBlockNumber(buffer); if (GinPageIsData(page)) { backup = GinDataPageGetData(page); data.nitem = GinPageGetOpaque(page)->maxoff; if (data.nitem) len = MAXALIGN(sizeof(ItemPointerData) * data.nitem); } else { char *ptr; OffsetNumber i; ptr = backup = itups; for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i++) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); memcpy(ptr, itup, IndexTupleSize(itup)); ptr += MAXALIGN(IndexTupleSize(itup)); } data.nitem = PageGetMaxOffsetNumber(page); len = ptr - backup; } rdata[0].buffer = buffer; rdata[0].buffer_std = (GinPageIsData(page)) ? FALSE : TRUE; rdata[0].len = 0; rdata[0].data = NULL; rdata[0].next = rdata + 1; rdata[1].buffer = InvalidBuffer; rdata[1].len = sizeof(ginxlogVacuumPage); rdata[1].data = (char *) &data; if (len == 0) { rdata[1].next = NULL; } else { rdata[1].next = rdata + 2; rdata[2].buffer = InvalidBuffer; rdata[2].len = len; rdata[2].data = backup; rdata[2].next = NULL; } recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); }
Datum ginbulkdelete(PG_FUNCTION_ARGS) { IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2); void *callback_state = (void *) PG_GETARG_POINTER(3); Relation index = info->index; BlockNumber blkno = GIN_ROOT_BLKNO; GinVacuumState gvs; Buffer buffer; BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))]; uint32 nRoot; gvs.index = index; gvs.callback = callback; gvs.callback_state = callback_state; gvs.strategy = info->strategy; initGinState(&gvs.ginstate, index); /* first time through? */ if (stats == NULL) { /* Yes, so initialize stats to zeroes */ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); /* and cleanup any pending inserts */ ginInsertCleanup(&gvs.ginstate, true, stats); } /* we'll re-count the tuples each time */ stats->num_index_tuples = 0; gvs.result = stats; buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); /* find leaf page */ for (;;) { Page page = BufferGetPage(buffer); IndexTuple itup; LockBuffer(buffer, GIN_SHARE); Assert(!GinPageIsData(page)); if (GinPageIsLeaf(page)) { LockBuffer(buffer, GIN_UNLOCK); LockBuffer(buffer, GIN_EXCLUSIVE); if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page)) { LockBuffer(buffer, GIN_UNLOCK); continue; /* check it one more */ } break; } Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); blkno = GinGetDownlink(itup); Assert(blkno != InvalidBlockNumber); UnlockReleaseBuffer(buffer); buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); } /* right now we found leftmost page in entry's BTree */ for (;;) { Page page = BufferGetPage(buffer); Page resPage; uint32 i; Assert(!GinPageIsData(page)); resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot); blkno = GinPageGetOpaque(page)->rightlink; if (resPage) { START_CRIT_SECTION(); PageRestoreTempPage(resPage, page); MarkBufferDirty(buffer); xlogVacuumPage(gvs.index, buffer); UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); } else { UnlockReleaseBuffer(buffer); } vacuum_delay_point(); for (i = 0; i < nRoot; i++) { ginVacuumPostingTree(&gvs, rootOfPostingTree[i]); vacuum_delay_point(); } if (blkno == InvalidBlockNumber) /* rightmost page */ break; buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); LockBuffer(buffer, GIN_EXCLUSIVE); } PG_RETURN_POINTER(gvs.result); }
/* * scans posting tree and deletes empty pages */ static bool ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff) { DataPageDeleteStack *me; Buffer buffer; Page page; bool meDelete = FALSE; if (isRoot) { me = parent; } else { if (!parent->child) { me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack)); me->parent = parent; parent->child = me; me->leftBlkno = InvalidBlockNumber; } else me = parent->child; } buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); Assert(GinPageIsData(page)); if (!GinPageIsLeaf(page)) { OffsetNumber i; me->blkno = blkno; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i); if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i)) i--; } } if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { if (!(me->leftBlkno == InvalidBlockNumber && GinPageRightMost(page))) { /* we never delete right most branch */ Assert(!isRoot); if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot); meDelete = TRUE; } } } ReleaseBuffer(buffer); if (!meDelete) me->leftBlkno = blkno; return meDelete; }
static void ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno, BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot) { Buffer dBuffer; Buffer lBuffer; Buffer pBuffer; Page page, parentPage; dBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, deleteBlkno, RBM_NORMAL, gvs->strategy); if (leftBlkno != InvalidBlockNumber) lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno, RBM_NORMAL, gvs->strategy); else lBuffer = InvalidBuffer; pBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, parentBlkno, RBM_NORMAL, gvs->strategy); LockBuffer(dBuffer, GIN_EXCLUSIVE); if (!isParentRoot) /* parent is already locked by * LockBufferForCleanup() */ LockBuffer(pBuffer, GIN_EXCLUSIVE); if (leftBlkno != InvalidBlockNumber) LockBuffer(lBuffer, GIN_EXCLUSIVE); START_CRIT_SECTION(); if (leftBlkno != InvalidBlockNumber) { BlockNumber rightlink; page = BufferGetPage(dBuffer); rightlink = GinPageGetOpaque(page)->rightlink; page = BufferGetPage(lBuffer); GinPageGetOpaque(page)->rightlink = rightlink; } parentPage = BufferGetPage(pBuffer); #ifdef USE_ASSERT_CHECKING do { PostingItem *tod = (PostingItem *) GinDataPageGetItem(parentPage, myoff); Assert(PostingItemGetBlockNumber(tod) == deleteBlkno); } while (0); #endif GinPageDeletePostingItem(parentPage, myoff); page = BufferGetPage(dBuffer); /* * we shouldn't change rightlink field to save workability of running * search scan */ GinPageGetOpaque(page)->flags = GIN_DELETED; MarkBufferDirty(pBuffer); if (leftBlkno != InvalidBlockNumber) MarkBufferDirty(lBuffer); MarkBufferDirty(dBuffer); if (RelationNeedsWAL(gvs->index)) { XLogRecPtr recptr; XLogRecData rdata[4]; ginxlogDeletePage data; int n; data.node = gvs->index->rd_node; data.blkno = deleteBlkno; data.parentBlkno = parentBlkno; data.parentOffset = myoff; data.leftBlkno = leftBlkno; data.rightLink = GinPageGetOpaque(page)->rightlink; rdata[0].buffer = dBuffer; rdata[0].buffer_std = FALSE; rdata[0].data = NULL; rdata[0].len = 0; rdata[0].next = rdata + 1; rdata[1].buffer = pBuffer; rdata[1].buffer_std = FALSE; rdata[1].data = NULL; rdata[1].len = 0; rdata[1].next = rdata + 2; if (leftBlkno != InvalidBlockNumber) { rdata[2].buffer = lBuffer; rdata[2].buffer_std = FALSE; rdata[2].data = NULL; rdata[2].len = 0; rdata[2].next = rdata + 3; n = 3; } else n = 2; rdata[n].buffer = InvalidBuffer; rdata[n].buffer_std = FALSE; rdata[n].len = sizeof(ginxlogDeletePage); rdata[n].data = (char *) &data; rdata[n].next = NULL; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); PageSetLSN(parentPage, recptr); PageSetTLI(parentPage, ThisTimeLineID); if (leftBlkno != InvalidBlockNumber) { page = BufferGetPage(lBuffer); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } } if (!isParentRoot) LockBuffer(pBuffer, GIN_UNLOCK); ReleaseBuffer(pBuffer); if (leftBlkno != InvalidBlockNumber) UnlockReleaseBuffer(lBuffer); UnlockReleaseBuffer(dBuffer); END_CRIT_SECTION(); gvs->result->pages_deleted++; }
static bool ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer) { Buffer buffer; Page page; bool hasVoidPage = FALSE; buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); /* * We should be sure that we don't concurrent with inserts, insert process * never release root page until end (but it can unlock it and lock * again). New scan can't start but previously started ones work * concurrently. */ if (isRoot) LockBufferForCleanup(buffer); else LockBuffer(buffer, GIN_EXCLUSIVE); Assert(GinPageIsData(page)); if (GinPageIsLeaf(page)) { OffsetNumber newMaxOff, oldMaxOff = GinPageGetOpaque(page)->maxoff; ItemPointerData *cleaned = NULL; newMaxOff = ginVacuumPostingList(gvs, (ItemPointer) GinDataPageGetData(page), oldMaxOff, &cleaned); /* saves changes about deleted tuple ... */ if (oldMaxOff != newMaxOff) { START_CRIT_SECTION(); if (newMaxOff > 0) memcpy(GinDataPageGetData(page), cleaned, sizeof(ItemPointerData) * newMaxOff); pfree(cleaned); GinPageGetOpaque(page)->maxoff = newMaxOff; MarkBufferDirty(buffer); xlogVacuumPage(gvs->index, buffer); END_CRIT_SECTION(); /* if root is a leaf page, we don't desire further processing */ if (!isRoot && GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) hasVoidPage = TRUE; } } else { OffsetNumber i; bool isChildHasVoid = FALSE; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i); if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL)) isChildHasVoid = TRUE; } if (isChildHasVoid) hasVoidPage = TRUE; } /* * if we have root and theres void pages in tree, then we don't release * lock to go further processing and guarantee that tree is unused */ if (!(isRoot && hasVoidPage)) { UnlockReleaseBuffer(buffer); } else { Assert(rootBuffer); *rootBuffer = buffer; } return hasVoidPage; }
/* * Delete a posting tree page. */ static void ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno, BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot) { Buffer dBuffer; Buffer lBuffer; Buffer pBuffer; Page page, parentPage; BlockNumber rightlink; /* * Lock the pages in the same order as an insertion would, to avoid * deadlocks: left, then right, then parent. */ lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno, RBM_NORMAL, gvs->strategy); dBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, deleteBlkno, RBM_NORMAL, gvs->strategy); pBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, parentBlkno, RBM_NORMAL, gvs->strategy); LockBuffer(lBuffer, GIN_EXCLUSIVE); LockBuffer(dBuffer, GIN_EXCLUSIVE); if (!isParentRoot) /* parent is already locked by * LockBufferForCleanup() */ LockBuffer(pBuffer, GIN_EXCLUSIVE); START_CRIT_SECTION(); /* Unlink the page by changing left sibling's rightlink */ page = BufferGetPage(dBuffer); rightlink = GinPageGetOpaque(page)->rightlink; page = BufferGetPage(lBuffer); GinPageGetOpaque(page)->rightlink = rightlink; /* Delete downlink from parent */ parentPage = BufferGetPage(pBuffer); #ifdef USE_ASSERT_CHECKING do { PostingItem *tod = GinDataPageGetPostingItem(parentPage, myoff); Assert(PostingItemGetBlockNumber(tod) == deleteBlkno); } while (0); #endif GinPageDeletePostingItem(parentPage, myoff); page = BufferGetPage(dBuffer); /* * we shouldn't change rightlink field to save workability of running * search scan */ GinPageGetOpaque(page)->flags = GIN_DELETED; MarkBufferDirty(pBuffer); MarkBufferDirty(lBuffer); MarkBufferDirty(dBuffer); if (RelationNeedsWAL(gvs->index)) { XLogRecPtr recptr; ginxlogDeletePage data; /* * We can't pass REGBUF_STANDARD for the deleted page, because we * didn't set pd_lower on pre-9.4 versions. The page might've been * binary-upgraded from an older version, and hence not have pd_lower * set correctly. Ditto for the left page, but removing the item from * the parent updated its pd_lower, so we know that's OK at this * point. */ XLogBeginInsert(); XLogRegisterBuffer(0, dBuffer, 0); XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD); XLogRegisterBuffer(2, lBuffer, 0); data.parentOffset = myoff; data.rightLink = GinPageGetOpaque(page)->rightlink; XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage)); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE); PageSetLSN(page, recptr); PageSetLSN(parentPage, recptr); PageSetLSN(BufferGetPage(lBuffer), recptr); } if (!isParentRoot) LockBuffer(pBuffer, GIN_UNLOCK); ReleaseBuffer(pBuffer); UnlockReleaseBuffer(lBuffer); UnlockReleaseBuffer(dBuffer); END_CRIT_SECTION(); gvs->result->pages_deleted++; }
static bool ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer) { Buffer buffer; Page page; bool hasVoidPage = FALSE; MemoryContext oldCxt; buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); /* * We should be sure that we don't concurrent with inserts, insert process * never release root page until end (but it can unlock it and lock * again). New scan can't start but previously started ones work * concurrently. */ if (isRoot) LockBufferForCleanup(buffer); else LockBuffer(buffer, GIN_EXCLUSIVE); Assert(GinPageIsData(page)); if (GinPageIsLeaf(page)) { oldCxt = MemoryContextSwitchTo(gvs->tmpCxt); ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs); MemoryContextSwitchTo(oldCxt); MemoryContextReset(gvs->tmpCxt); /* if root is a leaf page, we don't desire further processing */ if (!isRoot && !hasVoidPage && GinDataLeafPageIsEmpty(page)) hasVoidPage = TRUE; } else { OffsetNumber i; bool isChildHasVoid = FALSE; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = GinDataPageGetPostingItem(page, i); if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL)) isChildHasVoid = TRUE; } if (isChildHasVoid) hasVoidPage = TRUE; } /* * if we have root and there are empty pages in tree, then we don't * release lock to go further processing and guarantee that tree is unused */ if (!(isRoot && hasVoidPage)) { UnlockReleaseBuffer(buffer); } else { Assert(rootBuffer); *rootBuffer = buffer; } return hasVoidPage; }
/* * Split entry page and insert new data. * * Returns new temp pages to *newlpage and *newrpage. * The original buffer is left untouched. */ static void entrySplitPage(GinBtree btree, Buffer origbuf, GinBtreeStack *stack, GinBtreeEntryInsertData *insertData, BlockNumber updateblkno, Page *newlpage, Page *newrpage) { OffsetNumber off = stack->off; OffsetNumber i, maxoff, separator = InvalidOffsetNumber; Size totalsize = 0; Size lsize = 0, size; char *ptr; IndexTuple itup; Page page; Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf)); Size pageSize = PageGetPageSize(lpage); char tupstore[2 * BLCKSZ]; entryPreparePage(btree, lpage, off, insertData, updateblkno); /* * First, append all the existing tuples and the new tuple we're inserting * one after another in a temporary workspace. */ maxoff = PageGetMaxOffsetNumber(lpage); ptr = tupstore; for (i = FirstOffsetNumber; i <= maxoff; i++) { if (i == off) { size = MAXALIGN(IndexTupleSize(insertData->entry)); memcpy(ptr, insertData->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i)); size = MAXALIGN(IndexTupleSize(itup)); memcpy(ptr, itup, size); ptr += size; totalsize += size + sizeof(ItemIdData); } if (off == maxoff + 1) { size = MAXALIGN(IndexTupleSize(insertData->entry)); memcpy(ptr, insertData->entry, size); ptr += size; totalsize += size + sizeof(ItemIdData); } /* * Initialize the left and right pages, and copy all the tuples back to * them. */ GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); ptr = tupstore; maxoff++; lsize = 0; page = lpage; for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) ptr; /* * Decide where to split. We try to equalize the pages' total data * size, not number of tuples. */ if (lsize > totalsize / 2) { if (separator == InvalidOffsetNumber) separator = i - 1; page = rpage; } else { lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); } if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(btree->index)); ptr += MAXALIGN(IndexTupleSize(itup)); } /* return temp pages to caller */ *newlpage = lpage; *newrpage = rpage; }
static void ginRedoUpdateMetapage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record); Buffer metabuffer; Page metapage; Buffer buffer; /* * Restore the metapage. This is essentially the same as a full-page * image, so restore the metapage unconditionally without looking at the * LSN, to avoid torn page hazards. */ metabuffer = XLogInitBufferForRedo(record, 0); Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO); metapage = BufferGetPage(metabuffer); memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); PageSetLSN(metapage, lsn); MarkBufferDirty(metabuffer); if (data->ntuples > 0) { /* * insert into tail page */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); OffsetNumber off; int i; Size tupsize; char *payload; IndexTuple tuples; Size totaltupsize; payload = XLogRecGetBlockData(record, 1, &totaltupsize); tuples = (IndexTuple) payload; if (PageIsEmpty(page)) off = FirstOffsetNumber; else off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); for (i = 0; i < data->ntuples; i++) { tupsize = IndexTupleSize(tuples); if (PageAddItem(page, (Item) tuples, tupsize, off, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page"); tuples = (IndexTuple) (((char *) tuples) + tupsize); off++; } Assert(payload + totaltupsize == (char *) tuples); /* * Increase counter of heap tuples */ GinPageGetOpaque(page)->maxoff++; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } else if (data->prevTail != InvalidBlockNumber) { /* * New tail */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); GinPageGetOpaque(page)->rightlink = data->newRightlink; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } UnlockReleaseBuffer(metabuffer); }