/* * Deletes posting item from non-leaf page */ void GinPageDeletePostingItem(Page page, OffsetNumber offset) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; Assert(!GinPageIsLeaf(page)); Assert(offset >= FirstOffsetNumber && offset <= maxoff); if (offset != maxoff) memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1), sizeof(PostingItem) * (maxoff - offset)); GinPageGetOpaque(page)->maxoff--; }
/* * add ItemPointer or PostingItem to page. data should point to * correct value! depending on leaf or non-leaf page */ void GinDataPageAddItem(Page page, void *data, OffsetNumber offset) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; char *ptr; if (offset == InvalidOffsetNumber) { ptr = GinDataPageGetItem(page, maxoff + 1); } else { ptr = GinDataPageGetItem(page, offset); if (maxoff + 1 - offset != 0) memmove(ptr + GinSizeOfItem(page), ptr, (maxoff - offset + 1) * GinSizeOfItem(page)); } memcpy(ptr, data, GinSizeOfItem(page)); GinPageGetOpaque(page)->maxoff++; }
/* * Finds links to blkno on non-leaf page, returns * offset of PostingItem */ static OffsetNumber dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) { OffsetNumber i, maxoff = GinPageGetOpaque(page)->maxoff; PostingItem *pitem; Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); /* if page isn't changed, we return storedOff */ if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) { pitem = (PostingItem *) GinDataPageGetItem(page, storedOff); if (PostingItemGetBlockNumber(pitem) == blkno) return storedOff; /* * we hope, that needed pointer goes to right. It's true if there * wasn't a deletion */ for (i = storedOff + 1; i <= maxoff; i++) { pitem = (PostingItem *) GinDataPageGetItem(page, i); if (PostingItemGetBlockNumber(pitem) == blkno) return i; } maxoff = storedOff - 1; } /* last chance */ for (i = FirstOffsetNumber; i <= maxoff; i++) { pitem = (PostingItem *) GinDataPageGetItem(page, i); if (PostingItemGetBlockNumber(pitem) == blkno) return i; } return InvalidOffsetNumber; }
/* * returns blkno of leftmost child */ static BlockNumber dataGetLeftMostPage(GinBtree btree, Page page) { PostingItem *pitem; Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber); pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber); return PostingItemGetBlockNumber(pitem); }
/* * Searches correct position for value on leaf page. * Page should be correctly chosen. * Returns true if value found on page. */ static bool dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack) { Page page = BufferGetPage(stack->buffer); OffsetNumber low, high; int result; Assert(GinPageIsLeaf(page)); Assert(GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; return TRUE; } low = FirstOffsetNumber; high = GinPageGetOpaque(page)->maxoff; if (high < low) { stack->off = FirstOffsetNumber; return false; } high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); result = ginCompareItemPointers(btree->items + btree->curitem, (ItemPointer) GinDataPageGetItem(page, mid)); if (result == 0) { stack->off = mid; return true; } else if (result > 0) low = mid + 1; else high = mid; } stack->off = high; return false; }
/* * In case of previous split update old child blkno to * new right page * item pointer never deletes! */ static BlockNumber dataPrepareData(GinBtree btree, Page page, OffsetNumber off) { BlockNumber ret = InvalidBlockNumber; Assert(GinPageIsData(page)); if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off); PostingItemSetBlockNumber(pitem, btree->rightblkno); ret = btree->rightblkno; } btree->rightblkno = InvalidBlockNumber; return ret; }
static bool findItemInPage(Page page, ItemPointer item, OffsetNumber *off) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; int res; if ( GinPageGetOpaque(page)->flags & GIN_DELETED ) /* page was deleted by concurrent vacuum */ return false; /* * scan page to find equal or first greater value */ for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++) { res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off)); if (res <= 0) return true; } return false; }
/* * scans posting tree and deletes empty pages */ static bool ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff) { DataPageDeleteStack *me; Buffer buffer; Page page; bool meDelete = FALSE; if (isRoot) { me = parent; } else { if (!parent->child) { me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack)); me->parent = parent; parent->child = me; me->leftBlkno = InvalidBlockNumber; } else me = parent->child; } buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); Assert(GinPageIsData(page)); if (!GinPageIsLeaf(page)) { OffsetNumber i; me->blkno = blkno; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i); if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i)) i--; } } if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { if (!(me->leftBlkno == InvalidBlockNumber && GinPageRightMost(page))) { /* we never delete right most branch */ Assert(!isRoot); if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) { ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot); meDelete = TRUE; } } } ReleaseBuffer(buffer); if (!meDelete) me->leftBlkno = blkno; return meDelete; }
static void ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno, BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot) { Buffer dBuffer; Buffer lBuffer; Buffer pBuffer; Page page, parentPage; dBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, deleteBlkno, RBM_NORMAL, gvs->strategy); if (leftBlkno != InvalidBlockNumber) lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno, RBM_NORMAL, gvs->strategy); else lBuffer = InvalidBuffer; pBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, parentBlkno, RBM_NORMAL, gvs->strategy); LockBuffer(dBuffer, GIN_EXCLUSIVE); if (!isParentRoot) /* parent is already locked by * LockBufferForCleanup() */ LockBuffer(pBuffer, GIN_EXCLUSIVE); if (leftBlkno != InvalidBlockNumber) LockBuffer(lBuffer, GIN_EXCLUSIVE); START_CRIT_SECTION(); if (leftBlkno != InvalidBlockNumber) { BlockNumber rightlink; page = BufferGetPage(dBuffer); rightlink = GinPageGetOpaque(page)->rightlink; page = BufferGetPage(lBuffer); GinPageGetOpaque(page)->rightlink = rightlink; } parentPage = BufferGetPage(pBuffer); #ifdef USE_ASSERT_CHECKING do { PostingItem *tod = (PostingItem *) GinDataPageGetItem(parentPage, myoff); Assert(PostingItemGetBlockNumber(tod) == deleteBlkno); } while (0); #endif GinPageDeletePostingItem(parentPage, myoff); page = BufferGetPage(dBuffer); /* * we shouldn't change rightlink field to save workability of running * search scan */ GinPageGetOpaque(page)->flags = GIN_DELETED; MarkBufferDirty(pBuffer); if (leftBlkno != InvalidBlockNumber) MarkBufferDirty(lBuffer); MarkBufferDirty(dBuffer); if (RelationNeedsWAL(gvs->index)) { XLogRecPtr recptr; XLogRecData rdata[4]; ginxlogDeletePage data; int n; data.node = gvs->index->rd_node; data.blkno = deleteBlkno; data.parentBlkno = parentBlkno; data.parentOffset = myoff; data.leftBlkno = leftBlkno; data.rightLink = GinPageGetOpaque(page)->rightlink; rdata[0].buffer = dBuffer; rdata[0].buffer_std = FALSE; rdata[0].data = NULL; rdata[0].len = 0; rdata[0].next = rdata + 1; rdata[1].buffer = pBuffer; rdata[1].buffer_std = FALSE; rdata[1].data = NULL; rdata[1].len = 0; rdata[1].next = rdata + 2; if (leftBlkno != InvalidBlockNumber) { rdata[2].buffer = lBuffer; rdata[2].buffer_std = FALSE; rdata[2].data = NULL; rdata[2].len = 0; rdata[2].next = rdata + 3; n = 3; } else n = 2; rdata[n].buffer = InvalidBuffer; rdata[n].buffer_std = FALSE; rdata[n].len = sizeof(ginxlogDeletePage); rdata[n].data = (char *) &data; rdata[n].next = NULL; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); PageSetLSN(parentPage, recptr); PageSetTLI(parentPage, ThisTimeLineID); if (leftBlkno != InvalidBlockNumber) { page = BufferGetPage(lBuffer); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } } if (!isParentRoot) LockBuffer(pBuffer, GIN_UNLOCK); ReleaseBuffer(pBuffer); if (leftBlkno != InvalidBlockNumber) UnlockReleaseBuffer(lBuffer); UnlockReleaseBuffer(dBuffer); END_CRIT_SECTION(); gvs->result->pages_deleted++; }
static bool ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer) { Buffer buffer; Page page; bool hasVoidPage = FALSE; buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); /* * We should be sure that we don't concurrent with inserts, insert process * never release root page until end (but it can unlock it and lock * again). New scan can't start but previously started ones work * concurrently. */ if (isRoot) LockBufferForCleanup(buffer); else LockBuffer(buffer, GIN_EXCLUSIVE); Assert(GinPageIsData(page)); if (GinPageIsLeaf(page)) { OffsetNumber newMaxOff, oldMaxOff = GinPageGetOpaque(page)->maxoff; ItemPointerData *cleaned = NULL; newMaxOff = ginVacuumPostingList(gvs, (ItemPointer) GinDataPageGetData(page), oldMaxOff, &cleaned); /* saves changes about deleted tuple ... */ if (oldMaxOff != newMaxOff) { START_CRIT_SECTION(); if (newMaxOff > 0) memcpy(GinDataPageGetData(page), cleaned, sizeof(ItemPointerData) * newMaxOff); pfree(cleaned); GinPageGetOpaque(page)->maxoff = newMaxOff; MarkBufferDirty(buffer); xlogVacuumPage(gvs->index, buffer); END_CRIT_SECTION(); /* if root is a leaf page, we don't desire further processing */ if (!isRoot && GinPageGetOpaque(page)->maxoff < FirstOffsetNumber) hasVoidPage = TRUE; } } else { OffsetNumber i; bool isChildHasVoid = FALSE; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i); if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL)) isChildHasVoid = TRUE; } if (isChildHasVoid) hasVoidPage = TRUE; } /* * if we have root and theres void pages in tree, then we don't release * lock to go further processing and guarantee that tree is unused */ if (!(isRoot && hasVoidPage)) { UnlockReleaseBuffer(buffer); } else { Assert(rootBuffer); *rootBuffer = buffer; } return hasVoidPage; }
/* * Find correct PostingItem in non-leaf page. It supposed that page * correctly chosen and searching value SHOULD be on page */ static BlockNumber dataLocateItem(GinBtree btree, GinBtreeStack *stack) { OffsetNumber low, high, maxoff; PostingItem *pitem = NULL; int result; Page page = BufferGetPage(stack->buffer); Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); if (btree->fullScan) { stack->off = FirstOffsetNumber; stack->predictNumber *= GinPageGetOpaque(page)->maxoff; return btree->getLeftMostPage(btree, page); } low = FirstOffsetNumber; maxoff = high = GinPageGetOpaque(page)->maxoff; Assert(high >= low); high++; while (high > low) { OffsetNumber mid = low + ((high - low) / 2); pitem = (PostingItem *) GinDataPageGetItem(page, mid); if (mid == maxoff) { /* * Right infinity, page already correctly chosen with a help of * dataIsMoveRight */ result = -1; } else { pitem = (PostingItem *) GinDataPageGetItem(page, mid); result = ginCompareItemPointers(btree->items + btree->curitem, &(pitem->key)); } if (result == 0) { stack->off = mid; return PostingItemGetBlockNumber(pitem); } else if (result > 0) low = mid + 1; else high = mid; } Assert(high >= FirstOffsetNumber && high <= maxoff); stack->off = high; pitem = (PostingItem *) GinDataPageGetItem(page, high); return PostingItemGetBlockNumber(pitem); }
/* * split page and fills WAL record. original buffer(lbuf) leaves untouched, * returns shadow page of lbuf filled new data. In leaf page and build mode puts all * ItemPointers to pages. Also, in build mode splits data by way to full fulled * left page */ static Page dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) { char *ptr; OffsetNumber separator; ItemPointer bound; Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf)); ItemPointerData oldbound = *GinDataPageGetRightBound(lpage); int sizeofitem = GinSizeOfDataPageItem(lpage); OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff; Page rpage = BufferGetPage(rbuf); Size pageSize = PageGetPageSize(lpage); Size freeSpace; uint32 nCopied = 1; /* these must be static so they can be returned to caller */ static ginxlogSplit data; static XLogRecData rdata[4]; static char vector[2 * BLCKSZ]; GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); freeSpace = GinDataPageGetFreeSpace(rpage); *prdata = rdata; data.leftChildBlkno = (GinPageIsLeaf(lpage)) ? InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem)); data.updateBlkno = dataPrepareData(btree, lpage, off); memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber), maxoff * sizeofitem); if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff) { nCopied = 0; while (btree->curitem < btree->nitem && maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData))) { memcpy(vector + maxoff * sizeof(ItemPointerData), btree->items + btree->curitem, sizeof(ItemPointerData)); maxoff++; nCopied++; btree->curitem++; } } else { ptr = vector + (off - 1) * sizeofitem; if (maxoff + 1 - off != 0) memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem); if (GinPageIsLeaf(lpage)) { memcpy(ptr, btree->items + btree->curitem, sizeofitem); btree->curitem++; } else memcpy(ptr, &(btree->pitem), sizeofitem); maxoff++; } /* * we suppose that during index creation table scaned from begin to end, * so ItemPointers are monotonically increased.. */ if (btree->isBuild && GinPageRightMost(lpage)) separator = freeSpace / sizeofitem; else separator = maxoff / 2; GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem); GinPageGetOpaque(lpage)->maxoff = separator; memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber), vector + separator * sizeofitem, (maxoff - separator) * sizeofitem); GinPageGetOpaque(rpage)->maxoff = maxoff - separator; PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf)); if (GinPageIsLeaf(lpage)) btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff); else btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff))->key; btree->rightblkno = BufferGetBlockNumber(rbuf); /* set up right bound for left page */ bound = GinDataPageGetRightBound(lpage); *bound = btree->pitem.key; /* set up right bound for right page */ bound = GinDataPageGetRightBound(rpage); *bound = oldbound; data.node = btree->index->rd_node; data.rootBlkno = InvalidBlockNumber; data.lblkno = BufferGetBlockNumber(lbuf); data.rblkno = BufferGetBlockNumber(rbuf); data.separator = separator; data.nitem = maxoff; data.isData = TRUE; data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE; data.isRootSplit = FALSE; data.rightbound = oldbound; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &data; rdata[0].len = sizeof(ginxlogSplit); rdata[0].next = &rdata[1]; rdata[1].buffer = InvalidBuffer; rdata[1].data = vector; rdata[1].len = MAXALIGN(maxoff * sizeofitem); rdata[1].next = NULL; return lpage; }
/* * Start* functions setup state of searches: find correct buffer and locks it, * Stop* functions unlock buffer (but don't release!) */ static void startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) { GinBtreeData btreeEntry; GinBtreeStack *stackEntry; Page page; bool needUnlock = TRUE; if (entry->master != NULL) { entry->isFinished = entry->master->isFinished; return; } /* * We should find entry, and begin scan of posting tree * or just store posting list in memory */ prepareEntryScan(&btreeEntry, index, entry->entry, ginstate); btreeEntry.searchMode = TRUE; stackEntry = ginFindLeafPage(&btreeEntry, NULL); page = BufferGetPage(stackEntry->buffer); entry->isFinished = TRUE; entry->buffer = InvalidBuffer; entry->offset = InvalidOffsetNumber; entry->list = NULL; entry->nlist = 0; entry->reduceResult = FALSE; entry->predictNumberResult = 0; if (btreeEntry.findItem(&btreeEntry, stackEntry)) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); if (GinIsPostingTree(itup)) { BlockNumber rootPostingTree = GinGetPostingTree(itup); GinPostingTreeScan *gdi; Page page; LockBuffer(stackEntry->buffer, GIN_UNLOCK); needUnlock = FALSE; gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); entry->buffer = scanBeginPostingTree(gdi); /* * We keep buffer pinned because we need to prevent deletition * page during scan. See GIN's vacuum implementation. RefCount * is increased to keep buffer pinned after freeGinBtreeStack() call. */ IncrBufferRefCount(entry->buffer); page = BufferGetPage(entry->buffer); entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff; /* * Keep page content in memory to prevent durable page locking */ entry->list = (ItemPointerData *) palloc( BLCKSZ ); entry->nlist = GinPageGetOpaque(page)->maxoff; memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); LockBuffer(entry->buffer, GIN_UNLOCK); freeGinBtreeStack(gdi->stack); pfree(gdi); entry->isFinished = FALSE; } else if (GinGetNPosting(itup) > 0) { entry->nlist = GinGetNPosting(itup); entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist); memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist); entry->isFinished = FALSE; } } if (needUnlock) LockBuffer(stackEntry->buffer, GIN_UNLOCK); freeGinBtreeStack(stackEntry); }
/* * Gets next ItemPointer from PostingTree. Note, that we copy * page into GinScanEntry->list array and unlock page, but keep it pinned * to prevent interference with vacuum */ static void entryGetNextItem(Relation index, GinScanEntry entry) { Page page; BlockNumber blkno; for(;;) { entry->offset++; if (entry->offset <= entry->nlist) { entry->curItem = entry->list[entry->offset - 1]; return; } LockBuffer(entry->buffer, GIN_SHARE); page = BufferGetPage(entry->buffer); for(;;) { /* * It's needed to go by right link. During that we should refind * first ItemPointer greater that stored */ blkno = GinPageGetOpaque(page)->rightlink; LockBuffer(entry->buffer, GIN_UNLOCK); if (blkno == InvalidBlockNumber) { ReleaseBuffer(entry->buffer); ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber); entry->buffer = InvalidBuffer; entry->isFinished = TRUE; return; } entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno); LockBuffer(entry->buffer, GIN_SHARE); page = BufferGetPage(entry->buffer); entry->offset = InvalidOffsetNumber; if (!ItemPointerIsValid(&entry->curItem) || findItemInPage(page, &entry->curItem, &entry->offset)) { /* * Found position equal to or greater than stored */ entry->nlist = GinPageGetOpaque(page)->maxoff; memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); LockBuffer(entry->buffer, GIN_UNLOCK); if ( !ItemPointerIsValid(&entry->curItem) || compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 ) { /* * First pages are deleted or empty, or we found exact position, * so break inner loop and continue outer one. */ break; } /* * Find greater than entry->curItem position, store it. */ entry->curItem = entry->list[entry->offset - 1]; return; } } } }