/* * Inserts array of item pointers, may execute several tree scan (very rare) */ void ginInsertItemPointers(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { BlockNumber rootBlkno = gdi->stack->blkno; gdi->btree.items = items; gdi->btree.nitem = nitem; gdi->btree.curitem = 0; while (gdi->btree.curitem < gdi->btree.nitem) { if (!gdi->stack) gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno); gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack); if (gdi->btree.findItem(&(gdi->btree), gdi->stack)) { /* * gdi->btree.items[gdi->btree.curitem] already exists in index */ gdi->btree.curitem++; LockBuffer(gdi->stack->buffer, GIN_UNLOCK); freeGinBtreeStack(gdi->stack); } else ginInsertValue(&(gdi->btree), gdi->stack, buildStats); gdi->stack = NULL; } }
/* * Inserts array of item pointers, may execute several tree scan (very rare) */ void ginInsertItemPointers(Relation index, BlockNumber rootBlkno, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { GinBtreeData btree; GinBtreeDataLeafInsertData insertdata; GinBtreeStack *stack; ginPrepareDataScan(&btree, index, rootBlkno); btree.isBuild = (buildStats != NULL); insertdata.items = items; insertdata.nitem = nitem; insertdata.curitem = 0; while (insertdata.curitem < insertdata.nitem) { /* search for the leaf page where the first item should go to */ btree.itemptr = insertdata.items[insertdata.curitem]; stack = ginFindLeafPage(&btree, false); if (btree.findItem(&btree, stack)) { /* * Current item already exists in index. */ insertdata.curitem++; LockBuffer(stack->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); } else ginInsertValue(&btree, stack, &insertdata, buildStats); } }
/* * Inserts only one entry to the index, but it can add more than 1 ItemPointer. */ static void ginEntryInsert(Relation index, GinState *ginstate, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild) { GinBtreeData btree; GinBtreeStack *stack; IndexTuple itup; Page page; prepareEntryScan(&btree, index, value, ginstate); stack = ginFindLeafPage(&btree, NULL); page = BufferGetPage(stack->buffer); if (btree.findItem(&btree, stack)) { /* found entry */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); if (GinIsPostingTree(itup)) { /* lock root of posting tree */ GinPostingTreeScan *gdi; BlockNumber rootPostingTree = GinGetPostingTree(itup); /* release all stack */ LockBuffer(stack->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); /* insert into posting tree */ gdi = prepareScanPostingTree(index, rootPostingTree, FALSE); gdi->btree.isBuild = isBuild; insertItemPointer(gdi, items, nitem); return; } itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild); btree.isDelete = TRUE; } else { /* We suppose, that tuple can store at list one itempointer */ itup = GinFormTuple(ginstate, value, items, 1); if (itup == NULL || IndexTupleSize(itup) >= GinMaxItemSize) elog(ERROR, "huge tuple"); if (nitem > 1) { IndexTuple previtup = itup; itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, isBuild); pfree(previtup); } } btree.entry = itup; ginInsertValue(&btree, stack); pfree(itup); }
/* * Insert one or more heap TIDs associated with the given key value. * This will either add a single key entry, or enlarge a pre-existing entry. * * During an index build, buildStats is non-null and the counters * it contains should be incremented as needed. */ void ginEntryInsert(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { GinBtreeData btree; GinBtreeStack *stack; IndexTuple itup; Page page; /* During index build, count the to-be-inserted entry */ if (buildStats) buildStats->nEntries++; ginPrepareEntryScan(&btree, attnum, key, category, ginstate); stack = ginFindLeafPage(&btree, GIN_ROOT_BLKNO, false); page = BufferGetPage(stack->buffer); if (btree.findItem(&btree, stack)) { /* found pre-existing entry */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); if (GinIsPostingTree(itup)) { /* add entries to existing posting tree */ BlockNumber rootPostingTree = GinGetPostingTree(itup); /* release all stack */ LockBuffer(stack->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); /* insert into posting tree */ ginInsertItemPointers(ginstate->index, rootPostingTree, items, nitem, buildStats); return; } /* modify an existing leaf entry */ itup = addItemPointersToLeafTuple(ginstate, itup, items, nitem, buildStats); btree.isDelete = TRUE; } else { /* no match, so construct a new leaf entry */ itup = buildFreshLeafTuple(ginstate, attnum, key, category, items, nitem, buildStats); } /* Insert the new or modified leaf tuple */ btree.entry = itup; ginInsertValue(&btree, stack, buildStats); pfree(itup); }
/* * Insert a value to tree described by stack. * * The value to be inserted is given in 'insertdata'. Its format depends * on whether this is an entry or data tree, ginInsertValue just passes it * through to the tree-specific callback function. * * During an index build, buildStats is non-null and the counters it contains * are incremented as needed. * * NB: the passed-in stack is freed, as though by freeGinBtreeStack. */ void ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata, GinStatsData *buildStats) { bool done; /* If the leaf page was incompletely split, finish the split first */ if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer))) ginFinishSplit(btree, stack, false, buildStats); done = ginPlaceToPage(btree, stack, insertdata, InvalidBlockNumber, InvalidBuffer, buildStats); if (done) { LockBuffer(stack->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); } else ginFinishSplit(btree, stack, true, buildStats); }
/* * Insert value (stored in GinBtree) to tree described by stack * * During an index build, buildStats is non-null and the counters * it contains should be incremented as needed. * * NB: the passed-in stack is freed, as though by freeGinBtreeStack. */ void ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats) { GinBtreeStack *parent = stack; BlockNumber rootBlkno = InvalidBuffer; Page page, rpage, lpage; /* remember root BlockNumber */ while (parent) { rootBlkno = parent->blkno; parent = parent->parent; } while (stack) { XLogRecData *rdata; BlockNumber savedRightLink; page = BufferGetPage(stack->buffer); savedRightLink = GinPageGetOpaque(page)->rightlink; if (btree->isEnoughSpace(btree, stack->buffer, stack->off)) { START_CRIT_SECTION(); btree->placeToPage(btree, stack->buffer, stack->off, &rdata); MarkBufferDirty(stack->buffer); if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } LockBuffer(stack->buffer, GIN_UNLOCK); END_CRIT_SECTION(); freeGinBtreeStack(stack); return; } else { Buffer rbuffer = GinNewBuffer(btree->index); Page newlpage; /* * newlpage is a pointer to memory page, it doesn't associate with * buffer, stack->buffer should be untouched */ newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata); ((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno; /* During index build, count the newly-split page */ if (buildStats) { if (btree->isData) buildStats->nDataPages++; else buildStats->nEntryPages++; } parent = stack->parent; if (parent == NULL) { /* * split root, so we need to allocate new left page and place * pointer on root to left and right page */ Buffer lbuffer = GinNewBuffer(btree->index); ((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE; ((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber; page = BufferGetPage(stack->buffer); lpage = BufferGetPage(lbuffer); rpage = BufferGetPage(rbuffer); GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber; GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); ((ginxlogSplit *) (rdata->data))->lblkno = BufferGetBlockNumber(lbuffer); START_CRIT_SECTION(); GinInitBuffer(stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF); PageRestoreTempPage(newlpage, lpage); btree->fillRoot(btree, stack->buffer, lbuffer, rbuffer); MarkBufferDirty(rbuffer); MarkBufferDirty(lbuffer); MarkBufferDirty(stack->buffer); if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); PageSetLSN(lpage, recptr); PageSetTLI(lpage, ThisTimeLineID); PageSetLSN(rpage, recptr); PageSetTLI(rpage, ThisTimeLineID); } UnlockReleaseBuffer(rbuffer); UnlockReleaseBuffer(lbuffer); LockBuffer(stack->buffer, GIN_UNLOCK); END_CRIT_SECTION(); freeGinBtreeStack(stack); /* During index build, count the newly-added root page */ if (buildStats) { if (btree->isData) buildStats->nDataPages++; else buildStats->nEntryPages++; } return; } else { /* split non-root page */ ((ginxlogSplit *) (rdata->data))->isRootSplit = FALSE; ((ginxlogSplit *) (rdata->data))->rrlink = savedRightLink; lpage = BufferGetPage(stack->buffer); rpage = BufferGetPage(rbuffer); GinPageGetOpaque(rpage)->rightlink = savedRightLink; GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); START_CRIT_SECTION(); PageRestoreTempPage(newlpage, lpage); MarkBufferDirty(rbuffer); MarkBufferDirty(stack->buffer); if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); PageSetLSN(lpage, recptr); PageSetTLI(lpage, ThisTimeLineID); PageSetLSN(rpage, recptr); PageSetTLI(rpage, ThisTimeLineID); } UnlockReleaseBuffer(rbuffer); END_CRIT_SECTION(); } } btree->isDelete = FALSE; /* search parent to lock */ LockBuffer(parent->buffer, GIN_EXCLUSIVE); /* move right if it's needed */ page = BufferGetPage(parent->buffer); while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber) { BlockNumber rightlink = GinPageGetOpaque(page)->rightlink; LockBuffer(parent->buffer, GIN_UNLOCK); if (rightlink == InvalidBlockNumber) { /* * rightmost page, but we don't find parent, we should use * plain search... */ ginFindParents(btree, stack, rootBlkno); parent = stack->parent; page = BufferGetPage(parent->buffer); break; } parent->blkno = rightlink; parent->buffer = ReleaseAndReadBuffer(parent->buffer, btree->index, parent->blkno); LockBuffer(parent->buffer, GIN_EXCLUSIVE); page = BufferGetPage(parent->buffer); } UnlockReleaseBuffer(stack->buffer); pfree(stack); stack = parent; } }
/* * Finish a split by inserting the downlink for the new page to parent. * * On entry, stack->buffer is exclusively locked. * * If freestack is true, all the buffers are released and unlocked as we * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept * locked, and stack is unmodified, except for possibly moving right to find * the correct parent of page. */ static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack, GinStatsData *buildStats) { Page page; bool done; bool first = true; /* * freestack == false when we encounter an incompletely split page during a * scan, while freestack == true is used in the normal scenario that a * split is finished right after the initial insert. */ if (!freestack) elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"", stack->blkno, RelationGetRelationName(btree->index)); /* this loop crawls up the stack until the insertion is complete */ do { GinBtreeStack *parent = stack->parent; void *insertdata; BlockNumber updateblkno; /* search parent to lock */ LockBuffer(parent->buffer, GIN_EXCLUSIVE); /* * If the parent page was incompletely split, finish that split first, * then continue with the current one. * * Note: we have to finish *all* incomplete splits we encounter, even * if we have to move right. Otherwise we might choose as the target * a page that has no downlink in the parent, and splitting it further * would fail. */ if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer))) ginFinishSplit(btree, parent, false, buildStats); /* move right if it's needed */ page = BufferGetPage(parent->buffer); while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber) { if (GinPageRightMost(page)) { /* * rightmost page, but we don't find parent, we should use * plain search... */ LockBuffer(parent->buffer, GIN_UNLOCK); ginFindParents(btree, stack); parent = stack->parent; Assert(parent != NULL); break; } parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE); parent->blkno = BufferGetBlockNumber(parent->buffer); page = BufferGetPage(parent->buffer); if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer))) ginFinishSplit(btree, parent, false, buildStats); } /* insert the downlink */ insertdata = btree->prepareDownlink(btree, stack->buffer); updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink; done = ginPlaceToPage(btree, parent, insertdata, updateblkno, stack->buffer, buildStats); pfree(insertdata); /* * If the caller requested to free the stack, unlock and release the * child buffer now. Otherwise keep it pinned and locked, but if we * have to recurse up the tree, we can unlock the upper pages, only * keeping the page at the bottom of the stack locked. */ if (!first || freestack) LockBuffer(stack->buffer, GIN_UNLOCK); if (freestack) { ReleaseBuffer(stack->buffer); pfree(stack); } stack = parent; first = false; } while (!done); /* unlock the parent */ LockBuffer(stack->buffer, GIN_UNLOCK); if (freestack) freeGinBtreeStack(stack); }
/* * Start* functions setup state of searches: find correct buffer and locks it, * Stop* functions unlock buffer (but don't release!) */ static void startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) { GinBtreeData btreeEntry; GinBtreeStack *stackEntry; Page page; bool needUnlock = TRUE; if (entry->master != NULL) { entry->isFinished = entry->master->isFinished; return; } /* * We should find entry, and begin scan of posting tree * or just store posting list in memory */ prepareEntryScan(&btreeEntry, index, entry->entry, ginstate); btreeEntry.searchMode = TRUE; stackEntry = ginFindLeafPage(&btreeEntry, NULL); page = BufferGetPage(stackEntry->buffer); entry->isFinished = TRUE; entry->buffer = InvalidBuffer; entry->offset = InvalidOffsetNumber; entry->list = NULL; entry->nlist = 0; entry->reduceResult = FALSE; entry->predictNumberResult = 0; if (btreeEntry.findItem(&btreeEntry, stackEntry)) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); if (GinIsPostingTree(itup)) { BlockNumber rootPostingTree = GinGetPostingTree(itup); GinPostingTreeScan *gdi; Page page; LockBuffer(stackEntry->buffer, GIN_UNLOCK); needUnlock = FALSE; gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); entry->buffer = scanBeginPostingTree(gdi); /* * We keep buffer pinned because we need to prevent deletition * page during scan. See GIN's vacuum implementation. RefCount * is increased to keep buffer pinned after freeGinBtreeStack() call. */ IncrBufferRefCount(entry->buffer); page = BufferGetPage(entry->buffer); entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff; /* * Keep page content in memory to prevent durable page locking */ entry->list = (ItemPointerData *) palloc( BLCKSZ ); entry->nlist = GinPageGetOpaque(page)->maxoff; memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); LockBuffer(entry->buffer, GIN_UNLOCK); freeGinBtreeStack(gdi->stack); pfree(gdi); entry->isFinished = FALSE; } else if (GinGetNPosting(itup) > 0) { entry->nlist = GinGetNPosting(itup); entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist); memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist); entry->isFinished = FALSE; } } if (needUnlock) LockBuffer(stackEntry->buffer, GIN_UNLOCK); freeGinBtreeStack(stackEntry); }
/* * Inserts only one entry to the index, but it can add more than 1 ItemPointer. * * During an index build, buildStats is non-null and the counters * it contains should be incremented as needed. */ void ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { GinBtreeData btree; GinBtreeStack *stack; IndexTuple itup; Page page; /* During index build, count the to-be-inserted entry */ if (buildStats) buildStats->nEntries++; ginPrepareEntryScan(&btree, index, attnum, value, ginstate); stack = ginFindLeafPage(&btree, NULL); page = BufferGetPage(stack->buffer); if (btree.findItem(&btree, stack)) { /* found entry */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); if (GinIsPostingTree(itup)) { /* lock root of posting tree */ GinPostingTreeScan *gdi; BlockNumber rootPostingTree = GinGetPostingTree(itup); /* release all stack */ LockBuffer(stack->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); /* insert into posting tree */ gdi = ginPrepareScanPostingTree(index, rootPostingTree, FALSE); gdi->btree.isBuild = (buildStats != NULL); ginInsertItemPointer(gdi, items, nitem, buildStats); pfree(gdi); return; } itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, buildStats); btree.isDelete = TRUE; } else { /* We suppose that tuple can store at least one itempointer */ itup = GinFormTuple(index, ginstate, attnum, value, items, 1, true); if (nitem > 1) { /* Add the rest, making a posting tree if necessary */ IndexTuple previtup = itup; itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, buildStats); pfree(previtup); } } btree.entry = itup; ginInsertValue(&btree, stack, buildStats); pfree(itup); }