/* * Creates posting tree with one page. Function * suppose that items[] fits to page */ static BlockNumber createPostingTree(Relation index, ItemPointerData *items, uint32 nitems) { BlockNumber blkno; Buffer buffer = GinNewBuffer(index); Page page; START_CRIT_SECTION(); GinInitBuffer(buffer, GIN_DATA | GIN_LEAF); page = BufferGetPage(buffer); blkno = BufferGetBlockNumber(buffer); memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems); GinPageGetOpaque(page)->maxoff = nitems; MarkBufferDirty(buffer); if (!index->rd_istemp) { XLogRecPtr recptr; XLogRecData rdata[2]; ginxlogCreatePostingTree data; data.node = index->rd_node; data.blkno = blkno; data.nitem = nitems; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &data; rdata[0].len = sizeof(ginxlogCreatePostingTree); rdata[0].next = &rdata[1]; rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) items; rdata[1].len = sizeof(ItemPointerData) * nitems; rdata[1].next = NULL; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); return blkno; }
IndexBuildResult * ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) { IndexBuildResult *result; double reltuples; GinBuildState buildstate; Buffer RootBuffer, MetaBuffer; ItemPointerData *list; Datum key; GinNullCategory category; uint32 nlist; MemoryContext oldCtx; OffsetNumber attnum; if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); initGinState(&buildstate.ginstate, index); buildstate.indtuples = 0; memset(&buildstate.buildStats, 0, sizeof(GinStatsData)); /* initialize the meta page */ MetaBuffer = GinNewBuffer(index); /* initialize the root page */ RootBuffer = GinNewBuffer(index); START_CRIT_SECTION(); GinInitMetabuffer(MetaBuffer); MarkBufferDirty(MetaBuffer); GinInitBuffer(RootBuffer, GIN_LEAF); MarkBufferDirty(RootBuffer); if (RelationNeedsWAL(index)) { XLogRecPtr recptr; Page page; XLogBeginInsert(); XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX); page = BufferGetPage(RootBuffer); PageSetLSN(page, recptr); page = BufferGetPage(MetaBuffer); PageSetLSN(page, recptr); } UnlockReleaseBuffer(MetaBuffer); UnlockReleaseBuffer(RootBuffer); END_CRIT_SECTION(); /* count the root as first entry page */ buildstate.buildStats.nEntryPages++; /* * create a temporary memory context that is used to hold data not yet * dumped out to the index */ buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, "Gin build temporary context", ALLOCSET_DEFAULT_SIZES); /* * create a temporary memory context that is used for calling * ginExtractEntries(), and can be reset after each tuple */ buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext, "Gin build temporary context for user-defined function", ALLOCSET_DEFAULT_SIZES); buildstate.accum.ginstate = &buildstate.ginstate; ginInitBA(&buildstate.accum); /* * Do the heap scan. We disallow sync scan here because dataPlaceToPage * prefers to receive tuples in TID order. */ reltuples = IndexBuildHeapScan(heap, index, indexInfo, false, ginBuildCallback, (void *) &buildstate, NULL); /* dump remaining entries to the index */ oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); ginBeginBAScan(&buildstate.accum); while ((list = ginGetBAEntry(&buildstate.accum, &attnum, &key, &category, &nlist)) != NULL) { /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); ginEntryInsert(&buildstate.ginstate, attnum, key, category, list, nlist, &buildstate.buildStats); } MemoryContextSwitchTo(oldCtx); MemoryContextDelete(buildstate.funcCtx); MemoryContextDelete(buildstate.tmpCtx); /* * Update metapage stats */ buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index); ginUpdateStats(index, &buildstate.buildStats); /* * Return statistics */ result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); result->heap_tuples = reltuples; result->index_tuples = buildstate.indtuples; return result; }
Datum ginbuild(PG_FUNCTION_ARGS) { Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); IndexBuildResult *result; double reltuples; GinBuildState buildstate; Buffer RootBuffer, MetaBuffer; ItemPointerData *list; Datum key; GinNullCategory category; uint32 nlist; MemoryContext oldCtx; OffsetNumber attnum; if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); initGinState(&buildstate.ginstate, index); buildstate.indtuples = 0; memset(&buildstate.buildStats, 0, sizeof(GinStatsData)); /* initialize the meta page */ MetaBuffer = GinNewBuffer(index); /* initialize the root page */ RootBuffer = GinNewBuffer(index); START_CRIT_SECTION(); GinInitMetabuffer(MetaBuffer); MarkBufferDirty(MetaBuffer); GinInitBuffer(RootBuffer, GIN_LEAF); MarkBufferDirty(RootBuffer); if (RelationNeedsWAL(index)) { XLogRecPtr recptr; XLogRecData rdata; Page page; rdata.buffer = InvalidBuffer; rdata.data = (char *) &(index->rd_node); rdata.len = sizeof(RelFileNode); rdata.next = NULL; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata); page = BufferGetPage(RootBuffer); PageSetLSN(page, recptr); page = BufferGetPage(MetaBuffer); PageSetLSN(page, recptr); } UnlockReleaseBuffer(MetaBuffer); UnlockReleaseBuffer(RootBuffer); END_CRIT_SECTION(); /* count the root as first entry page */ buildstate.buildStats.nEntryPages++; /* * create a temporary memory context that is reset once for each tuple * inserted into the index */ buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, "Gin build temporary context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); buildstate.funcCtx = AllocSetContextCreate(buildstate.tmpCtx, "Gin build temporary context for user-defined function", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); buildstate.accum.ginstate = &buildstate.ginstate; ginInitBA(&buildstate.accum); /* * Do the heap scan. We disallow sync scan here because dataPlaceToPage * prefers to receive tuples in TID order. */ reltuples = IndexBuildHeapScan(heap, index, indexInfo, false, ginBuildCallback, (void *) &buildstate); /* dump remaining entries to the index */ oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); ginBeginBAScan(&buildstate.accum); while ((list = ginGetBAEntry(&buildstate.accum, &attnum, &key, &category, &nlist)) != NULL) { /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); ginEntryInsert(&buildstate.ginstate, attnum, key, category, list, nlist, &buildstate.buildStats); } MemoryContextSwitchTo(oldCtx); MemoryContextDelete(buildstate.tmpCtx); /* * Update metapage stats */ buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index); ginUpdateStats(index, &buildstate.buildStats); /* * Return statistics */ result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); result->heap_tuples = reltuples; result->index_tuples = buildstate.indtuples; PG_RETURN_POINTER(result); }
/* * Insert value (stored in GinBtree) to tree described by stack * * During an index build, buildStats is non-null and the counters * it contains should be incremented as needed. * * NB: the passed-in stack is freed, as though by freeGinBtreeStack. */ void ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats) { GinBtreeStack *parent = stack; BlockNumber rootBlkno = InvalidBuffer; Page page, rpage, lpage; /* remember root BlockNumber */ while (parent) { rootBlkno = parent->blkno; parent = parent->parent; } while (stack) { XLogRecData *rdata; BlockNumber savedRightLink; page = BufferGetPage(stack->buffer); savedRightLink = GinPageGetOpaque(page)->rightlink; if (btree->isEnoughSpace(btree, stack->buffer, stack->off)) { START_CRIT_SECTION(); btree->placeToPage(btree, stack->buffer, stack->off, &rdata); MarkBufferDirty(stack->buffer); if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } LockBuffer(stack->buffer, GIN_UNLOCK); END_CRIT_SECTION(); freeGinBtreeStack(stack); return; } else { Buffer rbuffer = GinNewBuffer(btree->index); Page newlpage; /* * newlpage is a pointer to memory page, it doesn't associate with * buffer, stack->buffer should be untouched */ newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata); ((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno; /* During index build, count the newly-split page */ if (buildStats) { if (btree->isData) buildStats->nDataPages++; else buildStats->nEntryPages++; } parent = stack->parent; if (parent == NULL) { /* * split root, so we need to allocate new left page and place * pointer on root to left and right page */ Buffer lbuffer = GinNewBuffer(btree->index); ((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE; ((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber; page = BufferGetPage(stack->buffer); lpage = BufferGetPage(lbuffer); rpage = BufferGetPage(rbuffer); GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber; GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); ((ginxlogSplit *) (rdata->data))->lblkno = BufferGetBlockNumber(lbuffer); START_CRIT_SECTION(); GinInitBuffer(stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF); PageRestoreTempPage(newlpage, lpage); btree->fillRoot(btree, stack->buffer, lbuffer, rbuffer); MarkBufferDirty(rbuffer); MarkBufferDirty(lbuffer); MarkBufferDirty(stack->buffer); if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); PageSetLSN(lpage, recptr); PageSetTLI(lpage, ThisTimeLineID); PageSetLSN(rpage, recptr); PageSetTLI(rpage, ThisTimeLineID); } UnlockReleaseBuffer(rbuffer); UnlockReleaseBuffer(lbuffer); LockBuffer(stack->buffer, GIN_UNLOCK); END_CRIT_SECTION(); freeGinBtreeStack(stack); /* During index build, count the newly-added root page */ if (buildStats) { if (btree->isData) buildStats->nDataPages++; else buildStats->nEntryPages++; } return; } else { /* split non-root page */ ((ginxlogSplit *) (rdata->data))->isRootSplit = FALSE; ((ginxlogSplit *) (rdata->data))->rrlink = savedRightLink; lpage = BufferGetPage(stack->buffer); rpage = BufferGetPage(rbuffer); GinPageGetOpaque(rpage)->rightlink = savedRightLink; GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); START_CRIT_SECTION(); PageRestoreTempPage(newlpage, lpage); MarkBufferDirty(rbuffer); MarkBufferDirty(stack->buffer); if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); PageSetLSN(lpage, recptr); PageSetTLI(lpage, ThisTimeLineID); PageSetLSN(rpage, recptr); PageSetTLI(rpage, ThisTimeLineID); } UnlockReleaseBuffer(rbuffer); END_CRIT_SECTION(); } } btree->isDelete = FALSE; /* search parent to lock */ LockBuffer(parent->buffer, GIN_EXCLUSIVE); /* move right if it's needed */ page = BufferGetPage(parent->buffer); while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber) { BlockNumber rightlink = GinPageGetOpaque(page)->rightlink; LockBuffer(parent->buffer, GIN_UNLOCK); if (rightlink == InvalidBlockNumber) { /* * rightmost page, but we don't find parent, we should use * plain search... */ ginFindParents(btree, stack, rootBlkno); parent = stack->parent; page = BufferGetPage(parent->buffer); break; } parent->blkno = rightlink; parent->buffer = ReleaseAndReadBuffer(parent->buffer, btree->index, parent->blkno); LockBuffer(parent->buffer, GIN_EXCLUSIVE); page = BufferGetPage(parent->buffer); } UnlockReleaseBuffer(stack->buffer); pfree(stack); stack = parent; } }
/* * Insert a new item to a page. * * Returns true if the insertion was finished. On false, the page was split and * the parent needs to be updated. (a root split returns true as it doesn't * need any further action by the caller to complete) * * When inserting a downlink to a internal page, 'childbuf' contains the * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared * atomically with the insert. Also, the existing item at the given location * is updated to point to 'updateblkno'. * * stack->buffer is locked on entry, and is kept locked. */ static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, void *insertdata, BlockNumber updateblkno, Buffer childbuf, GinStatsData *buildStats) { Page page = BufferGetPage(stack->buffer); XLogRecData *payloadrdata; bool fit; uint16 xlflags = 0; Page childpage = NULL; if (GinPageIsData(page)) xlflags |= GIN_INSERT_ISDATA; if (GinPageIsLeaf(page)) { xlflags |= GIN_INSERT_ISLEAF; Assert(!BufferIsValid(childbuf)); Assert(updateblkno == InvalidBlockNumber); } else { Assert(BufferIsValid(childbuf)); Assert(updateblkno != InvalidBlockNumber); childpage = BufferGetPage(childbuf); } /* * Try to put the incoming tuple on the page. If it doesn't fit, * placeToPage method will return false and leave the page unmodified, and * we'll have to split the page. */ START_CRIT_SECTION(); fit = btree->placeToPage(btree, stack->buffer, stack->off, insertdata, updateblkno, &payloadrdata); if (fit) { MarkBufferDirty(stack->buffer); /* An insert to an internal page finishes the split of the child. */ if (childbuf != InvalidBuffer) { GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; MarkBufferDirty(childbuf); } if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; XLogRecData rdata[3]; ginxlogInsert xlrec; BlockIdData childblknos[2]; xlrec.node = btree->index->rd_node; xlrec.blkno = BufferGetBlockNumber(stack->buffer); xlrec.offset = stack->off; xlrec.flags = xlflags; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(ginxlogInsert); /* * Log information about child if this was an insertion of a * downlink. */ if (childbuf != InvalidBuffer) { rdata[0].next = &rdata[1]; BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf)); BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink); rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) childblknos; rdata[1].len = sizeof(BlockIdData) * 2; rdata[1].next = &rdata[2]; rdata[2].buffer = childbuf; rdata[2].buffer_std = false; rdata[2].data = NULL; rdata[2].len = 0; rdata[2].next = payloadrdata; } else rdata[0].next = payloadrdata; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata); PageSetLSN(page, recptr); if (childbuf != InvalidBuffer) PageSetLSN(childpage, recptr); } END_CRIT_SECTION(); return true; } else { /* Didn't fit, have to split */ Buffer rbuffer; Page newlpage; BlockNumber savedRightLink; Page rpage; XLogRecData rdata[2]; ginxlogSplit data; Buffer lbuffer = InvalidBuffer; Page newrootpg = NULL; END_CRIT_SECTION(); rbuffer = GinNewBuffer(btree->index); /* During index build, count the new page */ if (buildStats) { if (btree->isData) buildStats->nDataPages++; else buildStats->nEntryPages++; } savedRightLink = GinPageGetOpaque(page)->rightlink; /* * newlpage is a pointer to memory page, it is not associated with a * buffer. stack->buffer is not touched yet. */ newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, insertdata, updateblkno, &payloadrdata); data.node = btree->index->rd_node; data.rblkno = BufferGetBlockNumber(rbuffer); data.flags = xlflags; if (childbuf != InvalidBuffer) { Page childpage = BufferGetPage(childbuf); GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; data.leftChildBlkno = BufferGetBlockNumber(childbuf); data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink; } else data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &data; rdata[0].len = sizeof(ginxlogSplit); if (childbuf != InvalidBuffer) { rdata[0].next = &rdata[1]; rdata[1].buffer = childbuf; rdata[1].buffer_std = false; rdata[1].data = NULL; rdata[1].len = 0; rdata[1].next = payloadrdata; } else rdata[0].next = payloadrdata; rpage = BufferGetPage(rbuffer); if (stack->parent == NULL) { /* * split root, so we need to allocate new left page and place * pointer on root to left and right page */ lbuffer = GinNewBuffer(btree->index); /* During index build, count the newly-added root page */ if (buildStats) { if (btree->isData) buildStats->nDataPages++; else buildStats->nEntryPages++; } /* * root never has a right-link, so we borrow the rrlink field to * store the root block number. */ data.rrlink = BufferGetBlockNumber(stack->buffer); data.lblkno = BufferGetBlockNumber(lbuffer); data.flags |= GIN_SPLIT_ROOT; GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber; GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); /* * Construct a new root page containing downlinks to the new left * and right pages. (do this in a temporary copy first rather * than overwriting the original page directly, so that we can still * abort gracefully if this fails.) */ newrootpg = PageGetTempPage(rpage); GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF, BLCKSZ); btree->fillRoot(btree, newrootpg, BufferGetBlockNumber(lbuffer), newlpage, BufferGetBlockNumber(rbuffer), rpage); } else { /* split non-root page */ data.rrlink = savedRightLink; data.lblkno = BufferGetBlockNumber(stack->buffer); GinPageGetOpaque(rpage)->rightlink = savedRightLink; GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT; GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); } /* * Ok, we have the new contents of the left page in a temporary copy * now (newlpage), and the newly-allocated right block has been filled * in. The original page is still unchanged. * * If this is a root split, we also have a temporary page containing * the new contents of the root. Copy the new left page to a * newly-allocated block, and initialize the (original) root page the * new copy. Otherwise, copy over the temporary copy of the new left * page over the old left page. */ START_CRIT_SECTION(); MarkBufferDirty(rbuffer); if (stack->parent == NULL) { PageRestoreTempPage(newlpage, BufferGetPage(lbuffer)); MarkBufferDirty(lbuffer); newlpage = newrootpg; } PageRestoreTempPage(newlpage, BufferGetPage(stack->buffer)); MarkBufferDirty(stack->buffer); /* write WAL record */ if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); PageSetLSN(BufferGetPage(stack->buffer), recptr); PageSetLSN(rpage, recptr); if (stack->parent == NULL) PageSetLSN(BufferGetPage(lbuffer), recptr); } END_CRIT_SECTION(); /* * We can release the lock on the right page now, but keep the * original buffer locked. */ UnlockReleaseBuffer(rbuffer); if (stack->parent == NULL) UnlockReleaseBuffer(lbuffer); /* * If we split the root, we're done. Otherwise the split is not * complete until the downlink for the new page has been inserted to * the parent. */ if (stack->parent == NULL) return true; else return false; } }
static void makeSublist(Relation index, IndexTuple *tuples, int32 ntuples, GinMetaPageData *res) { Buffer curBuffer = InvalidBuffer; Buffer prevBuffer = InvalidBuffer; int i, size = 0, tupsize; int startTuple = 0; Assert(ntuples > 0); /* * Split tuples into pages */ for (i = 0; i < ntuples; i++) { if (curBuffer == InvalidBuffer) { curBuffer = GinNewBuffer(index); if (prevBuffer != InvalidBuffer) { res->nPendingPages++; writeListPage(index, prevBuffer, tuples + startTuple, i - startTuple, BufferGetBlockNumber(curBuffer)); } else { res->head = BufferGetBlockNumber(curBuffer); } prevBuffer = curBuffer; startTuple = i; size = 0; } tupsize = MAXALIGN(IndexTupleSize(tuples[i])) + sizeof(ItemIdData); if (size + tupsize > GinListPageSize) { /* won't fit, force a new page and reprocess */ i--; curBuffer = InvalidBuffer; } else { size += tupsize; } } /* * Write last page */ res->tail = BufferGetBlockNumber(curBuffer); res->tailFreeSize = writeListPage(index, curBuffer, tuples + startTuple, ntuples - startTuple, InvalidBlockNumber); res->nPendingPages++; /* that was only one heap tuple */ res->nPendingHeapTuples = 1; }
/* * Insert a new item to a page. * * Returns true if the insertion was finished. On false, the page was split and * the parent needs to be updated. (A root split returns true as it doesn't * need any further action by the caller to complete.) * * When inserting a downlink to an internal page, 'childbuf' contains the * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared * atomically with the insert. Also, the existing item at offset stack->off * in the target page is updated to point to updateblkno. * * stack->buffer is locked on entry, and is kept locked. * Likewise for childbuf, if given. */ static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, void *insertdata, BlockNumber updateblkno, Buffer childbuf, GinStatsData *buildStats) { Page page = BufferGetPage(stack->buffer); bool result; GinPlaceToPageRC rc; uint16 xlflags = 0; Page childpage = NULL; Page newlpage = NULL, newrpage = NULL; void *ptp_workspace = NULL; XLogRecData payloadrdata[10]; MemoryContext tmpCxt; MemoryContext oldCxt; /* * We do all the work of this function and its subfunctions in a temporary * memory context. This avoids leakages and simplifies APIs, since some * subfunctions allocate storage that has to survive until we've finished * the WAL insertion. */ tmpCxt = AllocSetContextCreate(CurrentMemoryContext, "ginPlaceToPage temporary context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); oldCxt = MemoryContextSwitchTo(tmpCxt); if (GinPageIsData(page)) xlflags |= GIN_INSERT_ISDATA; if (GinPageIsLeaf(page)) { xlflags |= GIN_INSERT_ISLEAF; Assert(!BufferIsValid(childbuf)); Assert(updateblkno == InvalidBlockNumber); } else { Assert(BufferIsValid(childbuf)); Assert(updateblkno != InvalidBlockNumber); childpage = BufferGetPage(childbuf); } /* * See if the incoming tuple will fit on the page. beginPlaceToPage will * decide if the page needs to be split, and will compute the split * contents if so. See comments for beginPlaceToPage and execPlaceToPage * functions for more details of the API here. */ rc = btree->beginPlaceToPage(btree, stack->buffer, stack, insertdata, updateblkno, &ptp_workspace, &newlpage, &newrpage, payloadrdata); if (rc == GPTP_NO_WORK) { /* Nothing to do */ result = true; } else if (rc == GPTP_INSERT) { /* It will fit, perform the insertion */ START_CRIT_SECTION(); /* Perform the page update, and set up WAL data about it */ btree->execPlaceToPage(btree, stack->buffer, stack, insertdata, updateblkno, ptp_workspace, payloadrdata); MarkBufferDirty(stack->buffer); /* An insert to an internal page finishes the split of the child. */ if (BufferIsValid(childbuf)) { GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; MarkBufferDirty(childbuf); } if (RelationNeedsWAL(btree->index)) { XLogRecPtr recptr; XLogRecData rdata[3]; ginxlogInsert xlrec; BlockIdData childblknos[2]; xlrec.node = btree->index->rd_node; xlrec.blkno = BufferGetBlockNumber(stack->buffer); xlrec.flags = xlflags; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(ginxlogInsert); /* * Log information about child if this was an insertion of a * downlink. */ if (BufferIsValid(childbuf)) { rdata[0].next = &rdata[1]; BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf)); BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink); rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) childblknos; rdata[1].len = sizeof(BlockIdData) * 2; rdata[1].next = &rdata[2]; rdata[2].buffer = childbuf; rdata[2].buffer_std = true; rdata[2].data = NULL; rdata[2].len = 0; rdata[2].next = payloadrdata; } else rdata[0].next = payloadrdata; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata); PageSetLSN(page, recptr); if (BufferIsValid(childbuf)) PageSetLSN(childpage, recptr); } END_CRIT_SECTION(); /* Insertion is complete. */ result = true; } else if (rc == GPTP_SPLIT) { /* * Didn't fit, need to split. The split has been computed in newlpage * and newrpage, which are pointers to palloc'd pages, not associated * with buffers. stack->buffer is not touched yet. */ Buffer rbuffer; BlockNumber savedRightLink; ginxlogSplit data; Buffer lbuffer = InvalidBuffer; Page newrootpg = NULL; /* Get a new index page to become the right page */ rbuffer = GinNewBuffer(btree->index); /* During index build, count the new page */ if (buildStats) { if (btree->isData) buildStats->nDataPages++; else buildStats->nEntryPages++; } savedRightLink = GinPageGetOpaque(page)->rightlink; /* Begin setting up WAL record (which we might not use) */ data.node = btree->index->rd_node; data.rblkno = BufferGetBlockNumber(rbuffer); data.flags = xlflags; if (BufferIsValid(childbuf)) { data.leftChildBlkno = BufferGetBlockNumber(childbuf); data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink; } else data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber; if (stack->parent == NULL) { /* * splitting the root, so we need to allocate new left page and * place pointers to left and right page on root page. */ lbuffer = GinNewBuffer(btree->index); /* During index build, count the new left page */ if (buildStats) { if (btree->isData) buildStats->nDataPages++; else buildStats->nEntryPages++; } /* * root never has a right-link, so we borrow the rrlink field to * store the root block number. */ data.rrlink = BufferGetBlockNumber(stack->buffer); data.lblkno = BufferGetBlockNumber(lbuffer); data.flags |= GIN_SPLIT_ROOT; GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber; GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); /* * Construct a new root page containing downlinks to the new left * and right pages. (Do this in a temporary copy rather than * overwriting the original page directly, since we're not in the * critical section yet.) */ newrootpg = PageGetTempPage(newrpage); GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ); btree->fillRoot(btree, newrootpg, BufferGetBlockNumber(lbuffer), newlpage, BufferGetBlockNumber(rbuffer), newrpage); } else { /* splitting a non-root page */ data.rrlink = savedRightLink; data.lblkno = BufferGetBlockNumber(stack->buffer); GinPageGetOpaque(newrpage)->rightlink = savedRightLink; GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT; GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer); } /* * OK, we have the new contents of the left page in a temporary copy * now (newlpage), and likewise for the new contents of the * newly-allocated right block. The original page is still unchanged. * * If this is a root split, we also have a temporary page containing * the new contents of the root. */ START_CRIT_SECTION(); MarkBufferDirty(rbuffer); MarkBufferDirty(stack->buffer); /* * Restore the temporary copies over the real buffers. */ if (stack->parent == NULL) { /* Splitting the root, three pages to update */ MarkBufferDirty(lbuffer); memcpy(page, newrootpg, BLCKSZ); memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ); memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ); } else { /* Normal split, only two pages to update */ memcpy(page, newlpage, BLCKSZ); memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ); } /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */ if (BufferIsValid(childbuf)) { GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; MarkBufferDirty(childbuf); } /* write WAL record */ if (RelationNeedsWAL(btree->index)) { XLogRecData rdata[2]; XLogRecPtr recptr; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &data; rdata[0].len = sizeof(ginxlogSplit); if (BufferIsValid(childbuf)) { rdata[0].next = &rdata[1]; rdata[1].buffer = childbuf; rdata[1].buffer_std = true; rdata[1].data = NULL; rdata[1].len = 0; rdata[1].next = payloadrdata; } else rdata[0].next = payloadrdata; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata); PageSetLSN(page, recptr); PageSetLSN(BufferGetPage(rbuffer), recptr); if (stack->parent == NULL) PageSetLSN(BufferGetPage(lbuffer), recptr); if (BufferIsValid(childbuf)) PageSetLSN(childpage, recptr); } END_CRIT_SECTION(); /* * We can release the locks/pins on the new pages now, but keep * stack->buffer locked. childbuf doesn't get unlocked either. */ UnlockReleaseBuffer(rbuffer); if (stack->parent == NULL) UnlockReleaseBuffer(lbuffer); /* * If we split the root, we're done. Otherwise the split is not * complete until the downlink for the new page has been inserted to * the parent. */ result = (stack->parent == NULL); } else { elog(ERROR, "invalid return code from GIN placeToPage method: %d", rc); result = false; /* keep compiler quiet */ } /* Clean up temp context */ MemoryContextSwitchTo(oldCxt); MemoryContextDelete(tmpCxt); return result; }
/* * Creates new posting tree containing the given TIDs. Returns the page * number of the root of the new posting tree. * * items[] must be in sorted order with no duplicates. */ BlockNumber createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, GinStatsData *buildStats) { BlockNumber blkno; Buffer buffer; Page page; int nrootitems; /* Calculate how many TIDs will fit on first page. */ nrootitems = Min(nitems, GinMaxLeafDataItems); /* * Create the root page. */ buffer = GinNewBuffer(index); page = BufferGetPage(buffer); blkno = BufferGetBlockNumber(buffer); START_CRIT_SECTION(); GinInitBuffer(buffer, GIN_DATA | GIN_LEAF); memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nrootitems); GinPageGetOpaque(page)->maxoff = nrootitems; MarkBufferDirty(buffer); if (RelationNeedsWAL(index)) { XLogRecPtr recptr; XLogRecData rdata[2]; ginxlogCreatePostingTree data; data.node = index->rd_node; data.blkno = blkno; data.nitem = nrootitems; rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &data; rdata[0].len = sizeof(ginxlogCreatePostingTree); rdata[0].next = &rdata[1]; rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) items; rdata[1].len = sizeof(ItemPointerData) * nrootitems; rdata[1].next = NULL; recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata); PageSetLSN(page, recptr); } UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); /* During index build, count the newly-added data page */ if (buildStats) buildStats->nDataPages++; /* * Add any remaining TIDs to the newly-created posting tree. */ if (nitems > nrootitems) { ginInsertItemPointers(index, blkno, items + nrootitems, nitems - nrootitems, buildStats); } return blkno; }
Datum ginbuild(PG_FUNCTION_ARGS) { Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); IndexBuildResult *result; double reltuples; GinBuildState buildstate; Buffer buffer; ItemPointerData *list; Datum entry; uint32 nlist; MemoryContext oldCtx; if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); initGinState(&buildstate.ginstate, index); /* initialize the root page */ buffer = GinNewBuffer(index); START_CRIT_SECTION(); GinInitBuffer(buffer, GIN_LEAF); MarkBufferDirty(buffer); if (!index->rd_istemp) { XLogRecPtr recptr; XLogRecData rdata; Page page; rdata.buffer = InvalidBuffer; rdata.data = (char *) &(index->rd_node); rdata.len = sizeof(RelFileNode); rdata.next = NULL; page = BufferGetPage(buffer); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } UnlockReleaseBuffer(buffer); END_CRIT_SECTION(); /* build the index */ buildstate.indtuples = 0; /* * create a temporary memory context that is reset once for each tuple * inserted into the index */ buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, "Gin build temporary context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); buildstate.funcCtx = AllocSetContextCreate(buildstate.tmpCtx, "Gin build temporary context for user-defined function", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); buildstate.accum.ginstate = &buildstate.ginstate; ginInitBA(&buildstate.accum); /* * Do the heap scan. We disallow sync scan here because dataPlaceToPage * prefers to receive tuples in TID order. */ reltuples = IndexBuildHeapScan(heap, index, indexInfo, false, ginBuildCallback, (void *) &buildstate); oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); while ((list = ginGetEntry(&buildstate.accum, &entry, &nlist)) != NULL) ginEntryInsert(index, &buildstate.ginstate, entry, list, nlist, TRUE); MemoryContextSwitchTo(oldCtx); MemoryContextDelete(buildstate.tmpCtx); /* * Return statistics */ result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); result->heap_tuples = reltuples; result->index_tuples = buildstate.indtuples; PG_RETURN_POINTER(result); }