/*
 * Redo routine for an SP-GiST index-creation record.
 *
 * The record references three blocks, each obtained with
 * XLogInitBufferForRedo and then initialized here: reference 0 is the
 * metapage, reference 1 the root page (initialized with SPGIST_LEAF) and
 * reference 2 the nulls page (SPGIST_LEAF | SPGIST_NULLS).  Every page is
 * stamped with the record's end LSN, marked dirty and released before the
 * next one is fetched.
 */
static void
spgRedoCreateIndex(XLogReaderState *record)
{
    XLogRecPtr  endLsn = record->EndRecPtr;
    Buffer      buf;
    Page        metaPage;
    Page        rootPage;
    Page        nullPage;

    /* block reference 0: the metapage */
    buf = XLogInitBufferForRedo(record, 0);
    Assert(BufferGetBlockNumber(buf) == SPGIST_METAPAGE_BLKNO);
    metaPage = (Page) BufferGetPage(buf);
    SpGistInitMetapage(metaPage);
    PageSetLSN(metaPage, endLsn);
    MarkBufferDirty(buf);
    UnlockReleaseBuffer(buf);

    /* block reference 1: the root page, set up as a leaf page */
    buf = XLogInitBufferForRedo(record, 1);
    Assert(BufferGetBlockNumber(buf) == SPGIST_ROOT_BLKNO);
    rootPage = (Page) BufferGetPage(buf);
    SpGistInitBuffer(buf, SPGIST_LEAF);
    PageSetLSN(rootPage, endLsn);
    MarkBufferDirty(buf);
    UnlockReleaseBuffer(buf);

    /* block reference 2: the nulls page, a leaf page flagged for nulls */
    buf = XLogInitBufferForRedo(record, 2);
    Assert(BufferGetBlockNumber(buf) == SPGIST_NULL_BLKNO);
    nullPage = (Page) BufferGetPage(buf);
    SpGistInitBuffer(buf, SPGIST_LEAF | SPGIST_NULLS);
    PageSetLSN(nullPage, endLsn);
    MarkBufferDirty(buf);
    UnlockReleaseBuffer(buf);
}
/*
 * Redo routine for an SP-GiST index-creation record (XLogRecord API).
 *
 * The record's data is the RelFileNode of the index.  The metapage, the root
 * page and the nulls page are each read with init = true, re-initialized,
 * stamped with the record's LSN, marked dirty and released, one after the
 * other.
 */
static void
spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
    RelFileNode *relNode = (RelFileNode *) XLogRecGetData(record);
    Buffer      buf;
    Page        metaPage;
    Page        rootPage;
    Page        nullPage;

    /* Backup blocks are not used in create_index records */
    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));

    /* metapage */
    buf = XLogReadBuffer(*relNode, SPGIST_METAPAGE_BLKNO, true);
    Assert(BufferIsValid(buf));
    metaPage = (Page) BufferGetPage(buf);
    SpGistInitMetapage(metaPage);
    PageSetLSN(metaPage, lsn);
    MarkBufferDirty(buf);
    UnlockReleaseBuffer(buf);

    /* root page, initialized as a leaf page */
    buf = XLogReadBuffer(*relNode, SPGIST_ROOT_BLKNO, true);
    Assert(BufferIsValid(buf));
    rootPage = (Page) BufferGetPage(buf);
    SpGistInitBuffer(buf, SPGIST_LEAF);
    PageSetLSN(rootPage, lsn);
    MarkBufferDirty(buf);
    UnlockReleaseBuffer(buf);

    /* nulls page, initialized as a leaf page that stores nulls */
    buf = XLogReadBuffer(*relNode, SPGIST_NULL_BLKNO, true);
    Assert(BufferIsValid(buf));
    nullPage = (Page) BufferGetPage(buf);
    SpGistInitBuffer(buf, SPGIST_LEAF | SPGIST_NULLS);
    PageSetLSN(nullPage, lsn);
    MarkBufferDirty(buf);
    UnlockReleaseBuffer(buf);
}
/*
 * Redo routine for an SP-GiST index-creation record (two-page layout).
 *
 * The record's data is the RelFileNode of the index.  The metapage and the
 * head page (SPGIST_HEAD_BLKNO, initialized with SPGIST_LEAF) are read with
 * init = true, re-initialized, stamped with the record's LSN and the current
 * timeline ID, marked dirty and released in turn.
 */
static void
spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
    RelFileNode *relNode = (RelFileNode *) XLogRecGetData(record);
    Buffer      buf;
    Page        metaPage;
    Page        headPage;

    /* metapage */
    buf = XLogReadBuffer(*relNode, SPGIST_METAPAGE_BLKNO, true);
    Assert(BufferIsValid(buf));
    metaPage = (Page) BufferGetPage(buf);
    SpGistInitMetapage(metaPage);
    PageSetLSN(metaPage, lsn);
    PageSetTLI(metaPage, ThisTimeLineID);
    MarkBufferDirty(buf);
    UnlockReleaseBuffer(buf);

    /* head page, initialized as a leaf page */
    buf = XLogReadBuffer(*relNode, SPGIST_HEAD_BLKNO, true);
    Assert(BufferIsValid(buf));
    headPage = (Page) BufferGetPage(buf);
    SpGistInitBuffer(buf, SPGIST_LEAF);
    PageSetLSN(headPage, lsn);
    PageSetTLI(headPage, ThisTimeLineID);
    MarkBufferDirty(buf);
    UnlockReleaseBuffer(buf);
}
/*
 * Obtain a freshly initialized page whose type (leaf/inner, nulls/non-nulls)
 * and parity match "flags".  The buffer handed back is pinned and
 * exclusive-locked.
 *
 * Leaf requests are satisfied by the first page we get.  For inner requests
 * the page's block number determines its parity; a page of the wrong parity
 * is released and we simply ask for another one.  We will get a different
 * page because GetFreeIndexPage will have marked the rejected page as used
 * in the FSM.  The rejected page is remembered in the lastUsedPages cache so
 * this session may still use it; failing that, VACUUM must eventually put it
 * back in the FSM.  A rejected page is not marked dirty here, so unless it
 * is used fairly soon its buffer is discarded and the on-disk page stays as
 * it was.
 *
 * The page we return is deliberately NOT recorded in lastUsedPages: the
 * caller is expected to do that once it has consumed whatever space it
 * needs, since by then the page may have less free space than the existing
 * cache entry and we would rather not overwrite that entry prematurely.
 */
static Buffer
allocNewBuffer(Relation index, int flags)
{
    SpGistCache *cache = spgGetCache(index);
    uint16      initFlags = 0;

    if (GBUF_REQ_LEAF(flags))
        initFlags |= SPGIST_LEAF;
    if (GBUF_REQ_NULLS(flags))
        initFlags |= SPGIST_NULLS;

    for (;;)
    {
        Buffer      newBuf = SpGistNewBuffer(index);
        BlockNumber newBlk;
        int         slot;

        SpGistInitBuffer(newBuf, initFlags);

        /* Leaf pages have no parity concerns, so any page will do */
        if (initFlags & SPGIST_LEAF)
            return newBuf;

        newBlk = BufferGetBlockNumber(newBuf);
        slot = GBUF_INNER_PARITY(newBlk);

        /* Inner page of the requested parity: hand it out */
        if ((flags & GBUF_PARITY_MASK) == slot)
            return newBuf;

        /* Wrong parity: remember the page in the cache, then retry */
        if (initFlags & SPGIST_NULLS)
            slot |= GBUF_NULLS;
        cache->lastUsedPages.cachedPage[slot].blkno = newBlk;
        cache->lastUsedPages.cachedPage[slot].freeSpace =
            PageGetExactFreeSpace(BufferGetPage(newBuf));
        UnlockReleaseBuffer(newBuf);
    }
}
/*
 * Obtain a freshly initialized page whose type and parity match "flags".
 * The buffer handed back is pinned and exclusive-locked.
 *
 * Leaf requests are satisfied by the first page we get.  For inner requests
 * the parity of a page is its block number modulo 3; a page of the wrong
 * parity is released and we simply ask for another one.  We will get a
 * different page because GetFreeIndexPage will have marked the rejected page
 * as used in the FSM.  The rejected page is remembered in the lastUsedPages
 * cache so this session may still use it; failing that, VACUUM must
 * eventually put it back in the FSM.  A rejected page is not marked dirty
 * here, so unless it is used fairly soon its buffer is discarded and the
 * on-disk page stays as it was.
 *
 * The page we return is deliberately NOT recorded in lastUsedPages: the
 * caller is expected to do that once it has consumed whatever space it
 * needs, since by then the page may have less free space than the existing
 * cache entry and we would rather not overwrite that entry prematurely.
 */
static Buffer
allocNewBuffer(Relation index, int flags)
{
    SpGistCache *cache = spgGetCache(index);
    int         leafWanted = flags & GBUF_LEAF;

    for (;;)
    {
        Buffer      newBuf = SpGistNewBuffer(index);
        BlockNumber newBlk;
        int         parity;

        SpGistInitBuffer(newBuf, leafWanted ? SPGIST_LEAF : 0);

        /* Leaf pages have no parity concerns, so any page will do */
        if (leafWanted)
            return newBuf;

        newBlk = BufferGetBlockNumber(newBuf);
        parity = newBlk % 3;

        /* Inner page of the requested parity: hand it out */
        if ((flags & GBUF_PARITY_MASK) == parity)
            return newBuf;

        /* Wrong parity: remember the page in the cache, then retry */
        cache->lastUsedPages.innerPage[parity].blkno = newBlk;
        cache->lastUsedPages.innerPage[parity].freeSpace =
            PageGetExactFreeSpace(BufferGetPage(newBuf));
        UnlockReleaseBuffer(newBuf);
    }
}
/* * Build an SP-GiST index. */ Datum spgbuild(PG_FUNCTION_ARGS) { Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); IndexBuildResult *result; double reltuples; SpGistBuildState buildstate; Buffer metabuffer, rootbuffer, nullbuffer; if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); /* * Initialize the meta page and root pages */ metabuffer = SpGistNewBuffer(index); rootbuffer = SpGistNewBuffer(index); nullbuffer = SpGistNewBuffer(index); Assert(BufferGetBlockNumber(metabuffer) == SPGIST_METAPAGE_BLKNO); Assert(BufferGetBlockNumber(rootbuffer) == SPGIST_ROOT_BLKNO); Assert(BufferGetBlockNumber(nullbuffer) == SPGIST_NULL_BLKNO); START_CRIT_SECTION(); SpGistInitMetapage(BufferGetPage(metabuffer)); MarkBufferDirty(metabuffer); SpGistInitBuffer(rootbuffer, SPGIST_LEAF); MarkBufferDirty(rootbuffer); SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS); MarkBufferDirty(nullbuffer); if (RelationNeedsWAL(index)) { XLogRecPtr recptr; XLogBeginInsert(); /* * Replay will re-initialize the pages, so don't take full pages * images. No other data to log. 
*/ XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT); XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX); PageSetLSN(BufferGetPage(metabuffer), recptr); PageSetLSN(BufferGetPage(rootbuffer), recptr); PageSetLSN(BufferGetPage(nullbuffer), recptr); } END_CRIT_SECTION(); UnlockReleaseBuffer(metabuffer); UnlockReleaseBuffer(rootbuffer); UnlockReleaseBuffer(nullbuffer); /* * Now insert all the heap data into the index */ initSpGistState(&buildstate.spgstate, index); buildstate.spgstate.isBuild = true; buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, "SP-GiST build temporary context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, spgistBuildCallback, (void *) &buildstate); MemoryContextDelete(buildstate.tmpCtx); SpGistUpdateMetaPage(index); result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult)); result->heap_tuples = result->index_tuples = reltuples; PG_RETURN_POINTER(result); }
/*
 * Redo routine for an "add leaf tuple" record.
 *
 * The record data is a spgxlogAddLeaf header followed directly by the leaf
 * tuple.  Unless the leaf page was restored as backup block 1, the tuple is
 * put at offnumLeaf (either as a new item, chained behind offnumHeadLeaf if
 * that is valid, or replacing the item already at that offset).  Then, if
 * the record names a parent block and it was not restored as backup block
 * 2, the parent's node link is pointed at the new tuple.  Each page is only
 * modified when its LSN is older than the record's.
 */
static void
spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
{
    char       *recData = XLogRecGetData(record);
    spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) recData;
    SpGistLeafTuple newLeaf;
    Buffer      buf;
    Page        pg;

    /* the tuple follows the header; we assume it is adequately aligned */
    newLeaf = (SpGistLeafTuple) (recData + sizeof(spgxlogAddLeaf));

    if (!(record->xl_info & XLR_BKP_BLOCK_1))
    {
        buf = XLogReadBuffer(xlrec->node, xlrec->blknoLeaf, xlrec->newPage);
        if (BufferIsValid(buf))
        {
            pg = BufferGetPage(buf);

            if (xlrec->newPage)
                SpGistInitBuffer(buf, SPGIST_LEAF);

            if (!XLByteLE(lsn, PageGetLSN(pg)))
            {
                if (xlrec->offnumLeaf == xlrec->offnumHeadLeaf)
                {
                    /* replacing a DEAD tuple */
                    PageIndexTupleDelete(pg, xlrec->offnumLeaf);
                    if (PageAddItem(pg, (Item) newLeaf, newLeaf->size,
                                    xlrec->offnumLeaf,
                                    false, false) != xlrec->offnumLeaf)
                        elog(ERROR, "failed to add item of size %u to SPGiST index page",
                             newLeaf->size);
                }
                else
                {
                    /* normal cases, tuple was added by SpGistPageAddNewItem */
                    addOrReplaceTuple(pg, (Item) newLeaf, newLeaf->size,
                                      xlrec->offnumLeaf);

                    /* hook the new tuple into the chain behind its head */
                    if (xlrec->offnumHeadLeaf != InvalidOffsetNumber)
                    {
                        SpGistLeafTuple chainHead;

                        chainHead = (SpGistLeafTuple)
                            PageGetItem(pg,
                                        PageGetItemId(pg, xlrec->offnumHeadLeaf));
                        Assert(chainHead->nextOffset == newLeaf->nextOffset);
                        chainHead->nextOffset = xlrec->offnumLeaf;
                    }
                }

                PageSetLSN(pg, lsn);
                PageSetTLI(pg, ThisTimeLineID);
                MarkBufferDirty(buf);
            }
            UnlockReleaseBuffer(buf);
        }
    }

    /* point the parent's node link at the new tuple, if there is a parent */
    if (xlrec->blknoParent != InvalidBlockNumber &&
        !(record->xl_info & XLR_BKP_BLOCK_2))
    {
        buf = XLogReadBuffer(xlrec->node, xlrec->blknoParent, false);
        if (BufferIsValid(buf))
        {
            pg = BufferGetPage(buf);
            if (!XLByteLE(lsn, PageGetLSN(pg)))
            {
                SpGistInnerTuple parentTuple;

                parentTuple = (SpGistInnerTuple)
                    PageGetItem(pg, PageGetItemId(pg, xlrec->offnumParent));

                spgUpdateNodeLink(parentTuple, xlrec->nodeI,
                                  xlrec->blknoLeaf, xlrec->offnumLeaf);

                PageSetLSN(pg, lsn);
                PageSetTLI(pg, ThisTimeLineID);
                MarkBufferDirty(buf);
            }
            UnlockReleaseBuffer(buf);
        }
    }
}
/*
 * Redo routine for a "split inner tuple" record.
 *
 * The record data is a spgxlogSplitTuple header followed by the prefix
 * tuple and then the postfix tuple.  The postfix tuple is installed first
 * (when it lives on a different page from the prefix) so that the link to
 * it is never dangling; afterwards the original tuple at offnumPrefix is
 * replaced by the prefix tuple, and the postfix tuple is added there too if
 * both share a page.  Pages restored as backup blocks, and pages whose LSN
 * is not older than the record's, are left alone.
 */
static void
spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
{
    char       *cursor = XLogRecGetData(record);
    spgxlogSplitTuple *xlrec = (spgxlogSplitTuple *) cursor;
    SpGistInnerTuple prefix;
    SpGistInnerTuple postfix;
    int         samePage;
    Buffer      buf;
    Page        pg;

    /* we assume this is adequately aligned */
    cursor += sizeof(spgxlogSplitTuple);
    prefix = (SpGistInnerTuple) cursor;
    postfix = (SpGistInnerTuple) (cursor + prefix->size);

    samePage = (xlrec->blknoPostfix == xlrec->blknoPrefix);

    /* insert postfix tuple first to avoid dangling link */
    if (!samePage && !(record->xl_info & XLR_BKP_BLOCK_2))
    {
        buf = XLogReadBuffer(xlrec->node, xlrec->blknoPostfix,
                             xlrec->newPage);
        if (BufferIsValid(buf))
        {
            pg = BufferGetPage(buf);

            if (xlrec->newPage)
                SpGistInitBuffer(buf, 0);

            if (!XLByteLE(lsn, PageGetLSN(pg)))
            {
                addOrReplaceTuple(pg, (Item) postfix, postfix->size,
                                  xlrec->offnumPostfix);

                PageSetLSN(pg, lsn);
                PageSetTLI(pg, ThisTimeLineID);
                MarkBufferDirty(buf);
            }
            UnlockReleaseBuffer(buf);
        }
    }

    /* now handle the original page */
    if (!(record->xl_info & XLR_BKP_BLOCK_1))
    {
        buf = XLogReadBuffer(xlrec->node, xlrec->blknoPrefix, false);
        if (BufferIsValid(buf))
        {
            pg = BufferGetPage(buf);
            if (!XLByteLE(lsn, PageGetLSN(pg)))
            {
                /* swap the old tuple for the prefix tuple, same offset */
                PageIndexTupleDelete(pg, xlrec->offnumPrefix);
                if (PageAddItem(pg, (Item) prefix, prefix->size,
                                xlrec->offnumPrefix,
                                false, false) != xlrec->offnumPrefix)
                    elog(ERROR, "failed to add item of size %u to SPGiST index page",
                         prefix->size);

                if (samePage)
                    addOrReplaceTuple(pg, (Item) postfix, postfix->size,
                                      xlrec->offnumPostfix);

                PageSetLSN(pg, lsn);
                PageSetTLI(pg, ThisTimeLineID);
                MarkBufferDirty(buf);
            }
            UnlockReleaseBuffer(buf);
        }
    }
}
/*
 * Redo routine for a "pick split" record.
 *
 * Record layout, as parsed below: MAXALIGN'd spgxlogPickSplit header, the
 * new inner tuple, the toDelete offset array (nDelete entries, MAXALIGN'd),
 * the toInsert offset array (nInsert entries, MAXALIGN'd), the
 * leafPageSelect byte array (nInsert entries, MAXALIGN'd), and finally the
 * nInsert leaf tuples back to back.
 *
 * Up to four pages are touched, in this order: source leaf page, destination
 * leaf page, the page receiving the new inner tuple, and the parent page
 * (only when it differs from the inner tuple's page).  "bbi" tracks which
 * backup-block bit corresponds to the page at hand; pages that are
 * re-initialized here (initSrc/initDest) and a root source page do not
 * advance it.
 */
static void
spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
{
    char       *ptr = XLogRecGetData(record);
    spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr;
    SpGistInnerTuple innerTuple;
    SpGistState state;
    OffsetNumber *toDelete;
    OffsetNumber *toInsert;
    uint8      *leafPageSelect;
    Buffer      srcBuffer;
    Buffer      destBuffer;
    Page        page;
    int         bbi;
    int         i;

    fillFakeState(&state, xldata->stateSrc);

    /* walk through the variable-length parts of the record */
    ptr += MAXALIGN(sizeof(spgxlogPickSplit));
    innerTuple = (SpGistInnerTuple) ptr;
    ptr += innerTuple->size;
    toDelete = (OffsetNumber *) ptr;
    ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nDelete);
    toInsert = (OffsetNumber *) ptr;
    ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nInsert);
    leafPageSelect = (uint8 *) ptr;
    ptr += MAXALIGN(sizeof(uint8) * xldata->nInsert);

    /* now ptr points to the list of leaf tuples */

    /*
     * It's a bit tricky to identify which pages have been handled as
     * full-page images, so we explicitly count each referenced buffer.
     */
    bbi = 0;

    if (xldata->blknoSrc == SPGIST_HEAD_BLKNO)
    {
        /* when splitting root, we touch it only in the guise of new inner */
        srcBuffer = InvalidBuffer;
    }
    else if (xldata->initSrc)
    {
        /* just re-init the source page */
        srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true);
        Assert(BufferIsValid(srcBuffer));
        page = (Page) BufferGetPage(srcBuffer);

        SpGistInitBuffer(srcBuffer, SPGIST_LEAF);
        /* don't update LSN etc till we're done with it */
    }
    else
    {
        /* delete the specified tuples from source page */
        if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi)))
        {
            srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, false);
            if (BufferIsValid(srcBuffer))
            {
                page = BufferGetPage(srcBuffer);
                if (!XLByteLE(lsn, PageGetLSN(page)))
                {
                    /*
                     * We have it a bit easier here than in doPickSplit(),
                     * because we know the inner tuple's location already,
                     * so we can inject the correct redirection tuple now.
                     */
                    if (!state.isBuild)
                        spgPageIndexMultiDelete(&state, page,
                                                toDelete, xldata->nDelete,
                                                SPGIST_REDIRECT,
                                                SPGIST_PLACEHOLDER,
                                                xldata->blknoInner,
                                                xldata->offnumInner);
                    else
                        spgPageIndexMultiDelete(&state, page,
                                                toDelete, xldata->nDelete,
                                                SPGIST_PLACEHOLDER,
                                                SPGIST_PLACEHOLDER,
                                                InvalidBlockNumber,
                                                InvalidOffsetNumber);

                    /* don't update LSN etc till we're done with it */
                }
            }
        }
        else
            srcBuffer = InvalidBuffer;  /* restored as backup block; skip */
        bbi++;                  /* source page used up one backup-block slot */
    }

    /* try to access dest page if any */
    if (xldata->blknoDest == InvalidBlockNumber)
    {
        destBuffer = InvalidBuffer;
    }
    else if (xldata->initDest)
    {
        /* just re-init the dest page */
        destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true);
        Assert(BufferIsValid(destBuffer));
        page = (Page) BufferGetPage(destBuffer);

        SpGistInitBuffer(destBuffer, SPGIST_LEAF);
        /* don't update LSN etc till we're done with it */
    }
    else
    {
        if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi)))
            destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, false);
        else
            destBuffer = InvalidBuffer; /* restored as backup block; skip */
        bbi++;                  /* dest page used up one backup-block slot */
    }

    /* restore leaf tuples to src and/or dest page */
    for (i = 0; i < xldata->nInsert; i++)
    {
        SpGistLeafTuple lt = (SpGistLeafTuple) ptr;
        Buffer      leafBuffer;

        /* always step past the tuple, even if its page is being skipped */
        ptr += lt->size;

        /* leafPageSelect[i] nonzero means dest page, zero means source page */
        leafBuffer = leafPageSelect[i] ? destBuffer : srcBuffer;
        if (!BufferIsValid(leafBuffer))
            continue;           /* no need to touch this page */
        page = BufferGetPage(leafBuffer);

        /*
         * The page LSN has deliberately not been advanced yet, so this test
         * gives the same answer for every tuple destined for the page.
         */
        if (!XLByteLE(lsn, PageGetLSN(page)))
        {
            addOrReplaceTuple(page, (Item) lt, lt->size, toInsert[i]);
        }
    }

    /* Now update src and dest page LSNs */
    if (BufferIsValid(srcBuffer))
    {
        page = BufferGetPage(srcBuffer);
        if (!XLByteLE(lsn, PageGetLSN(page)))
        {
            PageSetLSN(page, lsn);
            PageSetTLI(page, ThisTimeLineID);
            MarkBufferDirty(srcBuffer);
        }
        UnlockReleaseBuffer(srcBuffer);
    }
    if (BufferIsValid(destBuffer))
    {
        page = BufferGetPage(destBuffer);
        if (!XLByteLE(lsn, PageGetLSN(page)))
        {
            PageSetLSN(page, lsn);
            PageSetTLI(page, ThisTimeLineID);
            MarkBufferDirty(destBuffer);
        }
        UnlockReleaseBuffer(destBuffer);
    }

    /* restore new inner tuple */
    if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi)))
    {
        Buffer      buffer = XLogReadBuffer(xldata->node, xldata->blknoInner,
                                            xldata->initInner);

        if (BufferIsValid(buffer))
        {
            page = BufferGetPage(buffer);

            if (xldata->initInner)
                SpGistInitBuffer(buffer, 0);

            if (!XLByteLE(lsn, PageGetLSN(page)))
            {
                addOrReplaceTuple(page, (Item) innerTuple, innerTuple->size,
                                  xldata->offnumInner);

                /* if inner is also parent, update link while we're here */
                if (xldata->blknoInner == xldata->blknoParent)
                {
                    SpGistInnerTuple parent;

                    parent = (SpGistInnerTuple) PageGetItem(page,
                                    PageGetItemId(page, xldata->offnumParent));
                    spgUpdateNodeLink(parent, xldata->nodeI,
                                      xldata->blknoInner, xldata->offnumInner);
                }

                PageSetLSN(page, lsn);
                PageSetTLI(page, ThisTimeLineID);
                MarkBufferDirty(buffer);
            }
            UnlockReleaseBuffer(buffer);
        }
    }
    bbi++;                      /* inner page always uses a backup-block slot */

    /* update parent downlink, unless we did it above */
    if (xldata->blknoParent == InvalidBlockNumber)
    {
        /* no parent cause we split the root */
        Assert(xldata->blknoInner == SPGIST_HEAD_BLKNO);
    }
    else if (xldata->blknoInner != xldata->blknoParent)
    {
        if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi)))
        {
            Buffer      buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false);

            if (BufferIsValid(buffer))
            {
                page = BufferGetPage(buffer);

                if (!XLByteLE(lsn, PageGetLSN(page)))
                {
                    SpGistInnerTuple parent;

                    parent = (SpGistInnerTuple) PageGetItem(page,
                                    PageGetItemId(page, xldata->offnumParent));
                    spgUpdateNodeLink(parent, xldata->nodeI,
                                      xldata->blknoInner, xldata->offnumInner);

                    PageSetLSN(page, lsn);
                    PageSetTLI(page, ThisTimeLineID);
                    MarkBufferDirty(buffer);
                }
                UnlockReleaseBuffer(buffer);
            }
        }
    }
}
/*
 * Redo routine for a "move leaf tuples" record.
 *
 * Record layout, as parsed below: MAXALIGN'd spgxlogMoveLeafs header, the
 * source offsets to delete (nMoves entries, MAXALIGN'd), the destination
 * offsets to insert at (MAXALIGN'd), then the leaf tuples themselves.  The
 * number of inserted tuples is 1 when replaceDead is set, else nMoves + 1.
 *
 * Pages are replayed in the order destination, source, parent; the
 * destination goes first so that the redirect left on the source page
 * points at something valid.  Backup blocks 2, 1 and 3 respectively cover
 * those pages, and a page is skipped when restored from its backup block or
 * when its LSN is not older than the record's.
 */
static void
spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
{
    char       *cursor = XLogRecGetData(record);
    spgxlogMoveLeafs *xlrec = (spgxlogMoveLeafs *) cursor;
    SpGistState fakeState;
    OffsetNumber *delOffsets;
    OffsetNumber *insOffsets;
    OffsetNumber lastInsOffset;
    int         numInsert;
    Buffer      buf;
    Page        pg;

    fillFakeState(&fakeState, xlrec->stateSrc);

    if (xlrec->replaceDead)
        numInsert = 1;
    else
        numInsert = xlrec->nMoves + 1;

    cursor += MAXALIGN(sizeof(spgxlogMoveLeafs));
    delOffsets = (OffsetNumber *) cursor;
    cursor += MAXALIGN(sizeof(OffsetNumber) * xlrec->nMoves);
    insOffsets = (OffsetNumber *) cursor;
    cursor += MAXALIGN(sizeof(OffsetNumber) * numInsert);

    /* cursor now sits on the first leaf tuple */

    /* the last inserted tuple is what redirect and downlink will point at */
    lastInsOffset = insOffsets[numInsert - 1];

    /* Insert tuples on the dest page (do first, so redirect is valid) */
    if (!(record->xl_info & XLR_BKP_BLOCK_2))
    {
        buf = XLogReadBuffer(xlrec->node, xlrec->blknoDst, xlrec->newPage);
        if (BufferIsValid(buf))
        {
            pg = BufferGetPage(buf);

            if (xlrec->newPage)
                SpGistInitBuffer(buf, SPGIST_LEAF);

            if (!XLByteLE(lsn, PageGetLSN(pg)))
            {
                int         n = 0;

                while (n < numInsert)
                {
                    SpGistLeafTuple leaf = (SpGistLeafTuple) cursor;

                    addOrReplaceTuple(pg, (Item) leaf, leaf->size,
                                      insOffsets[n]);
                    cursor += leaf->size;
                    n++;
                }

                PageSetLSN(pg, lsn);
                PageSetTLI(pg, ThisTimeLineID);
                MarkBufferDirty(buf);
            }
            UnlockReleaseBuffer(buf);
        }
    }

    /* Delete tuples from the source page, inserting a redirection pointer */
    if (!(record->xl_info & XLR_BKP_BLOCK_1))
    {
        buf = XLogReadBuffer(xlrec->node, xlrec->blknoSrc, false);
        if (BufferIsValid(buf))
        {
            pg = BufferGetPage(buf);
            if (!XLByteLE(lsn, PageGetLSN(pg)))
            {
                int         firstState;

                /* during index build a placeholder replaces the redirect */
                if (fakeState.isBuild)
                    firstState = SPGIST_PLACEHOLDER;
                else
                    firstState = SPGIST_REDIRECT;

                spgPageIndexMultiDelete(&fakeState, pg,
                                        delOffsets, xlrec->nMoves,
                                        firstState,
                                        SPGIST_PLACEHOLDER,
                                        xlrec->blknoDst,
                                        lastInsOffset);

                PageSetLSN(pg, lsn);
                PageSetTLI(pg, ThisTimeLineID);
                MarkBufferDirty(buf);
            }
            UnlockReleaseBuffer(buf);
        }
    }

    /* And update the parent downlink */
    if (!(record->xl_info & XLR_BKP_BLOCK_3))
    {
        buf = XLogReadBuffer(xlrec->node, xlrec->blknoParent, false);
        if (BufferIsValid(buf))
        {
            pg = BufferGetPage(buf);
            if (!XLByteLE(lsn, PageGetLSN(pg)))
            {
                SpGistInnerTuple parentTuple;

                parentTuple = (SpGistInnerTuple)
                    PageGetItem(pg, PageGetItemId(pg, xlrec->offnumParent));

                spgUpdateNodeLink(parentTuple, xlrec->nodeI,
                                  xlrec->blknoDst, lastInsOffset);

                PageSetLSN(pg, lsn);
                PageSetTLI(pg, ThisTimeLineID);
                MarkBufferDirty(buf);
            }
            UnlockReleaseBuffer(buf);
        }
    }
}
/*
 * Redo routine for an "add node to inner tuple" record.
 *
 * lsn is the record's LSN; record's data is a spgxlogAddNode header followed
 * directly by the enlarged inner tuple.
 *
 * Two shapes of record exist:
 *
 * - blknoNew is InvalidBlockNumber: the tuple was enlarged in place, so the
 *   item at (blkno, offnum) is simply replaced.
 *
 * - otherwise the tuple moved to (blknoNew, offnumNew).  The new copy is
 *   installed first so that the redirect is valid, then the old item is
 *   replaced by a redirect (or by a placeholder during index build), and the
 *   parent's node link is pointed at the new location.
 *
 * The parent tuple may live on the old page, on the new page, or on a third
 * page.  When it shares a page with the old or new tuple, its downlink must
 * be fixed during that same page visit: once we set the page LSN to "lsn", a
 * later visit would see the page as already up to date and silently skip
 * the downlink update.  Only a parent on a separate page gets its own visit.
 *
 * A page is left untouched when it was restored from its backup block or
 * when its LSN is not older than the record's.
 */
static void
spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
{
    char       *ptr = XLogRecGetData(record);
    spgxlogAddNode *xldata = (spgxlogAddNode *) ptr;
    SpGistInnerTuple innerTuple;
    SpGistState state;
    Buffer      buffer;
    Page        page;
    int         bbi;

    /* we assume this is adequately aligned */
    ptr += sizeof(spgxlogAddNode);
    innerTuple = (SpGistInnerTuple) ptr;

    fillFakeState(&state, xldata->stateSrc);

    if (xldata->blknoNew == InvalidBlockNumber)
    {
        /* update in place */
        Assert(xldata->blknoParent == InvalidBlockNumber);
        if (!(record->xl_info & XLR_BKP_BLOCK_1))
        {
            buffer = XLogReadBuffer(xldata->node, xldata->blkno, false);
            if (BufferIsValid(buffer))
            {
                page = BufferGetPage(buffer);
                if (!XLByteLE(lsn, PageGetLSN(page)))
                {
                    PageIndexTupleDelete(page, xldata->offnum);
                    if (PageAddItem(page, (Item) innerTuple, innerTuple->size,
                                    xldata->offnum,
                                    false, false) != xldata->offnum)
                        elog(ERROR, "failed to add item of size %u to SPGiST index page",
                             innerTuple->size);

                    PageSetLSN(page, lsn);
                    PageSetTLI(page, ThisTimeLineID);
                    MarkBufferDirty(buffer);
                }
                UnlockReleaseBuffer(buffer);
            }
        }
        return;
    }

    /*
     * Work out where the parent tuple lives.  The value doubles as the
     * backup-block index that covers the parent's page: 0 = same page as
     * the old tuple, 1 = same page as the new tuple, 2 = a page of its own.
     */
    if (xldata->blknoParent == xldata->blkno)
        bbi = 0;
    else if (xldata->blknoParent == xldata->blknoNew)
        bbi = 1;
    else
        bbi = 2;

    /* Install new tuple first so redirect is valid */
    if (!(record->xl_info & XLR_BKP_BLOCK_2))
    {
        buffer = XLogReadBuffer(xldata->node, xldata->blknoNew,
                                xldata->newPage);
        if (BufferIsValid(buffer))
        {
            page = BufferGetPage(buffer);

            if (xldata->newPage)
                SpGistInitBuffer(buffer, 0);

            if (!XLByteLE(lsn, PageGetLSN(page)))
            {
                addOrReplaceTuple(page, (Item) innerTuple,
                                  innerTuple->size, xldata->offnumNew);

                /*
                 * Parent on this same page: fix its downlink now, because
                 * advancing the LSN below would make a later visit a no-op.
                 */
                if (bbi == 1)
                {
                    SpGistInnerTuple parentTuple;

                    parentTuple = (SpGistInnerTuple)
                        PageGetItem(page,
                                    PageGetItemId(page, xldata->offnumParent));

                    spgUpdateNodeLink(parentTuple, xldata->nodeI,
                                      xldata->blknoNew, xldata->offnumNew);
                }

                PageSetLSN(page, lsn);
                PageSetTLI(page, ThisTimeLineID);
                MarkBufferDirty(buffer);
            }
            UnlockReleaseBuffer(buffer);
        }
    }

    /* Delete old tuple, replacing it with redirect or placeholder tuple */
    if (!(record->xl_info & XLR_BKP_BLOCK_1))
    {
        buffer = XLogReadBuffer(xldata->node, xldata->blkno, false);
        if (BufferIsValid(buffer))
        {
            page = BufferGetPage(buffer);
            if (!XLByteLE(lsn, PageGetLSN(page)))
            {
                SpGistDeadTuple dt;

                if (state.isBuild)
                    dt = spgFormDeadTuple(&state, SPGIST_PLACEHOLDER,
                                          InvalidBlockNumber,
                                          InvalidOffsetNumber);
                else
                    dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
                                          xldata->blknoNew,
                                          xldata->offnumNew);

                PageIndexTupleDelete(page, xldata->offnum);
                if (PageAddItem(page, (Item) dt, dt->size,
                                xldata->offnum,
                                false, false) != xldata->offnum)
                    elog(ERROR, "failed to add item of size %u to SPGiST index page",
                         dt->size);

                if (state.isBuild)
                    SpGistPageGetOpaque(page)->nPlaceholder++;
                else
                    SpGistPageGetOpaque(page)->nRedirection++;

                /*
                 * Parent on this same page: fix its downlink now, for the
                 * same reason as above.
                 */
                if (bbi == 0)
                {
                    SpGistInnerTuple parentTuple;

                    parentTuple = (SpGistInnerTuple)
                        PageGetItem(page,
                                    PageGetItemId(page, xldata->offnumParent));

                    spgUpdateNodeLink(parentTuple, xldata->nodeI,
                                      xldata->blknoNew, xldata->offnumNew);
                }

                PageSetLSN(page, lsn);
                PageSetTLI(page, ThisTimeLineID);
                MarkBufferDirty(buffer);
            }
            UnlockReleaseBuffer(buffer);
        }
    }

    /*
     * Update parent downlink on its own page, if we didn't already do it as
     * part of the old or new page above.
     */
    if (bbi == 2 && !(record->xl_info & XLR_SET_BKP_BLOCK(bbi)))
    {
        buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false);
        if (BufferIsValid(buffer))
        {
            page = BufferGetPage(buffer);
            if (!XLByteLE(lsn, PageGetLSN(page)))
            {
                SpGistInnerTuple parentTuple;

                parentTuple = (SpGistInnerTuple)
                    PageGetItem(page,
                                PageGetItemId(page, xldata->offnumParent));

                spgUpdateNodeLink(parentTuple, xldata->nodeI,
                                  xldata->blknoNew, xldata->offnumNew);

                PageSetLSN(page, lsn);
                PageSetTLI(page, ThisTimeLineID);
                MarkBufferDirty(buffer);
            }
            UnlockReleaseBuffer(buffer);
        }
    }
}
/*
 * SpGistGetBuffer -- find or create a page to put a tuple on.
 *
 * Returns a pinned, exclusive-locked buffer whose page matches the type and
 * parity requested in "flags" and has at least needSpace bytes free.  The
 * lastUsedPages cache is consulted first so that repeated requests tend to
 * land on the same page; when that does not work out a new page is
 * allocated.
 *
 * *isNew is set to true when the page was initialized by this call (or by
 * allocNewBuffer), and to false when an already-valid page is reused.
 *
 * Raises an error if needSpace exceeds what even an empty page can hold.
 */
Buffer
SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)
{
    SpGistCache *cache = spgGetCache(index);
    SpGistLastUsedPage *cached;
    Buffer      buf;
    Page        pg;
    bool        wantLeaf;
    bool        wantNulls;

    /* A request no empty page could satisfy is hopeless */
    if (needSpace > SPGIST_PAGE_CAPACITY)
        elog(ERROR, "desired SPGiST tuple size is too big");

    /*
     * Pad the request by the relation's fillfactor reserve, so that pages
     * receiving unrelated tuples keep 100-fillfactor% free for tuples related
     * to those already present.  Clamp afterwards: fillfactor must never
     * turn an otherwise legal request into a failure.
     */
    needSpace += RelationGetTargetPageFreeSpace(index,
                                                SPGIST_DEFAULT_FILLFACTOR);
    needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY);

    /* Look up the cache slot that corresponds to this flags setting */
    cached = GET_LUP(cache, flags);

    /* Empty slot: nothing to try, go straight to a new page */
    if (cached->blkno == InvalidBlockNumber)
    {
        *isNew = true;
        return allocNewBuffer(index, flags);
    }

    /* fixed pages should never be in cache */
    Assert(!SpGistBlockIsFixed(cached->blkno));

    /* Cached free space already too small: don't even read the page */
    if (cached->freeSpace < needSpace)
    {
        *isNew = true;
        return allocNewBuffer(index, flags);
    }

    buf = ReadBuffer(index, cached->blkno);

    if (!ConditionalLockBuffer(buf))
    {
        /* somebody else holds the lock; don't wait, use a new page */
        ReleaseBuffer(buf);
        *isNew = true;
        return allocNewBuffer(index, flags);
    }

    pg = BufferGetPage(buf);
    wantLeaf = (GBUF_REQ_LEAF(flags) != 0);
    wantNulls = (GBUF_REQ_NULLS(flags) != 0);

    if (PageIsNew(pg) || SpGistPageIsDeleted(pg) || PageIsEmpty(pg))
    {
        /* Page holds nothing of value, so we may (re)initialize it */
        uint16      initFlags = 0;

        if (wantLeaf)
            initFlags |= SPGIST_LEAF;
        if (wantNulls)
            initFlags |= SPGIST_NULLS;
        SpGistInitBuffer(buf, initFlags);
        cached->freeSpace = PageGetExactFreeSpace(pg) - needSpace;
        *isNew = true;
        return buf;
    }

    /*
     * The cache may be stale, so verify on the page itself that it is of the
     * requested kind and really has the room.
     */
    if (wantLeaf == (SpGistPageIsLeaf(pg) != 0) &&
        wantNulls == (SpGistPageStoresNulls(pg) != 0))
    {
        int         avail = PageGetExactFreeSpace(pg);

        if (avail >= needSpace)
        {
            /* It fits: account for the space and hand the page out */
            cached->freeSpace = avail - needSpace;
            *isNew = false;
            return buf;
        }
    }

    /* Cached page was no good after all; let it go and allocate afresh */
    UnlockReleaseBuffer(buf);

    *isNew = true;
    return allocNewBuffer(index, flags);
}
/*
 * Redo routine for an "add node to inner tuple" record.
 *
 * The record's main data is a spgxlogAddNode header followed directly by
 * the enlarged inner tuple.  The tuple is not aligned within the record, so
 * its header is copied out before any field is read.
 *
 * If the record has no block reference 1, the tuple was enlarged in place
 * and the item at offnum on block 0 is replaced.  Otherwise the tuple moved
 * from block 0 to block 1: the new copy is installed first, then the old
 * item is replaced by a redirect (or a placeholder during index build), and
 * the parent's node link is repointed.  parentBlk says which block
 * reference (0, 1 or 2) holds the parent tuple; when it is 0 or 1 the
 * downlink is fixed as part of that page's update.
 */
static void
spgRedoAddNode(XLogReaderState *record)
{
    XLogRecPtr  endLsn = record->EndRecPtr;
    char       *recData = XLogRecGetData(record);
    spgxlogAddNode *xlrec = (spgxlogAddNode *) recData;
    char       *tupleData = recData + sizeof(spgxlogAddNode);
    SpGistInnerTupleData tupleHdr;
    SpGistState fakeState;
    Buffer      buf;
    Page        pg;
    XLogRedoAction redoAction;

    /* the tuple is unaligned, so make a copy to access its header */
    memcpy(&tupleHdr, tupleData, sizeof(SpGistInnerTupleData));

    fillFakeState(&fakeState, xlrec->stateSrc);

    if (!XLogRecHasBlockRef(record, 1))
    {
        /* update in place */
        Assert(xlrec->parentBlk == -1);

        redoAction = XLogReadBufferForRedo(record, 0, &buf);
        if (redoAction == BLK_NEEDS_REDO)
        {
            pg = BufferGetPage(buf);

            PageIndexTupleDelete(pg, xlrec->offnum);
            if (PageAddItem(pg, (Item) tupleData, tupleHdr.size,
                            xlrec->offnum,
                            false, false) != xlrec->offnum)
                elog(ERROR, "failed to add item of size %u to SPGiST index page",
                     tupleHdr.size);

            PageSetLSN(pg, endLsn);
            MarkBufferDirty(buf);
        }
        if (BufferIsValid(buf))
            UnlockReleaseBuffer(buf);
    }
    else
    {
        BlockNumber oldBlk;
        BlockNumber newBlk;

        XLogRecGetBlockTag(record, 0, NULL, NULL, &oldBlk);
        XLogRecGetBlockTag(record, 1, NULL, NULL, &newBlk);

        /*
         * Normal operation holds the source, destination and parent pages
         * locked all at once.  During replay it should be safe to handle
         * them one by one provided the order is right: the new tuple has to
         * exist before the old one is turned into a redirect to it.
         */

        /* Install new tuple first so redirect is valid */
        if (xlrec->newPage)
        {
            /* AddNode is not used for nulls pages */
            buf = XLogInitBufferForRedo(record, 1);
            SpGistInitBuffer(buf, 0);
            redoAction = BLK_NEEDS_REDO;
        }
        else
            redoAction = XLogReadBufferForRedo(record, 1, &buf);

        if (redoAction == BLK_NEEDS_REDO)
        {
            pg = BufferGetPage(buf);

            addOrReplaceTuple(pg, (Item) tupleData,
                              tupleHdr.size, xlrec->offnumNew);

            /* parent shares the destination page: repoint it now */
            if (xlrec->parentBlk == 1)
            {
                SpGistInnerTuple parentOnNew;

                parentOnNew = (SpGistInnerTuple)
                    PageGetItem(pg, PageGetItemId(pg, xlrec->offnumParent));

                spgUpdateNodeLink(parentOnNew, xlrec->nodeI,
                                  newBlk, xlrec->offnumNew);
            }

            PageSetLSN(pg, endLsn);
            MarkBufferDirty(buf);
        }
        if (BufferIsValid(buf))
            UnlockReleaseBuffer(buf);

        /* Delete old tuple, replacing it with redirect or placeholder tuple */
        redoAction = XLogReadBufferForRedo(record, 0, &buf);
        if (redoAction == BLK_NEEDS_REDO)
        {
            SpGistDeadTuple deadTuple;

            pg = BufferGetPage(buf);

            if (fakeState.isBuild)
                deadTuple = spgFormDeadTuple(&fakeState, SPGIST_PLACEHOLDER,
                                             InvalidBlockNumber,
                                             InvalidOffsetNumber);
            else
                deadTuple = spgFormDeadTuple(&fakeState, SPGIST_REDIRECT,
                                             newBlk,
                                             xlrec->offnumNew);

            PageIndexTupleDelete(pg, xlrec->offnum);
            if (PageAddItem(pg, (Item) deadTuple, deadTuple->size,
                            xlrec->offnum,
                            false, false) != xlrec->offnum)
                elog(ERROR, "failed to add item of size %u to SPGiST index page",
                     deadTuple->size);

            /* keep the page's dead-tuple counters in step */
            if (fakeState.isBuild)
                SpGistPageGetOpaque(pg)->nPlaceholder++;
            else
                SpGistPageGetOpaque(pg)->nRedirection++;

            /* parent shares the source page: repoint it now */
            if (xlrec->parentBlk == 0)
            {
                SpGistInnerTuple parentOnOld;

                parentOnOld = (SpGistInnerTuple)
                    PageGetItem(pg, PageGetItemId(pg, xlrec->offnumParent));

                spgUpdateNodeLink(parentOnOld, xlrec->nodeI,
                                  newBlk, xlrec->offnumNew);
            }

            PageSetLSN(pg, endLsn);
            MarkBufferDirty(buf);
        }
        if (BufferIsValid(buf))
            UnlockReleaseBuffer(buf);

        /*
         * Parent on a page of its own: repoint it here.  (When it shares the
         * source or destination page this was done above.)
         */
        if (xlrec->parentBlk == 2)
        {
            redoAction = XLogReadBufferForRedo(record, 2, &buf);
            if (redoAction == BLK_NEEDS_REDO)
            {
                SpGistInnerTuple parentOnOwn;

                pg = BufferGetPage(buf);

                parentOnOwn = (SpGistInnerTuple)
                    PageGetItem(pg, PageGetItemId(pg, xlrec->offnumParent));

                spgUpdateNodeLink(parentOnOwn, xlrec->nodeI,
                                  newBlk, xlrec->offnumNew);

                PageSetLSN(pg, endLsn);
                MarkBufferDirty(buf);
            }
            if (BufferIsValid(buf))
                UnlockReleaseBuffer(buf);
        }
    }
}
/*
 * Replay an SP-GiST pick-split record.
 *
 * Record layout after the spgxlogPickSplit header: nDelete offsets to
 * delete, nInsert offsets to insert at, nInsert page-selector bytes
 * (nonzero = dest page, zero = source page), the new inner tuple, and then
 * the nInsert leaf tuples.
 *
 * bbi counts the buffers that were read through XLogReadBufferForRedo (or
 * re-initialized, for the inner page), in the order source, dest, inner,
 * parent; it is passed as the block index for each such read.
 */
static void
spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
{
	char	   *ptr = XLogRecGetData(record);
	spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr;
	int			nullsFlag = xldata->storesNulls ? SPGIST_NULLS : 0;
	char	   *innerTuple;
	SpGistInnerTupleData innerTupleHdr;
	SpGistState state;
	OffsetNumber *toDelete;
	OffsetNumber *toInsert;
	uint8	   *leafPageSelect;
	Buffer		srcBuffer;
	Buffer		destBuffer;
	Page		srcPage;
	Page		destPage;
	Buffer		innerBuffer;
	Page		page;
	int			bbi;
	int			tupno;
	XLogRedoAction action;

	fillFakeState(&state, xldata->stateSrc);

	/* Walk the variable-length arrays that follow the fixed header */
	ptr += SizeOfSpgxlogPickSplit;
	toDelete = (OffsetNumber *) ptr;
	ptr += sizeof(OffsetNumber) * xldata->nDelete;
	toInsert = (OffsetNumber *) ptr;
	ptr += sizeof(OffsetNumber) * xldata->nInsert;
	leafPageSelect = (uint8 *) ptr;
	ptr += sizeof(uint8) * xldata->nInsert;

	/* Inner tuple is unaligned in the record; copy its header to read size */
	innerTuple = ptr;
	memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData));
	ptr += innerTupleHdr.size;

	/* ptr is now positioned on the first leaf tuple */

	bbi = 0;

	/* ---- source page ---- */
	if (SpGistBlockIsRoot(xldata->blknoSrc))
	{
		/* a root source is only touched later, as the new inner page */
		srcBuffer = InvalidBuffer;
		srcPage = NULL;
	}
	else if (xldata->initSrc)
	{
		/* source page is rebuilt from scratch; LSN is stamped later */
		srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true);
		Assert(BufferIsValid(srcBuffer));
		srcPage = (Page) BufferGetPage(srcBuffer);

		SpGistInitBuffer(srcBuffer, SPGIST_LEAF | nullsFlag);
	}
	else
	{
		/*
		 * Remove the listed tuples from the source page.  The buffer stays
		 * locked until the leaf tuples and the new inner tuple have been
		 * restored, so the redirect written here never dangles.
		 */
		action = XLogReadBufferForRedo(lsn, record, bbi,
									   xldata->node, xldata->blknoSrc,
									   &srcBuffer);
		if (action == BLK_NEEDS_REDO)
		{
			srcPage = BufferGetPage(srcBuffer);

			/*
			 * The inner tuple's final location is already in the record,
			 * so the redirect can point at it right away.
			 */
			if (state.isBuild)
				spgPageIndexMultiDelete(&state, srcPage,
										toDelete, xldata->nDelete,
										SPGIST_PLACEHOLDER,
										SPGIST_PLACEHOLDER,
										InvalidBlockNumber,
										InvalidOffsetNumber);
			else
				spgPageIndexMultiDelete(&state, srcPage,
										toDelete, xldata->nDelete,
										SPGIST_REDIRECT,
										SPGIST_PLACEHOLDER,
										xldata->blknoInner,
										xldata->offnumInner);

			/* LSN is stamped later, once leaf tuples are restored */
		}
		else
		{
			/* page needs no changes from this record */
			srcPage = NULL;
		}
		bbi++;
	}

	/* ---- destination page, if the split used one ---- */
	if (xldata->blknoDest == InvalidBlockNumber)
	{
		destBuffer = InvalidBuffer;
		destPage = NULL;
	}
	else if (xldata->initDest)
	{
		/* dest page is rebuilt from scratch; LSN is stamped later */
		destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true);
		Assert(BufferIsValid(destBuffer));
		destPage = (Page) BufferGetPage(destBuffer);

		SpGistInitBuffer(destBuffer, SPGIST_LEAF | nullsFlag);
	}
	else
	{
		/* the lock is kept until the leaf locks are released below */
		action = XLogReadBufferForRedo(lsn, record, bbi,
									   xldata->node, xldata->blknoDest,
									   &destBuffer);
		if (action == BLK_NEEDS_REDO)
			destPage = (Page) BufferGetPage(destBuffer);
		else
			destPage = NULL;	/* page needs no changes from this record */
		bbi++;
	}

	/* ---- put each leaf tuple back on the page its selector byte names ---- */
	for (tupno = 0; tupno < xldata->nInsert; tupno++)
	{
		char	   *leafTuple;
		SpGistLeafTupleData leafTupleHdr;

		/* leaf tuples are unaligned too; copy the header to learn the size */
		leafTuple = ptr;
		memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));
		ptr += leafTupleHdr.size;

		page = leafPageSelect[tupno] ? destPage : srcPage;

		/* a NULL page means that page is not being updated; skip the tuple */
		if (page != NULL)
			addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size,
							  toInsert[tupno]);
	}

	/* Leaf pages are complete: stamp LSNs on whichever ones were modified */
	if (srcPage != NULL)
	{
		PageSetLSN(srcPage, lsn);
		MarkBufferDirty(srcBuffer);
	}
	if (destPage != NULL)
	{
		PageSetLSN(destPage, lsn);
		MarkBufferDirty(destBuffer);
	}

	/* ---- new inner tuple ---- */
	if (xldata->initInner)
	{
		innerBuffer = XLogReadBuffer(xldata->node, xldata->blknoInner, true);
		SpGistInitBuffer(innerBuffer, nullsFlag);
		action = BLK_NEEDS_REDO;
	}
	else
		action = XLogReadBufferForRedo(lsn, record, bbi,
									   xldata->node, xldata->blknoInner,
									   &innerBuffer);

	if (action == BLK_NEEDS_REDO)
	{
		page = BufferGetPage(innerBuffer);

		addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size,
						  xldata->offnumInner);

		/* parent shares this page: repoint its downlink in the same pass */
		if (xldata->blknoInner == xldata->blknoParent)
		{
			SpGistInnerTuple parentTuple;

			parentTuple = (SpGistInnerTuple) PageGetItem(page,
								  PageGetItemId(page, xldata->offnumParent));
			spgUpdateNodeLink(parentTuple, xldata->nodeI,
							  xldata->blknoInner, xldata->offnumInner);
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(innerBuffer);
	}
	if (BufferIsValid(innerBuffer))
		UnlockReleaseBuffer(innerBuffer);
	bbi++;

	/*
	 * The inner tuple is in place, so the leaf-page locks can go now, ahead
	 * of the parent downlink update.
	 */
	if (BufferIsValid(srcBuffer))
		UnlockReleaseBuffer(srcBuffer);
	if (BufferIsValid(destBuffer))
		UnlockReleaseBuffer(destBuffer);

	/* ---- parent downlink, when not already handled with the inner page ---- */
	if (xldata->blknoParent == InvalidBlockNumber)
	{
		/* no parent exists: the split was of the root */
		Assert(SpGistBlockIsRoot(xldata->blknoInner));
	}
	else if (xldata->blknoInner != xldata->blknoParent)
	{
		Buffer		parentBuffer;

		action = XLogReadBufferForRedo(lsn, record, bbi,
									   xldata->node, xldata->blknoParent,
									   &parentBuffer);
		if (action == BLK_NEEDS_REDO)
		{
			SpGistInnerTuple parentTuple;

			page = BufferGetPage(parentBuffer);

			parentTuple = (SpGistInnerTuple) PageGetItem(page,
								  PageGetItemId(page, xldata->offnumParent));
			spgUpdateNodeLink(parentTuple, xldata->nodeI,
							  xldata->blknoInner, xldata->offnumInner);

			PageSetLSN(page, lsn);
			MarkBufferDirty(parentBuffer);
		}
		if (BufferIsValid(parentBuffer))
			UnlockReleaseBuffer(parentBuffer);
	}
}
/*
 * Replay an SP-GiST split-tuple record.
 *
 * The record holds a spgxlogSplitTuple header, then the prefix tuple, then
 * the postfix tuple.  The prefix tuple replaces the original tuple at
 * offnumPrefix on blknoPrefix; the postfix tuple goes to offnumPostfix on
 * blknoPostfix, which may or may not be the same page.
 */
static void
spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
{
	char	   *ptr = XLogRecGetData(record);
	spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) ptr;
	char	   *prefixTuple;
	SpGistInnerTupleData prefixTupleHdr;
	char	   *postfixTuple;
	SpGistInnerTupleData postfixTupleHdr;
	Buffer		buffer;
	Page		page;
	XLogRedoAction action;

	/* Neither tuple is aligned in the record, so headers are copied out */
	prefixTuple = ptr + sizeof(spgxlogSplitTuple);
	memcpy(&prefixTupleHdr, prefixTuple, sizeof(SpGistInnerTupleData));

	postfixTuple = prefixTuple + prefixTupleHdr.size;
	memcpy(&postfixTupleHdr, postfixTuple, sizeof(SpGistInnerTupleData));

	/*
	 * The two pages are updated one after the other.  The postfix tuple is
	 * written first so that the prefix tuple never points at a missing
	 * tuple.
	 */

	/* Postfix tuple on a separate page */
	if (xldata->blknoPostfix != xldata->blknoPrefix)
	{
		if (xldata->newPage)
		{
			buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, true);
			/* page is initialized as a non-leaf, non-nulls page (flags 0) */
			SpGistInitBuffer(buffer, 0);
			action = BLK_NEEDS_REDO;
		}
		else
			action = XLogReadBufferForRedo(lsn, record, 1,
										   xldata->node, xldata->blknoPostfix,
										   &buffer);

		if (action == BLK_NEEDS_REDO)
		{
			page = BufferGetPage(buffer);

			addOrReplaceTuple(page, (Item) postfixTuple,
							  postfixTupleHdr.size, xldata->offnumPostfix);

			PageSetLSN(page, lsn);
			MarkBufferDirty(buffer);
		}
		if (BufferIsValid(buffer))
			UnlockReleaseBuffer(buffer);
	}

	/* Original page: replace the old tuple with the prefix tuple */
	action = XLogReadBufferForRedo(lsn, record, 0,
								   xldata->node, xldata->blknoPrefix,
								   &buffer);
	if (action == BLK_NEEDS_REDO)
	{
		page = BufferGetPage(buffer);

		PageIndexTupleDelete(page, xldata->offnumPrefix);
		if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size,
						xldata->offnumPrefix,
						false, false) != xldata->offnumPrefix)
			elog(ERROR, "failed to add item of size %u to SPGiST index page",
				 prefixTupleHdr.size);

		/* Postfix tuple shares this page: add it here instead */
		if (xldata->blknoPostfix == xldata->blknoPrefix)
			addOrReplaceTuple(page, (Item) postfixTuple,
							  postfixTupleHdr.size,
							  xldata->offnumPostfix);

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
/*
 * Replay an SP-GiST add-node record (backup-block WAL format).
 *
 * The record holds a spgxlogAddNode header followed by the new inner tuple.
 * If blknoNew is InvalidBlockNumber the tuple is replaced in place on
 * blkno.  Otherwise the tuple is installed on blknoNew, the old tuple on
 * blkno becomes a redirect (or a placeholder when state.isBuild is set),
 * and finally the parent's downlink is repointed.
 */
static void
spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
{
	char	   *ptr = XLogRecGetData(record);
	spgxlogAddNode *xldata = (spgxlogAddNode *) ptr;
	char	   *innerTuple;
	SpGistInnerTupleData innerTupleHdr;
	SpGistState state;
	Buffer		buffer;
	Page		page;
	int			bbi;
	XLogRedoAction action;

	/* Tuple follows the header unaligned; copy its header before reading it */
	innerTuple = ptr + sizeof(spgxlogAddNode);
	memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData));

	fillFakeState(&state, xldata->stateSrc);

	/* Simple case: the tuple stays on its page and is replaced in place */
	if (xldata->blknoNew == InvalidBlockNumber)
	{
		Assert(xldata->blknoParent == InvalidBlockNumber);

		action = XLogReadBufferForRedo(lsn, record, 0,
									   xldata->node, xldata->blkno,
									   &buffer);
		if (action == BLK_NEEDS_REDO)
		{
			page = BufferGetPage(buffer);

			PageIndexTupleDelete(page, xldata->offnum);
			if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size,
							xldata->offnum,
							false, false) != xldata->offnum)
				elog(ERROR, "failed to add item of size %u to SPGiST index page",
					 innerTupleHdr.size);

			PageSetLSN(page, lsn);
			MarkBufferDirty(buffer);
		}
		if (BufferIsValid(buffer))
			UnlockReleaseBuffer(buffer);
		return;
	}

	/*
	 * Moving case.  The pages are processed one at a time in a fixed order.
	 * Block index 0 is used for blkno and 1 for blknoNew, so the two must
	 * differ for the backup-block bits and LSN checks to be unambiguous.
	 */
	Assert(xldata->blkno != xldata->blknoNew);

	/* Step 1: install the new tuple, so the redirect written later is valid */
	if (xldata->newPage)
	{
		buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, true);
		/* page is initialized as a non-leaf, non-nulls page (flags 0) */
		SpGistInitBuffer(buffer, 0);
		action = BLK_NEEDS_REDO;
	}
	else
		action = XLogReadBufferForRedo(lsn, record, 1,
									   xldata->node, xldata->blknoNew,
									   &buffer);

	if (action == BLK_NEEDS_REDO)
	{
		page = BufferGetPage(buffer);

		addOrReplaceTuple(page, (Item) innerTuple,
						  innerTupleHdr.size, xldata->offnumNew);

		/*
		 * When the parent is on this same page the LSN is left alone here;
		 * advancing it would make the parent-downlink step below conclude
		 * the page is already up to date.  That step sets the LSN instead.
		 */
		if (xldata->blknoParent != xldata->blknoNew)
		{
			PageSetLSN(page, lsn);
		}
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	/* Step 2: replace the old tuple with a redirect or placeholder */
	action = XLogReadBufferForRedo(lsn, record, 0,
								   xldata->node, xldata->blkno,
								   &buffer);
	if (action == BLK_NEEDS_REDO)
	{
		SpGistDeadTuple dt;

		page = BufferGetPage(buffer);

		if (state.isBuild)
			dt = spgFormDeadTuple(&state, SPGIST_PLACEHOLDER,
								  InvalidBlockNumber,
								  InvalidOffsetNumber);
		else
			dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
								  xldata->blknoNew,
								  xldata->offnumNew);

		PageIndexTupleDelete(page, xldata->offnum);
		if (PageAddItem(page, (Item) dt, dt->size,
						xldata->offnum,
						false, false) != xldata->offnum)
			elog(ERROR, "failed to add item of size %u to SPGiST index page",
				 dt->size);

		/* keep the page's dead-tuple counters in step with what was added */
		if (state.isBuild)
			SpGistPageGetOpaque(page)->nPlaceholder++;
		else
			SpGistPageGetOpaque(page)->nRedirection++;

		/*
		 * Same LSN rule as in step 1: leave it untouched when the parent
		 * tuple is on this page, so the downlink update still gets applied.
		 */
		if (xldata->blknoParent != xldata->blkno)
		{
			PageSetLSN(page, lsn);
		}
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	/*
	 * Step 3: parent downlink.  The parent may be one of the two pages
	 * already handled, so first work out which block index belongs to it.
	 */
	if (xldata->blknoParent == xldata->blkno)
		bbi = 0;
	else if (xldata->blknoParent == xldata->blknoNew)
		bbi = 1;
	else
		bbi = 2;

	if (record->xl_info & XLR_BKP_BLOCK(bbi))
	{
		/* indexes 0 and 1 were already passed to XLogReadBufferForRedo above */
		if (bbi == 2)
			(void) RestoreBackupBlock(lsn, record, bbi, false, false);
		action = BLK_RESTORED;
		buffer = InvalidBuffer;
	}
	else
	{
		action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
									   xldata->blknoParent, &buffer);
		Assert(action != BLK_RESTORED);
	}

	if (action == BLK_NEEDS_REDO)
	{
		SpGistInnerTuple parentTuple;

		page = BufferGetPage(buffer);

		parentTuple = (SpGistInnerTuple) PageGetItem(page,
								  PageGetItemId(page, xldata->offnumParent));

		spgUpdateNodeLink(parentTuple, xldata->nodeI,
						  xldata->blknoNew, xldata->offnumNew);

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
/*
 * Replay an SP-GiST move-leafs record.
 *
 * Record layout after the spgxlogMoveLeafs header: nMoves offsets to delete
 * from the source page, nInsert offsets to insert at on the destination
 * page, then nInsert leaf tuples.  nInsert is 1 when replaceDead is set and
 * nMoves + 1 otherwise.  The last insert offset is what both the redirect
 * on the source page and the parent's downlink are pointed at.
 */
static void
spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
{
	char	   *ptr = XLogRecGetData(record);
	spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr;
	SpGistState state;
	OffsetNumber *toDelete;
	OffsetNumber *toInsert;
	OffsetNumber lastInsertOffnum;
	int			nInsert;
	int			firstDeadState;
	Buffer		buffer;
	Page		page;
	XLogRedoAction action;

	fillFakeState(&state, xldata->stateSrc);

	if (xldata->replaceDead)
		nInsert = 1;
	else
		nInsert = xldata->nMoves + 1;

	/* Locate the two offset arrays; afterwards ptr is on the leaf tuples */
	ptr += SizeOfSpgxlogMoveLeafs;
	toDelete = (OffsetNumber *) ptr;
	ptr += sizeof(OffsetNumber) * xldata->nMoves;
	toInsert = (OffsetNumber *) ptr;
	ptr += sizeof(OffsetNumber) * nInsert;

	lastInsertOffnum = toInsert[nInsert - 1];

	/*
	 * The three pages (destination, source, parent) are updated one at a
	 * time, destination first so that the redirect left on the source page
	 * points at existing tuples.
	 */

	/* Step 1: insert the tuples on the destination page */
	if (xldata->newPage)
	{
		buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, true);
		SpGistInitBuffer(buffer,
					 SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
		action = BLK_NEEDS_REDO;
	}
	else
		action = XLogReadBufferForRedo(lsn, record, 1,
									   xldata->node, xldata->blknoDst,
									   &buffer);

	if (action == BLK_NEEDS_REDO)
	{
		int			tupno;

		page = BufferGetPage(buffer);

		for (tupno = 0; tupno < nInsert; tupno++)
		{
			char	   *leafTuple;
			SpGistLeafTupleData leafTupleHdr;

			/* tuples are unaligned; copy the header to read the size field */
			leafTuple = ptr;
			memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));

			addOrReplaceTuple(page, (Item) leafTuple,
							  leafTupleHdr.size, toInsert[tupno]);
			ptr += leafTupleHdr.size;
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	/* Step 2: delete from the source page, leaving a redirect/placeholder */
	action = XLogReadBufferForRedo(lsn, record, 0,
								   xldata->node, xldata->blknoSrc,
								   &buffer);
	if (action == BLK_NEEDS_REDO)
	{
		page = BufferGetPage(buffer);

		if (state.isBuild)
			firstDeadState = SPGIST_PLACEHOLDER;
		else
			firstDeadState = SPGIST_REDIRECT;

		spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves,
								firstDeadState,
								SPGIST_PLACEHOLDER,
								xldata->blknoDst,
								lastInsertOffnum);

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	/* Step 3: repoint the parent's downlink at the destination */
	action = XLogReadBufferForRedo(lsn, record, 2,
								   xldata->node, xldata->blknoParent,
								   &buffer);
	if (action == BLK_NEEDS_REDO)
	{
		SpGistInnerTuple parentTuple;

		page = BufferGetPage(buffer);

		parentTuple = (SpGistInnerTuple) PageGetItem(page,
								  PageGetItemId(page, xldata->offnumParent));

		spgUpdateNodeLink(parentTuple, xldata->nodeI,
						  xldata->blknoDst, lastInsertOffnum);

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);
}
/*
 * Replay an SP-GiST add-leaf record.
 *
 * The record holds a spgxlogAddLeaf header followed by the leaf tuple.  The
 * tuple is placed at offnumLeaf on blknoLeaf; when blknoParent is valid,
 * the parent's downlink is then pointed at it.
 */
static void
spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
{
	char	   *ptr = XLogRecGetData(record);
	spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) ptr;
	char	   *leafTuple;
	SpGistLeafTupleData leafTupleHdr;
	Buffer		buffer;
	Page		page;
	XLogRedoAction action;

	/* Leaf tuple follows the header unaligned; copy out its header */
	leafTuple = ptr + sizeof(spgxlogAddLeaf);
	memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));

	/*
	 * The leaf page is updated first and released before the parent page
	 * is read.
	 */
	if (xldata->newPage)
	{
		buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, true);
		SpGistInitBuffer(buffer,
					 SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
		action = BLK_NEEDS_REDO;
	}
	else
		action = XLogReadBufferForRedo(lsn, record, 0,
									   xldata->node, xldata->blknoLeaf,
									   &buffer);

	if (action == BLK_NEEDS_REDO)
	{
		page = BufferGetPage(buffer);

		if (xldata->offnumLeaf == xldata->offnumHeadLeaf)
		{
			/* the new tuple takes over the slot of a DEAD tuple */
			PageIndexTupleDelete(page, xldata->offnumLeaf);
			if (PageAddItem(page,
							(Item) leafTuple, leafTupleHdr.size,
							xldata->offnumLeaf,
							false, false) != xldata->offnumLeaf)
				elog(ERROR, "failed to add item of size %u to SPGiST index page",
					 leafTupleHdr.size);
		}
		else
		{
			/* ordinary insertion at offnumLeaf */
			addOrReplaceTuple(page, (Item) leafTuple,
							  leafTupleHdr.size, xldata->offnumLeaf);

			/* link the new tuple in right after the chain's head tuple */
			if (xldata->offnumHeadLeaf != InvalidOffsetNumber)
			{
				SpGistLeafTuple headTuple;

				headTuple = (SpGistLeafTuple) PageGetItem(page,
								PageGetItemId(page, xldata->offnumHeadLeaf));
				Assert(headTuple->nextOffset == leafTupleHdr.nextOffset);
				headTuple->nextOffset = xldata->offnumLeaf;
			}
		}

		PageSetLSN(page, lsn);
		MarkBufferDirty(buffer);
	}
	if (BufferIsValid(buffer))
		UnlockReleaseBuffer(buffer);

	/* Parent downlink, only when the record names a parent block */
	if (xldata->blknoParent != InvalidBlockNumber)
	{
		action = XLogReadBufferForRedo(lsn, record, 1,
									   xldata->node, xldata->blknoParent,
									   &buffer);
		if (action == BLK_NEEDS_REDO)
		{
			SpGistInnerTuple parentTuple;

			page = BufferGetPage(buffer);

			parentTuple = (SpGistInnerTuple) PageGetItem(page,
								  PageGetItemId(page, xldata->offnumParent));

			spgUpdateNodeLink(parentTuple, xldata->nodeI,
							  xldata->blknoLeaf, xldata->offnumLeaf);

			PageSetLSN(page, lsn);
			MarkBufferDirty(buffer);
		}
		if (BufferIsValid(buffer))
			UnlockReleaseBuffer(buffer);
	}
}
/*
 * Vacuum a single index page as part of a bulkdelete scan.
 *
 * The page is read and exclusively locked, cleaned according to its kind
 * (new, deleted, leaf root, other leaf, inner), and then, unless it is a
 * root page, marked deleted if empty and reported to the FSM if deleted.
 */
static void
spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
{
	Relation	index = bds->info->index;
	Buffer		buffer;
	Page		page;

	/* the delay point is taken before any buffer lock is acquired */
	vacuum_delay_point();

	buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
								RBM_NORMAL, bds->info->strategy);
	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
	page = (Page) BufferGetPage(buffer);

	if (PageIsNew(page))
	{
		/*
		 * An all-zero page, e.g. left behind by a crash right after the
		 * file was extended.  Initialize it and flag it deleted so it can
		 * be recycled.  This is not WAL-logged.
		 */
		SpGistInitBuffer(buffer, 0);
		SpGistPageSetDeleted(page);
		MarkBufferDirty(buffer);
	}
	else if (SpGistPageIsDeleted(page))
	{
		/* already deleted: no cleanup needed */
	}
	else if (SpGistPageIsLeaf(page) && SpGistBlockIsRoot(blkno))
	{
		/* leaf root page; redirect/placeholder cleanup is skipped for it */
		vacuumLeafRoot(bds, index, buffer);
	}
	else
	{
		/* non-root leaf pages get tuple cleanup first; inner pages do not */
		if (SpGistPageIsLeaf(page))
			vacuumLeafPage(bds, index, buffer, false);
		vacuumRedirectAndPlaceholder(index, buffer);
	}

	/*
	 * Root pages are never deleted and never advertised in the FSM, since
	 * they must not be handed out as a place for new tuples.  Any other
	 * page is checked for emptiness and reported to the FSM when deleted.
	 */
	if (!SpGistBlockIsRoot(blkno))
	{
		/* an empty, not-yet-deleted page is flagged deleted (not WAL-logged) */
		if (PageIsEmpty(page) && !SpGistPageIsDeleted(page))
		{
			SpGistPageSetDeleted(page);
			MarkBufferDirty(buffer);
		}

		if (SpGistPageIsDeleted(page))
		{
			RecordFreeIndexPage(index, blkno);
			bds->stats->pages_deleted++;
		}
		else
			bds->lastFilledBlock = blkno;
	}

	SpGistSetLastUsedPage(index, buffer);

	UnlockReleaseBuffer(buffer);
}
/* * Build an SP-GiST index. */ Datum spgbuild(PG_FUNCTION_ARGS) { Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); IndexBuildResult *result; double reltuples; SpGistBuildState buildstate; Buffer metabuffer, rootbuffer; if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); /* * Initialize the meta page and root page */ metabuffer = SpGistNewBuffer(index); rootbuffer = SpGistNewBuffer(index); Assert(BufferGetBlockNumber(metabuffer) == SPGIST_METAPAGE_BLKNO); Assert(BufferGetBlockNumber(rootbuffer) == SPGIST_HEAD_BLKNO); START_CRIT_SECTION(); SpGistInitMetapage(BufferGetPage(metabuffer)); MarkBufferDirty(metabuffer); SpGistInitBuffer(rootbuffer, SPGIST_LEAF); MarkBufferDirty(rootbuffer); if (RelationNeedsWAL(index)) { XLogRecPtr recptr; XLogRecData rdata; /* WAL data is just the relfilenode */ rdata.data = (char *) &(index->rd_node); rdata.len = sizeof(RelFileNode); rdata.buffer = InvalidBuffer; rdata.next = NULL; recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX, &rdata); PageSetLSN(BufferGetPage(metabuffer), recptr); PageSetTLI(BufferGetPage(metabuffer), ThisTimeLineID); PageSetLSN(BufferGetPage(rootbuffer), recptr); PageSetTLI(BufferGetPage(rootbuffer), ThisTimeLineID); } END_CRIT_SECTION(); UnlockReleaseBuffer(metabuffer); UnlockReleaseBuffer(rootbuffer); /* * Now insert all the heap data into the index */ initSpGistState(&buildstate.spgstate, index); buildstate.spgstate.isBuild = true; buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, "SP-GiST build temporary context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, spgistBuildCallback, (void *) &buildstate); MemoryContextDelete(buildstate.tmpCtx); SpGistUpdateMetaPage(index); result = (IndexBuildResult *) 
palloc0(sizeof(IndexBuildResult)); result->heap_tuples = result->index_tuples = reltuples; PG_RETURN_POINTER(result); }