static void spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogVacuumLeaf *xldata = (spgxlogVacuumLeaf *) ptr; OffsetNumber *toDead; OffsetNumber *toPlaceholder; OffsetNumber *moveSrc; OffsetNumber *moveDest; OffsetNumber *chainSrc; OffsetNumber *chainDest; SpGistState state; Buffer buffer; Page page; int i; fillFakeState(&state, xldata->stateSrc); ptr += sizeof(spgxlogVacuumLeaf); toDead = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nDead; toPlaceholder = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nPlaceholder; moveSrc = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nMove; moveDest = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nMove; chainSrc = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nChain; chainDest = (OffsetNumber *) ptr; if (!(record->xl_info & XLR_BKP_BLOCK_1)) { buffer = XLogReadBuffer(xldata->node, xldata->blkno, false); if (BufferIsValid(buffer)) { page = BufferGetPage(buffer); if (!XLByteLE(lsn, PageGetLSN(page))) { spgPageIndexMultiDelete(&state, page, toDead, xldata->nDead, SPGIST_DEAD, SPGIST_DEAD, InvalidBlockNumber, InvalidOffsetNumber); spgPageIndexMultiDelete(&state, page, toPlaceholder, xldata->nPlaceholder, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); /* see comments in vacuumLeafPage() */ for (i = 0; i < xldata->nMove; i++) { ItemId idSrc = PageGetItemId(page, moveSrc[i]); ItemId idDest = PageGetItemId(page, moveDest[i]); ItemIdData tmp; tmp = *idSrc; *idSrc = *idDest; *idDest = tmp; } spgPageIndexMultiDelete(&state, page, moveSrc, xldata->nMove, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); for (i = 0; i < xldata->nChain; i++) { SpGistLeafTuple lt; lt = (SpGistLeafTuple) PageGetItem(page, PageGetItemId(page, chainSrc[i])); Assert(lt->tupstate == SPGIST_LIVE); lt->nextOffset = chainDest[i]; } PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); } } }
/* * Vacuum a regular (non-root) leaf page * * We must delete tuples that are targeted for deletion by the VACUUM, * but not move any tuples that are referenced by outside links; we assume * those are the ones that are heads of chains. * * If we find a REDIRECT that was made by a concurrently-running transaction, * we must add its target TID to pendingList. (We don't try to visit the * target immediately, first because we don't want VACUUM locking more than * one buffer at a time, and second because the duplicate-filtering logic * in spgAddPendingTID is useful to ensure we can't get caught in an infinite * loop in the face of continuous concurrent insertions.) * * If forPending is true, we are examining the page as a consequence of * chasing a redirect link, not as part of the normal sequential scan. * We still vacuum the page normally, but we don't increment the stats * about live tuples; else we'd double-count those tuples, since the page * has been or will be visited in the sequential scan as well. */ static void vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer, bool forPending) { Page page = BufferGetPage(buffer); spgxlogVacuumLeaf xlrec; XLogRecData rdata[8]; OffsetNumber toDead[MaxIndexTuplesPerPage]; OffsetNumber toPlaceholder[MaxIndexTuplesPerPage]; OffsetNumber moveSrc[MaxIndexTuplesPerPage]; OffsetNumber moveDest[MaxIndexTuplesPerPage]; OffsetNumber chainSrc[MaxIndexTuplesPerPage]; OffsetNumber chainDest[MaxIndexTuplesPerPage]; OffsetNumber predecessor[MaxIndexTuplesPerPage + 1]; bool deletable[MaxIndexTuplesPerPage + 1]; int nDeletable; OffsetNumber i, max = PageGetMaxOffsetNumber(page); memset(predecessor, 0, sizeof(predecessor)); memset(deletable, 0, sizeof(deletable)); nDeletable = 0; /* Scan page, identify tuples to delete, accumulate stats */ for (i = FirstOffsetNumber; i <= max; i++) { SpGistLeafTuple lt; lt = (SpGistLeafTuple) PageGetItem(page, PageGetItemId(page, i)); if (lt->tupstate == SPGIST_LIVE) { Assert(ItemPointerIsValid(<->heapPtr)); if (bds->callback(<->heapPtr, bds->callback_state)) { bds->stats->tuples_removed += 1; deletable[i] = true; nDeletable++; } else { if (!forPending) bds->stats->num_index_tuples += 1; } /* Form predecessor map, too */ if (lt->nextOffset != InvalidOffsetNumber) { /* paranoia about corrupted chain links */ if (lt->nextOffset < FirstOffsetNumber || lt->nextOffset > max || predecessor[lt->nextOffset] != InvalidOffsetNumber) elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"", BufferGetBlockNumber(buffer), RelationGetRelationName(index)); predecessor[lt->nextOffset] = i; } } else if (lt->tupstate == SPGIST_REDIRECT) { SpGistDeadTuple dt = (SpGistDeadTuple) lt; Assert(dt->nextOffset == InvalidOffsetNumber); Assert(ItemPointerIsValid(&dt->pointer)); /* * Add target TID to pending list if the redirection could have * happened since VACUUM started. * * Note: we could make a tighter test by seeing if the xid is * "running" according to the active snapshot; but tqual.c doesn't * currently export a suitable API, and it's not entirely clear * that a tighter test is worth the cycles anyway. */ if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin)) spgAddPendingTID(bds, &dt->pointer); } else { Assert(lt->nextOffset == InvalidOffsetNumber); } } if (nDeletable == 0) return; /* nothing more to do */ /*---------- * Figure out exactly what we have to do. We do this separately from * actually modifying the page, mainly so that we have a representation * that can be dumped into WAL and then the replay code can do exactly * the same thing. The output of this step consists of six arrays * describing four kinds of operations, to be performed in this order: * * toDead[]: tuple numbers to be replaced with DEAD tuples * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples * moveSrc[]: tuple numbers that need to be relocated to another offset * (replacing the tuple there) and then replaced with PLACEHOLDER tuples * moveDest[]: new locations for moveSrc tuples * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates * chainDest[]: new values of nextOffset for chainSrc members * * It's easiest to figure out what we have to do by processing tuple * chains, so we iterate over all the tuples (not just the deletable * ones!) to identify chain heads, then chase down each chain and make * work item entries for deletable tuples within the chain. *---------- */ xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0; for (i = FirstOffsetNumber; i <= max; i++) { SpGistLeafTuple head; bool interveningDeletable; OffsetNumber prevLive; OffsetNumber j; head = (SpGistLeafTuple) PageGetItem(page, PageGetItemId(page, i)); if (head->tupstate != SPGIST_LIVE) continue; /* can't be a chain member */ if (predecessor[i] != 0) continue; /* not a chain head */ /* initialize ... */ interveningDeletable = false; prevLive = deletable[i] ? InvalidOffsetNumber : i; /* scan down the chain ... */ j = head->nextOffset; while (j != InvalidOffsetNumber) { SpGistLeafTuple lt; lt = (SpGistLeafTuple) PageGetItem(page, PageGetItemId(page, j)); if (lt->tupstate != SPGIST_LIVE) { /* all tuples in chain should be live */ elog(ERROR, "unexpected SPGiST tuple state: %d", lt->tupstate); } if (deletable[j]) { /* This tuple should be replaced by a placeholder */ toPlaceholder[xlrec.nPlaceholder] = j; xlrec.nPlaceholder++; /* previous live tuple's chain link will need an update */ interveningDeletable = true; } else if (prevLive == InvalidOffsetNumber) { /* * This is the first live tuple in the chain. It has to move * to the head position. */ moveSrc[xlrec.nMove] = j; moveDest[xlrec.nMove] = i; xlrec.nMove++; /* Chain updates will be applied after the move */ prevLive = i; interveningDeletable = false; } else { /* * Second or later live tuple. Arrange to re-chain it to the * previous live one, if there was a gap. */ if (interveningDeletable) { chainSrc[xlrec.nChain] = prevLive; chainDest[xlrec.nChain] = j; xlrec.nChain++; } prevLive = j; interveningDeletable = false; } j = lt->nextOffset; } if (prevLive == InvalidOffsetNumber) { /* The chain is entirely removable, so we need a DEAD tuple */ toDead[xlrec.nDead] = i; xlrec.nDead++; } else if (interveningDeletable) { /* One or more deletions at end of chain, so close it off */ chainSrc[xlrec.nChain] = prevLive; chainDest[xlrec.nChain] = InvalidOffsetNumber; xlrec.nChain++; } } /* sanity check ... */ if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove) elog(ERROR, "inconsistent counts of deletable tuples"); /* Prepare WAL record */ xlrec.node = index->rd_node; xlrec.blkno = BufferGetBlockNumber(buffer); STORE_STATE(&bds->spgstate, xlrec.stateSrc); ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumLeaf, 0); ACCEPT_RDATA_DATA(toDead, sizeof(OffsetNumber) * xlrec.nDead, 1); ACCEPT_RDATA_DATA(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder, 2); ACCEPT_RDATA_DATA(moveSrc, sizeof(OffsetNumber) * xlrec.nMove, 3); ACCEPT_RDATA_DATA(moveDest, sizeof(OffsetNumber) * xlrec.nMove, 4); ACCEPT_RDATA_DATA(chainSrc, sizeof(OffsetNumber) * xlrec.nChain, 5); ACCEPT_RDATA_DATA(chainDest, sizeof(OffsetNumber) * xlrec.nChain, 6); ACCEPT_RDATA_BUFFER(buffer, 7); /* Do the updates */ START_CRIT_SECTION(); spgPageIndexMultiDelete(&bds->spgstate, page, toDead, xlrec.nDead, SPGIST_DEAD, SPGIST_DEAD, InvalidBlockNumber, InvalidOffsetNumber); spgPageIndexMultiDelete(&bds->spgstate, page, toPlaceholder, xlrec.nPlaceholder, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); /* * We implement the move step by swapping the item pointers of the source * and target tuples, then replacing the newly-source tuples with * placeholders. This is perhaps unduly friendly with the page data * representation, but it's fast and doesn't risk page overflow when a * tuple to be relocated is large. */ for (i = 0; i < xlrec.nMove; i++) { ItemId idSrc = PageGetItemId(page, moveSrc[i]); ItemId idDest = PageGetItemId(page, moveDest[i]); ItemIdData tmp; tmp = *idSrc; *idSrc = *idDest; *idDest = tmp; } spgPageIndexMultiDelete(&bds->spgstate, page, moveSrc, xlrec.nMove, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); for (i = 0; i < xlrec.nChain; i++) { SpGistLeafTuple lt; lt = (SpGistLeafTuple) PageGetItem(page, PageGetItemId(page, chainSrc[i])); Assert(lt->tupstate == SPGIST_LIVE); lt->nextOffset = chainDest[i]; } MarkBufferDirty(buffer); if (RelationNeedsWAL(index)) { XLogRecPtr recptr; recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, rdata); PageSetLSN(page, recptr); } END_CRIT_SECTION(); }
static void spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr; SpGistInnerTuple innerTuple; SpGistState state; OffsetNumber *toDelete; OffsetNumber *toInsert; uint8 *leafPageSelect; Buffer srcBuffer; Buffer destBuffer; Page page; int bbi; int i; fillFakeState(&state, xldata->stateSrc); ptr += MAXALIGN(sizeof(spgxlogPickSplit)); innerTuple = (SpGistInnerTuple) ptr; ptr += innerTuple->size; toDelete = (OffsetNumber *) ptr; ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nDelete); toInsert = (OffsetNumber *) ptr; ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nInsert); leafPageSelect = (uint8 *) ptr; ptr += MAXALIGN(sizeof(uint8) * xldata->nInsert); /* now ptr points to the list of leaf tuples */ /* * It's a bit tricky to identify which pages have been handled as * full-page images, so we explicitly count each referenced buffer. */ bbi = 0; if (xldata->blknoSrc == SPGIST_HEAD_BLKNO) { /* when splitting root, we touch it only in the guise of new inner */ srcBuffer = InvalidBuffer; } else if (xldata->initSrc) { /* just re-init the source page */ srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true); Assert(BufferIsValid(srcBuffer)); page = (Page) BufferGetPage(srcBuffer); SpGistInitBuffer(srcBuffer, SPGIST_LEAF); /* don't update LSN etc till we're done with it */ } else { /* delete the specified tuples from source page */ if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi))) { srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, false); if (BufferIsValid(srcBuffer)) { page = BufferGetPage(srcBuffer); if (!XLByteLE(lsn, PageGetLSN(page))) { /* * We have it a bit easier here than in doPickSplit(), * because we know the inner tuple's location already, * so we can inject the correct redirection tuple now. */ if (!state.isBuild) spgPageIndexMultiDelete(&state, page, toDelete, xldata->nDelete, SPGIST_REDIRECT, SPGIST_PLACEHOLDER, xldata->blknoInner, xldata->offnumInner); else spgPageIndexMultiDelete(&state, page, toDelete, xldata->nDelete, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); /* don't update LSN etc till we're done with it */ } } } else srcBuffer = InvalidBuffer; bbi++; } /* try to access dest page if any */ if (xldata->blknoDest == InvalidBlockNumber) { destBuffer = InvalidBuffer; } else if (xldata->initDest) { /* just re-init the dest page */ destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true); Assert(BufferIsValid(destBuffer)); page = (Page) BufferGetPage(destBuffer); SpGistInitBuffer(destBuffer, SPGIST_LEAF); /* don't update LSN etc till we're done with it */ } else { if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi))) destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, false); else destBuffer = InvalidBuffer; bbi++; } /* restore leaf tuples to src and/or dest page */ for (i = 0; i < xldata->nInsert; i++) { SpGistLeafTuple lt = (SpGistLeafTuple) ptr; Buffer leafBuffer; ptr += lt->size; leafBuffer = leafPageSelect[i] ? destBuffer : srcBuffer; if (!BufferIsValid(leafBuffer)) continue; /* no need to touch this page */ page = BufferGetPage(leafBuffer); if (!XLByteLE(lsn, PageGetLSN(page))) { addOrReplaceTuple(page, (Item) lt, lt->size, toInsert[i]); } } /* Now update src and dest page LSNs */ if (BufferIsValid(srcBuffer)) { page = BufferGetPage(srcBuffer); if (!XLByteLE(lsn, PageGetLSN(page))) { PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(srcBuffer); } UnlockReleaseBuffer(srcBuffer); } if (BufferIsValid(destBuffer)) { page = BufferGetPage(destBuffer); if (!XLByteLE(lsn, PageGetLSN(page))) { PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(destBuffer); } UnlockReleaseBuffer(destBuffer); } /* restore new inner tuple */ if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi))) { Buffer buffer = XLogReadBuffer(xldata->node, xldata->blknoInner, xldata->initInner); if (BufferIsValid(buffer)) { page = BufferGetPage(buffer); if (xldata->initInner) SpGistInitBuffer(buffer, 0); if (!XLByteLE(lsn, PageGetLSN(page))) { addOrReplaceTuple(page, (Item) innerTuple, innerTuple->size, xldata->offnumInner); /* if inner is also parent, update link while we're here */ if (xldata->blknoInner == xldata->blknoParent) { SpGistInnerTuple parent; parent = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parent, xldata->nodeI, xldata->blknoInner, xldata->offnumInner); } PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); } } bbi++; /* update parent downlink, unless we did it above */ if (xldata->blknoParent == InvalidBlockNumber) { /* no parent cause we split the root */ Assert(xldata->blknoInner == SPGIST_HEAD_BLKNO); } else if (xldata->blknoInner != xldata->blknoParent) { if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi))) { Buffer buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false); if (BufferIsValid(buffer)) { page = BufferGetPage(buffer); if (!XLByteLE(lsn, PageGetLSN(page))) { SpGistInnerTuple parent; parent = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parent, xldata->nodeI, xldata->blknoInner, xldata->offnumInner); PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); } } } }
static void spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr; SpGistState state; OffsetNumber *toDelete; OffsetNumber *toInsert; int nInsert; Buffer buffer; Page page; fillFakeState(&state, xldata->stateSrc); nInsert = xldata->replaceDead ? 1 : xldata->nMoves + 1; ptr += MAXALIGN(sizeof(spgxlogMoveLeafs)); toDelete = (OffsetNumber *) ptr; ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nMoves); toInsert = (OffsetNumber *) ptr; ptr += MAXALIGN(sizeof(OffsetNumber) * nInsert); /* now ptr points to the list of leaf tuples */ /* Insert tuples on the dest page (do first, so redirect is valid) */ if (!(record->xl_info & XLR_BKP_BLOCK_2)) { buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, xldata->newPage); if (BufferIsValid(buffer)) { page = BufferGetPage(buffer); if (xldata->newPage) SpGistInitBuffer(buffer, SPGIST_LEAF); if (!XLByteLE(lsn, PageGetLSN(page))) { int i; for (i = 0; i < nInsert; i++) { SpGistLeafTuple lt = (SpGistLeafTuple) ptr; addOrReplaceTuple(page, (Item) lt, lt->size, toInsert[i]); ptr += lt->size; } PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); } } /* Delete tuples from the source page, inserting a redirection pointer */ if (!(record->xl_info & XLR_BKP_BLOCK_1)) { buffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, false); if (BufferIsValid(buffer)) { page = BufferGetPage(buffer); if (!XLByteLE(lsn, PageGetLSN(page))) { spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves, state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT, SPGIST_PLACEHOLDER, xldata->blknoDst, toInsert[nInsert - 1]); PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); } } /* And update the parent downlink */ if (!(record->xl_info & XLR_BKP_BLOCK_3)) { buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false); if (BufferIsValid(buffer)) { page = BufferGetPage(buffer); if (!XLByteLE(lsn, PageGetLSN(page))) { SpGistInnerTuple tuple; tuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(tuple, xldata->nodeI, xldata->blknoDst, toInsert[nInsert - 1]); PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); } UnlockReleaseBuffer(buffer); } } }
static void spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr; char *innerTuple; SpGistInnerTupleData innerTupleHdr; SpGistState state; OffsetNumber *toDelete; OffsetNumber *toInsert; uint8 *leafPageSelect; Buffer srcBuffer; Buffer destBuffer; Page srcPage; Page destPage; Buffer innerBuffer; Page page; int bbi; int i; XLogRedoAction action; fillFakeState(&state, xldata->stateSrc); ptr += SizeOfSpgxlogPickSplit; toDelete = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nDelete; toInsert = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nInsert; leafPageSelect = (uint8 *) ptr; ptr += sizeof(uint8) * xldata->nInsert; innerTuple = ptr; /* the inner tuple is unaligned, so make a copy to access its header */ memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData)); ptr += innerTupleHdr.size; /* now ptr points to the list of leaf tuples */ /* * It's a bit tricky to identify which pages have been handled as * full-page images, so we explicitly count each referenced buffer. */ bbi = 0; if (SpGistBlockIsRoot(xldata->blknoSrc)) { /* when splitting root, we touch it only in the guise of new inner */ srcBuffer = InvalidBuffer; srcPage = NULL; } else if (xldata->initSrc) { /* just re-init the source page */ srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true); Assert(BufferIsValid(srcBuffer)); srcPage = (Page) BufferGetPage(srcBuffer); SpGistInitBuffer(srcBuffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); /* don't update LSN etc till we're done with it */ } else { /* * Delete the specified tuples from source page. (In case we're in * Hot Standby, we need to hold lock on the page till we're done * inserting leaf tuples and the new inner tuple, else the added * redirect tuple will be a dangling link.) */ if (XLogReadBufferForRedo(lsn, record, bbi, xldata->node, xldata->blknoSrc, &srcBuffer) == BLK_NEEDS_REDO) { srcPage = BufferGetPage(srcBuffer); /* * We have it a bit easier here than in doPickSplit(), because we * know the inner tuple's location already, so we can inject the * correct redirection tuple now. */ if (!state.isBuild) spgPageIndexMultiDelete(&state, srcPage, toDelete, xldata->nDelete, SPGIST_REDIRECT, SPGIST_PLACEHOLDER, xldata->blknoInner, xldata->offnumInner); else spgPageIndexMultiDelete(&state, srcPage, toDelete, xldata->nDelete, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); /* don't update LSN etc till we're done with it */ } else { srcPage = NULL; /* don't do any page updates */ } bbi++; } /* try to access dest page if any */ if (xldata->blknoDest == InvalidBlockNumber) { destBuffer = InvalidBuffer; destPage = NULL; } else if (xldata->initDest) { /* just re-init the dest page */ destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true); Assert(BufferIsValid(destBuffer)); destPage = (Page) BufferGetPage(destBuffer); SpGistInitBuffer(destBuffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); /* don't update LSN etc till we're done with it */ } else { /* * We could probably release the page lock immediately in the * full-page-image case, but for safety let's hold it till later. */ if (XLogReadBufferForRedo(lsn, record, bbi, xldata->node, xldata->blknoDest, &destBuffer) == BLK_NEEDS_REDO) { destPage = (Page) BufferGetPage(destBuffer); } else { destPage = NULL; /* don't do any page updates */ } bbi++; } /* restore leaf tuples to src and/or dest page */ for (i = 0; i < xldata->nInsert; i++) { char *leafTuple; SpGistLeafTupleData leafTupleHdr; /* the tuples are not aligned, so must copy to access the size field. */ leafTuple = ptr; memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); ptr += leafTupleHdr.size; page = leafPageSelect[i] ? destPage : srcPage; if (page == NULL) continue; /* no need to touch this page */ addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, toInsert[i]); } /* Now update src and dest page LSNs if needed */ if (srcPage != NULL) { PageSetLSN(srcPage, lsn); MarkBufferDirty(srcBuffer); } if (destPage != NULL) { PageSetLSN(destPage, lsn); MarkBufferDirty(destBuffer); } /* restore new inner tuple */ if (xldata->initInner) { innerBuffer = XLogReadBuffer(xldata->node, xldata->blknoInner, true); SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node, xldata->blknoInner, &innerBuffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(innerBuffer); addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnumInner); /* if inner is also parent, update link while we're here */ if (xldata->blknoInner == xldata->blknoParent) { SpGistInnerTuple parent; parent = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parent, xldata->nodeI, xldata->blknoInner, xldata->offnumInner); } PageSetLSN(page, lsn); MarkBufferDirty(innerBuffer); } if (BufferIsValid(innerBuffer)) UnlockReleaseBuffer(innerBuffer); bbi++; /* * Now we can release the leaf-page locks. It's okay to do this before * updating the parent downlink. */ if (BufferIsValid(srcBuffer)) UnlockReleaseBuffer(srcBuffer); if (BufferIsValid(destBuffer)) UnlockReleaseBuffer(destBuffer); /* update parent downlink, unless we did it above */ if (xldata->blknoParent == InvalidBlockNumber) { /* no parent cause we split the root */ Assert(SpGistBlockIsRoot(xldata->blknoInner)); } else if (xldata->blknoInner != xldata->blknoParent) { Buffer parentBuffer; if (XLogReadBufferForRedo(lsn, record, bbi, xldata->node, xldata->blknoParent, &parentBuffer) == BLK_NEEDS_REDO) { SpGistInnerTuple parent; page = BufferGetPage(parentBuffer); parent = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parent, xldata->nodeI, xldata->blknoInner, xldata->offnumInner); PageSetLSN(page, lsn); MarkBufferDirty(parentBuffer); } if (BufferIsValid(parentBuffer)) UnlockReleaseBuffer(parentBuffer); } }
static void spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr; SpGistState state; OffsetNumber *toDelete; OffsetNumber *toInsert; int nInsert; Buffer buffer; Page page; XLogRedoAction action; fillFakeState(&state, xldata->stateSrc); nInsert = xldata->replaceDead ? 1 : xldata->nMoves + 1; ptr += SizeOfSpgxlogMoveLeafs; toDelete = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nMoves; toInsert = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * nInsert; /* now ptr points to the list of leaf tuples */ /* * In normal operation we would have all three pages (source, dest, and * parent) locked simultaneously; but in WAL replay it should be safe to * update them one at a time, as long as we do it in the right order. */ /* Insert tuples on the dest page (do first, so redirect is valid) */ if (xldata->newPage) { buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, true); SpGistInitBuffer(buffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(lsn, record, 1, xldata->node, xldata->blknoDst, &buffer); if (action == BLK_NEEDS_REDO) { int i; page = BufferGetPage(buffer); for (i = 0; i < nInsert; i++) { char *leafTuple; SpGistLeafTupleData leafTupleHdr; /* * the tuples are not aligned, so must copy to access the size * field. */ leafTuple = ptr; memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, toInsert[i]); ptr += leafTupleHdr.size; } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Delete tuples from the source page, inserting a redirection pointer */ if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoSrc, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves, state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT, SPGIST_PLACEHOLDER, xldata->blknoDst, toInsert[nInsert - 1]); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* And update the parent downlink */ if (XLogReadBufferForRedo(lsn, record, 2, xldata->node, xldata->blknoParent, &buffer) == BLK_NEEDS_REDO) { SpGistInnerTuple tuple; page = BufferGetPage(buffer); tuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(tuple, xldata->nodeI, xldata->blknoDst, toInsert[nInsert - 1]); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void spgRedoVacuumLeaf(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogVacuumLeaf *xldata = (spgxlogVacuumLeaf *) ptr; OffsetNumber *toDead; OffsetNumber *toPlaceholder; OffsetNumber *moveSrc; OffsetNumber *moveDest; OffsetNumber *chainSrc; OffsetNumber *chainDest; SpGistState state; Buffer buffer; Page page; int i; fillFakeState(&state, xldata->stateSrc); ptr += SizeOfSpgxlogVacuumLeaf; toDead = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nDead; toPlaceholder = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nPlaceholder; moveSrc = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nMove; moveDest = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nMove; chainSrc = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nChain; chainDest = (OffsetNumber *) ptr; if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); spgPageIndexMultiDelete(&state, page, toDead, xldata->nDead, SPGIST_DEAD, SPGIST_DEAD, InvalidBlockNumber, InvalidOffsetNumber); spgPageIndexMultiDelete(&state, page, toPlaceholder, xldata->nPlaceholder, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); /* see comments in vacuumLeafPage() */ for (i = 0; i < xldata->nMove; i++) { ItemId idSrc = PageGetItemId(page, moveSrc[i]); ItemId idDest = PageGetItemId(page, moveDest[i]); ItemIdData tmp; tmp = *idSrc; *idSrc = *idDest; *idDest = tmp; } spgPageIndexMultiDelete(&state, page, moveSrc, xldata->nMove, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); for (i = 0; i < xldata->nChain; i++) { SpGistLeafTuple lt; lt = (SpGistLeafTuple) PageGetItem(page, PageGetItemId(page, chainSrc[i])); Assert(lt->tupstate == SPGIST_LIVE); lt->nextOffset = chainDest[i]; } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }