static void spgRedoCreateIndex(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; Buffer buffer; Page page; buffer = XLogInitBufferForRedo(record, 0); Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO); page = (Page) BufferGetPage(buffer); SpGistInitMetapage(page); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); buffer = XLogInitBufferForRedo(record, 1); Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO); SpGistInitBuffer(buffer, SPGIST_LEAF); page = (Page) BufferGetPage(buffer); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); buffer = XLogInitBufferForRedo(record, 2); Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO); SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS); page = (Page) BufferGetPage(buffer); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
static void ginRedoCreateIndex(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; Buffer RootBuffer, MetaBuffer; Page page; MetaBuffer = XLogInitBufferForRedo(record, 0); Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO); page = (Page) BufferGetPage(MetaBuffer); GinInitMetabuffer(MetaBuffer); PageSetLSN(page, lsn); MarkBufferDirty(MetaBuffer); RootBuffer = XLogInitBufferForRedo(record, 1); Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO); page = (Page) BufferGetPage(RootBuffer); GinInitBuffer(RootBuffer, GIN_LEAF); PageSetLSN(page, lsn); MarkBufferDirty(RootBuffer); UnlockReleaseBuffer(RootBuffer); UnlockReleaseBuffer(MetaBuffer); }
static void ginRedoDeleteListPages(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record); Buffer metabuffer; Page metapage; int i; metabuffer = XLogInitBufferForRedo(record, 0); Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO); metapage = BufferGetPage(metabuffer); GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer)); memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); PageSetLSN(metapage, lsn); MarkBufferDirty(metabuffer); /* * In normal operation, shiftList() takes exclusive lock on all the * pages-to-be-deleted simultaneously. During replay, however, it should * be all right to lock them one at a time. This is dependent on the fact * that we are deleting pages from the head of the list, and that readers * share-lock the next page before releasing the one they are on. So we * cannot get past a reader that is on, or due to visit, any page we are * going to delete. New incoming readers will block behind our metapage * lock and then see a fully updated page list. * * No full-page images are taken of the deleted pages. Instead, they are * re-initialized as empty, deleted pages. Their right-links don't need to * be preserved, because no new___ readers can see the pages, as explained * above. */ for (i = 0; i < data->ndeleted; i++) { Buffer buffer; Page page; buffer = XLogInitBufferForRedo(record, i + 1); page = BufferGetPage(buffer); GinInitBuffer(buffer, GIN_DELETED); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); } UnlockReleaseBuffer(metabuffer); }
/* * Replay a revmap page extension */ static void brin_xlog_revmap_extend(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_brin_revmap_extend *xlrec; Buffer metabuf; Buffer buf; Page page; BlockNumber targetBlk; XLogRedoAction action; xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record); XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk); Assert(xlrec->targetBlk == targetBlk); /* Update the metapage */ action = XLogReadBufferForRedo(record, 0, &metabuf); if (action == BLK_NEEDS_REDO) { Page metapg; BrinMetaPageData *metadata; metapg = BufferGetPage(metabuf); metadata = (BrinMetaPageData *) PageGetContents(metapg); Assert(metadata->lastRevmapPage == xlrec->targetBlk - 1); metadata->lastRevmapPage = xlrec->targetBlk; PageSetLSN(metapg, lsn); /* * Set pd_lower just past the end of the metadata. This is essential, * because without doing so, metadata will be lost if xlog.c * compresses the page. (We must do this here because pre-v11 * versions of PG did not set the metapage's pd_lower correctly, so a * pg_upgraded index might contain the wrong value.) */ ((PageHeader) metapg)->pd_lower = ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapg; MarkBufferDirty(metabuf); } /* * Re-init the target block as a revmap page. There's never a full- page * image here. */ buf = XLogInitBufferForRedo(record, 1); page = (Page) BufferGetPage(buf); brin_page_init(page, BRIN_PAGETYPE_REVMAP); PageSetLSN(page, lsn); MarkBufferDirty(buf); UnlockReleaseBuffer(buf); if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); }
static void ginRedoCreatePTree(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record); char *ptr; Buffer buffer; Page page; buffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(buffer); GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED); ptr = XLogRecGetData(record) + sizeof(ginxlogCreatePostingTree); /* Place page data */ memcpy(GinDataLeafPageGetPostingList(page), ptr, data->size); GinDataPageSetDataSize(page, data->size); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
static void ginRedoInsertListPage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record); Buffer buffer; Page page; OffsetNumber l, off = FirstOffsetNumber; int i, tupsize; char *payload; IndexTuple tuples; Size totaltupsize; /* We always re-initialize the page. */ buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); GinInitBuffer(buffer, GIN_LIST); GinPageGetOpaque(page)->rightlink = data->rightlink; if (data->rightlink == InvalidBlockNumber) { /* tail of sublist */ GinPageSetFullRow(page); GinPageGetOpaque(page)->maxoff = 1; } else { GinPageGetOpaque(page)->maxoff = 0; } payload = XLogRecGetBlockData(record, 0, &totaltupsize); tuples = (IndexTuple) payload; for (i = 0; i < data->ntuples; i++) { tupsize = IndexTupleSize(tuples); l = PageAddItem(page, (Item) tuples, tupsize, off, false, false); if (l == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page"); tuples = (IndexTuple) (((char *) tuples) + tupsize); off++; } Assert((char *) tuples == payload + totaltupsize); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
/* * xlog replay routines */ static void brin_xlog_createidx(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_brin_createidx *xlrec = (xl_brin_createidx *) XLogRecGetData(record); Buffer buf; Page page; /* create the index' metapage */ buf = XLogInitBufferForRedo(record, 0); Assert(BufferIsValid(buf)); page = (Page) BufferGetPage(buf); brin_metapage_init(page, xlrec->pagesPerRange, xlrec->version); PageSetLSN(page, lsn); MarkBufferDirty(buf); UnlockReleaseBuffer(buf); }
static void gistRedoCreateIndex(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; Buffer buffer; Page page; buffer = XLogInitBufferForRedo(record, 0); Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO); page = (Page) BufferGetPage(buffer); GISTInitBuffer(buffer, F_LEAF); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
static void _bt_restore_meta(XLogReaderState *record, uint8 block_id) { XLogRecPtr lsn = record->EndRecPtr; Buffer metabuf; Page metapg; BTMetaPageData *md; BTPageOpaque pageop; xl_btree_metadata *xlrec; char *ptr; Size len; metabuf = XLogInitBufferForRedo(record, block_id); ptr = XLogRecGetBlockData(record, block_id, &len); Assert(len == sizeof(xl_btree_metadata)); Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE); xlrec = (xl_btree_metadata *) ptr; metapg = BufferGetPage(metabuf); _bt_pageinit(metapg, BufferGetPageSize(metabuf)); md = BTPageGetMeta(metapg); md->btm_magic = BTREE_MAGIC; md->btm_version = BTREE_VERSION; md->btm_root = xlrec->root; md->btm_level = xlrec->level; md->btm_fastroot = xlrec->fastroot; md->btm_fastlevel = xlrec->fastlevel; pageop = (BTPageOpaque) PageGetSpecialPointer(metapg); pageop->btpo_flags = BTP_META; /* * Set pd_lower just past the end of the metadata. This is not essential * but it makes the page look compressible to xlog.c. */ ((PageHeader) metapg)->pd_lower = ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg; PageSetLSN(metapg, lsn); MarkBufferDirty(metabuf); UnlockReleaseBuffer(metabuf); }
/* * replay a hash index bitmap page */ static void hash_xlog_init_bitmap_page(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; Buffer bitmapbuf; Buffer metabuf; Page page; HashMetaPage metap; uint32 num_buckets; xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record); /* * Initialize bitmap page */ bitmapbuf = XLogInitBufferForRedo(record, 0); _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true); PageSetLSN(BufferGetPage(bitmapbuf), lsn); MarkBufferDirty(bitmapbuf); UnlockReleaseBuffer(bitmapbuf); /* add the new bitmap page to the metapage's list of bitmaps */ if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO) { /* * Note: in normal operation, we'd update the metapage while still * holding lock on the bitmap page. But during replay it's not * necessary to hold that lock, since nobody can see it yet; the * creating transaction hasn't yet committed. */ page = BufferGetPage(metabuf); metap = HashPageGetMeta(page); num_buckets = metap->hashm_maxbucket + 1; metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1; metap->hashm_nmaps++; PageSetLSN(page, lsn); MarkBufferDirty(metabuf); } if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); }
/* * replay a hash index meta page */ static void hash_xlog_init_meta_page(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; Page page; Buffer metabuf; xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record); /* create the index' metapage */ metabuf = XLogInitBufferForRedo(record, 0); Assert(BufferIsValid(metabuf)); _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid, xlrec->ffactor, true); page = (Page) BufferGetPage(metabuf); PageSetLSN(page, lsn); MarkBufferDirty(metabuf); /* all done */ UnlockReleaseBuffer(metabuf); }
static void btree_xlog_newroot(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record); Buffer buffer; Page page; BTPageOpaque pageop; char *ptr; Size len; buffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(buffer); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_flags = BTP_ROOT; pageop->btpo_prev = pageop->btpo_next = P_NONE; pageop->btpo.level = xlrec->level; if (xlrec->level == 0) pageop->btpo_flags |= BTP_LEAF; pageop->btpo_cycleid = 0; if (xlrec->level > 0) { ptr = XLogRecGetBlockData(record, 0, &len); _bt_restore_page(page, ptr, len); /* Clear the incomplete-split flag in left child */ _bt_clear_incomplete_split(record, 1); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); _bt_restore_meta(record, 2); }
static void spgRedoAddLeaf(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) ptr; char *leafTuple; SpGistLeafTupleData leafTupleHdr; Buffer buffer; Page page; XLogRedoAction action; ptr += sizeof(spgxlogAddLeaf); leafTuple = ptr; /* the leaf tuple is unaligned, so make a copy to access its header */ memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); /* * In normal operation we would have both current and parent pages locked * simultaneously; but in WAL replay it should be safe to update the leaf * page before updating the parent. */ if (xldata->newPage) { buffer = XLogInitBufferForRedo(record, 0); SpGistInitBuffer(buffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 0, &buffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); /* insert new tuple */ if (xldata->offnumLeaf != xldata->offnumHeadLeaf) { /* normal cases, tuple was added by SpGistPageAddNewItem */ addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, xldata->offnumLeaf); /* update head tuple's chain link if needed */ if (xldata->offnumHeadLeaf != InvalidOffsetNumber) { SpGistLeafTuple head; head = (SpGistLeafTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumHeadLeaf)); Assert(head->nextOffset == leafTupleHdr.nextOffset); head->nextOffset = xldata->offnumLeaf; } } else { /* replacing a DEAD tuple */ PageIndexTupleDelete(page, xldata->offnumLeaf); if (PageAddItem(page, (Item) leafTuple, leafTupleHdr.size, xldata->offnumLeaf, false, false) != xldata->offnumLeaf) elog(ERROR, "failed to add item of size %u to SPGiST index page", leafTupleHdr.size); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* update parent downlink if necessary */ if (xldata->offnumParent != InvalidOffsetNumber) { if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { SpGistInnerTuple tuple; BlockNumber blknoLeaf; XLogRecGetBlockTag(record, 0, NULL, NULL, &blknoLeaf); page = BufferGetPage(buffer); tuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(tuple, xldata->nodeI, blknoLeaf, xldata->offnumLeaf); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } }
static void gistRedoPageSplitRecord(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record); Buffer firstbuffer = InvalidBuffer; Buffer buffer; Page page; int i; bool isrootsplit = false; /* * We must hold lock on the first-listed page throughout the action, * including while updating the left child page (if any). We can unlock * remaining pages in the list as soon as they've been written, because * there is no path for concurrent queries to reach those pages without * first visiting the first-listed page. */ /* loop around all pages */ for (i = 0; i < xldata->npage; i++) { int flags; char *data; Size datalen; int num; BlockNumber blkno; IndexTuple *tuples; XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno); if (blkno == GIST_ROOT_BLKNO) { Assert(i == 0); isrootsplit = true; } buffer = XLogInitBufferForRedo(record, i + 1); page = (Page) BufferGetPage(buffer); data = XLogRecGetBlockData(record, i + 1, &datalen); tuples = decodePageSplitRecord(data, datalen, &num); /* ok, clear buffer */ if (xldata->origleaf && blkno != GIST_ROOT_BLKNO) flags = F_LEAF; else flags = 0; GISTInitBuffer(buffer, flags); /* and fill it */ gistfillbuffer(page, tuples, num, FirstOffsetNumber); if (blkno == GIST_ROOT_BLKNO) { GistPageGetOpaque(page)->rightlink = InvalidBlockNumber; GistPageSetNSN(page, xldata->orignsn); GistClearFollowRight(page); } else { if (i < xldata->npage - 1) { BlockNumber nextblkno; XLogRecGetBlockTag(record, i + 2, NULL, NULL, &nextblkno); GistPageGetOpaque(page)->rightlink = nextblkno; } else GistPageGetOpaque(page)->rightlink = xldata->origrlink; GistPageSetNSN(page, xldata->orignsn); if (i < xldata->npage - 1 && !isrootsplit && xldata->markfollowright) GistMarkFollowRight(page); else GistClearFollowRight(page); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); if (i == 0) firstbuffer = buffer; else UnlockReleaseBuffer(buffer); } /* Fix follow-right data on left child page, if any */ if (XLogRecHasBlockRef(record, 0)) gistRedoClearFollowRight(record, 0); /* Finally, release lock on the first page */ UnlockReleaseBuffer(firstbuffer); }
static void spgRedoAddNode(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogAddNode *xldata = (spgxlogAddNode *) ptr; char *innerTuple; SpGistInnerTupleData innerTupleHdr; SpGistState state; Buffer buffer; Page page; XLogRedoAction action; ptr += sizeof(spgxlogAddNode); innerTuple = ptr; /* the tuple is unaligned, so make a copy to access its header */ memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData)); fillFakeState(&state, xldata->stateSrc); if (!XLogRecHasBlockRef(record, 1)) { /* update in place */ Assert(xldata->parentBlk == -1); if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); PageIndexTupleDelete(page, xldata->offnum); if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnum, false, false) != xldata->offnum) elog(ERROR, "failed to add item of size %u to SPGiST index page", innerTupleHdr.size); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } else { BlockNumber blkno; BlockNumber blknoNew; XLogRecGetBlockTag(record, 0, NULL, NULL, &blkno); XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoNew); /* * In normal operation we would have all three pages (source, dest, * and parent) locked simultaneously; but in WAL replay it should be * safe to update them one at a time, as long as we do it in the right * order. We must insert the new tuple before replacing the old tuple * with the redirect tuple. */ /* Install new tuple first so redirect is valid */ if (xldata->newPage) { /* AddNode is not used for nulls pages */ buffer = XLogInitBufferForRedo(record, 1); SpGistInitBuffer(buffer, 0); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 1, &buffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnumNew); /* * If parent is in this same page, update it now. */ if (xldata->parentBlk == 1) { SpGistInnerTuple parentTuple; parentTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parentTuple, xldata->nodeI, blknoNew, xldata->offnumNew); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Delete old tuple, replacing it with redirect or placeholder tuple */ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { SpGistDeadTuple dt; page = BufferGetPage(buffer); if (state.isBuild) dt = spgFormDeadTuple(&state, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); else dt = spgFormDeadTuple(&state, SPGIST_REDIRECT, blknoNew, xldata->offnumNew); PageIndexTupleDelete(page, xldata->offnum); if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum, false, false) != xldata->offnum) elog(ERROR, "failed to add item of size %u to SPGiST index page", dt->size); if (state.isBuild) SpGistPageGetOpaque(page)->nPlaceholder++; else SpGistPageGetOpaque(page)->nRedirection++; /* * If parent is in this same page, update it now. */ if (xldata->parentBlk == 0) { SpGistInnerTuple parentTuple; parentTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parentTuple, xldata->nodeI, blknoNew, xldata->offnumNew); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* * Update parent downlink (if we didn't do it as part of the source or * destination page update already). */ if (xldata->parentBlk == 2) { if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO) { SpGistInnerTuple parentTuple; page = BufferGetPage(buffer); parentTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parentTuple, xldata->nodeI, blknoNew, xldata->offnumNew); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } } }
static void spgRedoSplitTuple(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) ptr; char *prefixTuple; SpGistInnerTupleData prefixTupleHdr; char *postfixTuple; SpGistInnerTupleData postfixTupleHdr; Buffer buffer; Page page; XLogRedoAction action; ptr += sizeof(spgxlogSplitTuple); prefixTuple = ptr; /* the prefix tuple is unaligned, so make a copy to access its header */ memcpy(&prefixTupleHdr, prefixTuple, sizeof(SpGistInnerTupleData)); ptr += prefixTupleHdr.size; postfixTuple = ptr; /* postfix tuple is also unaligned */ memcpy(&postfixTupleHdr, postfixTuple, sizeof(SpGistInnerTupleData)); /* * In normal operation we would have both pages locked simultaneously; but * in WAL replay it should be safe to update them one at a time, as long * as we do it in the right order. */ /* insert postfix tuple first to avoid dangling link */ if (!xldata->postfixBlkSame) { if (xldata->newPage) { buffer = XLogInitBufferForRedo(record, 1); /* SplitTuple is not used for nulls pages */ SpGistInitBuffer(buffer, 0); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 1, &buffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size, xldata->offnumPostfix); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } /* now handle the original page */ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); PageIndexTupleDelete(page, xldata->offnumPrefix); if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size, xldata->offnumPrefix, false, false) != xldata->offnumPrefix) elog(ERROR, "failed to add item of size %u to SPGiST index page", prefixTupleHdr.size); if (xldata->postfixBlkSame) addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size, xldata->offnumPostfix); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void spgRedoPickSplit(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr; char *innerTuple; SpGistInnerTupleData innerTupleHdr; SpGistState state; OffsetNumber *toDelete; OffsetNumber *toInsert; uint8 *leafPageSelect; Buffer srcBuffer; Buffer destBuffer; Buffer innerBuffer; Page srcPage; Page destPage; Page page; int i; BlockNumber blknoInner; XLogRedoAction action; XLogRecGetBlockTag(record, 2, NULL, NULL, &blknoInner); fillFakeState(&state, xldata->stateSrc); ptr += SizeOfSpgxlogPickSplit; toDelete = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nDelete; toInsert = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nInsert; leafPageSelect = (uint8 *) ptr; ptr += sizeof(uint8) * xldata->nInsert; innerTuple = ptr; /* the inner tuple is unaligned, so make a copy to access its header */ memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData)); ptr += innerTupleHdr.size; /* now ptr points to the list of leaf tuples */ if (xldata->isRootSplit) { /* when splitting root, we touch it only in the guise of new inner */ srcBuffer = InvalidBuffer; srcPage = NULL; } else if (xldata->initSrc) { /* just re-init the source page */ srcBuffer = XLogInitBufferForRedo(record, 0); srcPage = (Page) BufferGetPage(srcBuffer); SpGistInitBuffer(srcBuffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); /* don't update LSN etc till we're done with it */ } else { /* * Delete the specified tuples from source page. (In case we're in * Hot Standby, we need to hold lock on the page till we're done * inserting leaf tuples and the new inner tuple, else the added * redirect tuple will be a dangling link.) */ srcPage = NULL; if (XLogReadBufferForRedo(record, 0, &srcBuffer) == BLK_NEEDS_REDO) { srcPage = BufferGetPage(srcBuffer); /* * We have it a bit easier here than in doPickSplit(), because we * know the inner tuple's location already, so we can inject the * correct redirection tuple now. */ if (!state.isBuild) spgPageIndexMultiDelete(&state, srcPage, toDelete, xldata->nDelete, SPGIST_REDIRECT, SPGIST_PLACEHOLDER, blknoInner, xldata->offnumInner); else spgPageIndexMultiDelete(&state, srcPage, toDelete, xldata->nDelete, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); /* don't update LSN etc till we're done with it */ } } /* try to access dest page if any */ if (!XLogRecHasBlockRef(record, 1)) { destBuffer = InvalidBuffer; destPage = NULL; } else if (xldata->initDest) { /* just re-init the dest page */ destBuffer = XLogInitBufferForRedo(record, 1); destPage = (Page) BufferGetPage(destBuffer); SpGistInitBuffer(destBuffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); /* don't update LSN etc till we're done with it */ } else { /* * We could probably release the page lock immediately in the * full-page-image case, but for safety let's hold it till later. */ if (XLogReadBufferForRedo(record, 1, &destBuffer) == BLK_NEEDS_REDO) destPage = (Page) BufferGetPage(destBuffer); else destPage = NULL; /* don't do any page updates */ } /* restore leaf tuples to src and/or dest page */ for (i = 0; i < xldata->nInsert; i++) { char *leafTuple; SpGistLeafTupleData leafTupleHdr; /* the tuples are not aligned, so must copy to access the size field. */ leafTuple = ptr; memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); ptr += leafTupleHdr.size; page = leafPageSelect[i] ? destPage : srcPage; if (page == NULL) continue; /* no need to touch this page */ addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, toInsert[i]); } /* Now update src and dest page LSNs if needed */ if (srcPage != NULL) { PageSetLSN(srcPage, lsn); MarkBufferDirty(srcBuffer); } if (destPage != NULL) { PageSetLSN(destPage, lsn); MarkBufferDirty(destBuffer); } /* restore new inner tuple */ if (xldata->initInner) { innerBuffer = XLogInitBufferForRedo(record, 2); SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 2, &innerBuffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(innerBuffer); addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnumInner); /* if inner is also parent, update link while we're here */ if (xldata->innerIsParent) { SpGistInnerTuple parent; parent = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parent, xldata->nodeI, blknoInner, xldata->offnumInner); } PageSetLSN(page, lsn); MarkBufferDirty(innerBuffer); } if (BufferIsValid(innerBuffer)) UnlockReleaseBuffer(innerBuffer); /* * Now we can release the leaf-page locks. It's okay to do this before * updating the parent downlink. */ if (BufferIsValid(srcBuffer)) UnlockReleaseBuffer(srcBuffer); if (BufferIsValid(destBuffer)) UnlockReleaseBuffer(destBuffer); /* update parent downlink, unless we did it above */ if (XLogRecHasBlockRef(record, 3)) { Buffer parentBuffer; if (XLogReadBufferForRedo(record, 3, &parentBuffer) == BLK_NEEDS_REDO) { SpGistInnerTuple parent; page = BufferGetPage(parentBuffer); parent = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parent, xldata->nodeI, blknoInner, xldata->offnumInner); PageSetLSN(page, lsn); MarkBufferDirty(parentBuffer); } if (BufferIsValid(parentBuffer)) UnlockReleaseBuffer(parentBuffer); } else Assert(xldata->innerIsParent || xldata->isRootSplit); }
static void ginRedoUpdateMetapage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record); Buffer metabuffer; Page metapage; Buffer buffer; /* * Restore the metapage. This is essentially the same as a full-page * image, so restore the metapage unconditionally without looking at the * LSN, to avoid torn page hazards. */ metabuffer = XLogInitBufferForRedo(record, 0); Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO); metapage = BufferGetPage(metabuffer); memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); PageSetLSN(metapage, lsn); MarkBufferDirty(metabuffer); if (data->ntuples > 0) { /* * insert into tail page */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); OffsetNumber off; int i; Size tupsize; char *payload; IndexTuple tuples; Size totaltupsize; payload = XLogRecGetBlockData(record, 1, &totaltupsize); tuples = (IndexTuple) payload; if (PageIsEmpty(page)) off = FirstOffsetNumber; else off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); for (i = 0; i < data->ntuples; i++) { tupsize = IndexTupleSize(tuples); if (PageAddItem(page, (Item) tuples, tupsize, off, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page"); tuples = (IndexTuple) (((char *) tuples) + tupsize); off++; } Assert(payload + totaltupsize == (char *) tuples); /* * Increase counter of heap tuples */ GinPageGetOpaque(page)->maxoff++; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } else if (data->prevTail != InvalidBlockNumber) { /* * New tail */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); GinPageGetOpaque(page)->rightlink = data->newRightlink; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } UnlockReleaseBuffer(metabuffer); }
/* * Common part of an insert or update. Inserts the new tuple and updates the * revmap. */ static void brin_xlog_insert_update(XLogReaderState *record, xl_brin_insert *xlrec) { XLogRecPtr lsn = record->EndRecPtr; Buffer buffer; BlockNumber regpgno; Page page; XLogRedoAction action; /* * If we inserted the first and only tuple on the page, re-initialize the * page from scratch. */ if (XLogRecGetInfo(record) & XLOG_BRIN_INIT_PAGE) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); brin_page_init(page, BRIN_PAGETYPE_REGULAR); action = BLK_NEEDS_REDO; } else { action = XLogReadBufferForRedo(record, 0, &buffer); } /* need this page's blkno to store in revmap */ regpgno = BufferGetBlockNumber(buffer); /* insert the index item into the page */ if (action == BLK_NEEDS_REDO) { OffsetNumber offnum; BrinTuple *tuple; Size tuplen; tuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen); Assert(tuple->bt_blkno == xlrec->heapBlk); page = (Page) BufferGetPage(buffer); offnum = xlrec->offnum; if (PageGetMaxOffsetNumber(page) + 1 < offnum) elog(PANIC, "brin_xlog_insert_update: invalid max offset number"); offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false); if (offnum == InvalidOffsetNumber) elog(PANIC, "brin_xlog_insert_update: failed to add tuple"); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* update the revmap */ action = XLogReadBufferForRedo(record, 1, &buffer); if (action == BLK_NEEDS_REDO) { ItemPointerData tid; ItemPointerSet(&tid, regpgno, xlrec->offnum); page = (Page) BufferGetPage(buffer); brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, tid); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* XXX no FSM updates here ... */ }
static void btree_xlog_unlink_page(uint8 info, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record); BlockNumber leftsib; BlockNumber rightsib; Buffer buffer; Page page; BTPageOpaque pageop; leftsib = xlrec->leftsib; rightsib = xlrec->rightsib; /* * In normal operation, we would lock all the pages this WAL record * touches before changing any of them. In WAL replay, it should be okay * to lock just one page at a time, since no concurrent index updates can * be happening, and readers should not care whether they arrive at the * target page or not (since it's surely empty). */ /* Fix left-link of right sibling */ if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO) { page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = leftsib; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Fix right-link of left sibling, if any */ if (leftsib != P_NONE) { if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_next = rightsib; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } /* Rewrite target page as empty deleted page */ buffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(buffer); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = leftsib; pageop->btpo_next = rightsib; pageop->btpo.xact = xlrec->btpo_xact; pageop->btpo_flags = BTP_DELETED; pageop->btpo_cycleid = 0; PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); /* * If we deleted a parent of the targeted leaf page, instead of the leaf * itself, update the leaf to point to the next remaining child in the * branch. */ if (XLogRecHasBlockRef(record, 3)) { /* * There is no real data on the page, so we just re-create it from * scratch using the information from the WAL record. */ IndexTupleData trunctuple; buffer = XLogInitBufferForRedo(record, 3); page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF; pageop->btpo_prev = xlrec->leafleftsib; pageop->btpo_next = xlrec->leafrightsib; pageop->btpo.level = 0; pageop->btpo_cycleid = 0; /* Add a dummy hikey item */ MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (xlrec->topparent != InvalidBlockNumber) ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) elog(ERROR, "could not add dummy high key to half-dead page"); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); } /* Update metapage if needed */ if (info == XLOG_BTREE_UNLINK_PAGE_META) _bt_restore_meta(record, 4); }
/* * replay addition of overflow page for hash index */ static void hash_xlog_add_ovfl_page(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record); Buffer leftbuf; Buffer ovflbuf; Buffer metabuf; BlockNumber leftblk; BlockNumber rightblk; BlockNumber newmapblk = InvalidBlockNumber; Page ovflpage; HashPageOpaque ovflopaque; uint32 *num_bucket; char *data; Size datalen PG_USED_FOR_ASSERTS_ONLY; bool new_bmpage = false; XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk); XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk); ovflbuf = XLogInitBufferForRedo(record, 0); Assert(BufferIsValid(ovflbuf)); data = XLogRecGetBlockData(record, 0, &datalen); num_bucket = (uint32 *) data; Assert(datalen == sizeof(uint32)); _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE, true); /* update backlink */ ovflpage = BufferGetPage(ovflbuf); ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); ovflopaque->hasho_prevblkno = leftblk; PageSetLSN(ovflpage, lsn); MarkBufferDirty(ovflbuf); if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO) { Page leftpage; HashPageOpaque leftopaque; leftpage = BufferGetPage(leftbuf); leftopaque = (HashPageOpaque) PageGetSpecialPointer(leftpage); leftopaque->hasho_nextblkno = rightblk; PageSetLSN(leftpage, lsn); MarkBufferDirty(leftbuf); } if (BufferIsValid(leftbuf)) UnlockReleaseBuffer(leftbuf); UnlockReleaseBuffer(ovflbuf); /* * Note: in normal operation, we'd update the bitmap and meta page while * still holding lock on the overflow pages. But during replay it's not * necessary to hold those locks, since no other index updates can be * happening concurrently. */ if (XLogRecHasBlockRef(record, 2)) { Buffer mapbuffer; if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO) { Page mappage = (Page) BufferGetPage(mapbuffer); uint32 *freep = NULL; char *data; uint32 *bitmap_page_bit; freep = HashPageGetBitmap(mappage); data = XLogRecGetBlockData(record, 2, &datalen); bitmap_page_bit = (uint32 *) data; SETBIT(freep, *bitmap_page_bit); PageSetLSN(mappage, lsn); MarkBufferDirty(mapbuffer); } if (BufferIsValid(mapbuffer)) UnlockReleaseBuffer(mapbuffer); } if (XLogRecHasBlockRef(record, 3)) { Buffer newmapbuf; newmapbuf = XLogInitBufferForRedo(record, 3); _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true); new_bmpage = true; newmapblk = BufferGetBlockNumber(newmapbuf); MarkBufferDirty(newmapbuf); PageSetLSN(BufferGetPage(newmapbuf), lsn); UnlockReleaseBuffer(newmapbuf); } if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO) { HashMetaPage metap; Page page; uint32 *firstfree_ovflpage; data = XLogRecGetBlockData(record, 4, &datalen); firstfree_ovflpage = (uint32 *) data; page = BufferGetPage(metabuf); metap = HashPageGetMeta(page); metap->hashm_firstfree = *firstfree_ovflpage; if (!xlrec->bmpage_found) { metap->hashm_spares[metap->hashm_ovflpoint]++; if (new_bmpage) { Assert(BlockNumberIsValid(newmapblk)); metap->hashm_mapp[metap->hashm_nmaps] = newmapblk; metap->hashm_nmaps++; metap->hashm_spares[metap->hashm_ovflpoint]++; } } PageSetLSN(page, lsn); MarkBufferDirty(metabuf); } if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); }
static void btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record); Buffer buffer; Page page; BTPageOpaque pageop; IndexTupleData trunctuple; /* * In normal operation, we would lock all the pages this WAL record * touches before changing any of them. In WAL replay, it should be okay * to lock just one page at a time, since no concurrent index updates can * be happening, and readers should not care whether they arrive at the * target page or not (since it's surely empty). */ /* parent page */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { OffsetNumber poffset; ItemId itemid; IndexTuple itup; OffsetNumber nextoffset; BlockNumber rightsib; page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); poffset = xlrec->poffset; nextoffset = OffsetNumberNext(poffset); itemid = PageGetItemId(page, nextoffset); itup = (IndexTuple) PageGetItem(page, itemid); rightsib = ItemPointerGetBlockNumber(&itup->t_tid); itemid = PageGetItemId(page, poffset); itup = (IndexTuple) PageGetItem(page, itemid); ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY); nextoffset = OffsetNumberNext(poffset); PageIndexTupleDelete(page, nextoffset); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Rewrite the leaf page as a halfdead page */ buffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(buffer); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = xlrec->leftblk; pageop->btpo_next = xlrec->rightblk; pageop->btpo.level = 0; pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF; pageop->btpo_cycleid = 0; /* * Construct a dummy hikey item that points to the next parent to be * deleted (if any). */ MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (xlrec->topparent != InvalidBlockNumber) ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) elog(ERROR, "could not add dummy high key to half-dead page"); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
static void btree_xlog_split(bool onleft, bool isroot, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record); bool isleaf = (xlrec->level == 0); Buffer lbuf; Buffer rbuf; Page rpage; BTPageOpaque ropaque; char *datapos; Size datalen; Item left_hikey = NULL; Size left_hikeysz = 0; BlockNumber leftsib; BlockNumber rightsib; BlockNumber rnext; XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib); XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib); if (!XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext)) rnext = P_NONE; /* * Clear the incomplete split flag on the left sibling of the child page * this is a downlink for. (Like in btree_xlog_insert, this can be done * before locking the other pages) */ if (!isleaf) _bt_clear_incomplete_split(record, 3); /* Reconstruct right (new) sibling page from scratch */ rbuf = XLogInitBufferForRedo(record, 1); datapos = XLogRecGetBlockData(record, 1, &datalen); rpage = (Page) BufferGetPage(rbuf); _bt_pageinit(rpage, BufferGetPageSize(rbuf)); ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage); ropaque->btpo_prev = leftsib; ropaque->btpo_next = rnext; ropaque->btpo.level = xlrec->level; ropaque->btpo_flags = isleaf ? BTP_LEAF : 0; ropaque->btpo_cycleid = 0; _bt_restore_page(rpage, datapos, datalen); /* * On leaf level, the high key of the left page is equal to the first key * on the right page. */ if (isleaf) { ItemId hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque)); left_hikey = PageGetItem(rpage, hiItemId); left_hikeysz = ItemIdGetLength(hiItemId); } PageSetLSN(rpage, lsn); MarkBufferDirty(rbuf); /* don't release the buffer yet; we touch right page's first item below */ /* Now reconstruct left (original) sibling page */ if (XLogReadBufferForRedo(record, 0, &lbuf) == BLK_NEEDS_REDO) { /* * To retain the same physical order of the tuples that they had, we * initialize a temporary empty page for the left page and add all the * items to that in item number order. This mirrors how _bt_split() * works. It's not strictly required to retain the same physical * order, as long as the items are in the correct item number order, * but it helps debugging. See also _bt_restore_page(), which does * the same for the right page. */ Page lpage = (Page) BufferGetPage(lbuf); BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage); OffsetNumber off; Item newitem = NULL; Size newitemsz = 0; Page newlpage; OffsetNumber leftoff; datapos = XLogRecGetBlockData(record, 0, &datalen); if (onleft) { newitem = (Item) datapos; newitemsz = MAXALIGN(IndexTupleSize(newitem)); datapos += newitemsz; datalen -= newitemsz; } /* Extract left hikey and its size (assuming 16-bit alignment) */ if (!isleaf) { left_hikey = (Item) datapos; left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey)); datapos += left_hikeysz; datalen -= left_hikeysz; } Assert(datalen == 0); newlpage = PageGetTempPageCopySpecial(lpage); /* Set high key */ leftoff = P_HIKEY; if (PageAddItem(newlpage, left_hikey, left_hikeysz, P_HIKEY, false, false) == InvalidOffsetNumber) elog(PANIC, "failed to add high key to left page after split"); leftoff = OffsetNumberNext(leftoff); for (off = P_FIRSTDATAKEY(lopaque); off < xlrec->firstright; off++) { ItemId itemid; Size itemsz; Item item; /* add the new item if it was inserted on left page */ if (onleft && off == xlrec->newitemoff) { if (PageAddItem(newlpage, newitem, newitemsz, leftoff, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add new item to left page after split"); leftoff = OffsetNumberNext(leftoff); } itemid = PageGetItemId(lpage, off); itemsz = ItemIdGetLength(itemid); item = PageGetItem(lpage, itemid); if (PageAddItem(newlpage, item, itemsz, leftoff, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add old item to left page after split"); leftoff = OffsetNumberNext(leftoff); } /* cope with possibility that newitem goes at the end */ if (onleft && off == xlrec->newitemoff) { if (PageAddItem(newlpage, newitem, newitemsz, leftoff, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add new item to left page after split"); leftoff = OffsetNumberNext(leftoff); } PageRestoreTempPage(newlpage, lpage); /* Fix opaque fields */ lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT; if (isleaf) lopaque->btpo_flags |= BTP_LEAF; lopaque->btpo_next = rightsib; lopaque->btpo_cycleid = 0; PageSetLSN(lpage, lsn); MarkBufferDirty(lbuf); } /* We no longer need the buffers */ if (BufferIsValid(lbuf)) UnlockReleaseBuffer(lbuf); UnlockReleaseBuffer(rbuf); /* * Fix left-link of the page to the right of the new right sibling. * * Note: in normal operation, we do this while still holding lock on the * two split pages. However, that's not necessary for correctness in WAL * replay, because no other index update can be in progress, and readers * will cope properly when following an obsolete left-link. */ if (rnext != P_NONE) { Buffer buffer; if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO) { Page page = (Page) BufferGetPage(buffer); BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = rightsib; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } }
static void spgRedoMoveLeafs(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr; SpGistState state; OffsetNumber *toDelete; OffsetNumber *toInsert; int nInsert; Buffer buffer; Page page; XLogRedoAction action; BlockNumber blknoDst; XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoDst); fillFakeState(&state, xldata->stateSrc); nInsert = xldata->replaceDead ? 1 : xldata->nMoves + 1; ptr += SizeOfSpgxlogMoveLeafs; toDelete = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nMoves; toInsert = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * nInsert; /* now ptr points to the list of leaf tuples */ /* * In normal operation we would have all three pages (source, dest, and * parent) locked simultaneously; but in WAL replay it should be safe to * update them one at a time, as long as we do it in the right order. */ /* Insert tuples on the dest page (do first, so redirect is valid) */ if (xldata->newPage) { buffer = XLogInitBufferForRedo(record, 1); SpGistInitBuffer(buffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 1, &buffer); if (action == BLK_NEEDS_REDO) { int i; page = BufferGetPage(buffer); for (i = 0; i < nInsert; i++) { char *leafTuple; SpGistLeafTupleData leafTupleHdr; /* * the tuples are not aligned, so must copy to access the size * field. */ leafTuple = ptr; memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, toInsert[i]); ptr += leafTupleHdr.size; } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Delete tuples from the source page, inserting a redirection pointer */ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves, state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT, SPGIST_PLACEHOLDER, blknoDst, toInsert[nInsert - 1]); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* And update the parent downlink */ if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO) { SpGistInnerTuple tuple; page = BufferGetPage(buffer); tuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(tuple, xldata->nodeI, blknoDst, toInsert[nInsert - 1]); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
/* * replay allocation of page for split operation */ static void hash_xlog_split_allocate_page(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record); Buffer oldbuf; Buffer newbuf; Buffer metabuf; Size datalen PG_USED_FOR_ASSERTS_ONLY; char *data; XLogRedoAction action; /* * To be consistent with normal operation, here we take cleanup locks on * both the old and new buckets even though there can't be any concurrent * inserts. */ /* replay the record for old bucket */ action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf); /* * Note that we still update the page even if it was restored from a full * page image, because the special space is not included in the image. */ if (action == BLK_NEEDS_REDO || action == BLK_RESTORED) { Page oldpage; HashPageOpaque oldopaque; oldpage = BufferGetPage(oldbuf); oldopaque = (HashPageOpaque) PageGetSpecialPointer(oldpage); oldopaque->hasho_flag = xlrec->old_bucket_flag; oldopaque->hasho_prevblkno = xlrec->new_bucket; PageSetLSN(oldpage, lsn); MarkBufferDirty(oldbuf); } /* replay the record for new bucket */ newbuf = XLogInitBufferForRedo(record, 1); _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket, xlrec->new_bucket_flag, true); if (!IsBufferCleanupOK(newbuf)) elog(PANIC, "hash_xlog_split_allocate_page: failed to acquire cleanup lock"); MarkBufferDirty(newbuf); PageSetLSN(BufferGetPage(newbuf), lsn); /* * We can release the lock on old bucket early as well but doing here to * consistent with normal operation. */ if (BufferIsValid(oldbuf)) UnlockReleaseBuffer(oldbuf); if (BufferIsValid(newbuf)) UnlockReleaseBuffer(newbuf); /* * Note: in normal operation, we'd update the meta page while still * holding lock on the old and new bucket pages. But during replay it's * not necessary to hold those locks, since no other bucket splits can be * happening concurrently. */ /* replay the record for metapage changes */ if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO) { Page page; HashMetaPage metap; page = BufferGetPage(metabuf); metap = HashPageGetMeta(page); metap->hashm_maxbucket = xlrec->new_bucket; data = XLogRecGetBlockData(record, 2, &datalen); if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS) { uint32 lowmask; uint32 *highmask; /* extract low and high masks. */ memcpy(&lowmask, data, sizeof(uint32)); highmask = (uint32 *) ((char *) data + sizeof(uint32)); /* update metapage */ metap->hashm_lowmask = lowmask; metap->hashm_highmask = *highmask; data += sizeof(uint32) * 2; } if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT) { uint32 ovflpoint; uint32 *ovflpages; /* extract information of overflow pages. */ memcpy(&ovflpoint, data, sizeof(uint32)); ovflpages = (uint32 *) ((char *) data + sizeof(uint32)); /* update metapage */ metap->hashm_spares[ovflpoint] = *ovflpages; metap->hashm_ovflpoint = ovflpoint; } MarkBufferDirty(metabuf); PageSetLSN(BufferGetPage(metabuf), lsn); } if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); }