/* * redo delete on gist index page to remove tuples marked as DEAD during index * tuple insertion */ static void gistRedoDeleteRecord(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; gistxlogDelete *xldata = (gistxlogDelete *) XLogRecGetData(record); Buffer buffer; Page page; /* * If we have any conflict processing to do, it must happen before we * update the page. * * GiST delete records can conflict with standby queries. You might think * that vacuum records would conflict as well, but we've handled that * already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid * cleaned by the vacuum of the heap and so we can resolve any conflicts * just once when that arrives. After that we know that no conflicts * exist from individual gist vacuum records on that index. */ if (InHotStandby) { TransactionId latestRemovedXid = gistRedoDeleteRecordGetLatestRemovedXid(record); RelFileNode rnode; XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL); ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode); } if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = (Page) BufferGetPage(buffer); if (XLogRecGetDataLen(record) > SizeOfGistxlogDelete) { OffsetNumber *todelete; todelete = (OffsetNumber *) ((char *) xldata + SizeOfGistxlogDelete); PageIndexMultiDelete(page, todelete, xldata->ntodelete); } GistClearPageHasGarbage(page); GistMarkTuplesDeleted(page); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void brin_xlog_desummarize_page(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_brin_desummarize *xlrec; Buffer buffer; XLogRedoAction action; xlrec = (xl_brin_desummarize *) XLogRecGetData(record); /* Update the revmap */ action = XLogReadBufferForRedo(record, 0, &buffer); if (action == BLK_NEEDS_REDO) { ItemPointerData iptr; ItemPointerSetInvalid(&iptr); brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, iptr); PageSetLSN(BufferGetPage(buffer), lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* remove the leftover entry from the regular page */ action = XLogReadBufferForRedo(record, 1, &buffer); if (action == BLK_NEEDS_REDO) { Page regPg = BufferGetPage(buffer); PageIndexTupleDeleteNoCompact(regPg, xlrec->regOffset); PageSetLSN(regPg, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record); Buffer buffer; Page page; /* * Insertion to an internal page finishes an incomplete split at the child * level. Clear the incomplete-split flag in the child. Note: during * normal operation, the child and parent pages are locked at the same * time, so that clearing the flag and inserting the downlink appear * atomic to other backends. We don't bother with that during replay, * because readers don't care about the incomplete-split flag and there * cannot be updates happening. */ if (!isleaf) _bt_clear_incomplete_split(record, 1); if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { Size datalen; char *datapos = XLogRecGetBlockData(record, 0, &datalen); page = BufferGetPage(buffer); if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum, false, false) == InvalidOffsetNumber) elog(PANIC, "btree_insert_redo: failed to add item"); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* * Note: in normal operation, we'd update the metapage while still holding * lock on the page we inserted into. But during replay it's not * necessary to hold that lock, since no other index updates can be * happening concurrently, and readers will cope fine with following an * obsolete link from the metapage. */ if (ismeta) _bt_restore_meta(record, 2); }
static void ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id) { XLogRecPtr lsn = record->EndRecPtr; Buffer buffer; Page page; if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO) { page = (Page) BufferGetPage(buffer); GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
/* * replay a hash index bitmap page */ static void hash_xlog_init_bitmap_page(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; Buffer bitmapbuf; Buffer metabuf; Page page; HashMetaPage metap; uint32 num_buckets; xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record); /* * Initialize bitmap page */ bitmapbuf = XLogInitBufferForRedo(record, 0); _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true); PageSetLSN(BufferGetPage(bitmapbuf), lsn); MarkBufferDirty(bitmapbuf); UnlockReleaseBuffer(bitmapbuf); /* add the new bitmap page to the metapage's list of bitmaps */ if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO) { /* * Note: in normal operation, we'd update the metapage while still * holding lock on the bitmap page. But during replay it's not * necessary to hold that lock, since nobody can see it yet; the * creating transaction hasn't yet committed. */ page = BufferGetPage(metabuf); metap = HashPageGetMeta(page); num_buckets = metap->hashm_maxbucket + 1; metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1; metap->hashm_nmaps++; PageSetLSN(page, lsn); MarkBufferDirty(metabuf); } if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); }
/* * _bt_clear_incomplete_split -- clear INCOMPLETE_SPLIT flag on a page * * This is a common subroutine of the redo functions of all the WAL record * types that can insert a downlink: insert, split, and newroot. */ static void _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id) { XLogRecPtr lsn = record->EndRecPtr; Buffer buf; if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO) { Page page = (Page) BufferGetPage(buf); BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page); Assert((pageop->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0); pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT; PageSetLSN(page, lsn); MarkBufferDirty(buf); } if (BufferIsValid(buf)) UnlockReleaseBuffer(buf); }
/* * replay split cleanup flag operation for primary bucket page. */ static void hash_xlog_split_cleanup(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; Buffer buffer; Page page; if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { HashPageOpaque bucket_opaque; page = (Page) BufferGetPage(buffer); bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page); bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
/* * replay for update meta page */ static void hash_xlog_update_meta_page(XLogReaderState *record) { HashMetaPage metap; XLogRecPtr lsn = record->EndRecPtr; xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record); Buffer metabuf; Page page; if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO) { page = BufferGetPage(metabuf); metap = HashPageGetMeta(page); metap->hashm_ntuples = xldata->ntuples; PageSetLSN(page, lsn); MarkBufferDirty(metabuf); } if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); }
static void ginRedoVacuumDataLeafPage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; Buffer buffer; if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); Size len; ginxlogVacuumDataLeafPage *xlrec; xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len); Assert(GinPageIsLeaf(page)); Assert(GinPageIsData(page)); ginRedoRecompress(page, &xlrec->data); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) ptr; char *leafTuple; SpGistLeafTupleData leafTupleHdr; Buffer buffer; Page page; XLogRedoAction action; ptr += sizeof(spgxlogAddLeaf); leafTuple = ptr; /* the leaf tuple is unaligned, so make a copy to access its header */ memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); /* * In normal operation we would have both current and parent pages locked * simultaneously; but in WAL replay it should be safe to update the leaf * page before updating the parent. */ if (xldata->newPage) { buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, true); SpGistInitBuffer(buffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoLeaf, &buffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); /* insert new tuple */ if (xldata->offnumLeaf != xldata->offnumHeadLeaf) { /* normal cases, tuple was added by SpGistPageAddNewItem */ addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, xldata->offnumLeaf); /* update head tuple's chain link if needed */ if (xldata->offnumHeadLeaf != InvalidOffsetNumber) { SpGistLeafTuple head; head = (SpGistLeafTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumHeadLeaf)); Assert(head->nextOffset == leafTupleHdr.nextOffset); head->nextOffset = xldata->offnumLeaf; } } else { /* replacing a DEAD tuple */ PageIndexTupleDelete(page, xldata->offnumLeaf); if (PageAddItem(page, (Item) leafTuple, leafTupleHdr.size, xldata->offnumLeaf, false, false) != xldata->offnumLeaf) elog(ERROR, "failed to add item of size %u to SPGiST index page", leafTupleHdr.size); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* update parent downlink if necessary */ if (xldata->blknoParent != InvalidBlockNumber) { if (XLogReadBufferForRedo(lsn, record, 1, xldata->node, xldata->blknoParent, &buffer) == BLK_NEEDS_REDO) { SpGistInnerTuple tuple; page = BufferGetPage(buffer); tuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(tuple, xldata->nodeI, xldata->blknoLeaf, xldata->offnumLeaf); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } }
static void btree_xlog_split(bool onleft, bool isroot, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record); bool isleaf = (xlrec->level == 0); Buffer lbuf; Buffer rbuf; Page rpage; BTPageOpaque ropaque; char *datapos; Size datalen; Item left_hikey = NULL; Size left_hikeysz = 0; BlockNumber leftsib; BlockNumber rightsib; BlockNumber rnext; XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib); XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib); if (!XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext)) rnext = P_NONE; /* * Clear the incomplete split flag on the left sibling of the child page * this is a downlink for. (Like in btree_xlog_insert, this can be done * before locking the other pages) */ if (!isleaf) _bt_clear_incomplete_split(record, 3); /* Reconstruct right (new) sibling page from scratch */ rbuf = XLogInitBufferForRedo(record, 1); datapos = XLogRecGetBlockData(record, 1, &datalen); rpage = (Page) BufferGetPage(rbuf); _bt_pageinit(rpage, BufferGetPageSize(rbuf)); ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage); ropaque->btpo_prev = leftsib; ropaque->btpo_next = rnext; ropaque->btpo.level = xlrec->level; ropaque->btpo_flags = isleaf ? BTP_LEAF : 0; ropaque->btpo_cycleid = 0; _bt_restore_page(rpage, datapos, datalen); /* * On leaf level, the high key of the left page is equal to the first key * on the right page. */ if (isleaf) { ItemId hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque)); left_hikey = PageGetItem(rpage, hiItemId); left_hikeysz = ItemIdGetLength(hiItemId); } PageSetLSN(rpage, lsn); MarkBufferDirty(rbuf); /* don't release the buffer yet; we touch right page's first item below */ /* Now reconstruct left (original) sibling page */ if (XLogReadBufferForRedo(record, 0, &lbuf) == BLK_NEEDS_REDO) { /* * To retain the same physical order of the tuples that they had, we * initialize a temporary empty page for the left page and add all the * items to that in item number order. This mirrors how _bt_split() * works. It's not strictly required to retain the same physical * order, as long as the items are in the correct item number order, * but it helps debugging. See also _bt_restore_page(), which does * the same for the right page. */ Page lpage = (Page) BufferGetPage(lbuf); BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage); OffsetNumber off; Item newitem = NULL; Size newitemsz = 0; Page newlpage; OffsetNumber leftoff; datapos = XLogRecGetBlockData(record, 0, &datalen); if (onleft) { newitem = (Item) datapos; newitemsz = MAXALIGN(IndexTupleSize(newitem)); datapos += newitemsz; datalen -= newitemsz; } /* Extract left hikey and its size (assuming 16-bit alignment) */ if (!isleaf) { left_hikey = (Item) datapos; left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey)); datapos += left_hikeysz; datalen -= left_hikeysz; } Assert(datalen == 0); newlpage = PageGetTempPageCopySpecial(lpage); /* Set high key */ leftoff = P_HIKEY; if (PageAddItem(newlpage, left_hikey, left_hikeysz, P_HIKEY, false, false) == InvalidOffsetNumber) elog(PANIC, "failed to add high key to left page after split"); leftoff = OffsetNumberNext(leftoff); for (off = P_FIRSTDATAKEY(lopaque); off < xlrec->firstright; off++) { ItemId itemid; Size itemsz; Item item; /* add the new item if it was inserted on left page */ if (onleft && off == xlrec->newitemoff) { if (PageAddItem(newlpage, newitem, newitemsz, leftoff, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add new item to left page after split"); leftoff = OffsetNumberNext(leftoff); } itemid = PageGetItemId(lpage, off); itemsz = ItemIdGetLength(itemid); item = PageGetItem(lpage, itemid); if (PageAddItem(newlpage, item, itemsz, leftoff, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add old item to left page after split"); leftoff = OffsetNumberNext(leftoff); } /* cope with possibility that newitem goes at the end */ if (onleft && off == xlrec->newitemoff) { if (PageAddItem(newlpage, newitem, newitemsz, leftoff, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add new item to left page after split"); leftoff = OffsetNumberNext(leftoff); } PageRestoreTempPage(newlpage, lpage); /* Fix opaque fields */ lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT; if (isleaf) lopaque->btpo_flags |= BTP_LEAF; lopaque->btpo_next = rightsib; lopaque->btpo_cycleid = 0; PageSetLSN(lpage, lsn); MarkBufferDirty(lbuf); } /* We no longer need the buffers */ if (BufferIsValid(lbuf)) UnlockReleaseBuffer(lbuf); UnlockReleaseBuffer(rbuf); /* * Fix left-link of the page to the right of the new right sibling. * * Note: in normal operation, we do this while still holding lock on the * two split pages. However, that's not necessary for correctness in WAL * replay, because no other index update can be in progress, and readers * will cope properly when following an obsolete left-link. */ if (rnext != P_NONE) { Buffer buffer; if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO) { Page page = (Page) BufferGetPage(buffer); BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = rightsib; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } }
static void spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogAddNode *xldata = (spgxlogAddNode *) ptr; char *innerTuple; SpGistInnerTupleData innerTupleHdr; SpGistState state; Buffer buffer; Page page; int bbi; XLogRedoAction action; ptr += sizeof(spgxlogAddNode); innerTuple = ptr; /* the tuple is unaligned, so make a copy to access its header */ memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData)); fillFakeState(&state, xldata->stateSrc); if (xldata->blknoNew == InvalidBlockNumber) { /* update in place */ Assert(xldata->blknoParent == InvalidBlockNumber); if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); PageIndexTupleDelete(page, xldata->offnum); if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnum, false, false) != xldata->offnum) elog(ERROR, "failed to add item of size %u to SPGiST index page", innerTupleHdr.size); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } else { /* * In normal operation we would have all three pages (source, dest, * and parent) locked simultaneously; but in WAL replay it should be * safe to update them one at a time, as long as we do it in the right * order. * * The logic here depends on the assumption that blkno != blknoNew, * else we can't tell which BKP bit goes with which page, and the LSN * checks could go wrong too. */ Assert(xldata->blkno != xldata->blknoNew); /* Install new tuple first so redirect is valid */ if (xldata->newPage) { buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, true); /* AddNode is not used for nulls pages */ SpGistInitBuffer(buffer, 0); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(lsn, record, 1, xldata->node, xldata->blknoNew, &buffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnumNew); /* * If parent is in this same page, don't advance LSN; doing so * would fool us into not applying the parent downlink update * below. We'll update the LSN when we fix the parent downlink. */ if (xldata->blknoParent != xldata->blknoNew) { PageSetLSN(page, lsn); } MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Delete old tuple, replacing it with redirect or placeholder tuple */ if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, &buffer) == BLK_NEEDS_REDO) { SpGistDeadTuple dt; page = BufferGetPage(buffer); if (state.isBuild) dt = spgFormDeadTuple(&state, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); else dt = spgFormDeadTuple(&state, SPGIST_REDIRECT, xldata->blknoNew, xldata->offnumNew); PageIndexTupleDelete(page, xldata->offnum); if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum, false, false) != xldata->offnum) elog(ERROR, "failed to add item of size %u to SPGiST index page", dt->size); if (state.isBuild) SpGistPageGetOpaque(page)->nPlaceholder++; else SpGistPageGetOpaque(page)->nRedirection++; /* * If parent is in this same page, don't advance LSN; doing so * would fool us into not applying the parent downlink update * below. We'll update the LSN when we fix the parent downlink. */ if (xldata->blknoParent != xldata->blkno) { PageSetLSN(page, lsn); } MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* * Update parent downlink. Since parent could be in either of the * previous two buffers, it's a bit tricky to determine which BKP bit * applies. */ if (xldata->blknoParent == xldata->blkno) bbi = 0; else if (xldata->blknoParent == xldata->blknoNew) bbi = 1; else bbi = 2; if (record->xl_info & XLR_BKP_BLOCK(bbi)) { if (bbi == 2) /* else we already did it */ (void) RestoreBackupBlock(lsn, record, bbi, false, false); action = BLK_RESTORED; buffer = InvalidBuffer; } else { action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node, xldata->blknoParent, &buffer); Assert(action != BLK_RESTORED); } if (action == BLK_NEEDS_REDO) { SpGistInnerTuple innerTuple; page = BufferGetPage(buffer); innerTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(innerTuple, xldata->nodeI, xldata->blknoNew, xldata->offnumNew); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } }
static void spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr; SpGistState state; OffsetNumber *toDelete; OffsetNumber *toInsert; int nInsert; Buffer buffer; Page page; XLogRedoAction action; fillFakeState(&state, xldata->stateSrc); nInsert = xldata->replaceDead ? 1 : xldata->nMoves + 1; ptr += SizeOfSpgxlogMoveLeafs; toDelete = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nMoves; toInsert = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * nInsert; /* now ptr points to the list of leaf tuples */ /* * In normal operation we would have all three pages (source, dest, and * parent) locked simultaneously; but in WAL replay it should be safe to * update them one at a time, as long as we do it in the right order. */ /* Insert tuples on the dest page (do first, so redirect is valid) */ if (xldata->newPage) { buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, true); SpGistInitBuffer(buffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(lsn, record, 1, xldata->node, xldata->blknoDst, &buffer); if (action == BLK_NEEDS_REDO) { int i; page = BufferGetPage(buffer); for (i = 0; i < nInsert; i++) { char *leafTuple; SpGistLeafTupleData leafTupleHdr; /* * the tuples are not aligned, so must copy to access the size * field. */ leafTuple = ptr; memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, toInsert[i]); ptr += leafTupleHdr.size; } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Delete tuples from the source page, inserting a redirection pointer */ if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoSrc, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves, state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT, SPGIST_PLACEHOLDER, xldata->blknoDst, toInsert[nInsert - 1]); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* And update the parent downlink */ if (XLogReadBufferForRedo(lsn, record, 2, xldata->node, xldata->blknoParent, &buffer) == BLK_NEEDS_REDO) { SpGistInnerTuple tuple; page = BufferGetPage(buffer); tuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(tuple, xldata->nodeI, xldata->blknoDst, toInsert[nInsert - 1]); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr; OffsetNumber *itemToPlaceholder; Buffer buffer; itemToPlaceholder = xldata->offsets; /* * If any redirection tuples are being removed, make sure there are no * live Hot Standby transactions that might need to see them. */ if (InHotStandby) { if (TransactionIdIsValid(xldata->newestRedirectXid)) ResolveRecoveryConflictWithSnapshot(xldata->newestRedirectXid, xldata->node); } if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); SpGistPageOpaque opaque = SpGistPageGetOpaque(page); int i; /* Convert redirect pointers to plain placeholders */ for (i = 0; i < xldata->nToPlaceholder; i++) { SpGistDeadTuple dt; dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, itemToPlaceholder[i])); Assert(dt->tupstate == SPGIST_REDIRECT); dt->tupstate = SPGIST_PLACEHOLDER; ItemPointerSetInvalid(&dt->pointer); } Assert(opaque->nRedirection >= xldata->nToPlaceholder); opaque->nRedirection -= xldata->nToPlaceholder; opaque->nPlaceholder += xldata->nToPlaceholder; /* Remove placeholder tuples at end of page */ if (xldata->firstPlaceholder != InvalidOffsetNumber) { int max = PageGetMaxOffsetNumber(page); OffsetNumber *toDelete; toDelete = palloc(sizeof(OffsetNumber) * max); for (i = xldata->firstPlaceholder; i <= max; i++) toDelete[i - xldata->firstPlaceholder] = i; i = max - xldata->firstPlaceholder + 1; Assert(opaque->nPlaceholder >= i); opaque->nPlaceholder -= i; /* The array is sorted, so can use PageIndexMultiDelete */ PageIndexMultiDelete(page, toDelete, i); pfree(toDelete); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) ptr; char *prefixTuple; SpGistInnerTupleData prefixTupleHdr; char *postfixTuple; SpGistInnerTupleData postfixTupleHdr; Buffer buffer; Page page; ptr += sizeof(spgxlogSplitTuple); prefixTuple = ptr; /* the prefix tuple is unaligned, so make a copy to access its header */ memcpy(&prefixTupleHdr, prefixTuple, sizeof(SpGistInnerTupleData)); ptr += prefixTupleHdr.size; postfixTuple = ptr; /* postfix tuple is also unaligned */ memcpy(&postfixTupleHdr, postfixTuple, sizeof(SpGistInnerTupleData)); /* * In normal operation we would have both pages locked simultaneously; but * in WAL replay it should be safe to update them one at a time, as long * as we do it in the right order. */ /* insert postfix tuple first to avoid dangling link */ if (xldata->blknoPostfix != xldata->blknoPrefix) { XLogRedoAction action; if (xldata->newPage) { buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, true); /* SplitTuple is not used for nulls pages */ SpGistInitBuffer(buffer, 0); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(lsn, record, 1, xldata->node, xldata->blknoPostfix, &buffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size, xldata->offnumPostfix); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } /* now handle the original page */ if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoPrefix, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); PageIndexTupleDelete(page, xldata->offnumPrefix); if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size, xldata->offnumPrefix, false, false) != xldata->offnumPrefix) elog(ERROR, "failed to add item of size %u to SPGiST index page", prefixTupleHdr.size); if (xldata->blknoPostfix == xldata->blknoPrefix) addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size, xldata->offnumPostfix); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr; char *innerTuple; SpGistInnerTupleData innerTupleHdr; SpGistState state; OffsetNumber *toDelete; OffsetNumber *toInsert; uint8 *leafPageSelect; Buffer srcBuffer; Buffer destBuffer; Page srcPage; Page destPage; Buffer innerBuffer; Page page; int bbi; int i; XLogRedoAction action; fillFakeState(&state, xldata->stateSrc); ptr += SizeOfSpgxlogPickSplit; toDelete = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nDelete; toInsert = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nInsert; leafPageSelect = (uint8 *) ptr; ptr += sizeof(uint8) * xldata->nInsert; innerTuple = ptr; /* the inner tuple is unaligned, so make a copy to access its header */ memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData)); ptr += innerTupleHdr.size; /* now ptr points to the list of leaf tuples */ /* * It's a bit tricky to identify which pages have been handled as * full-page images, so we explicitly count each referenced buffer. */ bbi = 0; if (SpGistBlockIsRoot(xldata->blknoSrc)) { /* when splitting root, we touch it only in the guise of new inner */ srcBuffer = InvalidBuffer; srcPage = NULL; } else if (xldata->initSrc) { /* just re-init the source page */ srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true); Assert(BufferIsValid(srcBuffer)); srcPage = (Page) BufferGetPage(srcBuffer); SpGistInitBuffer(srcBuffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); /* don't update LSN etc till we're done with it */ } else { /* * Delete the specified tuples from source page. (In case we're in * Hot Standby, we need to hold lock on the page till we're done * inserting leaf tuples and the new inner tuple, else the added * redirect tuple will be a dangling link.) */ if (XLogReadBufferForRedo(lsn, record, bbi, xldata->node, xldata->blknoSrc, &srcBuffer) == BLK_NEEDS_REDO) { srcPage = BufferGetPage(srcBuffer); /* * We have it a bit easier here than in doPickSplit(), because we * know the inner tuple's location already, so we can inject the * correct redirection tuple now. */ if (!state.isBuild) spgPageIndexMultiDelete(&state, srcPage, toDelete, xldata->nDelete, SPGIST_REDIRECT, SPGIST_PLACEHOLDER, xldata->blknoInner, xldata->offnumInner); else spgPageIndexMultiDelete(&state, srcPage, toDelete, xldata->nDelete, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); /* don't update LSN etc till we're done with it */ } else { srcPage = NULL; /* don't do any page updates */ } bbi++; } /* try to access dest page if any */ if (xldata->blknoDest == InvalidBlockNumber) { destBuffer = InvalidBuffer; destPage = NULL; } else if (xldata->initDest) { /* just re-init the dest page */ destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true); Assert(BufferIsValid(destBuffer)); destPage = (Page) BufferGetPage(destBuffer); SpGistInitBuffer(destBuffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); /* don't update LSN etc till we're done with it */ } else { /* * We could probably release the page lock immediately in the * full-page-image case, but for safety let's hold it till later. */ if (XLogReadBufferForRedo(lsn, record, bbi, xldata->node, xldata->blknoDest, &destBuffer) == BLK_NEEDS_REDO) { destPage = (Page) BufferGetPage(destBuffer); } else { destPage = NULL; /* don't do any page updates */ } bbi++; } /* restore leaf tuples to src and/or dest page */ for (i = 0; i < xldata->nInsert; i++) { char *leafTuple; SpGistLeafTupleData leafTupleHdr; /* the tuples are not aligned, so must copy to access the size field. */ leafTuple = ptr; memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); ptr += leafTupleHdr.size; page = leafPageSelect[i] ? destPage : srcPage; if (page == NULL) continue; /* no need to touch this page */ addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, toInsert[i]); } /* Now update src and dest page LSNs if needed */ if (srcPage != NULL) { PageSetLSN(srcPage, lsn); MarkBufferDirty(srcBuffer); } if (destPage != NULL) { PageSetLSN(destPage, lsn); MarkBufferDirty(destBuffer); } /* restore new inner tuple */ if (xldata->initInner) { innerBuffer = XLogReadBuffer(xldata->node, xldata->blknoInner, true); SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node, xldata->blknoInner, &innerBuffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(innerBuffer); addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnumInner); /* if inner is also parent, update link while we're here */ if (xldata->blknoInner == xldata->blknoParent) { SpGistInnerTuple parent; parent = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parent, xldata->nodeI, xldata->blknoInner, xldata->offnumInner); } PageSetLSN(page, lsn); MarkBufferDirty(innerBuffer); } if (BufferIsValid(innerBuffer)) UnlockReleaseBuffer(innerBuffer); bbi++; /* * Now we can release the leaf-page locks. It's okay to do this before * updating the parent downlink. */ if (BufferIsValid(srcBuffer)) UnlockReleaseBuffer(srcBuffer); if (BufferIsValid(destBuffer)) UnlockReleaseBuffer(destBuffer); /* update parent downlink, unless we did it above */ if (xldata->blknoParent == InvalidBlockNumber) { /* no parent cause we split the root */ Assert(SpGistBlockIsRoot(xldata->blknoInner)); } else if (xldata->blknoInner != xldata->blknoParent) { Buffer parentBuffer; if (XLogReadBufferForRedo(lsn, record, bbi, xldata->node, xldata->blknoParent, &parentBuffer) == BLK_NEEDS_REDO) { SpGistInnerTuple parent; page = BufferGetPage(parentBuffer); parent = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parent, xldata->nodeI, xldata->blknoInner, xldata->offnumInner); PageSetLSN(page, lsn); MarkBufferDirty(parentBuffer); } if (BufferIsValid(parentBuffer)) UnlockReleaseBuffer(parentBuffer); } }
/* * Common part of an insert or update. Inserts the new tuple and updates the * revmap. */ static void brin_xlog_insert_update(XLogReaderState *record, xl_brin_insert *xlrec) { XLogRecPtr lsn = record->EndRecPtr; Buffer buffer; BlockNumber regpgno; Page page; XLogRedoAction action; /* * If we inserted the first and only tuple on the page, re-initialize the * page from scratch. */ if (XLogRecGetInfo(record) & XLOG_BRIN_INIT_PAGE) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); brin_page_init(page, BRIN_PAGETYPE_REGULAR); action = BLK_NEEDS_REDO; } else { action = XLogReadBufferForRedo(record, 0, &buffer); } /* need this page's blkno to store in revmap */ regpgno = BufferGetBlockNumber(buffer); /* insert the index item into the page */ if (action == BLK_NEEDS_REDO) { OffsetNumber offnum; BrinTuple *tuple; Size tuplen; tuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen); Assert(tuple->bt_blkno == xlrec->heapBlk); page = (Page) BufferGetPage(buffer); offnum = xlrec->offnum; if (PageGetMaxOffsetNumber(page) + 1 < offnum) elog(PANIC, "brin_xlog_insert_update: invalid max offset number"); offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false); if (offnum == InvalidOffsetNumber) elog(PANIC, "brin_xlog_insert_update: failed to add tuple"); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* update the revmap */ action = XLogReadBufferForRedo(record, 1, &buffer); if (action == BLK_NEEDS_REDO) { ItemPointerData tid; ItemPointerSet(&tid, regpgno, xlrec->offnum); page = (Page) BufferGetPage(buffer); brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, tid); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* XXX no FSM updates here ... */ }
static void ginRedoUpdateMetapage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record); Buffer metabuffer; Page metapage; Buffer buffer; /* * Restore the metapage. This is essentially the same as a full-page * image, so restore the metapage unconditionally without looking at the * LSN, to avoid torn page hazards. */ metabuffer = XLogInitBufferForRedo(record, 0); Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO); metapage = BufferGetPage(metabuffer); memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); PageSetLSN(metapage, lsn); MarkBufferDirty(metabuffer); if (data->ntuples > 0) { /* * insert into tail page */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); OffsetNumber off; int i; Size tupsize; char *payload; IndexTuple tuples; Size totaltupsize; payload = XLogRecGetBlockData(record, 1, &totaltupsize); tuples = (IndexTuple) payload; if (PageIsEmpty(page)) off = FirstOffsetNumber; else off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); for (i = 0; i < data->ntuples; i++) { tupsize = IndexTupleSize(tuples); if (PageAddItem(page, (Item) tuples, tupsize, off, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page"); tuples = (IndexTuple) (((char *) tuples) + tupsize); off++; } Assert(payload + totaltupsize == (char *) tuples); /* * Increase counter of heap tuples */ GinPageGetOpaque(page)->maxoff++; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } else if (data->prevTail != InvalidBlockNumber) { /* * New tail */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); GinPageGetOpaque(page)->rightlink = data->newRightlink; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } UnlockReleaseBuffer(metabuffer); }
static void spgRedoAddNode(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogAddNode *xldata = (spgxlogAddNode *) ptr; char *innerTuple; SpGistInnerTupleData innerTupleHdr; SpGistState state; Buffer buffer; Page page; XLogRedoAction action; ptr += sizeof(spgxlogAddNode); innerTuple = ptr; /* the tuple is unaligned, so make a copy to access its header */ memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData)); fillFakeState(&state, xldata->stateSrc); if (!XLogRecHasBlockRef(record, 1)) { /* update in place */ Assert(xldata->parentBlk == -1); if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); PageIndexTupleDelete(page, xldata->offnum); if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnum, false, false) != xldata->offnum) elog(ERROR, "failed to add item of size %u to SPGiST index page", innerTupleHdr.size); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } else { BlockNumber blkno; BlockNumber blknoNew; XLogRecGetBlockTag(record, 0, NULL, NULL, &blkno); XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoNew); /* * In normal operation we would have all three pages (source, dest, * and parent) locked simultaneously; but in WAL replay it should be * safe to update them one at a time, as long as we do it in the right * order. We must insert the new tuple before replacing the old tuple * with the redirect tuple. */ /* Install new tuple first so redirect is valid */ if (xldata->newPage) { /* AddNode is not used for nulls pages */ buffer = XLogInitBufferForRedo(record, 1); SpGistInitBuffer(buffer, 0); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 1, &buffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnumNew); /* * If parent is in this same page, update it now. */ if (xldata->parentBlk == 1) { SpGistInnerTuple parentTuple; parentTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parentTuple, xldata->nodeI, blknoNew, xldata->offnumNew); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Delete old tuple, replacing it with redirect or placeholder tuple */ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { SpGistDeadTuple dt; page = BufferGetPage(buffer); if (state.isBuild) dt = spgFormDeadTuple(&state, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); else dt = spgFormDeadTuple(&state, SPGIST_REDIRECT, blknoNew, xldata->offnumNew); PageIndexTupleDelete(page, xldata->offnum); if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum, false, false) != xldata->offnum) elog(ERROR, "failed to add item of size %u to SPGiST index page", dt->size); if (state.isBuild) SpGistPageGetOpaque(page)->nPlaceholder++; else SpGistPageGetOpaque(page)->nRedirection++; /* * If parent is in this same page, update it now. */ if (xldata->parentBlk == 0) { SpGistInnerTuple parentTuple; parentTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parentTuple, xldata->nodeI, blknoNew, xldata->offnumNew); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* * Update parent downlink (if we didn't do it as part of the source or * destination page update already). */ if (xldata->parentBlk == 2) { if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO) { SpGistInnerTuple parentTuple; page = BufferGetPage(buffer); parentTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parentTuple, xldata->nodeI, blknoNew, xldata->offnumNew); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } } }
/* * replay addition of overflow page for hash index */ static void hash_xlog_add_ovfl_page(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record); Buffer leftbuf; Buffer ovflbuf; Buffer metabuf; BlockNumber leftblk; BlockNumber rightblk; BlockNumber newmapblk = InvalidBlockNumber; Page ovflpage; HashPageOpaque ovflopaque; uint32 *num_bucket; char *data; Size datalen PG_USED_FOR_ASSERTS_ONLY; bool new_bmpage = false; XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk); XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk); ovflbuf = XLogInitBufferForRedo(record, 0); Assert(BufferIsValid(ovflbuf)); data = XLogRecGetBlockData(record, 0, &datalen); num_bucket = (uint32 *) data; Assert(datalen == sizeof(uint32)); _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE, true); /* update backlink */ ovflpage = BufferGetPage(ovflbuf); ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); ovflopaque->hasho_prevblkno = leftblk; PageSetLSN(ovflpage, lsn); MarkBufferDirty(ovflbuf); if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO) { Page leftpage; HashPageOpaque leftopaque; leftpage = BufferGetPage(leftbuf); leftopaque = (HashPageOpaque) PageGetSpecialPointer(leftpage); leftopaque->hasho_nextblkno = rightblk; PageSetLSN(leftpage, lsn); MarkBufferDirty(leftbuf); } if (BufferIsValid(leftbuf)) UnlockReleaseBuffer(leftbuf); UnlockReleaseBuffer(ovflbuf); /* * Note: in normal operation, we'd update the bitmap and meta page while * still holding lock on the overflow pages. But during replay it's not * necessary to hold those locks, since no other index updates can be * happening concurrently. */ if (XLogRecHasBlockRef(record, 2)) { Buffer mapbuffer; if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO) { Page mappage = (Page) BufferGetPage(mapbuffer); uint32 *freep = NULL; char *data; uint32 *bitmap_page_bit; freep = HashPageGetBitmap(mappage); data = XLogRecGetBlockData(record, 2, &datalen); bitmap_page_bit = (uint32 *) data; SETBIT(freep, *bitmap_page_bit); PageSetLSN(mappage, lsn); MarkBufferDirty(mapbuffer); } if (BufferIsValid(mapbuffer)) UnlockReleaseBuffer(mapbuffer); } if (XLogRecHasBlockRef(record, 3)) { Buffer newmapbuf; newmapbuf = XLogInitBufferForRedo(record, 3); _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true); new_bmpage = true; newmapblk = BufferGetBlockNumber(newmapbuf); MarkBufferDirty(newmapbuf); PageSetLSN(BufferGetPage(newmapbuf), lsn); UnlockReleaseBuffer(newmapbuf); } if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO) { HashMetaPage metap; Page page; uint32 *firstfree_ovflpage; data = XLogRecGetBlockData(record, 4, &datalen); firstfree_ovflpage = (uint32 *) data; page = BufferGetPage(metabuf); metap = HashPageGetMeta(page); metap->hashm_firstfree = *firstfree_ovflpage; if (!xlrec->bmpage_found) { metap->hashm_spares[metap->hashm_ovflpoint]++; if (new_bmpage) { Assert(BlockNumberIsValid(newmapblk)); metap->hashm_mapp[metap->hashm_nmaps] = newmapblk; metap->hashm_nmaps++; metap->hashm_spares[metap->hashm_ovflpoint]++; } } PageSetLSN(page, lsn); MarkBufferDirty(metabuf); } if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); }
/* * replay delete operation in hash index to remove * tuples marked as DEAD during index tuple insertion. */ static void hash_xlog_vacuum_one_page(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_hash_vacuum_one_page *xldata; Buffer buffer; Buffer metabuf; Page page; XLogRedoAction action; HashPageOpaque pageopaque; xldata = (xl_hash_vacuum_one_page *) XLogRecGetData(record); /* * If we have any conflict processing to do, it must happen before we * update the page. * * Hash index records that are marked as LP_DEAD and being removed during * hash index tuple insertion can conflict with standby queries. You might * think that vacuum records would conflict as well, but we've handled * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid * cleaned by the vacuum of the heap and so we can resolve any conflicts * just once when that arrives. After that we know that no conflicts * exist from individual hash index vacuum records on that index. */ if (InHotStandby) { TransactionId latestRemovedXid = hash_xlog_vacuum_get_latestRemovedXid(record); RelFileNode rnode; XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL); ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode); } action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer); if (action == BLK_NEEDS_REDO) { char *ptr; Size len; ptr = XLogRecGetBlockData(record, 0, &len); page = (Page) BufferGetPage(buffer); if (len > 0) { OffsetNumber *unused; OffsetNumber *unend; unused = (OffsetNumber *) ptr; unend = (OffsetNumber *) ((char *) ptr + len); if ((unend - unused) > 0) PageIndexMultiDelete(page, unused, unend - unused); } /* * Mark the page as not containing any LP_DEAD items. See comments * in _hash_vacuum_one_page() for details. */ pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO) { Page metapage; HashMetaPage metap; metapage = BufferGetPage(metabuf); metap = HashPageGetMeta(metapage); metap->hashm_ntuples -= xldata->ntuples; PageSetLSN(metapage, lsn); MarkBufferDirty(metabuf); } if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); }
/* * redo any page update (except page split) */ static void gistRedoPageUpdateRecord(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record); Buffer buffer; Page page; if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { char *begin; char *data; Size datalen; int ninserted = 0; data = begin = XLogRecGetBlockData(record, 0, &datalen); page = (Page) BufferGetPage(buffer); /* Delete old tuples */ if (xldata->ntodelete > 0) { int i; OffsetNumber *todelete = (OffsetNumber *) data; data += sizeof(OffsetNumber) * xldata->ntodelete; for (i = 0; i < xldata->ntodelete; i++) PageIndexTupleDelete(page, todelete[i]); if (GistPageIsLeaf(page)) GistMarkTuplesDeleted(page); } /* add tuples */ if (data - begin < datalen) { OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber : OffsetNumberNext(PageGetMaxOffsetNumber(page)); while (data - begin < datalen) { IndexTuple itup = (IndexTuple) data; Size sz = IndexTupleSize(itup); OffsetNumber l; data += sz; l = PageAddItem(page, (Item) itup, sz, off, false, false); if (l == InvalidOffsetNumber) elog(ERROR, "failed to add item to GiST index page, size %d bytes", (int) sz); off++; ninserted++; } } Assert(ninserted == xldata->ntoinsert); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } /* * Fix follow-right data on left child page * * This must be done while still holding the lock on the target page. Note * that even if the target page no longer exists, we still attempt to * replay the change on the child page. */ if (XLogRecHasBlockRef(record, 1)) gistRedoClearFollowRight(record, 1); if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record); Buffer buffer; Page page; BTPageOpaque pageop; IndexTupleData trunctuple; /* * In normal operation, we would lock all the pages this WAL record * touches before changing any of them. In WAL replay, it should be okay * to lock just one page at a time, since no concurrent index updates can * be happening, and readers should not care whether they arrive at the * target page or not (since it's surely empty). */ /* parent page */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { OffsetNumber poffset; ItemId itemid; IndexTuple itup; OffsetNumber nextoffset; BlockNumber rightsib; page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); poffset = xlrec->poffset; nextoffset = OffsetNumberNext(poffset); itemid = PageGetItemId(page, nextoffset); itup = (IndexTuple) PageGetItem(page, itemid); rightsib = ItemPointerGetBlockNumber(&itup->t_tid); itemid = PageGetItemId(page, poffset); itup = (IndexTuple) PageGetItem(page, itemid); ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY); nextoffset = OffsetNumberNext(poffset); PageIndexTupleDelete(page, nextoffset); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Rewrite the leaf page as a halfdead page */ buffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(buffer); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = xlrec->leftblk; pageop->btpo_next = xlrec->rightblk; pageop->btpo.level = 0; pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF; pageop->btpo_cycleid = 0; /* * Construct a dummy hikey item that points to the next parent to be * deleted (if any). */ MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (xlrec->topparent != InvalidBlockNumber) ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) elog(ERROR, "could not add dummy high key to half-dead page"); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
/* * replay squeeze page operation of hash index */ static void hash_xlog_squeeze_page(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record); Buffer bucketbuf = InvalidBuffer; Buffer writebuf; Buffer ovflbuf; Buffer prevbuf = InvalidBuffer; Buffer mapbuf; XLogRedoAction action; /* * Ensure we have a cleanup lock on primary bucket page before we start * with the actual replay operation. This is to ensure that neither a * scan can start nor a scan can be already-in-progress during the replay * of this operation. If we allow scans during this operation, then they * can miss some records or show the same record multiple times. */ if (xldata->is_prim_bucket_same_wrt) action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf); else { /* * we don't care for return value as the purpose of reading bucketbuf * is to ensure a cleanup lock on primary bucket page. */ (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf); action = XLogReadBufferForRedo(record, 1, &writebuf); } /* replay the record for adding entries in overflow buffer */ if (action == BLK_NEEDS_REDO) { Page writepage; char *begin; char *data; Size datalen; uint16 ninserted = 0; data = begin = XLogRecGetBlockData(record, 1, &datalen); writepage = (Page) BufferGetPage(writebuf); if (xldata->ntups > 0) { OffsetNumber *towrite = (OffsetNumber *) data; data += sizeof(OffsetNumber) * xldata->ntups; while (data - begin < datalen) { IndexTuple itup = (IndexTuple) data; Size itemsz; OffsetNumber l; itemsz = IndexTupleDSize(*itup); itemsz = MAXALIGN(itemsz); data += itemsz; l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false); if (l == InvalidOffsetNumber) elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes", (int) itemsz); ninserted++; } } /* * number of tuples inserted must be same as requested in REDO record. */ Assert(ninserted == xldata->ntups); /* * if the page on which are adding tuples is a page previous to freed * overflow page, then update its nextblno. */ if (xldata->is_prev_bucket_same_wrt) { HashPageOpaque writeopaque = (HashPageOpaque) PageGetSpecialPointer(writepage); writeopaque->hasho_nextblkno = xldata->nextblkno; } PageSetLSN(writepage, lsn); MarkBufferDirty(writebuf); } /* replay the record for initializing overflow buffer */ if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO) { Page ovflpage; ovflpage = BufferGetPage(ovflbuf); _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf)); PageSetLSN(ovflpage, lsn); MarkBufferDirty(ovflbuf); } if (BufferIsValid(ovflbuf)) UnlockReleaseBuffer(ovflbuf); /* replay the record for page previous to the freed overflow page */ if (!xldata->is_prev_bucket_same_wrt && XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO) { Page prevpage = BufferGetPage(prevbuf); HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage); prevopaque->hasho_nextblkno = xldata->nextblkno; PageSetLSN(prevpage, lsn); MarkBufferDirty(prevbuf); } if (BufferIsValid(prevbuf)) UnlockReleaseBuffer(prevbuf); /* replay the record for page next to the freed overflow page */ if (XLogRecHasBlockRef(record, 4)) { Buffer nextbuf; if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO) { Page nextpage = BufferGetPage(nextbuf); HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage); nextopaque->hasho_prevblkno = xldata->prevblkno; PageSetLSN(nextpage, lsn); MarkBufferDirty(nextbuf); } if (BufferIsValid(nextbuf)) UnlockReleaseBuffer(nextbuf); } if (BufferIsValid(writebuf)) UnlockReleaseBuffer(writebuf); if (BufferIsValid(bucketbuf)) UnlockReleaseBuffer(bucketbuf); /* * Note: in normal operation, we'd update the bitmap and meta page while * still holding lock on the primary bucket page and overflow pages. But * during replay it's not necessary to hold those locks, since no other * index updates can be happening concurrently. */ /* replay the record for bitmap page */ if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO) { Page mappage = (Page) BufferGetPage(mapbuf); uint32 *freep = NULL; char *data; uint32 *bitmap_page_bit; Size datalen; freep = HashPageGetBitmap(mappage); data = XLogRecGetBlockData(record, 5, &datalen); bitmap_page_bit = (uint32 *) data; CLRBIT(freep, *bitmap_page_bit); PageSetLSN(mappage, lsn); MarkBufferDirty(mapbuf); } if (BufferIsValid(mapbuf)) UnlockReleaseBuffer(mapbuf); /* replay the record for meta page */ if (XLogRecHasBlockRef(record, 6)) { Buffer metabuf; if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO) { HashMetaPage metap; Page page; char *data; uint32 *firstfree_ovflpage; Size datalen; data = XLogRecGetBlockData(record, 6, &datalen); firstfree_ovflpage = (uint32 *) data; page = BufferGetPage(metabuf); metap = HashPageGetMeta(page); metap->hashm_firstfree = *firstfree_ovflpage; PageSetLSN(page, lsn); MarkBufferDirty(metabuf); } if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); } }
static void btree_xlog_unlink_page(uint8 info, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record); BlockNumber leftsib; BlockNumber rightsib; Buffer buffer; Page page; BTPageOpaque pageop; leftsib = xlrec->leftsib; rightsib = xlrec->rightsib; /* * In normal operation, we would lock all the pages this WAL record * touches before changing any of them. In WAL replay, it should be okay * to lock just one page at a time, since no concurrent index updates can * be happening, and readers should not care whether they arrive at the * target page or not (since it's surely empty). */ /* Fix left-link of right sibling */ if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO) { page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = leftsib; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Fix right-link of left sibling, if any */ if (leftsib != P_NONE) { if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_next = rightsib; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } /* Rewrite target page as empty deleted page */ buffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(buffer); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop = (BTPageOpaque) PageGetSpecialPointer(page); pageop->btpo_prev = leftsib; pageop->btpo_next = rightsib; pageop->btpo.xact = xlrec->btpo_xact; pageop->btpo_flags = BTP_DELETED; pageop->btpo_cycleid = 0; PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); /* * If we deleted a parent of the targeted leaf page, instead of the leaf * itself, update the leaf to point to the next remaining child in the * branch. */ if (XLogRecHasBlockRef(record, 3)) { /* * There is no real data on the page, so we just re-create it from * scratch using the information from the WAL record. */ IndexTupleData trunctuple; buffer = XLogInitBufferForRedo(record, 3); page = (Page) BufferGetPage(buffer); pageop = (BTPageOpaque) PageGetSpecialPointer(page); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF; pageop->btpo_prev = xlrec->leafleftsib; pageop->btpo_next = xlrec->leafrightsib; pageop->btpo.level = 0; pageop->btpo_cycleid = 0; /* Add a dummy hikey item */ MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (xlrec->topparent != InvalidBlockNumber) ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, false, false) == InvalidOffsetNumber) elog(ERROR, "could not add dummy high key to half-dead page"); PageSetLSN(page, lsn); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); } /* Update metapage if needed */ if (info == XLOG_BTREE_UNLINK_PAGE_META) _bt_restore_meta(record, 4); }
/* * replay delete operation of hash index */ static void hash_xlog_delete(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record); Buffer bucketbuf = InvalidBuffer; Buffer deletebuf; Page page; XLogRedoAction action; /* * Ensure we have a cleanup lock on primary bucket page before we start * with the actual replay operation. This is to ensure that neither a * scan can start nor a scan can be already-in-progress during the replay * of this operation. If we allow scans during this operation, then they * can miss some records or show the same record multiple times. */ if (xldata->is_primary_bucket_page) action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf); else { /* * we don't care for return value as the purpose of reading bucketbuf * is to ensure a cleanup lock on primary bucket page. */ (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf); action = XLogReadBufferForRedo(record, 1, &deletebuf); } /* replay the record for deleting entries in bucket page */ if (action == BLK_NEEDS_REDO) { char *ptr; Size len; ptr = XLogRecGetBlockData(record, 1, &len); page = (Page) BufferGetPage(deletebuf); if (len > 0) { OffsetNumber *unused; OffsetNumber *unend; unused = (OffsetNumber *) ptr; unend = (OffsetNumber *) ((char *) ptr + len); if ((unend - unused) > 0) PageIndexMultiDelete(page, unused, unend - unused); } /* * Mark the page as not containing any LP_DEAD items only if * clear_dead_marking flag is set to true. See comments in * hashbucketcleanup() for details. */ if (xldata->clear_dead_marking) { HashPageOpaque pageopaque; pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES; } PageSetLSN(page, lsn); MarkBufferDirty(deletebuf); } if (BufferIsValid(deletebuf)) UnlockReleaseBuffer(deletebuf); if (BufferIsValid(bucketbuf)) UnlockReleaseBuffer(bucketbuf); }
/* * replay allocation of page for split operation */ static void hash_xlog_split_allocate_page(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record); Buffer oldbuf; Buffer newbuf; Buffer metabuf; Size datalen PG_USED_FOR_ASSERTS_ONLY; char *data; XLogRedoAction action; /* * To be consistent with normal operation, here we take cleanup locks on * both the old and new buckets even though there can't be any concurrent * inserts. */ /* replay the record for old bucket */ action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf); /* * Note that we still update the page even if it was restored from a full * page image, because the special space is not included in the image. */ if (action == BLK_NEEDS_REDO || action == BLK_RESTORED) { Page oldpage; HashPageOpaque oldopaque; oldpage = BufferGetPage(oldbuf); oldopaque = (HashPageOpaque) PageGetSpecialPointer(oldpage); oldopaque->hasho_flag = xlrec->old_bucket_flag; oldopaque->hasho_prevblkno = xlrec->new_bucket; PageSetLSN(oldpage, lsn); MarkBufferDirty(oldbuf); } /* replay the record for new bucket */ newbuf = XLogInitBufferForRedo(record, 1); _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket, xlrec->new_bucket_flag, true); if (!IsBufferCleanupOK(newbuf)) elog(PANIC, "hash_xlog_split_allocate_page: failed to acquire cleanup lock"); MarkBufferDirty(newbuf); PageSetLSN(BufferGetPage(newbuf), lsn); /* * We can release the lock on old bucket early as well but doing here to * consistent with normal operation. */ if (BufferIsValid(oldbuf)) UnlockReleaseBuffer(oldbuf); if (BufferIsValid(newbuf)) UnlockReleaseBuffer(newbuf); /* * Note: in normal operation, we'd update the meta page while still * holding lock on the old and new bucket pages. But during replay it's * not necessary to hold those locks, since no other bucket splits can be * happening concurrently. */ /* replay the record for metapage changes */ if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO) { Page page; HashMetaPage metap; page = BufferGetPage(metabuf); metap = HashPageGetMeta(page); metap->hashm_maxbucket = xlrec->new_bucket; data = XLogRecGetBlockData(record, 2, &datalen); if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS) { uint32 lowmask; uint32 *highmask; /* extract low and high masks. */ memcpy(&lowmask, data, sizeof(uint32)); highmask = (uint32 *) ((char *) data + sizeof(uint32)); /* update metapage */ metap->hashm_lowmask = lowmask; metap->hashm_highmask = *highmask; data += sizeof(uint32) * 2; } if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT) { uint32 ovflpoint; uint32 *ovflpages; /* extract information of overflow pages. */ memcpy(&ovflpoint, data, sizeof(uint32)); ovflpages = (uint32 *) ((char *) data + sizeof(uint32)); /* update metapage */ metap->hashm_spares[ovflpoint] = *ovflpages; metap->hashm_ovflpoint = ovflpoint; } MarkBufferDirty(metabuf); PageSetLSN(BufferGetPage(metabuf), lsn); } if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); }
static void spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record) { char *ptr = XLogRecGetData(record); spgxlogVacuumLeaf *xldata = (spgxlogVacuumLeaf *) ptr; OffsetNumber *toDead; OffsetNumber *toPlaceholder; OffsetNumber *moveSrc; OffsetNumber *moveDest; OffsetNumber *chainSrc; OffsetNumber *chainDest; SpGistState state; Buffer buffer; Page page; int i; fillFakeState(&state, xldata->stateSrc); ptr += SizeOfSpgxlogVacuumLeaf; toDead = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nDead; toPlaceholder = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nPlaceholder; moveSrc = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nMove; moveDest = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nMove; chainSrc = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nChain; chainDest = (OffsetNumber *) ptr; if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); spgPageIndexMultiDelete(&state, page, toDead, xldata->nDead, SPGIST_DEAD, SPGIST_DEAD, InvalidBlockNumber, InvalidOffsetNumber); spgPageIndexMultiDelete(&state, page, toPlaceholder, xldata->nPlaceholder, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); /* see comments in vacuumLeafPage() */ for (i = 0; i < xldata->nMove; i++) { ItemId idSrc = PageGetItemId(page, moveSrc[i]); ItemId idDest = PageGetItemId(page, moveDest[i]); ItemIdData tmp; tmp = *idSrc; *idSrc = *idDest; *idDest = tmp; } spgPageIndexMultiDelete(&state, page, moveSrc, xldata->nMove, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); for (i = 0; i < xldata->nChain; i++) { SpGistLeafTuple lt; lt = (SpGistLeafTuple) PageGetItem(page, PageGetItemId(page, chainSrc[i])); Assert(lt->tupstate == SPGIST_LIVE); lt->nextOffset = chainDest[i]; } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
/* * redo any page update (except page split) */ static void gistRedoPageUpdateRecord(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record); Buffer buffer; Page page; if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { char *begin; char *data; Size datalen; int ninserted = 0; data = begin = XLogRecGetBlockData(record, 0, &datalen); page = (Page) BufferGetPage(buffer); if (xldata->ntodelete == 1 && xldata->ntoinsert == 1) { /* * When replacing one tuple with one other tuple, we must use * PageIndexTupleOverwrite for consistency with gistplacetopage. */ OffsetNumber offnum = *((OffsetNumber *) data); IndexTuple itup; Size itupsize; data += sizeof(OffsetNumber); itup = (IndexTuple) data; itupsize = IndexTupleSize(itup); if (!PageIndexTupleOverwrite(page, offnum, (Item) itup, itupsize)) elog(ERROR, "failed to add item to GiST index page, size %d bytes", (int) itupsize); data += itupsize; /* should be nothing left after consuming 1 tuple */ Assert(data - begin == datalen); /* update insertion count for assert check below */ ninserted++; } else if (xldata->ntodelete > 0) { /* Otherwise, delete old tuples if any */ OffsetNumber *todelete = (OffsetNumber *) data; data += sizeof(OffsetNumber) * xldata->ntodelete; PageIndexMultiDelete(page, todelete, xldata->ntodelete); if (GistPageIsLeaf(page)) GistMarkTuplesDeleted(page); } /* Add new tuples if any */ if (data - begin < datalen) { OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber : OffsetNumberNext(PageGetMaxOffsetNumber(page)); while (data - begin < datalen) { IndexTuple itup = (IndexTuple) data; Size sz = IndexTupleSize(itup); OffsetNumber l; data += sz; l = PageAddItem(page, (Item) itup, sz, off, false, false); if (l == InvalidOffsetNumber) elog(ERROR, "failed to add item to GiST index page, size %d bytes", (int) sz); off++; ninserted++; } } /* Check that XLOG record contained expected number of tuples */ Assert(ninserted == xldata->ntoinsert); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } /* * Fix follow-right data on left child page * * This must be done while still holding the lock on the target page. Note * that even if the target page no longer exists, we still attempt to * replay the change on the child page. */ if (XLogRecHasBlockRef(record, 1)) gistRedoClearFollowRight(record, 1); if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
/* * replay move of page contents for squeeze operation of hash index */ static void hash_xlog_move_page_contents(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record); Buffer bucketbuf = InvalidBuffer; Buffer writebuf = InvalidBuffer; Buffer deletebuf = InvalidBuffer; XLogRedoAction action; /* * Ensure we have a cleanup lock on primary bucket page before we start * with the actual replay operation. This is to ensure that neither a * scan can start nor a scan can be already-in-progress during the replay * of this operation. If we allow scans during this operation, then they * can miss some records or show the same record multiple times. */ if (xldata->is_prim_bucket_same_wrt) action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf); else { /* * we don't care for return value as the purpose of reading bucketbuf * is to ensure a cleanup lock on primary bucket page. */ (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf); action = XLogReadBufferForRedo(record, 1, &writebuf); } /* replay the record for adding entries in overflow buffer */ if (action == BLK_NEEDS_REDO) { Page writepage; char *begin; char *data; Size datalen; uint16 ninserted = 0; data = begin = XLogRecGetBlockData(record, 1, &datalen); writepage = (Page) BufferGetPage(writebuf); if (xldata->ntups > 0) { OffsetNumber *towrite = (OffsetNumber *) data; data += sizeof(OffsetNumber) * xldata->ntups; while (data - begin < datalen) { IndexTuple itup = (IndexTuple) data; Size itemsz; OffsetNumber l; itemsz = IndexTupleDSize(*itup); itemsz = MAXALIGN(itemsz); data += itemsz; l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false); if (l == InvalidOffsetNumber) elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes", (int) itemsz); ninserted++; } } /* * number of tuples inserted must be same as requested in REDO record. */ Assert(ninserted == xldata->ntups); PageSetLSN(writepage, lsn); MarkBufferDirty(writebuf); } /* replay the record for deleting entries from overflow buffer */ if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO) { Page page; char *ptr; Size len; ptr = XLogRecGetBlockData(record, 2, &len); page = (Page) BufferGetPage(deletebuf); if (len > 0) { OffsetNumber *unused; OffsetNumber *unend; unused = (OffsetNumber *) ptr; unend = (OffsetNumber *) ((char *) ptr + len); if ((unend - unused) > 0) PageIndexMultiDelete(page, unused, unend - unused); } PageSetLSN(page, lsn); MarkBufferDirty(deletebuf); } /* * Replay is complete, now we can release the buffers. We release locks at * end of replay operation to ensure that we hold lock on primary bucket * page till end of operation. We can optimize by releasing the lock on * write buffer as soon as the operation for same is complete, if it is * not same as primary bucket page, but that doesn't seem to be worth * complicating the code. */ if (BufferIsValid(deletebuf)) UnlockReleaseBuffer(deletebuf); if (BufferIsValid(writebuf)) UnlockReleaseBuffer(writebuf); if (BufferIsValid(bucketbuf)) UnlockReleaseBuffer(bucketbuf); }