static void gistRedoPageSplitRecord(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record); Buffer firstbuffer = InvalidBuffer; Buffer buffer; Page page; int i; bool isrootsplit = false; /* * We must hold lock on the first-listed page throughout the action, * including while updating the left child page (if any). We can unlock * remaining pages in the list as soon as they've been written, because * there is no path for concurrent queries to reach those pages without * first visiting the first-listed page. */ /* loop around all pages */ for (i = 0; i < xldata->npage; i++) { int flags; char *data; Size datalen; int num; BlockNumber blkno; IndexTuple *tuples; XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno); if (blkno == GIST_ROOT_BLKNO) { Assert(i == 0); isrootsplit = true; } buffer = XLogInitBufferForRedo(record, i + 1); page = (Page) BufferGetPage(buffer); data = XLogRecGetBlockData(record, i + 1, &datalen); tuples = decodePageSplitRecord(data, datalen, &num); /* ok, clear buffer */ if (xldata->origleaf && blkno != GIST_ROOT_BLKNO) flags = F_LEAF; else flags = 0; GISTInitBuffer(buffer, flags); /* and fill it */ gistfillbuffer(page, tuples, num, FirstOffsetNumber); if (blkno == GIST_ROOT_BLKNO) { GistPageGetOpaque(page)->rightlink = InvalidBlockNumber; GistPageSetNSN(page, xldata->orignsn); GistClearFollowRight(page); } else { if (i < xldata->npage - 1) { BlockNumber nextblkno; XLogRecGetBlockTag(record, i + 2, NULL, NULL, &nextblkno); GistPageGetOpaque(page)->rightlink = nextblkno; } else GistPageGetOpaque(page)->rightlink = xldata->origrlink; GistPageSetNSN(page, xldata->orignsn); if (i < xldata->npage - 1 && !isrootsplit && xldata->markfollowright) GistMarkFollowRight(page); else GistClearFollowRight(page); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); if (i == 0) firstbuffer = buffer; else UnlockReleaseBuffer(buffer); } /* Fix follow-right data on left child page, if any */ if (XLogRecHasBlockRef(record, 0)) gistRedoClearFollowRight(record, 0); /* Finally, release lock on the first page */ UnlockReleaseBuffer(firstbuffer); }
/* * redo any page update (except page split) */ static void gistRedoPageUpdateRecord(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record); Buffer buffer; Page page; if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { char *begin; char *data; Size datalen; int ninserted = 0; data = begin = XLogRecGetBlockData(record, 0, &datalen); page = (Page) BufferGetPage(buffer); if (xldata->ntodelete == 1 && xldata->ntoinsert == 1) { /* * When replacing one tuple with one other tuple, we must use * PageIndexTupleOverwrite for consistency with gistplacetopage. */ OffsetNumber offnum = *((OffsetNumber *) data); IndexTuple itup; Size itupsize; data += sizeof(OffsetNumber); itup = (IndexTuple) data; itupsize = IndexTupleSize(itup); if (!PageIndexTupleOverwrite(page, offnum, (Item) itup, itupsize)) elog(ERROR, "failed to add item to GiST index page, size %d bytes", (int) itupsize); data += itupsize; /* should be nothing left after consuming 1 tuple */ Assert(data - begin == datalen); /* update insertion count for assert check below */ ninserted++; } else if (xldata->ntodelete > 0) { /* Otherwise, delete old tuples if any */ OffsetNumber *todelete = (OffsetNumber *) data; data += sizeof(OffsetNumber) * xldata->ntodelete; PageIndexMultiDelete(page, todelete, xldata->ntodelete); if (GistPageIsLeaf(page)) GistMarkTuplesDeleted(page); } /* Add new tuples if any */ if (data - begin < datalen) { OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber : OffsetNumberNext(PageGetMaxOffsetNumber(page)); while (data - begin < datalen) { IndexTuple itup = (IndexTuple) data; Size sz = IndexTupleSize(itup); OffsetNumber l; data += sz; l = PageAddItem(page, (Item) itup, sz, off, false, false); if (l == InvalidOffsetNumber) elog(ERROR, "failed to add item to GiST index page, size %d bytes", (int) sz); off++; ninserted++; } } /* Check that XLOG record contained expected number of tuples */ Assert(ninserted == xldata->ntoinsert); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } /* * Fix follow-right data on left child page * * This must be done while still holding the lock on the target page. Note * that even if the target page no longer exists, we still attempt to * replay the change on the child page. */ if (XLogRecHasBlockRef(record, 1)) gistRedoClearFollowRight(record, 1); if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
/* * Write an empty XLOG file, containing only the checkpoint record * already set up in ControlFile. */ static void WriteEmptyXLOG(void) { char *buffer; XLogPageHeader page; XLogLongPageHeader longpage; XLogRecord *record; pg_crc32 crc; char path[MAXPGPATH]; int fd; int nbytes; FILE *fp = NULL; char *pch = NULL; char buf[BUFFER_LEN]; /* Use malloc() to ensure buffer is MAXALIGNED */ buffer = (char *) malloc(XLOG_BLCKSZ); page = (XLogPageHeader) buffer; memset(buffer, 0, XLOG_BLCKSZ); /* Set up the XLOG page header */ page->xlp_magic = XLOG_PAGE_MAGIC; page->xlp_info = XLP_LONG_HEADER; page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID; page->xlp_pageaddr.xlogid = ControlFile.checkPointCopy.redo.xlogid; page->xlp_pageaddr.xrecoff = ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD; longpage = (XLogLongPageHeader) page; longpage->xlp_sysid = ControlFile.system_identifier; longpage->xlp_seg_size = XLogSegSize; longpage->xlp_xlog_blcksz = XLOG_BLCKSZ; /* Insert the initial checkpoint record */ record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD); record->xl_prev.xlogid = 0; record->xl_prev.xrecoff = 0; record->xl_xid = InvalidTransactionId; record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint); record->xl_len = sizeof(CheckPoint); record->xl_info = XLOG_CHECKPOINT_SHUTDOWN; record->xl_rmid = RM_XLOG_ID; memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy, sizeof(CheckPoint)); INIT_CRC32C(crc); COMP_CRC32C(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint)); COMP_CRC32C(crc, (char *) record + sizeof(pg_crc32), SizeOfXLogRecord - sizeof(pg_crc32)); FIN_CRC32C(crc); record->xl_crc = crc; /* * If we make the filespace for transaction files configurable, then * pg_resetxlog should pick up the XLOG files from the right location. * Check the flat file for determining the right location of XLOG files */ fp = fopen(TXN_FILESPACE_FLATFILE, "r"); if (fp) { MemSet(buf, 0, BUFFER_LEN); if (fgets(buf, BUFFER_LEN, fp)) ; /* First line is Filespace OID, skip it */ MemSet(buf, 0, BUFFER_LEN); if (fgets(buf, BUFFER_LEN, fp)) { buf[strlen(buf)-1]='\0'; pch = strtok(buf, " "); /* The first part is DBID. Skip it */ pch = strtok(NULL, " "); sprintf(path,"%s/%s", pch, XLOGDIR); } } else { /* No flat file. Use the default pg_system filespace */ sprintf(path, "%s", XLOGDIR); } /* Write the first page */ XLogFilePath2(path, ControlFile.checkPointCopy.ThisTimeLineID, newXlogId, newXlogSeg); unlink(path); fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) { fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), progname, path, strerror(errno)); exit(1); } errno = 0; if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ) { /* if write didn't set errno, assume problem is no disk space */ if (errno == 0) errno = ENOSPC; fprintf(stderr, _("%s: could not write file \"%s\": %s\n"), progname, path, strerror(errno)); exit(1); } /* Fill the rest of the file with zeroes */ memset(buffer, 0, XLOG_BLCKSZ); for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) { errno = 0; if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ) { if (errno == 0) errno = ENOSPC; fprintf(stderr, _("%s: could not write file \"%s\": %s\n"), progname, path, strerror(errno)); exit(1); } } if (fsync(fd) != 0) { fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno)); exit(1); } close(fd); }
static void spgRedoMoveLeafs(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr; SpGistState state; OffsetNumber *toDelete; OffsetNumber *toInsert; int nInsert; Buffer buffer; Page page; XLogRedoAction action; BlockNumber blknoDst; XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoDst); fillFakeState(&state, xldata->stateSrc); nInsert = xldata->replaceDead ? 1 : xldata->nMoves + 1; ptr += SizeOfSpgxlogMoveLeafs; toDelete = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nMoves; toInsert = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * nInsert; /* now ptr points to the list of leaf tuples */ /* * In normal operation we would have all three pages (source, dest, and * parent) locked simultaneously; but in WAL replay it should be safe to * update them one at a time, as long as we do it in the right order. */ /* Insert tuples on the dest page (do first, so redirect is valid) */ if (xldata->newPage) { buffer = XLogInitBufferForRedo(record, 1); SpGistInitBuffer(buffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 1, &buffer); if (action == BLK_NEEDS_REDO) { int i; page = BufferGetPage(buffer); for (i = 0; i < nInsert; i++) { char *leafTuple; SpGistLeafTupleData leafTupleHdr; /* * the tuples are not aligned, so must copy to access the size * field. */ leafTuple = ptr; memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, toInsert[i]); ptr += leafTupleHdr.size; } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Delete tuples from the source page, inserting a redirection pointer */ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves, state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT, SPGIST_PLACEHOLDER, blknoDst, toInsert[nInsert - 1]); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* And update the parent downlink */ if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO) { SpGistInnerTuple tuple; page = BufferGetPage(buffer); tuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(tuple, xldata->nodeI, blknoDst, toInsert[nInsert - 1]); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
static void spgRedoSplitTuple(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) ptr; char *prefixTuple; SpGistInnerTupleData prefixTupleHdr; char *postfixTuple; SpGistInnerTupleData postfixTupleHdr; Buffer buffer; Page page; XLogRedoAction action; ptr += sizeof(spgxlogSplitTuple); prefixTuple = ptr; /* the prefix tuple is unaligned, so make a copy to access its header */ memcpy(&prefixTupleHdr, prefixTuple, sizeof(SpGistInnerTupleData)); ptr += prefixTupleHdr.size; postfixTuple = ptr; /* postfix tuple is also unaligned */ memcpy(&postfixTupleHdr, postfixTuple, sizeof(SpGistInnerTupleData)); /* * In normal operation we would have both pages locked simultaneously; but * in WAL replay it should be safe to update them one at a time, as long * as we do it in the right order. */ /* insert postfix tuple first to avoid dangling link */ if (!xldata->postfixBlkSame) { if (xldata->newPage) { buffer = XLogInitBufferForRedo(record, 1); /* SplitTuple is not used for nulls pages */ SpGistInitBuffer(buffer, 0); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 1, &buffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size, xldata->offnumPostfix); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } /* now handle the original page */ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); PageIndexTupleDelete(page, xldata->offnumPrefix); if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size, xldata->offnumPrefix, false, false) != xldata->offnumPrefix) elog(ERROR, "failed to add item of size %u to SPGiST index page", prefixTupleHdr.size); if (xldata->postfixBlkSame) addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size, xldata->offnumPostfix); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
void btree_desc(StringInfo buf, XLogReaderState *record) { char *rec = XLogRecGetData(record); uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; switch (info) { case XLOG_BTREE_INSERT_LEAF: case XLOG_BTREE_INSERT_UPPER: case XLOG_BTREE_INSERT_META: { xl_btree_insert *xlrec = (xl_btree_insert *) rec; appendStringInfo(buf, "off %u", xlrec->offnum); break; } case XLOG_BTREE_SPLIT_L: case XLOG_BTREE_SPLIT_R: case XLOG_BTREE_SPLIT_L_ROOT: case XLOG_BTREE_SPLIT_R_ROOT: { xl_btree_split *xlrec = (xl_btree_split *) rec; appendStringInfo(buf, "level %u, firstright %d", xlrec->level, xlrec->firstright); break; } case XLOG_BTREE_VACUUM: { xl_btree_vacuum *xlrec = (xl_btree_vacuum *) rec; appendStringInfo(buf, "lastBlockVacuumed %u", xlrec->lastBlockVacuumed); break; } case XLOG_BTREE_DELETE: { xl_btree_delete *xlrec = (xl_btree_delete *) rec; appendStringInfo(buf, "%d items", xlrec->nitems); break; } case XLOG_BTREE_MARK_PAGE_HALFDEAD: { xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) rec; appendStringInfo(buf, "topparent %u; leaf %u; left %u; right %u", xlrec->topparent, xlrec->leafblk, xlrec->leftblk, xlrec->rightblk); break; } case XLOG_BTREE_UNLINK_PAGE_META: case XLOG_BTREE_UNLINK_PAGE: { xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec; appendStringInfo(buf, "left %u; right %u; btpo_xact %u; ", xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact); appendStringInfo(buf, "leafleft %u; leafright %u; topparent %u", xlrec->leafleftsib, xlrec->leafrightsib, xlrec->topparent); break; } case XLOG_BTREE_NEWROOT: { xl_btree_newroot *xlrec = (xl_btree_newroot *) rec; appendStringInfo(buf, "lev %u", xlrec->level); break; } case XLOG_BTREE_REUSE_PAGE: { xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) rec; appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->latestRemovedXid); break; } } }
/* * Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs. * * Currently MULTI_INSERT will always contain the full tuples. */ static void DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) { XLogReaderState *r = buf->record; xl_heap_multi_insert *xlrec; int i; char *data; char *tupledata; Size tuplelen; RelFileNode rnode; xlrec = (xl_heap_multi_insert *) XLogRecGetData(r); /* only interested in our database */ XLogRecGetBlockTag(r, 0, &rnode, NULL, NULL); if (rnode.dbNode != ctx->slot->data.database) return; /* output plugin doesn't look for this origin, no need to queue */ if (FilterByOrigin(ctx, XLogRecGetOrigin(r))) return; tupledata = XLogRecGetBlockData(r, 0, &tuplelen); data = tupledata; for (i = 0; i < xlrec->ntuples; i++) { ReorderBufferChange *change; xl_multi_insert_tuple *xlhdr; int datalen; ReorderBufferTupleBuf *tuple; change = ReorderBufferGetChange(ctx->reorder); change->action = REORDER_BUFFER_CHANGE_INSERT; change->origin_id = XLogRecGetOrigin(r); memcpy(&change->data.tp.relnode, &rnode, sizeof(RelFileNode)); /* * CONTAINS_NEW_TUPLE will always be set currently as multi_insert * isn't used for catalogs, but better be future proof. * * We decode the tuple in pretty much the same way as DecodeXLogTuple, * but since the layout is slightly different, we can't use it here. */ if (xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE) { change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder); tuple = change->data.tp.newtuple; /* not a disk based tuple */ ItemPointerSetInvalid(&tuple->tuple.t_self); xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(data); data = ((char *) xlhdr) + SizeOfMultiInsertTuple; datalen = xlhdr->datalen; /* * We can only figure this out after reassembling the * transactions. */ tuple->tuple.t_tableOid = InvalidOid; tuple->tuple.t_data = &tuple->t_data.header; tuple->tuple.t_len = datalen + SizeofHeapTupleHeader; memset(&tuple->t_data.header, 0, SizeofHeapTupleHeader); memcpy((char *) &tuple->t_data.header + SizeofHeapTupleHeader, (char *) data, datalen); data += datalen; tuple->t_data.header.t_infomask = xlhdr->t_infomask; tuple->t_data.header.t_infomask2 = xlhdr->t_infomask2; tuple->t_data.header.t_hoff = xlhdr->t_hoff; } /* * Reset toast reassembly state only after the last row in the last * xl_multi_insert_tuple record emitted by one heap_multi_insert() * call. */ if (xlrec->flags & XLH_INSERT_LAST_IN_MULTI && (i + 1) == xlrec->ntuples) change->data.tp.clear_toast_afterwards = true; else change->data.tp.clear_toast_afterwards = false; ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change); } Assert(data == tupledata + tuplelen); }
void spg_desc(StringInfo buf, XLogReaderState *record) { char *rec = XLogRecGetData(record); uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; switch (info) { case XLOG_SPGIST_ADD_LEAF: { spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec; appendStringInfoString(buf, "add leaf to page"); appendStringInfo(buf, "; off %u; headoff %u; parentoff %u", xlrec->offnumLeaf, xlrec->offnumHeadLeaf, xlrec->offnumParent); if (xlrec->newPage) appendStringInfoString(buf, " (newpage)"); if (xlrec->storesNulls) appendStringInfoString(buf, " (nulls)"); } break; case XLOG_SPGIST_MOVE_LEAFS: appendStringInfo(buf, "%u leafs", ((spgxlogMoveLeafs *) rec)->nMoves); break; case XLOG_SPGIST_ADD_NODE: appendStringInfo(buf, "off %u", ((spgxlogAddNode *) rec)->offnum); break; case XLOG_SPGIST_SPLIT_TUPLE: appendStringInfo(buf, "prefix off: %u, postfix off: %u (same %d, new %d)", ((spgxlogSplitTuple *) rec)->offnumPrefix, ((spgxlogSplitTuple *) rec)->offnumPostfix, ((spgxlogSplitTuple *) rec)->postfixBlkSame, ((spgxlogSplitTuple *) rec)->newPage ); break; case XLOG_SPGIST_PICKSPLIT: { spgxlogPickSplit *xlrec = (spgxlogPickSplit *) rec; appendStringInfo(buf, "ndel %u; nins %u", xlrec->nDelete, xlrec->nInsert); if (xlrec->innerIsParent) appendStringInfoString(buf, " (innerIsParent)"); if (xlrec->isRootSplit) appendStringInfoString(buf, " (isRootSplit)"); } break; case XLOG_SPGIST_VACUUM_LEAF: /* no further information */ break; case XLOG_SPGIST_VACUUM_ROOT: /* no further information */ break; case XLOG_SPGIST_VACUUM_REDIRECT: appendStringInfo(buf, "newest XID %u", ((spgxlogVacuumRedirect *) rec)->newestRedirectXid); break; } }
void heap_desc(StringInfo buf, XLogReaderState *record) { char *rec = XLogRecGetData(record); uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; info &= XLOG_HEAP_OPMASK; if (info == XLOG_HEAP_INSERT) { xl_heap_insert *xlrec = (xl_heap_insert *) rec; appendStringInfo(buf, "off %u", xlrec->offnum); } else if (info == XLOG_HEAP_DELETE) { xl_heap_delete *xlrec = (xl_heap_delete *) rec; appendStringInfo(buf, "off %u ", xlrec->offnum); out_infobits(buf, xlrec->infobits_set); } else if (info == XLOG_HEAP_UPDATE) { xl_heap_update *xlrec = (xl_heap_update *) rec; appendStringInfo(buf, "off %u xmax %u ", xlrec->old_offnum, xlrec->old_xmax); out_infobits(buf, xlrec->old_infobits_set); appendStringInfo(buf, "; new off %u xmax %u", xlrec->new_offnum, xlrec->new_xmax); } else if (info == XLOG_HEAP_HOT_UPDATE) { xl_heap_update *xlrec = (xl_heap_update *) rec; appendStringInfo(buf, "off %u xmax %u ", xlrec->old_offnum, xlrec->old_xmax); out_infobits(buf, xlrec->old_infobits_set); appendStringInfo(buf, "; new off %u xmax %u", xlrec->new_offnum, xlrec->new_xmax); } else if (info == XLOG_HEAP_CONFIRM) { xl_heap_confirm *xlrec = (xl_heap_confirm *) rec; appendStringInfo(buf, "off %u", xlrec->offnum); } else if (info == XLOG_HEAP_LOCK) { xl_heap_lock *xlrec = (xl_heap_lock *) rec; appendStringInfo(buf, "off %u: xid %u: flags %u ", xlrec->offnum, xlrec->locking_xid, xlrec->flags); out_infobits(buf, xlrec->infobits_set); } else if (info == XLOG_HEAP_INPLACE) { xl_heap_inplace *xlrec = (xl_heap_inplace *) rec; appendStringInfo(buf, "off %u", xlrec->offnum); } }
/* * redo any page update (except page split) */ static void gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record) { char *begin = XLogRecGetData(record); gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) begin; Buffer buffer; Page page; char *data; /* * We need to acquire and hold lock on target page while updating the left * child page. If we have a full-page image of target page, getting the * lock is a side-effect of restoring that image. Note that even if the * target page no longer exists, we'll still attempt to replay the change * on the child page. */ if (record->xl_info & XLR_BKP_BLOCK(0)) buffer = RestoreBackupBlock(lsn, record, 0, false, true); else buffer = XLogReadBuffer(xldata->node, xldata->blkno, false); /* Fix follow-right data on left child page */ if (BlockNumberIsValid(xldata->leftchild)) gistRedoClearFollowRight(lsn, record, 1, xldata->node, xldata->leftchild); /* Done if target page no longer exists */ if (!BufferIsValid(buffer)) return; /* nothing more to do if page was backed up (and no info to do it with) */ if (record->xl_info & XLR_BKP_BLOCK(0)) { UnlockReleaseBuffer(buffer); return; } page = (Page) BufferGetPage(buffer); /* nothing more to do if change already applied */ if (lsn <= PageGetLSN(page)) { UnlockReleaseBuffer(buffer); return; } data = begin + sizeof(gistxlogPageUpdate); /* Delete old tuples */ if (xldata->ntodelete > 0) { int i; OffsetNumber *todelete = (OffsetNumber *) data; data += sizeof(OffsetNumber) * xldata->ntodelete; for (i = 0; i < xldata->ntodelete; i++) PageIndexTupleDelete(page, todelete[i]); if (GistPageIsLeaf(page)) GistMarkTuplesDeleted(page); } /* add tuples */ if (data - begin < record->xl_len) { OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber : OffsetNumberNext(PageGetMaxOffsetNumber(page)); while (data - begin < record->xl_len) { IndexTuple itup = (IndexTuple) data; Size sz = IndexTupleSize(itup); OffsetNumber l; data += sz; l = PageAddItem(page, (Item) itup, sz, off, false, false); if (l == InvalidOffsetNumber) elog(ERROR, "failed to add item to GiST index page, size %d bytes", (int) sz); off++; } } else { /* * special case: leafpage, nothing to insert, nothing to delete, then * vacuum marks page */ if (GistPageIsLeaf(page) && xldata->ntodelete == 0) GistClearTuplesDeleted(page); } if (!GistPageIsLeaf(page) && PageGetMaxOffsetNumber(page) == InvalidOffsetNumber && xldata->blkno == GIST_ROOT_BLKNO) { /* * all links on non-leaf root page was deleted by vacuum full, so root * page becomes a leaf */ GistPageSetLeaf(page); } GistPageGetOpaque(page)->rightlink = InvalidBlockNumber; PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); }
void smgr_redo(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; /* Backup blocks are not used in smgr records */ Assert(!XLogRecHasAnyBlockRefs(record)); if (info == XLOG_SMGR_CREATE) { xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record); SMgrRelation reln; reln = smgropen(xlrec->rnode, InvalidBackendId); smgrcreate(reln, xlrec->forkNum, true); } else if (info == XLOG_SMGR_TRUNCATE) { xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record); SMgrRelation reln; Relation rel; reln = smgropen(xlrec->rnode, InvalidBackendId); /* * Forcibly create relation if it doesn't exist (which suggests that * it was dropped somewhere later in the WAL sequence). As in * XLogReadBufferForRedo, we prefer to recreate the rel and replay the * log as best we can until the drop is seen. */ smgrcreate(reln, MAIN_FORKNUM, true); /* * Before we perform the truncation, update minimum recovery point to * cover this WAL record. Once the relation is truncated, there's no * going back. The buffer manager enforces the WAL-first rule for * normal updates to relation files, so that the minimum recovery * point is always updated before the corresponding change in the data * file is flushed to disk. We have to do the same manually here. * * Doing this before the truncation means that if the truncation fails * for some reason, you cannot start up the system even after restart, * until you fix the underlying situation so that the truncation will * succeed. Alternatively, we could update the minimum recovery point * after truncation, but that would leave a small window where the * WAL-first rule could be violated. */ XLogFlush(lsn); if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0) { smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno); /* Also tell xlogutils.c about it */ XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno); } /* Truncate FSM and VM too */ rel = CreateFakeRelcacheEntry(xlrec->rnode); if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 && smgrexists(reln, FSM_FORKNUM)) FreeSpaceMapTruncateRel(rel, xlrec->blkno); if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 && smgrexists(reln, VISIBILITYMAP_FORKNUM)) visibilitymap_truncate(rel, xlrec->blkno); FreeFakeRelcacheEntry(rel); } else elog(PANIC, "smgr_redo: unknown op code %u", info); }
static void gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record) { gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record); PageSplitRecord xlrec; Buffer firstbuffer = InvalidBuffer; Buffer buffer; Page page; int i; bool isrootsplit = false; decodePageSplitRecord(&xlrec, record); /* * We must hold lock on the first-listed page throughout the action, * including while updating the left child page (if any). We can unlock * remaining pages in the list as soon as they've been written, because * there is no path for concurrent queries to reach those pages without * first visiting the first-listed page. */ /* loop around all pages */ for (i = 0; i < xlrec.data->npage; i++) { NewPage *newpage = xlrec.page + i; int flags; if (newpage->header->blkno == GIST_ROOT_BLKNO) { Assert(i == 0); isrootsplit = true; } buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true); Assert(BufferIsValid(buffer)); page = (Page) BufferGetPage(buffer); /* ok, clear buffer */ if (xlrec.data->origleaf && newpage->header->blkno != GIST_ROOT_BLKNO) flags = F_LEAF; else flags = 0; GISTInitBuffer(buffer, flags); /* and fill it */ gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber); if (newpage->header->blkno == GIST_ROOT_BLKNO) { GistPageGetOpaque(page)->rightlink = InvalidBlockNumber; GistPageSetNSN(page, xldata->orignsn); GistClearFollowRight(page); } else { if (i < xlrec.data->npage - 1) GistPageGetOpaque(page)->rightlink = xlrec.page[i + 1].header->blkno; else GistPageGetOpaque(page)->rightlink = xldata->origrlink; GistPageSetNSN(page, xldata->orignsn); if (i < xlrec.data->npage - 1 && !isrootsplit && xldata->markfollowright) GistMarkFollowRight(page); else GistClearFollowRight(page); } PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); if (i == 0) firstbuffer = buffer; else UnlockReleaseBuffer(buffer); } /* Fix follow-right data on left child page, if any */ if (BlockNumberIsValid(xldata->leftchild)) gistRedoClearFollowRight(lsn, record, 0, xldata->node, xldata->leftchild); /* Finally, release lock on the first page */ UnlockReleaseBuffer(firstbuffer); }
void heap_desc(StringInfo buf, XLogRecord *record) { char *rec = XLogRecGetData(record); uint8 info = record->xl_info & ~XLR_INFO_MASK; info &= XLOG_HEAP_OPMASK; if (info == XLOG_HEAP_INSERT) { xl_heap_insert *xlrec = (xl_heap_insert *) rec; out_target(buf, &(xlrec->target)); } else if (info == XLOG_HEAP_DELETE) { xl_heap_delete *xlrec = (xl_heap_delete *) rec; out_target(buf, &(xlrec->target)); appendStringInfoChar(buf, ' '); out_infobits(buf, xlrec->infobits_set); } else if (info == XLOG_HEAP_UPDATE) { xl_heap_update *xlrec = (xl_heap_update *) rec; out_target(buf, &(xlrec->target)); appendStringInfo(buf, " xmax %u ", xlrec->old_xmax); out_infobits(buf, xlrec->old_infobits_set); appendStringInfo(buf, "; new tid %u/%u xmax %u", ItemPointerGetBlockNumber(&(xlrec->newtid)), ItemPointerGetOffsetNumber(&(xlrec->newtid)), xlrec->new_xmax); } else if (info == XLOG_HEAP_HOT_UPDATE) { xl_heap_update *xlrec = (xl_heap_update *) rec; out_target(buf, &(xlrec->target)); appendStringInfo(buf, " xmax %u ", xlrec->old_xmax); out_infobits(buf, xlrec->old_infobits_set); appendStringInfo(buf, "; new tid %u/%u xmax %u", ItemPointerGetBlockNumber(&(xlrec->newtid)), ItemPointerGetOffsetNumber(&(xlrec->newtid)), xlrec->new_xmax); } else if (info == XLOG_HEAP_LOCK) { xl_heap_lock *xlrec = (xl_heap_lock *) rec; appendStringInfo(buf, "xid %u: ", xlrec->locking_xid); out_target(buf, &(xlrec->target)); appendStringInfoChar(buf, ' '); out_infobits(buf, xlrec->infobits_set); } else if (info == XLOG_HEAP_INPLACE) { xl_heap_inplace *xlrec = (xl_heap_inplace *) rec; out_target(buf, &(xlrec->target)); } }
void heap2_desc(StringInfo buf, XLogRecord *record) { char *rec = XLogRecGetData(record); uint8 info = record->xl_info & ~XLR_INFO_MASK; info &= XLOG_HEAP_OPMASK; if (info == XLOG_HEAP2_CLEAN) { xl_heap_clean *xlrec = (xl_heap_clean *) rec; appendStringInfo(buf, "rel %u/%u/%u; blk %u remxid %u", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->block, xlrec->latestRemovedXid); } else if (info == XLOG_HEAP2_FREEZE_PAGE) { xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec; appendStringInfo(buf, "rel %u/%u/%u; blk %u; cutoff xid %u ntuples %u", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->block, xlrec->cutoff_xid, xlrec->ntuples); } else if (info == XLOG_HEAP2_CLEANUP_INFO) { xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec; appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid); } else if (info == XLOG_HEAP2_VISIBLE) { xl_heap_visible *xlrec = (xl_heap_visible *) rec; appendStringInfo(buf, "rel %u/%u/%u; blk %u", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->block); } else if (info == XLOG_HEAP2_MULTI_INSERT) { xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) rec; appendStringInfo(buf, "rel %u/%u/%u; blk %u; %d tuples", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->blkno, xlrec->ntuples); } else if (info == XLOG_HEAP2_LOCK_UPDATED) { xl_heap_lock_updated *xlrec = (xl_heap_lock_updated *) rec; appendStringInfo(buf, "xmax %u msk %04x; ", xlrec->xmax, xlrec->infobits_set); out_target(buf, &(xlrec->target)); } else if (info == XLOG_HEAP2_NEW_CID) { xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec; out_target(buf, &(xlrec->target)); appendStringInfo(buf, "; cmin: %u, cmax: %u, combo: %u", xlrec->cmin, xlrec->cmax, xlrec->combocid); } }
/* * redo any page update (except page split) */ static void gistRedoPageUpdateRecord(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record); Buffer buffer; Page page; if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { char *begin; char *data; Size datalen; int ninserted = 0; data = begin = XLogRecGetBlockData(record, 0, &datalen); page = (Page) BufferGetPage(buffer); /* Delete old tuples */ if (xldata->ntodelete > 0) { OffsetNumber *todelete = (OffsetNumber *) data; data += sizeof(OffsetNumber) * xldata->ntodelete; PageIndexMultiDelete(page, todelete, xldata->ntodelete); if (GistPageIsLeaf(page)) GistMarkTuplesDeleted(page); } /* add tuples */ if (data - begin < datalen) { OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber : OffsetNumberNext(PageGetMaxOffsetNumber(page)); while (data - begin < datalen) { IndexTuple itup = (IndexTuple) data; Size sz = IndexTupleSize(itup); OffsetNumber l; data += sz; l = PageAddItem(page, (Item) itup, sz, off, false, false); if (l == InvalidOffsetNumber) elog(ERROR, "failed to add item to GiST index page, size %d bytes", (int) sz); off++; ninserted++; } } Assert(ninserted == xldata->ntoinsert); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } /* * Fix follow-right data on left child page * * This must be done while still holding the lock on the target page. Note * that even if the target page no longer exists, we still attempt to * replay the change on the child page. */ if (XLogRecHasBlockRef(record, 1)) gistRedoClearFollowRight(record, 1); if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); }
void heap2_desc(StringInfo buf, XLogReaderState *record) { char *rec = XLogRecGetData(record); uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; info &= XLOG_HEAP_OPMASK; if (info == XLOG_HEAP2_CLEAN) { xl_heap_clean *xlrec = (xl_heap_clean *) rec; appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid); } else if (info == XLOG_HEAP2_FREEZE_PAGE) { xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec; appendStringInfo(buf, "cutoff xid %u ntuples %u", xlrec->cutoff_xid, xlrec->ntuples); } else if (info == XLOG_HEAP2_CLEANUP_INFO) { xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec; appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid); } else if (info == XLOG_HEAP2_VISIBLE) { xl_heap_visible *xlrec = (xl_heap_visible *) rec; appendStringInfo(buf, "cutoff xid %u flags %d", xlrec->cutoff_xid, xlrec->flags); } else if (info == XLOG_HEAP2_MULTI_INSERT) { xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) rec; appendStringInfo(buf, "%d tuples", xlrec->ntuples); } else if (info == XLOG_HEAP2_LOCK_UPDATED) { xl_heap_lock_updated *xlrec = (xl_heap_lock_updated *) rec; appendStringInfo(buf, "off %u: xmax %u: flags %u ", xlrec->offnum, xlrec->xmax, xlrec->flags); out_infobits(buf, xlrec->infobits_set); } else if (info == XLOG_HEAP2_NEW_CID) { xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec; appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u", xlrec->target_node.spcNode, xlrec->target_node.dbNode, xlrec->target_node.relNode, ItemPointerGetBlockNumber(&(xlrec->target_tid)), ItemPointerGetOffsetNumber(&(xlrec->target_tid))); appendStringInfo(buf, "; cmin: %u, cmax: %u, combo: %u", xlrec->cmin, xlrec->cmax, xlrec->combocid); } }
/* * TABLESPACE resource manager's routines */ void tblspc_redo(XLogRecPtr beginLoc, XLogRecPtr lsn, XLogRecord *record) { uint8 info = record->xl_info & ~XLR_INFO_MASK; if (info == XLOG_TBLSPC_CREATE) { xl_tblspc_create_rec *xlrec = (xl_tblspc_create_rec *) XLogRecGetData(record); char *location = xlrec->ts_path; char *linkloc; char *sublocation; struct stat st; /* * MPP-2333: Try to create the target directory if it does not exist. */ if (stat(location, &st) < 0) { if (mkdir(location, 0700) != 0) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not create location directory \"%s\": %m", location))); } } /* * Attempt to coerce target directory to safe permissions. If this * fails, it doesn't exist or has the wrong owner. */ if (chmod(location, 0700) != 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not set permissions on directory \"%s\": %m", location))); /* Create segment subdirectory. */ sublocation = palloc(MAXPGPATH); sprintf(sublocation,"%s/seg%d",location,Gp_segment); if (mkdir(sublocation, 0700) != 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create subdirectory \"%s\": %m", sublocation))); /* Create or re-create the PG_VERSION file in the target directory */ set_short_version(sublocation, NULL, false); /* Create the symlink if not already present */ linkloc = (char *) palloc(10 + 10 + 1); sprintf(linkloc, "pg_tblspc/%u", xlrec->ts_id); if (symlink(sublocation, linkloc) < 0) { if (errno != EEXIST) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create symbolic link \"%s\": %m", linkloc))); } pfree(sublocation); pfree(linkloc); } else if (info == XLOG_TBLSPC_DROP) { xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record); if (!remove_tablespace_directories(xlrec->ts_id, true, xlrec->ts_path)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("tablespace %u is not empty", xlrec->ts_id))); } else elog(PANIC, "tblspc_redo: unknown op code %u", info); }
/* * CommitTS resource manager's routines */ void commit_ts_redo(XLogReaderState *record) { uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; /* Backup blocks are not used in commit_ts records */ Assert(!XLogRecHasAnyBlockRefs(record)); if (info == COMMIT_TS_ZEROPAGE) { int pageno; int slotno; memcpy(&pageno, XLogRecGetData(record), sizeof(int)); LWLockAcquire(CommitTsControlLock, LW_EXCLUSIVE); slotno = ZeroCommitTsPage(pageno, false); SimpleLruWritePage(CommitTsCtl, slotno); Assert(!CommitTsCtl->shared->page_dirty[slotno]); LWLockRelease(CommitTsControlLock); } else if (info == COMMIT_TS_TRUNCATE) { int pageno; memcpy(&pageno, XLogRecGetData(record), sizeof(int)); /* * During XLOG replay, latest_page_number isn't set up yet; insert a * suitable value to bypass the sanity test in SimpleLruTruncate. */ CommitTsCtl->shared->latest_page_number = pageno; SimpleLruTruncate(CommitTsCtl, pageno); } else if (info == COMMIT_TS_SETTS) { xl_commit_ts_set *setts = (xl_commit_ts_set *) XLogRecGetData(record); int nsubxids; TransactionId *subxids; nsubxids = ((XLogRecGetDataLen(record) - SizeOfCommitTsSet) / sizeof(TransactionId)); if (nsubxids > 0) { subxids = palloc(sizeof(TransactionId) * nsubxids); memcpy(subxids, XLogRecGetData(record) + SizeOfCommitTsSet, sizeof(TransactionId) * nsubxids); } else subxids = NULL; TransactionTreeSetCommitTsData(setts->mainxid, nsubxids, subxids, setts->timestamp, setts->nodeid, true); if (subxids) pfree(subxids); } else elog(PANIC, "commit_ts_redo: unknown op code %u", info); }
/* * Handle rmgr XACT_ID records for DecodeRecordIntoReorderBuffer(). */ static void DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) { SnapBuild *builder = ctx->snapshot_builder; ReorderBuffer *reorder = ctx->reorder; XLogReaderState *r = buf->record; uint8 info = XLogRecGetInfo(r) & XLOG_XACT_OPMASK; /* no point in doing anything yet, data could not be decoded anyway */ if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT) return; switch (info) { case XLOG_XACT_COMMIT: case XLOG_XACT_COMMIT_PREPARED: { xl_xact_commit *xlrec; xl_xact_parsed_commit parsed; TransactionId xid; xlrec = (xl_xact_commit *) XLogRecGetData(r); ParseCommitRecord(XLogRecGetInfo(buf->record), xlrec, &parsed); if (!TransactionIdIsValid(parsed.twophase_xid)) xid = XLogRecGetXid(r); else xid = parsed.twophase_xid; DecodeCommit(ctx, buf, &parsed, xid); break; } case XLOG_XACT_ABORT: case XLOG_XACT_ABORT_PREPARED: { xl_xact_abort *xlrec; xl_xact_parsed_abort parsed; TransactionId xid; xlrec = (xl_xact_abort *) XLogRecGetData(r); ParseAbortRecord(XLogRecGetInfo(buf->record), xlrec, &parsed); if (!TransactionIdIsValid(parsed.twophase_xid)) xid = XLogRecGetXid(r); else xid = parsed.twophase_xid; DecodeAbort(ctx, buf, &parsed, xid); break; } case XLOG_XACT_ASSIGNMENT: { xl_xact_assignment *xlrec; int i; TransactionId *sub_xid; xlrec = (xl_xact_assignment *) XLogRecGetData(r); sub_xid = &xlrec->xsub[0]; for (i = 0; i < xlrec->nsubxacts; i++) { ReorderBufferAssignChild(reorder, xlrec->xtop, *(sub_xid++), buf->origptr); } break; } case XLOG_XACT_PREPARE: /* * Currently decoding ignores PREPARE TRANSACTION and will just * decode the transaction when the COMMIT PREPARED is sent or * throw away the transaction's contents when a ROLLBACK PREPARED * is received. In the future we could add code to expose prepared * transactions in the changestream allowing for a kind of * distributed 2PC. */ break; default: elog(ERROR, "unexpected RM_XACT_ID record type: %u", info); } }
/* * Write an empty XLOG file, containing only the checkpoint record * already set up in ControlFile. */ static void WriteEmptyXLOG(void) { char *buffer; XLogPageHeader page; XLogLongPageHeader longpage; XLogRecord *record; pg_crc32 crc; char path[MAXPGPATH]; int fd; int nbytes; /* Use malloc() to ensure buffer is MAXALIGNED */ buffer = (char *) malloc(XLOG_BLCKSZ); page = (XLogPageHeader) buffer; memset(buffer, 0, XLOG_BLCKSZ); /* Set up the XLOG page header */ page->xlp_magic = XLOG_PAGE_MAGIC; page->xlp_info = XLP_LONG_HEADER; page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID; page->xlp_pageaddr = ControlFile.checkPointCopy.redo - SizeOfXLogLongPHD; longpage = (XLogLongPageHeader) page; longpage->xlp_sysid = ControlFile.system_identifier; longpage->xlp_seg_size = XLogSegSize; longpage->xlp_xlog_blcksz = XLOG_BLCKSZ; /* Insert the initial checkpoint record */ record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD); record->xl_prev = 0; record->xl_xid = InvalidTransactionId; record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint); record->xl_len = sizeof(CheckPoint); record->xl_info = XLOG_CHECKPOINT_SHUTDOWN; record->xl_rmid = RM_XLOG_ID; memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy, sizeof(CheckPoint)); INIT_CRC32(crc); COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint)); COMP_CRC32(crc, (char *) record, offsetof(XLogRecord, xl_crc)); FIN_CRC32(crc); record->xl_crc = crc; /* Write the first page */ XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo); unlink(path); fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) { fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), progname, path, strerror(errno)); exit(1); } errno = 0; if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ) { /* if write didn't set errno, assume problem is no disk space */ if (errno == 0) errno = ENOSPC; fprintf(stderr, _("%s: could not write file \"%s\": %s\n"), progname, path, strerror(errno)); exit(1); } /* Fill the rest of the file with zeroes */ memset(buffer, 0, XLOG_BLCKSZ); for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) { errno = 0; if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ) { if (errno == 0) errno = ENOSPC; fprintf(stderr, _("%s: could not write file \"%s\": %s\n"), progname, path, strerror(errno)); exit(1); } } if (fsync(fd) != 0) { fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno)); exit(1); } close(fd); }
static void spgRedoAddLeaf(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) ptr; char *leafTuple; SpGistLeafTupleData leafTupleHdr; Buffer buffer; Page page; XLogRedoAction action; ptr += sizeof(spgxlogAddLeaf); leafTuple = ptr; /* the leaf tuple is unaligned, so make a copy to access its header */ memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); /* * In normal operation we would have both current and parent pages locked * simultaneously; but in WAL replay it should be safe to update the leaf * page before updating the parent. */ if (xldata->newPage) { buffer = XLogInitBufferForRedo(record, 0); SpGistInitBuffer(buffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 0, &buffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); /* insert new tuple */ if (xldata->offnumLeaf != xldata->offnumHeadLeaf) { /* normal cases, tuple was added by SpGistPageAddNewItem */ addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, xldata->offnumLeaf); /* update head tuple's chain link if needed */ if (xldata->offnumHeadLeaf != InvalidOffsetNumber) { SpGistLeafTuple head; head = (SpGistLeafTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumHeadLeaf)); Assert(head->nextOffset == leafTupleHdr.nextOffset); head->nextOffset = xldata->offnumLeaf; } } else { /* replacing a DEAD tuple */ PageIndexTupleDelete(page, xldata->offnumLeaf); if (PageAddItem(page, (Item) leafTuple, leafTupleHdr.size, xldata->offnumLeaf, false, false) != xldata->offnumLeaf) elog(ERROR, "failed to add item of size %u to SPGiST index page", leafTupleHdr.size); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* update parent downlink if necessary */ if (xldata->offnumParent != InvalidOffsetNumber) { if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { SpGistInnerTuple tuple; BlockNumber blknoLeaf; XLogRecGetBlockTag(record, 0, NULL, NULL, &blknoLeaf); page = BufferGetPage(buffer); tuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(tuple, xldata->nodeI, blknoLeaf, xldata->offnumLeaf); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } }
/* * CRC-check an XLOG record. We do not believe the contents of an XLOG * record (other than to the minimal extent of computing the amount of * data to read in) until we've checked the CRCs. * * We assume all of the record has been read into memory at *record. */ static bool RecordIsValid(XLogRecord *record, XLogRecPtr recptr) { pg_crc32 crc; int i; uint32 len = record->xl_len; BkpBlock bkpb; char *blk; /* First the rmgr data */ INIT_CRC32C(crc); COMP_CRC32C(crc, XLogRecGetData(record), len); /* Add in the backup blocks, if any */ blk = (char *) XLogRecGetData(record) + len; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) { uint32 blen; if (!(record->xl_info & XLR_SET_BKP_BLOCK(i))) continue; memcpy(&bkpb, blk, sizeof(BkpBlock)); if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ) { printf("incorrect hole size in record at %X/%X\n", recptr.xlogid, recptr.xrecoff); return false; } blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length; COMP_CRC32C(crc, blk, blen); blk += blen; } /* skip total xl_tot_len check if physical log has been removed. */ #if PG_VERSION_NUM < 80300 || PG_VERSION_NUM >= 90200 if (record->xl_info & XLR_BKP_BLOCK_MASK) #else if (!(record->xl_info & XLR_BKP_REMOVABLE) || record->xl_info & XLR_BKP_BLOCK_MASK) #endif { /* Check that xl_tot_len agrees with our calculation */ if (blk != (char *) record + record->xl_tot_len) { printf("incorrect total length in record at %X/%X\n", recptr.xlogid, recptr.xrecoff); return false; } } /* Finally include the record header */ COMP_CRC32C(crc, (char *) record + sizeof(pg_crc32), SizeOfXLogRecord - sizeof(pg_crc32)); FIN_CRC32C(crc); if (!EQ_CRC32C(record->xl_crc, crc)) { printf("incorrect resource manager data checksum in record at %X/%X\n", recptr.xlogid, recptr.xrecoff); return false; } return true; }
static void spgRedoAddNode(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogAddNode *xldata = (spgxlogAddNode *) ptr; char *innerTuple; SpGistInnerTupleData innerTupleHdr; SpGistState state; Buffer buffer; Page page; XLogRedoAction action; ptr += sizeof(spgxlogAddNode); innerTuple = ptr; /* the tuple is unaligned, so make a copy to access its header */ memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData)); fillFakeState(&state, xldata->stateSrc); if (!XLogRecHasBlockRef(record, 1)) { /* update in place */ Assert(xldata->parentBlk == -1); if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); PageIndexTupleDelete(page, xldata->offnum); if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnum, false, false) != xldata->offnum) elog(ERROR, "failed to add item of size %u to SPGiST index page", innerTupleHdr.size); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } else { BlockNumber blkno; BlockNumber blknoNew; XLogRecGetBlockTag(record, 0, NULL, NULL, &blkno); XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoNew); /* * In normal operation we would have all three pages (source, dest, * and parent) locked simultaneously; but in WAL replay it should be * safe to update them one at a time, as long as we do it in the right * order. We must insert the new tuple before replacing the old tuple * with the redirect tuple. */ /* Install new tuple first so redirect is valid */ if (xldata->newPage) { /* AddNode is not used for nulls pages */ buffer = XLogInitBufferForRedo(record, 1); SpGistInitBuffer(buffer, 0); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 1, &buffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(buffer); addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnumNew); /* * If parent is in this same page, update it now. */ if (xldata->parentBlk == 1) { SpGistInnerTuple parentTuple; parentTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parentTuple, xldata->nodeI, blknoNew, xldata->offnumNew); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* Delete old tuple, replacing it with redirect or placeholder tuple */ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { SpGistDeadTuple dt; page = BufferGetPage(buffer); if (state.isBuild) dt = spgFormDeadTuple(&state, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); else dt = spgFormDeadTuple(&state, SPGIST_REDIRECT, blknoNew, xldata->offnumNew); PageIndexTupleDelete(page, xldata->offnum); if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum, false, false) != xldata->offnum) elog(ERROR, "failed to add item of size %u to SPGiST index page", dt->size); if (state.isBuild) SpGistPageGetOpaque(page)->nPlaceholder++; else SpGistPageGetOpaque(page)->nRedirection++; /* * If parent is in this same page, update it now. */ if (xldata->parentBlk == 0) { SpGistInnerTuple parentTuple; parentTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parentTuple, xldata->nodeI, blknoNew, xldata->offnumNew); } PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); /* * Update parent downlink (if we didn't do it as part of the source or * destination page update already). */ if (xldata->parentBlk == 2) { if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO) { SpGistInnerTuple parentTuple; page = BufferGetPage(buffer); parentTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parentTuple, xldata->nodeI, blknoNew, xldata->offnumNew); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } } }
static void ginRedoUpdateMetapage(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record); Buffer metabuffer; Page metapage; Buffer buffer; /* * Restore the metapage. This is essentially the same as a full-page * image, so restore the metapage unconditionally without looking at the * LSN, to avoid torn page hazards. */ metabuffer = XLogInitBufferForRedo(record, 0); Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO); metapage = BufferGetPage(metabuffer); GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer)); memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData)); PageSetLSN(metapage, lsn); MarkBufferDirty(metabuffer); if (data->ntuples > 0) { /* * insert into tail page */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); OffsetNumber off; int i; Size tupsize; char *payload; IndexTuple tuples; Size totaltupsize; payload = XLogRecGetBlockData(record, 1, &totaltupsize); tuples = (IndexTuple) payload; if (PageIsEmpty(page)) off = FirstOffsetNumber; else off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); for (i = 0; i < data->ntuples; i++) { tupsize = IndexTupleSize(tuples); if (PageAddItem(page, (Item) tuples, tupsize, off, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add item to index page"); tuples = (IndexTuple) (((char *) tuples) + tupsize); off++; } Assert(payload + totaltupsize == (char *) tuples); /* * Increase counter of heap tuples */ GinPageGetOpaque(page)->maxoff++; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } else if (data->prevTail != InvalidBlockNumber) { /* * New tail */ if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) { Page page = BufferGetPage(buffer); GinPageGetOpaque(page)->rightlink = data->newRightlink; PageSetLSN(page, lsn); MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) UnlockReleaseBuffer(buffer); } UnlockReleaseBuffer(metabuffer); }
static void spgRedoPickSplit(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; char *ptr = XLogRecGetData(record); spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr; char *innerTuple; SpGistInnerTupleData innerTupleHdr; SpGistState state; OffsetNumber *toDelete; OffsetNumber *toInsert; uint8 *leafPageSelect; Buffer srcBuffer; Buffer destBuffer; Buffer innerBuffer; Page srcPage; Page destPage; Page page; int i; BlockNumber blknoInner; XLogRedoAction action; XLogRecGetBlockTag(record, 2, NULL, NULL, &blknoInner); fillFakeState(&state, xldata->stateSrc); ptr += SizeOfSpgxlogPickSplit; toDelete = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nDelete; toInsert = (OffsetNumber *) ptr; ptr += sizeof(OffsetNumber) * xldata->nInsert; leafPageSelect = (uint8 *) ptr; ptr += sizeof(uint8) * xldata->nInsert; innerTuple = ptr; /* the inner tuple is unaligned, so make a copy to access its header */ memcpy(&innerTupleHdr, innerTuple, sizeof(SpGistInnerTupleData)); ptr += innerTupleHdr.size; /* now ptr points to the list of leaf tuples */ if (xldata->isRootSplit) { /* when splitting root, we touch it only in the guise of new inner */ srcBuffer = InvalidBuffer; srcPage = NULL; } else if (xldata->initSrc) { /* just re-init the source page */ srcBuffer = XLogInitBufferForRedo(record, 0); srcPage = (Page) BufferGetPage(srcBuffer); SpGistInitBuffer(srcBuffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); /* don't update LSN etc till we're done with it */ } else { /* * Delete the specified tuples from source page. (In case we're in * Hot Standby, we need to hold lock on the page till we're done * inserting leaf tuples and the new inner tuple, else the added * redirect tuple will be a dangling link.) */ srcPage = NULL; if (XLogReadBufferForRedo(record, 0, &srcBuffer) == BLK_NEEDS_REDO) { srcPage = BufferGetPage(srcBuffer); /* * We have it a bit easier here than in doPickSplit(), because we * know the inner tuple's location already, so we can inject the * correct redirection tuple now. */ if (!state.isBuild) spgPageIndexMultiDelete(&state, srcPage, toDelete, xldata->nDelete, SPGIST_REDIRECT, SPGIST_PLACEHOLDER, blknoInner, xldata->offnumInner); else spgPageIndexMultiDelete(&state, srcPage, toDelete, xldata->nDelete, SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, InvalidBlockNumber, InvalidOffsetNumber); /* don't update LSN etc till we're done with it */ } } /* try to access dest page if any */ if (!XLogRecHasBlockRef(record, 1)) { destBuffer = InvalidBuffer; destPage = NULL; } else if (xldata->initDest) { /* just re-init the dest page */ destBuffer = XLogInitBufferForRedo(record, 1); destPage = (Page) BufferGetPage(destBuffer); SpGistInitBuffer(destBuffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); /* don't update LSN etc till we're done with it */ } else { /* * We could probably release the page lock immediately in the * full-page-image case, but for safety let's hold it till later. */ if (XLogReadBufferForRedo(record, 1, &destBuffer) == BLK_NEEDS_REDO) destPage = (Page) BufferGetPage(destBuffer); else destPage = NULL; /* don't do any page updates */ } /* restore leaf tuples to src and/or dest page */ for (i = 0; i < xldata->nInsert; i++) { char *leafTuple; SpGistLeafTupleData leafTupleHdr; /* the tuples are not aligned, so must copy to access the size field. */ leafTuple = ptr; memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData)); ptr += leafTupleHdr.size; page = leafPageSelect[i] ? destPage : srcPage; if (page == NULL) continue; /* no need to touch this page */ addOrReplaceTuple(page, (Item) leafTuple, leafTupleHdr.size, toInsert[i]); } /* Now update src and dest page LSNs if needed */ if (srcPage != NULL) { PageSetLSN(srcPage, lsn); MarkBufferDirty(srcBuffer); } if (destPage != NULL) { PageSetLSN(destPage, lsn); MarkBufferDirty(destBuffer); } /* restore new inner tuple */ if (xldata->initInner) { innerBuffer = XLogInitBufferForRedo(record, 2); SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0)); action = BLK_NEEDS_REDO; } else action = XLogReadBufferForRedo(record, 2, &innerBuffer); if (action == BLK_NEEDS_REDO) { page = BufferGetPage(innerBuffer); addOrReplaceTuple(page, (Item) innerTuple, innerTupleHdr.size, xldata->offnumInner); /* if inner is also parent, update link while we're here */ if (xldata->innerIsParent) { SpGistInnerTuple parent; parent = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parent, xldata->nodeI, blknoInner, xldata->offnumInner); } PageSetLSN(page, lsn); MarkBufferDirty(innerBuffer); } if (BufferIsValid(innerBuffer)) UnlockReleaseBuffer(innerBuffer); /* * Now we can release the leaf-page locks. It's okay to do this before * updating the parent downlink. */ if (BufferIsValid(srcBuffer)) UnlockReleaseBuffer(srcBuffer); if (BufferIsValid(destBuffer)) UnlockReleaseBuffer(destBuffer); /* update parent downlink, unless we did it above */ if (XLogRecHasBlockRef(record, 3)) { Buffer parentBuffer; if (XLogReadBufferForRedo(record, 3, &parentBuffer) == BLK_NEEDS_REDO) { SpGistInnerTuple parent; page = BufferGetPage(parentBuffer); parent = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, xldata->offnumParent)); spgUpdateNodeLink(parent, xldata->nodeI, blknoInner, xldata->offnumInner); PageSetLSN(page, lsn); MarkBufferDirty(parentBuffer); } if (BufferIsValid(parentBuffer)) UnlockReleaseBuffer(parentBuffer); } else Assert(xldata->innerIsParent || xldata->isRootSplit); }
/* * Get the latestRemovedXid from the heap pages pointed at by the index * tuples being deleted. See also btree_xlog_delete_get_latestRemovedXid, * on which this function is based. */ static TransactionId gistRedoDeleteRecordGetLatestRemovedXid(XLogReaderState *record) { gistxlogDelete *xlrec = (gistxlogDelete *) XLogRecGetData(record); OffsetNumber *todelete; Buffer ibuffer, hbuffer; Page ipage, hpage; RelFileNode rnode; BlockNumber blkno; ItemId iitemid, hitemid; IndexTuple itup; HeapTupleHeader htuphdr; BlockNumber hblkno; OffsetNumber hoffnum; TransactionId latestRemovedXid = InvalidTransactionId; int i; /* * If there's nothing running on the standby we don't need to derive a * full latestRemovedXid value, so use a fast path out of here. This * returns InvalidTransactionId, and so will conflict with all HS * transactions; but since we just worked out that that's zero people, * it's OK. * * XXX There is a race condition here, which is that a new backend might * start just after we look. If so, it cannot need to conflict, but this * coding will result in throwing a conflict anyway. */ if (CountDBBackends(InvalidOid) == 0) return latestRemovedXid; /* * In what follows, we have to examine the previous state of the index * page, as well as the heap page(s) it points to. This is only valid if * WAL replay has reached a consistent database state; which means that * the preceding check is not just an optimization, but is *necessary*. We * won't have let in any user sessions before we reach consistency. */ if (!reachedConsistency) elog(PANIC, "gistRedoDeleteRecordGetLatestRemovedXid: cannot operate with inconsistent data"); /* * Get index page. If the DB is consistent, this should not fail, nor * should any of the heap page fetches below. If one does, we return * InvalidTransactionId to cancel all HS transactions. That's probably * overkill, but it's safe, and certainly better than panicking here. */ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno); ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL); if (!BufferIsValid(ibuffer)) return InvalidTransactionId; LockBuffer(ibuffer, BUFFER_LOCK_EXCLUSIVE); ipage = (Page) BufferGetPage(ibuffer); /* * Loop through the deleted index items to obtain the TransactionId from * the heap items they point to. */ todelete = (OffsetNumber *) ((char *) xlrec + SizeOfGistxlogDelete); for (i = 0; i < xlrec->ntodelete; i++) { /* * Identify the index tuple about to be deleted */ iitemid = PageGetItemId(ipage, todelete[i]); itup = (IndexTuple) PageGetItem(ipage, iitemid); /* * Locate the heap page that the index tuple points at */ hblkno = ItemPointerGetBlockNumber(&(itup->t_tid)); hbuffer = XLogReadBufferExtended(xlrec->hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL); if (!BufferIsValid(hbuffer)) { UnlockReleaseBuffer(ibuffer); return InvalidTransactionId; } LockBuffer(hbuffer, BUFFER_LOCK_SHARE); hpage = (Page) BufferGetPage(hbuffer); /* * Look up the heap tuple header that the index tuple points at by * using the heap node supplied with the xlrec. We can't use * heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer. * Note that we are not looking at tuple data here, just headers. */ hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid)); hitemid = PageGetItemId(hpage, hoffnum); /* * Follow any redirections until we find something useful. */ while (ItemIdIsRedirected(hitemid)) { hoffnum = ItemIdGetRedirect(hitemid); hitemid = PageGetItemId(hpage, hoffnum); CHECK_FOR_INTERRUPTS(); } /* * If the heap item has storage, then read the header and use that to * set latestRemovedXid. * * Some LP_DEAD items may not be accessible, so we ignore them. */ if (ItemIdHasStorage(hitemid)) { htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid); HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid); } else if (ItemIdIsDead(hitemid)) { /* * Conjecture: if hitemid is dead then it had xids before the xids * marked on LP_NORMAL items. So we just ignore this item and move * onto the next, for the purposes of calculating * latestRemovedxids. */ } else Assert(!ItemIdIsUsed(hitemid)); UnlockReleaseBuffer(hbuffer); } UnlockReleaseBuffer(ibuffer); /* * If all heap tuples were LP_DEAD then we will be returning * InvalidTransactionId here, which avoids conflicts. This matches * existing logic which assumes that LP_DEAD tuples must already be older * than the latestRemovedXid on the cleanup record that set them as * LP_DEAD, hence must already have generated a conflict. */ return latestRemovedXid; }