static void
spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
    RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;

    buffer = XLogReadBuffer(*node, SPGIST_METAPAGE_BLKNO, true);
    Assert(BufferIsValid(buffer));
    page = (Page) BufferGetPage(buffer);
    SpGistInitMetapage(page);
    PageSetLSN(page, lsn);
    PageSetTLI(page, ThisTimeLineID);
    MarkBufferDirty(buffer);
    UnlockReleaseBuffer(buffer);

    buffer = XLogReadBuffer(*node, SPGIST_HEAD_BLKNO, true);
    Assert(BufferIsValid(buffer));
    SpGistInitBuffer(buffer, SPGIST_LEAF);
    page = (Page) BufferGetPage(buffer);
    PageSetLSN(page, lsn);
    PageSetTLI(page, ThisTimeLineID);
    MarkBufferDirty(buffer);
    UnlockReleaseBuffer(buffer);
}
/*
 * _bitmap_log_bitmap_lastwords() -- log the last two words in a bitmap.
 */
void
_bitmap_log_bitmap_lastwords(Relation rel, Buffer lovBuffer,
                             OffsetNumber lovOffset, BMLOVItem lovItem)
{
    xl_bm_bitmap_lastwords xlLastwords;
    XLogRecPtr  recptr;
    XLogRecData rdata[1];

    /* Fetch gp_persistent_relation_node information that will be added to XLOG record. */
    RelationFetchGpRelationNodeForXLog(rel);

    xlLastwords.bm_node = rel->rd_node;
    xlLastwords.bm_persistentTid = rel->rd_relationnodeinfo.persistentTid;
    xlLastwords.bm_persistentSerialNum = rel->rd_relationnodeinfo.persistentSerialNum;
    xlLastwords.bm_last_compword = lovItem->bm_last_compword;
    xlLastwords.bm_last_word = lovItem->bm_last_word;
    xlLastwords.lov_words_header = lovItem->lov_words_header;
    xlLastwords.bm_last_setbit = lovItem->bm_last_setbit;
    xlLastwords.bm_last_tid_location = lovItem->bm_last_tid_location;
    xlLastwords.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
    xlLastwords.bm_lov_offset = lovOffset;

    rdata[0].buffer = InvalidBuffer;
    rdata[0].data = (char *) &xlLastwords;
    rdata[0].len = sizeof(xl_bm_bitmap_lastwords);
    rdata[0].next = NULL;

    recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_BITMAP_LASTWORDS, rdata);

    PageSetLSN(BufferGetPage(lovBuffer), recptr);
    PageSetTLI(BufferGetPage(lovBuffer), ThisTimeLineID);
}
/*
 * _bitmap_log_metapage() -- log the changes to the metapage
 */
void
_bitmap_log_metapage(Relation rel, BMMetaPage metapage)
{
    /* XLOG stuff */
    START_CRIT_SECTION();

    if (!(rel->rd_istemp))
    {
        xl_bm_metapage *xlMeta;
        XLogRecPtr  recptr;
        XLogRecData rdata[1];

#ifdef BM_DEBUG
        elog(LOG, "call _bitmap_log_metapage.");
#endif

        xlMeta = (xl_bm_metapage *) palloc(MAXALIGN(sizeof(xl_bm_metapage)));
        xlMeta->bm_node = rel->rd_node;

        rdata[0].buffer = InvalidBuffer;
        rdata[0].data = (char *) xlMeta;
        rdata[0].len = MAXALIGN(sizeof(xl_bm_metapage));
        rdata[0].next = NULL;

        recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_META, rdata);

        PageSetLSN(metapage, recptr);
        PageSetTLI(metapage, ThisTimeLineID);
        pfree(xlMeta);
    }

    END_CRIT_SECTION();
}
/*
 * Replay the clearing of F_FOLLOW_RIGHT flag.
 */
static void
gistRedoClearFollowRight(RelFileNode node, XLogRecPtr lsn,
                         BlockNumber leftblkno)
{
    Buffer      buffer;

    buffer = XLogReadBuffer(node, leftblkno, false);
    if (BufferIsValid(buffer))
    {
        Page        page = (Page) BufferGetPage(buffer);

        /*
         * Note that we still update the page even if page LSN is equal to the
         * LSN of this record, because the updated NSN is not included in the
         * full page image.
         */
        if (!XLByteLT(lsn, PageGetLSN(page)))
        {
            GistPageGetOpaque(page)->nsn = lsn;
            GistClearFollowRight(page);

            PageSetLSN(page, lsn);
            PageSetTLI(page, ThisTimeLineID);
            MarkBufferDirty(buffer);
        }
        UnlockReleaseBuffer(buffer);
    }
}
/*
 * Replay the clearing of F_FOLLOW_RIGHT flag on a child page.
 *
 * Even if the WAL record includes a full-page image, we have to update the
 * follow-right flag, because that change is not included in the full-page
 * image.  To be sure that the intermediate state with the wrong flag value is
 * not visible to concurrent Hot Standby queries, this function handles
 * restoring the full-page image as well as updating the flag.  (Note that
 * we never need to do anything else to the child page in the current WAL
 * action.)
 */
static void
gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
                         RelFileNode node, BlockNumber childblkno)
{
    Buffer      buffer;
    Page        page;

    if (record->xl_info & XLR_BKP_BLOCK(block_index))
        buffer = RestoreBackupBlock(lsn, record, block_index, false, true);
    else
    {
        buffer = XLogReadBuffer(node, childblkno, false);
        if (!BufferIsValid(buffer))
            return;             /* page was deleted, nothing to do */
    }
    page = (Page) BufferGetPage(buffer);

    /*
     * Note that we still update the page even if page LSN is equal to the LSN
     * of this record, because the updated NSN is not included in the full
     * page image.
     */
    if (lsn >= PageGetLSN(page))
    {
        GistPageSetNSN(page, lsn);
        GistClearFollowRight(page);

        PageSetLSN(page, lsn);
        PageSetTLI(page, ThisTimeLineID);
        MarkBufferDirty(buffer);
    }
    UnlockReleaseBuffer(buffer);
}
/*
 * visibilitymap_set - set a bit on a previously pinned page
 *
 * recptr is the LSN of the heap page.  The LSN of the visibility map page is
 * advanced to that, to make sure that the visibility map doesn't get flushed
 * to disk before the update to the heap page that made all tuples visible.
 *
 * This is an opportunistic function.  It does nothing, unless *buf
 * contains the bit for heapBlk.  Call visibilitymap_pin first to pin
 * the right map page.  This function doesn't do any I/O.
 */
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
                  Buffer *buf)
{
    BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
    uint32      mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
    uint8       mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
    Page        page;
    char       *map;

#ifdef TRACE_VISIBILITYMAP
    elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
#endif

    /* Check that we have the right page pinned */
    if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != mapBlock)
        return;

    page = BufferGetPage(*buf);
    map = PageGetContents(page);
    LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);

    if (!(map[mapByte] & (1 << mapBit)))
    {
        map[mapByte] |= (1 << mapBit);

        if (XLByteLT(PageGetLSN(page), recptr))
            PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
        MarkBufferDirty(*buf);
    }

    LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
}
static void
spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
{
    char       *ptr = XLogRecGetData(record);
    spgxlogVacuumRoot *xldata = (spgxlogVacuumRoot *) ptr;
    OffsetNumber *toDelete;
    Buffer      buffer;
    Page        page;

    ptr += sizeof(spgxlogVacuumRoot);
    toDelete = (OffsetNumber *) ptr;

    if (!(record->xl_info & XLR_BKP_BLOCK_1))
    {
        buffer = XLogReadBuffer(xldata->node, SPGIST_HEAD_BLKNO, false);
        if (BufferIsValid(buffer))
        {
            page = BufferGetPage(buffer);
            if (!XLByteLE(lsn, PageGetLSN(page)))
            {
                /* The tuple numbers are in order */
                PageIndexMultiDelete(page, toDelete, xldata->nDelete);

                PageSetLSN(page, lsn);
                PageSetTLI(page, ThisTimeLineID);
                MarkBufferDirty(buffer);
            }
            UnlockReleaseBuffer(buffer);
        }
    }
}
/*
 * buffer must be pinned and locked by caller
 */
void
gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer key)
{
    Page        page;

    Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
    page = BufferGetPage(buffer);

    START_CRIT_SECTION();

    GISTInitBuffer(buffer, 0);
    gistfillbuffer(r, page, itup, len, FirstOffsetNumber);
    MarkBufferDirty(buffer);

    if (!r->rd_istemp)
    {
        XLogRecPtr  recptr;
        XLogRecData *rdata;

        rdata = formUpdateRdata(r, buffer, NULL, 0, itup, len, key);

        recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_NEW_ROOT, rdata);
        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }
    else
        PageSetLSN(page, XLogRecPtrForTemp);

    END_CRIT_SECTION();
}
/*
 * _bitmap_log_newpage() -- log a new page.
 *
 * This function is called before writing a new buffer.
 */
void
_bitmap_log_newpage(Relation rel, uint8 info, Buffer buf)
{
    Page        page;
    xl_bm_newpage xlNewPage;
    XLogRecPtr  recptr;
    XLogRecData rdata[1];

    page = BufferGetPage(buf);

    /* Fetch gp_persistent_relation_node information that will be added to XLOG record. */
    RelationFetchGpRelationNodeForXLog(rel);

    xlNewPage.bm_node = rel->rd_node;
    xlNewPage.bm_persistentTid = rel->rd_relationnodeinfo.persistentTid;
    xlNewPage.bm_persistentSerialNum = rel->rd_relationnodeinfo.persistentSerialNum;
    xlNewPage.bm_new_blkno = BufferGetBlockNumber(buf);

    elog(DEBUG1, "_bitmap_log_newpage: blkno=%d", xlNewPage.bm_new_blkno);

    rdata[0].buffer = InvalidBuffer;
    rdata[0].data = (char *) &xlNewPage;
    rdata[0].len = sizeof(xl_bm_newpage);
    rdata[0].next = NULL;

    recptr = XLogInsert(RM_BITMAP_ID, info, rdata);

    PageSetLSN(page, recptr);
    PageSetTLI(page, ThisTimeLineID);
}
/*
 * _bitmap_log_metapage() -- log the changes to the metapage
 */
void
_bitmap_log_metapage(Relation rel, Page page)
{
    BMMetaPage  metapage = (BMMetaPage) PageGetContents(page);

    xl_bm_metapage *xlMeta;
    XLogRecPtr  recptr;
    XLogRecData rdata[1];

    /* Fetch gp_persistent_relation_node information that will be added to XLOG record. */
    RelationFetchGpRelationNodeForXLog(rel);

    xlMeta = (xl_bm_metapage *) palloc(MAXALIGN(sizeof(xl_bm_metapage)));
    xlMeta->bm_node = rel->rd_node;
    xlMeta->bm_persistentTid = rel->rd_relationnodeinfo.persistentTid;
    xlMeta->bm_persistentSerialNum = rel->rd_relationnodeinfo.persistentSerialNum;
    xlMeta->bm_lov_heapId = metapage->bm_lov_heapId;
    xlMeta->bm_lov_indexId = metapage->bm_lov_indexId;
    xlMeta->bm_lov_lastpage = metapage->bm_lov_lastpage;

    rdata[0].buffer = InvalidBuffer;
    rdata[0].data = (char *) xlMeta;
    rdata[0].len = MAXALIGN(sizeof(xl_bm_metapage));
    rdata[0].next = NULL;

    recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_META, rdata);

    PageSetLSN(page, recptr);
    PageSetTLI(page, ThisTimeLineID);
    pfree(xlMeta);
}
/*
 * _bt_lognewpage() -- create an XLOG entry for a new page of the btree.
 */
void
_bt_lognewpage(Relation index, Page newPage, BlockNumber blockNo)
{
    /* We use the heap NEWPAGE record type for this */
    xl_heap_newpage xlrec;
    XLogRecPtr  recptr;
    XLogRecData rdata[2];

    /* NO ELOG(ERROR) from here till newpage op is logged */
    START_CRIT_SECTION();

    xl_heapnode_set(&xlrec.heapnode, index);
    xlrec.blkno = blockNo;

    rdata[0].data = (char *) &xlrec;
    rdata[0].len = SizeOfHeapNewpage;
    rdata[0].buffer = InvalidBuffer;
    rdata[0].next = &(rdata[1]);

    rdata[1].data = (char *) newPage;
    rdata[1].len = BLCKSZ;
    rdata[1].buffer = InvalidBuffer;
    rdata[1].next = NULL;

    recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);

    PageSetLSN(newPage, recptr);
    PageSetTLI(newPage, ThisTimeLineID);

    END_CRIT_SECTION();
}
/*
 * _bitmap_log_lovitem() -- log adding a new lov item to a lov page.
 */
void
_bitmap_log_lovitem(Relation rel, Buffer lovBuffer, OffsetNumber offset,
                    BMLOVItem lovItem, Buffer metabuf, bool is_new_lov_blkno)
{
    Page        lovPage = BufferGetPage(lovBuffer);

    xl_bm_lovitem xlLovItem;
    XLogRecPtr  recptr;
    XLogRecData rdata[1];

    /* Fetch gp_persistent_relation_node information that will be added to XLOG record. */
    RelationFetchGpRelationNodeForXLog(rel);

    Assert(BufferGetBlockNumber(lovBuffer) > 0);

    xlLovItem.bm_node = rel->rd_node;
    xlLovItem.bm_persistentTid = rel->rd_relationnodeinfo.persistentTid;
    xlLovItem.bm_persistentSerialNum = rel->rd_relationnodeinfo.persistentSerialNum;
    xlLovItem.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
    xlLovItem.bm_lov_offset = offset;
    memcpy(&(xlLovItem.bm_lovItem), lovItem, sizeof(BMLOVItemData));
    xlLovItem.bm_is_new_lov_blkno = is_new_lov_blkno;

    rdata[0].buffer = InvalidBuffer;
    rdata[0].data = (char *) &xlLovItem;
    rdata[0].len = sizeof(xl_bm_lovitem);
    rdata[0].next = NULL;

    recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_LOVITEM, rdata);

    if (is_new_lov_blkno)
    {
        Page        metapage = BufferGetPage(metabuf);

        PageSetLSN(metapage, recptr);
        PageSetTLI(metapage, ThisTimeLineID);
    }

    PageSetLSN(lovPage, recptr);
    PageSetTLI(lovPage, ThisTimeLineID);

    elog(DEBUG1, "Insert a new lovItem at (blockno, offset): (%d,%d)",
         BufferGetBlockNumber(lovBuffer), offset);
}
/*
 * emit a completed btree page, and release the working storage.
 */
static void
_bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
{
    /* Ensure rd_smgr is open (could have been closed by relcache flush!) */
    RelationOpenSmgr(wstate->index);

    /* XLOG stuff */
    if (wstate->btws_use_wal)
    {
        /* We use the heap NEWPAGE record type for this */
        log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page);
    }
    else
    {
        /* Leave the page LSN zero if not WAL-logged, but set TLI anyway */
        PageSetTLI(page, ThisTimeLineID);
    }

    /*
     * If we have to write pages nonsequentially, fill in the space with
     * zeroes until we come back and overwrite.  This is not logically
     * necessary on standard Unix filesystems (unwritten space will read as
     * zeroes anyway), but it should help to avoid fragmentation.  The dummy
     * pages aren't WAL-logged though.
     */
    while (blkno > wstate->btws_pages_written)
    {
        if (!wstate->btws_zeropage)
            wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
        smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM,
                   wstate->btws_pages_written++,
                   (char *) wstate->btws_zeropage,
                   true);
    }

    /*
     * Now write the page.  We say isTemp = true even if it's not a temp
     * index, because there's no need for smgr to schedule an fsync for this
     * write; we'll do it ourselves before ending the build.
     */
    if (blkno == wstate->btws_pages_written)
    {
        /* extending the file... */
        smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
                   (char *) page, true);
        wstate->btws_pages_written++;
    }
    else
    {
        /* overwriting a block we zero-filled before */
        smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
                  (char *) page, true);
    }

    pfree(page);
}
/*
 * Delete item(s) from a btree page.
 *
 * This must only be used for deleting leaf items.  Deleting an item on a
 * non-leaf page has to be done as part of an atomic action that includes
 * deleting the page it points to.
 *
 * This routine assumes that the caller has pinned and locked the buffer,
 * and will write the buffer afterwards.  Also, the given itemnos *must*
 * appear in increasing order in the array.
 */
void
_bt_delitems(Relation rel, Buffer buf,
             OffsetNumber *itemnos, int nitems)
{
    Page        page = BufferGetPage(buf);

    /* No ereport(ERROR) until changes are logged */
    START_CRIT_SECTION();

    /* Fix the page */
    PageIndexMultiDelete(page, itemnos, nitems);

    /* XLOG stuff */
    if (!rel->rd_istemp)
    {
        xl_btree_delete xlrec;
        XLogRecPtr  recptr;
        XLogRecData rdata[2];

        xlrec.node = rel->rd_node;
        xlrec.block = BufferGetBlockNumber(buf);

        rdata[0].data = (char *) &xlrec;
        rdata[0].len = SizeOfBtreeDelete;
        rdata[0].buffer = InvalidBuffer;
        rdata[0].next = &(rdata[1]);

        /*
         * The target-offsets array is not in the buffer, but pretend that it
         * is.  When XLogInsert stores the whole buffer, the offsets array
         * need not be stored too.
         */
        if (nitems > 0)
        {
            rdata[1].data = (char *) itemnos;
            rdata[1].len = nitems * sizeof(OffsetNumber);
        }
        else
        {
            rdata[1].data = NULL;
            rdata[1].len = 0;
        }
        rdata[1].buffer = buf;
        rdata[1].buffer_std = true;
        rdata[1].next = NULL;

        recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);

        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }

    END_CRIT_SECTION();
}
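Nearly every routine in this listing repeats the critical-section choreography that _bt_delitems shows above: mutate the locked page, mark the buffer dirty, emit a WAL record unless the relation is temporary, then stamp the page with the record's LSN and the current timeline so the page cannot reach disk ahead of its WAL. The function below is only a hedged summary of that shared pattern; xl_foo, RM_FOO_ID, XLOG_FOO_OP, and log_foo_change are placeholder names for illustration, not symbols from the tree.

typedef struct xl_foo               /* placeholder record struct, not a real one */
{
    RelFileNode node;
} xl_foo;

/*
 * Hedged sketch of the common WAL-logging pattern, not code from the tree.
 */
static void
log_foo_change(Relation rel, Buffer buf)
{
    Page        page = BufferGetPage(buf);

    START_CRIT_SECTION();

    /* ... apply the change to the already-locked page here ... */
    MarkBufferDirty(buf);

    if (!rel->rd_istemp)
    {
        xl_foo      xlrec;
        XLogRecData rdata[1];
        XLogRecPtr  recptr;

        xlrec.node = rel->rd_node;

        rdata[0].data = (char *) &xlrec;
        rdata[0].len = sizeof(xl_foo);
        rdata[0].buffer = InvalidBuffer;
        rdata[0].next = NULL;

        /* RM_FOO_ID / XLOG_FOO_OP stand in for a real rmgr and info code */
        recptr = XLogInsert(RM_FOO_ID, XLOG_FOO_OP, rdata);

        /* Stamp the page so it cannot be flushed before its WAL record */
        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }

    END_CRIT_SECTION();
}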
/*
 * _bitmap_log_lovmetapage() -- log the lov meta page.
 */
void
_bitmap_log_lovmetapage(Relation rel, Buffer lovMetaBuffer, uint8 numOfAttrs)
{
    Page        lovMetapage;
    BMLOVMetaItem metaItems;

    lovMetapage = BufferGetPage(lovMetaBuffer);
    metaItems = (BMLOVMetaItem) PageGetContents(lovMetapage);

    /* XLOG stuff */
    START_CRIT_SECTION();

    if (!(rel->rd_istemp))
    {
        BMLOVMetaItem copyMetaItems;
        XLogRecPtr  recptr;
        XLogRecData rdata[1];

        xl_bm_lovmetapage *xlLovMeta;

#ifdef BM_DEBUG
        elog(LOG, "call _bitmap_log_lovmetapage: numOfAttrs=%d", numOfAttrs);
#endif

        xlLovMeta = (xl_bm_lovmetapage *)
            palloc(sizeof(xl_bm_lovmetapage) +
                   numOfAttrs * sizeof(BMLOVMetaItemData));

        xlLovMeta->bm_node = rel->rd_node;
        xlLovMeta->bm_num_of_attrs = numOfAttrs;

        copyMetaItems = (BMLOVMetaItem)
            (((char *) xlLovMeta) + sizeof(xl_bm_lovmetapage));
        memcpy(copyMetaItems, metaItems, numOfAttrs * sizeof(BMLOVMetaItemData));

        rdata[0].buffer = InvalidBuffer;
        rdata[0].data = (char *) xlLovMeta;
        rdata[0].len = sizeof(xl_bm_lovmetapage) +
            numOfAttrs * sizeof(BMLOVMetaItemData);
        rdata[0].next = NULL;

        recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_LOVMETA, rdata);

        PageSetLSN(lovMetapage, recptr);
        PageSetTLI(lovMetapage, ThisTimeLineID);
        pfree(xlLovMeta);
    }

    END_CRIT_SECTION();
}
/*
 * Creates a posting tree with one page.  Assumes that items[] fit on a
 * single page.
 */
static BlockNumber
createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
{
    BlockNumber blkno;
    Buffer      buffer = GinNewBuffer(index);
    Page        page;

    START_CRIT_SECTION();

    GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
    page = BufferGetPage(buffer);
    blkno = BufferGetBlockNumber(buffer);

    memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems);
    GinPageGetOpaque(page)->maxoff = nitems;

    MarkBufferDirty(buffer);

    if (!index->rd_istemp)
    {
        XLogRecPtr  recptr;
        XLogRecData rdata[2];
        ginxlogCreatePostingTree data;

        data.node = index->rd_node;
        data.blkno = blkno;
        data.nitem = nitems;

        rdata[0].buffer = InvalidBuffer;
        rdata[0].data = (char *) &data;
        rdata[0].len = sizeof(ginxlogCreatePostingTree);
        rdata[0].next = &rdata[1];

        rdata[1].buffer = InvalidBuffer;
        rdata[1].data = (char *) items;
        rdata[1].len = sizeof(ItemPointerData) * nitems;
        rdata[1].next = NULL;

        recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }

    UnlockReleaseBuffer(buffer);

    END_CRIT_SECTION();

    return blkno;
}
/*
 * _bt_metapinit() -- Initialize the metadata page of a new btree.
 *
 * Note: this is actually not used for standard btree index building;
 * nbtsort.c prefers not to make the metadata page valid until completion
 * of build.
 *
 * Note: there's no real need for any locking here.  Since the transaction
 * creating the index hasn't committed yet, no one else can even see the index
 * much less be trying to use it.  (In a REINDEX-in-place scenario, that's
 * not true, but we assume the caller holds sufficient locks on the index.)
 */
void
_bt_metapinit(Relation rel)
{
    Buffer      buf;
    Page        pg;
    BTMetaPageData *metad;

    if (RelationGetNumberOfBlocks(rel) != 0)
        elog(ERROR, "cannot initialize non-empty btree index \"%s\"",
             RelationGetRelationName(rel));

    buf = ReadBuffer(rel, P_NEW);
    Assert(BufferGetBlockNumber(buf) == BTREE_METAPAGE);
    pg = BufferGetPage(buf);

    _bt_initmetapage(pg, P_NONE, 0);
    metad = BTPageGetMeta(pg);

    /* NO ELOG(ERROR) from here till newmeta op is logged */
    START_CRIT_SECTION();

    /* XLOG stuff */
    if (!rel->rd_istemp)
    {
        xl_btree_newmeta xlrec;
        XLogRecPtr  recptr;
        XLogRecData rdata[1];

        xlrec.node = rel->rd_node;
        xlrec.meta.root = metad->btm_root;
        xlrec.meta.level = metad->btm_level;
        xlrec.meta.fastroot = metad->btm_fastroot;
        xlrec.meta.fastlevel = metad->btm_fastlevel;

        rdata[0].data = (char *) &xlrec;
        rdata[0].len = SizeOfBtreeNewmeta;
        rdata[0].buffer = InvalidBuffer;
        rdata[0].next = NULL;

        recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWMETA, rdata);

        PageSetLSN(pg, recptr);
        PageSetTLI(pg, ThisTimeLineID);
    }

    END_CRIT_SECTION();

    WriteBuffer(buf);
}
static void
bitmap_xlog_insert_bitmap_lastwords(bool redo, XLogRecPtr lsn, XLogRecord *record)
{
    xl_bm_bitmap_lastwords *xlrec =
        (xl_bm_bitmap_lastwords *) XLogRecGetData(record);

    Relation    reln;

    reln = XLogOpenRelation(xlrec->bm_node);
    if (!RelationIsValid(reln))
        return;

    if (redo)
    {
        Buffer      lovBuffer;
        Page        lovPage;
        BMLOVItem   lovItem;

#ifdef BM_DEBUG
        ereport(LOG,
                (errcode(LOG),
                 errmsg("call bitmap_xlog_insert_bitmap_lastwords: redo=%d\n", redo)));
#endif

        lovBuffer = XLogReadBuffer(false, reln, xlrec->bm_lov_blkno);
        if (!BufferIsValid(lovBuffer))
            elog(PANIC, "bm_insert_redo: block unfound: %d", xlrec->bm_lov_blkno);

        lovPage = BufferGetPage(lovBuffer);

        if (XLByteLT(PageGetLSN(lovPage), lsn))
        {
            lovItem = (BMLOVItem)
                PageGetItem(lovPage, PageGetItemId(lovPage, xlrec->bm_lov_offset));

            lovItem->bm_last_compword = xlrec->bm_last_compword;
            lovItem->bm_last_word = xlrec->bm_last_word;
            lovItem->bm_last_two_headerbits = xlrec->bm_last_two_headerbits;

            PageSetLSN(lovPage, lsn);
            PageSetTLI(lovPage, ThisTimeLineID);

            _bitmap_wrtbuf(lovBuffer);
        }
        else
            _bitmap_relbuf(lovBuffer);
    }
    else
        elog(PANIC, "bm_insert_undo: not implemented.");
}
/*
 * lazy_vacuum_page() -- free dead tuples on a page
 *                       and repair its fragmentation.
 *
 * Caller must hold pin and buffer cleanup lock on the buffer.
 *
 * tupindex is the index in vacrelstats->dead_tuples of the first dead
 * tuple for this page.  We assume the rest follow sequentially.
 * The return value is the first tupindex after the tuples of this page.
 */
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
                 int tupindex, LVRelStats *vacrelstats)
{
    OffsetNumber unused[MaxOffsetNumber];
    int         uncnt;
    Page        page = BufferGetPage(buffer);
    ItemId      itemid;

    MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

    START_CRIT_SECTION();

    for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
    {
        BlockNumber tblk;
        OffsetNumber toff;

        tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
        if (tblk != blkno)
            break;              /* past end of tuples for this block */
        toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
        itemid = PageGetItemId(page, toff);
        itemid->lp_flags &= ~LP_USED;
    }

    uncnt = PageRepairFragmentation(page, unused);

    MarkBufferDirty(buffer);

    /* XLOG stuff */
    if (!onerel->rd_istemp)
    {
        XLogRecPtr  recptr;

        recptr = log_heap_clean(onerel, buffer, unused, uncnt);
        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }
    else
    {
        /* No XLOG record, but still need to flag that XID exists on disk */
        MyXactMadeTempRelUpdate = true;
    }

    END_CRIT_SECTION();

    return tupindex;
}
/*
 * lazy_vacuum_page() -- free dead tuples on a page
 *                       and repair its fragmentation.
 *
 * Caller must hold pin and buffer cleanup lock on the buffer.
 *
 * tupindex is the index in vacrelstats->dead_tuples of the first dead
 * tuple for this page.  We assume the rest follow sequentially.
 * The return value is the first tupindex after the tuples of this page.
 */
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
                 int tupindex, LVRelStats *vacrelstats)
{
    Page        page = BufferGetPage(buffer);
    OffsetNumber unused[MaxOffsetNumber];
    int         uncnt = 0;

    MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD;

    START_CRIT_SECTION();

    for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
    {
        BlockNumber tblk;
        OffsetNumber toff;
        ItemId      itemid;

        tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
        if (tblk != blkno)
            break;              /* past end of tuples for this block */
        toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
        itemid = PageGetItemId(page, toff);
        ItemIdSetUnused(itemid);
        unused[uncnt++] = toff;
    }

    PageRepairFragmentation(page);

    MarkBufferDirty(buffer);

    /* XLOG stuff */
    if (!onerel->rd_istemp)
    {
        XLogRecPtr  recptr;

        recptr = log_heap_clean(onerel, buffer,
                                NULL, 0, NULL, 0,
                                unused, uncnt,
                                false);
        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }

    END_CRIT_SECTION();

    return tupindex;
}
/*
 * Write the given statistics to the index's metapage
 *
 * Note: nPendingPages and ginVersion are *not* copied over
 */
void
ginUpdateStats(Relation index, const GinStatsData *stats)
{
    Buffer      metabuffer;
    Page        metapage;
    GinMetaPageData *metadata;

    metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
    LockBuffer(metabuffer, GIN_EXCLUSIVE);
    metapage = BufferGetPage(metabuffer);
    metadata = GinPageGetMeta(metapage);

    START_CRIT_SECTION();

    metadata->nTotalPages = stats->nTotalPages;
    metadata->nEntryPages = stats->nEntryPages;
    metadata->nDataPages = stats->nDataPages;
    metadata->nEntries = stats->nEntries;

    MarkBufferDirty(metabuffer);

    if (RelationNeedsWAL(index))
    {
        XLogRecPtr  recptr;
        ginxlogUpdateMeta data;
        XLogRecData rdata;

        data.node = index->rd_node;
        data.ntuples = 0;
        data.newRightlink = data.prevTail = InvalidBlockNumber;
        memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));

        rdata.buffer = InvalidBuffer;
        rdata.data = (char *) &data;
        rdata.len = sizeof(ginxlogUpdateMeta);
        rdata.next = NULL;

        recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
        PageSetLSN(metapage, recptr);
        PageSetTLI(metapage, ThisTimeLineID);
    }

    UnlockReleaseBuffer(metabuffer);

    END_CRIT_SECTION();
}
/*
 * visibilitymap_set - set a bit on a previously pinned page
 *
 * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
 * or InvalidXLogRecPtr in normal running.  The page LSN is advanced to the
 * one provided; in normal running, we generate a new XLOG record and set the
 * page LSN to that value.  cutoff_xid is the largest xmin on the page being
 * marked all-visible; it is needed for Hot Standby, and can be
 * InvalidTransactionId if the page contains no tuples.
 *
 * You must pass a buffer containing the correct map page to this function.
 * Call visibilitymap_pin first to pin the right one.  This function doesn't
 * do any I/O.
 */
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
                  Buffer buf, TransactionId cutoff_xid)
{
    BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
    uint32      mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
    uint8       mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
    Page        page;
    char       *map;

#ifdef TRACE_VISIBILITYMAP
    elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
#endif

    Assert(InRecovery || XLogRecPtrIsInvalid(recptr));

    /* Check that we have the right page pinned */
    if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock)
        elog(ERROR, "wrong buffer passed to visibilitymap_set");

    page = BufferGetPage(buf);
    map = PageGetContents(page);
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

    if (!(map[mapByte] & (1 << mapBit)))
    {
        START_CRIT_SECTION();

        map[mapByte] |= (1 << mapBit);
        MarkBufferDirty(buf);

        if (RelationNeedsWAL(rel))
        {
            if (XLogRecPtrIsInvalid(recptr))
                recptr = log_heap_visible(rel->rd_node, heapBlk, buf,
                                          cutoff_xid);
            PageSetLSN(page, recptr);
            PageSetTLI(page, ThisTimeLineID);
        }

        END_CRIT_SECTION();
    }

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
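For reference, the helper below is a hedged sketch (not code from the tree) of how the five-argument visibilitymap_set above is typically driven from the heap side. The map page has to be pinned with visibilitymap_pin before the heap page is locked, since pinning may do I/O, and in normal running the caller passes InvalidXLogRecPtr so that visibilitymap_set generates its own WAL record; mark_block_all_visible and its parameters are illustrative names only.

/*
 * Hedged caller-side sketch: mark one heap block all-visible in the VM.
 */
static void
mark_block_all_visible(Relation rel, Buffer heapbuf, BlockNumber blkno,
                       TransactionId cutoff_xid)
{
    Buffer      vmbuffer = InvalidBuffer;

    /* Pin the map page before taking the heap page lock */
    visibilitymap_pin(rel, blkno, &vmbuffer);
    LockBuffer(heapbuf, BUFFER_LOCK_EXCLUSIVE);

    if (PageIsAllVisible(BufferGetPage(heapbuf)))
        visibilitymap_set(rel, blkno, InvalidXLogRecPtr, vmbuffer, cutoff_xid);

    LockBuffer(heapbuf, BUFFER_LOCK_UNLOCK);

    if (BufferIsValid(vmbuffer))
        ReleaseBuffer(vmbuffer);
}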
static void
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
    RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;

    buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
    Assert(BufferIsValid(buffer));
    page = (Page) BufferGetPage(buffer);

    GISTInitBuffer(buffer, F_LEAF);

    PageSetLSN(page, lsn);
    PageSetTLI(page, ThisTimeLineID);

    MarkBufferDirty(buffer);
    UnlockReleaseBuffer(buffer);
}
static void
bitmap_xlog_insert_meta(bool redo, XLogRecPtr lsn, XLogRecord *record)
{
    xl_bm_metapage *xlrec = (xl_bm_metapage *) XLogRecGetData(record);

    Relation    reln;

    reln = XLogOpenRelation(xlrec->bm_node);

    if (!RelationIsValid(reln))
        return;

    if (redo)
    {
        Buffer      metabuf;
        BMMetaPage  metapage;

#ifdef BM_DEBUG
        ereport(LOG,
                (errcode(LOG),
                 errmsg("call bitmap_xlog_insert_meta: redo=%d\n", redo)));
#endif

        metabuf = XLogReadBuffer(false, reln, BM_METAPAGE);
        if (!BufferIsValid(metabuf))
            elog(PANIC, "bm_insert_redo: block unfound: %d", BM_METAPAGE);

        /* restore the page */
        metapage = (BMMetaPage) BufferGetPage(metabuf);

        if (XLByteLT(PageGetLSN(metapage), lsn))
        {
            PageSetLSN(metapage, lsn);
            PageSetTLI(metapage, ThisTimeLineID);
            _bitmap_wrtbuf(metabuf);
        }
        else
            _bitmap_relbuf(metabuf);
    }
    else
        elog(PANIC, "bm_insert_undo: not implemented.");
}
/*
 * _bitmap_log_lovitem() -- log adding a new lov item to a lov page.
 */
void
_bitmap_log_lovitem(Relation rel, Buffer lovBuffer, bool isNewItem,
                    OffsetNumber offset, BMLOVItem lovItem)
{
    Page        lovPage = BufferGetPage(lovBuffer);

    /* XLOG stuff */
    START_CRIT_SECTION();

    if (!(rel->rd_istemp))
    {
        xl_bm_lovitem xlLovItem;
        XLogRecPtr  recptr;
        XLogRecData rdata[1];

#ifdef BM_DEBUG
        elog(LOG, "call _bitmap_log_lovitem: blkno=%d, offset=%d, isNew=%d",
             BufferGetBlockNumber(lovBuffer), offset, isNewItem);
#endif

        xlLovItem.bm_node = rel->rd_node;
        xlLovItem.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
        xlLovItem.bm_isNewItem = isNewItem;
        xlLovItem.bm_lov_offset = offset;
        memcpy(&(xlLovItem.bm_lovItem), lovItem, sizeof(BMLOVItemData));

        rdata[0].buffer = InvalidBuffer;
        rdata[0].data = (char *) &xlLovItem;
        rdata[0].len = sizeof(xl_bm_lovitem);
        rdata[0].next = NULL;

        recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_LOVITEM, rdata);

        PageSetLSN(lovPage, recptr);
        PageSetTLI(lovPage, ThisTimeLineID);
    }

    END_CRIT_SECTION();
}
/*
 * _bitmap_log_bitmap_lastwords() -- log the last two words in a bitmap.
 */
void
_bitmap_log_bitmap_lastwords(Relation rel, Buffer lovBuffer,
                             OffsetNumber lovOffset, BMLOVItem lovItem)
{
    /* XLOG stuff */
    START_CRIT_SECTION();

    if (!(rel->rd_istemp))
    {
        xl_bm_bitmap_lastwords xlLastwords;
        XLogRecPtr  recptr;
        XLogRecData rdata[1];

#ifdef BM_DEBUG
        elog(LOG, "call _bitmap_log_bitmap_lastwords: lov_blkno=%d, last_compword=%x, last_word=%x",
             BufferGetBlockNumber(lovBuffer), lovItem->bm_last_compword,
             lovItem->bm_last_word);
#endif

        xlLastwords.bm_node = rel->rd_node;
        xlLastwords.bm_last_compword = lovItem->bm_last_compword;
        xlLastwords.bm_last_word = lovItem->bm_last_word;
        xlLastwords.bm_last_two_headerbits = lovItem->bm_last_two_headerbits;
        xlLastwords.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
        xlLastwords.bm_lov_offset = lovOffset;

        rdata[0].buffer = InvalidBuffer;
        rdata[0].data = (char *) &xlLastwords;
        rdata[0].len = sizeof(xl_bm_bitmap_lastwords);
        rdata[0].next = NULL;

        recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_BITMAP_LASTWORDS, rdata);

        PageSetLSN(BufferGetPage(lovBuffer), recptr);
        PageSetTLI(BufferGetPage(lovBuffer), ThisTimeLineID);
    }

    END_CRIT_SECTION();
}
static void
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
    RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;

    /* Backup blocks are not used in create_index records */
    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));

    buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
    Assert(BufferIsValid(buffer));
    page = (Page) BufferGetPage(buffer);

    GISTInitBuffer(buffer, F_LEAF);

    PageSetLSN(page, lsn);
    PageSetTLI(page, ThisTimeLineID);

    MarkBufferDirty(buffer);
    UnlockReleaseBuffer(buffer);
}
/*
 * _bitmap_log_updateword() -- log updating a single word in a given
 * bitmap page.
 */
void
_bitmap_log_updateword(Relation rel, Buffer bitmapBuffer, int word_no)
{
    Page        bitmapPage;
    BMBitmap    bitmap;
    xl_bm_updateword xlBitmapWord;
    XLogRecPtr  recptr;
    XLogRecData rdata[1];

    /* Fetch gp_persistent_relation_node information that will be added to XLOG record. */
    RelationFetchGpRelationNodeForXLog(rel);

    bitmapPage = BufferGetPage(bitmapBuffer);
    bitmap = (BMBitmap) PageGetContentsMaxAligned(bitmapPage);

    xlBitmapWord.bm_node = rel->rd_node;
    xlBitmapWord.bm_persistentTid = rel->rd_relationnodeinfo.persistentTid;
    xlBitmapWord.bm_persistentSerialNum = rel->rd_relationnodeinfo.persistentSerialNum;
    xlBitmapWord.bm_blkno = BufferGetBlockNumber(bitmapBuffer);
    xlBitmapWord.bm_word_no = word_no;
    xlBitmapWord.bm_cword = bitmap->cwords[word_no];
    xlBitmapWord.bm_hword = bitmap->hwords[word_no / BM_HRL_WORD_SIZE];

    elog(DEBUG1, "_bitmap_log_updateword: (blkno, word_no, cword, hword)="
         "(%d, %d, " INT64_FORMAT ", " INT64_FORMAT ")",
         xlBitmapWord.bm_blkno, xlBitmapWord.bm_word_no,
         xlBitmapWord.bm_cword, xlBitmapWord.bm_hword);

    rdata[0].buffer = InvalidBuffer;
    rdata[0].data = (char *) &xlBitmapWord;
    rdata[0].len = sizeof(xl_bm_updateword);
    rdata[0].next = NULL;

    recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_UPDATEWORD, rdata);

    PageSetLSN(bitmapPage, recptr);
    PageSetTLI(bitmapPage, ThisTimeLineID);
}
static void
gistRedoPageDeleteRecord(XLogRecPtr lsn, XLogRecord *record)
{
    gistxlogPageDelete *xldata = (gistxlogPageDelete *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;

    /* nothing else to do if page was backed up (and no info to do it with) */
    if (record->xl_info & XLR_BKP_BLOCK_1)
        return;

    buffer = XLogReadBuffer(xldata->node, xldata->blkno, false);
    if (!BufferIsValid(buffer))
        return;
    page = (Page) BufferGetPage(buffer);

    GistPageSetDeleted(page);

    PageSetLSN(page, lsn);
    PageSetTLI(page, ThisTimeLineID);
    MarkBufferDirty(buffer);
    UnlockReleaseBuffer(buffer);
}
/*
 * lazy_scan_heap() -- scan an open heap relation
 *
 * This routine sets commit status bits, builds lists of dead tuples
 * and pages with free space, and calculates statistics on the number
 * of live tuples in the heap.  When done, or when we run low on space
 * for dead-tuple TIDs, invoke vacuuming of indexes and heap.
 *
 * If there are no indexes then we just vacuum each dirty page as we
 * process it, since there's no point in gathering many tuples.
 */
static void
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
               Relation *Irel, int nindexes, bool scan_all)
{
    BlockNumber nblocks,
                blkno;
    HeapTupleData tuple;
    char       *relname;
    BlockNumber empty_pages,
                vacuumed_pages;
    double      num_tuples,
                tups_vacuumed,
                nkeep,
                nunused;
    IndexBulkDeleteResult **indstats;
    int         i;
    PGRUsage    ru0;
    Buffer      vmbuffer = InvalidBuffer;
    BlockNumber next_not_all_visible_block;
    bool        skipping_all_visible_blocks;

    pg_rusage_init(&ru0);

    relname = RelationGetRelationName(onerel);
    ereport(elevel,
            (errmsg("vacuuming \"%s.%s\"",
                    get_namespace_name(RelationGetNamespace(onerel)),
                    relname)));

    empty_pages = vacuumed_pages = 0;
    num_tuples = tups_vacuumed = nkeep = nunused = 0;

    indstats = (IndexBulkDeleteResult **)
        palloc0(nindexes * sizeof(IndexBulkDeleteResult *));

    nblocks = RelationGetNumberOfBlocks(onerel);
    vacrelstats->rel_pages = nblocks;
    vacrelstats->scanned_pages = 0;
    vacrelstats->nonempty_pages = 0;
    vacrelstats->latestRemovedXid = InvalidTransactionId;

    lazy_space_alloc(vacrelstats, nblocks);

    /*
     * We want to skip pages that don't require vacuuming according to the
     * visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD
     * consecutive pages.  Since we're reading sequentially, the OS should be
     * doing readahead for us, so there's no gain in skipping a page now and
     * then; that's likely to disable readahead and so be counterproductive.
     * Also, skipping even a single page means that we can't update
     * relfrozenxid, so we only want to do it if we can skip a goodly number
     * of pages.
     *
     * Before entering the main loop, establish the invariant that
     * next_not_all_visible_block is the next block number >= blkno that's not
     * all-visible according to the visibility map, or nblocks if there's no
     * such block.  Also, we set up the skipping_all_visible_blocks flag,
     * which is needed because we need hysteresis in the decision: once we've
     * started skipping blocks, we may as well skip everything up to the next
     * not-all-visible block.
     *
     * Note: if scan_all is true, we won't actually skip any pages; but we
     * maintain next_not_all_visible_block anyway, so as to set up the
     * all_visible_according_to_vm flag correctly for each page.
     */
    for (next_not_all_visible_block = 0;
         next_not_all_visible_block < nblocks;
         next_not_all_visible_block++)
    {
        if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
            break;
        vacuum_delay_point();
    }
    if (next_not_all_visible_block >= SKIP_PAGES_THRESHOLD)
        skipping_all_visible_blocks = true;
    else
        skipping_all_visible_blocks = false;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf;
        Page        page;
        OffsetNumber offnum,
                    maxoff;
        bool        tupgone,
                    hastup;
        int         prev_dead_count;
        OffsetNumber frozen[MaxOffsetNumber];
        int         nfrozen;
        Size        freespace;
        bool        all_visible_according_to_vm;
        bool        all_visible;
        bool        has_dead_tuples;

        if (blkno == next_not_all_visible_block)
        {
            /* Time to advance next_not_all_visible_block */
            for (next_not_all_visible_block++;
                 next_not_all_visible_block < nblocks;
                 next_not_all_visible_block++)
            {
                if (!visibilitymap_test(onerel, next_not_all_visible_block,
                                        &vmbuffer))
                    break;
                vacuum_delay_point();
            }

            /*
             * We know we can't skip the current block.  But set up
             * skipping_all_visible_blocks to do the right thing at the
             * following blocks.
             */
            if (next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD)
                skipping_all_visible_blocks = true;
            else
                skipping_all_visible_blocks = false;
            all_visible_according_to_vm = false;
        }
        else
        {
            /* Current block is all-visible */
            if (skipping_all_visible_blocks && !scan_all)
                continue;
            all_visible_according_to_vm = true;
        }

        vacuum_delay_point();

        vacrelstats->scanned_pages++;

        /*
         * If we are close to overrunning the available space for dead-tuple
         * TIDs, pause and do a cycle of vacuuming before we tackle this page.
         */
        if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
            vacrelstats->num_dead_tuples > 0)
        {
            /* Log cleanup info before we touch indexes */
            vacuum_log_cleanup_info(onerel, vacrelstats);

            /* Remove index entries */
            for (i = 0; i < nindexes; i++)
                lazy_vacuum_index(Irel[i], &indstats[i], vacrelstats);

            /* Remove tuples from heap */
            lazy_vacuum_heap(onerel, vacrelstats);

            /*
             * Forget the now-vacuumed tuples, and press on, but be careful
             * not to reset latestRemovedXid since we want that value to be
             * valid.
             */
            vacrelstats->num_dead_tuples = 0;
            vacrelstats->num_index_scans++;
        }

        buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
                                 RBM_NORMAL, vac_strategy);

        /* We need buffer cleanup lock so that we can prune HOT chains. */
        LockBufferForCleanup(buf);

        page = BufferGetPage(buf);

        if (PageIsNew(page))
        {
            /*
             * An all-zeroes page could be left over if a backend extends the
             * relation but crashes before initializing the page.  Reclaim
             * such pages for use.
             *
             * We have to be careful here because we could be looking at a
             * page that someone has just added to the relation and not yet
             * been able to initialize (see RelationGetBufferForTuple).  To
             * protect against that, release the buffer lock, grab the
             * relation extension lock momentarily, and re-lock the buffer. If
             * the page is still uninitialized by then, it must be left over
             * from a crashed backend, and we can initialize it.
             *
             * We don't really need the relation lock when this is a new or
             * temp relation, but it's probably not worth the code space to
             * check that, since this surely isn't a critical path.
             *
             * Note: the comparable code in vacuum.c need not worry because
             * it's got exclusive lock on the whole relation.
             */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            LockRelationForExtension(onerel, ExclusiveLock);
            UnlockRelationForExtension(onerel, ExclusiveLock);
            LockBufferForCleanup(buf);
            if (PageIsNew(page))
            {
                ereport(WARNING,
                        (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
                                relname, blkno)));
                PageInit(page, BufferGetPageSize(buf), 0);
                empty_pages++;
            }
            freespace = PageGetHeapFreeSpace(page);
            MarkBufferDirty(buf);
            UnlockReleaseBuffer(buf);

            RecordPageWithFreeSpace(onerel, blkno, freespace);
            continue;
        }

        if (PageIsEmpty(page))
        {
            empty_pages++;
            freespace = PageGetHeapFreeSpace(page);

            if (!PageIsAllVisible(page))
            {
                PageSetAllVisible(page);
                SetBufferCommitInfoNeedsSave(buf);
            }

            LockBuffer(buf, BUFFER_LOCK_UNLOCK);

            /* Update the visibility map */
            if (!all_visible_according_to_vm)
            {
                visibilitymap_pin(onerel, blkno, &vmbuffer);
                LockBuffer(buf, BUFFER_LOCK_SHARE);
                if (PageIsAllVisible(page))
                    visibilitymap_set(onerel, blkno, PageGetLSN(page),
                                      &vmbuffer);
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            }

            ReleaseBuffer(buf);
            RecordPageWithFreeSpace(onerel, blkno, freespace);
            continue;
        }

        /*
         * Prune all HOT-update chains in this page.
         *
         * We count tuples removed by the pruning step as removed by VACUUM.
         */
        tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
                                         &vacrelstats->latestRemovedXid);

        /*
         * Now scan the page to collect vacuumable items and check for tuples
         * requiring freezing.
         */
        all_visible = true;
        has_dead_tuples = false;
        nfrozen = 0;
        hastup = false;
        prev_dead_count = vacrelstats->num_dead_tuples;
        maxoff = PageGetMaxOffsetNumber(page);

        for (offnum = FirstOffsetNumber;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ItemId      itemid;

            itemid = PageGetItemId(page, offnum);

            /* Unused items require no processing, but we count 'em */
            if (!ItemIdIsUsed(itemid))
            {
                nunused += 1;
                continue;
            }

            /* Redirect items mustn't be touched */
            if (ItemIdIsRedirected(itemid))
            {
                hastup = true;  /* this page won't be truncatable */
                continue;
            }

            ItemPointerSet(&(tuple.t_self), blkno, offnum);

            /*
             * DEAD item pointers are to be vacuumed normally; but we don't
             * count them in tups_vacuumed, else we'd be double-counting (at
             * least in the common case where heap_page_prune() just freed up
             * a non-HOT tuple).
             */
            if (ItemIdIsDead(itemid))
            {
                lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
                all_visible = false;
                continue;
            }

            Assert(ItemIdIsNormal(itemid));

            tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
            tuple.t_len = ItemIdGetLength(itemid);

            tupgone = false;

            switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
            {
                case HEAPTUPLE_DEAD:

                    /*
                     * Ordinarily, DEAD tuples would have been removed by
                     * heap_page_prune(), but it's possible that the tuple
                     * state changed since heap_page_prune() looked.  In
                     * particular an INSERT_IN_PROGRESS tuple could have
                     * changed to DEAD if the inserter aborted.  So this
                     * cannot be considered an error condition.
                     *
                     * If the tuple is HOT-updated then it must only be
                     * removed by a prune operation; so we keep it just as if
                     * it were RECENTLY_DEAD.  Also, if it's a heap-only
                     * tuple, we choose to keep it, because it'll be a lot
                     * cheaper to get rid of it in the next pruning pass than
                     * to treat it like an indexed tuple.
                     */
                    if (HeapTupleIsHotUpdated(&tuple) ||
                        HeapTupleIsHeapOnly(&tuple))
                        nkeep += 1;
                    else
                        tupgone = true; /* we can delete the tuple */
                    all_visible = false;
                    break;
                case HEAPTUPLE_LIVE:
                    /* Tuple is good --- but let's do some validity checks */
                    if (onerel->rd_rel->relhasoids &&
                        !OidIsValid(HeapTupleGetOid(&tuple)))
                        elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
                             relname, blkno, offnum);

                    /*
                     * Is the tuple definitely visible to all transactions?
                     *
                     * NB: Like with per-tuple hint bits, we can't set the
                     * PD_ALL_VISIBLE flag if the inserter committed
                     * asynchronously.  See SetHintBits for more info.  Check
                     * that the HEAP_XMIN_COMMITTED hint bit is set because of
                     * that.
                     */
                    if (all_visible)
                    {
                        TransactionId xmin;

                        if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
                        {
                            all_visible = false;
                            break;
                        }

                        /*
                         * The inserter definitely committed.  But is it old
                         * enough that everyone sees it as committed?
                         */
                        xmin = HeapTupleHeaderGetXmin(tuple.t_data);
                        if (!TransactionIdPrecedes(xmin, OldestXmin))
                        {
                            all_visible = false;
                            break;
                        }
                    }
                    break;
                case HEAPTUPLE_RECENTLY_DEAD:

                    /*
                     * If tuple is recently deleted then we must not remove it
                     * from relation.
                     */
                    nkeep += 1;
                    all_visible = false;
                    break;
                case HEAPTUPLE_INSERT_IN_PROGRESS:
                    /* This is an expected case during concurrent vacuum */
                    all_visible = false;
                    break;
                case HEAPTUPLE_DELETE_IN_PROGRESS:
                    /* This is an expected case during concurrent vacuum */
                    all_visible = false;
                    break;
                default:
                    elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                    break;
            }

            if (tupgone)
            {
                lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
                HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
                                                       &vacrelstats->latestRemovedXid);
                tups_vacuumed += 1;
                has_dead_tuples = true;
            }
            else
            {
                num_tuples += 1;
                hastup = true;

                /*
                 * Each non-removable tuple must be checked to see if it needs
                 * freezing.  Note we already have exclusive buffer lock.
                 */
                if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
                                      InvalidBuffer))
                    frozen[nfrozen++] = offnum;
            }
        }                       /* scan along page */

        /*
         * If we froze any tuples, mark the buffer dirty, and write a WAL
         * record recording the changes.  We must log the changes to be
         * crash-safe against future truncation of CLOG.
         */
        if (nfrozen > 0)
        {
            MarkBufferDirty(buf);
            if (RelationNeedsWAL(onerel))
            {
                XLogRecPtr  recptr;

                recptr = log_heap_freeze(onerel, buf, FreezeLimit,
                                         frozen, nfrozen);
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
            }
        }

        /*
         * If there are no indexes then we can vacuum the page right now
         * instead of doing a second scan.
         */
        if (nindexes == 0 &&
            vacrelstats->num_dead_tuples > 0)
        {
            /* Remove tuples from heap */
            lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);

            /*
             * Forget the now-vacuumed tuples, and press on, but be careful
             * not to reset latestRemovedXid since we want that value to be
             * valid.
             */
            vacrelstats->num_dead_tuples = 0;
            vacuumed_pages++;
        }

        freespace = PageGetHeapFreeSpace(page);

        /* Update the all-visible flag on the page */
        if (!PageIsAllVisible(page) && all_visible)
        {
            PageSetAllVisible(page);
            SetBufferCommitInfoNeedsSave(buf);
        }

        /*
         * It's possible for the value returned by GetOldestXmin() to move
         * backwards, so it's not wrong for us to see tuples that appear to
         * not be visible to everyone yet, while PD_ALL_VISIBLE is already
         * set.  The real safe xmin value never moves backwards, but
         * GetOldestXmin() is conservative and sometimes returns a value
         * that's unnecessarily small, so if we see that contradiction it just
         * means that the tuples that we think are not visible to everyone yet
         * actually are, and the PD_ALL_VISIBLE flag is correct.
         *
         * There should never be dead tuples on a page with PD_ALL_VISIBLE
         * set, however.
         */
        else if (PageIsAllVisible(page) && has_dead_tuples)
        {
            elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
                 relname, blkno);
            PageClearAllVisible(page);
            SetBufferCommitInfoNeedsSave(buf);

            /*
             * Normally, we would drop the lock on the heap page before
             * updating the visibility map, but since this case shouldn't
             * happen anyway, don't worry about that.
             */
            visibilitymap_clear(onerel, blkno);
        }

        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        /* Update the visibility map */
        if (!all_visible_according_to_vm && all_visible)
        {
            visibilitymap_pin(onerel, blkno, &vmbuffer);
            LockBuffer(buf, BUFFER_LOCK_SHARE);
            if (PageIsAllVisible(page))
                visibilitymap_set(onerel, blkno, PageGetLSN(page), &vmbuffer);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        }

        ReleaseBuffer(buf);

        /* Remember the location of the last page with nonremovable tuples */
        if (hastup)
            vacrelstats->nonempty_pages = blkno + 1;

        /*
         * If we remembered any tuples for deletion, then the page will be
         * visited again by lazy_vacuum_heap, which will compute and record
         * its post-compaction free space.  If not, then we're done with this
         * page, so remember its free space as-is.  (This path will always be
         * taken if there are no indexes.)
         */
        if (vacrelstats->num_dead_tuples == prev_dead_count)
            RecordPageWithFreeSpace(onerel, blkno, freespace);
    }

    /* save stats for use later */
    vacrelstats->scanned_tuples = num_tuples;
    vacrelstats->tuples_deleted = tups_vacuumed;

    /* now we can compute the new value for pg_class.reltuples */
    vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false,
                                                         nblocks,
                                                         vacrelstats->scanned_pages,
                                                         num_tuples);

    /* If any tuples need to be deleted, perform final vacuum cycle */
    /* XXX put a threshold on min number of tuples here? */
    if (vacrelstats->num_dead_tuples > 0)
    {
        /* Log cleanup info before we touch indexes */
        vacuum_log_cleanup_info(onerel, vacrelstats);

        /* Remove index entries */
        for (i = 0; i < nindexes; i++)
            lazy_vacuum_index(Irel[i], &indstats[i], vacrelstats);

        /* Remove tuples from heap */
        lazy_vacuum_heap(onerel, vacrelstats);
        vacrelstats->num_index_scans++;
    }

    /* Release the pin on the visibility map page */
    if (BufferIsValid(vmbuffer))
    {
        ReleaseBuffer(vmbuffer);
        vmbuffer = InvalidBuffer;
    }

    /* Do post-vacuum cleanup and statistics update for each index */
    for (i = 0; i < nindexes; i++)
        lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);

    /* If no indexes, make log report that lazy_vacuum_heap would've made */
    if (vacuumed_pages)
        ereport(elevel,
                (errmsg("\"%s\": removed %.0f row versions in %u pages",
                        RelationGetRelationName(onerel),
                        tups_vacuumed, vacuumed_pages)));

    ereport(elevel,
            (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
                    RelationGetRelationName(onerel),
                    tups_vacuumed, num_tuples,
                    vacrelstats->scanned_pages, nblocks),
             errdetail("%.0f dead row versions cannot be removed yet.\n"
                       "There were %.0f unused item pointers.\n"
                       "%u pages are entirely empty.\n"
                       "%s.",
                       nkeep, nunused,
                       empty_pages,
                       pg_rusage_show(&ru0))));
}