/* * brin_vacuum_scan * Do a complete scan of the index during VACUUM. * * This routine scans the complete index looking for uncatalogued index pages, * i.e. those that might have been lost due to a crash after index extension * and such. */ static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy) { bool vacuum_fsm = false; BlockNumber blkno; /* * Scan the index in physical order, and clean up any possible mess in * each page. */ for (blkno = 0; blkno < RelationGetNumberOfBlocks(idxrel); blkno++) { Buffer buf; CHECK_FOR_INTERRUPTS(); buf = ReadBufferExtended(idxrel, MAIN_FORKNUM, blkno, RBM_NORMAL, strategy); vacuum_fsm |= brin_page_cleanup(idxrel, buf); ReleaseBuffer(buf); } /* * If we made any change to the FSM, make sure the new info is visible all * the way to the top. */ if (vacuum_fsm) FreeSpaceMapVacuum(idxrel); }
/* * brin_vacuum_scan * Do a complete scan of the index during VACUUM. * * This routine scans the complete index looking for uncatalogued index pages, * i.e. those that might have been lost due to a crash after index extension * and such. */ static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy) { BlockNumber nblocks; BlockNumber blkno; /* * Scan the index in physical order, and clean up any possible mess in * each page. */ nblocks = RelationGetNumberOfBlocks(idxrel); for (blkno = 0; blkno < nblocks; blkno++) { Buffer buf; CHECK_FOR_INTERRUPTS(); buf = ReadBufferExtended(idxrel, MAIN_FORKNUM, blkno, RBM_NORMAL, strategy); brin_page_cleanup(idxrel, buf); ReleaseBuffer(buf); } /* * Update all upper pages in the index's FSM, as well. This ensures not * only that we propagate leaf-page FSM updates made by brin_page_cleanup, * but also that any pre-existing damage or out-of-dateness is repaired. */ FreeSpaceMapVacuum(idxrel); }
/* * lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation * * This routine vacuums a single heap, cleans out its indexes, and * updates its relpages and reltuples statistics. * * At entry, we have already established a transaction and opened * and locked the relation. */ void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) { LVRelStats *vacrelstats; Relation *Irel; int nindexes; BlockNumber possibly_freeable; PGRUsage ru0; TimestampTz starttime = 0; bool scan_all; TransactionId freezeTableLimit; pg_rusage_init(&ru0); /* measure elapsed time iff autovacuum logging requires it */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0) starttime = GetCurrentTimestamp(); if (vacstmt->options & VACOPT_VERBOSE) elevel = INFO; else elevel = DEBUG2; vac_strategy = bstrategy; vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age, onerel->rd_rel->relisshared, &OldestXmin, &FreezeLimit, &freezeTableLimit); scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid, freezeTableLimit); vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples; vacrelstats->num_index_scans = 0; /* Open all indexes of the relation */ vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel); vacrelstats->hasindex = (nindexes > 0); /* Do the vacuuming */ lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all); /* Done with indexes */ vac_close_indexes(nindexes, Irel, NoLock); /* * Optionally truncate the relation. * * Don't even think about it unless we have a shot at releasing a goodly * number of pages. Otherwise, the time taken isn't worth it. */ possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages; if (possibly_freeable > 0 && (possibly_freeable >= REL_TRUNCATE_MINIMUM || possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)) lazy_truncate_heap(onerel, vacrelstats); /* Vacuum the Free Space Map */ FreeSpaceMapVacuum(onerel); /* * Update statistics in pg_class. But don't change relfrozenxid if we * skipped any pages. */ vac_update_relstats(onerel, vacrelstats->rel_pages, vacrelstats->new_rel_tuples, vacrelstats->hasindex, (vacrelstats->scanned_pages < vacrelstats->rel_pages) ? InvalidTransactionId : FreezeLimit); /* report results to the stats collector, too */ pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared, vacrelstats->new_rel_tuples); /* and log the action if appropriate */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) { if (Log_autovacuum_min_duration == 0 || TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(), Log_autovacuum_min_duration)) ereport(LOG, (errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n" "pages: %d removed, %d remain\n" "tuples: %.0f removed, %.0f remain\n" "system usage: %s", get_database_name(MyDatabaseId), get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel), vacrelstats->num_index_scans, vacrelstats->pages_removed, vacrelstats->rel_pages, vacrelstats->tuples_deleted, vacrelstats->new_rel_tuples, pg_rusage_show(&ru0)))); } }
/* * lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation * * This routine vacuums a single heap, cleans out its indexes, and * updates its relpages and reltuples statistics. * * At entry, we have already established a transaction and opened * and locked the relation. */ void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) { LVRelStats *vacrelstats; Relation *Irel; int nindexes; BlockNumber possibly_freeable; PGRUsage ru0; TimestampTz starttime = 0; long secs; int usecs; double read_rate, write_rate; bool scan_all; TransactionId freezeTableLimit; BlockNumber new_rel_pages; double new_rel_tuples; BlockNumber new_rel_allvisible; TransactionId new_frozen_xid; /* measure elapsed time iff autovacuum logging requires it */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) { pg_rusage_init(&ru0); starttime = GetCurrentTimestamp(); } if (vacstmt->options & VACOPT_VERBOSE) elevel = INFO; else elevel = DEBUG2; vac_strategy = bstrategy; vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age, onerel->rd_rel->relisshared, &OldestXmin, &FreezeLimit, &freezeTableLimit); scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid, freezeTableLimit); vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); vacrelstats->old_rel_pages = onerel->rd_rel->relpages; vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples; vacrelstats->num_index_scans = 0; /* Open all indexes of the relation */ vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel); vacrelstats->hasindex = (nindexes > 0); /* Do the vacuuming */ lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all); /* Done with indexes */ vac_close_indexes(nindexes, Irel, NoLock); /* * Optionally truncate the relation. * * Don't even think about it unless we have a shot at releasing a goodly * number of pages. Otherwise, the time taken isn't worth it. */ possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages; if (possibly_freeable > 0 && (possibly_freeable >= REL_TRUNCATE_MINIMUM || possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)) lazy_truncate_heap(onerel, vacrelstats); /* Vacuum the Free Space Map */ FreeSpaceMapVacuum(onerel); /* * Update statistics in pg_class. * * A corner case here is that if we scanned no pages at all because every * page is all-visible, we should not update relpages/reltuples, because * we have no new information to contribute. In particular this keeps * us from replacing relpages=reltuples=0 (which means "unknown tuple * density") with nonzero relpages and reltuples=0 (which means "zero * tuple density") unless there's some actual evidence for the latter. * * We do update relallvisible even in the corner case, since if the * table is all-visible we'd definitely like to know that. But clamp * the value to be not more than what we're setting relpages to. * * Also, don't change relfrozenxid if we skipped any pages, since then * we don't know for certain that all tuples have a newer xmin. */ new_rel_pages = vacrelstats->rel_pages; new_rel_tuples = vacrelstats->new_rel_tuples; if (vacrelstats->scanned_pages == 0 && new_rel_pages > 0) { new_rel_pages = vacrelstats->old_rel_pages; new_rel_tuples = vacrelstats->old_rel_tuples; } new_rel_allvisible = visibilitymap_count(onerel); if (new_rel_allvisible > new_rel_pages) new_rel_allvisible = new_rel_pages; new_frozen_xid = FreezeLimit; if (vacrelstats->scanned_pages < vacrelstats->rel_pages) new_frozen_xid = InvalidTransactionId; vac_update_relstats(onerel, new_rel_pages, new_rel_tuples, new_rel_allvisible, vacrelstats->hasindex, new_frozen_xid); /* report results to the stats collector, too */ pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared, new_rel_tuples); /* and log the action if appropriate */ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) { TimestampTz endtime = GetCurrentTimestamp(); if (Log_autovacuum_min_duration == 0 || TimestampDifferenceExceeds(starttime, endtime, Log_autovacuum_min_duration)) { TimestampDifference(starttime, endtime, &secs, &usecs); read_rate = 0; write_rate = 0; if ((secs > 0) || (usecs > 0)) { read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) / (secs + usecs / 1000000.0); write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) / (secs + usecs / 1000000.0); } ereport(LOG, (errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n" "pages: %d removed, %d remain\n" "tuples: %.0f removed, %.0f remain\n" "buffer usage: %d hits, %d misses, %d dirtied\n" "avg read rate: %.3f MiB/s, avg write rate: %.3f MiB/s\n" "system usage: %s", get_database_name(MyDatabaseId), get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel), vacrelstats->num_index_scans, vacrelstats->pages_removed, vacrelstats->rel_pages, vacrelstats->tuples_deleted, vacrelstats->new_rel_tuples, VacuumPageHit, VacuumPageMiss, VacuumPageDirty, read_rate,write_rate, pg_rusage_show(&ru0)))); } } }
/* * IndexFreeSpaceMapVacuum - scan and fix any inconsistencies in the FSM */ void IndexFreeSpaceMapVacuum(Relation rel) { FreeSpaceMapVacuum(rel); }
/* * Update tuple origtup (size origsz), located in offset oldoff of buffer * oldbuf, to newtup (size newsz) as summary tuple for the page range starting * at heapBlk. oldbuf must not be locked on entry, and is not locked at exit. * * If samepage is true, attempt to put the new tuple in the same page, but if * there's no room, use some other one. * * If the update is successful, return true; the revmap is updated to point to * the new tuple. If the update is not done for whatever reason, return false. * Caller may retry the update if this happens. */ bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage) { Page oldpage; ItemId oldlp; BrinTuple *oldtup; Size oldsz; Buffer newbuf; bool extended; Assert(newsz == MAXALIGN(newsz)); /* If the item is oversized, don't bother. */ if (newsz > BrinMaxItemSize) { ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("index row size %lu exceeds maximum %lu for index \"%s\"", (unsigned long) newsz, (unsigned long) BrinMaxItemSize, RelationGetRelationName(idxrel)))); return false; /* keep compiler quiet */ } /* make sure the revmap is long enough to contain the entry we need */ brinRevmapExtend(revmap, heapBlk); if (!samepage) { /* need a page on which to put the item */ newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended); if (!BufferIsValid(newbuf)) { Assert(!extended); return false; } /* * Note: it's possible (though unlikely) that the returned newbuf is * the same as oldbuf, if brin_getinsertbuffer determined that the old * buffer does in fact have enough space. */ if (newbuf == oldbuf) { Assert(!extended); newbuf = InvalidBuffer; } } else { LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE); newbuf = InvalidBuffer; extended = false; } oldpage = BufferGetPage(oldbuf); oldlp = PageGetItemId(oldpage, oldoff); /* * Check that the old tuple wasn't updated concurrently: it might have * moved someplace else entirely ... */ if (!ItemIdIsNormal(oldlp)) { LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); /* * If this happens, and the new buffer was obtained by extending the * relation, then we need to ensure we don't leave it uninitialized or * forget about it. */ if (BufferIsValid(newbuf)) { if (extended) brin_initialize_empty_new_buffer(idxrel, newbuf); UnlockReleaseBuffer(newbuf); if (extended) FreeSpaceMapVacuum(idxrel); } return false; } oldsz = ItemIdGetLength(oldlp); oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp); /* * ... or it might have been updated in place to different contents. */ if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz)) { LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); if (BufferIsValid(newbuf)) { if (extended) brin_initialize_empty_new_buffer(idxrel, newbuf); UnlockReleaseBuffer(newbuf); if (extended) FreeSpaceMapVacuum(idxrel); } return false; } /* * Great, the old tuple is intact. We can proceed with the update. * * If there's enough room in the old page for the new tuple, replace it. * * Note that there might now be enough space on the page even though the * caller told us there isn't, if a concurrent update moved another tuple * elsewhere or replaced a tuple with a smaller one. */ if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) && brin_can_do_samepage_update(oldbuf, origsz, newsz)) { if (BufferIsValid(newbuf)) { /* as above */ if (extended) brin_initialize_empty_new_buffer(idxrel, newbuf); UnlockReleaseBuffer(newbuf); } START_CRIT_SECTION(); if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) newtup, newsz)) elog(ERROR, "failed to replace BRIN tuple"); MarkBufferDirty(oldbuf); /* XLOG stuff */ if (RelationNeedsWAL(idxrel)) { xl_brin_samepage_update xlrec; XLogRecPtr recptr; uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE; xlrec.offnum = oldoff; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate); XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) newtup, newsz); recptr = XLogInsert(RM_BRIN_ID, info); PageSetLSN(oldpage, recptr); } END_CRIT_SECTION(); LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); if (extended) FreeSpaceMapVacuum(idxrel); return true; } else if (newbuf == InvalidBuffer) { /* * Not enough space, but caller said that there was. Tell them to * start over. */ LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); return false; } else { /* * Not enough free space on the oldpage. Put the new tuple on the new * page, and update the revmap. */ Page newpage = BufferGetPage(newbuf); Buffer revmapbuf; ItemPointerData newtid; OffsetNumber newoff; BlockNumber newblk = InvalidBlockNumber; Size freespace = 0; revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk); START_CRIT_SECTION(); /* * We need to initialize the page if it's newly obtained. Note we * will WAL-log the initialization as part of the update, so we don't * need to do that here. */ if (extended) brin_page_init(BufferGetPage(newbuf), BRIN_PAGETYPE_REGULAR); PageIndexTupleDeleteNoCompact(oldpage, oldoff); newoff = PageAddItem(newpage, (Item) newtup, newsz, InvalidOffsetNumber, false, false); if (newoff == InvalidOffsetNumber) elog(ERROR, "failed to add BRIN tuple to new page"); MarkBufferDirty(oldbuf); MarkBufferDirty(newbuf); /* needed to update FSM below */ if (extended) { newblk = BufferGetBlockNumber(newbuf); freespace = br_page_get_freespace(newpage); } ItemPointerSet(&newtid, BufferGetBlockNumber(newbuf), newoff); brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid); MarkBufferDirty(revmapbuf); /* XLOG stuff */ if (RelationNeedsWAL(idxrel)) { xl_brin_update xlrec; XLogRecPtr recptr; uint8 info; info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0); xlrec.insert.offnum = newoff; xlrec.insert.heapBlk = heapBlk; xlrec.insert.pagesPerRange = pagesPerRange; xlrec.oldOffnum = oldoff; XLogBeginInsert(); /* new page */ XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate); XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0)); XLogRegisterBufData(0, (char *) newtup, newsz); /* revmap page */ XLogRegisterBuffer(1, revmapbuf, 0); /* old page */ XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD); recptr = XLogInsert(RM_BRIN_ID, info); PageSetLSN(oldpage, recptr); PageSetLSN(newpage, recptr); PageSetLSN(BufferGetPage(revmapbuf), recptr); } END_CRIT_SECTION(); LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK); LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); UnlockReleaseBuffer(newbuf); if (extended) { Assert(BlockNumberIsValid(newblk)); RecordPageWithFreeSpace(idxrel, newblk, freespace); FreeSpaceMapVacuum(idxrel); } return true; } }
/* * Insert an index tuple into the index relation. The revmap is updated to * mark the range containing the given page as pointing to the inserted entry. * A WAL record is written. * * The buffer, if valid, is first checked for free space to insert the new * entry; if there isn't enough, a new buffer is obtained and pinned. No * buffer lock must be held on entry, no buffer lock is held on exit. * * Return value is the offset number where the tuple was inserted. */ OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, BrinTuple *tup, Size itemsz) { Page page; BlockNumber blk; OffsetNumber off; Buffer revmapbuf; ItemPointerData tid; bool extended; Assert(itemsz == MAXALIGN(itemsz)); /* If the item is oversized, don't even bother. */ if (itemsz > BrinMaxItemSize) { ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("index row size %lu exceeds maximum %lu for index \"%s\"", (unsigned long) itemsz, (unsigned long) BrinMaxItemSize, RelationGetRelationName(idxrel)))); return InvalidOffsetNumber; /* keep compiler quiet */ } /* Make sure the revmap is long enough to contain the entry we need */ brinRevmapExtend(revmap, heapBlk); /* * Acquire lock on buffer supplied by caller, if any. If it doesn't have * enough space, unpin it to obtain a new one below. */ if (BufferIsValid(*buffer)) { /* * It's possible that another backend (or ourselves!) extended the * revmap over the page we held a pin on, so we cannot assume that * it's still a regular page. */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz) { UnlockReleaseBuffer(*buffer); *buffer = InvalidBuffer; } } /* * If we still don't have a usable buffer, have brin_getinsertbuffer * obtain one for us. */ if (!BufferIsValid(*buffer)) { do *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended); while (!BufferIsValid(*buffer)); } else extended = false; /* Now obtain lock on revmap buffer */ revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk); page = BufferGetPage(*buffer); blk = BufferGetBlockNumber(*buffer); /* Execute the actual insertion */ START_CRIT_SECTION(); if (extended) brin_page_init(BufferGetPage(*buffer), BRIN_PAGETYPE_REGULAR); off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber, false, false); if (off == InvalidOffsetNumber) elog(ERROR, "could not insert new index tuple to page"); MarkBufferDirty(*buffer); BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u", blk, off, heapBlk)); ItemPointerSet(&tid, blk, off); brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid); MarkBufferDirty(revmapbuf); /* XLOG stuff */ if (RelationNeedsWAL(idxrel)) { xl_brin_insert xlrec; XLogRecPtr recptr; uint8 info; info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0); xlrec.heapBlk = heapBlk; xlrec.pagesPerRange = pagesPerRange; xlrec.offnum = off; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfBrinInsert); XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0)); XLogRegisterBufData(0, (char *) tup, itemsz); XLogRegisterBuffer(1, revmapbuf, 0); recptr = XLogInsert(RM_BRIN_ID, info); PageSetLSN(page, recptr); PageSetLSN(BufferGetPage(revmapbuf), recptr); } END_CRIT_SECTION(); /* Tuple is firmly on buffer; we can release our locks */ LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK); if (extended) FreeSpaceMapVacuum(idxrel); return off; }
/* * Update tuple origtup (size origsz), located in offset oldoff of buffer * oldbuf, to newtup (size newsz) as summary tuple for the page range starting * at heapBlk. oldbuf must not be locked on entry, and is not locked at exit. * * If samepage is true, attempt to put the new tuple in the same page, but if * there's no room, use some other one. * * If the update is successful, return true; the revmap is updated to point to * the new tuple. If the update is not done for whatever reason, return false. * Caller may retry the update if this happens. */ bool brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, BlockNumber heapBlk, Buffer oldbuf, OffsetNumber oldoff, const BrinTuple *origtup, Size origsz, const BrinTuple *newtup, Size newsz, bool samepage) { Page oldpage; ItemId oldlp; BrinTuple *oldtup; Size oldsz; Buffer newbuf; BrinSpecialSpace *special; bool extended = false; newsz = MAXALIGN(newsz); /* make sure the revmap is long enough to contain the entry we need */ brinRevmapExtend(revmap, heapBlk); if (!samepage) { /* need a page on which to put the item */ newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended); /* XXX delay vacuuming FSM until locks are released? */ if (extended) FreeSpaceMapVacuum(idxrel); if (!BufferIsValid(newbuf)) return false; /* * Note: it's possible (though unlikely) that the returned newbuf is * the same as oldbuf, if brin_getinsertbuffer determined that the old * buffer does in fact have enough space. */ if (newbuf == oldbuf) newbuf = InvalidBuffer; } else { LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE); newbuf = InvalidBuffer; } oldpage = BufferGetPage(oldbuf); oldlp = PageGetItemId(oldpage, oldoff); /* * Check that the old tuple wasn't updated concurrently: it might have * moved someplace else entirely ... */ if (!ItemIdIsNormal(oldlp)) { LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); if (BufferIsValid(newbuf)) UnlockReleaseBuffer(newbuf); return false; } oldsz = ItemIdGetLength(oldlp); oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp); /* * ... or it might have been updated in place to different contents. */ if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz)) { LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); if (BufferIsValid(newbuf)) UnlockReleaseBuffer(newbuf); return false; } special = (BrinSpecialSpace *) PageGetSpecialPointer(oldpage); /* * Great, the old tuple is intact. We can proceed with the update. * * If there's enough room in the old page for the new tuple, replace it. * * Note that there might now be enough space on the page even though the * caller told us there isn't, if a concurrent update moved another tuple * elsewhere or replaced a tuple with a smaller one. */ if (((special->flags & BRIN_EVACUATE_PAGE) == 0) && brin_can_do_samepage_update(oldbuf, origsz, newsz)) { if (BufferIsValid(newbuf)) UnlockReleaseBuffer(newbuf); START_CRIT_SECTION(); PageIndexDeleteNoCompact(oldpage, &oldoff, 1); if (PageAddItem(oldpage, (Item) newtup, newsz, oldoff, true, false) == InvalidOffsetNumber) elog(ERROR, "failed to add BRIN tuple"); MarkBufferDirty(oldbuf); /* XLOG stuff */ if (RelationNeedsWAL(idxrel)) { xl_brin_samepage_update xlrec; XLogRecPtr recptr; uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE; xlrec.offnum = oldoff; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate); XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) newtup, newsz); recptr = XLogInsert(RM_BRIN_ID, info); PageSetLSN(oldpage, recptr); } END_CRIT_SECTION(); LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); return true; } else if (newbuf == InvalidBuffer) { /* * Not enough space, but caller said that there was. Tell them to * start over. */ LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); return false; } else { /* * Not enough free space on the oldpage. Put the new tuple on the new * page, and update the revmap. */ Page newpage = BufferGetPage(newbuf); Buffer revmapbuf; ItemPointerData newtid; OffsetNumber newoff; revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk); START_CRIT_SECTION(); PageIndexDeleteNoCompact(oldpage, &oldoff, 1); newoff = PageAddItem(newpage, (Item) newtup, newsz, InvalidOffsetNumber, false, false); if (newoff == InvalidOffsetNumber) elog(ERROR, "failed to add BRIN tuple to new page"); MarkBufferDirty(oldbuf); MarkBufferDirty(newbuf); ItemPointerSet(&newtid, BufferGetBlockNumber(newbuf), newoff); brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid); MarkBufferDirty(revmapbuf); /* XLOG stuff */ if (RelationNeedsWAL(idxrel)) { xl_brin_update xlrec; XLogRecPtr recptr; uint8 info; info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0); xlrec.insert.offnum = newoff; xlrec.insert.heapBlk = heapBlk; xlrec.insert.pagesPerRange = pagesPerRange; xlrec.oldOffnum = oldoff; XLogBeginInsert(); /* new page */ XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate); XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0)); XLogRegisterBufData(0, (char *) newtup, newsz); /* revmap page */ XLogRegisterBuffer(1, revmapbuf, REGBUF_STANDARD); /* old page */ XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD); recptr = XLogInsert(RM_BRIN_ID, info); PageSetLSN(oldpage, recptr); PageSetLSN(newpage, recptr); PageSetLSN(BufferGetPage(revmapbuf), recptr); } END_CRIT_SECTION(); LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK); LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK); UnlockReleaseBuffer(newbuf); return true; } }
/* * Insert an index tuple into the index relation. The revmap is updated to * mark the range containing the given page as pointing to the inserted entry. * A WAL record is written. * * The buffer, if valid, is first checked for free space to insert the new * entry; if there isn't enough, a new buffer is obtained and pinned. No * buffer lock must be held on entry, no buffer lock is held on exit. * * Return value is the offset number where the tuple was inserted. */ OffsetNumber brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk, BrinTuple *tup, Size itemsz) { Page page; BlockNumber blk; OffsetNumber off; Buffer revmapbuf; ItemPointerData tid; bool extended = false; itemsz = MAXALIGN(itemsz); /* Make sure the revmap is long enough to contain the entry we need */ brinRevmapExtend(revmap, heapBlk); /* * Obtain a locked buffer to insert the new tuple. Note * brin_getinsertbuffer ensures there's enough space in the returned * buffer. */ if (BufferIsValid(*buffer)) { /* * It's possible that another backend (or ourselves!) extended the * revmap over the page we held a pin on, so we cannot assume that * it's still a regular page. */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz) { UnlockReleaseBuffer(*buffer); *buffer = InvalidBuffer; } } if (!BufferIsValid(*buffer)) { *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended); Assert(BufferIsValid(*buffer)); Assert(br_page_get_freespace(BufferGetPage(*buffer)) >= itemsz); } /* Now obtain lock on revmap buffer */ revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk); page = BufferGetPage(*buffer); blk = BufferGetBlockNumber(*buffer); START_CRIT_SECTION(); off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber, false, false); if (off == InvalidOffsetNumber) elog(ERROR, "could not insert new index tuple to page"); MarkBufferDirty(*buffer); BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u", blk, off, heapBlk)); ItemPointerSet(&tid, blk, off); brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid); MarkBufferDirty(revmapbuf); /* XLOG stuff */ if (RelationNeedsWAL(idxrel)) { xl_brin_insert xlrec; XLogRecPtr recptr; uint8 info; info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0); xlrec.heapBlk = heapBlk; xlrec.pagesPerRange = pagesPerRange; xlrec.offnum = off; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfBrinInsert); XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0)); XLogRegisterBufData(0, (char *) tup, itemsz); XLogRegisterBuffer(1, revmapbuf, 0); recptr = XLogInsert(RM_BRIN_ID, info); PageSetLSN(page, recptr); PageSetLSN(BufferGetPage(revmapbuf), recptr); } END_CRIT_SECTION(); /* Tuple is firmly on buffer; we can release our locks */ LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK); if (extended) FreeSpaceMapVacuum(idxrel); return off; }