/*
 * UpdateFreeSpaceMap - push free space info for a block range up to the root.
 *
 * Intended for use after many new blocks have been added to the relation.
 * For each FSM page covering heap blocks startBlkNum..endBlkNum, the category
 * derived from 'freespace' is propagated through the upper levels of the map
 * so that the newly inserted blocks are not lost track of.  Doing this for
 * every single-page change is judged not worthwhile, but for a bulk-extend
 * it is.
 */
void
UpdateFreeSpaceMap(Relation rel, BlockNumber startBlkNum,
				   BlockNumber endBlkNum, Size freespace)
{
	int			category = fsm_space_avail_to_cat(freespace);
	BlockNumber cur;
	BlockNumber pageLastBlk;

	for (cur = startBlkNum; cur <= endBlkNum; cur = pageLastBlk + 1)
	{
		FSMAddress	leaf;
		uint16		leaf_slot;

		/* Locate the FSM page for this block and update up to the root. */
		leaf = fsm_get_location(cur, &leaf_slot);
		fsm_update_recursive(rel, leaf, category);

		/*
		 * Fetch the last block number on this FSM page.  Once it reaches
		 * endBlkNum we are finished; otherwise the loop step moves on to the
		 * first block of the next page.
		 */
		pageLastBlk = fsm_get_lastblckno(rel, leaf);
		if (pageLastBlk >= endBlkNum)
			break;
	}
}
/*
 * XLogRecordPageWithFreeSpace - counterpart of RecordPageWithFreeSpace for
 * use in WAL replay.
 *
 * Works on the relation identified by rnode rather than on a Relation, and
 * stores the category for spaceAvail in the FSM slot that represents heapBlk.
 */
void
XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
							Size spaceAvail)
{
	int			category = fsm_space_avail_to_cat(spaceAvail);
	uint16		fsm_slot;
	FSMAddress	fsm_addr = fsm_get_location(heapBlk, &fsm_slot);
	BlockNumber fsm_blkno = fsm_logical_to_physical(fsm_addr);
	Buffer		fsm_buf;
	Page		fsm_page;

	/* Read the FSM block; if the page doesn't exist already, extend. */
	fsm_buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, fsm_blkno,
									 RBM_ZERO_ON_ERROR);
	LockBuffer(fsm_buf, BUFFER_LOCK_EXCLUSIVE);

	/* A page that is still new must be initialized before it is used. */
	fsm_page = BufferGetPage(fsm_buf);
	if (PageIsNew(fsm_page))
	{
		PageInit(fsm_page, BLCKSZ, 0);
	}

	/* Dirty the buffer only when fsm_set_avail reports true. */
	if (fsm_set_avail(fsm_page, fsm_slot, category))
	{
		MarkBufferDirtyHint(fsm_buf, false);
	}

	UnlockReleaseBuffer(fsm_buf);
}
/*
 * FreeSpaceMapTruncateRel - adjust the FSM for truncation of a relation.
 *
 * The caller must hold AccessExclusiveLock on the relation, to ensure that
 * other backends receive the smgr invalidation event that this function
 * sends before they access the FSM again.
 *
 * nblocks is the new size of the heap.
 */
void
FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
{
	BlockNumber fsm_keep_blocks;
	FSMAddress	cut_addr;
	uint16		cut_slot;

	RelationOpenSmgr(rel);

	/* With no FSM fork created yet there is nothing to truncate. */
	if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
		return;

	/* Where in the FSM does the first removed heap block live? */
	cut_addr = fsm_get_location(nblocks, &cut_slot);

	if (cut_slot == 0)
	{
		/*
		 * The first removed heap block is the first slot of its FSM page, so
		 * that page can be dropped altogether.
		 */
		fsm_keep_blocks = fsm_logical_to_physical(cut_addr);
		if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= fsm_keep_blocks)
			return;				/* nothing to do; the FSM was already smaller */
	}
	else
	{
		/*
		 * The cut falls inside an FSM page: zero out the tail of that page
		 * and keep it as the last remaining one.
		 */
		Buffer		tail_buf = fsm_readbuf(rel, cut_addr, false);

		if (!BufferIsValid(tail_buf))
			return;				/* nothing to do; the FSM was already smaller */

		LockBuffer(tail_buf, BUFFER_LOCK_EXCLUSIVE);
		fsm_truncate_avail(BufferGetPage(tail_buf), cut_slot);

		/*
		 * NOTE(review): the page is dirtied with MarkBufferDirtyHint.
		 * Confirm that it dirties the buffer in every context this function
		 * runs in (WAL replay in particular); otherwise the cleared tail
		 * might not reach disk.
		 */
		MarkBufferDirtyHint(tail_buf, false);
		UnlockReleaseBuffer(tail_buf);

		fsm_keep_blocks = fsm_logical_to_physical(cut_addr) + 1;
	}

	/* Truncate the unused FSM pages, and send smgr inval message */
	smgrtruncate(rel->rd_smgr, FSM_FORKNUM, fsm_keep_blocks);

	/*
	 * Refresh the local smgr_fsm_nblocks setting as well.  smgrtruncate sent
	 * an smgr cache inval message, which makes other backends invalidate
	 * their copy of smgr_fsm_nblocks, and this one too at the next command
	 * boundary; setting it here ensures it isn't outright wrong until then.
	 */
	if (rel->rd_smgr)
		rel->rd_smgr->smgr_fsm_nblocks = fsm_keep_blocks;
}
/*
 * RecordPageWithFreeSpace - update info about a page.
 *
 * Note that if the new spaceAvail value is higher than the old value stored
 * in the FSM, the space might not become visible to searchers until the next
 * FreeSpaceMapVacuum call, which updates the upper level pages.
 */
void
RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
{
	int			category = fsm_space_avail_to_cat(spaceAvail);
	uint16		leaf_slot;
	FSMAddress	leaf;

	/* Find the FSM byte that represents this heap block. */
	leaf = fsm_get_location(heapBlk, &leaf_slot);

	/*
	 * Only the update matters here: 0 is passed as the final argument and
	 * the result is discarded.
	 */
	(void) fsm_set_and_search(rel, leaf, leaf_slot, category, 0);
}
/*
 * XLogRecordPageWithFreeSpace - counterpart of RecordPageWithFreeSpace for
 * use in WAL replay.
 *
 * The update is skipped when the heap block is below
 * HEAP_FSM_CREATION_THRESHOLD, no FSM fork exists, and the main fork has no
 * more than HEAP_FSM_CREATION_THRESHOLD blocks.
 */
void
XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
							Size spaceAvail)
{
	int			category = fsm_space_avail_to_cat(spaceAvail);
	FSMAddress	fsm_addr;
	uint16		fsm_slot;
	BlockNumber fsm_blkno;
	Buffer		fsm_buf;
	Page		fsm_page;

	/* This is meant to mirror the logic in fsm_allow_writes() */
	if (heapBlk < HEAP_FSM_CREATION_THRESHOLD)
	{
		/* Open the relation at smgr level */
		SMgrRelation reln = smgropen(rnode, InvalidBackendId);

		/*
		 * Without an existing FSM fork, write only if the heap has grown
		 * past the threshold.  The block count is fetched only when the fork
		 * is missing.
		 */
		if (!smgrexists(reln, FSM_FORKNUM) &&
			smgrnblocks(reln, MAIN_FORKNUM) <= HEAP_FSM_CREATION_THRESHOLD)
			return;
	}

	/* Get the location of the FSM byte representing the heap block */
	fsm_addr = fsm_get_location(heapBlk, &fsm_slot);
	fsm_blkno = fsm_logical_to_physical(fsm_addr);

	/* If the page doesn't exist already, extend */
	fsm_buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, fsm_blkno,
									 RBM_ZERO_ON_ERROR);
	LockBuffer(fsm_buf, BUFFER_LOCK_EXCLUSIVE);

	fsm_page = BufferGetPage(fsm_buf);
	if (PageIsNew(fsm_page))
	{
		PageInit(fsm_page, BLCKSZ, 0);
	}

	/* Dirty the buffer only when fsm_set_avail reports true. */
	if (fsm_set_avail(fsm_page, fsm_slot, category))
	{
		MarkBufferDirtyHint(fsm_buf, false);
	}

	UnlockReleaseBuffer(fsm_buf);
}
/*
 * RecordAndGetPageWithFreeSpace - update info about a page and try again.
 *
 * This combined form saves some locking overhead compared to separate
 * RecordPageWithFreeSpace + GetPageWithFreeSpace calls.  It also makes some
 * effort to return a page close to the old page: a page with enough free
 * space on the same FSM page as the old one is preferred.
 *
 * For very small heap relations that don't have a FSM, the local map is
 * updated to indicate that a page has been tried, and the next page to try
 * is returned.
 */
BlockNumber
RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
							  Size oldSpaceAvail, Size spaceNeeded)
{
	BlockNumber heap_nblocks = InvalidBlockNumber;
	FSMAddress	leaf;
	uint16		leaf_slot;
	int			recorded_cat;
	int			wanted_cat;
	int			hit;

	/* First try the local map, if it exists. */
	if (FSM_LOCAL_MAP_EXISTS)
	{
		Assert((rel->rd_rel->relkind == RELKIND_RELATION || rel->rd_rel->relkind == RELKIND_TOASTVALUE) && fsm_local_map.map[oldPage] == FSM_LOCAL_AVAIL);

		/* Mark the page just tried as unavailable, then pick another. */
		fsm_local_map.map[oldPage] = FSM_LOCAL_NOT_AVAIL;
		return fsm_local_search();
	}

	if (!fsm_allow_writes(rel, oldPage, InvalidBlockNumber, &heap_nblocks))
	{
		/*
		 * With neither a local map nor a FSM, we probably just tried the
		 * target block in the smgr relation entry and failed, so the local
		 * map has to be created now.
		 */
		fsm_local_set(rel, heap_nblocks);
		return fsm_local_search();
	}

	/* Normal FSM logic follows */
	recorded_cat = fsm_space_avail_to_cat(oldSpaceAvail);
	wanted_cat = fsm_space_needed_to_cat(spaceNeeded);

	/* Find the FSM byte that represents the old heap block. */
	leaf = fsm_get_location(oldPage, &leaf_slot);

	hit = fsm_set_and_search(rel, leaf, leaf_slot, recorded_cat, wanted_cat);

	/* No suitable slot on that FSM page: search as usual. */
	if (hit == -1)
		return fsm_search(rel, wanted_cat);

	/* Otherwise return the block that fsm_set_and_search found. */
	return fsm_get_heap_blk(leaf, hit);
}
/*
 * GetRecordedFreeSpace - return the amount of free space on a particular
 * page, according to the FSM.
 *
 * Returns 0 when fsm_readbuf yields no valid buffer for the FSM page that
 * would hold the entry.
 */
Size
GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
{
	uint16		leaf_slot;
	FSMAddress	leaf = fsm_get_location(heapBlk, &leaf_slot);
	Buffer		fsm_buf = fsm_readbuf(rel, leaf, false);
	uint8		category;

	if (!BufferIsValid(fsm_buf))
		return 0;

	/* Read the stored category, then let go of the buffer. */
	category = fsm_get_avail(BufferGetPage(fsm_buf), leaf_slot);
	ReleaseBuffer(fsm_buf);

	return fsm_space_cat_to_avail(category);
}
/*
 * RecordPageWithFreeSpace - update info about a page.
 *
 * Note that if the new spaceAvail value is higher than the old value stored
 * in the FSM, the space might not become visible to searchers until the next
 * FreeSpaceMapVacuum call, which updates the upper level pages.
 *
 * Callers have no need for a local map.
 */
void
RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk,
						Size spaceAvail, BlockNumber nblocks)
{
	BlockNumber ignored_nblocks;

	/* When writes are not allowed there is no FSM to update, nor a local map. */
	if (fsm_allow_writes(rel, heapBlk, nblocks, &ignored_nblocks))
	{
		uint16		leaf_slot;
		FSMAddress	leaf = fsm_get_location(heapBlk, &leaf_slot);
		int			category = fsm_space_avail_to_cat(spaceAvail);

		/*
		 * Only the update matters here: 0 is passed as the final argument
		 * and the result is discarded.
		 */
		(void) fsm_set_and_search(rel, leaf, leaf_slot, category, 0);
	}
}
/*
 * RecordAndGetPageWithFreeSpace - update info about a page and try again.
 *
 * This combined form saves some locking overhead compared to separate
 * RecordPageWithFreeSpace + GetPageWithFreeSpace calls.  It also makes some
 * effort to return a page close to the old page: a page with enough free
 * space on the same FSM page as the old one is preferred.
 */
BlockNumber
RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
							  Size oldSpaceAvail, Size spaceNeeded)
{
	int			recorded_cat = fsm_space_avail_to_cat(oldSpaceAvail);
	int			wanted_cat = fsm_space_needed_to_cat(spaceNeeded);
	uint16		leaf_slot;
	FSMAddress	leaf;
	int			hit;

	/* Find the FSM byte that represents the old heap block. */
	leaf = fsm_get_location(oldPage, &leaf_slot);

	hit = fsm_set_and_search(rel, leaf, leaf_slot, recorded_cat, wanted_cat);

	/*
	 * If fsm_set_and_search found a suitable new block, return that.
	 * Otherwise, search as usual.
	 */
	return (hit != -1) ? fsm_get_heap_blk(leaf, hit)
		: fsm_search(rel, wanted_cat);
}
/*
 * Recursive guts of FreeSpaceMapVacuum
 *
 * Examine the FSM page indicated by addr, as well as its children, updating
 * upper-level nodes that cover the heap block range from start to end-1.
 * (It's okay if end is beyond the actual end of the map.)
 * Return the maximum freespace value on this page.
 *
 * If addr is past the end of the FSM, set *eof_p to true and return 0;
 * otherwise *eof_p is set to false.
 *
 * This traverses the tree in depth-first order.  The tree is stored
 * physically in depth-first order, so this should be pretty I/O efficient.
 */
static uint8
fsm_vacuum_page(Relation rel, FSMAddress addr,
				BlockNumber start, BlockNumber end,
				bool *eof_p)
{
	Buffer		buf;
	Page		page;
	uint8		max_avail;

	/* Read the page if it exists, or return EOF */
	buf = fsm_readbuf(rel, addr, false);
	if (!BufferIsValid(buf))
	{
		*eof_p = true;
		return 0;
	}
	else
		*eof_p = false;

	page = BufferGetPage(buf);

	/*
	 * If we're above the bottom level, recurse into children, and fix the
	 * information stored about them at this level.
	 */
	if (addr.level > FSM_BOTTOM_LEVEL)
	{
		FSMAddress	fsm_start,
					fsm_end;
		uint16		fsm_start_slot,
					fsm_end_slot;
		int			slot,
					start_slot,
					end_slot;
		bool		eof = false;

		/*
		 * Compute the range of slots we need to update on this page, given
		 * the requested range of heap blocks to consider.  The first slot to
		 * update is the one covering the "start" block, and the last slot is
		 * the one covering "end - 1".  (Some of this work will be duplicated
		 * in each recursive call, but it's cheap enough to not worry about.)
		 */
		fsm_start = fsm_get_location(start, &fsm_start_slot);
		fsm_end = fsm_get_location(end - 1, &fsm_end_slot);

		/*
		 * Walk both endpoints up the tree, in lockstep, until they are
		 * expressed at the same level as the page being processed.
		 */
		while (fsm_start.level < addr.level)
		{
			fsm_start = fsm_get_parent(fsm_start, &fsm_start_slot);
			fsm_end = fsm_get_parent(fsm_end, &fsm_end_slot);
		}
		Assert(fsm_start.level == addr.level);

		/*
		 * Clamp the start of the range to this page.  A range starting on a
		 * later page gets a start slot of SlotsPerFSMPage, which is above
		 * every end_slot value assigned below, so the loop does not run.
		 */
		if (fsm_start.logpageno == addr.logpageno)
			start_slot = fsm_start_slot;
		else if (fsm_start.logpageno > addr.logpageno)
			start_slot = SlotsPerFSMPage;	/* shouldn't get here... */
		else
			start_slot = 0;

		/*
		 * Clamp the end of the range to this page.  A range ending on an
		 * earlier page gets an end slot of -1, which is below every
		 * non-negative start slot, so the loop does not run.
		 */
		if (fsm_end.logpageno == addr.logpageno)
			end_slot = fsm_end_slot;
		else if (fsm_end.logpageno > addr.logpageno)
			end_slot = SlotsPerFSMPage - 1;
		else
			end_slot = -1;		/* shouldn't get here... */

		for (slot = start_slot; slot <= end_slot; slot++)
		{
			int			child_avail;

			CHECK_FOR_INTERRUPTS();

			/* After we hit end-of-file, just clear the rest of the slots */
			if (!eof)
				child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
											  start, end,
											  &eof);
			else
				child_avail = 0;

			/*
			 * Update information about the child.  The comparison is done
			 * before taking the lock; the exclusive lock is taken only when
			 * the stored value differs and is released right after the
			 * update, while the buffer itself is kept for the next slots.
			 */
			if (fsm_get_avail(page, slot) != child_avail)
			{
				LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
				fsm_set_avail(page, slot, child_avail);
				MarkBufferDirtyHint(buf, false);
				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			}
		}
	}

	/* Now get the maximum value on the page, to return to caller */
	max_avail = fsm_get_max_avail(page);

	/*
	 * Reset the next slot pointer.  This encourages the use of low-numbered
	 * pages, increasing the chances that a later vacuum can truncate the
	 * relation.  We don't bother with a lock here, nor with marking the page
	 * dirty if it wasn't already, since this is just a hint.
	 */
	((FSMPage) PageGetContents(page))->fp_next_slot = 0;

	ReleaseBuffer(buf);

	return max_avail;
}
/*
 * FreeSpaceMapTruncateRel - adjust the FSM for truncation of a relation.
 *
 * The caller must hold AccessExclusiveLock on the relation, to ensure that
 * other backends receive the smgr invalidation event that this function
 * sends before they access the FSM again.
 *
 * nblocks is the new size of the heap.
 */
void
FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
{
	BlockNumber fsm_keep_blocks;
	FSMAddress	cut_addr;
	uint16		cut_slot;

	RelationOpenSmgr(rel);

	/* With no FSM fork created yet there is nothing to truncate. */
	if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
		return;

	/* Where in the FSM does the first removed heap block live? */
	cut_addr = fsm_get_location(nblocks, &cut_slot);

	if (cut_slot == 0)
	{
		/*
		 * The first removed heap block is the first slot of its FSM page, so
		 * that page can be dropped altogether.
		 */
		fsm_keep_blocks = fsm_logical_to_physical(cut_addr);
		if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= fsm_keep_blocks)
			return;				/* nothing to do; the FSM was already smaller */
	}
	else
	{
		/*
		 * The cut falls inside an FSM page: zero out the tail of that page
		 * and keep it as the last remaining one.
		 */
		Buffer		tail_buf = fsm_readbuf(rel, cut_addr, false);

		if (!BufferIsValid(tail_buf))
			return;				/* nothing to do; the FSM was already smaller */

		LockBuffer(tail_buf, BUFFER_LOCK_EXCLUSIVE);

		/* NO EREPORT(ERROR) from here till changes are logged */
		START_CRIT_SECTION();

		fsm_truncate_avail(BufferGetPage(tail_buf), cut_slot);

		/*
		 * Truncation of a relation is WAL-logged at a higher level, and this
		 * function is called again at WAL replay.  With checksums enabled a
		 * WAL record is still needed to protect against a torn page, in case
		 * the page is flushed to disk before the truncation WAL record.
		 * MarkBufferDirtyHint cannot be used here, because it will not dirty
		 * the page during recovery.
		 */
		MarkBufferDirty(tail_buf);
		if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
		{
			log_newpage_buffer(tail_buf, false);
		}

		END_CRIT_SECTION();

		UnlockReleaseBuffer(tail_buf);

		fsm_keep_blocks = fsm_logical_to_physical(cut_addr) + 1;
	}

	/* Truncate the unused FSM pages, and send smgr inval message */
	smgrtruncate(rel->rd_smgr, FSM_FORKNUM, fsm_keep_blocks);

	/*
	 * Refresh the local smgr_fsm_nblocks setting as well.  smgrtruncate sent
	 * an smgr cache inval message, which makes other backends invalidate
	 * their copy of smgr_fsm_nblocks, and this one too at the next command
	 * boundary; setting it here ensures it isn't outright wrong until then.
	 */
	if (rel->rd_smgr)
		rel->rd_smgr->smgr_fsm_nblocks = fsm_keep_blocks;

	/*
	 * Update upper-level FSM pages to account for the truncation.  This is
	 * important because the just-truncated pages were likely marked as
	 * all-free, and would be preferentially selected.
	 */
	FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
}