/*
 * Set value in given FSM page and slot.
 *
 * If minValue > 0, the updated page is also searched for a page with at
 * least minValue of free space. If one is found, its slot number is
 * returned, -1 otherwise.
 */
static int
fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
                   uint8 newValue, uint8 minValue)
{
    Buffer      buf;
    Page        page;
    int         newslot = -1;

    buf = fsm_readbuf(rel, addr, true);
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

    page = BufferGetPage(buf);

    if (fsm_set_avail(page, slot, newValue))
        MarkBufferDirtyHint(buf, false);

    if (minValue != 0)
    {
        /* Search while we still hold the lock */
        newslot = fsm_search_avail(buf, minValue,
                                   addr.level == FSM_BOTTOM_LEVEL,
                                   true);
    }

    UnlockReleaseBuffer(buf);

    return newslot;
}
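/*
 * For context: a sketch of how a caller can combine the "record" and
 * "search" steps through fsm_set_and_search, in the style of
 * RecordAndGetPageWithFreeSpace elsewhere in freespace.c. Treat the
 * fsm_space_avail_to_cat/fsm_space_needed_to_cat helpers and the exact
 * signature as assumptions from that surrounding code, not a definitive
 * copy.
 */
BlockNumber
RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
                              Size oldSpaceAvail, Size spaceNeeded)
{
    int         old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
    int         search_cat = fsm_space_needed_to_cat(spaceNeeded);
    FSMAddress  addr;
    uint16      slot;
    int         search_slot;

    /* Get the location of the FSM byte representing the old heap block */
    addr = fsm_get_location(oldPage, &slot);

    /* Update the old page's slot and search the same FSM page in one pass */
    search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);

    /*
     * If fsm_set_and_search found a suitable block on the same FSM page,
     * return it; otherwise fall back to a full tree search.
     */
    if (search_slot != -1)
        return fsm_get_heap_blk(addr, search_slot);
    else
        return fsm_search(rel, search_cat);
}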
/*
 * FreeSpaceMapTruncateRel - adjust for truncation of a relation.
 *
 * The caller must hold AccessExclusiveLock on the relation, to ensure that
 * other backends receive the smgr invalidation event that this function sends
 * before they access the FSM again.
 *
 * nblocks is the new size of the heap.
 */
void
FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
{
    BlockNumber new_nfsmblocks;
    FSMAddress  first_removed_address;
    uint16      first_removed_slot;
    Buffer      buf;

    RelationOpenSmgr(rel);

    /*
     * If no FSM has been created yet for this relation, there's nothing to
     * truncate.
     */
    if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
        return;

    /* Get the location in the FSM of the first removed heap block */
    first_removed_address = fsm_get_location(nblocks, &first_removed_slot);

    /*
     * Zero out the tail of the last remaining FSM page. If the slot
     * representing the first removed heap block is at a page boundary, as the
     * first slot on the FSM page that first_removed_address points to, we can
     * just truncate that page altogether.
     */
    if (first_removed_slot > 0)
    {
        buf = fsm_readbuf(rel, first_removed_address, false);
        if (!BufferIsValid(buf))
            return;             /* nothing to do; the FSM was already smaller */
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
        fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
        MarkBufferDirtyHint(buf, false);
        UnlockReleaseBuffer(buf);

        new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
    }
    else
    {
        new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
        if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks)
            return;             /* nothing to do; the FSM was already smaller */
    }

    /* Truncate the unused FSM pages, and send smgr inval message */
    smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks);

    /*
     * We might as well update the local smgr_fsm_nblocks setting.
     * smgrtruncate sent an smgr cache inval message, which will cause other
     * backends to invalidate their copy of smgr_fsm_nblocks, and this one too
     * at the next command boundary. But this ensures it isn't outright wrong
     * until then.
     */
    if (rel->rd_smgr)
        rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;
}
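/*
 * For reference: a minimal sketch of fsm_get_location, used above to map a
 * heap block number to the FSM page and slot that track it. It assumes the
 * usual layout of one FSM byte per heap block, with SlotsPerFSMPage slots
 * per leaf FSM page; the authoritative definition lives elsewhere in
 * freespace.c.
 */
static FSMAddress
fsm_get_location(BlockNumber heapblk, uint16 *slot)
{
    FSMAddress  addr;

    /* Leaf level: one slot per heap block, SlotsPerFSMPage slots per page */
    addr.level = FSM_BOTTOM_LEVEL;
    addr.logpageno = heapblk / SlotsPerFSMPage;
    *slot = heapblk % SlotsPerFSMPage;

    return addr;
}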
/*
 * GetRecordedFreeSpace - return the amount of free space on a particular
 * page, according to the FSM.
 */
Size
GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
{
    FSMAddress  addr;
    uint16      slot;
    Buffer      buf;
    uint8       cat;

    /* Get the location of the FSM byte representing the heap block */
    addr = fsm_get_location(heapBlk, &slot);

    buf = fsm_readbuf(rel, addr, false);
    if (!BufferIsValid(buf))
        return 0;
    cat = fsm_get_avail(BufferGetPage(buf), slot);
    ReleaseBuffer(buf);

    return fsm_space_cat_to_avail(cat);
}
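/*
 * For reference: a sketch of fsm_space_cat_to_avail, which converts the
 * one-byte category stored in the FSM back into a byte count. It assumes
 * the usual FSM encoding, in which each of the 256 categories covers
 * FSM_CAT_STEP (BLCKSZ / 256) bytes of free space; the authoritative
 * definition lives elsewhere in freespace.c.
 */
static Size
fsm_space_cat_to_avail(uint8 cat)
{
    /* The highest category represents BLCKSZ bytes, i.e. an empty page */
    if (cat == 255)
        return BLCKSZ;
    else
        return cat * FSM_CAT_STEP;
}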
/*
 * Recursive guts of FreeSpaceMapVacuum
 */
static uint8
fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
{
    Buffer      buf;
    Page        page;
    uint8       max_avail;

    /* Read the page if it exists, or return EOF */
    buf = fsm_readbuf(rel, addr, false);
    if (!BufferIsValid(buf))
    {
        *eof_p = true;
        return 0;
    }
    else
        *eof_p = false;

    page = BufferGetPage(buf);

    /*
     * Recurse into children, and fix the information stored about them at
     * this level.
     */
    if (addr.level > FSM_BOTTOM_LEVEL)
    {
        int         slot;
        bool        eof = false;

        for (slot = 0; slot < SlotsPerFSMPage; slot++)
        {
            int         child_avail;

            CHECK_FOR_INTERRUPTS();

            /* After we hit end-of-file, just clear the rest of the slots */
            if (!eof)
                child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
                                              &eof);
            else
                child_avail = 0;

            /* Update information about the child */
            if (fsm_get_avail(page, slot) != child_avail)
            {
                LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
                fsm_set_avail(BufferGetPage(buf), slot, child_avail);
                MarkBufferDirtyHint(buf, false);
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            }
        }
    }

    max_avail = fsm_get_max_avail(BufferGetPage(buf));

    /*
     * Reset the next slot pointer. This encourages the use of low-numbered
     * pages, increasing the chances that a later vacuum can truncate the
     * relation.
     */
    ((FSMPage) PageGetContents(page))->fp_next_slot = 0;

    ReleaseBuffer(buf);

    return max_avail;
}
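/*
 * For context: the non-recursive entry point that drives the recursion
 * above. A minimal sketch, assuming the conventional wrapper shape: it
 * simply kicks off the recursion at the root and discards the EOF flag.
 */
void
FreeSpaceMapVacuum(Relation rel)
{
    bool        dummy;

    /* Recursively scan the tree, starting at the root */
    (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy);
}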
/*
 * Search the tree for a heap page with at least min_cat of free space
 */
static BlockNumber
fsm_search(Relation rel, uint8 min_cat)
{
    int         restarts = 0;
    FSMAddress  addr = FSM_ROOT_ADDRESS;

    for (;;)
    {
        int         slot;
        Buffer      buf;
        uint8       max_avail = 0;

        /* Read the FSM page. */
        buf = fsm_readbuf(rel, addr, false);

        /* Search within the page */
        if (BufferIsValid(buf))
        {
            LockBuffer(buf, BUFFER_LOCK_SHARE);
            slot = fsm_search_avail(buf, min_cat,
                                    (addr.level == FSM_BOTTOM_LEVEL),
                                    false);
            if (slot == -1)
                max_avail = fsm_get_max_avail(BufferGetPage(buf));
            UnlockReleaseBuffer(buf);
        }
        else
            slot = -1;

        if (slot != -1)
        {
            /*
             * Descend the tree, or return the found block if we're at the
             * bottom.
             */
            if (addr.level == FSM_BOTTOM_LEVEL)
                return fsm_get_heap_blk(addr, slot);

            addr = fsm_get_child(addr, slot);
        }
        else if (addr.level == FSM_ROOT_LEVEL)
        {
            /*
             * At the root, failure means there's no page with enough free
             * space in the FSM. Give up.
             */
            return InvalidBlockNumber;
        }
        else
        {
            uint16      parentslot;
            FSMAddress  parent;

            /*
             * At lower level, failure can happen if the value in the upper-
             * level node didn't reflect the value on the lower page. Update
             * the upper node, to avoid falling into the same trap again, and
             * start over.
             *
             * There's a race condition here, if another backend updates this
             * page right after we release it, and gets the lock on the parent
             * page before us. We'll then update the parent page with the now
             * stale information we had. It's OK, because it should happen
             * rarely, and will be fixed by the next vacuum.
             */
            parent = fsm_get_parent(addr, &parentslot);
            fsm_set_and_search(rel, parent, parentslot, max_avail, 0);

            /*
             * If the upper pages are badly out of date, we might need to loop
             * quite a few times, updating them as we go. Any inconsistencies
             * should eventually be corrected and the loop should end. Looping
             * indefinitely is nevertheless scary, so provide an emergency
             * valve.
             */
            if (restarts++ > 10000)
                return InvalidBlockNumber;

            /* Start search all over from the root */
            addr = FSM_ROOT_ADDRESS;
        }
    }
}
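/*
 * For reference: sketches of the address arithmetic the search loop above
 * relies on. Each FSM page has SlotsPerFSMPage slots, and slot n of a page
 * at level L addresses the n'th child page at level L-1. These mirror the
 * fsm_get_child/fsm_get_parent helpers defined elsewhere in freespace.c;
 * treat the details as assumptions from that surrounding code.
 */
static FSMAddress
fsm_get_child(FSMAddress parent, uint16 slot)
{
    FSMAddress  child;

    Assert(parent.level > FSM_BOTTOM_LEVEL);

    /* Child pages at the next level down are numbered densely per slot */
    child.level = parent.level - 1;
    child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;

    return child;
}

static FSMAddress
fsm_get_parent(FSMAddress child, uint16 *slot)
{
    FSMAddress  parent;

    Assert(child.level < FSM_ROOT_LEVEL);

    /* Invert the child numbering to find the parent page and slot */
    parent.level = child.level + 1;
    parent.logpageno = child.logpageno / SlotsPerFSMPage;
    *slot = child.logpageno % SlotsPerFSMPage;

    return parent;
}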
/*
 * Recursive guts of FreeSpaceMapVacuum
 *
 * Examine the FSM page indicated by addr, as well as its children, updating
 * upper-level nodes that cover the heap block range from start to end-1.
 * (It's okay if end is beyond the actual end of the map.)
 * Return the maximum freespace value on this page.
 *
 * If addr is past the end of the FSM, set *eof_p to true and return 0.
 *
 * This traverses the tree in depth-first order. The tree is stored
 * physically in depth-first order, so this should be pretty I/O efficient.
 */
static uint8
fsm_vacuum_page(Relation rel, FSMAddress addr,
                BlockNumber start, BlockNumber end,
                bool *eof_p)
{
    Buffer      buf;
    Page        page;
    uint8       max_avail;

    /* Read the page if it exists, or return EOF */
    buf = fsm_readbuf(rel, addr, false);
    if (!BufferIsValid(buf))
    {
        *eof_p = true;
        return 0;
    }
    else
        *eof_p = false;

    page = BufferGetPage(buf);

    /*
     * If we're above the bottom level, recurse into children, and fix the
     * information stored about them at this level.
     */
    if (addr.level > FSM_BOTTOM_LEVEL)
    {
        FSMAddress  fsm_start,
                    fsm_end;
        uint16      fsm_start_slot,
                    fsm_end_slot;
        int         slot,
                    start_slot,
                    end_slot;
        bool        eof = false;

        /*
         * Compute the range of slots we need to update on this page, given
         * the requested range of heap blocks to consider. The first slot to
         * update is the one covering the "start" block, and the last slot is
         * the one covering "end - 1". (Some of this work will be duplicated
         * in each recursive call, but it's cheap enough to not worry about.)
         */
        fsm_start = fsm_get_location(start, &fsm_start_slot);
        fsm_end = fsm_get_location(end - 1, &fsm_end_slot);

        while (fsm_start.level < addr.level)
        {
            fsm_start = fsm_get_parent(fsm_start, &fsm_start_slot);
            fsm_end = fsm_get_parent(fsm_end, &fsm_end_slot);
        }
        Assert(fsm_start.level == addr.level);

        if (fsm_start.logpageno == addr.logpageno)
            start_slot = fsm_start_slot;
        else if (fsm_start.logpageno > addr.logpageno)
            start_slot = SlotsPerFSMPage;   /* shouldn't get here... */
        else
            start_slot = 0;

        if (fsm_end.logpageno == addr.logpageno)
            end_slot = fsm_end_slot;
        else if (fsm_end.logpageno > addr.logpageno)
            end_slot = SlotsPerFSMPage - 1;
        else
            end_slot = -1;      /* shouldn't get here... */

        for (slot = start_slot; slot <= end_slot; slot++)
        {
            int         child_avail;

            CHECK_FOR_INTERRUPTS();

            /* After we hit end-of-file, just clear the rest of the slots */
            if (!eof)
                child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
                                              start, end, &eof);
            else
                child_avail = 0;

            /* Update information about the child */
            if (fsm_get_avail(page, slot) != child_avail)
            {
                LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
                fsm_set_avail(page, slot, child_avail);
                MarkBufferDirtyHint(buf, false);
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            }
        }
    }

    /* Now get the maximum value on the page, to return to caller */
    max_avail = fsm_get_max_avail(page);

    /*
     * Reset the next slot pointer. This encourages the use of low-numbered
     * pages, increasing the chances that a later vacuum can truncate the
     * relation. We don't bother with a lock here, nor with marking the page
     * dirty if it wasn't already, since this is just a hint.
     */
    ((FSMPage) PageGetContents(page))->fp_next_slot = 0;

    ReleaseBuffer(buf);

    return max_avail;
}
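/*
 * For context: the entry point that drives the range-aware recursion above,
 * and that FreeSpaceMapTruncateRel below calls. A minimal sketch, assuming
 * the conventional wrapper shape: it starts the recursion at the root
 * whenever the requested block range is non-empty, discarding the EOF flag.
 */
void
FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
{
    bool        dummy;

    /* Recursively scan the tree, starting at the root */
    if (end > start)
        (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, start, end, &dummy);
}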
/*
 * FreeSpaceMapTruncateRel - adjust for truncation of a relation.
 *
 * The caller must hold AccessExclusiveLock on the relation, to ensure that
 * other backends receive the smgr invalidation event that this function sends
 * before they access the FSM again.
 *
 * nblocks is the new size of the heap.
 */
void
FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
{
    BlockNumber new_nfsmblocks;
    FSMAddress  first_removed_address;
    uint16      first_removed_slot;
    Buffer      buf;

    RelationOpenSmgr(rel);

    /*
     * If no FSM has been created yet for this relation, there's nothing to
     * truncate.
     */
    if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
        return;

    /* Get the location in the FSM of the first removed heap block */
    first_removed_address = fsm_get_location(nblocks, &first_removed_slot);

    /*
     * Zero out the tail of the last remaining FSM page. If the slot
     * representing the first removed heap block is at a page boundary, as the
     * first slot on the FSM page that first_removed_address points to, we can
     * just truncate that page altogether.
     */
    if (first_removed_slot > 0)
    {
        buf = fsm_readbuf(rel, first_removed_address, false);
        if (!BufferIsValid(buf))
            return;             /* nothing to do; the FSM was already smaller */
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

        /* NO EREPORT(ERROR) from here till changes are logged */
        START_CRIT_SECTION();

        fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);

        /*
         * Truncation of a relation is WAL-logged at a higher level, and we
         * will be called at WAL replay. But if checksums are enabled, we need
         * to still write a WAL record to protect against a torn page, if the
         * page is flushed to disk before the truncation WAL record. We cannot
         * use MarkBufferDirtyHint here, because that will not dirty the page
         * during recovery.
         */
        MarkBufferDirty(buf);
        if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
            log_newpage_buffer(buf, false);

        END_CRIT_SECTION();

        UnlockReleaseBuffer(buf);

        new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
    }
    else
    {
        new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
        if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks)
            return;             /* nothing to do; the FSM was already smaller */
    }

    /* Truncate the unused FSM pages, and send smgr inval message */
    smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks);

    /*
     * We might as well update the local smgr_fsm_nblocks setting.
     * smgrtruncate sent an smgr cache inval message, which will cause other
     * backends to invalidate their copy of smgr_fsm_nblocks, and this one too
     * at the next command boundary. But this ensures it isn't outright wrong
     * until then.
     */
    if (rel->rd_smgr)
        rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;

    /*
     * Update upper-level FSM pages to account for the truncation. This is
     * important because the just-truncated pages were likely marked as
     * all-free, and would be preferentially selected.
     */
    FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
}