static void
PersistentBuild_NonTransactionTruncate(RelFileNode *relFileNode)
{
    SMgrRelation smgrRelation;
    PersistentFileSysObjName fsObjName;

    PersistentFileSysObjName_SetRelationFile(
                                        &fsObjName,
                                        relFileNode,
                                        /* segmentFileNum */ 0,
                                        is_tablespace_shared);

    if (Debug_persistent_print)
        elog(Persistent_DebugPrintLevel(),
             "Non-transaction truncate of '%s'",
             PersistentFileSysObjName_ObjectName(&fsObjName));

    smgrRelation = smgropen(*relFileNode);

    smgrtruncate(
            smgrRelation,
            0,
            /* isTemp */ true,
            /* isLocalBuf */ false,
            /* persistentTid */ NULL,
            /* persistentSerialNum */ 0);

    smgrclose(smgrRelation);
}
/*
 * FreeSpaceMapTruncateRel - adjust for truncation of a relation.
 *
 * The caller must hold AccessExclusiveLock on the relation, to ensure that
 * other backends receive the smgr invalidation event that this function sends
 * before they access the FSM again.
 *
 * nblocks is the new size of the heap.
 */
void
FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
{
    BlockNumber new_nfsmblocks;
    FSMAddress  first_removed_address;
    uint16      first_removed_slot;
    Buffer      buf;

    RelationOpenSmgr(rel);

    /*
     * If no FSM has been created yet for this relation, there's nothing to
     * truncate.
     */
    if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
        return;

    /* Get the location in the FSM of the first removed heap block */
    first_removed_address = fsm_get_location(nblocks, &first_removed_slot);

    /*
     * Zero out the tail of the last remaining FSM page. If the slot
     * representing the first removed heap block is at a page boundary, as the
     * first slot on the FSM page that first_removed_address points to, we can
     * just truncate that page altogether.
     */
    if (first_removed_slot > 0)
    {
        buf = fsm_readbuf(rel, first_removed_address, false);
        if (!BufferIsValid(buf))
            return;             /* nothing to do; the FSM was already smaller */
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
        fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
        MarkBufferDirtyHint(buf, false);
        UnlockReleaseBuffer(buf);

        new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
    }
    else
    {
        new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
        if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks)
            return;             /* nothing to do; the FSM was already smaller */
    }

    /* Truncate the unused FSM pages, and send smgr inval message */
    smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks);

    /*
     * We might as well update the local smgr_fsm_nblocks setting.
     * smgrtruncate sent an smgr cache inval message, which will cause other
     * backends to invalidate their copy of smgr_fsm_nblocks, and this one too
     * at the next command boundary. But this ensures it isn't outright wrong
     * until then.
     */
    if (rel->rd_smgr)
        rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;
}
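/*
 * Illustrative sketch only (not part of the sources above): the boundary rule
 * used in FreeSpaceMapTruncateRel, pictured with a hypothetical
 * EXAMPLE_SLOTS_PER_FSM_PAGE constant. The real slot count and the
 * multi-level FSM addressing (fsm_get_location / fsm_logical_to_physical) are
 * deliberately ignored here. The point it shows: if the first removed heap
 * block maps to slot 0 of a leaf FSM page, that whole page can be dropped;
 * otherwise the page is kept and only its tail slots are zeroed.
 */
#define EXAMPLE_SLOTS_PER_FSM_PAGE 4000     /* hypothetical value, for illustration */

static unsigned
example_remaining_fsm_leaf_pages(unsigned new_heap_nblocks)
{
    unsigned first_removed_page = new_heap_nblocks / EXAMPLE_SLOTS_PER_FSM_PAGE;
    unsigned first_removed_slot = new_heap_nblocks % EXAMPLE_SLOTS_PER_FSM_PAGE;

    /* keep the partially used leaf page when the cut is not on a page boundary */
    return first_removed_page + (first_removed_slot > 0 ? 1 : 0);
}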
void
smgr_redo(XLogRecPtr lsn, XLogRecord *record)
{
    uint8       info = record->xl_info & ~XLR_INFO_MASK;

    /* Backup blocks are not used in smgr records */
    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));

    if (info == XLOG_SMGR_CREATE)
    {
        xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
        SMgrRelation reln;

        reln = smgropen(xlrec->rnode);
        smgrcreate(reln, MAIN_FORKNUM, true);
    }
    else if (info == XLOG_SMGR_TRUNCATE)
    {
        xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
        SMgrRelation reln;
        Relation    rel;

        reln = smgropen(xlrec->rnode);

        /*
         * Forcibly create relation if it doesn't exist (which suggests that
         * it was dropped somewhere later in the WAL sequence). As in
         * XLogOpenRelation, we prefer to recreate the rel and replay the log
         * as best we can until the drop is seen.
         */
        smgrcreate(reln, MAIN_FORKNUM, true);

        smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno, false);

        /* Also tell xlogutils.c about it */
        XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno);

        /* Truncate FSM and VM too */
        rel = CreateFakeRelcacheEntry(xlrec->rnode);

        if (smgrexists(reln, FSM_FORKNUM))
            FreeSpaceMapTruncateRel(rel, xlrec->blkno);
        if (smgrexists(reln, VISIBILITYMAP_FORKNUM))
            visibilitymap_truncate(rel, xlrec->blkno);

        FreeFakeRelcacheEntry(rel);
    }
    else
        elog(PANIC, "smgr_redo: unknown op code %u", info);
}
/*
 * visibilitymap_truncate - truncate the visibility map
 *
 * The caller must hold AccessExclusiveLock on the relation, to ensure that
 * other backends receive the smgr invalidation event that this function sends
 * before they access the VM again.
 *
 * nheapblocks is the new size of the heap.
 */
void
visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
{
    BlockNumber newnblocks;

    /* last remaining block, byte, and bit */
    BlockNumber truncBlock = HEAPBLK_TO_MAPBLOCK(nheapblocks);
    uint32      truncByte = HEAPBLK_TO_MAPBYTE(nheapblocks);
    uint8       truncBit = HEAPBLK_TO_MAPBIT(nheapblocks);

#ifdef TRACE_VISIBILITYMAP
    elog(DEBUG1, "vm_truncate %s %d", RelationGetRelationName(rel), nheapblocks);
#endif

    RelationOpenSmgr(rel);

    /*
     * If no visibility map has been created yet for this relation, there's
     * nothing to truncate.
     */
    if (!smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
        return;

    /*
     * Unless the new size is exactly at a visibility map page boundary, the
     * tail bits in the last remaining map page, representing truncated heap
     * blocks, need to be cleared. This is not only tidy, but also necessary
     * because we don't get a chance to clear the bits if the heap is extended
     * again.
     */
    if (truncByte != 0 || truncBit != 0)
    {
        Buffer      mapBuffer;
        Page        page;
        char       *map;

        newnblocks = truncBlock + 1;

        mapBuffer = vm_readbuf(rel, truncBlock, false);
        if (!BufferIsValid(mapBuffer))
        {
            /* nothing to do, the file was already smaller */
            return;
        }

        page = BufferGetPage(mapBuffer);
        map = PageGetContents(page);

        LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);

        /* Clear out the unwanted bytes. */
        MemSet(&map[truncByte + 1], 0, MAPSIZE - (truncByte + 1));

        /*
         * Mask out the unwanted bits of the last remaining byte.
         *
         * ((1 << 0) - 1) = 00000000
         * ((1 << 1) - 1) = 00000001
         * ...
         * ((1 << 6) - 1) = 00111111
         * ((1 << 7) - 1) = 01111111
         */
        map[truncByte] &= (1 << truncBit) - 1;

        MarkBufferDirty(mapBuffer);
        UnlockReleaseBuffer(mapBuffer);
    }
    else
        newnblocks = truncBlock;

    if (smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM) <= newnblocks)
    {
        /* nothing to do, the file was already smaller than requested size */
        return;
    }

    /* Truncate the unused VM pages, and send smgr inval message */
    smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks);

    /*
     * We might as well update the local smgr_vm_nblocks setting. smgrtruncate
     * sent an smgr cache inval message, which will cause other backends to
     * invalidate their copy of smgr_vm_nblocks, and this one too at the next
     * command boundary. But this ensures it isn't outright wrong until then.
     */
    if (rel->rd_smgr)
        rel->rd_smgr->smgr_vm_nblocks = newnblocks;
}
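/*
 * Standalone sketch (not taken from the sources above) of the bit masking
 * performed in visibilitymap_truncate: assuming one visibility bit per heap
 * block and eight heap blocks per map byte, keeping only the bits for the
 * surviving heap blocks in the last remaining byte is done with the mask
 * (1 << truncBit) - 1. The example compiles on its own and does not use the
 * real HEAPBLK_TO_MAPBIT macro.
 */
#include <stdio.h>

int
main(void)
{
    unsigned char map_byte = 0xFF;      /* all eight heap blocks marked all-visible */
    unsigned int  truncBit = 3;         /* first removed heap block within this byte */

    map_byte &= (1 << truncBit) - 1;    /* keep bits 0..2, clear bits 3..7 */
    printf("masked byte: 0x%02X\n", map_byte);  /* prints 0x07 */
    return 0;
}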
static int
FileRepPrimary_ResyncWrite(FileRepResyncHashEntry_s *entry)
{
    int             status = STATUS_OK;
    Page            page;
    Buffer          buf;
    BlockNumber     numBlocks;
    BlockNumber     blkno;
    SMgrRelation    smgr_relation;
    char            relidstr[OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1];
    XLogRecPtr      loc;
    int             count = 0;
    int             thresholdCount = 0;
    bool            mirrorDataLossOccurred = FALSE;

    switch (entry->relStorageMgr)
    {
        case PersistentFileSysRelStorageMgr_BufferPool:

            switch (entry->mirrorDataSynchronizationState)
            {
                case MirroredRelDataSynchronizationState_BufferPoolScanIncremental:
                case MirroredRelDataSynchronizationState_FullCopy:

                    smgr_relation = smgropen(entry->relFileNode);

                    numBlocks = smgrnblocks(smgr_relation);

                    snprintf(relidstr, sizeof(relidstr), "%u/%u/%u",
                             smgr_relation->smgr_rnode.spcNode,
                             smgr_relation->smgr_rnode.dbNode,
                             smgr_relation->smgr_rnode.relNode);

                    if (Debug_filerep_print)
                        elog(LOG, "resync buffer pool relation '%s' number of blocks '%d' ",
                             relidstr, numBlocks);

                    thresholdCount = Min(numBlocks, 1024);

                    /*
                     * required in order to report how many blocks were synchronized
                     * if gp_persistent_relation_node does not return that information
                     */
                    if (entry->mirrorBufpoolResyncChangedPageCount == 0)
                    {
                        entry->mirrorBufpoolResyncChangedPageCount =
                            numBlocks - entry->mirrorBufpoolResyncCkptBlockNum;
                    }

                    for (blkno = entry->mirrorBufpoolResyncCkptBlockNum; blkno < numBlocks; blkno++)
                    {
                        XLogRecPtr  endResyncLSN = (isFullResync() ?
                                                    FileRepResync_GetEndFullResyncLSN() :
                                                    FileRepResync_GetEndIncrResyncLSN());

#ifdef FAULT_INJECTOR
                        FaultInjector_InjectFaultIfSet(
                                                       FileRepResyncWorkerRead,
                                                       DDLNotSpecified,
                                                       "",      //databaseName
                                                       "");     // tableName
#endif

                        FileRepResync_SetReadBufferRequest();
                        buf = ReadBuffer_Resync(smgr_relation, blkno, relidstr);
                        FileRepResync_ResetReadBufferRequest();

                        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

                        page = BufferGetPage(buf);

                        loc = PageGetLSN(page);

                        if (Debug_filerep_print)
                        {
                            elog(LOG,
                                 "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' "
                                 "lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
                                 relidstr,
                                 numBlocks,
                                 blkno,
                                 XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
                                 entry->mirrorBufpoolResyncCkptLoc.xlogid,
                                 entry->mirrorBufpoolResyncCkptLoc.xrecoff,
                                 XLogLocationToString(&loc),
                                 loc.xlogid,
                                 loc.xrecoff,
                                 XLogLocationToString(&endResyncLSN),
                                 endResyncLSN.xlogid,
                                 endResyncLSN.xrecoff);
                        }
                        else
                        {
                            char        tmpBuf[FILEREP_MAX_LOG_DESCRIPTION_LEN];

                            snprintf(tmpBuf, sizeof(tmpBuf),
                                     "full resync buffer pool identifier '%s' num blocks '%d' blkno '%d' lsn begin change tracking '%s(%u/%u)' ",
                                     relidstr,
                                     numBlocks,
                                     blkno,
                                     XLogLocationToString(&entry->mirrorBufpoolResyncCkptLoc),
                                     entry->mirrorBufpoolResyncCkptLoc.xlogid,
                                     entry->mirrorBufpoolResyncCkptLoc.xrecoff);

                            FileRep_InsertConfigLogEntry(tmpBuf);

                            snprintf(tmpBuf, sizeof(tmpBuf),
                                     "full resync buffer pool identifier '%s' lsn page '%s(%u/%u)' lsn end change tracking '%s(%u/%u)' ",
                                     relidstr,
                                     XLogLocationToString(&loc),
                                     loc.xlogid,
                                     loc.xrecoff,
                                     XLogLocationToString(&endResyncLSN),
                                     endResyncLSN.xlogid,
                                     endResyncLSN.xrecoff);

                            FileRep_InsertConfigLogEntry(tmpBuf);
                        }

                        if (XLByteLE(PageGetLSN(page), endResyncLSN) &&
                            XLByteLE(entry->mirrorBufpoolResyncCkptLoc, PageGetLSN(page)))
                        {
                            smgrwrite(smgr_relation,
                                      blkno,
                                      (char *) BufferGetBlock(buf),
                                      FALSE);
                        }

#ifdef FAULT_INJECTOR
                        FaultInjector_InjectFaultIfSet(
                                                       FileRepResyncWorker,
                                                       DDLNotSpecified,
                                                       "",      // databaseName
                                                       "");     // tableName
#endif

                        UnlockReleaseBuffer(buf);

                        if (count > thresholdCount)
                        {
                            count = 0;
                            FileRepSubProcess_ProcessSignals();

                            if (!(FileRepSubProcess_GetState() == FileRepStateReady &&
                                  dataState == DataStateInResync))
                            {
                                mirrorDataLossOccurred = TRUE;
                                break;
                            }
                        }
                        else
                            count++;
                    }

                    if (mirrorDataLossOccurred)
                        break;

                    if (entry->mirrorDataSynchronizationState != MirroredRelDataSynchronizationState_FullCopy)
                    {
                        LockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);

                        numBlocks = smgrnblocks(smgr_relation);

                        smgrtruncate(smgr_relation,
                                     numBlocks,
                                     TRUE /* isTemp, TRUE means to not record in XLOG */,
                                     FALSE /* isLocalBuf */,
                                     &entry->persistentTid,
                                     entry->persistentSerialNum);

                        UnlockRelationForResyncExtension(&smgr_relation->smgr_rnode, ExclusiveLock);
                    }

                    smgrimmedsync(smgr_relation);

                    smgrclose(smgr_relation);
                    smgr_relation = NULL;
                    break;

                case MirroredRelDataSynchronizationState_None:
                case MirroredRelDataSynchronizationState_DataSynchronized:
                    break;

                default:
                    ereport(LOG,
                            (errmsg("could not resynchronize relation '%u/%u/%u' "
                                    "mirror synchronization state:'%s(%d)' ",
                                    entry->relFileNode.relNode,
                                    entry->relFileNode.spcNode,
                                    entry->relFileNode.dbNode,
                                    MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
                                    entry->mirrorDataSynchronizationState)));
                    break;
            }
            break;

        case PersistentFileSysRelStorageMgr_AppendOnly:
        {
            MirroredAppendOnlyOpen  mirroredOpen;
            int                     primaryError;
            bool                    mirrorDataLossOccurred;
            char                   *buffer = NULL;
            int64                   endOffset = entry->mirrorAppendOnlyNewEof;
            int64                   startOffset = entry->mirrorAppendOnlyLossEof;
            int32                   bufferLen = 0;
            int                     retval = 0;

            switch (entry->mirrorDataSynchronizationState)
            {
                case MirroredRelDataSynchronizationState_AppendOnlyCatchup:
                case MirroredRelDataSynchronizationState_FullCopy:

                    /*
                     * required in order to report how many blocks were synchronized
                     * if gp_persistent_relation_node does not return that information
                     */
                    if (entry->mirrorBufpoolResyncChangedPageCount == 0)
                    {
                        entry->mirrorBufpoolResyncChangedPageCount =
                            (endOffset - startOffset) / BLCKSZ;
                    }

                    /*
                     * The MirroredAppendOnly_OpenResynchonize routine knows we are a resynch worker and
                     * will open BOTH, but write only the MIRROR!!!
                     */
                    MirroredAppendOnly_OpenResynchonize(
                                        &mirroredOpen,
                                        &entry->relFileNode,
                                        entry->segmentFileNum,
                                        startOffset,
                                        &primaryError,
                                        &mirrorDataLossOccurred);
                    if (primaryError != 0)
                    {
                        ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not open file %u/%u/%u.%u : %s",
                                        entry->relFileNode.dbNode,
                                        entry->relFileNode.spcNode,
                                        entry->relFileNode.relNode,
                                        entry->segmentFileNum,
                                        strerror(primaryError))));
                        break;
                    }

                    if (mirrorDataLossOccurred)
                        break;

                    /* AO and CO Data Store writes 64k size by default */
                    bufferLen = (Size) Min(2 * BLCKSZ, endOffset - startOffset);
                    buffer = (char *) palloc(bufferLen);
                    if (buffer == NULL)
                        ereport(ERROR,
                                (errcode(ERRCODE_OUT_OF_MEMORY),
                                 (errmsg("not enough memory for resynchronization"))));

                    MemSet(buffer, 0, bufferLen);

                    while (startOffset < endOffset)
                    {
                        retval = MirroredAppendOnly_Read(
                                        &mirroredOpen,
                                        buffer,
                                        bufferLen);

                        if (retval != bufferLen)
                        {
                            ereport(ERROR,
                                    (errcode_for_file_access(),
                                     errmsg("could not read from position:" INT64_FORMAT " in file %u/%u/%u.%u : %m",
                                            startOffset,
                                            entry->relFileNode.dbNode,
                                            entry->relFileNode.spcNode,
                                            entry->relFileNode.relNode,
                                            entry->segmentFileNum)));
                            break;
                        }

                        MirroredAppendOnly_Append(
                                        &mirroredOpen,
                                        buffer,
                                        bufferLen,
                                        &primaryError,
                                        &mirrorDataLossOccurred);

                        if (mirrorDataLossOccurred)
                            break;

                        Assert(primaryError == 0);  // No primary writes as resync worker.

                        startOffset += bufferLen;
                        /* AO and CO Data Store writes 64k size by default */
                        bufferLen = (Size) Min(2 * BLCKSZ, endOffset - startOffset);
                    }

                    if (buffer)
                    {
                        pfree(buffer);
                        buffer = NULL;
                    }

                    if (mirrorDataLossOccurred)
                        break;

                    /* Flush written data on Mirror */
                    MirroredAppendOnly_Flush(
                                        &mirroredOpen,
                                        &primaryError,
                                        &mirrorDataLossOccurred);
                    if (mirrorDataLossOccurred)
                        break;

                    Assert(primaryError == 0);  // Not flushed on primary as resync worker.

                    /* Close Primary and Mirror */
                    MirroredAppendOnly_Close(
                                        &mirroredOpen,
                                        &mirrorDataLossOccurred);
                    break;

                case MirroredRelDataSynchronizationState_None:
                case MirroredRelDataSynchronizationState_DataSynchronized:
                    break;

                default:
                    ereport(LOG,
                            (errmsg("could not resynchronize relation '%u/%u/%u' "
                                    "mirror synchronization state:'%s(%d)' ",
                                    entry->relFileNode.relNode,
                                    entry->relFileNode.spcNode,
                                    entry->relFileNode.dbNode,
                                    MirroredRelDataSynchronizationState_Name(entry->mirrorDataSynchronizationState),
                                    entry->mirrorDataSynchronizationState)));
                    break;
            }
            break;
        }                       //case

        default:
            Assert(0);
            break;
    }                           //switch

    if (mirrorDataLossOccurred)
        status = STATUS_ERROR;

    return status;
}
/*
 * RelationTruncate
 *      Physically truncate a relation to the specified number of blocks.
 *
 * This includes getting rid of any buffers for the blocks that are to be
 * dropped.
 */
void
RelationTruncate(Relation rel, BlockNumber nblocks)
{
    bool        fsm;
    bool        vm;

    /* Open it at the smgr level if not already done */
    RelationOpenSmgr(rel);

    /*
     * Make sure smgr_targblock etc aren't pointing somewhere past new end
     */
    rel->rd_smgr->smgr_targblock = InvalidBlockNumber;
    rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber;
    rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;

    /* Truncate the FSM first if it exists */
    fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM);
    if (fsm)
        FreeSpaceMapTruncateRel(rel, nblocks);

    /* Truncate the visibility map too if it exists. */
    vm = smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM);
    if (vm)
        visibilitymap_truncate(rel, nblocks);

    /*
     * We WAL-log the truncation before actually truncating, which means
     * trouble if the truncation fails. If we then crash, the WAL replay
     * likely isn't going to succeed in the truncation either, and cause a
     * PANIC. It's tempting to put a critical section here, but that cure
     * would be worse than the disease. It would turn a usually harmless
     * failure to truncate, that might spell trouble at WAL replay, into a
     * certain PANIC.
     */
    if (!rel->rd_istemp)
    {
        /*
         * Make an XLOG entry reporting the file truncation.
         */
        XLogRecPtr  lsn;
        XLogRecData rdata;
        xl_smgr_truncate xlrec;

        xlrec.blkno = nblocks;
        xlrec.rnode = rel->rd_node;

        rdata.data = (char *) &xlrec;
        rdata.len = sizeof(xlrec);
        rdata.buffer = InvalidBuffer;
        rdata.next = NULL;

        lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE, &rdata);

        /*
         * Flush, because otherwise the truncation of the main relation might
         * hit the disk before the WAL record, and the truncation of the FSM
         * or visibility map. If we crashed during that window, we'd be left
         * with a truncated heap, but the FSM or visibility map would still
         * contain entries for the non-existent heap pages.
         */
        if (fsm || vm)
            XLogFlush(lsn);
    }

    /* Do the real work */
    smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks, rel->rd_istemp);
}
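/*
 * Minimal usage sketch, not taken from the sources above: a hypothetical
 * caller of RelationTruncate. Per the comments in FreeSpaceMapTruncateRel and
 * visibilitymap_truncate, the caller is expected to hold AccessExclusiveLock
 * on the relation so that other backends receive the smgr invalidation before
 * they touch the FSM or VM again.
 */
static void
example_truncate_rel(Relation rel, BlockNumber new_nblocks)     /* hypothetical helper */
{
    /* the lock is normally held until end of transaction, not released here */
    LockRelation(rel, AccessExclusiveLock);

    RelationTruncate(rel, new_nblocks);
}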
/*
 * FreeSpaceMapTruncateRel - adjust for truncation of a relation.
 *
 * The caller must hold AccessExclusiveLock on the relation, to ensure that
 * other backends receive the smgr invalidation event that this function sends
 * before they access the FSM again.
 *
 * nblocks is the new size of the heap.
 */
void
FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
{
    BlockNumber new_nfsmblocks;
    FSMAddress  first_removed_address;
    uint16      first_removed_slot;
    Buffer      buf;

    RelationOpenSmgr(rel);

    /*
     * If no FSM has been created yet for this relation, there's nothing to
     * truncate.
     */
    if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
        return;

    /* Get the location in the FSM of the first removed heap block */
    first_removed_address = fsm_get_location(nblocks, &first_removed_slot);

    /*
     * Zero out the tail of the last remaining FSM page. If the slot
     * representing the first removed heap block is at a page boundary, as the
     * first slot on the FSM page that first_removed_address points to, we can
     * just truncate that page altogether.
     */
    if (first_removed_slot > 0)
    {
        buf = fsm_readbuf(rel, first_removed_address, false);
        if (!BufferIsValid(buf))
            return;             /* nothing to do; the FSM was already smaller */
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

        /* NO EREPORT(ERROR) from here till changes are logged */
        START_CRIT_SECTION();

        fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);

        /*
         * Truncation of a relation is WAL-logged at a higher-level, and we
         * will be called at WAL replay. But if checksums are enabled, we need
         * to still write a WAL record to protect against a torn page, if the
         * page is flushed to disk before the truncation WAL record. We cannot
         * use MarkBufferDirtyHint here, because that will not dirty the page
         * during recovery.
         */
        MarkBufferDirty(buf);
        if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
            log_newpage_buffer(buf, false);

        END_CRIT_SECTION();

        UnlockReleaseBuffer(buf);

        new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
    }
    else
    {
        new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
        if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks)
            return;             /* nothing to do; the FSM was already smaller */
    }

    /* Truncate the unused FSM pages, and send smgr inval message */
    smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks);

    /*
     * We might as well update the local smgr_fsm_nblocks setting.
     * smgrtruncate sent an smgr cache inval message, which will cause other
     * backends to invalidate their copy of smgr_fsm_nblocks, and this one too
     * at the next command boundary. But this ensures it isn't outright wrong
     * until then.
     */
    if (rel->rd_smgr)
        rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;

    /*
     * Update upper-level FSM pages to account for the truncation. This is
     * important because the just-truncated pages were likely marked as
     * all-free, and would be preferentially selected.
     */
    FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
}
void
smgr_redo(XLogRecPtr lsn, XLogRecord *record)
{
    uint8       info = record->xl_info & ~XLR_INFO_MASK;

    /* Backup blocks are not used in smgr records */
    Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));

    if (info == XLOG_SMGR_CREATE)
    {
        xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
        SMgrRelation reln;

        reln = smgropen(xlrec->rnode, InvalidBackendId);
        smgrcreate(reln, xlrec->forkNum, true);
    }
    else if (info == XLOG_SMGR_TRUNCATE)
    {
        xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
        SMgrRelation reln;
        Relation    rel;

        reln = smgropen(xlrec->rnode, InvalidBackendId);

        /*
         * Forcibly create relation if it doesn't exist (which suggests that
         * it was dropped somewhere later in the WAL sequence). As in
         * XLogReadBuffer, we prefer to recreate the rel and replay the log as
         * best we can until the drop is seen.
         */
        smgrcreate(reln, MAIN_FORKNUM, true);

        /*
         * Before we perform the truncation, update minimum recovery point to
         * cover this WAL record. Once the relation is truncated, there's no
         * going back. The buffer manager enforces the WAL-first rule for
         * normal updates to relation files, so that the minimum recovery
         * point is always updated before the corresponding change in the
         * data file is flushed to disk. We have to do the same manually here.
         *
         * Doing this before the truncation means that if the truncation fails
         * for some reason, you cannot start up the system even after restart,
         * until you fix the underlying situation so that the truncation will
         * succeed. Alternatively, we could update the minimum recovery point
         * after truncation, but that would leave a small window where the
         * WAL-first rule could be violated.
         */
        XLogFlush(lsn);

        smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno);

        /* Also tell xlogutils.c about it */
        XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno);

        /* Truncate FSM and VM too */
        rel = CreateFakeRelcacheEntry(xlrec->rnode);

        if (smgrexists(reln, FSM_FORKNUM))
            FreeSpaceMapTruncateRel(rel, xlrec->blkno);
        if (smgrexists(reln, VISIBILITYMAP_FORKNUM))
            visibilitymap_truncate(rel, xlrec->blkno);

        FreeFakeRelcacheEntry(rel);
    }
    else
        elog(PANIC, "smgr_redo: unknown op code %u", info);
}
/*
 * visibilitymap_truncate - truncate the visibility map
 */
void
visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
{
    BlockNumber newnblocks;

    /* last remaining block, byte, and bit */
    BlockNumber truncBlock = HEAPBLK_TO_MAPBLOCK(nheapblocks);
    uint32      truncByte = HEAPBLK_TO_MAPBYTE(nheapblocks);
    uint8       truncBit = HEAPBLK_TO_MAPBIT(nheapblocks);

#ifdef TRACE_VISIBILITYMAP
    elog(DEBUG1, "vm_truncate %s %d", RelationGetRelationName(rel), nheapblocks);
#endif

    /*
     * If no visibility map has been created yet for this relation, there's
     * nothing to truncate.
     */
    if (!smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
        return;

    /*
     * Unless the new size is exactly at a visibility map page boundary, the
     * tail bits in the last remaining map page, representing truncated heap
     * blocks, need to be cleared. This is not only tidy, but also necessary
     * because we don't get a chance to clear the bits if the heap is extended
     * again.
     */
    if (truncByte != 0 || truncBit != 0)
    {
        Buffer      mapBuffer;
        Page        page;
        char       *map;

        newnblocks = truncBlock + 1;

        mapBuffer = vm_readbuf(rel, truncBlock, false);
        if (!BufferIsValid(mapBuffer))
        {
            /* nothing to do, the file was already smaller */
            return;
        }

        page = BufferGetPage(mapBuffer);
        map = PageGetContents(page);

        LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);

        /* Clear out the unwanted bytes. */
        MemSet(&map[truncByte + 1], 0, MAPSIZE - (truncByte + 1));

        /*
         * Mask out the unwanted bits of the last remaining byte.
         *
         * ((1 << 0) - 1) = 00000000
         * ((1 << 1) - 1) = 00000001
         * ...
         * ((1 << 6) - 1) = 00111111
         * ((1 << 7) - 1) = 01111111
         */
        map[truncByte] &= (1 << truncBit) - 1;

        MarkBufferDirty(mapBuffer);
        UnlockReleaseBuffer(mapBuffer);
    }
    else
        newnblocks = truncBlock;

    if (smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM) < newnblocks)
    {
        /* nothing to do, the file was already smaller than requested size */
        return;
    }

    smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks, rel->rd_istemp);

    /*
     * Need to invalidate the relcache entry, because rd_vm_nblocks seen by
     * other backends is no longer valid.
     */
    if (!InRecovery)
        CacheInvalidateRelcache(rel);

    rel->rd_vm_nblocks = newnblocks;
}
/*
 * reindex_index - This routine is used to recreate a single index
 */
void
reindex_index(Oid indexId)
{
    Relation    iRel,
                heapRelation;
    IndexInfo  *indexInfo;
    Oid         heapId;
    bool        inplace;

    /*
     * Open our index relation and get an exclusive lock on it.
     *
     * Note: for REINDEX INDEX, doing this before opening the parent heap
     * relation means there's a possibility for deadlock failure against
     * another xact that is doing normal accesses to the heap and index.
     * However, it's not real clear why you'd be wanting to do REINDEX INDEX
     * on a table that's in active use, so I'd rather have the protection of
     * making sure the index is locked down. In the REINDEX TABLE and REINDEX
     * DATABASE cases, there is no problem because caller already holds
     * exclusive lock on the parent table.
     */
    iRel = index_open(indexId);
    LockRelation(iRel, AccessExclusiveLock);

    /* Get OID of index's parent table */
    heapId = iRel->rd_index->indrelid;

    /* Open and lock the parent heap relation */
    heapRelation = heap_open(heapId, AccessExclusiveLock);

    SetReindexProcessing(heapId, indexId);

    /*
     * If it's a shared index, we must do inplace processing (because we have
     * no way to update relfilenode in other databases). Otherwise we can do
     * it the normal transaction-safe way.
     *
     * Since inplace processing isn't crash-safe, we only allow it in a
     * standalone backend. (In the REINDEX TABLE and REINDEX DATABASE cases,
     * the caller should have detected this.)
     */
    inplace = iRel->rd_rel->relisshared;

    if (inplace && IsUnderPostmaster)
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
                        RelationGetRelationName(iRel))));

    /* Fetch info needed for index_build */
    indexInfo = BuildIndexInfo(iRel);

    if (inplace)
    {
        /*
         * Release any buffers associated with this index. If they're dirty,
         * they're just dropped without bothering to flush to disk.
         */
        DropRelationBuffers(iRel);

        /* Now truncate the actual data and set blocks to zero */
        smgrtruncate(DEFAULT_SMGR, iRel, 0);
        iRel->rd_nblocks = 0;
        iRel->rd_targblock = InvalidBlockNumber;
    }
    else
    {
        /*
         * We'll build a new physical relation for the index.
         */
        setNewRelfilenode(iRel);
    }

    /* Initialize the index and rebuild */
    index_build(heapRelation, iRel, indexInfo);

    /*
     * index_build will close both the heap and index relations (but not give
     * up the locks we hold on them). So we're done.
     */
    SetReindexProcessing(InvalidOid, InvalidOid);
}