/*
 *	mdimmedsync() -- Force a relation's already-issued writes to stable storage.
 *
 * Only writes that have already been issued are synced; dirty buffers that
 * may still live inside the buffer manager are unknown to this routine.
 *
 * Returns true on success, false if mdnblocks reported failure or any
 * FileSync call failed.
 */
bool
mdimmedsync(SMgrRelation reln)
{
	MdfdVec    *seg;

	/*
	 * mdnblocks is called for its side effect: it makes sure every existing
	 * segment has been opened, so the fsync pass below reaches all of them.
	 */
	if (mdnblocks(reln) == InvalidBlockNumber)
		return false;			/* mdnblocks failed */

	seg = mdopen(reln, false);

#ifndef LET_OS_MANAGE_FILESIZE
	/* Walk the whole segment chain, syncing each file in turn. */
	for (; seg != NULL; seg = seg->mdfd_chain)
	{
		if (FileSync(seg->mdfd_vfd) < 0)
			return false;
	}
#else
	/* Only the file returned by mdopen is synced in this configuration. */
	if (FileSync(seg->mdfd_vfd) < 0)
		return false;
#endif

	return true;
}
/*
 *	_mdfd_getseg() -- Locate the segment of the relation that holds the
 *		requested block.
 *
 * Failures are reported through ereport.  The one exception: when
 * allowNotFound is true, a missing file (ENOENT) produces a NULL result
 * instead of an error.
 */
static MdfdVec *
_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound)
{
	MdfdVec    *seg = mdopen(reln, allowNotFound);

#ifndef LET_OS_MANAGE_FILESIZE
	BlockNumber segsleft;
	BlockNumber segno;

	if (seg == NULL)
		return NULL;			/* only possible if allowNotFound */

	/*
	 * segsleft counts how many chain links remain to be followed; segno is
	 * the number of the segment the next link leads to.
	 */
	segsleft = blkno / ((BlockNumber) RELSEG_SIZE);
	segno = 1;

	while (segsleft > 0)
	{
		if (seg->mdfd_chain == NULL)
		{
			int			openflags;

			/*
			 * A new segment is created only when it is the one containing
			 * the target block.  Otherwise a bug at a higher level that
			 * hands us an absurdly large blkno could make us create
			 * thousands of empty segment files on the way to the "target"
			 * (Sorcerer's Apprentice syndrome).  A single call should never
			 * need more than one new segment, so the limit is reasonable.
			 *
			 * During WAL recovery the limit is lifted and segments are
			 * created unconditionally.  There the blkno is presumed good
			 * (it presumably came from a CRC-checked WAL record), and
			 * creating the segments lets replay proceed even when the WAL
			 * contains a write into a high-numbered segment of a relation
			 * that was deleted later on.
			 */
			if (segsleft == 1 || InRecovery)
				openflags = O_CREAT;
			else
				openflags = 0;

			seg->mdfd_chain = _mdfd_openseg(reln, segno, openflags);

			if (seg->mdfd_chain == NULL)
			{
				if (allowNotFound && errno == ENOENT)
					return NULL;
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not open segment %u of relation %u/%u/%u (target block %u): %m",
								segno,
								reln->smgr_rnode.spcNode,
								reln->smgr_rnode.dbNode,
								reln->smgr_rnode.relNode,
								blkno)));
			}
		}

		seg = seg->mdfd_chain;
		segno++;
		segsleft--;
	}
#endif

	return seg;
}
/*
 *	mdnblocks() -- Report the number of blocks stored in a relation.
 *
 * Important side effect: every active segment of the relation is opened
 * and linked into the mdfd_chain list.  If this routine has not been
 * called, the chain only holds segments up to the last one actually
 * touched.
 *
 * Returns the block count, or InvalidBlockNumber on error.
 */
BlockNumber
mdnblocks(SMgrRelation reln)
{
	MdfdVec    *seg = mdopen(reln, false);

#ifndef LET_OS_MANAGE_FILESIZE
	BlockNumber segblocks;
	BlockNumber segindex = 0;

	/*
	 * Jump straight to the last segment already in the chain, so that we
	 * don't issue redundant seeks on the earlier ones.  Those were verified
	 * earlier to be exactly RELSEG_SIZE long, and rechecking them on every
	 * call would be pointless.
	 *
	 * NOTE: that assumption can only be false if another backend truncated
	 * the relation.  Higher code levels are relied on to deal with that by
	 * closing and re-opening the md fd, which happens via relcache flush.
	 * (The bgwriter doesn't take part in relcache flush and so may hold
	 * chain entries for inactive segments; that is harmless because the
	 * bgwriter never needs to compute a relation's size.)
	 */
	for (; seg->mdfd_chain != NULL; seg = seg->mdfd_chain)
		segindex++;

	for (;;)
	{
		segblocks = _mdnblocks(seg->mdfd_vfd, BLCKSZ);

		if (segblocks > ((BlockNumber) RELSEG_SIZE))
			elog(FATAL, "segment too big");

		/* A short segment is the last one: total up and report. */
		if (segblocks < ((BlockNumber) RELSEG_SIZE))
			return (segindex * ((BlockNumber) RELSEG_SIZE)) + segblocks;

		/*
		 * The segment is exactly RELSEG_SIZE blocks, so move on to the next.
		 */
		segindex++;

		if (seg->mdfd_chain == NULL)
		{
			/*
			 * O_CREAT is passed, so when the last segment is exactly
			 * RELSEG_SIZE long the following (zero-length) segment is
			 * created right away.  Perhaps not strictly necessary, but it
			 * keeps the logic simple.
			 */
			seg->mdfd_chain = _mdfd_openseg(reln, segindex, O_CREAT);
			if (seg->mdfd_chain == NULL)
				return InvalidBlockNumber;		/* failed? */
		}

		seg = seg->mdfd_chain;
	}
#else
	return _mdnblocks(seg->mdfd_vfd, BLCKSZ);
#endif
}
/*
 *	mdnblocks() -- Report the number of blocks stored in a relation fork.
 *
 * Important side effect: every active segment of the relation is opened
 * and linked into the mdfd_chain list.  If this routine has not been
 * called, the chain only holds segments up to the last one actually
 * touched.
 *
 * Failure to open a needed segment is reported with ereport(ERROR).
 */
BlockNumber
mdnblocks(SMgrRelation reln, ForkNumber forknum)
{
	MdfdVec    *seg = mdopen(reln, forknum, EXTENSION_FAIL);
	BlockNumber segindex = 0;

	/*
	 * Jump straight to the last segment already in the chain, so that we
	 * don't issue redundant seeks on the earlier ones.  Those were verified
	 * earlier to be exactly RELSEG_SIZE long, and rechecking them on every
	 * call would be pointless.
	 *
	 * NOTE: that assumption can only be false if another backend truncated
	 * the relation.  Higher code levels are relied on to deal with that by
	 * closing and re-opening the md fd, which happens via relcache flush.
	 * (The bgwriter doesn't take part in relcache flush and so may hold
	 * chain entries for inactive segments; that is harmless because the
	 * bgwriter never needs to compute a relation's size.)
	 */
	for (; seg->mdfd_chain != NULL; seg = seg->mdfd_chain)
		segindex++;

	for (;;)
	{
		BlockNumber segblocks = _mdnblocks(reln, forknum, seg);

		if (segblocks > ((BlockNumber) RELSEG_SIZE))
			elog(FATAL, "segment too big");

		/* A short segment is the last one: total up and report. */
		if (segblocks < ((BlockNumber) RELSEG_SIZE))
			return (segindex * ((BlockNumber) RELSEG_SIZE)) + segblocks;

		/*
		 * The segment is exactly RELSEG_SIZE blocks, so move on to the next.
		 */
		segindex++;

		if (seg->mdfd_chain == NULL)
		{
			/*
			 * O_CREAT is passed, so when the last segment is exactly
			 * RELSEG_SIZE long the following (zero-length) segment is
			 * created right away.  Perhaps not strictly necessary, but it
			 * keeps the logic simple.
			 */
			seg->mdfd_chain = _mdfd_openseg(reln, forknum, segindex, O_CREAT);
			if (seg->mdfd_chain == NULL)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not open file \"%s\": %m",
								_mdfd_segpath(reln, forknum, segindex))));
		}

		seg = seg->mdfd_chain;
	}
}
/*
 *	mdnblocks() -- Report the number of blocks stored in a relation.
 *
 * Important side effect: all segments of the relation are opened and
 * linked into the mdfd_chain list.  If this routine has not been called,
 * the chain only holds segments up to the last one actually touched.
 *
 * Returns the block count, or InvalidBlockNumber on error.
 */
BlockNumber
mdnblocks(SMgrRelation reln)
{
	MdfdVec    *seg = mdopen(reln, false);

#ifndef LET_OS_MANAGE_FILESIZE
	BlockNumber segblocks;
	BlockNumber segindex = 0;

	/*
	 * Jump straight to the last segment already in the chain, so that we
	 * don't issue redundant seeks on the earlier ones.  Those were verified
	 * earlier to be exactly RELSEG_SIZE long, and rechecking them on every
	 * call would be pointless.  (NOTE: that assumption can only be false if
	 * another backend truncated the relation.  Higher code levels are relied
	 * on to deal with that by closing and re-opening the md fd.)
	 */
	for (; seg->mdfd_chain != NULL; seg = seg->mdfd_chain)
		segindex++;

	for (;;)
	{
		segblocks = _mdnblocks(seg->mdfd_vfd, BLCKSZ);

		if (segblocks > ((BlockNumber) RELSEG_SIZE))
			elog(FATAL, "segment too big");

		/* A short segment is the last one: total up and report. */
		if (segblocks < ((BlockNumber) RELSEG_SIZE))
			return (segindex * ((BlockNumber) RELSEG_SIZE)) + segblocks;

		/*
		 * The segment is exactly RELSEG_SIZE blocks, so move on to the next.
		 */
		segindex++;

		if (seg->mdfd_chain == NULL)
		{
			/*
			 * O_CREAT is passed, so when the last segment is exactly
			 * RELSEG_SIZE long the following (zero-length) segment is
			 * created right away.  That is unnecessary but harmless, and
			 * testing for the case would cost more cycles than it seems
			 * worth.
			 */
			seg->mdfd_chain = _mdfd_openseg(reln, segindex, O_CREAT);
			if (seg->mdfd_chain == NULL)
				return InvalidBlockNumber;		/* failed? */
		}

		seg = seg->mdfd_chain;
	}
#else
	return _mdnblocks(seg->mdfd_vfd, BLCKSZ);
#endif
}
/*
 *	mdexists() -- Report whether the physical file for a fork exists.
 *
 * Note: lingering files whose deletion is still pending also count as
 * existing, so this returns true for them.
 */
bool
mdexists(SMgrRelation reln, ForkNumber forkNum)
{
	/*
	 * Drop any handle we already hold before probing; otherwise we would
	 * not notice that the fork has been unlinked since we opened it.
	 */
	mdclose(reln, forkNum);

	/* With EXTENSION_RETURN_NULL, a failed open shows up as a NULL result. */
	if (mdopen(reln, forkNum, EXTENSION_RETURN_NULL) == NULL)
		return false;

	return true;
}
/*
 *	mdimmedsync() -- Immediately sync a relation to stable storage.
 *
 * Note that only writes already issued are synced; this routine knows
 * nothing of dirty buffers that may exist inside the buffer manager.
 *
 * reln is the relation and forknum the fork to sync.  Nothing is returned;
 * an fsync failure on any segment is reported with ereport(ERROR).
 */
void
mdimmedsync(SMgrRelation reln, ForkNumber forknum)
{
	MdfdVec    *v;

	/*
	 * NOTE: mdnblocks makes sure we have opened all active segments, so that
	 * fsync loop will get them all!  The block count itself is of no
	 * interest here, so the result is discarded explicitly.
	 */
	(void) mdnblocks(reln, forknum);

	v = mdopen(reln, forknum, EXTENSION_FAIL);

	/* Sync every segment file in the chain, stopping at the first failure. */
	while (v != NULL)
	{
		if (FileSync(v->mdfd_vfd) < 0)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not fsync file \"%s\": %m",
							FilePathName(v->mdfd_vfd))));
		v = v->mdfd_chain;
	}
}
/*
 *	mdtruncate() -- Shrink a relation fork to the given number of blocks.
 *
 * A request for more blocks than currently exist is an error, except during
 * recovery where it is silently ignored.  A request equal to the current
 * size is a no-op.
 */
void
mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
		   bool isTemp)
{
	MdfdVec    *seg;
	BlockNumber oldblocks;
	BlockNumber segstart;

	/*
	 * NOTE: mdnblocks makes sure every active segment has been opened, so
	 * the truncation loop below reaches all of them.
	 */
	oldblocks = mdnblocks(reln, forknum);

	if (nblocks > oldblocks)
	{
		/* Bogus request ... but no complaint if InRecovery */
		if (InRecovery)
			return;
		ereport(ERROR,
				(errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now",
						relpath(reln->smgr_rnode, forknum),
						nblocks, oldblocks)));
	}

	if (nblocks == oldblocks)
		return;					/* no work */

	seg = mdopen(reln, forknum, EXTENSION_FAIL);

	/* segstart is the block number at which the current segment begins. */
	for (segstart = 0; seg != NULL; segstart += RELSEG_SIZE)
	{
		MdfdVec    *cur = seg;

		if (segstart > nblocks)
		{
			/*
			 * This segment is no longer active (and has already been
			 * detached from the mdfd_chain).  The file is truncated to zero
			 * length but not deleted, for reasons explained in the header
			 * comments.
			 */
			if (FileTruncate(cur->mdfd_vfd, 0) < 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not truncate file \"%s\": %m",
								FilePathName(cur->mdfd_vfd))));
			if (!isTemp)
				register_dirty_segment(reln, forknum, cur);

			seg = cur->mdfd_chain;

			/* we never drop the 1st segment */
			Assert(cur != reln->md_fd[forknum]);
			pfree(cur);
		}
		else if (segstart + ((BlockNumber) RELSEG_SIZE) > nblocks)
		{
			/*
			 * This is the last segment to be kept.  Cut the file to the
			 * right length and clear the chain link to any remaining
			 * segments (which get zapped on later iterations).  NOTE: when
			 * nblocks is exactly a multiple K of RELSEG_SIZE, the K+1st
			 * segment is truncated to zero length but kept.  This adheres
			 * to the invariant given in the header comments.
			 */
			BlockNumber keepblocks = nblocks - segstart;

			if (FileTruncate(cur->mdfd_vfd, (off_t) keepblocks * BLCKSZ) < 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not truncate file \"%s\" to %u blocks: %m",
								FilePathName(cur->mdfd_vfd),
								nblocks)));
			if (!isTemp)
				register_dirty_segment(reln, forknum, cur);

			seg = cur->mdfd_chain;
			cur->mdfd_chain = NULL;
		}
		else
		{
			/*
			 * This segment and zero or more blocks beyond it are still
			 * needed, so there is nothing to do but step to the next one.
			 */
			seg = cur->mdfd_chain;
		}
	}
}
/*
 *	mdtruncate() -- Truncate relation to specified number of blocks.
 *
 * reln is the relation, nblocks the desired new length in blocks, and
 * isTemp suppresses registration of the truncated segment as dirty.
 *
 * Returns # of blocks or InvalidBlockNumber on error.  Errors are: mdnblocks
 * failing, nblocks exceeding the current length, a failed truncate of a
 * segment that is being kept, or a failed register_dirty_segment.
 */
BlockNumber
mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
{
	MdfdVec    *v;
	BlockNumber curnblk;

#ifndef LET_OS_MANAGE_FILESIZE
	BlockNumber priorblocks;
#endif

	/*
	 * NOTE: mdnblocks makes sure we have opened all existing segments, so
	 * that truncate/delete loop will get them all!
	 */
	curnblk = mdnblocks(reln);
	if (curnblk == InvalidBlockNumber)
		return InvalidBlockNumber;		/* mdnblocks failed */
	if (nblocks > curnblk)
		return InvalidBlockNumber;		/* bogus request */
	if (nblocks == curnblk)
		return nblocks;			/* no work */

	v = mdopen(reln, false);

#ifndef LET_OS_MANAGE_FILESIZE
	/* priorblocks is the block number at which the current segment starts */
	priorblocks = 0;
	while (v != NULL)
	{
		MdfdVec    *ov = v;

		if (priorblocks > nblocks)
		{
			/*
			 * This segment is no longer wanted at all (and has already been
			 * unlinked from the mdfd_chain).  We truncate the file before
			 * deleting it because if other backends are holding the file
			 * open, the unlink will fail on some platforms.  Better a
			 * zero-size file gets left around than a big file...
			 *
			 * Both calls are deliberately best-effort: their results are
			 * not checked.
			 */
			FileTruncate(v->mdfd_vfd, 0);
			FileUnlink(v->mdfd_vfd);
			v = v->mdfd_chain;
			Assert(ov != reln->md_fd);	/* we never drop the 1st segment */
			pfree(ov);
		}
		else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
		{
			/*
			 * This is the last segment we want to keep.  Truncate the file to
			 * the right length, and clear chain link that points to any
			 * remaining segments (which we shall zap).  NOTE: if nblocks is
			 * exactly a multiple K of RELSEG_SIZE, we will truncate the K+1st
			 * segment to 0 length but keep it.  This is mainly so that the
			 * right thing happens if nblocks==0.
			 */
			BlockNumber lastsegblocks = nblocks - priorblocks;

			/*
			 * Widen to off_t before multiplying, so the byte length is not
			 * computed (and possibly wrapped) in BlockNumber arithmetic.
			 */
			if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0)
				return InvalidBlockNumber;
			if (!isTemp)
			{
				if (!register_dirty_segment(reln, v))
					return InvalidBlockNumber;
			}
			v = v->mdfd_chain;
			ov->mdfd_chain = NULL;
		}
		else
		{
			/*
			 * We still need this segment and 0 or more blocks beyond it, so
			 * nothing to do here.
			 */
			v = v->mdfd_chain;
		}
		priorblocks += RELSEG_SIZE;
	}
#else

	/*
	 * The whole relation lives in one file here, so nblocks can be large.
	 * Widen to off_t before multiplying so the byte length cannot wrap in
	 * BlockNumber arithmetic before it reaches FileTruncate.
	 */
	if (FileTruncate(v->mdfd_vfd, (off_t) nblocks * BLCKSZ) < 0)
		return InvalidBlockNumber;
	if (!isTemp)
	{
		if (!register_dirty_segment(reln, v))
			return InvalidBlockNumber;
	}
#endif

	return nblocks;
}
/*
 *	mdtruncate() -- Truncate relation to specified number of blocks.
 *
 * reln is the relation, nblocks the desired new length in blocks, and
 * isTemp suppresses registration of truncated segments as dirty.
 *
 * Returns # of blocks or InvalidBlockNumber on error.  Errors are: mdnblocks
 * failing, nblocks exceeding the current length, a failed FileTruncate, or
 * a failed register_dirty_segment.
 */
BlockNumber
mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
{
	MdfdVec    *v;
	BlockNumber curnblk;

#ifndef LET_OS_MANAGE_FILESIZE
	BlockNumber priorblocks;
#endif

	/*
	 * NOTE: mdnblocks makes sure we have opened all active segments, so
	 * that truncation loop will get them all!
	 */
	curnblk = mdnblocks(reln);
	if (curnblk == InvalidBlockNumber)
		return InvalidBlockNumber;		/* mdnblocks failed */
	if (nblocks > curnblk)
		return InvalidBlockNumber;		/* bogus request */
	if (nblocks == curnblk)
		return nblocks;			/* no work */

	v = mdopen(reln, false);

#ifndef LET_OS_MANAGE_FILESIZE
	/* priorblocks is the block number at which the current segment starts */
	priorblocks = 0;
	while (v != NULL)
	{
		MdfdVec    *ov = v;

		if (priorblocks > nblocks)
		{
			/*
			 * This segment is no longer active (and has already been
			 * unlinked from the mdfd_chain).  We truncate the file, but do
			 * not delete it, for reasons explained in the header comments.
			 */
			if (FileTruncate(v->mdfd_vfd, 0) < 0)
				return InvalidBlockNumber;
			if (!isTemp)
			{
				if (!register_dirty_segment(reln, v))
					return InvalidBlockNumber;
			}
			v = v->mdfd_chain;
			Assert(ov != reln->md_fd);	/* we never drop the 1st segment */
			pfree(ov);
		}
		else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
		{
			/*
			 * This is the last segment we want to keep.  Truncate the file to
			 * the right length, and clear chain link that points to any
			 * remaining segments (which we shall zap).  NOTE: if nblocks is
			 * exactly a multiple K of RELSEG_SIZE, we will truncate the K+1st
			 * segment to 0 length but keep it.  This adheres to the invariant
			 * given in the header comments.
			 */
			BlockNumber lastsegblocks = nblocks - priorblocks;

			/*
			 * Widen to off_t before multiplying, so the byte length is not
			 * computed (and possibly wrapped) in BlockNumber arithmetic.
			 */
			if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0)
				return InvalidBlockNumber;
			if (!isTemp)
			{
				if (!register_dirty_segment(reln, v))
					return InvalidBlockNumber;
			}
			v = v->mdfd_chain;
			ov->mdfd_chain = NULL;
		}
		else
		{
			/*
			 * We still need this segment and 0 or more blocks beyond it, so
			 * nothing to do here.
			 */
			v = v->mdfd_chain;
		}
		priorblocks += RELSEG_SIZE;
	}
#else

	/*
	 * The whole relation lives in one file here, so nblocks can be large.
	 * Widen to off_t before multiplying so the byte length cannot wrap in
	 * BlockNumber arithmetic before it reaches FileTruncate.
	 */
	if (FileTruncate(v->mdfd_vfd, (off_t) nblocks * BLCKSZ) < 0)
		return InvalidBlockNumber;
	if (!isTemp)
	{
		if (!register_dirty_segment(reln, v))
			return InvalidBlockNumber;
	}
#endif

	return nblocks;
}