/* * mdimmedsync() -- Immediately sync a relation to stable storage. * * Note that only writes already issued are synced; this routine knows * nothing of dirty buffers that may exist inside the buffer manager. */ bool mdimmedsync(SMgrRelation reln) { MdfdVec *v; BlockNumber curnblk; /* * NOTE: mdnblocks makes sure we have opened all existing segments, so * that fsync loop will get them all! */ curnblk = mdnblocks(reln); if (curnblk == InvalidBlockNumber) return false; /* mdnblocks failed */ v = mdopen(reln, false); #ifndef LET_OS_MANAGE_FILESIZE while (v != NULL) { if (FileSync(v->mdfd_vfd) < 0) return false; v = v->mdfd_chain; } #else if (FileSync(v->mdfd_vfd) < 0) return false; #endif return true; }
/* * mdwrite() -- Write the supplied block at the appropriate location. * * This is to be used only for updating already-existing blocks of a * relation (ie, those before the current EOF). To extend a relation, * use mdextend(). */ void mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool isTemp) { off_t seekpos; int nbytes; MdfdVec *v; /* This assert is too expensive to have on normally ... */ #ifdef CHECK_WRITE_VS_EXTEND Assert(blocknum < mdnblocks(reln, forknum)); #endif TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode); v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) ereport(ERROR, (errcode_for_file_access(), errmsg("could not seek to block %u in file \"%s\": %m", blocknum, FilePathName(v->mdfd_vfd)))); nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ); TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum, reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode, nbytes, BLCKSZ); if (nbytes != BLCKSZ) { if (nbytes < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not write block %u in file \"%s\": %m", blocknum, FilePathName(v->mdfd_vfd)))); /* short write: complain appropriately */ ereport(ERROR, (errcode(ERRCODE_DISK_FULL), errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes", blocknum, FilePathName(v->mdfd_vfd), nbytes, BLCKSZ), errhint("Check free disk space."))); } if (!isTemp) register_dirty_segment(reln, forknum, v); }
/* * mdread() -- Read the specified block from a relation. */ bool mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) { bool status; long seekpos; int nbytes; MdfdVec *v; v = _mdfd_getseg(reln, blocknum, false); #ifndef LET_OS_MANAGE_FILESIZE seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE))); Assert(seekpos < BLCKSZ * RELSEG_SIZE); #else seekpos = (long) (BLCKSZ * (blocknum)); #endif if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) return false; status = true; if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { /* * If we are at or past EOF, return zeroes without complaining. Also * substitute zeroes if we found a partial block at EOF. * * XXX this is really ugly, bad design. However the current * implementation of hash indexes requires it, because hash index * pages are initialized out-of-order. */ if (nbytes == 0 || (nbytes > 0 && mdnblocks(reln) == blocknum)) MemSet(buffer, 0, BLCKSZ); else status = false; } return status; }
/* * mdimmedsync() -- Immediately sync a relation to stable storage. * * Note that only writes already issued are synced; this routine knows * nothing of dirty buffers that may exist inside the buffer manager. */ void mdimmedsync(SMgrRelation reln, ForkNumber forknum) { MdfdVec *v; BlockNumber curnblk; /* * NOTE: mdnblocks makes sure we have opened all active segments, so that * fsync loop will get them all! */ curnblk = mdnblocks(reln, forknum); v = mdopen(reln, forknum, EXTENSION_FAIL); while (v != NULL) { if (FileSync(v->mdfd_vfd) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", FilePathName(v->mdfd_vfd)))); v = v->mdfd_chain; } }
/* * mdtruncate() -- Truncate relation to specified number of blocks. */ void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, bool isTemp) { MdfdVec *v; BlockNumber curnblk; BlockNumber priorblocks; /* * NOTE: mdnblocks makes sure we have opened all active segments, so that * truncation loop will get them all! */ curnblk = mdnblocks(reln, forknum); if (nblocks > curnblk) { /* Bogus request ... but no complaint if InRecovery */ if (InRecovery) return; ereport(ERROR, (errmsg("could not truncate file \"%s\" to %u blocks: it's only %u blocks now", relpath(reln->smgr_rnode, forknum), nblocks, curnblk))); } if (nblocks == curnblk) return; /* no work */ v = mdopen(reln, forknum, EXTENSION_FAIL); priorblocks = 0; while (v != NULL) { MdfdVec *ov = v; if (priorblocks > nblocks) { /* * This segment is no longer active (and has already been unlinked * from the mdfd_chain). We truncate the file, but do not delete * it, for reasons explained in the header comments. */ if (FileTruncate(v->mdfd_vfd, 0) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not truncate file \"%s\": %m", FilePathName(v->mdfd_vfd)))); if (!isTemp) register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st * segment */ pfree(ov); } else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) { /* * This is the last segment we want to keep. Truncate the file to * the right length, and clear chain link that points to any * remaining segments (which we shall zap). NOTE: if nblocks is * exactly a multiple K of RELSEG_SIZE, we will truncate the K+1st * segment to 0 length but keep it. This adheres to the invariant * given in the header comments. */ BlockNumber lastsegblocks = nblocks - priorblocks; if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not truncate file \"%s\" to %u blocks: %m", FilePathName(v->mdfd_vfd), nblocks))); if (!isTemp) register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; ov->mdfd_chain = NULL; } else { /* * We still need this segment and 0 or more blocks beyond it, so * nothing to do here. */ v = v->mdfd_chain; } priorblocks += RELSEG_SIZE; } }
/* * mdextend() -- Add a block to the specified relation. * * The semantics are nearly the same as mdwrite(): write at the * specified position. However, this is to be used for the case of * extending a relation (i.e., blocknum is at or beyond the current * EOF). Note that we assume writing a block beyond current EOF * causes intervening file space to become filled with zeroes. */ void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool isTemp) { off_t seekpos; int nbytes; MdfdVec *v; /* This assert is too expensive to have on normally ... */ #ifdef CHECK_WRITE_VS_EXTEND Assert(blocknum >= mdnblocks(reln, forknum)); #endif /* * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any * more --- we mustn't create a block whose number actually is * InvalidBlockNumber. */ if (blocknum == InvalidBlockNumber) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("cannot extend file \"%s\" beyond %u blocks", relpath(reln->smgr_rnode, forknum), InvalidBlockNumber))); v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); /* * Note: because caller usually obtained blocknum by calling mdnblocks, * which did a seek(SEEK_END), this seek is often redundant and will be * optimized away by fd.c. It's not redundant, however, if there is a * partial page at the end of the file. In that case we want to try to * overwrite the partial page with a full page. It's also not redundant * if bufmgr.c had to dump another buffer of the same file to make room * for the new page's buffer. */ if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) ereport(ERROR, (errcode_for_file_access(), errmsg("could not seek to block %u in file \"%s\": %m", blocknum, FilePathName(v->mdfd_vfd)))); if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { if (nbytes < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not extend file \"%s\": %m", FilePathName(v->mdfd_vfd)), errhint("Check free disk space."))); /* short write: complain appropriately */ ereport(ERROR, (errcode(ERRCODE_DISK_FULL), errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u", FilePathName(v->mdfd_vfd), nbytes, BLCKSZ, blocknum), errhint("Check free disk space."))); } if (!isTemp) register_dirty_segment(reln, forknum, v); Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); }
/* * mdtruncate() -- Truncate relation to specified number of blocks. * * Returns # of blocks or InvalidBlockNumber on error. */ BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) { MdfdVec *v; BlockNumber curnblk; #ifndef LET_OS_MANAGE_FILESIZE BlockNumber priorblocks; #endif /* * NOTE: mdnblocks makes sure we have opened all existing segments, so * that truncate/delete loop will get them all! */ curnblk = mdnblocks(reln); if (curnblk == InvalidBlockNumber) return InvalidBlockNumber; /* mdnblocks failed */ if (nblocks > curnblk) return InvalidBlockNumber; /* bogus request */ if (nblocks == curnblk) return nblocks; /* no work */ v = mdopen(reln, false); #ifndef LET_OS_MANAGE_FILESIZE priorblocks = 0; while (v != NULL) { MdfdVec *ov = v; if (priorblocks > nblocks) { /* * This segment is no longer wanted at all (and has already been * unlinked from the mdfd_chain). We truncate the file before * deleting it because if other backends are holding the file * open, the unlink will fail on some platforms. Better a * zero-size file gets left around than a big file... */ FileTruncate(v->mdfd_vfd, 0); FileUnlink(v->mdfd_vfd); v = v->mdfd_chain; Assert(ov != reln->md_fd); /* we never drop the 1st segment */ pfree(ov); } else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) { /* * This is the last segment we want to keep. Truncate the file to * the right length, and clear chain link that points to any * remaining segments (which we shall zap). NOTE: if nblocks is * exactly a multiple K of RELSEG_SIZE, we will truncate the K+1st * segment to 0 length but keep it. This is mainly so that the * right thing happens if nblocks==0. */ BlockNumber lastsegblocks = nblocks - priorblocks; if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0) return InvalidBlockNumber; if (!isTemp) { if (!register_dirty_segment(reln, v)) return InvalidBlockNumber; } v = v->mdfd_chain; ov->mdfd_chain = NULL; } else { /* * We still need this segment and 0 or more blocks beyond it, so * nothing to do here. */ v = v->mdfd_chain; } priorblocks += RELSEG_SIZE; } #else if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0) return InvalidBlockNumber; if (!isTemp) { if (!register_dirty_segment(reln, v)) return InvalidBlockNumber; } #endif return nblocks; }
/* * mdtruncate() -- Truncate relation to specified number of blocks. * * Returns # of blocks or InvalidBlockNumber on error. */ BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) { MdfdVec *v; BlockNumber curnblk; #ifndef LET_OS_MANAGE_FILESIZE BlockNumber priorblocks; #endif /* * NOTE: mdnblocks makes sure we have opened all active segments, so * that truncation loop will get them all! */ curnblk = mdnblocks(reln); if (curnblk == InvalidBlockNumber) return InvalidBlockNumber; /* mdnblocks failed */ if (nblocks > curnblk) return InvalidBlockNumber; /* bogus request */ if (nblocks == curnblk) return nblocks; /* no work */ v = mdopen(reln, false); #ifndef LET_OS_MANAGE_FILESIZE priorblocks = 0; while (v != NULL) { MdfdVec *ov = v; if (priorblocks > nblocks) { /* * This segment is no longer active (and has already been * unlinked from the mdfd_chain). We truncate the file, but do * not delete it, for reasons explained in the header comments. */ if (FileTruncate(v->mdfd_vfd, 0) < 0) return InvalidBlockNumber; if (!isTemp) { if (!register_dirty_segment(reln, v)) return InvalidBlockNumber; } v = v->mdfd_chain; Assert(ov != reln->md_fd); /* we never drop the 1st segment */ pfree(ov); } else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) { /* * This is the last segment we want to keep. Truncate the file to * the right length, and clear chain link that points to any * remaining segments (which we shall zap). NOTE: if nblocks is * exactly a multiple K of RELSEG_SIZE, we will truncate the K+1st * segment to 0 length but keep it. This adheres to the invariant * given in the header comments. */ BlockNumber lastsegblocks = nblocks - priorblocks; if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0) return InvalidBlockNumber; if (!isTemp) { if (!register_dirty_segment(reln, v)) return InvalidBlockNumber; } v = v->mdfd_chain; ov->mdfd_chain = NULL; } else { /* * We still need this segment and 0 or more blocks beyond it, so * nothing to do here. */ v = v->mdfd_chain; } priorblocks += RELSEG_SIZE; } #else if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0) return InvalidBlockNumber; if (!isTemp) { if (!register_dirty_segment(reln, v)) return InvalidBlockNumber; } #endif return nblocks; }