/* * Flush dirty pages to disk during checkpoint or database shutdown */ void SimpleLruFlush(SlruCtl ctl, bool checkpoint) { SlruShared shared = ctl->shared; SlruFlushData fdata; int slotno; int pageno = 0; int i; bool ok; /* * Find and write dirty pages */ fdata.num_files = 0; LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); for (slotno = 0; slotno < shared->num_slots; slotno++) { SlruInternalWritePage(ctl, slotno, &fdata); /* * When called during a checkpoint, we cannot assert that the slot is * clean now, since another process might have re-dirtied it already. * That's okay. */ Assert(checkpoint || shared->page_status[slotno] == SLRU_PAGE_EMPTY || (shared->page_status[slotno] == SLRU_PAGE_VALID && !shared->page_dirty[slotno])); } LWLockRelease(shared->ControlLock); /* * Now fsync and close any files that were open */ ok = true; for (i = 0; i < fdata.num_files; i++) { if (ctl->do_fsync && pg_fsync(fdata.fd[i])) { slru_errcause = SLRU_FSYNC_FAILED; slru_errno = errno; pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT; ok = false; } if (close(fdata.fd[i])) { slru_errcause = SLRU_CLOSE_FAILED; slru_errno = errno; pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT; ok = false; } } if (!ok) SlruReportIOError(ctl, pageno, InvalidTransactionId); }
/* * Return whether the given page exists on disk. * * A false return means that either the file does not exist, or that it's not * large enough to contain the given page. */ bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno) { int segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; int fd; bool result; off_t endpos; SlruFileName(ctl, path, segno); fd = OpenTransientFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) { /* expected: file doesn't exist */ if (errno == ENOENT) return false; /* report error normally */ slru_errcause = SLRU_OPEN_FAILED; slru_errno = errno; SlruReportIOError(ctl, pageno, 0); } if ((endpos = lseek(fd, 0, SEEK_END)) < 0) { slru_errcause = SLRU_OPEN_FAILED; slru_errno = errno; SlruReportIOError(ctl, pageno, 0); } result = endpos >= (off_t) (offset + BLCKSZ); CloseTransientFile(fd); return result; }
/* * Write a page from a shared buffer, if necessary. * Does nothing if the specified slot is not dirty. * * NOTE: only one write attempt is made here. Hence, it is possible that * the page is still dirty at exit (if someone else re-dirtied it during * the write). However, we *do* attempt a fresh write even if the page * is already being written; this is for checkpoints. * * Control lock must be held at entry, and will be held at exit. */ static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) { SlruShared shared = ctl->shared; int pageno = shared->page_number[slotno]; bool ok; /* If a write is in progress, wait for it to finish */ while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS && shared->page_number[slotno] == pageno) { SimpleLruWaitIO(ctl, slotno); } /* * Do nothing if page is not dirty, or if buffer no longer contains the * same page we were called for. */ if (!shared->page_dirty[slotno] || shared->page_status[slotno] != SLRU_PAGE_VALID || shared->page_number[slotno] != pageno) return; /* * Mark the slot write-busy, and clear the dirtybit. After this point, a * transaction status update on this page will mark it dirty again. */ shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS; shared->page_dirty[slotno] = false; /* Acquire per-buffer lock (cannot deadlock, see notes at top) */ LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); /* Release control lock while doing I/O */ LWLockRelease(shared->ControlLock); /* Do the write */ ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata); /* If we failed, and we're in a flush, better close the files */ if (!ok && fdata) { int i; for (i = 0; i < fdata->num_files; i++) close(fdata->fd[i]); } /* Re-acquire control lock and update page state */ LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); Assert(shared->page_number[slotno] == pageno && shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS); /* If we failed to write, mark the page dirty again */ if (!ok) shared->page_dirty[slotno] = true; shared->page_status[slotno] = SLRU_PAGE_VALID; LWLockRelease(shared->buffer_locks[slotno]); /* Now it's okay to ereport if we failed */ if (!ok) SlruReportIOError(ctl, pageno, InvalidTransactionId); }
/* * Find a page in a shared buffer, reading it in if necessary. * The page number must correspond to an already-initialized page. * * If write_ok is true then it is OK to return a page that is in * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure * that modification of the page is safe. If write_ok is false then we * will not return the page until it is not undergoing active I/O. * * The passed-in xid is used only for error reporting, and may be * InvalidTransactionId if no specific xid is associated with the action. * * Return value is the shared-buffer slot number now holding the page. * The buffer's LRU access info is updated. * * Control lock must be held at entry, and will be held at exit. */ int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid) { SlruShared shared = ctl->shared; /* Outer loop handles restart if we must wait for someone else's I/O */ for (;;) { int slotno; bool ok; /* See if page already is in memory; if not, pick victim slot */ slotno = SlruSelectLRUPage(ctl, pageno); /* Did we find the page in memory? */ if (shared->page_number[slotno] == pageno && shared->page_status[slotno] != SLRU_PAGE_EMPTY) { /* * If page is still being read in, we must wait for I/O. Likewise * if the page is being written and the caller said that's not OK. */ if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS || (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS && !write_ok)) { SimpleLruWaitIO(ctl, slotno); /* Now we must recheck state from the top */ continue; } /* Otherwise, it's ready to use */ SlruRecentlyUsed(shared, slotno); return slotno; } /* We found no match; assert we selected a freeable slot */ Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY || (shared->page_status[slotno] == SLRU_PAGE_VALID && !shared->page_dirty[slotno])); /* Mark the slot read-busy */ shared->page_number[slotno] = pageno; shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS; shared->page_dirty[slotno] = false; /* Acquire per-buffer lock (cannot deadlock, see notes at top) */ LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); /* Release control lock while doing I/O */ LWLockRelease(shared->ControlLock); /* Do the read */ ok = SlruPhysicalReadPage(ctl, pageno, slotno); /* Set the LSNs for this newly read-in page to zero */ SimpleLruZeroLSNs(ctl, slotno); /* Re-acquire control lock and update page state */ LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); Assert(shared->page_number[slotno] == pageno && shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS && !shared->page_dirty[slotno]); shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY; LWLockRelease(shared->buffer_locks[slotno]); /* Now it's okay to ereport if we failed */ if (!ok) SlruReportIOError(ctl, pageno, xid); SlruRecentlyUsed(shared, slotno); return slotno; } }
/* * Flush dirty pages to disk during checkpoint or database shutdown */ void SimpleLruFlush(SlruCtl ctl, bool allow_redirtied) { SlruShared shared = ctl->shared; SlruFlushData fdata; int slotno; int pageno = 0; int i; bool ok; /* * Find and write dirty pages */ fdata.num_files = 0; LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); for (slotno = 0; slotno < shared->num_slots; slotno++) { SlruInternalWritePage(ctl, slotno, &fdata); /* * In some places (e.g. checkpoints), we cannot assert that the slot * is clean now, since another process might have re-dirtied it * already. That's okay. */ Assert(allow_redirtied || shared->page_status[slotno] == SLRU_PAGE_EMPTY || (shared->page_status[slotno] == SLRU_PAGE_VALID && !shared->page_dirty[slotno])); } LWLockRelease(shared->ControlLock); /* * Now fsync and close any files that were open */ ok = true; for (i = 0; i < fdata.num_files; i++) { pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC); if (ctl->do_fsync && pg_fsync(fdata.fd[i])) { slru_errcause = SLRU_FSYNC_FAILED; slru_errno = errno; pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT; ok = false; } pgstat_report_wait_end(); if (CloseTransientFile(fdata.fd[i])) { slru_errcause = SLRU_CLOSE_FAILED; slru_errno = errno; pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT; ok = false; } } if (!ok) SlruReportIOError(ctl, pageno, InvalidTransactionId); }
/* * Find a page in a shared buffer, reading it in if necessary. * The page number must correspond to an already-initialized page. * * The passed-in xid is used only for error reporting, and may be * InvalidTransactionId if no specific xid is associated with the action. * * If the passed in pointer to valid is NULL, then log errors can be * generated by this function. If valid is not NULL, then the function * will not generate log errors, but will set the boolean value * pointed to by valid to TRUE if it was able to read the page, * or FALSE if the page read had error. * * Return value is the shared-buffer slot number now holding the page. * The buffer's LRU access info is updated. * * Control lock must be held at entry, and will be held at exit. */ static int SimpleLruReadPage_Internal(SlruCtl ctl, int pageno, TransactionId xid, bool *valid) { SlruShared shared = ctl->shared; /* Outer loop handles restart if we must wait for someone else's I/O */ for (;;) { int slotno; bool ok; /* See if page already is in memory; if not, pick victim slot */ slotno = SlruSelectLRUPage(ctl, pageno); /* Did we find the page in memory? */ if (shared->page_number[slotno] == pageno && shared->page_status[slotno] != SLRU_PAGE_EMPTY) { /* If page is still being read in, we must wait for I/O */ if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS) { SimpleLruWaitIO(ctl, slotno); /* Now we must recheck state from the top */ continue; } /* Otherwise, it's ready to use */ SlruRecentlyUsed(shared, slotno); if (valid != NULL) *valid = true; return slotno; } /* We found no match; assert we selected a freeable slot */ Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY || (shared->page_status[slotno] == SLRU_PAGE_VALID && !shared->page_dirty[slotno])); /* Mark the slot read-busy */ shared->page_number[slotno] = pageno; shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS; shared->page_dirty[slotno] = false; /* Acquire per-buffer lock (cannot deadlock, see notes at top) */ LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); /* * Temporarily mark page as recently-used to discourage * SlruSelectLRUPage from selecting it again for someone else. */ SlruRecentlyUsed(shared, slotno); /* Release control lock while doing I/O */ LWLockRelease(shared->ControlLock); /* Do the read */ ok = SlruPhysicalReadPage(ctl, pageno, slotno); /* Re-acquire control lock and update page state */ LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); Assert(shared->page_number[slotno] == pageno && shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS && !shared->page_dirty[slotno]); shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY; LWLockRelease(shared->buffer_locks[slotno]); /* Now it's okay to ereport if we failed */ if (!ok && valid == NULL) SlruReportIOError(ctl, pageno, xid); else if (valid != NULL) { if (!ok) { LWLockRelease(shared->ControlLock); *valid = false; return -1; } else *valid = true; } SlruRecentlyUsed(shared, slotno); return slotno; } }
/* * Write a page from a shared buffer, if necessary. * Does nothing if the specified slot is not dirty. * * NOTE: only one write attempt is made here. Hence, it is possible that * the page is still dirty at exit (if someone else re-dirtied it during * the write). However, we *do* attempt a fresh write even if the page * is already being written; this is for checkpoints. * * Control lock must be held at entry, and will be held at exit. */ void SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) { SlruShared shared = ctl->shared; int pageno; bool ok; /* Do nothing if page does not need writing */ if (shared->page_status[slotno] != SLRU_PAGE_DIRTY && shared->page_status[slotno] != SLRU_PAGE_WRITE_IN_PROGRESS) return; pageno = shared->page_number[slotno]; /* * We must grab the per-buffer lock to do I/O. To avoid deadlock, must * release ControlLock while waiting for per-buffer lock. Fortunately, * most of the time the per-buffer lock shouldn't be already held, so we * can do this: */ if (!LWLockConditionalAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE)) { LWLockRelease(shared->ControlLock); LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); } /* * Check to see if someone else already did the write, or took the buffer * away from us. If so, do nothing. NOTE: we really should never see * WRITE_IN_PROGRESS here, since that state should only occur while the * writer is holding the buffer lock. But accept it so that we have a * recovery path if a writer aborts. */ if (shared->page_number[slotno] != pageno || (shared->page_status[slotno] != SLRU_PAGE_DIRTY && shared->page_status[slotno] != SLRU_PAGE_WRITE_IN_PROGRESS)) { LWLockRelease(shared->buffer_locks[slotno]); return; } /* * Mark the slot write-busy. After this point, a transaction status * update on this page will mark it dirty again. */ shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS; /* Okay, release the control lock and do the write */ LWLockRelease(shared->ControlLock); ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata); /* If we failed, and we're in a flush, better close the files */ if (!ok && fdata) { int i; for (i = 0; i < fdata->num_files; i++) close(fdata->fd[i]); } /* Re-acquire shared control lock and update page state */ LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); Assert(shared->page_number[slotno] == pageno && (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS || shared->page_status[slotno] == SLRU_PAGE_DIRTY)); /* Cannot set CLEAN if someone re-dirtied page since write started */ if (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS) shared->page_status[slotno] = ok ? SLRU_PAGE_CLEAN : SLRU_PAGE_DIRTY; LWLockRelease(shared->buffer_locks[slotno]); /* Now it's okay to ereport if we failed */ if (!ok) SlruReportIOError(ctl, pageno, InvalidTransactionId); }
/* * Find a page in a shared buffer, reading it in if necessary. * The page number must correspond to an already-initialized page. * * The passed-in xid is used only for error reporting, and may be * InvalidTransactionId if no specific xid is associated with the action. * * Return value is the shared-buffer slot number now holding the page. * The buffer's LRU access info is updated. * * Control lock must be held at entry, and will be held at exit. */ int SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid) { SlruShared shared = ctl->shared; /* Outer loop handles restart if we lose the buffer to someone else */ for (;;) { int slotno; bool ok; /* See if page already is in memory; if not, pick victim slot */ slotno = SlruSelectLRUPage(ctl, pageno); /* Did we find the page in memory? */ if (shared->page_number[slotno] == pageno && shared->page_status[slotno] != SLRU_PAGE_EMPTY) { /* If page is still being read in, we cannot use it yet */ if (shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS) { /* otherwise, it's ready to use */ SlruRecentlyUsed(shared, slotno); return slotno; } } else { /* We found no match; assert we selected a freeable slot */ Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY || shared->page_status[slotno] == SLRU_PAGE_CLEAN); } /* Mark the slot read-busy (no-op if it already was) */ shared->page_number[slotno] = pageno; shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS; /* * Temporarily mark page as recently-used to discourage * SlruSelectLRUPage from selecting it again for someone else. */ SlruRecentlyUsed(shared, slotno); /* * We must grab the per-buffer lock to do I/O. To avoid deadlock, * must release ControlLock while waiting for per-buffer lock. * Fortunately, most of the time the per-buffer lock shouldn't be * already held, so we can do this: */ if (!LWLockConditionalAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE)) { LWLockRelease(shared->ControlLock); LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); } /* * Check to see if someone else already did the read, or took the * buffer away from us. If so, restart from the top. */ if (shared->page_number[slotno] != pageno || shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS) { LWLockRelease(shared->buffer_locks[slotno]); continue; } /* Okay, release control lock and do the read */ LWLockRelease(shared->ControlLock); ok = SlruPhysicalReadPage(ctl, pageno, slotno); /* Re-acquire shared control lock and update page state */ LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); Assert(shared->page_number[slotno] == pageno && shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS); shared->page_status[slotno] = ok ? SLRU_PAGE_CLEAN : SLRU_PAGE_EMPTY; LWLockRelease(shared->buffer_locks[slotno]); /* Now it's okay to ereport if we failed */ if (!ok) SlruReportIOError(ctl, pageno, xid); SlruRecentlyUsed(shared, slotno); return slotno; } }