/*
 * ProcWaitForSignal - wait for a signal from another backend.
 *
 * As this uses the generic process latch the caller has to be robust against
 * unrelated wakeups: Always check that the desired state has occurred, and
 * wait again if not.
 */
void
ProcWaitForSignal(void)
{
    WaitLatch(MyLatch, WL_LATCH_SET, 0);
    ResetLatch(MyLatch);
    CHECK_FOR_INTERRUPTS();
}
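The header comment prescribes a re-check loop on the caller's side. As an illustration only, a minimal caller sketch might look like the following; state_has_changed() is a hypothetical predicate standing in for whatever shared-state condition the caller is actually waiting on, not a real API.

/* Hypothetical caller sketch: tolerate spurious wakeups of the process latch. */
for (;;)
{
    if (state_has_changed())    /* hypothetical predicate, not a real API */
        break;
    ProcWaitForSignal();        /* may return on unrelated latch sets */
}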
/*
 * hello_main
 *
 * Main loop processing.
 */
void
hello_main(Datum main_arg)
{
    /* Set up the SIGTERM handler before unblocking signals */
    pqsignal(SIGTERM, hello_sigterm);

    /* We're now ready to receive signals */
    BackgroundWorkerUnblockSignals();

    while (!got_sigterm)
    {
        int         rc;

        /* Wait 10s */
        rc = WaitLatch(&MyProc->procLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                       10000L, PG_WAIT_EXTENSION);
        ResetLatch(&MyProc->procLatch);

        /* Emergency bailout if postmaster has died */
        if (rc & WL_POSTMASTER_DEATH)
            proc_exit(1);

        elog(LOG, "Hello World!");      /* Say Hello to the world */
    }
    proc_exit(0);
}
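The loop above relies on a hello_sigterm handler that is not shown. A minimal sketch under the usual background-worker conventions (set a flag, set the process latch, preserve errno) might look like this; the body is an assumption for illustration, not the original extension's code.

/*
 * Hypothetical sketch of the SIGTERM handler referenced above: set the flag
 * tested by hello_main() and wake its WaitLatch() call, preserving errno.
 */
static void
hello_sigterm(SIGNAL_ARGS)
{
    int         save_errno = errno;

    got_sigterm = true;
    SetLatch(&MyProc->procLatch);
    errno = save_errno;
}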
/*
 * Wait until at least *nbytesp bytes are available to be read from the
 * shared message queue, or until the buffer wraps around.  If the queue is
 * detached, returns SHM_MQ_DETACHED.  If nowait is specified and a wait
 * would be required, returns SHM_MQ_WOULD_BLOCK.  Otherwise, *datap is set
 * to the location at which data bytes can be read, *nbytesp is set to the
 * number of bytes which can be read at that address, and the return value
 * is SHM_MQ_SUCCESS.
 */
static shm_mq_result
shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
                     Size *nbytesp, void **datap)
{
    Size        ringsize = mq->mq_ring_size;
    uint64      used;
    uint64      written;

    for (;;)
    {
        Size        offset;
        bool        detached;

        /* Get bytes written, so we can compute what's available to read. */
        written = shm_mq_get_bytes_written(mq, &detached);
        used = written - mq->mq_bytes_read;
        Assert(used <= ringsize);
        offset = mq->mq_bytes_read % (uint64) ringsize;

        /* If we have enough data or buffer has wrapped, we're done. */
        if (used >= bytes_needed || offset + used >= ringsize)
        {
            *nbytesp = Min(used, ringsize - offset);
            *datap = &mq->mq_ring[mq->mq_ring_offset + offset];
            return SHM_MQ_SUCCESS;
        }

        /*
         * Fall out before waiting if the queue has been detached.
         *
         * Note that we don't check for this until *after* considering
         * whether the data already available is enough, since the receiver
         * can finish receiving a message stored in the buffer even after
         * the sender has detached.
         */
        if (detached)
            return SHM_MQ_DETACHED;

        /* Skip manipulation of our latch if nowait = true. */
        if (nowait)
            return SHM_MQ_WOULD_BLOCK;

        /*
         * Wait for our latch to be set.  It might already be set for some
         * unrelated reason, but that'll just result in one extra trip
         * through the loop.  It's worth it to avoid resetting the latch at
         * top of loop, because setting an already-set latch is much cheaper
         * than setting one that has been reset.
         */
        WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);

        /* An interrupt may have occurred while we were waiting. */
        CHECK_FOR_INTERRUPTS();

        /* Reset the latch so we don't spin. */
        ResetLatch(&MyProc->procLatch);
    }
}
static void
wait_for_workers_to_become_ready(worker_state *wstate,
                                 volatile test_shm_mq_header *hdr)
{
    bool        save_set_latch_on_sigusr1;
    bool        result = false;

    save_set_latch_on_sigusr1 = set_latch_on_sigusr1;
    set_latch_on_sigusr1 = true;

    PG_TRY();
    {
        for (;;)
        {
            int         workers_ready;

            /* If all the workers are ready, we have succeeded. */
            SpinLockAcquire(&hdr->mutex);
            workers_ready = hdr->workers_ready;
            SpinLockRelease(&hdr->mutex);
            if (workers_ready >= wstate->nworkers)
            {
                result = true;
                break;
            }

            /* If any workers (or the postmaster) have died, we have failed. */
            if (!check_worker_status(wstate))
            {
                result = false;
                break;
            }

            /* Wait to be signalled. */
            WaitLatch(MyLatch, WL_LATCH_SET, 0);

            /* An interrupt may have occurred while we were waiting. */
            CHECK_FOR_INTERRUPTS();

            /* Reset the latch so we don't spin. */
            ResetLatch(MyLatch);
        }
    }
    PG_CATCH();
    {
        set_latch_on_sigusr1 = save_set_latch_on_sigusr1;
        PG_RE_THROW();
    }
    PG_END_TRY();

    if (!result)
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
                 errmsg("one or more background workers failed to start")));
}
/*
 * Wait for a background worker to start up and attach to the shmem context.
 *
 * This is only needed for cleaning up the shared memory in case the worker
 * fails to attach.
 */
static void
WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
                               uint16 generation,
                               BackgroundWorkerHandle *handle)
{
    BgwHandleStatus status;
    int         rc;

    for (;;)
    {
        pid_t       pid;

        CHECK_FOR_INTERRUPTS();

        LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);

        /* Worker either died or has started; no need to do anything. */
        if (!worker->in_use || worker->proc)
        {
            LWLockRelease(LogicalRepWorkerLock);
            return;
        }

        LWLockRelease(LogicalRepWorkerLock);

        /* Check if worker has died before attaching, and clean up after it. */
        status = GetBackgroundWorkerPid(handle, &pid);
        if (status == BGWH_STOPPED)
        {
            LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
            /* Ensure that this was indeed the worker we waited for. */
            if (generation == worker->generation)
                logicalrep_worker_cleanup(worker);
            LWLockRelease(LogicalRepWorkerLock);
            return;
        }

        /*
         * We need timeout because we generally don't get notified via latch
         * about the worker attach.  But we don't expect to have to wait long.
         */
        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                       10L, WAIT_EVENT_BGWORKER_STARTUP);

        if (rc & WL_LATCH_SET)
        {
            ResetLatch(MyLatch);
            CHECK_FOR_INTERRUPTS();
        }
    }

    return;
}
/*
 * Read data from a secure connection.
 */
ssize_t
secure_read(Port *port, void *ptr, size_t len)
{
    ssize_t     n;
    int         waitfor;

retry:
#ifdef USE_SSL
    waitfor = 0;
    if (port->ssl_in_use)
    {
        n = be_tls_read(port, ptr, len, &waitfor);
    }
    else
#endif
    {
        n = secure_raw_read(port, ptr, len);
        waitfor = WL_SOCKET_READABLE;
    }

    /* In blocking mode, wait until the socket is ready */
    if (n < 0 && !port->noblock && (errno == EWOULDBLOCK || errno == EAGAIN))
    {
        int         w;

        Assert(waitfor);

        w = WaitLatchOrSocket(MyLatch,
                              WL_LATCH_SET | waitfor,
                              port->sock, 0);

        /* Handle interrupt. */
        if (w & WL_LATCH_SET)
        {
            ResetLatch(MyLatch);
            ProcessClientReadInterrupt(true);

            /*
             * We'll retry the read.  Most likely it will return immediately
             * because there's still no data available, and we'll wait for
             * the socket to become ready again.
             */
        }
        goto retry;
    }

    /*
     * Process interrupts that happened while (or before) receiving.  Note
     * that we signal that we're not blocking, which will prevent some types
     * of interrupts from being processed.
     */
    ProcessClientReadInterrupt(false);

    return n;
}
/*
 * pg_sleep - delay for N seconds
 */
Datum
pg_sleep(PG_FUNCTION_ARGS)
{
    float8      secs = PG_GETARG_FLOAT8(0);
    float8      endtime;

    /*
     * We sleep using WaitLatch, to ensure that we'll wake up promptly if an
     * important signal (such as SIGALRM or SIGINT) arrives.  Because
     * WaitLatch's upper limit of delay is INT_MAX milliseconds, and the user
     * might ask for more than that, we sleep for at most 10 minutes and then
     * loop.
     *
     * By computing the intended stop time initially, we avoid accumulation of
     * extra delay across multiple sleeps.  This also ensures we won't delay
     * less than the specified time when WaitLatch is terminated early by a
     * non-query-canceling signal such as SIGHUP.
     */
#ifdef HAVE_INT64_TIMESTAMP
#define GetNowFloat()   ((float8) GetCurrentTimestamp() / 1000000.0)
#else
#define GetNowFloat()   GetCurrentTimestamp()
#endif

    endtime = GetNowFloat() + secs;

    for (;;)
    {
        float8      delay;
        long        delay_ms;

        CHECK_FOR_INTERRUPTS();

        delay = endtime - GetNowFloat();
        if (delay >= 600.0)
            delay_ms = 600000;
        else if (delay > 0.0)
            delay_ms = (long) ceil(delay * 1000.0);
        else
            break;

        (void) WaitLatch(MyLatch,
                         WL_LATCH_SET | WL_TIMEOUT,
                         delay_ms,
                         WAIT_EVENT_PG_SLEEP);
        ResetLatch(MyLatch);
    }

    PG_RETURN_VOID();
}
static void
BufferSaverMain(Datum main_arg)
{
    WorkerCommon();

    /*
     * Main loop: do this until the SIGTERM handler tells us to terminate
     */
    while (!got_sigterm)
    {
        int         rc;

        ResetLatch(&MyProc->procLatch);

        /*
         * Wait on the process latch, which sleeps as necessary, but is
         * awakened if postmaster dies.  This way the background process goes
         * away immediately in case of an emergency.
         */
        rc = WaitLatch(&MyProc->procLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                       10 * 1000L);

        /* emergency bailout if postmaster has died */
        if (rc & WL_POSTMASTER_DEATH)
            proc_exit(1);

        /*
         * In case of a SIGHUP, just reload the configuration.
         */
        if (got_sighup)
        {
            got_sighup = false;
            ProcessConfigFile(PGC_SIGHUP);
        }
    }

    /*
     * We received the SIGTERM; shutdown is in progress, so save the
     * shared-buffer contents.
     */

    /* Save the buffers only if the extension is enabled. */
    if (guc_enabled)
        SaveBuffers();

    /*
     * The worker exits here.  A proc_exit(0) is not necessary; we'll let the
     * caller do that.
     */
}
/*
 * This is used when a process is waiting for its counterpart to attach to the
 * queue.  We exit when the other process attaches as expected, or, if
 * handle != NULL, when the referenced background process or the postmaster
 * dies.  Note that if handle == NULL, and the process fails to attach, we'll
 * potentially get stuck here forever waiting for a process that may never
 * start.  We do check for interrupts, though.
 *
 * ptr is a pointer to the memory address that we're expecting to become
 * non-NULL when our counterpart attaches to the queue.
 */
static bool
shm_mq_wait_internal(volatile shm_mq *mq, PGPROC *volatile * ptr,
                     BackgroundWorkerHandle *handle)
{
    bool        result = false;

    for (;;)
    {
        BgwHandleStatus status;
        pid_t       pid;
        bool        detached;

        /* Acquire the lock just long enough to check the pointer. */
        SpinLockAcquire(&mq->mq_mutex);
        detached = mq->mq_detached;
        result = (*ptr != NULL);
        SpinLockRelease(&mq->mq_mutex);

        /* Fail if detached; else succeed if initialized. */
        if (detached)
        {
            result = false;
            break;
        }
        if (result)
            break;

        if (handle != NULL)
        {
            /* Check for unexpected worker death. */
            status = GetBackgroundWorkerPid(handle, &pid);
            if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED)
            {
                result = false;
                break;
            }
        }

        /* Wait to be signalled. */
        WaitLatch(MyLatch, WL_LATCH_SET, 0);

        /* An interrupt may have occurred while we were waiting. */
        CHECK_FOR_INTERRUPTS();

        /* Reset the latch so we don't spin. */
        ResetLatch(MyLatch);
    }

    return result;
}
/*
 * Prepare to wait on a given condition variable.
 *
 * This can optionally be called before entering a test/sleep loop.
 * Doing so is more efficient if we'll need to sleep at least once.
 * However, if the first test of the exit condition is likely to succeed,
 * it's more efficient to omit the ConditionVariablePrepareToSleep call.
 * See comments in ConditionVariableSleep for more detail.
 *
 * Caution: "before entering the loop" means you *must* test the exit
 * condition between calling ConditionVariablePrepareToSleep and calling
 * ConditionVariableSleep.  If that is inconvenient, omit calling
 * ConditionVariablePrepareToSleep.
 */
void
ConditionVariablePrepareToSleep(ConditionVariable *cv)
{
    int         pgprocno = MyProc->pgprocno;

    /*
     * If first time through in this process, create a WaitEventSet, which
     * we'll reuse for all condition variable sleeps.
     */
    if (cv_wait_event_set == NULL)
    {
        WaitEventSet *new_event_set;

        new_event_set = CreateWaitEventSet(TopMemoryContext, 2);
        AddWaitEventToSet(new_event_set, WL_LATCH_SET, PGINVALID_SOCKET,
                          MyLatch, NULL);
        AddWaitEventToSet(new_event_set, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
                          NULL, NULL);
        /* Don't set cv_wait_event_set until we have a correct WES. */
        cv_wait_event_set = new_event_set;
    }

    /*
     * If some other sleep is already prepared, cancel it; this is necessary
     * because we have just one static variable tracking the prepared sleep,
     * and also only one cvWaitLink in our PGPROC.  It's okay to do this
     * because whenever control does return to the other test-and-sleep loop,
     * its ConditionVariableSleep call will just re-establish that sleep as
     * the prepared one.
     */
    if (cv_sleep_target != NULL)
        ConditionVariableCancelSleep();

    /* Record the condition variable on which we will sleep. */
    cv_sleep_target = cv;

    /*
     * Reset my latch before adding myself to the queue, to ensure that we
     * don't miss a wakeup that occurs immediately.
     */
    ResetLatch(MyLatch);

    /* Add myself to the wait queue. */
    SpinLockAcquire(&cv->mutex);
    proclist_push_tail(&cv->wakeup, pgprocno, cvWaitLink);
    SpinLockRelease(&cv->mutex);
}
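For context, the prepare/test/sleep contract described in the header comment is normally exercised by a caller loop like the one sketched below; shared->cv and shared->done_flag are assumed names used only for this sketch, not part of the function above.

/*
 * Hypothetical caller sketch of the test-and-sleep loop described in the
 * header comment above.
 */
ConditionVariablePrepareToSleep(&shared->cv);
while (!shared->done_flag)
    ConditionVariableSleep(&shared->cv, PG_WAIT_EXTENSION);
ConditionVariableCancelSleep();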
/*
 * Wait until the apply worker changes the state of our synchronization
 * worker to the expected one.
 *
 * Used when transitioning from SYNCWAIT state to CATCHUP.
 *
 * Returns false if the apply worker has disappeared.
 */
static bool
wait_for_worker_state_change(char expected_state)
{
    int         rc;

    for (;;)
    {
        LogicalRepWorker *worker;

        CHECK_FOR_INTERRUPTS();

        /*
         * Done if already in correct state.  (We assume this fetch is atomic
         * enough to not give a misleading answer if we do it with no lock.)
         */
        if (MyLogicalRepWorker->relstate == expected_state)
            return true;

        /*
         * Bail out if the apply worker has died, else signal it we're
         * waiting.
         */
        LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
        worker = logicalrep_worker_find(MyLogicalRepWorker->subid,
                                        InvalidOid, false);
        if (worker && worker->proc)
            logicalrep_worker_wakeup_ptr(worker);
        LWLockRelease(LogicalRepWorkerLock);
        if (!worker)
            break;

        /*
         * Wait.  We expect to get a latch signal back from the apply worker,
         * but use a timeout in case it dies without sending one.
         */
        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                       1000L, WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE);

        if (rc & WL_LATCH_SET)
            ResetLatch(MyLatch);
    }

    return false;
}
/*
 * Wait until the relation synchronization state is set in the catalog to the
 * expected one.
 *
 * Used when transitioning from CATCHUP state to SYNCDONE.
 *
 * Returns false if the synchronization worker has disappeared or the table
 * state has been reset.
 */
static bool
wait_for_relation_state_change(Oid relid, char expected_state)
{
    char        state;

    for (;;)
    {
        LogicalRepWorker *worker;
        XLogRecPtr  statelsn;

        CHECK_FOR_INTERRUPTS();

        /* XXX use cache invalidation here to improve performance? */
        PushActiveSnapshot(GetLatestSnapshot());
        state = GetSubscriptionRelState(MyLogicalRepWorker->subid,
                                        relid, &statelsn, true);
        PopActiveSnapshot();

        if (state == SUBREL_STATE_UNKNOWN)
            return false;

        if (state == expected_state)
            return true;

        /* Check if the opposite worker is still running and bail if not. */
        LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
        worker = logicalrep_worker_find(MyLogicalRepWorker->subid,
                                        am_tablesync_worker() ? InvalidOid : relid,
                                        false);
        LWLockRelease(LogicalRepWorkerLock);
        if (!worker)
            return false;

        (void) WaitLatch(MyLatch,
                         WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                         1000L, WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE);

        ResetLatch(MyLatch);
    }

    return false;
}
static void
config_log_main(Datum main_arg)
{
    config_log_objects *objects;

    pqsignal(SIGTERM, config_log_sigterm);
    pqsignal(SIGHUP, config_log_sighup);

    /* We're now ready to receive signals */
    BackgroundWorkerUnblockSignals();

    /* Connect to database */
    BackgroundWorkerInitializeConnection(config_log_database, NULL);

    /* Verify expected objects exist */
    objects = initialize_objects();

    while (!got_sigterm)
    {
        int         rc;

        rc = WaitLatch(&MyProc->procLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                       100000L);
        ResetLatch(&MyProc->procLatch);

        /* emergency bailout if postmaster has died */
        if (rc & WL_POSTMASTER_DEATH)
            proc_exit(1);

        /*
         * In case of a SIGHUP, just reload the configuration.
         */
        if (got_sighup)
        {
            got_sighup = false;
            ProcessConfigFile(PGC_SIGHUP);
            execute_pg_settings_logger(objects);
        }
    }

    proc_exit(0);
}
/**
 * Main loop of the sender process.  It wakes up every
 * gp_perfmon_segment_interval ms to send segment
 * information to perfmon
 */
static void
SegmentInfoSenderLoop(void)
{
    int         rc;
    int         counter;

    for (counter = 0;; counter += SEGMENT_INFO_LOOP_SLEEP_MS)
    {
        CHECK_FOR_INTERRUPTS();

        if (senderShutdownRequested)
        {
            break;
        }

        /* no need to live on if postmaster has died */
        if (!PostmasterIsAlive())
            exit(1);

        if (cluster_state_collect_hook)
            cluster_state_collect_hook();

        if (gp_enable_gpperfmon && counter >= gp_perfmon_segment_interval)
        {
            SegmentInfoSender();
            counter = 0;
        }

        /* Sleep a while. */
        rc = WaitLatch(&MyProc->procLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                       SEGMENT_INFO_LOOP_SLEEP_MS);
        ResetLatch(&MyProc->procLatch);

        /* emergency bailout if postmaster has died */
        if (rc & WL_POSTMASTER_DEATH)
            proc_exit(1);
    }                           /* end server loop */

    return;
}
/*
 * Wait for the result from a prior asynchronous execution function call.
 *
 * This function offers quick responsiveness by checking for any interruptions.
 *
 * This function emulates PQexec()'s behavior of returning the last result
 * when there are many.
 *
 * Caller is responsible for the error handling on the result.
 */
PGresult *
pgfdw_get_result(PGconn *conn, const char *query)
{
    PGresult   *last_res = NULL;

    for (;;)
    {
        PGresult   *res;

        while (PQisBusy(conn))
        {
            int         wc;

            /* Sleep until there's something to do */
            wc = WaitLatchOrSocket(MyLatch,
                                   WL_LATCH_SET | WL_SOCKET_READABLE,
                                   PQsocket(conn),
                                   -1L, PG_WAIT_EXTENSION);
            ResetLatch(MyLatch);

            CHECK_FOR_INTERRUPTS();

            /* Data available in socket */
            if (wc & WL_SOCKET_READABLE)
            {
                if (!PQconsumeInput(conn))
                    pgfdw_report_error(ERROR, NULL, conn, false, query);
            }
        }

        res = PQgetResult(conn);
        if (res == NULL)
            break;              /* query is complete */

        PQclear(last_res);
        last_res = res;
    }

    return last_res;
}
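As a usage illustration only, the function above is meant to follow an asynchronous send; conn and the "SELECT 1" query string below are placeholders for this sketch, not code from postgres_fdw.

/* Hypothetical usage sketch: send asynchronously, then wait interruptibly. */
PGresult   *res;

if (!PQsendQuery(conn, "SELECT 1"))
    pgfdw_report_error(ERROR, NULL, conn, false, "SELECT 1");
res = pgfdw_get_result(conn, "SELECT 1");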
void
hello_main(Datum main_arg)
{
    /* Register functions for SIGTERM/SIGHUP management */
    pqsignal(SIGHUP, hello_sighup);
    pqsignal(SIGTERM, hello_sigterm);

    /* We're now ready to receive signals */
    BackgroundWorkerUnblockSignals();

    while (true)
    {
        /* Wait 1s */
        WaitLatch(MyLatch,
                  WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                  1000L, PG_WAIT_EXTENSION);
        ResetLatch(MyLatch);

        /* Process signals */
        if (got_sighup)
        {
            /* Process config file */
            ProcessConfigFile(PGC_SIGHUP);
            got_sighup = false;
            ereport(LOG, (errmsg("hello signal: processed SIGHUP")));
        }

        if (got_sigterm)
        {
            /* Simply exit */
            ereport(LOG, (errmsg("hello signal: processed SIGTERM")));
            proc_exit(0);
        }
    }

    /* No problems, so clean exit */
    proc_exit(0);
}
/* * Main entry point for checkpointer process * * This is invoked from AuxiliaryProcessMain, which has already created the * basic execution environment, but not enabled signals yet. */ void CheckpointerMain(void) { sigjmp_buf local_sigjmp_buf; MemoryContext checkpointer_context; CheckpointerShmem->checkpointer_pid = MyProcPid; /* * Properly accept or ignore signals the postmaster might send us * * Note: we deliberately ignore SIGTERM, because during a standard Unix * system shutdown cycle, init will SIGTERM all processes at once. We * want to wait for the backends to exit, whereupon the postmaster will * tell us it's okay to shut down (via SIGUSR2). */ pqsignal(SIGHUP, ChkptSigHupHandler); /* set flag to read config * file */ pqsignal(SIGINT, ReqCheckpointHandler); /* request checkpoint */ pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */ pqsignal(SIGQUIT, chkpt_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, chkpt_sigusr1_handler); pqsignal(SIGUSR2, ReqShutdownHandler); /* request shutdown */ /* * Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); pqsignal(SIGTTIN, SIG_DFL); pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); pqsignal(SIGWINCH, SIG_DFL); /* We allow SIGQUIT (quickdie) at all times */ sigdelset(&BlockSig, SIGQUIT); /* * Initialize so that first time-driven event happens at the correct time. */ last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL); /* * Create a resource owner to keep track of our resources (currently only * buffer pins). */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Checkpointer"); /* * Create a memory context that we will do all our work in. We do this so * that we can reset the context during error recovery and thereby avoid * possible memory leaks. Formerly this code just ran in * TopMemoryContext, but resetting that would be a really bad idea. */ checkpointer_context = AllocSetContextCreate(TopMemoryContext, "Checkpointer", ALLOCSET_DEFAULT_SIZES); MemoryContextSwitchTo(checkpointer_context); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Since not using PG_TRY, must reset error stack by hand */ error_context_stack = NULL; /* Prevent interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry * about in checkpointer, but we do have LWLocks, buffers, and temp * files. */ LWLockReleaseAll(); ConditionVariableCancelSleep(); pgstat_report_wait_end(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); /* we needn't bother with the other ResourceOwnerRelease phases */ AtEOXact_Buffers(false); AtEOXact_SMgr(); AtEOXact_Files(); AtEOXact_HashTables(false); /* Warn any waiting backends that the checkpoint failed. */ if (ckpt_active) { SpinLockAcquire(&CheckpointerShmem->ckpt_lck); CheckpointerShmem->ckpt_failed++; CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started; SpinLockRelease(&CheckpointerShmem->ckpt_lck); ckpt_active = false; } /* * Now return to normal top-level context and clear ErrorContext for * next time. 
*/ MemoryContextSwitchTo(checkpointer_context); FlushErrorState(); /* Flush any leaked data in the top-level context */ MemoryContextResetAndDeleteChildren(checkpointer_context); /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); /* * Sleep at least 1 second after any error. A write error is likely * to be repeated, and we don't want to be filling the error logs as * fast as we can. */ pg_usleep(1000000L); /* * Close all open files after any error. This is helpful on Windows, * where holding deleted files open causes various strange errors. * It's not clear we need it elsewhere, but shouldn't hurt. */ smgrcloseall(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; /* * Unblock signals (they were blocked when the postmaster forked us) */ PG_SETMASK(&UnBlockSig); /* * Ensure all shared memory values are set correctly for the config. Doing * this here ensures no race conditions from other concurrent updaters. */ UpdateSharedMemoryConfig(); /* * Advertise our latch that backends can use to wake us up while we're * sleeping. */ ProcGlobal->checkpointerLatch = &MyProc->procLatch; /* * Loop forever */ for (;;) { bool do_checkpoint = false; int flags = 0; pg_time_t now; int elapsed_secs; int cur_timeout; int rc; /* Clear any already-pending wakeups */ ResetLatch(MyLatch); /* * Process any requests or signals received recently. */ AbsorbFsyncRequests(); if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); /* * Checkpointer is the last process to shut down, so we ask it to * hold the keys for a range of other tasks required most of which * have nothing to do with checkpointing at all. * * For various reasons, some config values can change dynamically * so the primary copy of them is held in shared memory to make * sure all backends see the same value. We make Checkpointer * responsible for updating the shared memory copy if the * parameter setting changes because of SIGHUP. */ UpdateSharedMemoryConfig(); } if (checkpoint_requested) { checkpoint_requested = false; do_checkpoint = true; BgWriterStats.m_requested_checkpoints++; } if (shutdown_requested) { /* * From here on, elog(ERROR) should end with exit(1), not send * control back to the sigsetjmp block above */ ExitOnAnyError = true; /* Close down the database */ ShutdownXLOG(0, 0); /* Normal exit from the checkpointer is here */ proc_exit(0); /* done */ } /* * Force a checkpoint if too much time has elapsed since the last one. * Note that we count a timed checkpoint in stats only when this * occurs without an external request, but we set the CAUSE_TIME flag * bit even if there is also an external request. */ now = (pg_time_t) time(NULL); elapsed_secs = now - last_checkpoint_time; if (elapsed_secs >= CheckPointTimeout) { if (!do_checkpoint) BgWriterStats.m_timed_checkpoints++; do_checkpoint = true; flags |= CHECKPOINT_CAUSE_TIME; } /* * Do a checkpoint if requested. */ if (do_checkpoint) { bool ckpt_performed = false; bool do_restartpoint; /* * Check if we should perform a checkpoint or a restartpoint. As a * side-effect, RecoveryInProgress() initializes TimeLineID if * it's not set yet. */ do_restartpoint = RecoveryInProgress(); /* * Atomically fetch the request flags to figure out what kind of a * checkpoint we should perform, and increase the started-counter * to acknowledge that we've started a new checkpoint. 
*/ SpinLockAcquire(&CheckpointerShmem->ckpt_lck); flags |= CheckpointerShmem->ckpt_flags; CheckpointerShmem->ckpt_flags = 0; CheckpointerShmem->ckpt_started++; SpinLockRelease(&CheckpointerShmem->ckpt_lck); /* * The end-of-recovery checkpoint is a real checkpoint that's * performed while we're still in recovery. */ if (flags & CHECKPOINT_END_OF_RECOVERY) do_restartpoint = false; /* * We will warn if (a) too soon since last checkpoint (whatever * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag * since the last checkpoint start. Note in particular that this * implementation will not generate warnings caused by * CheckPointTimeout < CheckPointWarning. */ if (!do_restartpoint && (flags & CHECKPOINT_CAUSE_XLOG) && elapsed_secs < CheckPointWarning) ereport(LOG, (errmsg_plural("checkpoints are occurring too frequently (%d second apart)", "checkpoints are occurring too frequently (%d seconds apart)", elapsed_secs, elapsed_secs), errhint("Consider increasing the configuration parameter \"max_wal_size\"."))); /* * Initialize checkpointer-private variables used during * checkpoint. */ ckpt_active = true; if (do_restartpoint) ckpt_start_recptr = GetXLogReplayRecPtr(NULL); else ckpt_start_recptr = GetInsertRecPtr(); ckpt_start_time = now; ckpt_cached_elapsed = 0; /* * Do the checkpoint. */ if (!do_restartpoint) { CreateCheckPoint(flags); ckpt_performed = true; } else ckpt_performed = CreateRestartPoint(flags); /* * After any checkpoint, close all smgr files. This is so we * won't hang onto smgr references to deleted files indefinitely. */ smgrcloseall(); /* * Indicate checkpoint completion to any waiting backends. */ SpinLockAcquire(&CheckpointerShmem->ckpt_lck); CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started; SpinLockRelease(&CheckpointerShmem->ckpt_lck); if (ckpt_performed) { /* * Note we record the checkpoint start time not end time as * last_checkpoint_time. This is so that time-driven * checkpoints happen at a predictable spacing. */ last_checkpoint_time = now; } else { /* * We were not able to perform the restartpoint (checkpoints * throw an ERROR in case of error). Most likely because we * have not received any new checkpoint WAL records since the * last restartpoint. Try again in 15 s. */ last_checkpoint_time = now - CheckPointTimeout + 15; } ckpt_active = false; } /* Check for archive_timeout and switch xlog files if necessary. */ CheckArchiveTimeout(); /* * Send off activity statistics to the stats collector. (The reason * why we re-use bgwriter-related code for this is that the bgwriter * and checkpointer used to be just one process. It's probably not * worth the trouble to split the stats support into two independent * stats message types.) */ pgstat_send_bgwriter(); /* * Sleep until we are signaled or it's time for another checkpoint or * xlog file switch. */ now = (pg_time_t) time(NULL); elapsed_secs = now - last_checkpoint_time; if (elapsed_secs >= CheckPointTimeout) continue; /* no sleep for us ... */ cur_timeout = CheckPointTimeout - elapsed_secs; if (XLogArchiveTimeout > 0 && !RecoveryInProgress()) { elapsed_secs = now - last_xlog_switch_time; if (elapsed_secs >= XLogArchiveTimeout) continue; /* no sleep for us ... */ cur_timeout = Min(cur_timeout, XLogArchiveTimeout - elapsed_secs); } rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, cur_timeout * 1000L /* convert to ms */, WAIT_EVENT_CHECKPOINTER_MAIN); /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. 
*/ if (rc & WL_POSTMASTER_DEATH) exit(1); } }
/* * Wait for synchronous replication, if requested by user. * * Initially backends start in state SYNC_REP_NOT_WAITING and then * change that state to SYNC_REP_WAITING before adding ourselves * to the wait queue. During SyncRepWakeQueue() a WALSender changes * the state to SYNC_REP_WAIT_COMPLETE once replication is confirmed. * This backend then resets its state to SYNC_REP_NOT_WAITING. */ void SyncRepWaitForLSN(XLogRecPtr XactCommitLSN) { char *new_status = NULL; const char *old_status; /* * Fast exit if user has not requested sync replication, or * there are no sync replication standby names defined. * Note that those standbys don't need to be connected. */ if (!SyncRepRequested() || !SyncStandbysDefined()) return; Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks))); Assert(WalSndCtl != NULL); /* Reset the latch before adding ourselves to the queue. */ ResetLatch(&MyProc->waitLatch); /* * Set our waitLSN so WALSender will know when to wake us, and add * ourselves to the queue. */ LWLockAcquire(SyncRepLock, LW_EXCLUSIVE); Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING); if (!WalSndCtl->sync_standbys_defined) { /* * We don't wait for sync rep if WalSndCtl->sync_standbys_defined is * not set. See SyncRepUpdateSyncStandbysDefined. */ LWLockRelease(SyncRepLock); return; } MyProc->waitLSN = XactCommitLSN; MyProc->syncRepState = SYNC_REP_WAITING; SyncRepQueueInsert(); Assert(SyncRepQueueIsOrderedByLSN()); LWLockRelease(SyncRepLock); /* Alter ps display to show waiting for sync rep. */ if (update_process_title) { int len; old_status = get_ps_display(&len); new_status = (char *) palloc(len + 32 + 1); memcpy(new_status, old_status, len); sprintf(new_status + len, " waiting for %X/%X", XactCommitLSN.xlogid, XactCommitLSN.xrecoff); set_ps_display(new_status, false); new_status[len] = '\0'; /* truncate off " waiting ..." */ } /* * Wait for specified LSN to be confirmed. * * Each proc has its own wait latch, so we perform a normal latch * check/wait loop here. */ for (;;) { int syncRepState; /* * Wait on latch for up to 60 seconds. This allows us to * check for postmaster death regularly while waiting. * Note that timeout here does not necessarily release from loop. */ WaitLatch(&MyProc->waitLatch, 60000000L); /* Must reset the latch before testing state. */ ResetLatch(&MyProc->waitLatch); /* * Try checking the state without the lock first. There's no guarantee * that we'll read the most up-to-date value, so if it looks like we're * still waiting, recheck while holding the lock. But if it looks like * we're done, we must really be done, because once walsender changes * the state to SYNC_REP_WAIT_COMPLETE, it will never update it again, * so we can't be seeing a stale value in that case. */ syncRepState = MyProc->syncRepState; if (syncRepState == SYNC_REP_WAITING) { LWLockAcquire(SyncRepLock, LW_SHARED); syncRepState = MyProc->syncRepState; LWLockRelease(SyncRepLock); } if (syncRepState == SYNC_REP_WAIT_COMPLETE) break; /* * If a wait for synchronous replication is pending, we can neither * acknowledge the commit nor raise ERROR or FATAL. The latter * would lead the client to believe that that the transaction * aborted, which is not true: it's already committed locally. * The former is no good either: the client has requested * synchronous replication, and is entitled to assume that an * acknowledged commit is also replicated, which may not be true. * So in this case we issue a WARNING (which some clients may * be able to interpret) and shut off further output. 
We do NOT * reset ProcDiePending, so that the process will die after the * commit is cleaned up. */ if (ProcDiePending) { ereport(WARNING, (errcode(ERRCODE_ADMIN_SHUTDOWN), errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"), errdetail("The transaction has already committed locally, but may not have been replicated to the standby."))); whereToSendOutput = DestNone; SyncRepCancelWait(); break; } /* * It's unclear what to do if a query cancel interrupt arrives. We * can't actually abort at this point, but ignoring the interrupt * altogether is not helpful, so we just terminate the wait with * a suitable warning. */ if (QueryCancelPending) { QueryCancelPending = false; ereport(WARNING, (errmsg("canceling wait for synchronous replication due to user request"), errdetail("The transaction has already committed locally, but may not have been replicated to the standby."))); SyncRepCancelWait(); break; } /* * If the postmaster dies, we'll probably never get an acknowledgement, * because all the wal sender processes will exit. So just bail out. */ if (!PostmasterIsAlive(true)) { ProcDiePending = true; whereToSendOutput = DestNone; SyncRepCancelWait(); break; } } /* * WalSender has checked our LSN and has removed us from queue. Clean up * state and leave. It's OK to reset these shared memory fields without * holding SyncRepLock, because any walsenders will ignore us anyway when * we're not on the queue. */ Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks))); MyProc->syncRepState = SYNC_REP_NOT_WAITING; MyProc->waitLSN.xlogid = 0; MyProc->waitLSN.xrecoff = 0; if (new_status) { /* Reset ps display */ set_ps_display(new_status, false); pfree(new_status); } }
/* * Main entry point for syslogger process * argc/argv parameters are valid only in EXEC_BACKEND case. */ NON_EXEC_STATIC void SysLoggerMain(int argc, char *argv[]) { #ifndef WIN32 char logbuffer[READ_BUF_SIZE]; int bytes_in_logbuffer = 0; #endif char *currentLogDir; char *currentLogFilename; int currentLogRotationAge; pg_time_t now; IsUnderPostmaster = true; /* we are a postmaster subprocess now */ MyProcPid = getpid(); /* reset MyProcPid */ MyStartTime = time(NULL); /* set our start time in case we call elog */ now = MyStartTime; #ifdef EXEC_BACKEND syslogger_parseArgs(argc, argv); #endif /* EXEC_BACKEND */ am_syslogger = true; init_ps_display("logger process", "", "", ""); /* * If we restarted, our stderr is already redirected into our own input * pipe. This is of course pretty useless, not to mention that it * interferes with detecting pipe EOF. Point stderr to /dev/null. This * assumes that all interesting messages generated in the syslogger will * come through elog.c and will be sent to write_syslogger_file. */ if (redirection_done) { int fd = open(DEVNULL, O_WRONLY, 0); /* * The closes might look redundant, but they are not: we want to be * darn sure the pipe gets closed even if the open failed. We can * survive running with stderr pointing nowhere, but we can't afford * to have extra pipe input descriptors hanging around. */ close(fileno(stdout)); close(fileno(stderr)); if (fd != -1) { dup2(fd, fileno(stdout)); dup2(fd, fileno(stderr)); close(fd); } } /* * Syslogger's own stderr can't be the syslogPipe, so set it back to text * mode if we didn't just close it. (It was set to binary in * SubPostmasterMain). */ #ifdef WIN32 else _setmode(_fileno(stderr), _O_TEXT); #endif /* * Also close our copy of the write end of the pipe. This is needed to * ensure we can detect pipe EOF correctly. (But note that in the restart * case, the postmaster already did this.) */ #ifndef WIN32 if (syslogPipe[1] >= 0) close(syslogPipe[1]); syslogPipe[1] = -1; #else if (syslogPipe[1]) CloseHandle(syslogPipe[1]); syslogPipe[1] = 0; #endif /* * If possible, make this process a group leader, so that the postmaster * can signal any child processes too. (syslogger probably never has any * child processes, but for consistency we make all postmaster child * processes do this.) */ #ifdef HAVE_SETSID if (setsid() < 0) elog(FATAL, "setsid() failed: %m"); #endif InitializeLatchSupport(); /* needed for latch waits */ /* Initialize private latch for use by signal handlers */ InitLatch(&sysLoggerLatch); /* * Properly accept or ignore signals the postmaster might send us * * Note: we ignore all termination signals, and instead exit only when all * upstream processes are gone, to ensure we don't miss any dying gasps of * broken backends... 
*/ pqsignal(SIGHUP, sigHupHandler); /* set flag to read config file */ pqsignal(SIGINT, SIG_IGN); pqsignal(SIGTERM, SIG_IGN); pqsignal(SIGQUIT, SIG_IGN); pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, sigUsr1Handler); /* request log rotation */ pqsignal(SIGUSR2, SIG_IGN); /* * Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); pqsignal(SIGTTIN, SIG_DFL); pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); pqsignal(SIGWINCH, SIG_DFL); PG_SETMASK(&UnBlockSig); #ifdef WIN32 /* Fire up separate data transfer thread */ InitializeCriticalSection(&sysloggerSection); EnterCriticalSection(&sysloggerSection); threadHandle = (HANDLE) _beginthreadex(NULL, 0, pipeThread, NULL, 0, NULL); if (threadHandle == 0) elog(FATAL, "could not create syslogger data transfer thread: %m"); #endif /* WIN32 */ /* * Remember active logfile's name. We recompute this from the reference * time because passing down just the pg_time_t is a lot cheaper than * passing a whole file path in the EXEC_BACKEND case. */ last_file_name = logfile_getname(first_syslogger_file_time, NULL); /* remember active logfile parameters */ currentLogDir = pstrdup(Log_directory); currentLogFilename = pstrdup(Log_filename); currentLogRotationAge = Log_RotationAge; /* set next planned rotation time */ set_next_rotation_time(); /* main worker loop */ for (;;) { bool time_based_rotation = false; int size_rotation_for = 0; long cur_timeout; int cur_flags; #ifndef WIN32 int rc; #endif /* Clear any already-pending wakeups */ ResetLatch(&sysLoggerLatch); /* * Process any requests or signals received recently. */ if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); /* * Check if the log directory or filename pattern changed in * postgresql.conf. If so, force rotation to make sure we're * writing the logfiles in the right place. */ if (strcmp(Log_directory, currentLogDir) != 0) { pfree(currentLogDir); currentLogDir = pstrdup(Log_directory); rotation_requested = true; /* * Also, create new directory if not present; ignore errors */ mkdir(Log_directory, S_IRWXU); } if (strcmp(Log_filename, currentLogFilename) != 0) { pfree(currentLogFilename); currentLogFilename = pstrdup(Log_filename); rotation_requested = true; } /* * If rotation time parameter changed, reset next rotation time, * but don't immediately force a rotation. */ if (currentLogRotationAge != Log_RotationAge) { currentLogRotationAge = Log_RotationAge; set_next_rotation_time(); } /* * If we had a rotation-disabling failure, re-enable rotation * attempts after SIGHUP, and force one immediately. */ if (rotation_disabled) { rotation_disabled = false; rotation_requested = true; } } if (Log_RotationAge > 0 && !rotation_disabled) { /* Do a logfile rotation if it's time */ now = (pg_time_t) time(NULL); if (now >= next_rotation_time) rotation_requested = time_based_rotation = true; } if (!rotation_requested && Log_RotationSize > 0 && !rotation_disabled) { /* Do a rotation if file is too big */ if (ftell(syslogFile) >= Log_RotationSize * 1024L) { rotation_requested = true; size_rotation_for |= LOG_DESTINATION_STDERR; } if (csvlogFile != NULL && ftell(csvlogFile) >= Log_RotationSize * 1024L) { rotation_requested = true; size_rotation_for |= LOG_DESTINATION_CSVLOG; } } if (rotation_requested) { /* * Force rotation when both values are zero. It means the request * was sent by pg_rotate_logfile. 
*/ if (!time_based_rotation && size_rotation_for == 0) size_rotation_for = LOG_DESTINATION_STDERR | LOG_DESTINATION_CSVLOG; logfile_rotate(time_based_rotation, size_rotation_for); } /* * Calculate time till next time-based rotation, so that we don't * sleep longer than that. We assume the value of "now" obtained * above is still close enough. Note we can't make this calculation * until after calling logfile_rotate(), since it will advance * next_rotation_time. * * Also note that we need to beware of overflow in calculation of the * timeout: with large settings of Log_RotationAge, next_rotation_time * could be more than INT_MAX msec in the future. In that case we'll * wait no more than INT_MAX msec, and try again. */ if (Log_RotationAge > 0 && !rotation_disabled) { pg_time_t delay; delay = next_rotation_time - now; if (delay > 0) { if (delay > INT_MAX / 1000) delay = INT_MAX / 1000; cur_timeout = delay * 1000L; /* msec */ } else cur_timeout = 0; cur_flags = WL_TIMEOUT; } else { cur_timeout = -1L; cur_flags = 0; } /* * Sleep until there's something to do */ #ifndef WIN32 rc = WaitLatchOrSocket(&sysLoggerLatch, WL_LATCH_SET | WL_SOCKET_READABLE | cur_flags, syslogPipe[0], cur_timeout); if (rc & WL_SOCKET_READABLE) { int bytesRead; bytesRead = read(syslogPipe[0], logbuffer + bytes_in_logbuffer, sizeof(logbuffer) - bytes_in_logbuffer); if (bytesRead < 0) { if (errno != EINTR) ereport(LOG, (errcode_for_socket_access(), errmsg("could not read from logger pipe: %m"))); } else if (bytesRead > 0) { bytes_in_logbuffer += bytesRead; process_pipe_input(logbuffer, &bytes_in_logbuffer); continue; } else { /* * Zero bytes read when select() is saying read-ready means * EOF on the pipe: that is, there are no longer any processes * with the pipe write end open. Therefore, the postmaster * and all backends are shut down, and we are done. */ pipe_eof_seen = true; /* if there's any data left then force it out now */ flush_pipe_input(logbuffer, &bytes_in_logbuffer); } } #else /* WIN32 */ /* * On Windows we leave it to a separate thread to transfer data and * detect pipe EOF. The main thread just wakes up to handle SIGHUP * and rotation conditions. * * Server code isn't generally thread-safe, so we ensure that only one * of the threads is active at a time by entering the critical section * whenever we're not sleeping. */ LeaveCriticalSection(&sysloggerSection); (void) WaitLatch(&sysLoggerLatch, WL_LATCH_SET | cur_flags, cur_timeout); EnterCriticalSection(&sysloggerSection); #endif /* WIN32 */ if (pipe_eof_seen) { /* * seeing this message on the real stderr is annoying - so we make * it DEBUG1 to suppress in normal use. */ ereport(DEBUG1, (errmsg("logger shutting down"))); /* * Normal exit from the syslogger is here. Note that we * deliberately do not close syslogFile before exiting; this is to * allow for the possibility of elog messages being generated * inside proc_exit. Regular exit() will take care of flushing * and closing stdio channels. */ proc_exit(0); } } }
/* Main loop of walsender process */ static int WalSndLoop(void) { char *output_message; bool caughtup = false; /* * Allocate buffer that will be used for each output message. We do this * just once to reduce palloc overhead. The buffer must be made large * enough for maximum-sized messages. */ output_message = palloc(1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE); /* * Allocate buffer that will be used for processing reply messages. As * above, do this just once to reduce palloc overhead. */ initStringInfo(&reply_message); /* Initialize the last reply timestamp */ last_reply_timestamp = GetCurrentTimestamp(); /* Loop forever, unless we get an error */ for (;;) { /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ if (!PostmasterIsAlive(true)) exit(1); /* Process any requests or signals received recently */ if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); SyncRepInitConfig(); } /* Normal exit from the walsender is here */ if (walsender_shutdown_requested) { /* Inform the standby that XLOG streaming was done */ pq_puttextmessage('C', "COPY 0"); pq_flush(); proc_exit(0); } /* * If we don't have any pending data in the output buffer, try to send * some more. */ if (!pq_is_send_pending()) { XLogSend(output_message, &caughtup); /* * Even if we wrote all the WAL that was available when we started * sending, more might have arrived while we were sending this * batch. We had the latch set while sending, so we have not * received any signals from that time. Let's arm the latch again, * and after that check that we're still up-to-date. */ if (caughtup && !pq_is_send_pending()) { ResetLatch(&MyWalSnd->latch); XLogSend(output_message, &caughtup); } } /* Flush pending output to the client */ if (pq_flush_if_writable() != 0) break; /* * When SIGUSR2 arrives, we send any outstanding logs up to the * shutdown checkpoint record (i.e., the latest record) and exit. */ if (walsender_ready_to_stop && !pq_is_send_pending()) { XLogSend(output_message, &caughtup); ProcessRepliesIfAny(); if (caughtup && !pq_is_send_pending()) walsender_shutdown_requested = true; } if ((caughtup || pq_is_send_pending()) && !got_SIGHUP && !walsender_shutdown_requested) { TimestampTz finish_time = 0; long sleeptime; /* Reschedule replication timeout */ if (replication_timeout > 0) { long secs; int usecs; finish_time = TimestampTzPlusMilliseconds(last_reply_timestamp, replication_timeout); TimestampDifference(GetCurrentTimestamp(), finish_time, &secs, &usecs); sleeptime = secs * 1000 + usecs / 1000; if (WalSndDelay < sleeptime) sleeptime = WalSndDelay; } else { /* * XXX: Without timeout, we don't really need the periodic * wakeups anymore, WaitLatchOrSocket should reliably wake up * as soon as something interesting happens. */ sleeptime = WalSndDelay; } /* Sleep */ WaitLatchOrSocket(&MyWalSnd->latch, MyProcPort->sock, true, pq_is_send_pending(), sleeptime); /* Check for replication timeout */ if (replication_timeout > 0 && GetCurrentTimestamp() >= finish_time) { /* * Since typically expiration of replication timeout means * communication problem, we don't send the error message to * the standby. */ ereport(COMMERROR, (errmsg("terminating walsender process due to replication timeout"))); break; } } /* * If we're in catchup state, see if its time to move to streaming. * This is an important state change for users, since before this * point data loss might occur if the primary dies and we need to * failover to the standby. 
The state change is also important for * synchronous replication, since commits that started to wait at that * point might wait for some time. */ if (MyWalSnd->state == WALSNDSTATE_CATCHUP && caughtup) { ereport(DEBUG1, (errmsg("standby \"%s\" has now caught up with primary", application_name))); WalSndSetState(WALSNDSTATE_STREAMING); } ProcessRepliesIfAny(); } /* * Get here on send failure. Clean up and exit. * * Reset whereToSendOutput to prevent ereport from attempting to send any * more messages to the standby. */ if (whereToSendOutput == DestRemote) whereToSendOutput = DestNone; proc_exit(0); return 1; /* keep the compiler quiet */ }
/* * ProcSleep -- put a process to sleep on the specified lock * * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process (under all XIDs). * * The lock table's partition lock must be held at entry, and will be held * at exit. * * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock). * * ASSUME: that no one will fiddle with the queue until after * we release the partition lock. * * NOTES: The process queue is now a priority queue for locking. */ int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) { LOCKMODE lockmode = locallock->tag.mode; LOCK *lock = locallock->lock; PROCLOCK *proclock = locallock->proclock; uint32 hashcode = locallock->hashcode; LWLock *partitionLock = LockHashPartitionLock(hashcode); PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; bool allow_autovacuum_cancel = true; int myWaitStatus; PGPROC *proc; int i; /* * Determine where to add myself in the wait queue. * * Normally I should go at the end of the queue. However, if I already * hold locks that conflict with the request of any previous waiter, put * myself in the queue just in front of the first such waiter. This is not * a necessary step, since deadlock detection would move me to before that * waiter anyway; but it's relatively cheap to detect such a conflict * immediately, and avoid delaying till deadlock timeout. * * Special case: if I find I should go in front of some waiter, check to * see if I conflict with already-held locks or the requests before that * waiter. If not, then just grant myself the requested lock immediately. * This is the same as the test for immediate grant in LockAcquire, except * we are only considering the part of the wait queue before my insertion * point. */ if (myHeldLocks != 0) { LOCKMASK aheadRequests = 0; proc = (PGPROC *) waitQueue->links.next; for (i = 0; i < waitQueue->size; i++) { /* Must he wait for me? */ if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks) { /* Must I wait for him ? */ if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks) { /* * Yes, so we have a deadlock. Easiest way to clean up * correctly is to call RemoveFromWaitQueue(), but we * can't do that until we are *on* the wait queue. So, set * a flag to check below, and break out of loop. Also, * record deadlock info for later message. */ RememberSimpleDeadLock(MyProc, lockmode, lock, proc); early_deadlock = true; break; } /* I must go before this waiter. Check special case. */ if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 && LockCheckConflicts(lockMethodTable, lockmode, lock, proclock) == STATUS_OK) { /* Skip the wait and just grant myself the lock. */ GrantLock(lock, proclock, lockmode); GrantAwaitedLock(); return STATUS_OK; } /* Break out of loop to put myself before him */ break; } /* Nope, so advance to next waiter */ aheadRequests |= LOCKBIT_ON(proc->waitLockMode); proc = (PGPROC *) proc->links.next; } /* * If we fall out of loop normally, proc points to waitQueue head, so * we will insert at tail of queue as desired. */ } else { /* I hold no locks, so I can't push in front of anyone. */ proc = (PGPROC *) &(waitQueue->links); } /* * Insert self into queue, ahead of the given proc (or at tail of queue). 
*/ SHMQueueInsertBefore(&(proc->links), &(MyProc->links)); waitQueue->size++; lock->waitMask |= LOCKBIT_ON(lockmode); /* Set up wait information in PGPROC object, too */ MyProc->waitLock = lock; MyProc->waitProcLock = proclock; MyProc->waitLockMode = lockmode; MyProc->waitStatus = STATUS_WAITING; /* * If we detected deadlock, give up without waiting. This must agree with * CheckDeadLock's recovery code, except that we shouldn't release the * semaphore since we haven't tried to lock it yet. */ if (early_deadlock) { RemoveFromWaitQueue(MyProc, hashcode); return STATUS_ERROR; } /* mark that we are waiting for a lock */ lockAwaited = locallock; /* * Release the lock table's partition lock. * * NOTE: this may also cause us to exit critical-section state, possibly * allowing a cancel/die interrupt to be accepted. This is OK because we * have recorded the fact that we are waiting for a lock, and so * LockErrorCleanup will clean up if cancel/die happens. */ LWLockRelease(partitionLock); /* * Also, now that we will successfully clean up after an ereport, it's * safe to check to see if there's a buffer pin deadlock against the * Startup process. Of course, that's only necessary if we're doing Hot * Standby and are not the Startup process ourselves. */ if (RecoveryInProgress() && !InRecovery) CheckRecoveryConflictDeadlock(); /* Reset deadlock_state before enabling the timeout handler */ deadlock_state = DS_NOT_YET_CHECKED; got_deadlock_timeout = false; /* * Set timer so we can wake up after awhile and check for a deadlock. If a * deadlock is detected, the handler releases the process's semaphore and * sets MyProc->waitStatus = STATUS_ERROR, allowing us to know that we * must report failure rather than success. * * By delaying the check until we've waited for a bit, we can avoid * running the rather expensive deadlock-check code in most cases. * * If LockTimeout is set, also enable the timeout for that. We can save a * few cycles by enabling both timeout sources in one call. */ if (LockTimeout > 0) { EnableTimeoutParams timeouts[2]; timeouts[0].id = DEADLOCK_TIMEOUT; timeouts[0].type = TMPARAM_AFTER; timeouts[0].delay_ms = DeadlockTimeout; timeouts[1].id = LOCK_TIMEOUT; timeouts[1].type = TMPARAM_AFTER; timeouts[1].delay_ms = LockTimeout; enable_timeouts(timeouts, 2); } else enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout); /* * If somebody wakes us between LWLockRelease and WaitLatch, the latch * will not wait. But a set latch does not necessarily mean that the lock * is free now, as there are many other sources for latch sets than * somebody releasing the lock. * * We process interrupts whenever the latch has been set, so cancel/die * interrupts are processed quickly. This means we must not mind losing * control to a cancel/die interrupt here. We don't, because we have no * shared-state-change work to do after being granted the lock (the * grantor did it all). We do have to worry about canceling the deadlock * timeout and updating the locallock table, but if we lose control to an * error, LockErrorCleanup will fix that up. */ do { WaitLatch(MyLatch, WL_LATCH_SET, 0); ResetLatch(MyLatch); /* check for deadlocks first, as that's probably log-worthy */ if (got_deadlock_timeout) { CheckDeadLock(); got_deadlock_timeout = false; } CHECK_FOR_INTERRUPTS(); /* * waitStatus could change from STATUS_WAITING to something else * asynchronously. Read it just once per loop to prevent surprising * behavior (such as missing log messages). 
*/ myWaitStatus = *((volatile int *) &MyProc->waitStatus); /* * If we are not deadlocked, but are waiting on an autovacuum-induced * task, send a signal to interrupt it. */ if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel) { PGPROC *autovac = GetBlockingAutoVacuumPgproc(); PGXACT *autovac_pgxact = &ProcGlobal->allPgXact[autovac->pgprocno]; LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); /* * Only do it if the worker is not working to protect against Xid * wraparound. */ if ((autovac_pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) && !(autovac_pgxact->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND)) { int pid = autovac->pid; StringInfoData locktagbuf; StringInfoData logbuf; /* errdetail for server log */ initStringInfo(&locktagbuf); initStringInfo(&logbuf); DescribeLockTag(&locktagbuf, &lock->tag); appendStringInfo(&logbuf, _("Process %d waits for %s on %s."), MyProcPid, GetLockmodeName(lock->tag.locktag_lockmethodid, lockmode), locktagbuf.data); /* release lock as quickly as possible */ LWLockRelease(ProcArrayLock); ereport(LOG, (errmsg("sending cancel to blocking autovacuum PID %d", pid), errdetail_log("%s", logbuf.data))); pfree(logbuf.data); pfree(locktagbuf.data); /* send the autovacuum worker Back to Old Kent Road */ if (kill(pid, SIGINT) < 0) { /* Just a warning to allow multiple callers */ ereport(WARNING, (errmsg("could not send signal to process %d: %m", pid))); } } else LWLockRelease(ProcArrayLock); /* prevent signal from being resent more than once */ allow_autovacuum_cancel = false; } /* * If awoken after the deadlock check interrupt has run, and * log_lock_waits is on, then report about the wait. */ if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED) { StringInfoData buf, lock_waiters_sbuf, lock_holders_sbuf; const char *modename; long secs; int usecs; long msecs; SHM_QUEUE *procLocks; PROCLOCK *proclock; bool first_holder = true, first_waiter = true; int lockHoldersNum = 0; initStringInfo(&buf); initStringInfo(&lock_waiters_sbuf); initStringInfo(&lock_holders_sbuf); DescribeLockTag(&buf, &locallock->tag.lock); modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid, lockmode); TimestampDifference(get_timeout_start_time(DEADLOCK_TIMEOUT), GetCurrentTimestamp(), &secs, &usecs); msecs = secs * 1000 + usecs / 1000; usecs = usecs % 1000; /* * we loop over the lock's procLocks to gather a list of all * holders and waiters. Thus we will be able to provide more * detailed information for lock debugging purposes. * * lock->procLocks contains all processes which hold or wait for * this lock. 
*/ LWLockAcquire(partitionLock, LW_SHARED); procLocks = &(lock->procLocks); proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink)); while (proclock) { /* * we are a waiter if myProc->waitProcLock == proclock; we are * a holder if it is NULL or something different */ if (proclock->tag.myProc->waitProcLock == proclock) { if (first_waiter) { appendStringInfo(&lock_waiters_sbuf, "%d", proclock->tag.myProc->pid); first_waiter = false; } else appendStringInfo(&lock_waiters_sbuf, ", %d", proclock->tag.myProc->pid); } else { if (first_holder) { appendStringInfo(&lock_holders_sbuf, "%d", proclock->tag.myProc->pid); first_holder = false; } else appendStringInfo(&lock_holders_sbuf, ", %d", proclock->tag.myProc->pid); lockHoldersNum++; } proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink, offsetof(PROCLOCK, lockLink)); } LWLockRelease(partitionLock); if (deadlock_state == DS_SOFT_DEADLOCK) ereport(LOG, (errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs), (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", "Processes holding the lock: %s. Wait queue: %s.", lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); else if (deadlock_state == DS_HARD_DEADLOCK) { /* * This message is a bit redundant with the error that will be * reported subsequently, but in some cases the error report * might not make it to the log (eg, if it's caught by an * exception handler), and we want to ensure all long-wait * events get logged. */ ereport(LOG, (errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs), (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", "Processes holding the lock: %s. Wait queue: %s.", lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); } if (myWaitStatus == STATUS_WAITING) ereport(LOG, (errmsg("process %d still waiting for %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs), (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", "Processes holding the lock: %s. Wait queue: %s.", lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); else if (myWaitStatus == STATUS_OK) ereport(LOG, (errmsg("process %d acquired %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs))); else { Assert(myWaitStatus == STATUS_ERROR); /* * Currently, the deadlock checker always kicks its own * process, which means that we'll only see STATUS_ERROR when * deadlock_state == DS_HARD_DEADLOCK, and there's no need to * print redundant messages. But for completeness and * future-proofing, print a message if it looks like someone * else kicked us off the lock. */ if (deadlock_state != DS_HARD_DEADLOCK) ereport(LOG, (errmsg("process %d failed to acquire %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs), (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", "Processes holding the lock: %s. Wait queue: %s.", lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); } /* * At this point we might still need to wait for the lock. Reset * state so we don't print the above messages again. */ deadlock_state = DS_NO_DEADLOCK; pfree(buf.data); pfree(lock_holders_sbuf.data); pfree(lock_waiters_sbuf.data); } } while (myWaitStatus == STATUS_WAITING); /* * Disable the timers, if they are still running. 
As in LockErrorCleanup, * we must preserve the LOCK_TIMEOUT indicator flag: if a lock timeout has * already caused QueryCancelPending to become set, we want the cancel to * be reported as a lock timeout, not a user cancel. */ if (LockTimeout > 0) { DisableTimeoutParams timeouts[2]; timeouts[0].id = DEADLOCK_TIMEOUT; timeouts[0].keep_indicator = false; timeouts[1].id = LOCK_TIMEOUT; timeouts[1].keep_indicator = true; disable_timeouts(timeouts, 2); } else disable_timeout(DEADLOCK_TIMEOUT, false); /* * Re-acquire the lock table's partition lock. We have to do this to hold * off cancel/die interrupts before we can mess with lockAwaited (else we * might have a missed or duplicated locallock update). */ LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* * We no longer want LockErrorCleanup to do anything. */ lockAwaited = NULL; /* * If we got the lock, be sure to remember it in the locallock table. */ if (MyProc->waitStatus == STATUS_OK) GrantAwaitedLock(); /* * We don't have to do anything else, because the awaker did all the * necessary update of the lock table and MyProc. */ return MyProc->waitStatus; }
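For reference, the timers disabled above are armed with the mirror-image call sequence before the wait loop is entered. A minimal sketch using the timeout.h API (DeadlockTimeout and LockTimeout are the deadlock_timeout and lock_timeout GUCs, in milliseconds); this follows the same two-branch shape as the disable path shown above:

	/*
	 * Sketch: arm DEADLOCK_TIMEOUT (and LOCK_TIMEOUT if configured) before
	 * sleeping on the lock, matching the disable_timeouts() call above.
	 */
	if (LockTimeout > 0)
	{
		EnableTimeoutParams timeouts[2];

		timeouts[0].id = DEADLOCK_TIMEOUT;
		timeouts[0].type = TMPARAM_AFTER;
		timeouts[0].delay_ms = DeadlockTimeout;
		timeouts[1].id = LOCK_TIMEOUT;
		timeouts[1].type = TMPARAM_AFTER;
		timeouts[1].delay_ms = LockTimeout;
		enable_timeouts(timeouts, 2);
	}
	else
		enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);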
/* * Transmit a libpq protocol message to the shared memory message queue * selected via pq_mq_handle. We don't include a length word, because the * receiver will know the length of the message from shm_mq_receive(). */ static int mq_putmessage(char msgtype, const char *s, size_t len) { shm_mq_iovec iov[2]; shm_mq_result result; /* * If we're sending a message, and we have to wait because the queue is * full, and then we get interrupted, and that interrupt results in trying * to send another message, we respond by detaching the queue. There's no * way to return to the original context, but even if there were, just * queueing the message would amount to indefinitely postponing the * response to the interrupt. So we do this instead. */ if (pq_mq_busy) { if (pq_mq != NULL) shm_mq_detach(pq_mq); pq_mq = NULL; pq_mq_handle = NULL; return EOF; } /* * If the message queue is already gone, just ignore the message. This * doesn't necessarily indicate a problem; for example, DEBUG messages * can be generated late in the shutdown sequence, after all DSMs have * already been detached. */ if (pq_mq == NULL) return 0; pq_mq_busy = true; iov[0].data = &msgtype; iov[0].len = 1; iov[1].data = s; iov[1].len = len; Assert(pq_mq_handle != NULL); for (;;) { result = shm_mq_sendv(pq_mq_handle, iov, 2, true); if (pq_mq_parallel_master_pid != 0) SendProcSignal(pq_mq_parallel_master_pid, PROCSIG_PARALLEL_MESSAGE, pq_mq_parallel_master_backend_id); if (result != SHM_MQ_WOULD_BLOCK) break; WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0); CHECK_FOR_INTERRUPTS(); ResetLatch(&MyProc->procLatch); } pq_mq_busy = false; Assert(result == SHM_MQ_SUCCESS || result == SHM_MQ_DETACHED); if (result != SHM_MQ_SUCCESS) return EOF; return 0; }
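On the other side of such a queue, a receiver typically drains messages with the same nowait-plus-latch pattern. A minimal sketch, assuming a shm_mq_handle obtained elsewhere and a caller-initialized StringInfo; drain_one_message is an illustrative name, not a PostgreSQL API:

/*
 * Sketch: read one message from "mqh" into "msg" (already initialized with
 * initStringInfo), waiting on the process latch whenever the queue is empty.
 */
static bool
drain_one_message(shm_mq_handle *mqh, StringInfo msg)
{
	for (;;)
	{
		shm_mq_result res;
		Size		nbytes;
		void	   *data;

		/* Try to read without blocking inside shm_mq_receive(). */
		res = shm_mq_receive(mqh, &nbytes, &data, true);
		if (res == SHM_MQ_SUCCESS)
		{
			resetStringInfo(msg);
			appendBinaryStringInfo(msg, (char *) data, nbytes);
			return true;
		}
		if (res == SHM_MQ_DETACHED)
			return false;		/* sender is gone */

		/* SHM_MQ_WOULD_BLOCK: wait for the sender to set our latch. */
		WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);
		CHECK_FOR_INTERRUPTS();
		ResetLatch(&MyProc->procLatch);
	}
}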
/* * Attempt to read a tuple from one of our parallel workers. */ static HeapTuple gather_readnext(GatherState *gatherstate) { int waitpos = gatherstate->nextreader; for (;;) { TupleQueueReader *reader; HeapTuple tup; bool readerdone; /* Make sure we've read all messages from workers. */ HandleParallelMessages(); /* Attempt to read a tuple, but don't block if none is available. */ reader = gatherstate->reader[gatherstate->nextreader]; tup = TupleQueueReaderNext(reader, true, &readerdone); /* * If this reader is done, remove it. If all readers are done, * clean up remaining worker state. */ if (readerdone) { DestroyTupleQueueReader(reader); --gatherstate->nreaders; if (gatherstate->nreaders == 0) { ExecShutdownGatherWorkers(gatherstate); return NULL; } else { memmove(&gatherstate->reader[gatherstate->nextreader], &gatherstate->reader[gatherstate->nextreader + 1], sizeof(TupleQueueReader *) * (gatherstate->nreaders - gatherstate->nextreader)); if (gatherstate->nextreader >= gatherstate->nreaders) gatherstate->nextreader = 0; if (gatherstate->nextreader < waitpos) --waitpos; } continue; } /* If we got a tuple, return it. */ if (tup) return tup; /* * Advance nextreader pointer in round-robin fashion. Note that we * only reach this code if we weren't able to get a tuple from the * current worker. We used to advance the nextreader pointer after * every tuple, but it turns out to be much more efficient to keep * reading from the same queue until that would require blocking. */ gatherstate->nextreader = (gatherstate->nextreader + 1) % gatherstate->nreaders; /* Have we visited every TupleQueueReader? */ if (gatherstate->nextreader == waitpos) { /* * If (still) running plan locally, return NULL so caller can * generate another tuple from the local copy of the plan. */ if (gatherstate->need_to_scan_locally) return NULL; /* Nothing to do except wait for developments. */ WaitLatch(MyLatch, WL_LATCH_SET, 0); CHECK_FOR_INTERRUPTS(); ResetLatch(MyLatch); } } }
static void kill_idle_main(Datum main_arg) { StringInfoData buf; /* Register functions for SIGTERM/SIGHUP management */ pqsignal(SIGHUP, kill_idle_sighup); pqsignal(SIGTERM, kill_idle_sigterm); /* We're now ready to receive signals */ BackgroundWorkerUnblockSignals(); /* Connect to a database */ BackgroundWorkerInitializeConnection("postgres", NULL); /* Build query for process */ initStringInfo(&buf); kill_idle_build_query(&buf); while (!got_sigterm) { int rc, ret, i; /* Wait necessary amount of time */ rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, kill_max_idle_time * 1000L, PG_WAIT_EXTENSION); ResetLatch(&MyProc->procLatch); /* Emergency bailout if postmaster has died */ if (rc & WL_POSTMASTER_DEATH) proc_exit(1); /* Process signals */ if (got_sighup) { int old_interval; /* Save old value of kill interval */ old_interval = kill_max_idle_time; /* Process config file */ ProcessConfigFile(PGC_SIGHUP); got_sighup = false; ereport(LOG, (errmsg("bgworker kill_idle signal: processed SIGHUP"))); /* Rebuild query if necessary */ if (old_interval != kill_max_idle_time) { resetStringInfo(&buf); initStringInfo(&buf); kill_idle_build_query(&buf); } } if (got_sigterm) { /* Simply exit */ ereport(LOG, (errmsg("bgworker kill_idle signal: processed SIGTERM"))); proc_exit(0); } /* Process idle connection kill */ SetCurrentStatementStartTimestamp(); StartTransactionCommand(); SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); pgstat_report_activity(STATE_RUNNING, buf.data); /* Statement start time */ SetCurrentStatementStartTimestamp(); /* Execute query */ ret = SPI_execute(buf.data, false, 0); /* Some error handling */ if (ret != SPI_OK_SELECT) elog(FATAL, "Error when trying to kill idle connections"); /* Do some processing and log stuff disconnected */ for (i = 0; i < SPI_processed; i++) { int32 pidValue; bool isnull; char *datname = NULL; char *usename = NULL; char *client_addr = NULL; /* Fetch values */ pidValue = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull)); usename = DatumGetCString(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 3, &isnull)); datname = DatumGetCString(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 4, &isnull)); client_addr = DatumGetCString(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 5, &isnull)); /* Log what has been disconnected */ elog(LOG, "Disconnected idle connection: PID %d %s/%s/%s", pidValue, datname ? datname : "none", usename ? usename : "none", client_addr ? client_addr : "none"); } SPI_finish(); PopActiveSnapshot(); CommitTransactionCommand(); pgstat_report_activity(STATE_IDLE, NULL); } /* No problems, so clean exit */ proc_exit(0); }
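The query builder invoked above is not shown here. A hypothetical sketch of what it might produce, matching the column positions read in the loop (pid in column 1, usename in 3, datname in 4, client_addr in 5); the exact WHERE clause is an assumption:

/*
 * Hypothetical sketch of kill_idle_build_query: column 2 carries the
 * pg_terminate_backend() result so the remaining columns line up with the
 * SPI_getbinval() calls above.
 */
static void
kill_idle_build_query(StringInfoData *buf)
{
	appendStringInfo(buf,
					 "SELECT pid, pg_terminate_backend(pid), usename, "
					 "datname, host(client_addr) "
					 "FROM pg_stat_activity "
					 "WHERE state = 'idle' "
					 "AND now() - state_change > interval '%d s' "
					 "AND pid != pg_backend_pid()",
					 kill_max_idle_time);
}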
static void worker_spi_main(Datum main_arg) { /* Register functions for SIGTERM/SIGHUP management */ pqsignal(SIGHUP, worker_spi_sighup); pqsignal(SIGTERM, worker_spi_sigterm); /* We're now ready to receive signals */ BackgroundWorkerUnblockSignals(); /* Connect to our database */ BackgroundWorkerInitializeConnection("postgres", NULL); while (!got_sigterm) { int ret; int rc; StringInfoData buf; /* * Background workers mustn't call usleep() or any direct equivalent: * instead, they may wait on their process latch, which sleeps as * necessary, but is awakened if postmaster dies. That way the * background process goes away immediately in an emergency. */ rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, 1000L); ResetLatch(&MyProc->procLatch); /* emergency bailout if postmaster has died */ if (rc & WL_POSTMASTER_DEATH) proc_exit(1); StartTransactionCommand(); SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); initStringInfo(&buf); /* Build the query string */ appendStringInfo(&buf, "SELECT count(*) FROM pg_class;"); ret = SPI_execute(buf.data, true, 0); /* Some error messages in case of incorrect handling */ if (ret != SPI_OK_SELECT) elog(FATAL, "SPI_execute failed: error code %d", ret); if (SPI_processed > 0) { int32 count; bool isnull; count = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); elog(LOG, "Currently %d relations in database", count); } SPI_finish(); PopActiveSnapshot(); CommitTransactionCommand(); } proc_exit(0); }
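A worker like this still has to be registered when the library is loaded. A sketch of the matching _PG_init(), assuming the 9.4-era API in which bgw_main may point straight at the entry function (later releases identify the entry point via bgw_library_name/bgw_function_name instead):

/*
 * Sketch: register the worker at postmaster start (the library must be in
 * shared_preload_libraries for this form of registration).
 */
void
_PG_init(void)
{
	BackgroundWorker worker;

	MemSet(&worker, 0, sizeof(worker));
	worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
		BGWORKER_BACKEND_DATABASE_CONNECTION;
	worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
	worker.bgw_restart_time = BGW_NEVER_RESTART;
	worker.bgw_main = worker_spi_main;	/* entry point in this library */
	worker.bgw_main_arg = (Datum) 0;
	snprintf(worker.bgw_name, BGW_MAXLEN, "worker_spi");

	RegisterBackgroundWorker(&worker);
}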
void
worker_spi_main(Datum main_arg)
{
	int			index = DatumGetInt32(main_arg);
	worktable  *table;
	StringInfoData buf;
	char		name[20];

	table = palloc(sizeof(worktable));
	sprintf(name, "schema%d", index);
	table->schema = pstrdup(name);
	table->name = pstrdup("counted");

	/* Establish signal handlers before unblocking signals. */
	pqsignal(SIGHUP, worker_spi_sighup);
	pqsignal(SIGTERM, worker_spi_sigterm);

	/* We're now ready to receive signals */
	BackgroundWorkerUnblockSignals();

	/* Connect to our database */
	BackgroundWorkerInitializeConnection("postgres", NULL);

	elog(LOG, "%s initialized with %s.%s",
		 MyBgworkerEntry->bgw_name, table->schema, table->name);
	initialize_worker_spi(table);

	/*
	 * Quote identifiers passed to us.  Note that this must be done after
	 * initialize_worker_spi, because that routine assumes the names are not
	 * quoted.
	 *
	 * Note some memory might be leaked here.
	 */
	table->schema = quote_identifier(table->schema);
	table->name = quote_identifier(table->name);

	initStringInfo(&buf);
	appendStringInfo(&buf,
					 "WITH deleted AS (DELETE "
					 "FROM %s.%s "
					 "WHERE type = 'delta' RETURNING value), "
					 "total AS (SELECT coalesce(sum(value), 0) as sum "
					 "FROM deleted) "
					 "UPDATE %s.%s "
					 "SET value = %s.value + total.sum "
					 "FROM total WHERE type = 'total' "
					 "RETURNING %s.value",
					 table->schema, table->name,
					 table->schema, table->name,
					 table->name,
					 table->name);

	/*
	 * Main loop: do this until the SIGTERM handler tells us to terminate
	 */
	while (!got_sigterm)
	{
		int			ret;
		int			rc;

		/*
		 * Background workers mustn't call usleep() or any direct equivalent:
		 * instead, they may wait on their process latch, which sleeps as
		 * necessary, but is awakened if postmaster dies.  That way the
		 * background process goes away immediately in an emergency.
		 */
		rc = WaitLatch(&MyProc->procLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   worker_spi_naptime * 1000L);
		ResetLatch(&MyProc->procLatch);

		/* emergency bailout if postmaster has died */
		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);

		/*
		 * In case of a SIGHUP, just reload the configuration.
		 */
		if (got_sighup)
		{
			got_sighup = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/*
		 * Start a transaction on which we can run queries.  Note that each
		 * StartTransactionCommand() call should be preceded by a
		 * SetCurrentStatementStartTimestamp() call, which sets both the time
		 * for the statement we're about to run, and also the transaction
		 * start time.  Also, each other query sent to SPI should probably be
		 * preceded by SetCurrentStatementStartTimestamp(), so that statement
		 * start time is always up to date.
		 *
		 * The SPI_connect() call lets us run queries through the SPI
		 * manager, and the PushActiveSnapshot() call creates an "active"
		 * snapshot which is necessary for queries to have MVCC data to work
		 * on.
		 *
		 * The pgstat_report_activity() call makes our activity visible
		 * through the pgstat views.
		 */
		SetCurrentStatementStartTimestamp();
		StartTransactionCommand();
		SPI_connect();
		PushActiveSnapshot(GetTransactionSnapshot());
		pgstat_report_activity(STATE_RUNNING, buf.data);

		/* We can now execute queries via SPI */
		ret = SPI_execute(buf.data, false, 0);

		if (ret != SPI_OK_UPDATE_RETURNING)
			elog(FATAL, "cannot select from table %s.%s: error code %d",
				 table->schema, table->name, ret);

		if (SPI_processed > 0)
		{
			bool		isnull;
			int32		val;

			val = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
											  SPI_tuptable->tupdesc,
											  1, &isnull));
			if (!isnull)
				elog(LOG, "%s: count in %s.%s is now %d",
					 MyBgworkerEntry->bgw_name,
					 table->schema, table->name, val);
		}

		/*
		 * And finish our transaction.
		 */
		SPI_finish();
		PopActiveSnapshot();
		CommitTransactionCommand();
		pgstat_report_activity(STATE_IDLE, NULL);
	}

	proc_exit(1);
}
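In the worker_spi module, a loop like this is normally started on demand rather than at postmaster start. A sketch of such a launcher, assuming the 9.4-era dynamic background worker API; launch_worker_spi is an illustrative wrapper, not part of the code shown above:

/*
 * Sketch: start one worker_spi worker dynamically and wait until the
 * postmaster has actually launched it.
 */
static pid_t
launch_worker_spi(int index)
{
	BackgroundWorker worker;
	BackgroundWorkerHandle *handle;
	BgwHandleStatus status;
	pid_t		pid;

	MemSet(&worker, 0, sizeof(worker));
	worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
		BGWORKER_BACKEND_DATABASE_CONNECTION;
	worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
	worker.bgw_restart_time = BGW_NEVER_RESTART;
	snprintf(worker.bgw_library_name, BGW_MAXLEN, "worker_spi");
	snprintf(worker.bgw_function_name, BGW_MAXLEN, "worker_spi_main");
	snprintf(worker.bgw_name, BGW_MAXLEN, "worker_spi worker %d", index);
	worker.bgw_main_arg = Int32GetDatum(index);
	/* needed so WaitForBackgroundWorkerStartup() can set our latch */
	worker.bgw_notify_pid = MyProcPid;

	if (!RegisterDynamicBackgroundWorker(&worker, &handle))
		ereport(ERROR,
				(errmsg("could not register background worker")));

	status = WaitForBackgroundWorkerStartup(handle, &pid);
	if (status != BGWH_STARTED)
		ereport(ERROR,
				(errmsg("could not start background worker")));

	return pid;
}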
/* * Stop the logical replication worker for subid/relid, if any, and wait until * it detaches from the slot. */ void logicalrep_worker_stop(Oid subid, Oid relid) { LogicalRepWorker *worker; uint16 generation; LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); worker = logicalrep_worker_find(subid, relid, false); /* No worker, nothing to do. */ if (!worker) { LWLockRelease(LogicalRepWorkerLock); return; } /* * Remember which generation was our worker so we can check if what we see * is still the same one. */ generation = worker->generation; /* * If we found a worker but it does not have proc set then it is still * starting up; wait for it to finish starting and then kill it. */ while (worker->in_use && !worker->proc) { int rc; LWLockRelease(LogicalRepWorkerLock); /* Wait a bit --- we don't expect to have to wait long. */ rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, 10L, WAIT_EVENT_BGWORKER_STARTUP); if (rc & WL_LATCH_SET) { ResetLatch(MyLatch); CHECK_FOR_INTERRUPTS(); } /* Recheck worker status. */ LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); /* * Check whether the worker slot is no longer used, which would mean * that the worker has exited, or whether the worker generation is * different, meaning that a different worker has taken the slot. */ if (!worker->in_use || worker->generation != generation) { LWLockRelease(LogicalRepWorkerLock); return; } /* Worker has assigned proc, so it has started. */ if (worker->proc) break; } /* Now terminate the worker ... */ kill(worker->proc->pid, SIGTERM); /* ... and wait for it to die. */ for (;;) { int rc; /* is it gone? */ if (!worker->proc || worker->generation != generation) break; LWLockRelease(LogicalRepWorkerLock); /* Wait a bit --- we don't expect to have to wait long. */ rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, 10L, WAIT_EVENT_BGWORKER_SHUTDOWN); if (rc & WL_LATCH_SET) { ResetLatch(MyLatch); CHECK_FOR_INTERRUPTS(); } LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); } LWLockRelease(LogicalRepWorkerLock); }
/*
 * pgarch_MainLoop
 *
 * Main loop for archiver
 */
static void
pgarch_MainLoop(void)
{
	pg_time_t	last_copy_time = 0;
	bool		time_to_stop;

	/*
	 * We run the copy loop immediately upon entry, in case there are
	 * unarchived files left over from a previous database run (or maybe the
	 * archiver died unexpectedly).  After that we wait for a signal or
	 * timeout before doing more.
	 */
	wakened = true;

	/*
	 * There shouldn't be anything for the archiver to do except to wait for
	 * a signal ... however, the archiver exists to protect our data, so she
	 * wakes up occasionally to allow herself to be proactive.
	 */
	do
	{
		ResetLatch(&mainloop_latch);

		/* When we get SIGUSR2, we do one more archive cycle, then exit */
		time_to_stop = ready_to_stop;

		/* Check for config update */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/*
		 * If we've gotten SIGTERM, we normally just sit and do nothing until
		 * SIGUSR2 arrives.  However, that means a random SIGTERM would
		 * disable archiving indefinitely, which doesn't seem like a good
		 * idea.  If more than 60 seconds pass since SIGTERM, exit anyway, so
		 * that the postmaster can start a new archiver if needed.
		 */
		if (got_SIGTERM)
		{
			time_t		curtime = time(NULL);

			if (last_sigterm_time == 0)
				last_sigterm_time = curtime;
			else if ((unsigned int) (curtime - last_sigterm_time) >=
					 (unsigned int) 60)
				break;
		}

		/* Do what we're here for */
		if (wakened || time_to_stop)
		{
			wakened = false;
			pgarch_ArchiverCopyLoop();
			last_copy_time = time(NULL);
		}

		/*
		 * Sleep until a signal is received, or until a poll is forced by
		 * PGARCH_AUTOWAKE_INTERVAL having passed since last_copy_time, or
		 * until postmaster dies.
		 */
		if (!time_to_stop)		/* Don't wait during last iteration */
		{
			pg_time_t	curtime = (pg_time_t) time(NULL);
			int			timeout;

			timeout = PGARCH_AUTOWAKE_INTERVAL - (curtime - last_copy_time);
			if (timeout > 0)
			{
				int			rc;

				/* WaitLatch timeout is in milliseconds */
				rc = WaitLatch(&mainloop_latch,
							   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
							   timeout * 1000L);
				if (rc & WL_TIMEOUT)
					wakened = true;
			}
			else
				wakened = true;
		}

		/*
		 * The archiver quits either when the postmaster dies (not expected)
		 * or after completing one more archiving cycle after receiving
		 * SIGUSR2.
		 */
	} while (PostmasterIsAlive() && !time_to_stop);
}
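The latch this loop sleeps on is set from a signal handler when new work arrives. The wake-up side looks roughly like this sketch of a SIGUSR1 handler (flag and latch names as in the loop above):

/*
 * Sketch: handler that wakes the archiver.  The flag is set before the
 * latch so the main loop sees the pending work once it wakes.
 */
static void
pgarch_waken(SIGNAL_ARGS)
{
	int			save_errno = errno;

	/* set flag that there is work to be done */
	wakened = true;
	SetLatch(&mainloop_latch);

	errno = save_errno;
}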
/* Main loop of walsender process */ static int WalSndLoop(void) { char *output_message; bool caughtup = false; /* * Allocate buffer that will be used for each output message. We do this * just once to reduce palloc overhead. The buffer must be made large * enough for maximum-sized messages. */ output_message = palloc(1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE); /* * Allocate buffer that will be used for processing reply messages. As * above, do this just once to reduce palloc overhead. */ initStringInfo(&reply_message); /* Initialize the last reply timestamp */ last_reply_timestamp = GetCurrentTimestamp(); /* Loop forever, unless we get an error */ for (;;) { /* Clear any already-pending wakeups */ ResetLatch(&MyWalSnd->latch); /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ if (!PostmasterIsAlive()) exit(1); /* Process any requests or signals received recently */ if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); SyncRepInitConfig(); } /* Normal exit from the walsender is here */ if (walsender_shutdown_requested) { /* Inform the standby that XLOG streaming is done */ pq_puttextmessage('C', "COPY 0"); pq_flush(); proc_exit(0); } /* Check for input from the client */ ProcessRepliesIfAny(); /* * If we don't have any pending data in the output buffer, try to send * some more. If there is some, we don't bother to call XLogSend * again until we've flushed it ... but we'd better assume we are not * caught up. */ if (!pq_is_send_pending()) XLogSend(output_message, &caughtup); else caughtup = false; /* Try to flush pending output to the client */ if (pq_flush_if_writable() != 0) break; /* If nothing remains to be sent right now ... */ if (caughtup && !pq_is_send_pending()) { /* * If we're in catchup state, move to streaming. This is an * important state change for users to know about, since before * this point data loss might occur if the primary dies and we * need to failover to the standby. The state change is also * important for synchronous replication, since commits that * started to wait at that point might wait for some time. */ if (MyWalSnd->state == WALSNDSTATE_CATCHUP) { ereport(DEBUG1, (errmsg("standby \"%s\" has now caught up with primary", application_name))); WalSndSetState(WALSNDSTATE_STREAMING); } /* * When SIGUSR2 arrives, we send any outstanding logs up to the * shutdown checkpoint record (i.e., the latest record) and exit. * This may be a normal termination at shutdown, or a promotion, * the walsender is not sure which. */ if (walsender_ready_to_stop) { /* ... let's just be real sure we're caught up ... */ XLogSend(output_message, &caughtup); if (caughtup && !pq_is_send_pending()) { walsender_shutdown_requested = true; continue; /* don't want to wait more */ } } } /* * We don't block if not caught up, unless there is unsent data * pending in which case we'd better block until the socket is * write-ready. This test is only needed for the case where XLogSend * loaded a subset of the available data but then pq_flush_if_writable * flushed it all --- we should immediately try to send more. 
*/ if (caughtup || pq_is_send_pending()) { TimestampTz finish_time = 0; long sleeptime = -1; int wakeEvents; wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE; if (pq_is_send_pending()) wakeEvents |= WL_SOCKET_WRITEABLE; /* Determine time until replication timeout */ if (replication_timeout > 0) { long secs; int usecs; finish_time = TimestampTzPlusMilliseconds(last_reply_timestamp, replication_timeout); TimestampDifference(GetCurrentTimestamp(), finish_time, &secs, &usecs); sleeptime = secs * 1000 + usecs / 1000; /* Avoid Assert in WaitLatchOrSocket if timeout is past */ if (sleeptime < 0) sleeptime = 0; wakeEvents |= WL_TIMEOUT; } /* Sleep until something happens or replication timeout */ WaitLatchOrSocket(&MyWalSnd->latch, wakeEvents, MyProcPort->sock, sleeptime); /* * Check for replication timeout. Note we ignore the corner case * possibility that the client replied just as we reached the * timeout ... he's supposed to reply *before* that. */ if (replication_timeout > 0 && GetCurrentTimestamp() >= finish_time) { /* * Since typically expiration of replication timeout means * communication problem, we don't send the error message to * the standby. */ ereport(COMMERROR, (errmsg("terminating walsender process due to replication timeout"))); break; } } } /* * Get here on send failure. Clean up and exit. * * Reset whereToSendOutput to prevent ereport from attempting to send any * more messages to the standby. */ if (whereToSendOutput == DestRemote) whereToSendOutput = DestNone; proc_exit(0); return 1; /* keep the compiler quiet */ }
/* * Write bytes into a shared message queue. */ static shm_mq_result shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, void *data, bool nowait, Size *bytes_written) { shm_mq *mq = mqh->mqh_queue; Size sent = 0; uint64 used; Size ringsize = mq->mq_ring_size; Size available; while (sent < nbytes) { bool detached; uint64 rb; /* Compute number of ring buffer bytes used and available. */ rb = shm_mq_get_bytes_read(mq, &detached); Assert(mq->mq_bytes_written >= rb); used = mq->mq_bytes_written - rb; Assert(used <= ringsize); available = Min(ringsize - used, nbytes - sent); /* Bail out if the queue has been detached. */ if (detached) return SHM_MQ_DETACHED; if (available == 0) { shm_mq_result res; /* * The queue is full, so if the receiver isn't yet known to be * attached, we must wait for that to happen. */ if (!mqh->mqh_counterparty_attached) { if (nowait) { if (shm_mq_get_receiver(mq) == NULL) return SHM_MQ_WOULD_BLOCK; } else if (!shm_mq_wait_internal(mq, &mq->mq_receiver, mqh->mqh_handle)) { mq->mq_detached = true; return SHM_MQ_DETACHED; } mqh->mqh_counterparty_attached = true; } /* Let the receiver know that we need them to read some data. */ res = shm_mq_notify_receiver(mq); if (res != SHM_MQ_SUCCESS) { *bytes_written = sent; return res; } /* Skip manipulation of our latch if nowait = true. */ if (nowait) { *bytes_written = sent; return SHM_MQ_WOULD_BLOCK; } /* * Wait for our latch to be set. It might already be set for * some unrelated reason, but that'll just result in one extra * trip through the loop. It's worth it to avoid resetting the * latch at top of loop, because setting an already-set latch is * much cheaper than setting one that has been reset. */ WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0); /* An interrupt may have occurred while we were waiting. */ CHECK_FOR_INTERRUPTS(); /* Reset the latch so we don't spin. */ ResetLatch(&MyProc->procLatch); } else { Size offset = mq->mq_bytes_written % (uint64) ringsize; Size sendnow = Min(available, ringsize - offset); /* Write as much data as we can via a single memcpy(). */ memcpy(&mq->mq_ring[mq->mq_ring_offset + offset], (char *) data + sent, sendnow); sent += sendnow; /* * Update count of bytes written, with alignment padding. Note * that this will never actually insert any padding except at the * end of a run of bytes, because the buffer size is a multiple of * MAXIMUM_ALIGNOF, and each read is as well. */ Assert(sent == nbytes || sendnow == MAXALIGN(sendnow)); shm_mq_inc_bytes_written(mq, MAXALIGN(sendnow)); /* * For efficiency, we don't set the reader's latch here. We'll * do that only when the buffer fills up or after writing an * entire message. */ } } *bytes_written = sent; return SHM_MQ_SUCCESS; }
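For completeness, the queue these routines operate on has to be created in shared memory and attached before anything can be sent. A minimal sketch, assuming the 9.4-era dynamic shared memory API; create_demo_queue and SKETCH_QUEUE_SIZE are illustrative names, and a real caller would normally carve the segment up with a shm_toc rather than devote it to a single queue:

#define SKETCH_QUEUE_SIZE	16384

/*
 * Sketch: create a DSM segment containing one message queue, with the
 * current backend as sender.  The receiver would dsm_attach() the segment,
 * call shm_mq_set_receiver(), and shm_mq_attach() its own handle.
 */
static shm_mq_handle *
create_demo_queue(dsm_segment **segp)
{
	dsm_segment *seg;
	shm_mq	   *mq;
	shm_mq_handle *mqh;

	seg = dsm_create(SKETCH_QUEUE_SIZE);
	mq = shm_mq_create(dsm_segment_address(seg), SKETCH_QUEUE_SIZE);
	shm_mq_set_sender(mq, MyProc);

	mqh = shm_mq_attach(mq, seg, NULL);

	*segp = seg;
	return mqh;
}

With a handle in hand, the sender pushes data with shm_mq_send(mqh, len, data, false), which in this era of the API ends up in shm_mq_send_bytes() above.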