/* Main entry point for walsender process */ int WalSenderMain(void) { MemoryContext walsnd_context; am_cascading_walsender = RecoveryInProgress(); /* Create a per-walsender data structure in shared memory */ InitWalSnd(); /* * Create a memory context that we will do all our work in. We do this so * that we can reset the context during error recovery and thereby avoid * possible memory leaks. Formerly this code just ran in * TopMemoryContext, but resetting that would be a really bad idea. * * XXX: we don't actually attempt error recovery in walsender, we just * close the connection and exit. */ walsnd_context = AllocSetContextCreate(TopMemoryContext, "Wal Sender", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(walsnd_context); /* Set up resource owner */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "walsender top-level resource owner"); /* Unblock signals (they were blocked when the postmaster forked us) */ PG_SETMASK(&UnBlockSig); /* * Use the recovery target timeline ID during recovery */ if (am_cascading_walsender) ThisTimeLineID = GetRecoveryTargetTLI(); /* Tell the standby that walsender is ready for receiving commands */ ReadyForQuery(DestRemote); /* Handle handshake messages before streaming */ WalSndHandshake(); /* Initialize shared memory status */ { /* use volatile pointer to prevent code rearrangement */ volatile WalSnd *walsnd = MyWalSnd; SpinLockAcquire(&walsnd->mutex); walsnd->sentPtr = sentPtr; SpinLockRelease(&walsnd->mutex); } SyncRepInitConfig(); /* Main loop of walsender */ return WalSndLoop(); }
/* * Main entry point for checkpointer process * * This is invoked from AuxiliaryProcessMain, which has already created the * basic execution environment, but not enabled signals yet. */ void CheckpointerMain(void) { sigjmp_buf local_sigjmp_buf; MemoryContext checkpointer_context; CheckpointerShmem->checkpointer_pid = MyProcPid; /* * If possible, make this process a group leader, so that the postmaster * can signal any child processes too. (checkpointer probably never has * any child processes, but for consistency we make all postmaster child * processes do this.) */ #ifdef HAVE_SETSID if (setsid() < 0) elog(FATAL, "setsid() failed: %m"); #endif /* * Properly accept or ignore signals the postmaster might send us * * Note: we deliberately ignore SIGTERM, because during a standard Unix * system shutdown cycle, init will SIGTERM all processes at once. We * want to wait for the backends to exit, whereupon the postmaster will * tell us it's okay to shut down (via SIGUSR2). */ pqsignal(SIGHUP, ChkptSigHupHandler); /* set flag to read config * file */ pqsignal(SIGINT, ReqCheckpointHandler); /* request checkpoint */ pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */ pqsignal(SIGQUIT, chkpt_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, chkpt_sigusr1_handler); pqsignal(SIGUSR2, ReqShutdownHandler); /* request shutdown */ /* * Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); pqsignal(SIGTTIN, SIG_DFL); pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); pqsignal(SIGWINCH, SIG_DFL); /* We allow SIGQUIT (quickdie) at all times */ sigdelset(&BlockSig, SIGQUIT); /* * Initialize so that first time-driven event happens at the correct time. */ last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL); /* * Create a resource owner to keep track of our resources (currently only * buffer pins). */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Checkpointer"); /* * Create a memory context that we will do all our work in. We do this so * that we can reset the context during error recovery and thereby avoid * possible memory leaks. Formerly this code just ran in * TopMemoryContext, but resetting that would be a really bad idea. */ checkpointer_context = AllocSetContextCreate(TopMemoryContext, "Checkpointer", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(checkpointer_context); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Since not using PG_TRY, must reset error stack by hand */ error_context_stack = NULL; /* Prevent interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry * about in checkpointer, but we do have LWLocks, buffers, and temp * files. */ LWLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); /* we needn't bother with the other ResourceOwnerRelease phases */ AtEOXact_Buffers(false); AtEOXact_SMgr(); AtEOXact_Files(); AtEOXact_HashTables(false); /* Warn any waiting backends that the checkpoint failed. */ if (ckpt_active) { /* use volatile pointer to prevent code rearrangement */ volatile CheckpointerShmemStruct *cps = CheckpointerShmem; SpinLockAcquire(&cps->ckpt_lck); cps->ckpt_failed++; cps->ckpt_done = cps->ckpt_started; SpinLockRelease(&cps->ckpt_lck); ckpt_active = false; } /* * Now return to normal top-level context and clear ErrorContext for * next time. */ MemoryContextSwitchTo(checkpointer_context); FlushErrorState(); /* Flush any leaked data in the top-level context */ MemoryContextResetAndDeleteChildren(checkpointer_context); /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); /* * Sleep at least 1 second after any error. A write error is likely * to be repeated, and we don't want to be filling the error logs as * fast as we can. */ pg_usleep(1000000L); /* * Close all open files after any error. This is helpful on Windows, * where holding deleted files open causes various strange errors. * It's not clear we need it elsewhere, but shouldn't hurt. */ smgrcloseall(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; /* * Unblock signals (they were blocked when the postmaster forked us) */ PG_SETMASK(&UnBlockSig); /* * Use the recovery target timeline ID during recovery */ if (RecoveryInProgress()) ThisTimeLineID = GetRecoveryTargetTLI(); /* * Ensure all shared memory values are set correctly for the config. Doing * this here ensures no race conditions from other concurrent updaters. */ UpdateSharedMemoryConfig(); /* * Advertise our latch that backends can use to wake us up while we're * sleeping. */ ProcGlobal->checkpointerLatch = &MyProc->procLatch; /* * Loop forever */ for (;;) { bool do_checkpoint = false; int flags = 0; pg_time_t now; int elapsed_secs; int cur_timeout; int rc; /* Clear any already-pending wakeups */ ResetLatch(&MyProc->procLatch); /* * Process any requests or signals received recently. */ AbsorbFsyncRequests(); if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); /* * Checkpointer is the last process to shut down, so we ask it to * hold the keys for a range of other tasks required most of which * have nothing to do with checkpointing at all. * * For various reasons, some config values can change dynamically * so the primary copy of them is held in shared memory to make * sure all backends see the same value. We make Checkpointer * responsible for updating the shared memory copy if the * parameter setting changes because of SIGHUP. */ UpdateSharedMemoryConfig(); } if (checkpoint_requested) { checkpoint_requested = false; do_checkpoint = true; BgWriterStats.m_requested_checkpoints++; } if (shutdown_requested) { /* * From here on, elog(ERROR) should end with exit(1), not send * control back to the sigsetjmp block above */ ExitOnAnyError = true; /* Close down the database */ ShutdownXLOG(0, 0); /* Normal exit from the checkpointer is here */ proc_exit(0); /* done */ } /* * Force a checkpoint if too much time has elapsed since the last one. * Note that we count a timed checkpoint in stats only when this * occurs without an external request, but we set the CAUSE_TIME flag * bit even if there is also an external request. */ now = (pg_time_t) time(NULL); elapsed_secs = now - last_checkpoint_time; if (elapsed_secs >= CheckPointTimeout) { if (!do_checkpoint) BgWriterStats.m_timed_checkpoints++; do_checkpoint = true; flags |= CHECKPOINT_CAUSE_TIME; } /* * Do a checkpoint if requested. */ if (do_checkpoint) { bool ckpt_performed = false; bool do_restartpoint; /* use volatile pointer to prevent code rearrangement */ volatile CheckpointerShmemStruct *cps = CheckpointerShmem; /* * Check if we should perform a checkpoint or a restartpoint. As a * side-effect, RecoveryInProgress() initializes TimeLineID if * it's not set yet. */ do_restartpoint = RecoveryInProgress(); /* * Atomically fetch the request flags to figure out what kind of a * checkpoint we should perform, and increase the started-counter * to acknowledge that we've started a new checkpoint. */ SpinLockAcquire(&cps->ckpt_lck); flags |= cps->ckpt_flags; cps->ckpt_flags = 0; cps->ckpt_started++; SpinLockRelease(&cps->ckpt_lck); /* * The end-of-recovery checkpoint is a real checkpoint that's * performed while we're still in recovery. */ if (flags & CHECKPOINT_END_OF_RECOVERY) do_restartpoint = false; /* * We will warn if (a) too soon since last checkpoint (whatever * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag * since the last checkpoint start. Note in particular that this * implementation will not generate warnings caused by * CheckPointTimeout < CheckPointWarning. */ if (!do_restartpoint && (flags & CHECKPOINT_CAUSE_XLOG) && elapsed_secs < CheckPointWarning) ereport(LOG, (errmsg_plural("checkpoints are occurring too frequently (%d second apart)", "checkpoints are occurring too frequently (%d seconds apart)", elapsed_secs, elapsed_secs), errhint("Consider increasing the configuration parameter \"checkpoint_segments\"."))); /* * Initialize checkpointer-private variables used during * checkpoint */ ckpt_active = true; if (!do_restartpoint) ckpt_start_recptr = GetInsertRecPtr(); ckpt_start_time = now; ckpt_cached_elapsed = 0; /* * Do the checkpoint. */ if (!do_restartpoint) { CreateCheckPoint(flags); ckpt_performed = true; } else ckpt_performed = CreateRestartPoint(flags); /* * After any checkpoint, close all smgr files. This is so we * won't hang onto smgr references to deleted files indefinitely. */ smgrcloseall(); /* * Indicate checkpoint completion to any waiting backends. */ SpinLockAcquire(&cps->ckpt_lck); cps->ckpt_done = cps->ckpt_started; SpinLockRelease(&cps->ckpt_lck); if (ckpt_performed) { /* * Note we record the checkpoint start time not end time as * last_checkpoint_time. This is so that time-driven * checkpoints happen at a predictable spacing. */ last_checkpoint_time = now; } else { /* * We were not able to perform the restartpoint (checkpoints * throw an ERROR in case of error). Most likely because we * have not received any new checkpoint WAL records since the * last restartpoint. Try again in 15 s. */ last_checkpoint_time = now - CheckPointTimeout + 15; } ckpt_active = false; } /* Check for archive_timeout and switch xlog files if necessary. */ CheckArchiveTimeout(); /* * Send off activity statistics to the stats collector. (The reason * why we re-use bgwriter-related code for this is that the bgwriter * and checkpointer used to be just one process. It's probably not * worth the trouble to split the stats support into two independent * stats message types.) */ pgstat_send_bgwriter(); /* * Sleep until we are signaled or it's time for another checkpoint or * xlog file switch. */ now = (pg_time_t) time(NULL); elapsed_secs = now - last_checkpoint_time; if (elapsed_secs >= CheckPointTimeout) continue; /* no sleep for us ... */ cur_timeout = CheckPointTimeout - elapsed_secs; if (XLogArchiveTimeout > 0 && !RecoveryInProgress()) { elapsed_secs = now - last_xlog_switch_time; if (elapsed_secs >= XLogArchiveTimeout) continue; /* no sleep for us ... */ cur_timeout = Min(cur_timeout, XLogArchiveTimeout - elapsed_secs); } rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, cur_timeout * 1000L /* convert to ms */ ); /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ if (rc & WL_POSTMASTER_DEATH) exit(1); } }
/* * Establish the connection to the primary server for XLOG streaming */ static bool libpqrcv_connect(char *conninfo, XLogRecPtr startpoint) { char conninfo_repl[MAXCONNINFO + 37]; char *primary_sysid; char standby_sysid[32]; TimeLineID primary_tli; TimeLineID standby_tli; PGresult *res; char cmd[64]; /* * Connect using deliberately undocumented parameter: replication. The * database name is ignored by the server in replication mode, but specify * "replication" for .pgpass lookup. */ snprintf(conninfo_repl, sizeof(conninfo_repl), "%s dbname=replication replication=true", conninfo); streamConn = PQconnectdb(conninfo_repl); if (PQstatus(streamConn) != CONNECTION_OK) ereport(ERROR, (errmsg("could not connect to the primary server: %s", PQerrorMessage(streamConn)))); /* * Get the system identifier and timeline ID as a DataRow message from the * primary server. */ res = libpqrcv_PQexec("IDENTIFY_SYSTEM"); if (PQresultStatus(res) != PGRES_TUPLES_OK) { PQclear(res); ereport(ERROR, (errmsg("could not receive database system identifier and timeline ID from " "the primary server: %s", PQerrorMessage(streamConn)))); } if (PQnfields(res) != 2 || PQntuples(res) != 1) { int ntuples = PQntuples(res); int nfields = PQnfields(res); PQclear(res); ereport(ERROR, (errmsg("invalid response from primary server"), errdetail("Expected 1 tuple with 2 fields, got %d tuples with %d fields.", ntuples, nfields))); } primary_sysid = PQgetvalue(res, 0, 0); primary_tli = pg_atoi(PQgetvalue(res, 0, 1), 4, 0); /* * Confirm that the system identifier of the primary is the same as ours. */ snprintf(standby_sysid, sizeof(standby_sysid), UINT64_FORMAT, GetSystemIdentifier()); if (strcmp(primary_sysid, standby_sysid) != 0) { PQclear(res); ereport(ERROR, (errmsg("database system identifier differs between the primary and standby"), errdetail("The primary's identifier is %s, the standby's identifier is %s.", primary_sysid, standby_sysid))); } /* * Confirm that the current timeline of the primary is the same as the * recovery target timeline. */ standby_tli = GetRecoveryTargetTLI(); PQclear(res); if (primary_tli != standby_tli) ereport(ERROR, (errmsg("timeline %u of the primary does not match recovery target timeline %u", primary_tli, standby_tli))); ThisTimeLineID = primary_tli; /* Start streaming from the point requested by startup process */ snprintf(cmd, sizeof(cmd), "START_REPLICATION %X/%X", startpoint.xlogid, startpoint.xrecoff); res = libpqrcv_PQexec(cmd); if (PQresultStatus(res) != PGRES_COPY_BOTH) { PQclear(res); ereport(ERROR, (errmsg("could not start WAL streaming: %s", PQerrorMessage(streamConn)))); } PQclear(res); justconnected = true; ereport(LOG, (errmsg("streaming replication successfully connected to primary"))); return true; }
/* * Main entry point for bgwriter process * * This is invoked from BootstrapMain, which has already created the basic * execution environment, but not enabled signals yet. */ void BackgroundWriterMain(void) { sigjmp_buf local_sigjmp_buf; MemoryContext bgwriter_context; am_bg_writer = true; /* * If possible, make this process a group leader, so that the postmaster * can signal any child processes too. (bgwriter probably never has any * child processes, but for consistency we make all postmaster child * processes do this.) */ #ifdef HAVE_SETSID if (setsid() < 0) elog(FATAL, "setsid() failed: %m"); #endif /* * Properly accept or ignore signals the postmaster might send us * * SIGUSR1 is presently unused; keep it spare in case someday we want this * process to participate in ProcSignal signalling. */ pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */ pqsignal(SIGINT, SIG_IGN); /* as of 9.2 no longer requests checkpoint */ pqsignal(SIGTERM, ReqShutdownHandler); /* shutdown */ pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */ pqsignal(SIGUSR2, SIG_IGN); /* * Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); pqsignal(SIGTTIN, SIG_DFL); pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); pqsignal(SIGWINCH, SIG_DFL); /* We allow SIGQUIT (quickdie) at all times */ sigdelset(&BlockSig, SIGQUIT); /* * Create a resource owner to keep track of our resources (currently only * buffer pins). */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer"); /* * Create a memory context that we will do all our work in. We do this so * that we can reset the context during error recovery and thereby avoid * possible memory leaks. Formerly this code just ran in * TopMemoryContext, but resetting that would be a really bad idea. */ bgwriter_context = AllocSetContextCreate(TopMemoryContext, "Background Writer", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(bgwriter_context); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Since not using PG_TRY, must reset error stack by hand */ error_context_stack = NULL; /* Prevent interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry * about in bgwriter, but we do have LWLocks, buffers, and temp files. */ LWLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); /* we needn't bother with the other ResourceOwnerRelease phases */ AtEOXact_Buffers(false); AtEOXact_Files(); AtEOXact_HashTables(false); /* * Now return to normal top-level context and clear ErrorContext for * next time. */ MemoryContextSwitchTo(bgwriter_context); FlushErrorState(); /* Flush any leaked data in the top-level context */ MemoryContextResetAndDeleteChildren(bgwriter_context); /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); /* * Sleep at least 1 second after any error. A write error is likely * to be repeated, and we don't want to be filling the error logs as * fast as we can. */ pg_usleep(1000000L); /* * Close all open files after any error. This is helpful on Windows, * where holding deleted files open causes various strange errors. * It's not clear we need it elsewhere, but shouldn't hurt. */ smgrcloseall(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; /* * Unblock signals (they were blocked when the postmaster forked us) */ PG_SETMASK(&UnBlockSig); /* * Use the recovery target timeline ID during recovery */ if (RecoveryInProgress()) ThisTimeLineID = GetRecoveryTargetTLI(); /* * Loop forever */ for (;;) { /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ if (!PostmasterIsAlive()) exit(1); if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); /* update global shmem state for sync rep */ } if (shutdown_requested) { /* * From here on, elog(ERROR) should end with exit(1), not send * control back to the sigsetjmp block above */ ExitOnAnyError = true; /* Normal exit from the bgwriter is here */ proc_exit(0); /* done */ } /* * Do one cycle of dirty-buffer writing. */ BgBufferSync(); /* Nap for the configured time. */ BgWriterNap(); } }