int main(int argc, char* argv[]) { cmockery_parse_arguments(argc, argv); const UnitTest tests[] = { unit_test(test__resetSessionForPrimaryGangLoss), unit_test(test__createWriterGang), unit_test(test__createReaderGang), }; MemoryContextInit(); CurrentResourceOwner = ResourceOwnerCreate(NULL, "gang test"); Gp_role = GP_ROLE_DISPATCH; GpIdentity.numsegments = TOTOAL_SEGMENTS; GpIdentity.dbid = 1; GpIdentity.segindex = -1; gp_connections_per_thread = 64; Port procport; MyProcPort = &procport; MyProcPort->database_name = "test"; MyProcPort->user_name = "gpadmin"; s_cdb = makeTestCdb(1, TOTOAL_SEGMENTS); return run_tests(tests); }
/*
 * Main worker routine. Accepts dsm_handle as an argument
 *
 * main_arg carries the handle of a dynamic shared memory segment holding a
 * PartitionArgs structure.  The worker attaches to that segment, connects
 * to the database named by args->dbid, runs create_partitions() inside a
 * transaction and stores the outcome in args->result (with args->crashed
 * passed along for create_partitions() to fill in).
 *
 * If the segment cannot be attached the worker logs a WARNING and returns
 * without touching the database; no result can be reported in that case
 * because the shared structure is unreachable.
 */
static void
bg_worker_main(Datum main_arg)
{
    PartitionArgs  *args;
    dsm_handle      handle = DatumGetInt32(main_arg);

    /* Create resource owner */
    CurrentResourceOwner = ResourceOwnerCreate(NULL, "CreatePartitionsWorker");

    /* Attach to dynamic shared memory */
    if (!handle)
    {
        ereport(WARNING,
                (errmsg("pg_pathman worker: invalid dsm_handle")));
    }

    segment = dsm_attach(handle);

    /*
     * dsm_attach() hands back NULL when the segment is not available;
     * calling dsm_segment_address() on that would dereference a NULL
     * pointer, so bail out cleanly instead.
     */
    if (segment == NULL)
    {
        ereport(WARNING,
                (errmsg("pg_pathman worker: unable to map dynamic shared memory segment")));
        return;
    }

    args = dsm_segment_address(segment);

    /* Establish connection and start transaction */
    BackgroundWorkerInitializeConnectionByOid(args->dbid, InvalidOid);
    StartTransactionCommand();
    SPI_connect();
    PushActiveSnapshot(GetTransactionSnapshot());

    /* Create partitions */
    args->result = create_partitions(args->relid,
                                     PATHMAN_GET_DATUM(args->value,
                                                       args->by_val),
                                     args->value_type,
                                     &args->crashed);

    /* Cleanup */
    SPI_finish();
    PopActiveSnapshot();
    CommitTransactionCommand();

    dsm_detach(segment);
}
/* Main entry point for walsender process */ int WalSenderMain(void) { MemoryContext walsnd_context; am_cascading_walsender = RecoveryInProgress(); /* Create a per-walsender data structure in shared memory */ InitWalSnd(); /* * Create a memory context that we will do all our work in. We do this so * that we can reset the context during error recovery and thereby avoid * possible memory leaks. Formerly this code just ran in * TopMemoryContext, but resetting that would be a really bad idea. * * XXX: we don't actually attempt error recovery in walsender, we just * close the connection and exit. */ walsnd_context = AllocSetContextCreate(TopMemoryContext, "Wal Sender", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(walsnd_context); /* Set up resource owner */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "walsender top-level resource owner"); /* Unblock signals (they were blocked when the postmaster forked us) */ PG_SETMASK(&UnBlockSig); /* * Use the recovery target timeline ID during recovery */ if (am_cascading_walsender) ThisTimeLineID = GetRecoveryTargetTLI(); /* Tell the standby that walsender is ready for receiving commands */ ReadyForQuery(DestRemote); /* Handle handshake messages before streaming */ WalSndHandshake(); /* Initialize shared memory status */ { /* use volatile pointer to prevent code rearrangement */ volatile WalSnd *walsnd = MyWalSnd; SpinLockAcquire(&walsnd->mutex); walsnd->sentPtr = sentPtr; SpinLockRelease(&walsnd->mutex); } SyncRepInitConfig(); /* Main loop of walsender */ return WalSndLoop(); }
/*
 * CreatePortal
 *      Build and register a new, empty portal called "name".
 *
 * name:      portal name; must be a valid pointer.
 * allowDup:  when false, an already-existing portal of the same name is an
 *            ERROR; when true, the old portal is dropped and replaced.
 * dupSilent: when true, the replacement happens without the WARNING that
 *            is otherwise emitted.
 *
 * Returns the freshly created portal, already entered in the portal hash
 * table.
 */
Portal
CreatePortal(const char *name, bool allowDup, bool dupSilent)
{
    Portal      existing;
    Portal      fresh;

    AssertArg(PointerIsValid(name));

    /* Deal with a name collision first */
    existing = GetPortalByName(name);
    if (PortalIsValid(existing))
    {
        if (!allowDup)
        {
            ereport(ERROR,
                    (errcode(ERRCODE_DUPLICATE_CURSOR),
                     errmsg("cursor \"%s\" already exists", name)));
        }
        if (!dupSilent)
        {
            ereport(WARNING,
                    (errcode(ERRCODE_DUPLICATE_CURSOR),
                     errmsg("closing existing cursor \"%s\"", name)));
        }
        PortalDrop(existing, false);
    }

    /* Zero-filled portal struct, allocated in PortalMemory */
    fresh = (Portal) MemoryContextAllocZero(PortalMemory, sizeof *fresh);

    /* Per-portal heap context; small sizing since it rarely holds much */
    fresh->heap = AllocSetContextCreate(PortalMemory,
                                        "PortalHeapMemory",
                                        ALLOCSET_SMALL_MINSIZE,
                                        ALLOCSET_SMALL_INITSIZE,
                                        ALLOCSET_SMALL_MAXSIZE);

    /* Resource owner, parented to the current transaction's owner */
    fresh->resowner = ResourceOwnerCreate(CurTransactionResourceOwner,
                                          "Portal");

    /* Fields whose initial value is not all-zeroes */
    fresh->strategy = PORTAL_MULTI_QUERY;
    fresh->cursorOptions = CURSOR_OPT_NO_SCROLL;
    fresh->cleanup = PortalCleanup;
    fresh->createSubid = GetCurrentSubTransactionId();

    /* atStart and atEnd both true: no fetches until a query is set */
    fresh->atStart = true;
    fresh->atEnd = true;

    /* Register in the hash table; this also sets fresh->name */
    PortalHashTableInsert(fresh, name);

    return fresh;
}
/* * Establish an AuxProcessResourceOwner for the current process. */ void CreateAuxProcessResourceOwner(void) { Assert(AuxProcessResourceOwner == NULL); Assert(CurrentResourceOwner == NULL); AuxProcessResourceOwner = ResourceOwnerCreate(NULL, "AuxiliaryProcess"); CurrentResourceOwner = AuxProcessResourceOwner; /* * Register a shmem-exit callback for cleanup of aux-process resource * owner. (This needs to run after, e.g., ShutdownXLOG.) */ on_shmem_exit(ReleaseAuxProcessResourcesCallback, 0); }
static void FileRepSubProcess_InitProcess(void) { SetProcessingMode(InitProcessing); /* * Create a resource owner to keep track of our resources */ CurrentResourceOwner = ResourceOwnerCreate(NULL, FileRepProcessTypeToString[fileRepProcessType]); InitXLOGAccess(); SetProcessingMode(NormalProcessing); InitBufferPoolAccess(); /* * Don't add Filerep backend subprocesses to the proc array. * * This avoids any deadlock situations during Filerep transition. E.g. If * a normal backend has acquired ProcArrayLock and is waiting for Filerep * transition to finish, the Filerep backend subprocesses will deadlock * forever as they can't acquire the ProcArray lock to remove themselves * from the ProcArray. This directly causes the transition to stall and * thus the whole system. */ /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend id: %d", MyBackendId); /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); }
/*
 * SetResQueueId -- refresh the cached resource queue id (MyQueueId) for
 * the current user.
 *
 * Notes
 *  Has to be called at session initialization and after (or in) SET ROLE.
 */
void
SetResQueueId(void)
{
    /*
     * To save the cache code from special-casing, supply a throwaway
     * resource owner when none is current.  It stays NULL when an owner
     * already exists, so we know below whether there is anything to undo.
     */
    ResourceOwner scratch = NULL;

    if (CurrentResourceOwner == NULL)
    {
        scratch = ResourceOwnerCreate(NULL, "SetResQueueId");
        CurrentResourceOwner = scratch;
    }

    MyQueueId = GetResQueueForRole(GetUserId());

    /* Put things back the way we found them */
    if (scratch != NULL)
    {
        CurrentResourceOwner = NULL;
        ResourceOwnerDelete(scratch);
    }
}
/*
 * Attach the worker to the given dynamic shared memory segment, copy the
 * job description out of it into a palloc'd JobDesc (stored in "job"),
 * detach again and prepare the quoted name of the function to run.
 *
 * Raises an ERROR if the segment cannot be mapped.
 */
static void
initialize_worker(uint32 segment)
{
    dsm_segment    *mapping;
    ResourceOwner   saved_owner;
    ResourceOwner   attach_owner;

    /*
     * Attaching a dynamic shared memory segment requires a resource owner.
     * StartTransactionCommand is not an option here: we are not yet
     * attached to a database, and the connection properties needed for
     * that live in the very segment we are about to map.  So use a
     * short-lived owner of our own.
     */
    saved_owner = CurrentResourceOwner;
    attach_owner = ResourceOwnerCreate(NULL, "Background Worker");
    CurrentResourceOwner = attach_owner;

    mapping = dsm_attach(segment);
    if (mapping == NULL)
    {
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("unable to map dynamic shared memory segment")));
    }

    /* Pin the mapping before the temporary owner goes away */
    dsm_pin_mapping(mapping);

    CurrentResourceOwner = saved_owner;
    ResourceOwnerDelete(attach_owner);

    /* Take a private copy of the arguments, then detach right away */
    job = palloc(sizeof(JobDesc));
    memcpy(job, dsm_segment_address(mapping), sizeof(JobDesc));
    dsm_detach(mapping);

    Assert(job->magic == JOB_MAGIC);

    job_run_function.schema = quote_identifier(job->schemaname);
    job_run_function.name = quote_identifier("run_job");
}
/*
 * Main entry point for checkpointer process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 *
 * Outline:
 *  1. install signal handlers and create the resource owner and the
 *     private memory context;
 *  2. set up the sigsetjmp() error-recovery point;
 *  3. loop forever: absorb fsync requests, react to SIGHUP / checkpoint /
 *     shutdown flags, run a checkpoint or restartpoint when requested or
 *     due, then sleep on the process latch.
 *
 * The function does not return: the loop is left only through
 * proc_exit(0) on a shutdown request or exit(1) when WaitLatch reports
 * postmaster death.
 */
void
CheckpointerMain(void)
{
    sigjmp_buf  local_sigjmp_buf;
    MemoryContext checkpointer_context;

    /* Publish our PID in the checkpointer's shared-memory area */
    CheckpointerShmem->checkpointer_pid = MyProcPid;

    /*
     * Properly accept or ignore signals the postmaster might send us
     *
     * Note: we deliberately ignore SIGTERM, because during a standard Unix
     * system shutdown cycle, init will SIGTERM all processes at once.  We
     * want to wait for the backends to exit, whereupon the postmaster will
     * tell us it's okay to shut down (via SIGUSR2).
     */
    pqsignal(SIGHUP, ChkptSigHupHandler);   /* set flag to read config
                                             * file */
    pqsignal(SIGINT, ReqCheckpointHandler); /* request checkpoint */
    pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
    pqsignal(SIGQUIT, chkpt_quickdie);      /* hard crash time */
    pqsignal(SIGALRM, SIG_IGN);
    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR1, chkpt_sigusr1_handler);
    pqsignal(SIGUSR2, ReqShutdownHandler);  /* request shutdown */

    /*
     * Reset some signals that are accepted by postmaster but not here
     */
    pqsignal(SIGCHLD, SIG_DFL);
    pqsignal(SIGTTIN, SIG_DFL);
    pqsignal(SIGTTOU, SIG_DFL);
    pqsignal(SIGCONT, SIG_DFL);
    pqsignal(SIGWINCH, SIG_DFL);

    /* We allow SIGQUIT (quickdie) at all times */
    sigdelset(&BlockSig, SIGQUIT);

    /*
     * Initialize so that first time-driven event happens at the correct time.
     */
    last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);

    /*
     * Create a resource owner to keep track of our resources (currently only
     * buffer pins).
     */
    CurrentResourceOwner = ResourceOwnerCreate(NULL, "Checkpointer");

    /*
     * Create a memory context that we will do all our work in.  We do this so
     * that we can reset the context during error recovery and thereby avoid
     * possible memory leaks.  Formerly this code just ran in
     * TopMemoryContext, but resetting that would be a really bad idea.
     */
    checkpointer_context = AllocSetContextCreate(TopMemoryContext,
                                                 "Checkpointer",
                                                 ALLOCSET_DEFAULT_SIZES);
    MemoryContextSwitchTo(checkpointer_context);

    /*
     * If an exception is encountered, processing resumes here.
     *
     * See notes in postgres.c about the design of this coding.
     *
     * The body of this "if" runs only when sigsetjmp() returns nonzero,
     * i.e. after a jump back to local_sigjmp_buf; on the initial call it is
     * skipped entirely.
     */
    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        /* Since not using PG_TRY, must reset error stack by hand */
        error_context_stack = NULL;

        /* Prevent interrupts while cleaning up */
        HOLD_INTERRUPTS();

        /* Report the error to the server log */
        EmitErrorReport();

        /*
         * These operations are really just a minimal subset of
         * AbortTransaction().  We don't have very many resources to worry
         * about in checkpointer, but we do have LWLocks, buffers, and temp
         * files.
         */
        LWLockReleaseAll();
        ConditionVariableCancelSleep();
        pgstat_report_wait_end();
        AbortBufferIO();
        UnlockBuffers();
        /* buffer pins are released here: */
        ResourceOwnerRelease(CurrentResourceOwner,
                             RESOURCE_RELEASE_BEFORE_LOCKS,
                             false, true);
        /* we needn't bother with the other ResourceOwnerRelease phases */
        AtEOXact_Buffers(false);
        AtEOXact_SMgr();
        AtEOXact_Files();
        AtEOXact_HashTables(false);

        /*
         * Warn any waiting backends that the checkpoint failed.
         *
         * ckpt_done is advanced to ckpt_started under the spinlock, the same
         * update the success path performs, with ckpt_failed bumped in
         * addition.
         */
        if (ckpt_active)
        {
            SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
            CheckpointerShmem->ckpt_failed++;
            CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
            SpinLockRelease(&CheckpointerShmem->ckpt_lck);

            ckpt_active = false;
        }

        /*
         * Now return to normal top-level context and clear ErrorContext for
         * next time.
         */
        MemoryContextSwitchTo(checkpointer_context);
        FlushErrorState();

        /* Flush any leaked data in the top-level context */
        MemoryContextResetAndDeleteChildren(checkpointer_context);

        /* Now we can allow interrupts again */
        RESUME_INTERRUPTS();

        /*
         * Sleep at least 1 second after any error.  A write error is likely
         * to be repeated, and we don't want to be filling the error logs as
         * fast as we can.
         */
        pg_usleep(1000000L);

        /*
         * Close all open files after any error.  This is helpful on Windows,
         * where holding deleted files open causes various strange errors.
         * It's not clear we need it elsewhere, but shouldn't hurt.
         */
        smgrcloseall();
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    /*
     * Unblock signals (they were blocked when the postmaster forked us)
     */
    PG_SETMASK(&UnBlockSig);

    /*
     * Ensure all shared memory values are set correctly for the config. Doing
     * this here ensures no race conditions from other concurrent updaters.
     */
    UpdateSharedMemoryConfig();

    /*
     * Advertise our latch that backends can use to wake us up while we're
     * sleeping.
     */
    ProcGlobal->checkpointerLatch = &MyProc->procLatch;

    /*
     * Loop forever
     */
    for (;;)
    {
        bool        do_checkpoint = false;
        int         flags = 0;
        pg_time_t   now;
        int         elapsed_secs;
        int         cur_timeout;
        int         rc;

        /* Clear any already-pending wakeups */
        ResetLatch(MyLatch);

        /*
         * Process any requests or signals received recently.
         */
        AbsorbFsyncRequests();

        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);

            /*
             * Checkpointer is the last process to shut down, so we ask it to
             * hold the keys for a range of other tasks required most of which
             * have nothing to do with checkpointing at all.
             *
             * For various reasons, some config values can change dynamically
             * so the primary copy of them is held in shared memory to make
             * sure all backends see the same value.  We make Checkpointer
             * responsible for updating the shared memory copy if the
             * parameter setting changes because of SIGHUP.
             */
            UpdateSharedMemoryConfig();
        }
        if (checkpoint_requested)
        {
            checkpoint_requested = false;
            do_checkpoint = true;
            BgWriterStats.m_requested_checkpoints++;
        }
        if (shutdown_requested)
        {
            /*
             * From here on, elog(ERROR) should end with exit(1), not send
             * control back to the sigsetjmp block above
             */
            ExitOnAnyError = true;
            /* Close down the database */
            ShutdownXLOG(0, 0);
            /* Normal exit from the checkpointer is here */
            proc_exit(0);       /* done */
        }

        /*
         * Force a checkpoint if too much time has elapsed since the last one.
         * Note that we count a timed checkpoint in stats only when this
         * occurs without an external request, but we set the CAUSE_TIME flag
         * bit even if there is also an external request.
         */
        now = (pg_time_t) time(NULL);
        elapsed_secs = now - last_checkpoint_time;
        if (elapsed_secs >= CheckPointTimeout)
        {
            if (!do_checkpoint)
                BgWriterStats.m_timed_checkpoints++;
            do_checkpoint = true;
            flags |= CHECKPOINT_CAUSE_TIME;
        }

        /*
         * Do a checkpoint if requested.
         */
        if (do_checkpoint)
        {
            bool        ckpt_performed = false;
            bool        do_restartpoint;

            /*
             * Check if we should perform a checkpoint or a restartpoint. As a
             * side-effect, RecoveryInProgress() initializes TimeLineID if
             * it's not set yet.
             */
            do_restartpoint = RecoveryInProgress();

            /*
             * Atomically fetch the request flags to figure out what kind of a
             * checkpoint we should perform, and increase the started-counter
             * to acknowledge that we've started a new checkpoint.
             */
            SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
            flags |= CheckpointerShmem->ckpt_flags;
            CheckpointerShmem->ckpt_flags = 0;
            CheckpointerShmem->ckpt_started++;
            SpinLockRelease(&CheckpointerShmem->ckpt_lck);

            /*
             * The end-of-recovery checkpoint is a real checkpoint that's
             * performed while we're still in recovery.
             */
            if (flags & CHECKPOINT_END_OF_RECOVERY)
                do_restartpoint = false;

            /*
             * We will warn if (a) too soon since last checkpoint (whatever
             * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
             * since the last checkpoint start.  Note in particular that this
             * implementation will not generate warnings caused by
             * CheckPointTimeout < CheckPointWarning.
             */
            if (!do_restartpoint &&
                (flags & CHECKPOINT_CAUSE_XLOG) &&
                elapsed_secs < CheckPointWarning)
                ereport(LOG,
                        (errmsg_plural("checkpoints are occurring too frequently (%d second apart)",
                                       "checkpoints are occurring too frequently (%d seconds apart)",
                                       elapsed_secs,
                                       elapsed_secs),
                         errhint("Consider increasing the configuration parameter \"max_wal_size\".")));

            /*
             * Initialize checkpointer-private variables used during
             * checkpoint.
             *
             * ckpt_active is what tells the error-recovery block above that
             * a checkpoint was in flight when the error happened.
             */
            ckpt_active = true;
            if (do_restartpoint)
                ckpt_start_recptr = GetXLogReplayRecPtr(NULL);
            else
                ckpt_start_recptr = GetInsertRecPtr();
            ckpt_start_time = now;
            ckpt_cached_elapsed = 0;

            /*
             * Do the checkpoint.
             */
            if (!do_restartpoint)
            {
                CreateCheckPoint(flags);
                ckpt_performed = true;
            }
            else
                ckpt_performed = CreateRestartPoint(flags);

            /*
             * After any checkpoint, close all smgr files.  This is so we
             * won't hang onto smgr references to deleted files indefinitely.
             */
            smgrcloseall();

            /*
             * Indicate checkpoint completion to any waiting backends.
             */
            SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
            CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
            SpinLockRelease(&CheckpointerShmem->ckpt_lck);

            if (ckpt_performed)
            {
                /*
                 * Note we record the checkpoint start time not end time as
                 * last_checkpoint_time.  This is so that time-driven
                 * checkpoints happen at a predictable spacing.
                 */
                last_checkpoint_time = now;
            }
            else
            {
                /*
                 * We were not able to perform the restartpoint (checkpoints
                 * throw an ERROR in case of error).  Most likely because we
                 * have not received any new checkpoint WAL records since the
                 * last restartpoint. Try again in 15 s.
                 *
                 * Back-dating last_checkpoint_time this way makes the
                 * elapsed time reach CheckPointTimeout 15 seconds after
                 * "now".
                 */
                last_checkpoint_time = now - CheckPointTimeout + 15;
            }

            ckpt_active = false;
        }

        /* Check for archive_timeout and switch xlog files if necessary. */
        CheckArchiveTimeout();

        /*
         * Send off activity statistics to the stats collector.  (The reason
         * why we re-use bgwriter-related code for this is that the bgwriter
         * and checkpointer used to be just one process.  It's probably not
         * worth the trouble to split the stats support into two independent
         * stats message types.)
         */
        pgstat_send_bgwriter();

        /*
         * Sleep until we are signaled or it's time for another checkpoint or
         * xlog file switch.
         */
        now = (pg_time_t) time(NULL);
        elapsed_secs = now - last_checkpoint_time;
        if (elapsed_secs >= CheckPointTimeout)
            continue;           /* no sleep for us ... */
        cur_timeout = CheckPointTimeout - elapsed_secs;
        if (XLogArchiveTimeout > 0 && !RecoveryInProgress())
        {
            /* elapsed_secs is reused here, now relative to the last xlog switch */
            elapsed_secs = now - last_xlog_switch_time;
            if (elapsed_secs >= XLogArchiveTimeout)
                continue;       /* no sleep for us ... */
            cur_timeout = Min(cur_timeout, XLogArchiveTimeout - elapsed_secs);
        }

        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                       cur_timeout * 1000L /* convert to ms */,
                       WAIT_EVENT_CHECKPOINTER_MAIN);

        /*
         * Emergency bailout if postmaster has died.  This is to avoid the
         * necessity for manual cleanup of all postmaster children.
         */
        if (rc & WL_POSTMASTER_DEATH)
            exit(1);
    }
}
/*
 * Main entry point for walwriter process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 *
 * After installing signal handlers and creating its resource owner and
 * private memory context, the function establishes a sigsetjmp() recovery
 * point and then loops forever, calling XLogBackgroundFlush() and sleeping
 * on the process latch between rounds.  It does not return: the loop is
 * left only via proc_exit(0) on a shutdown request or exit(1) when
 * WaitLatch reports postmaster death.
 */
void
WalWriterMain(void)
{
    sigjmp_buf  local_sigjmp_buf;
    MemoryContext walwriter_context;
    int         left_till_hibernate;    /* idle cycles left before long sleeps */
    bool        hibernating;    /* last value passed to SetWalWriterSleeping */

    /*
     * Properly accept or ignore signals the postmaster might send us
     *
     * We have no particular use for SIGINT at the moment, but seems
     * reasonable to treat like SIGTERM.
     */
    pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
    pqsignal(SIGINT, WalShutdownHandler);       /* request shutdown */
    pqsignal(SIGTERM, WalShutdownHandler);      /* request shutdown */
    pqsignal(SIGQUIT, wal_quickdie);    /* hard crash time */
    pqsignal(SIGALRM, SIG_IGN);
    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR1, walwriter_sigusr1_handler);
    pqsignal(SIGUSR2, SIG_IGN); /* not used */

    /*
     * Reset some signals that are accepted by postmaster but not here
     */
    pqsignal(SIGCHLD, SIG_DFL);
    pqsignal(SIGTTIN, SIG_DFL);
    pqsignal(SIGTTOU, SIG_DFL);
    pqsignal(SIGCONT, SIG_DFL);
    pqsignal(SIGWINCH, SIG_DFL);

    /* We allow SIGQUIT (quickdie) at all times */
    sigdelset(&BlockSig, SIGQUIT);

    /*
     * Create a resource owner to keep track of our resources (not clear that
     * we need this, but may as well have one).
     */
    CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");

    /*
     * Create a memory context that we will do all our work in.  We do this so
     * that we can reset the context during error recovery and thereby avoid
     * possible memory leaks.  Formerly this code just ran in
     * TopMemoryContext, but resetting that would be a really bad idea.
     */
    walwriter_context = AllocSetContextCreate(TopMemoryContext,
                                              "Wal Writer",
                                              ALLOCSET_DEFAULT_SIZES);
    MemoryContextSwitchTo(walwriter_context);

    /*
     * If an exception is encountered, processing resumes here.
     *
     * This code is heavily based on bgwriter.c, q.v.
     *
     * The body of this "if" runs only when sigsetjmp() returns nonzero,
     * i.e. after a jump back to local_sigjmp_buf; on the initial call it is
     * skipped entirely.
     */
    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        /* Since not using PG_TRY, must reset error stack by hand */
        error_context_stack = NULL;

        /* Prevent interrupts while cleaning up */
        HOLD_INTERRUPTS();

        /* Report the error to the server log */
        EmitErrorReport();

        /*
         * These operations are really just a minimal subset of
         * AbortTransaction().  We don't have very many resources to worry
         * about in walwriter, but we do have LWLocks, and perhaps buffers?
         */
        LWLockReleaseAll();
        ConditionVariableCancelSleep();
        pgstat_report_wait_end();
        AbortBufferIO();
        UnlockBuffers();
        /* buffer pins are released here: */
        ResourceOwnerRelease(CurrentResourceOwner,
                             RESOURCE_RELEASE_BEFORE_LOCKS,
                             false, true);
        /* we needn't bother with the other ResourceOwnerRelease phases */
        AtEOXact_Buffers(false);
        AtEOXact_SMgr();
        AtEOXact_Files();
        AtEOXact_HashTables(false);

        /*
         * Now return to normal top-level context and clear ErrorContext for
         * next time.
         */
        MemoryContextSwitchTo(walwriter_context);
        FlushErrorState();

        /* Flush any leaked data in the top-level context */
        MemoryContextResetAndDeleteChildren(walwriter_context);

        /* Now we can allow interrupts again */
        RESUME_INTERRUPTS();

        /*
         * Sleep at least 1 second after any error.  A write error is likely
         * to be repeated, and we don't want to be filling the error logs as
         * fast as we can.
         */
        pg_usleep(1000000L);

        /*
         * Close all open files after any error.  This is helpful on Windows,
         * where holding deleted files open causes various strange errors.
         * It's not clear we need it elsewhere, but shouldn't hurt.
         */
        smgrcloseall();
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    /*
     * Unblock signals (they were blocked when the postmaster forked us)
     */
    PG_SETMASK(&UnBlockSig);

    /*
     * Reset hibernation state after any error.
     *
     * This code sits after the sigsetjmp block, so it runs both on the
     * initial pass and again after every error recovery.
     */
    left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
    hibernating = false;
    SetWalWriterSleeping(false);

    /*
     * Advertise our latch that backends can use to wake us up while we're
     * sleeping.
     */
    ProcGlobal->walwriterLatch = &MyProc->procLatch;

    /*
     * Loop forever
     */
    for (;;)
    {
        long        cur_timeout;
        int         rc;

        /*
         * Advertise whether we might hibernate in this cycle.  We do this
         * before resetting the latch to ensure that any async commits will
         * see the flag set if they might possibly need to wake us up, and
         * that we won't miss any signal they send us.  (If we discover work
         * to do in the last cycle before we would hibernate, the global flag
         * will be set unnecessarily, but little harm is done.)  But avoid
         * touching the global flag if it doesn't need to change.
         */
        if (hibernating != (left_till_hibernate <= 1))
        {
            hibernating = (left_till_hibernate <= 1);
            SetWalWriterSleeping(hibernating);
        }

        /* Clear any already-pending wakeups */
        ResetLatch(MyLatch);

        /*
         * Process any requests or signals received recently.
         */
        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);
        }
        if (shutdown_requested)
        {
            /* Normal exit from the walwriter is here */
            proc_exit(0);       /* done */
        }

        /*
         * Do what we're here for; then, if XLogBackgroundFlush() found useful
         * work to do, reset hibernation counter.
         *
         * Otherwise the counter is decremented, but never below zero.
         */
        if (XLogBackgroundFlush())
            left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
        else if (left_till_hibernate > 0)
            left_till_hibernate--;

        /*
         * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
         * haven't done anything useful for quite some time, lengthen the
         * sleep time so as to reduce the server's idle power consumption.
         */
        if (left_till_hibernate > 0)
            cur_timeout = WalWriterDelay;       /* in ms */
        else
            cur_timeout = WalWriterDelay * HIBERNATE_FACTOR;

        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                       cur_timeout,
                       WAIT_EVENT_WAL_WRITER_MAIN);

        /*
         * Emergency bailout if postmaster has died.  This is to avoid the
         * necessity for manual cleanup of all postmaster children.
         */
        if (rc & WL_POSTMASTER_DEATH)
            exit(1);
    }
}
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 *
 * After installing signal handlers and creating its resource owner and
 * private memory context, the function establishes a sigsetjmp() recovery
 * point and then loops forever: one BgBufferSync() pass followed by
 * BgWriterNap().  It does not return: the loop is left only via
 * proc_exit(0) on a shutdown request or exit(1) once the postmaster is
 * found to be dead.
 */
void
BackgroundWriterMain(void)
{
    sigjmp_buf  local_sigjmp_buf;
    MemoryContext bgwriter_context;

    am_bg_writer = true;

    /*
     * If possible, make this process a group leader, so that the postmaster
     * can signal any child processes too.  (bgwriter probably never has any
     * child processes, but for consistency we make all postmaster child
     * processes do this.)
     */
#ifdef HAVE_SETSID
    if (setsid() < 0)
        elog(FATAL, "setsid() failed: %m");
#endif

    /*
     * Properly accept or ignore signals the postmaster might send us
     *
     * SIGUSR1 is presently unused; keep it spare in case someday we want this
     * process to participate in ProcSignal signalling.
     */
    pqsignal(SIGHUP, BgSigHupHandler);  /* set flag to read config file */
    pqsignal(SIGINT, SIG_IGN);  /* as of 9.2 no longer requests checkpoint */
    pqsignal(SIGTERM, ReqShutdownHandler);      /* shutdown */
    pqsignal(SIGQUIT, bg_quickdie);     /* hard crash time */
    pqsignal(SIGALRM, SIG_IGN);
    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
    pqsignal(SIGUSR2, SIG_IGN);

    /*
     * Reset some signals that are accepted by postmaster but not here
     */
    pqsignal(SIGCHLD, SIG_DFL);
    pqsignal(SIGTTIN, SIG_DFL);
    pqsignal(SIGTTOU, SIG_DFL);
    pqsignal(SIGCONT, SIG_DFL);
    pqsignal(SIGWINCH, SIG_DFL);

    /* We allow SIGQUIT (quickdie) at all times */
    sigdelset(&BlockSig, SIGQUIT);

    /*
     * Create a resource owner to keep track of our resources (currently only
     * buffer pins).
     */
    CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

    /*
     * Create a memory context that we will do all our work in.  We do this so
     * that we can reset the context during error recovery and thereby avoid
     * possible memory leaks.  Formerly this code just ran in
     * TopMemoryContext, but resetting that would be a really bad idea.
     */
    bgwriter_context = AllocSetContextCreate(TopMemoryContext,
                                             "Background Writer",
                                             ALLOCSET_DEFAULT_MINSIZE,
                                             ALLOCSET_DEFAULT_INITSIZE,
                                             ALLOCSET_DEFAULT_MAXSIZE);
    MemoryContextSwitchTo(bgwriter_context);

    /*
     * If an exception is encountered, processing resumes here.
     *
     * See notes in postgres.c about the design of this coding.
     *
     * The body of this "if" runs only when sigsetjmp() returns nonzero,
     * i.e. after a jump back to local_sigjmp_buf; on the initial call it is
     * skipped entirely.
     */
    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        /* Since not using PG_TRY, must reset error stack by hand */
        error_context_stack = NULL;

        /* Prevent interrupts while cleaning up */
        HOLD_INTERRUPTS();

        /* Report the error to the server log */
        EmitErrorReport();

        /*
         * These operations are really just a minimal subset of
         * AbortTransaction().  We don't have very many resources to worry
         * about in bgwriter, but we do have LWLocks, buffers, and temp files.
         */
        LWLockReleaseAll();
        AbortBufferIO();
        UnlockBuffers();
        /* buffer pins are released here: */
        ResourceOwnerRelease(CurrentResourceOwner,
                             RESOURCE_RELEASE_BEFORE_LOCKS,
                             false, true);
        /* we needn't bother with the other ResourceOwnerRelease phases */
        AtEOXact_Buffers(false);
        AtEOXact_Files();
        AtEOXact_HashTables(false);

        /*
         * Now return to normal top-level context and clear ErrorContext for
         * next time.
         */
        MemoryContextSwitchTo(bgwriter_context);
        FlushErrorState();

        /* Flush any leaked data in the top-level context */
        MemoryContextResetAndDeleteChildren(bgwriter_context);

        /* Now we can allow interrupts again */
        RESUME_INTERRUPTS();

        /*
         * Sleep at least 1 second after any error.  A write error is likely
         * to be repeated, and we don't want to be filling the error logs as
         * fast as we can.
         */
        pg_usleep(1000000L);

        /*
         * Close all open files after any error.  This is helpful on Windows,
         * where holding deleted files open causes various strange errors.
         * It's not clear we need it elsewhere, but shouldn't hurt.
         */
        smgrcloseall();
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    /*
     * Unblock signals (they were blocked when the postmaster forked us)
     */
    PG_SETMASK(&UnBlockSig);

    /*
     * Use the recovery target timeline ID during recovery
     */
    if (RecoveryInProgress())
        ThisTimeLineID = GetRecoveryTargetTLI();

    /*
     * Loop forever
     */
    for (;;)
    {
        /*
         * Emergency bailout if postmaster has died.  This is to avoid the
         * necessity for manual cleanup of all postmaster children.
         */
        if (!PostmasterIsAlive())
            exit(1);

        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);

            /*
             * NOTE(review): a comment here used to read "update global shmem
             * state for sync rep", but no such update follows in this
             * function -- only the config file is re-read.  Confirm whether
             * a call was dropped or the comment is simply stale.
             */
        }
        if (shutdown_requested)
        {
            /*
             * From here on, elog(ERROR) should end with exit(1), not send
             * control back to the sigsetjmp block above
             */
            ExitOnAnyError = true;
            /* Normal exit from the bgwriter is here */
            proc_exit(0);       /* done */
        }

        /*
         * Do one cycle of dirty-buffer writing.
         */
        BgBufferSync();

        /* Nap for the configured time. */
        BgWriterNap();
    }
}
/*
 * Helper function for the various SQL callable logical decoding functions.
 *
 * SQL-level arguments, read from fcinfo:
 *  0: name of the replication slot to decode from
 *  1: upto_lsn      - stop once the reader has reached this LSN
 *                     (NULL: no LSN limit)
 *  2: upto_nchanges - stop once this many rows have been returned
 *                     (NULL: no row limit)
 *  3: one-dimensional text array of option name/value pairs handed to the
 *     output plugin; must have an even number of non-null elements
 *
 * confirm: if true, the location reached by the reader is confirmed via
 *          LogicalConfirmReceivedLocation() before returning.
 * binary:  recorded as binary_output in the output state; if false, the
 *          output plugin must have declared textual output, else an ERROR
 *          is raised.
 *
 * The decoded changes are returned through a materialized tuplestore set
 * up in rsinfo; the function's own return value is always (Datum) 0.
 */
static Datum
pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool binary)
{
    Name        name = PG_GETARG_NAME(0);
    XLogRecPtr  upto_lsn;
    int32       upto_nchanges;

    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    MemoryContext per_query_ctx;
    MemoryContext oldcontext;

    XLogRecPtr  end_of_wal;
    XLogRecPtr  startptr;

    LogicalDecodingContext *ctx;

    ResourceOwner old_resowner = CurrentResourceOwner;
    ArrayType  *arr;
    Size        ndim;
    List       *options = NIL;
    DecodingOutputState *p;

    if (PG_ARGISNULL(1))
        upto_lsn = InvalidXLogRecPtr;
    else
        upto_lsn = PG_GETARG_LSN(1);

    /*
     * upto_nchanges is a plain row count, not an LSN: "no limit" is 0,
     * which is exactly what the loop below tests for.
     */
    if (PG_ARGISNULL(2))
        upto_nchanges = 0;
    else
        upto_nchanges = PG_GETARG_INT32(2);

    /* check to see if caller supports us returning a tuplestore */
    if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("set-valued function called in context that cannot accept a set")));
    if (!(rsinfo->allowedModes & SFRM_Materialize))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("materialize mode required, but it is not allowed in this context")));

    /* state to write output to */
    p = palloc0(sizeof(DecodingOutputState));

    p->binary_output = binary;

    /* Build a tuple descriptor for our result type */
    if (get_call_result_type(fcinfo, NULL, &p->tupdesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    check_permissions();

    CheckLogicalDecodingRequirements();

    arr = PG_GETARG_ARRAYTYPE_P(3);
    ndim = ARR_NDIM(arr);

    /* the option list and the tuplestore must live in per-query memory */
    per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
    oldcontext = MemoryContextSwitchTo(per_query_ctx);

    if (ndim > 1)
    {
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("array must be one-dimensional")));
    }
    else if (array_contains_nulls(arr))
    {
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("array must not contain nulls")));
    }
    else if (ndim == 1)
    {
        int         nelems;
        Datum      *datum_opts;
        int         i;

        Assert(ARR_ELEMTYPE(arr) == TEXTOID);

        deconstruct_array(arr, TEXTOID, -1, false, 'i',
                          &datum_opts, NULL, &nelems);

        if (nelems % 2 != 0)
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("array must have even number of elements")));

        /* elements come in (option name, option value) pairs */
        for (i = 0; i < nelems; i += 2)
        {
            /* named optname so as not to shadow the slot-name argument */
            char       *optname = TextDatumGetCString(datum_opts[i]);
            char       *opt = TextDatumGetCString(datum_opts[i + 1]);

            options = lappend(options, makeDefElem(optname, (Node *) makeString(opt)));
        }
    }

    p->tupstore = tuplestore_begin_heap(true, false, work_mem);
    rsinfo->returnMode = SFRM_Materialize;
    rsinfo->setResult = p->tupstore;
    rsinfo->setDesc = p->tupdesc;

    /* compute the current end-of-wal */
    if (!RecoveryInProgress())
        end_of_wal = GetFlushRecPtr();
    else
        end_of_wal = GetXLogReplayRecPtr(NULL);

    CheckLogicalDecodingRequirements();
    ReplicationSlotAcquire(NameStr(*name));

    PG_TRY();
    {
        ctx = CreateDecodingContext(InvalidXLogRecPtr,
                                    options,
                                    logical_read_local_xlog_page,
                                    LogicalOutputPrepareWrite,
                                    LogicalOutputWrite);

        MemoryContextSwitchTo(oldcontext);

        /*
         * Check whether the output plugin writes textual output if that's
         * what we need.
         */
        if (!binary &&
            ctx->options.output_type != OUTPUT_PLUGIN_TEXTUAL_OUTPUT)
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("output plugin cannot produce binary output")));

        ctx->output_writer_private = p;

        startptr = MyReplicationSlot->data.restart_lsn;

        /* decode under a child resource owner; restored after the loop */
        CurrentResourceOwner = ResourceOwnerCreate(CurrentResourceOwner, "logical decoding");

        /* invalidate non-timetravel entries */
        InvalidateSystemCaches();

        while ((startptr != InvalidXLogRecPtr && startptr < end_of_wal) ||
               (ctx->reader->EndRecPtr && ctx->reader->EndRecPtr < end_of_wal))
        {
            XLogRecord *record;
            char       *errm = NULL;

            record = XLogReadRecord(ctx->reader, startptr, &errm);
            if (errm)
                elog(ERROR, "%s", errm);

            /* only the first read uses an explicit start position */
            startptr = InvalidXLogRecPtr;

            /*
             * The {begin_txn,change,commit_txn}_wrapper callbacks above will
             * store the description into our tuplestore.
             */
            if (record != NULL)
                LogicalDecodingProcessRecord(ctx, record);

            /* check limits */
            if (upto_lsn != InvalidXLogRecPtr &&
                upto_lsn <= ctx->reader->EndRecPtr)
                break;
            if (upto_nchanges != 0 &&
                upto_nchanges <= p->returned_rows)
                break;
        }
    }
    PG_CATCH();
    {
        /* clear all timetravel entries */
        InvalidateSystemCaches();

        PG_RE_THROW();
    }
    PG_END_TRY();

    /* the store lives in the output state; there is no local "tupstore" */
    tuplestore_donestoring(p->tupstore);

    CurrentResourceOwner = old_resowner;

    /*
     * Next time, start where we left off. (Hunting things, the family
     * business..)
     */
    if (ctx->reader->EndRecPtr != InvalidXLogRecPtr && confirm)
        LogicalConfirmReceivedLocation(ctx->reader->EndRecPtr);

    /* free context, call shutdown callback */
    FreeDecodingContext(ctx);

    ReplicationSlotRelease();
    InvalidateSystemCaches();

    return (Datum) 0;
}
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 *
 * Overview of what the body does, in order:
 *	1. records our PID in shared memory and installs signal handlers;
 *	2. creates a resource owner and a private memory context;
 *	3. establishes a sigsetjmp() recovery point that cleans up after an
 *	   error and then falls through into the main loop again;
 *	4. loops forever, doing either a checkpoint or one BgBufferSync() cycle
 *	   per iteration, then napping.
 *
 * The function does not return: the process leaves through proc_exit(0) on
 * a shutdown request, or exit(1) if the postmaster is found to be dead.
 */
void
BackgroundWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext bgwriter_context;

	/* Advertise our PID in shared memory and flag this process as bgwriter */
	BgWriterShmem->bgwriter_pid = MyProcPid;
	am_bg_writer = true;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.  (bgwriter probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we deliberately ignore SIGTERM, because during a standard Unix
	 * system shutdown cycle, init will SIGTERM all processes at once.  We
	 * want to wait for the backends to exit, whereupon the postmaster will
	 * tell us it's okay to shut down (via SIGUSR2).
	 *
	 * SIGUSR1 is presently unused; keep it spare in case someday we want this
	 * process to participate in sinval messaging.
	 */
	pqsignal(SIGHUP, BgSigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, ReqCheckpointHandler);		/* request checkpoint */
	pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN); /* reserve for sinval */
	pqsignal(SIGUSR2, ReqShutdownHandler);		/* request shutdown */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
#ifdef HAVE_SIGPROCMASK
	sigdelset(&BlockSig, SIGQUIT);
#else
	BlockSig &= ~(sigmask(SIGQUIT));
#endif

	/*
	 * Initialize so that first time-driven event happens at the correct time.
	 */
	last_checkpoint_time = last_xlog_switch_time = time(NULL);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	bgwriter_context = AllocSetContextCreate(TopMemoryContext,
											 "Background Writer",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(bgwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 *
	 * The block below runs only on the error path (nonzero return from
	 * sigsetjmp); afterwards control falls through to the main loop.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in bgwriter, but we do have LWLocks, buffers, and temp files.
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Warn any waiting backends that the checkpoint failed.  Setting
		 * ckpt_done equal to ckpt_started mirrors what the success path in
		 * the main loop does, while ckpt_failed records the failure.
		 */
		if (ckpt_active)
		{
			/* use volatile pointer to prevent code rearrangement */
			volatile BgWriterShmemStruct *bgs = BgWriterShmem;

			SpinLockAcquire(&bgs->ckpt_lck);
			bgs->ckpt_failed++;
			bgs->ckpt_done = bgs->ckpt_started;
			SpinLockRelease(&bgs->ckpt_lck);

			ckpt_active = false;
		}

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(bgwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(bgwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		do_checkpoint = false;
		int			flags = 0;
		time_t		now;
		int			elapsed_secs;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Process any requests or signals received recently.
		 */
		AbsorbFsyncRequests();

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (checkpoint_requested)
		{
			checkpoint_requested = false;
			do_checkpoint = true;
			BgWriterStats.m_requested_checkpoints++;
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Close down the database */
			ShutdownXLOG(0, 0);
			DumpFreeSpaceMap(0, 0);
			/* Normal exit from the bgwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Force a checkpoint if too much time has elapsed since the last one.
		 * Note that we count a timed checkpoint in stats only when this
		 * occurs without an external request, but we set the CAUSE_TIME flag
		 * bit even if there is also an external request.
		 */
		now = time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
		{
			if (!do_checkpoint)
				BgWriterStats.m_timed_checkpoints++;
			do_checkpoint = true;
			flags |= CHECKPOINT_CAUSE_TIME;
		}

		/*
		 * Do a checkpoint if requested, otherwise do one cycle of
		 * dirty-buffer writing.
		 */
		if (do_checkpoint)
		{
			/* use volatile pointer to prevent code rearrangement */
			volatile BgWriterShmemStruct *bgs = BgWriterShmem;

			/*
			 * Atomically fetch the request flags to figure out what kind of a
			 * checkpoint we should perform, and increase the started-counter
			 * to acknowledge that we've started a new checkpoint.
			 */
			SpinLockAcquire(&bgs->ckpt_lck);
			flags |= bgs->ckpt_flags;
			bgs->ckpt_flags = 0;
			bgs->ckpt_started++;
			SpinLockRelease(&bgs->ckpt_lck);

			/*
			 * We will warn if (a) too soon since last checkpoint (whatever
			 * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
			 * since the last checkpoint start.  Note in particular that this
			 * implementation will not generate warnings caused by
			 * CheckPointTimeout < CheckPointWarning.
			 */
			if ((flags & CHECKPOINT_CAUSE_XLOG) &&
				elapsed_secs < CheckPointWarning)
				ereport(LOG,
						(errmsg("checkpoints are occurring too frequently (%d seconds apart)",
								elapsed_secs),
						 errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));

			/*
			 * Initialize bgwriter-private variables used during checkpoint.
			 * ckpt_active is also what tells the error-recovery block above
			 * that a checkpoint was in progress when the error happened.
			 */
			ckpt_active = true;
			ckpt_start_recptr = GetInsertRecPtr();
			ckpt_start_time = now;
			ckpt_cached_elapsed = 0;

			/*
			 * Do the checkpoint.
			 */
			CreateCheckPoint(flags);

			/*
			 * After any checkpoint, close all smgr files.  This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();

			/*
			 * Indicate checkpoint completion to any waiting backends.
			 */
			SpinLockAcquire(&bgs->ckpt_lck);
			bgs->ckpt_done = bgs->ckpt_started;
			SpinLockRelease(&bgs->ckpt_lck);

			ckpt_active = false;

			/*
			 * Note we record the checkpoint start time not end time as
			 * last_checkpoint_time.  This is so that time-driven checkpoints
			 * happen at a predictable spacing.
			 */
			last_checkpoint_time = now;
		}
		else
			BgBufferSync();

		/* Check for archive_timeout and switch xlog files if necessary. */
		CheckArchiveTimeout();

		/* Nap for the configured time. */
		BgWriterNap();
	}
}
/** * This method is called after fork of the sweeper process. It sets up signal * handlers and does initialization that is required by a postgres backend. */ NON_EXEC_STATIC void BackoffSweeperMain(int argc, char *argv[]) { sigjmp_buf local_sigjmp_buf; IsUnderPostmaster = true; isSweeperProcess = true; /* Stay away from PMChildSlot */ MyPMChildSlot = -1; /* reset MyProcPid */ MyProcPid = getpid(); /* Lose the postmaster's on-exit routines */ on_exit_reset(); /* Identify myself via ps */ init_ps_display("sweeper process", "", "", ""); SetProcessingMode(InitProcessing); /* * Set up signal handlers. We operate on databases much like a regular * backend, so we use the same signal handling. See equivalent code in * tcop/postgres.c. */ pqsignal(SIGHUP, SIG_IGN); pqsignal(SIGINT, SIG_IGN); pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); pqsignal(SIGTERM, die); pqsignal(SIGQUIT, quickdie); pqsignal(SIGUSR2, BackoffRequestShutdown); pqsignal(SIGFPE, FloatExceptionHandler); pqsignal(SIGCHLD, SIG_DFL); /* * Copied from bgwriter */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Sweeper process"); /* Early initialization */ BaseInit(); /* See InitPostgres()... */ InitProcess(); SetProcessingMode(NormalProcessing); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Prevents interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * We can now go away. Note that because we'll call InitProcess, a * callback will be registered to do ProcKill, which will clean up * necessary state. */ proc_exit(0); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; PG_SETMASK(&UnBlockSig); MyBackendId = InvalidBackendId; /* main loop */ BackoffSweeperLoop(); /* One iteration done, go away */ proc_exit(0); }
/*
 * Main entry point for walwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 *
 * The body installs signal handlers, creates a resource owner and a private
 * memory context, establishes a sigsetjmp() recovery point, and then loops
 * forever calling XLogBackgroundFlush() and sleeping between calls.
 *
 * The function does not return: the process leaves through proc_exit(0) on
 * a shutdown request, or exit(1) if the postmaster is found to be dead.
 */
void
WalWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext walwriter_context;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.  (walwriter probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * We have no particular use for SIGINT at the moment, but seems
	 * reasonable to treat like SIGTERM.
	 */
	pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
	pqsignal(SIGINT, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGTERM, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGQUIT, wal_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
	pqsignal(SIGUSR2, SIG_IGN); /* not used */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Create a resource owner to keep track of our resources (not clear that
	 * we need this, but may as well have one).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	walwriter_context = AllocSetContextCreate(TopMemoryContext,
											  "Wal Writer",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(walwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is heavily based on bgwriter.c, q.v.
	 *
	 * The block below runs only on the error path (nonzero return from
	 * sigsetjmp); afterwards control falls through to the main loop.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in walwriter, but we do have LWLocks, and perhaps buffers?
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(walwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(walwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Loop forever
	 */
	for (;;)
	{
		long		udelay;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Process any requests or signals received recently.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (shutdown_requested)
		{
			/* Normal exit from the walwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Do what we're here for...
		 */
		XLogBackgroundFlush();

		/*
		 * Delay until time to do something more, but fall out of delay
		 * reasonably quickly if signaled.
		 *
		 * The wait is cut into slices of at most one second so the signal
		 * flags are rechecked between slices; the remainder is slept at the
		 * end only if no flag was raised in the meantime.
		 * NOTE(review): presumably WalWriterDelay is in milliseconds, which
		 * would make udelay microseconds -- confirm against its definition.
		 */
		udelay = WalWriterDelay * 1000L;
		while (udelay > 999999L)
		{
			if (got_SIGHUP || shutdown_requested)
				break;
			pg_usleep(1000000L);
			udelay -= 1000000L;
		}
		if (!(got_SIGHUP || shutdown_requested))
			pg_usleep(udelay);
	}
}
/*
 * Background worker entrypoint.
 *
 * This is intended to demonstrate how a background worker can be used to
 * facilitate a parallel computation.  Most of the logic here is fairly
 * boilerplate stuff, designed to attach to the shared memory segment,
 * notify the user backend that we're alive, and so on.  The
 * application-specific bits of logic that you'd replace for your own worker
 * are attach_to_queues() and copy_messages().
 *
 * main_arg carries the handle of the dynamic shared memory segment to attach
 * to.  The function does not return; it always ends in proc_exit(1).
 */
void
test_shm_mq_main(Datum main_arg)
{
	dsm_segment *seg;
	shm_toc    *toc;
	shm_mq_handle *inqh;
	shm_mq_handle *outqh;
	volatile test_shm_mq_header *hdr;
	int			myworkernumber;
	PGPROC	   *registrant;

	/*
	 * Establish signal handlers.
	 *
	 * We want CHECK_FOR_INTERRUPTS() to kill off this worker process just as
	 * it would a normal user backend.  To make that happen, we establish a
	 * signal handler that is a stripped-down version of die().  We don't have
	 * any equivalent of the backend's command-read loop, where interrupts can
	 * be processed immediately, so make sure ImmediateInterruptOK is turned
	 * off.
	 */
	pqsignal(SIGTERM, handle_sigterm);
	ImmediateInterruptOK = false;
	BackgroundWorkerUnblockSignals();

	/*
	 * Connect to the dynamic shared memory segment.
	 *
	 * The backend that registered this worker passed us the ID of a shared
	 * memory segment to which we must attach for further instructions.  In
	 * order to attach to dynamic shared memory, we need a resource owner.
	 * Once we've mapped the segment in our address space, attach to the table
	 * of contents so we can locate the various data structures we'll need to
	 * find within the segment.
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "test_shm_mq worker");
	seg = dsm_attach(DatumGetInt32(main_arg));
	if (seg == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("unable to map dynamic shared memory segment")));
	toc = shm_toc_attach(PG_TEST_SHM_MQ_MAGIC, dsm_segment_address(seg));
	if (toc == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
			   errmsg("bad magic number in dynamic shared memory segment")));

	/*
	 * Acquire a worker number.
	 *
	 * By convention, the process registering this background worker should
	 * have stored the control structure at key 0.  We look up that key to
	 * find it.  Our worker number gives our identity: there may be just one
	 * worker involved in this parallel operation, or there may be many.
	 */
	hdr = shm_toc_lookup(toc, 0);
	SpinLockAcquire(&hdr->mutex);
	myworkernumber = ++hdr->workers_attached;
	SpinLockRelease(&hdr->mutex);
	if (myworkernumber > hdr->workers_total)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("too many message queue testing workers already")));

	/*
	 * Attach to the appropriate message queues.
	 */
	attach_to_queues(seg, toc, myworkernumber, &inqh, &outqh);

	/*
	 * Indicate that we're fully initialized and ready to begin the main part
	 * of the parallel operation.
	 *
	 * Once we signal that we're ready, the user backend is entitled to assume
	 * that our on_dsm_detach callbacks will fire before we disconnect from
	 * the shared memory segment and exit.  Generally, that means we must have
	 * attached to all relevant dynamic shared memory data structures by now.
	 */
	SpinLockAcquire(&hdr->mutex);
	++hdr->workers_ready;
	SpinLockRelease(&hdr->mutex);
	registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);
	if (registrant == NULL)
	{
		elog(DEBUG1, "registrant backend has exited prematurely");
		proc_exit(1);
	}
	/* Wake the registering backend so it notices workers_ready changed */
	SetLatch(&registrant->procLatch);

	/* Do the work. */
	copy_messages(inqh, outqh);

	/*
	 * We're done.  Explicitly detach the shared memory segment so that we
	 * don't get a resource leak warning at commit time.  This will fire any
	 * on_dsm_detach callbacks we've registered, as well.  Once that's done,
	 * we can go ahead and exit.
	 */
	dsm_detach(seg);
	proc_exit(1);
}
/*
 * Entry point of the test background worker.
 *
 * main_arg carries the handle of a dynamic shared memory segment whose start
 * is treated as a test_shm_mq_header.  The worker attaches to the segment,
 * bumps the ready/attached counters under the header's spinlock, wakes the
 * backend named by bgw_notify_pid, connects to the database named in the
 * header, and then detaches and exits with status 0.
 *
 * The function does not return.
 */
void
worker_test_main(Datum main_arg)
{
	dsm_segment *seg;
	volatile test_shm_mq_header *hdr;
	PGPROC	   *registrant;

	pqsignal(SIGHUP, handle_sighup);
	pqsignal(SIGTERM, handle_sigterm);

	BackgroundWorkerUnblockSignals();

	printf("worker_test_main: %d\n", DatumGetInt32(main_arg));

	/* Attaching to dynamic shared memory needs a resource owner */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "worker test");

	seg = dsm_attach(DatumGetInt32(main_arg));
	if (seg == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("unable to map dynamic shared memory segment")));

	hdr = dsm_segment_address(seg);

	/* Start: announce ourselves in the shared header */
	SpinLockAcquire(&hdr->mutex);
	hdr->workers_ready++;
	hdr->workers_attached++;
	SpinLockRelease(&hdr->mutex);

	registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);
	if (registrant == NULL)
	{
		elog(DEBUG1, "registrant backend has exited prematurely");
		proc_exit(1);
	}
	/* Wake the registering backend so it notices the counters changed */
	SetLatch(&registrant->procLatch);

	/* Do the work */
	BackgroundWorkerInitializeConnection(hdr->dbname, NULL);

	/* Print where the segment is mapped in this process */
	printf("DSM: %p\n", dsm_segment_address(seg));

	/* Disabled SPI round trip, kept for reference */
#if 0
	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	SPI_connect();
	PushActiveSnapshot(GetTransactionSnapshot());
	pgstat_report_activity(STATE_RUNNING, "initializing spi_worker schema");

	SPI_finish();
	PopActiveSnapshot();
	CommitTransactionCommand();
	pgstat_report_activity(STATE_IDLE, NULL);
#endif

	dsm_detach(seg);

	proc_exit(0);
}
/*
 * Helper function for the various SQL callable logical decoding functions.
 *
 * Arguments are read from fcinfo:
 *	0 - name of the replication slot to decode from (must not be NULL)
 *	1 - upto_lsn: decoding stops once the reader's EndRecPtr has reached
 *		this LSN; NULL means no LSN limit
 *	2 - upto_nchanges: decoding stops once at least this many rows have
 *		been returned; NULL (or 0) means no row limit
 *	3 - text array of alternating option names and values (must not be
 *		NULL, but may be empty)
 *
 * If 'confirm' is true, the position reached by the reader is confirmed on
 * the slot and the slot is marked dirty.  'binary' is recorded in the output
 * state; when it is false the output plugin must have declared textual
 * output, otherwise an error is raised.
 *
 * The result set is materialized into a tuplestore that is handed back
 * through the caller's ReturnSetInfo; the Datum result itself is always 0.
 */
static Datum
pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool binary)
{
	Name		name;
	XLogRecPtr	upto_lsn;
	int32		upto_nchanges;
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	MemoryContext per_query_ctx;
	MemoryContext oldcontext;
	XLogRecPtr	end_of_wal;
	XLogRecPtr	startptr;
	LogicalDecodingContext *ctx;
	ResourceOwner old_resowner = CurrentResourceOwner;
	ArrayType  *arr;
	Size		ndim;
	List	   *options = NIL;
	DecodingOutputState *p;

	check_permissions();

	CheckLogicalDecodingRequirements();

	if (PG_ARGISNULL(0))
		ereport(ERROR,
				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
				 errmsg("slot name must not be null")));
	name = PG_GETARG_NAME(0);

	if (PG_ARGISNULL(1))
		upto_lsn = InvalidXLogRecPtr;
	else
		upto_lsn = PG_GETARG_LSN(1);

	/* upto_nchanges is a row count, not an LSN: zero means "no limit" */
	if (PG_ARGISNULL(2))
		upto_nchanges = 0;
	else
		upto_nchanges = PG_GETARG_INT32(2);

	if (PG_ARGISNULL(3))
		ereport(ERROR,
				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
				 errmsg("options array must not be null")));
	arr = PG_GETARG_ARRAYTYPE_P(3);

	/* check to see if caller supports us returning a tuplestore */
	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("set-valued function called in context that cannot accept a set")));
	if (!(rsinfo->allowedModes & SFRM_Materialize))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("materialize mode required, but it is not allowed in this context")));

	/* state to write output to */
	p = palloc0(sizeof(DecodingOutputState));

	p->binary_output = binary;

	/* Build a tuple descriptor for our result type */
	if (get_call_result_type(fcinfo, NULL, &p->tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	/* Everything allocated from here on lives in the per-query context */
	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
	oldcontext = MemoryContextSwitchTo(per_query_ctx);

	/* Deconstruct options array */
	ndim = ARR_NDIM(arr);
	if (ndim > 1)
	{
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("array must be one-dimensional")));
	}
	else if (array_contains_nulls(arr))
	{
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("array must not contain nulls")));
	}
	else if (ndim == 1)
	{
		int			nelems;
		Datum	   *datum_opts;
		int			i;

		Assert(ARR_ELEMTYPE(arr) == TEXTOID);

		deconstruct_array(arr, TEXTOID, -1, false, 'i',
						  &datum_opts, NULL, &nelems);

		if (nelems % 2 != 0)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("array must have even number of elements")));

		for (i = 0; i < nelems; i += 2)
		{
			char	   *optname = TextDatumGetCString(datum_opts[i]);
			char	   *optval = TextDatumGetCString(datum_opts[i + 1]);

			options = lappend(options,
							  makeDefElem(optname, (Node *) makeString(optval), -1));
		}
	}

	p->tupstore = tuplestore_begin_heap(true, false, work_mem);
	rsinfo->returnMode = SFRM_Materialize;
	rsinfo->setResult = p->tupstore;
	rsinfo->setDesc = p->tupdesc;

	/*
	 * Compute the current end-of-wal and maintain ThisTimeLineID.
	 * RecoveryInProgress() will update ThisTimeLineID on promotion.
	 */
	if (!RecoveryInProgress())
		end_of_wal = GetFlushRecPtr();
	else
		end_of_wal = GetXLogReplayRecPtr(&ThisTimeLineID);

	ReplicationSlotAcquire(NameStr(*name), true);

	PG_TRY();
	{
		/* restart at slot's confirmed_flush */
		ctx = CreateDecodingContext(InvalidXLogRecPtr,
									options,
									false,
									logical_read_local_xlog_page,
									LogicalOutputPrepareWrite,
									LogicalOutputWrite, NULL);

		MemoryContextSwitchTo(oldcontext);

		/*
		 * Check whether the output plugin writes textual output if that's
		 * what we need.
		 */
		if (!binary &&
			ctx->options.output_type != OUTPUT_PLUGIN_TEXTUAL_OUTPUT)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("logical decoding output plugin \"%s\" produces binary output, but function \"%s\" expects textual data",
							NameStr(MyReplicationSlot->data.plugin),
							format_procedure(fcinfo->flinfo->fn_oid))));

		ctx->output_writer_private = p;

		/*
		 * Decoding of WAL must start at restart_lsn so that the entirety of
		 * xacts that committed after the slot's confirmed_flush can be
		 * accumulated into reorder buffers.
		 */
		startptr = MyReplicationSlot->data.restart_lsn;

		CurrentResourceOwner = ResourceOwnerCreate(CurrentResourceOwner, "logical decoding");

		/* invalidate non-timetravel entries */
		InvalidateSystemCaches();

		/* Decode until we run out of records */
		while ((startptr != InvalidXLogRecPtr && startptr < end_of_wal) ||
			   (ctx->reader->EndRecPtr != InvalidXLogRecPtr && ctx->reader->EndRecPtr < end_of_wal))
		{
			XLogRecord *record;
			char	   *errm = NULL;

			record = XLogReadRecord(ctx->reader, startptr, &errm);
			if (errm)
				elog(ERROR, "%s", errm);

			/*
			 * Now that we've set up the xlog reader state, subsequent calls
			 * pass InvalidXLogRecPtr to say "continue from last record"
			 */
			startptr = InvalidXLogRecPtr;

			/*
			 * The {begin_txn,change,commit_txn}_wrapper callbacks above will
			 * store the description into our tuplestore.
			 */
			if (record != NULL)
				LogicalDecodingProcessRecord(ctx, ctx->reader);

			/* check limits */
			if (upto_lsn != InvalidXLogRecPtr &&
				upto_lsn <= ctx->reader->EndRecPtr)
				break;
			if (upto_nchanges != 0 &&
				upto_nchanges <= p->returned_rows)
				break;
			CHECK_FOR_INTERRUPTS();
		}

		tuplestore_donestoring(p->tupstore);

		CurrentResourceOwner = old_resowner;

		/*
		 * Next time, start where we left off. (Hunting things, the family
		 * business..)
		 */
		if (ctx->reader->EndRecPtr != InvalidXLogRecPtr && confirm)
		{
			LogicalConfirmReceivedLocation(ctx->reader->EndRecPtr);

			/*
			 * If only the confirmed_flush_lsn has changed the slot won't get
			 * marked as dirty by the above. Callers on the walsender
			 * interface are expected to keep track of their own progress and
			 * don't need it written out. But SQL-interface users cannot
			 * specify their own start positions and it's harder for them to
			 * keep track of their progress, so we should make more of an
			 * effort to save it for them.
			 *
			 * Dirty the slot so it's written out at the next checkpoint.
			 * We'll still lose its position on crash, as documented, but it's
			 * better than always losing the position even on clean restart.
			 */
			ReplicationSlotMarkDirty();
		}

		/* free context, call shutdown callback */
		FreeDecodingContext(ctx);

		ReplicationSlotRelease();
		InvalidateSystemCaches();
	}
	PG_CATCH();
	{
		/* clear all timetravel entries */
		InvalidateSystemCaches();

		PG_RE_THROW();
	}
	PG_END_TRY();

	return (Datum) 0;
}
void InitResQueues(void) { HeapTuple tuple; int numQueues = 0; bool queuesok = true; cqContext *pcqCtx; cqContext cqc; Assert(ResScheduler); /* * Need a resource owner to keep the heapam code happy. */ Assert(CurrentResourceOwner == NULL); ResourceOwner owner = ResourceOwnerCreate(NULL, "InitQueues"); CurrentResourceOwner = owner; /** * The resqueue shared mem initialization must be serialized. Only the first session * should do the init. * Serialization is done the ResQueueLock LW_EXCLUSIVE. However, we must obtain all DB * lock before obtaining LWlock. * So, we must have obtained ResQueueRelationId and ResQueueCapabilityRelationId lock * first. */ Relation relResqueue = heap_open(ResQueueRelationId, AccessShareLock); LockRelationOid(ResQueueCapabilityRelationId, RowExclusiveLock); LWLockAcquire(ResQueueLock, LW_EXCLUSIVE); if (ResScheduler->num_queues > 0) { /* Hash table has already been loaded */ LWLockRelease(ResQueueLock); UnlockRelationOid(ResQueueCapabilityRelationId, RowExclusiveLock); heap_close(relResqueue, AccessShareLock); CurrentResourceOwner = NULL; ResourceOwnerDelete(owner); return; } /* XXX XXX: should this be rowexclusive ? */ pcqCtx = caql_beginscan( caql_indexOK( caql_addrel(cqclr(&cqc), relResqueue), false), cql("SELECT * FROM pg_resqueue ", NULL)); while (HeapTupleIsValid(tuple = caql_getnext(pcqCtx))) { Form_pg_resqueue queueform; Oid queueid; bool overcommit; float4 ignorelimit; Cost thresholds[NUM_RES_LIMIT_TYPES]; char *queuename; numQueues++; queueform = (Form_pg_resqueue) GETSTRUCT(tuple); queueid = HeapTupleGetOid(tuple); queuename = NameStr(queueform->rsqname); thresholds[RES_COUNT_LIMIT] = queueform->rsqcountlimit; thresholds[RES_COST_LIMIT] = queueform->rsqcostlimit; thresholds[RES_MEMORY_LIMIT] = ResourceQueueGetMemoryLimit(queueid); overcommit = queueform->rsqovercommit; ignorelimit = queueform->rsqignorecostlimit; queuesok = ResCreateQueue(queueid, thresholds, overcommit, ignorelimit); if (!queuesok) { /** Break out of loop. 
Close relations, relinquish LWLock and then error out */ break; } } caql_endscan(pcqCtx); LWLockRelease(ResQueueLock); UnlockRelationOid(ResQueueCapabilityRelationId, RowExclusiveLock); heap_close(relResqueue, AccessShareLock); if (!queuesok) ereport(PANIC, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("insufficient resource queues available"), errhint("Increase max_resource_queues to %d.", numQueues))); elog(LOG,"initialized %d resource queues", numQueues); CurrentResourceOwner = NULL; ResourceOwnerDelete(owner); return; }
/* Main entry point for walreceiver process */ void WalReceiverMain(void) { char conninfo[MAXCONNINFO]; XLogRecPtr startpoint; /* use volatile pointer to prevent code rearrangement */ volatile WalRcvData *walrcv = WalRcv; am_walreceiver = true; /* * WalRcv should be set up already (if we are a backend, we inherit this * by fork() or EXEC_BACKEND mechanism from the postmaster). */ Assert(walrcv != NULL); /* * Mark walreceiver as running in shared memory. * * Do this as early as possible, so that if we fail later on, we'll set * state to STOPPED. If we die before this, the startup process will keep * waiting for us to start up, until it times out. */ SpinLockAcquire(&walrcv->mutex); Assert(walrcv->pid == 0); switch (walrcv->walRcvState) { case WALRCV_STOPPING: /* If we've already been requested to stop, don't start up. */ walrcv->walRcvState = WALRCV_STOPPED; /* fall through */ case WALRCV_STOPPED: SpinLockRelease(&walrcv->mutex); proc_exit(1); break; case WALRCV_STARTING: /* The usual case */ break; case WALRCV_RUNNING: /* Shouldn't happen */ elog(PANIC, "walreceiver still running according to shared memory state"); } /* Advertise our PID so that the startup process can kill us */ walrcv->pid = MyProcPid; walrcv->walRcvState = WALRCV_RUNNING; /* Fetch information required to start streaming */ strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO); startpoint = walrcv->receivedUpto; SpinLockRelease(&walrcv->mutex); /* Arrange to clean up at walreceiver exit */ on_shmem_exit(WalRcvDie, 0); /* * If possible, make this process a group leader, so that the postmaster * can signal any child processes too. (walreceiver probably never has * any child processes, but for consistency we make all postmaster child * processes do this.) 
*/ #ifdef HAVE_SETSID if (setsid() < 0) elog(FATAL, "setsid() failed: %m"); #endif /* Properly accept or ignore signals the postmaster might send us */ pqsignal(SIGHUP, WalRcvSigHupHandler); /* set flag to read config * file */ pqsignal(SIGINT, SIG_IGN); pqsignal(SIGTERM, WalRcvShutdownHandler); /* request shutdown */ pqsignal(SIGQUIT, WalRcvQuickDieHandler); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); pqsignal(SIGUSR2, SIG_IGN); /* Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); pqsignal(SIGTTIN, SIG_DFL); pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); pqsignal(SIGWINCH, SIG_DFL); /* We allow SIGQUIT (quickdie) at all times */ sigdelset(&BlockSig, SIGQUIT); /* Load the libpq-specific functions */ load_file("libpqwalreceiver", false); if (walrcv_connect == NULL || walrcv_receive == NULL || walrcv_disconnect == NULL) elog(ERROR, "libpqwalreceiver didn't initialize correctly"); /* * Create a resource owner to keep track of our resources (not clear that * we need this, but may as well have one). */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Receiver"); /* Unblock signals (they were blocked when the postmaster forked us) */ PG_SETMASK(&UnBlockSig); /* Establish the connection to the primary for XLOG streaming */ EnableWalRcvImmediateExit(); walrcv_connect(conninfo, startpoint); DisableWalRcvImmediateExit(); /* Loop until end-of-streaming or error */ for (;;) { unsigned char type; char *buf; int len; /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ if (!PostmasterIsAlive(true)) exit(1); /* * Exit walreceiver if we're not in recovery. This should not happen, * but cross-check the status here. 
*/ if (!RecoveryInProgress()) ereport(FATAL, (errmsg("cannot continue WAL streaming, recovery has already ended"))); /* Process any requests or signals received recently */ ProcessWalRcvInterrupts(); if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); } /* Wait a while for data to arrive */ if (walrcv_receive(NAPTIME_PER_CYCLE, &type, &buf, &len)) { /* Accept the received data, and process it */ XLogWalRcvProcessMsg(type, buf, len); /* Receive any more data we can without sleeping */ while (walrcv_receive(0, &type, &buf, &len)) XLogWalRcvProcessMsg(type, buf, len); /* * If we've written some records, flush them to disk and let the * startup process know about them. */ XLogWalRcvFlush(); } } }
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 *
 * Outline:
 *	 - install signal handlers and create a resource owner and a private
 *	   memory context;
 *	 - establish a sigsetjmp() recovery point that cleans up after an
 *	   ereport(ERROR) and then falls through into the main loop again;
 *	 - loop forever: process config reloads and shutdown requests, run one
 *	   BgBufferSync() cycle, occasionally log a standby snapshot, then sleep
 *	   on the process latch (with a longer "hibernation" sleep when idle).
 *
 * The loop has no break; the function exits only through proc_exit(0) on a
 * shutdown request or exit(1) when postmaster death is detected.
 */
void
BackgroundWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext bgwriter_context;
	bool		prev_hibernate;

	/*
	 * Properly accept or ignore signals the postmaster might send us.
	 *
	 * bgwriter doesn't participate in ProcSignal signalling, but a SIGUSR1
	 * handler is still needed for latch wakeups.
	 */
	pqsignal(SIGHUP, BgSigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGTERM, ReqShutdownHandler);		/* shutdown */
	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, bgwriter_sigusr1_handler);
	pqsignal(SIGUSR2, SIG_IGN);

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

	/*
	 * We just started, assume there has been either a shutdown or
	 * end-of-recovery snapshot.
	 */
	last_snapshot_ts = GetCurrentTimestamp();

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	bgwriter_context = AllocSetContextCreate(TopMemoryContext,
											 "Background Writer",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(bgwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in bgwriter, but we do have LWLocks, buffers, and temp files.
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_SMgr();
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(bgwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(bgwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();

		/* Report wait end here, when there is no further possibility of wait */
		pgstat_report_wait_end();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Reset hibernation state after any error.  (This statement is reached
	 * both on first entry and after the recovery block above has run.)
	 */
	prev_hibernate = false;

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		can_hibernate;
		int			rc;

		/* Clear any already-pending wakeups */
		ResetLatch(MyLatch);

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Normal exit from the bgwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Do one cycle of dirty-buffer writing.  The return value says
		 * whether hibernation is allowed; it is consulted after the sleep
		 * below.
		 */
		can_hibernate = BgBufferSync();

		/*
		 * Send off activity statistics to the stats collector
		 */
		pgstat_send_bgwriter();

		if (FirstCallSinceLastCheckpoint())
		{
			/*
			 * After any checkpoint, close all smgr files.  This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();
		}

		/*
		 * Log a new xl_running_xacts every now and then so replication can
		 * get into a consistent state faster (think of suboverflowed
		 * snapshots) and clean up resources (locks, KnownXids*) more
		 * frequently.  The costs of this are relatively low, so doing it 4
		 * times (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine.
		 *
		 * We assume the interval for writing xl_running_xacts is
		 * significantly bigger than BgWriterDelay, so we don't complicate the
		 * overall timeout handling but just assume we're going to get called
		 * often enough even if hibernation mode is active.  It's not that
		 * important that LOG_SNAPSHOT_INTERVAL_MS is met strictly.  To make
		 * sure we're not waking the disk up unnecessarily on an idle system
		 * we check whether there has been any WAL inserted since the last
		 * time we've logged a running xacts.
		 *
		 * We do this logging in the bgwriter as it's the only process that is
		 * run regularly and returns to its mainloop all the time.  E.g.
		 * Checkpointer, when active, is barely ever in its mainloop and thus
		 * makes it hard to log regularly.
		 */
		if (XLogStandbyInfoActive() && !RecoveryInProgress())
		{
			TimestampTz timeout = 0;
			TimestampTz now = GetCurrentTimestamp();

			timeout = TimestampTzPlusMilliseconds(last_snapshot_ts,
												  LOG_SNAPSHOT_INTERVAL_MS);

			/*
			 * only log if enough time has passed and some xlog record has
			 * been inserted.
			 */
			if (now >= timeout &&
				last_snapshot_lsn != GetXLogInsertRecPtr())
			{
				last_snapshot_lsn = LogStandbySnapshot();
				last_snapshot_ts = now;
			}
		}

		/*
		 * Sleep until we are signaled or BgWriterDelay has elapsed.
		 *
		 * Note: the feedback control loop in BgBufferSync() expects that we
		 * will call it every BgWriterDelay msec.  While it's not critical for
		 * correctness that that be exact, the feedback loop might misbehave
		 * if we stray too far from that.  Hence, avoid loading this process
		 * down with latch events that are likely to happen frequently during
		 * normal operation.
		 */
		rc = WaitLatch(MyLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   BgWriterDelay /* ms */ );

		/*
		 * If no latch event and BgBufferSync says nothing's happening, extend
		 * the sleep in "hibernation" mode, where we sleep for much longer
		 * than bgwriter_delay says.  Fewer wakeups save electricity.  When a
		 * backend starts using buffers again, it will wake us up by setting
		 * our latch.  Because the extra sleep will persist only as long as no
		 * buffer allocations happen, this should not distort the behavior of
		 * BgBufferSync's control loop too badly; essentially, it will think
		 * that the system-wide idle interval didn't exist.
		 *
		 * There is a race condition here, in that a backend might allocate a
		 * buffer between the time BgBufferSync saw the alloc count as zero
		 * and the time we call StrategyNotifyBgWriter.  While it's not
		 * critical that we not hibernate anyway, we try to reduce the odds of
		 * that by only hibernating when BgBufferSync says nothing's happening
		 * for two consecutive cycles.  Also, we mitigate any possible
		 * consequences of a missed wakeup by not hibernating forever.
		 */
		if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate)
		{
			/* Ask for notification at next buffer allocation */
			StrategyNotifyBgWriter(MyProc->pgprocno);
			/* Sleep ... */
			rc = WaitLatch(MyLatch,
						   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
						   BgWriterDelay * HIBERNATE_FACTOR);
			/* Reset the notification request in case we timed out */
			StrategyNotifyBgWriter(-1);
		}

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			exit(1);

		/* Remember this cycle's verdict for the two-in-a-row test above */
		prev_hibernate = can_hibernate;
	}
}
/*
 * ContinuousQueryWorkerRun
 *
 * Runs a CQ worker, which continuously generates partial query results to
 * send back to the combiner process.
 *
 * Parameters:
 *	 portal	   - not referenced by this function
 *	 state	   - continuous view state; maxwaitms, emptysleepms, long_xact
 *				 and viewid are read here
 *	 queryDesc - query to execute; it is finished and freed on the normal
 *				 exit path
 *	 owner	   - resource owner that is made current again just before
 *				 returning
 *
 * The worker executes the plan in microbatches until its CQProcEntry is
 * marked inactive.  Each batch runs under the worker's own resource owner
 * ("CQResourceOwner") and, unless state->long_xact keeps a transaction open
 * across batches, inside its own transaction.
 *
 * If an error is raised, it is reported and cleaned up in the PG_CATCH
 * block; when continuous_query_crash_recovery is set the executor is then
 * started again from the "retry" label, otherwise the function proceeds to
 * its normal shutdown sequence.
 *
 * XactReadOnly is forced to true while the worker runs and restored on exit.
 */
void
ContinuousQueryWorkerRun(Portal portal, ContinuousViewState *state, QueryDesc *queryDesc, ResourceOwner owner)
{
	/*
	 * estate is assigned inside PG_TRY and read inside PG_CATCH, so it must
	 * be volatile: otherwise its value after the longjmp() that enters
	 * PG_CATCH is indeterminate.
	 */
	EState	   *volatile estate = NULL;
	DestReceiver *dest;
	CmdType		operation;
	MemoryContext oldcontext;
	int			timeoutms = state->maxwaitms;
	MemoryContext runcontext;
	CQProcEntry *entry = GetCQProcEntry(MyCQId);
	ResourceOwner cqowner = ResourceOwnerCreate(NULL, "CQResourceOwner");
	bool		savereadonly = XactReadOnly;

	cq_stat_initialize(state->viewid, MyProcPid);

	dest = CreateDestReceiver(DestCombiner);
	SetCombinerDestReceiverParams(dest, MyCQId);

	/* workers only need read-only transactions */
	XactReadOnly = true;

	runcontext = AllocSetContextCreate(TopMemoryContext, "CQRunContext",
									   ALLOCSET_DEFAULT_MINSIZE,
									   ALLOCSET_DEFAULT_INITSIZE,
									   ALLOCSET_DEFAULT_MAXSIZE);

	elog(LOG, "\"%s\" worker %d running", queryDesc->plannedstmt->cq_target->relname, MyProcPid);
	MarkWorkerAsRunning(MyCQId, MyWorkerId);
	pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText);

	TupleBufferInitLatch(WorkerTupleBuffer, MyCQId, MyWorkerId, &MyProc->procLatch);

	oldcontext = MemoryContextSwitchTo(runcontext);

retry:
	PG_TRY();
	{
		bool		xact_commit = true;
		TimestampTz last_process = GetCurrentTimestamp();
		TimestampTz last_commit = GetCurrentTimestamp();

		start_executor(queryDesc, runcontext, cqowner);

		CurrentResourceOwner = cqowner;

		estate = queryDesc->estate;
		operation = queryDesc->operation;

		/*
		 * Initialize context that lives for the duration of a single
		 * iteration of the main worker loop
		 */
		CQExecutionContext = AllocSetContextCreate(estate->es_query_cxt, "CQExecutionContext",
												   ALLOCSET_DEFAULT_MINSIZE,
												   ALLOCSET_DEFAULT_INITSIZE,
												   ALLOCSET_DEFAULT_MAXSIZE);

		estate->es_lastoid = InvalidOid;

		/*
		 * Startup combiner receiver
		 */
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

		for (;;)
		{
			if (!TupleBufferHasUnreadSlots())
			{
				/*
				 * Nothing to read.  If nothing has been processed for longer
				 * than emptysleepms, block on the tuple buffer; otherwise
				 * just take a short nap.
				 */
				if (TimestampDifferenceExceeds(last_process, GetCurrentTimestamp(), state->emptysleepms))
				{
					/* force stats flush */
					cq_stat_report(true);

					pgstat_report_activity(STATE_IDLE, queryDesc->sourceText);
					TupleBufferWait(WorkerTupleBuffer, MyCQId, MyWorkerId);
					pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText);
				}
				else
					pg_usleep(Min(WAIT_SLEEP_MS, state->emptysleepms) * 1000);
			}

			TupleBufferResetNotify(WorkerTupleBuffer, MyCQId, MyWorkerId);

			/*
			 * A new transaction is started only when the previous batch
			 * committed (always the case unless long_xact is set).
			 */
			if (xact_commit)
				StartTransactionCommand();

			set_snapshot(estate, cqowner);
			CurrentResourceOwner = cqowner;

			MemoryContextSwitchTo(estate->es_query_cxt);

			estate->es_processed = 0;
			estate->es_filtered = 0;

			/*
			 * Run plan on a microbatch
			 */
			ExecutePlan(estate, queryDesc->planstate, operation,
						true, 0, timeoutms, ForwardScanDirection, dest);

			IncrementCQExecutions(1);
			TupleBufferClearPinnedSlots();

			/*
			 * With long_xact, commit only once the open transaction has
			 * lasted longer than LONG_RUNNING_XACT_DURATION.
			 */
			if (state->long_xact)
			{
				if (TimestampDifferenceExceeds(last_commit, GetCurrentTimestamp(), LONG_RUNNING_XACT_DURATION))
					xact_commit = true;
				else
					xact_commit = false;
			}

			unset_snapshot(estate, cqowner);
			if (xact_commit)
			{
				CommitTransactionCommand();
				last_commit = GetCurrentTimestamp();
			}

			MemoryContextResetAndDeleteChildren(CQExecutionContext);
			MemoryContextSwitchTo(runcontext);
			CurrentResourceOwner = cqowner;

			if (estate->es_processed || estate->es_filtered)
			{
				/*
				 * last_process is refreshed only when the batch processed or
				 * filtered at least one tuple.  If the CV query never yields
				 * a tuple (e.g. "select id where id=99" when id=99 does not
				 * exist), it is never refreshed, so the worker will block on
				 * the latch for every allocated slot until some batch returns
				 * a tuple, at which point it goes back to the simple sleep
				 * for the threshold time.
				 */
				last_process = GetCurrentTimestamp();

				/*
				 * Send stats to the collector
				 */
				cq_stat_report(false);
			}

			/* Has the CQ been deactivated? */
			if (!entry->active)
			{
				if (ActiveSnapshotSet())
					unset_snapshot(estate, cqowner);
				if (IsTransactionState())
					CommitTransactionCommand();
				break;
			}
		}

		CurrentResourceOwner = cqowner;

		/*
		 * The cleanup functions below expect these things to be registered
		 */
		RegisterSnapshotOnOwner(estate->es_snapshot, cqowner);
		RegisterSnapshotOnOwner(queryDesc->snapshot, cqowner);
		RegisterSnapshotOnOwner(queryDesc->crosscheck_snapshot, cqowner);

		/* cleanup */
		ExecutorFinish(queryDesc);
		ExecutorEnd(queryDesc);
		FreeQueryDesc(queryDesc);
	}
	PG_CATCH();
	{
		EmitErrorReport();
		FlushErrorState();

		/*
		 * Since the worker is read-only, we can simply commit the
		 * transaction.
		 *
		 * NOTE(review): estate is still NULL if the error was raised before
		 * it was assigned above; confirm unset_snapshot() tolerates that.
		 */
		if (ActiveSnapshotSet())
			unset_snapshot(estate, cqowner);
		if (IsTransactionState())
			CommitTransactionCommand();

		TupleBufferUnpinAllPinnedSlots();
		TupleBufferClearReaders();

		/* This resets the es_query_cxt and in turn the CQExecutionContext */
		MemoryContextResetAndDeleteChildren(runcontext);

		IncrementCQErrors(1);

		if (continuous_query_crash_recovery)
			goto retry;
	}
	PG_END_TRY();

	(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);
	MemoryContextDelete(runcontext);

	XactReadOnly = savereadonly;

	/*
	 * Remove proc-level stats
	 */
	cq_stat_report(true);
	cq_stat_send_purge(state->viewid, MyProcPid, CQ_STAT_WORKER);

	CurrentResourceOwner = owner;
}
/* * FtsProbeMain */ NON_EXEC_STATIC void ftsMain(int argc, char *argv[]) { sigjmp_buf local_sigjmp_buf; char *fullpath; IsUnderPostmaster = true; am_ftsprobe = true; /* Stay away from PMChildSlot */ MyPMChildSlot = -1; /* reset MyProcPid */ MyProcPid = getpid(); /* Lose the postmaster's on-exit routines */ on_exit_reset(); /* Identify myself via ps */ init_ps_display("ftsprobe process", "", "", ""); SetProcessingMode(InitProcessing); /* * reread postgresql.conf if requested */ pqsignal(SIGHUP, sigHupHandler); /* * Presently, SIGINT will lead to autovacuum shutdown, because that's how * we handle ereport(ERROR). It could be improved however. */ pqsignal(SIGINT, ReqFtsFullScan); /* request full-scan */ pqsignal(SIGTERM, die); pqsignal(SIGQUIT, quickdie); /* we don't do any ftsprobe specific cleanup, just use the standard. */ pqsignal(SIGALRM, handle_sig_alarm); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, procsignal_sigusr1_handler); /* We don't listen for async notifies */ pqsignal(SIGUSR2, RequestShutdown); pqsignal(SIGFPE, FloatExceptionHandler); pqsignal(SIGCHLD, SIG_DFL); /* * Copied from bgwriter */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "FTS Probe"); /* Early initialization */ BaseInit(); /* See InitPostgres()... */ InitProcess(); InitBufferPoolBackend(); InitXLOGAccess(); SetProcessingMode(NormalProcessing); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Prevents interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * We can now go away. Note that because we'll call InitProcess, a * callback will be registered to do ProcKill, which will clean up * necessary state. */ proc_exit(0); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; PG_SETMASK(&UnBlockSig); /* * Add my PGPROC struct to the ProcArray. 
* * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend id: %d", MyBackendId); /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* heap access requires the rel-cache */ RelationCacheInitialize(); InitCatalogCache(); /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. */ RelationCacheInitializePhase2(); /* * In order to access the catalog, we need a database, and a * tablespace; our access to the heap is going to be slightly * limited, so we'll just use some defaults. */ if (!FindMyDatabase(probeDatabase, &MyDatabaseId, &MyDatabaseTableSpace)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exit", probeDatabase))); /* Now we can mark our PGPROC entry with the database ID */ /* (We assume this is an atomic store so no lock is needed) */ MyProc->databaseId = MyDatabaseId; fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); SetDatabasePath(fullpath); RelationCacheInitializePhase3(); /* shmem: publish probe pid */ ftsProbeInfo->fts_probePid = MyProcPid; /* main loop */ FtsLoop(); /* One iteration done, go away */ proc_exit(0); }