/*
 * Shutdown hook of the WAL send server service.
 *
 * Disables QD mirroring and closes the connection to the mirror, logging
 * when a disconnect took place.  An error raised while doing so is demoted
 * to NOTICE, written to the server log and then discarded; if the error
 * cannot be demoted it is re-thrown to the caller.
 */
static void
WalSendServer_ServiceShutdown(void)
{
	PG_TRY();
	{
		disableQDMirroring_ShutDown();

		if (disconnectMirrorQD_SendClose())
			elog(LOG,"Master mirror disconnected");
	}
	PG_CATCH();
	{
		if (elog_demote(NOTICE))
		{
			/* Demotion worked: log the error and clear the error state. */
			EmitErrorReport();
			FlushErrorState();
		}
		else
		{
			/* Could not soften the error, so let it propagate. */
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}
	}
	PG_END_TRY();
}
/*
 * Used by clients to send a request to a service.
 *
 * serviceClient: client handle; its serviceConfig must be set (i.e. the
 *                client must be connected).
 * request:       request payload to write.
 * requestLen:    payload length; must equal the service's configured
 *                requestLen.
 *
 * Returns the result of ServiceClientWrite() on success.  On any error the
 * error is demoted to WARNING, its message is copied into ClientErrorString
 * (or ClientErrorString is set to "" when the message is missing or does
 * not fit), the error is logged, and false is returned.  An error that
 * cannot be demoted is re-thrown.
 */
bool
ServiceClientSendRequest(ServiceClient *serviceClient, void *request, int requestLen)
{
	ServiceConfig *serviceConfig;
	char	   *message;
	bool		result = false;

	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	Assert(request != NULL);

	PG_TRY();
	{
		SUPPRESS_PANIC();

		serviceConfig = serviceClient->serviceConfig;
		if (serviceConfig == NULL)
		{
			/*
			 * There is no configuration, hence no title to print.  The
			 * message must not dereference serviceConfig here.
			 */
			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("Not connected to '%s'", "(unknown service)")));
		}

		if (requestLen != serviceConfig->requestLen)
		{
			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("Expecting request length %d and actual length is %d for '%s'",
							serviceConfig->requestLen, requestLen,
							serviceConfig->title)));
		}

		result = ServiceClientWrite(serviceClient, request, requestLen);

		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep a copy of the message for the client, if it fits. */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
/*
 * Handle one request received by the WAL send server.
 *
 * The raw request bytes are interpreted as a WalSendRequest, executed via
 * WalSendServerDoRequest(), and answered with a WalSendResponse whose 'ok'
 * field is true.
 *
 * Returns the result of ServiceProcessRespond(), or false when an error was
 * caught.  A caught error is logged, cleared, and QD mirroring is disabled
 * through disableQDMirroring_UnexpectedError().
 */
static bool
WalSendServer_ServiceRequest(ServiceCtrl *serviceCtrl, int sockfd, uint8 *request)
{
	WalSendRequest *req = (WalSendRequest *) request;
	WalSendResponse reply;
	bool		responded = false;

	/*
	 * Anything that bubbles up to this level is unexpected; trap it here so
	 * that mirroring can be switched off instead of taking the process down.
	 */
	PG_TRY();
	{
		if (Debug_print_qd_mirroring)
			elog(LOG, "request command %d = '%s'",
				 req->command,
				 WalSendRequestCommandToString(req->command));

		WalSendServerDoRequest(req);

		/* Every request is currently acknowledged with a response. */
		reply.ok = true;
		responded = ServiceProcessRespond(serviceCtrl, sockfd,
										  (uint8 *) &reply, sizeof(reply));
	}
	PG_CATCH();
	{
		/* Log the unexpected error and clear it. */
		EmitErrorReport();
		FlushErrorState();

		disableQDMirroring_UnexpectedError(
			"An unexpected error encountered. Please report this problem to Greenplum");

		responded = false;
	}
	PG_END_TRY();

	return responded;
}
/*
 * Thread entry trampoline: runs thread_arg->start_routine(thread_arg->arg)
 * and hands its result back as the thread's return value.
 *
 * arg must point to a WdThreadInfo.  If the routine raises an error, the
 * error is logged and cleared, the memory context that was current on entry
 * is restored, and WD_NG is returned instead.
 *
 * The result is carried as an unsigned long and converted to/from a pointer
 * with explicit casts; C does not allow implicit conversion between
 * integers and pointers.
 */
static void *
exec_func(void *arg)
{
	unsigned long rtn = (unsigned long) WD_NG;
	MemoryContext oldContext = CurrentMemoryContext;
	WdThreadInfo *thread_arg = (WdThreadInfo *) arg;

	Assert(thread_arg != NULL);

	PG_TRY();
	{
		rtn = (unsigned long) thread_arg->start_routine(thread_arg->arg);
	}
	PG_CATCH();
	{
		/*
		 * Log the error and drop it.  rtn was not assigned in this case,
		 * so it still holds WD_NG.
		 */
		EmitErrorReport();
		MemoryContextSwitchTo(oldContext);
		FlushErrorState();
	}
	PG_END_TRY();

	return (void *) rtn;
}
pid_t wd_child(int fork_wait_time) { int sock; volatile int fd; int rtn; pid_t pid = 0; sigjmp_buf local_sigjmp_buf; pid = fork(); if (pid != 0) { if (pid == -1) ereport(PANIC, (errmsg("failed to fork a watchdog process"))); return pid; } on_exit_reset(); processType = PT_WATCHDOG; if (fork_wait_time > 0) { sleep(fork_wait_time); } POOL_SETMASK(&UnBlockSig); signal(SIGTERM, wd_child_exit); signal(SIGINT, wd_child_exit); signal(SIGQUIT, wd_child_exit); signal(SIGCHLD, SIG_IGN); signal(SIGHUP, SIG_IGN); signal(SIGUSR1, SIG_IGN); signal(SIGUSR2, SIG_IGN); signal(SIGPIPE, SIG_IGN); signal(SIGALRM, SIG_IGN); init_ps_display("", "", "", ""); if (WD_List == NULL) { /* memory allocate is not ready */ wd_child_exit(15); } /* Create per loop iteration memory context */ ProcessLoopContext = AllocSetContextCreate(TopMemoryContext, "wd_child_main_loop", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(TopMemoryContext); sock = wd_create_recv_socket(WD_MYSELF->wd_port); if (sock < 0) { /* socket create failed */ wd_child_exit(15); } set_ps_display("watchdog", false); if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Since not using PG_TRY, must reset error stack by hand */ if(fd > 0) close(fd); error_context_stack = NULL; EmitErrorReport(); MemoryContextSwitchTo(TopMemoryContext); FlushErrorState(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; /* child loop */ for(;;) { MemoryContextSwitchTo(ProcessLoopContext); MemoryContextResetAndDeleteChildren(ProcessLoopContext); fd = -1; WdPacket buf; fd = wd_accept(sock); if (fd < 0) { continue; } rtn = wd_recv_packet(fd, &buf); if (rtn == WD_OK) { wd_send_response(fd, &buf); } close(fd); } return pid; }
/* * main entry pont of pcp worker child process */ void pcp_worker_main(int port) { sigjmp_buf local_sigjmp_buf; MemoryContext PCPMemoryContext; int authenticated = 0; char salt[4]; int random_salt = 0; struct timeval uptime; char tos; int rsize; char *buf = NULL; ereport(DEBUG1, (errmsg("I am PCP worker child with pid:%d",getpid()))); /* Identify myself via ps */ init_ps_display("", "", "", ""); gettimeofday(&uptime, NULL); srandom((unsigned int) (getpid() ^ uptime.tv_usec)); /* set up signal handlers */ signal(SIGTERM, die); signal(SIGINT, die); signal(SIGQUIT, die); signal(SIGCHLD, SIG_DFL); signal(SIGUSR2, wakeup_handler_child); signal(SIGUSR1, SIG_IGN); signal(SIGHUP, SIG_IGN); signal(SIGPIPE, SIG_IGN); signal(SIGALRM, SIG_IGN); /* Create per loop iteration memory context */ PCPMemoryContext = AllocSetContextCreate(TopMemoryContext, "PCP_worker_main_loop", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(TopMemoryContext); /* * install the call back for preparation of pcp worker child exit */ on_system_exit(pcp_worker_will_go_down, (Datum)NULL); /* Initialize my backend status */ pool_initialize_private_backend_status(); /* Initialize process context */ pool_init_process_context(); pcp_frontend = pcp_open(port); unset_nonblock(pcp_frontend->fd); if (sigsetjmp(local_sigjmp_buf, 1) != 0) { error_context_stack = NULL; EmitErrorReport(); MemoryContextSwitchTo(TopMemoryContext); FlushErrorState(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; for(;;) { MemoryContextSwitchTo(PCPMemoryContext); MemoryContextResetAndDeleteChildren(PCPMemoryContext); errno = 0; /* read a PCP packet */ do_pcp_read(pcp_frontend, &tos, 1); do_pcp_read(pcp_frontend, &rsize, sizeof(int)); rsize = ntohl(rsize); if ((rsize - sizeof(int)) > 0) { buf = (char *)palloc(rsize - sizeof(int)); do_pcp_read(pcp_frontend, buf, rsize - sizeof(int)); } ereport(DEBUG1, (errmsg("received PCP packet"), errdetail("PCP 
packet type of service '%c'", tos))); if (tos == 'R') /* authentication */ { set_ps_display("PCP: processing authentication", false); process_authentication(pcp_frontend, buf,salt, &random_salt); authenticated = 1; continue; } if (tos == 'M') /* md5 salt */ { set_ps_display("PCP: processing authentication", false); send_md5salt(pcp_frontend, salt); random_salt = 1; continue; } /* is this connection authenticated? if not disconnect immediately*/ if (!authenticated) ereport(FATAL, (errmsg("authentication failed for new PCP connection"), errdetail("connection not authorized"))); /* process a request */ pcp_process_command(tos, buf, rsize); } exit(0); }
/*
 * ContinuousQueryWorkerRun
 *
 * Launches a CQ worker, which continuously generates partial query results to send
 * back to the combiner process.
 *
 * portal:    not referenced anywhere in this function body.
 * state:     continuous view state; supplies viewid, maxwaitms, emptysleepms
 *            and long_xact.
 * queryDesc: query to execute; it is finished, ended and freed here once the
 *            worker loop terminates normally.
 * owner:     resource owner that is made current again just before returning.
 *
 * The worker runs with XactReadOnly forced to true and restores the previous
 * value on exit.  The main loop runs until the CQ proc entry is no longer
 * marked active.  If an error is caught and continuous_query_crash_recovery
 * is set, control jumps back to the "retry" label and the executor is
 * started again.
 *
 * NOTE(review): estate is assigned inside PG_TRY and read inside PG_CATCH but
 * is not declared volatile -- confirm this is safe with the compilers in use.
 */
void
ContinuousQueryWorkerRun(Portal portal, ContinuousViewState *state, QueryDesc *queryDesc, ResourceOwner owner)
{
	EState	   *estate = NULL;
	DestReceiver *dest;
	CmdType		operation;
	MemoryContext oldcontext;
	int			timeoutms = state->maxwaitms;
	MemoryContext runcontext;
	CQProcEntry *entry = GetCQProcEntry(MyCQId);
	ResourceOwner cqowner = ResourceOwnerCreate(NULL, "CQResourceOwner");
	bool		savereadonly = XactReadOnly;

	cq_stat_initialize(state->viewid, MyProcPid);

	/* Results are sent to a combiner destination receiver */
	dest = CreateDestReceiver(DestCombiner);
	SetCombinerDestReceiverParams(dest, MyCQId);

	/* workers only need read-only transactions */
	XactReadOnly = true;

	/* Context for the whole run; it is reset on error and deleted on exit */
	runcontext = AllocSetContextCreate(TopMemoryContext, "CQRunContext",
			ALLOCSET_DEFAULT_MINSIZE,
			ALLOCSET_DEFAULT_INITSIZE,
			ALLOCSET_DEFAULT_MAXSIZE);

	elog(LOG, "\"%s\" worker %d running", queryDesc->plannedstmt->cq_target->relname, MyProcPid);
	MarkWorkerAsRunning(MyCQId, MyWorkerId);
	pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText);

	TupleBufferInitLatch(WorkerTupleBuffer, MyCQId, MyWorkerId, &MyProc->procLatch);

	oldcontext = MemoryContextSwitchTo(runcontext);

	/* Re-entered from the PG_CATCH block when crash recovery is enabled */
retry:
	PG_TRY();
	{
		bool		xact_commit = true;
		TimestampTz last_process = GetCurrentTimestamp();
		TimestampTz last_commit = GetCurrentTimestamp();

		start_executor(queryDesc, runcontext, cqowner);

		CurrentResourceOwner = cqowner;

		estate = queryDesc->estate;
		operation = queryDesc->operation;

		/*
		 * Initialize context that lives for the duration of a single iteration
		 * of the main worker loop
		 */
		CQExecutionContext = AllocSetContextCreate(estate->es_query_cxt, "CQExecutionContext",
				ALLOCSET_DEFAULT_MINSIZE,
				ALLOCSET_DEFAULT_INITSIZE,
				ALLOCSET_DEFAULT_MAXSIZE);

		estate->es_lastoid = InvalidOid;

		/*
		 * Startup combiner receiver
		 */
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

		for (;;)
		{
			/*
			 * Nothing unread: block on the tuple buffer once nothing has
			 * been processed for emptysleepms, otherwise just nap briefly.
			 */
			if (!TupleBufferHasUnreadSlots())
			{
				if (TimestampDifferenceExceeds(last_process, GetCurrentTimestamp(), state->emptysleepms))
				{
					/* force stats flush */
					cq_stat_report(true);

					pgstat_report_activity(STATE_IDLE, queryDesc->sourceText);
					TupleBufferWait(WorkerTupleBuffer, MyCQId, MyWorkerId);
					pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText);
				}
				else
					pg_usleep(Min(WAIT_SLEEP_MS, state->emptysleepms) * 1000);
			}

			TupleBufferResetNotify(WorkerTupleBuffer, MyCQId, MyWorkerId);

			/*
			 * A new transaction is started only when the previous iteration
			 * committed (xact_commit is true on the first pass).
			 */
			if (xact_commit)
				StartTransactionCommand();

			set_snapshot(estate, cqowner);
			CurrentResourceOwner = cqowner;

			MemoryContextSwitchTo(estate->es_query_cxt);

			estate->es_processed = 0;
			estate->es_filtered = 0;

			/*
			 * Run plan on a microbatch
			 */
			ExecutePlan(estate, queryDesc->planstate, operation,
					true, 0, timeoutms, ForwardScanDirection, dest);

			IncrementCQExecutions(1);
			TupleBufferClearPinnedSlots();

			/*
			 * With long_xact set, the transaction is kept open across
			 * iterations and committed only once
			 * LONG_RUNNING_XACT_DURATION has elapsed since the last commit.
			 */
			if (state->long_xact)
			{
				if (TimestampDifferenceExceeds(last_commit, GetCurrentTimestamp(), LONG_RUNNING_XACT_DURATION))
					xact_commit = true;
				else
					xact_commit = false;
			}

			unset_snapshot(estate, cqowner);
			if (xact_commit)
			{
				CommitTransactionCommand();
				last_commit = GetCurrentTimestamp();
			}

			/* Release per-iteration memory and return to the run context */
			MemoryContextResetAndDeleteChildren(CQExecutionContext);
			MemoryContextSwitchTo(runcontext);
			CurrentResourceOwner = cqowner;

			if (estate->es_processed || estate->es_filtered)
			{
				/*
				 * If the CV query is such that the select does not return any tuples
				 * ex: select id where id=99; and id=99 does not exist, then this reset
				 * will fail. What will happen is that the worker will block at the latch for every
				 * allocated slot, TILL a cv returns a non-zero tuple, at which point
				 * the worker will resume a simple sleep for the threshold time.
				 */
				last_process = GetCurrentTimestamp();

				/*
				 * Send stats to the collector
				 */
				cq_stat_report(false);
			}

			/* Has the CQ been deactivated? */
			if (!entry->active)
			{
				/* Close any snapshot/transaction left open, then stop */
				if (ActiveSnapshotSet())
					unset_snapshot(estate, cqowner);
				if (IsTransactionState())
					CommitTransactionCommand();
				break;
			}
		}

		CurrentResourceOwner = cqowner;

		/*
		 * The cleanup functions below expect these things to be registered
		 */
		RegisterSnapshotOnOwner(estate->es_snapshot, cqowner);
		RegisterSnapshotOnOwner(queryDesc->snapshot, cqowner);
		RegisterSnapshotOnOwner(queryDesc->crosscheck_snapshot, cqowner);

		/* cleanup */
		ExecutorFinish(queryDesc);
		ExecutorEnd(queryDesc);
		FreeQueryDesc(queryDesc);
	}
	PG_CATCH();
	{
		/* Log the error and clear it before cleaning up */
		EmitErrorReport();
		FlushErrorState();

		/* Since the worker is read-only, we can simply commit the transaction. */
		if (ActiveSnapshotSet())
			unset_snapshot(estate, cqowner);
		if (IsTransactionState())
			CommitTransactionCommand();

		TupleBufferUnpinAllPinnedSlots();
		TupleBufferClearReaders();

		/* This resets the es_query_ctx and in turn the CQExecutionContext */
		MemoryContextResetAndDeleteChildren(runcontext);

		IncrementCQErrors(1);

		/* Start over from the top when crash recovery is enabled */
		if (continuous_query_crash_recovery)
			goto retry;
	}
	PG_END_TRY();

	(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);
	MemoryContextDelete(runcontext);

	XactReadOnly = savereadonly;

	/*
	 * Remove proc-level stats
	 */
	cq_stat_report(true);
	cq_stat_send_purge(state->viewid, MyProcPid, CQ_STAT_WORKER);

	CurrentResourceOwner = owner;
}
/*
 * Open a TCP connection to a service listening on the loopback interface.
 *
 * serviceConfig: service description; only its title is used, in messages.
 * listenerPort:  TCP port the service listens on.
 * serviceClient: on success sockfd holds the connected, non-blocking
 *                socket; on failure sockfd is set to -1.
 * complain:      when true, a failure is also written to the server log.
 *
 * Returns true on success.  On failure the error is demoted to WARNING, its
 * message is copied into ClientErrorString (or "" when the message is
 * missing or does not fit) and false is returned.  An error that cannot be
 * demoted is re-thrown.
 */
static bool
ServiceDoConnect(ServiceConfig *serviceConfig, int listenerPort, ServiceClient *serviceClient, bool complain)
{
	struct sockaddr_in addr;
	int			saved_err;
	char	   *message;
	bool		result = false;

	DECLARE_SAVE_SUPPRESS_PANIC();

	PG_TRY();
	{
		SUPPRESS_PANIC();

		for (;;)
		{
			/*
			 * Open a connection to the service.
			 */
			serviceClient->sockfd = socket(AF_INET, SOCK_STREAM, 0);
			if (serviceClient->sockfd < 0)
			{
				saved_err = errno;
				serviceClient->sockfd = -1;
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Could not create socket for '%s': %s",
								serviceConfig->title,
								strerror(saved_err))));
			}

			memset(&addr, 0, sizeof(addr));
			addr.sin_family = AF_INET;
			addr.sin_port = htons(listenerPort);
			addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

			if (connect(serviceClient->sockfd, (struct sockaddr *) &addr, sizeof(addr)) == 0)
				break;			/* success. we're done here! */

			/*
			 * Capture errno before close(), which may overwrite it, and use
			 * the saved value for every decision below.
			 */
			saved_err = errno;

			close(serviceClient->sockfd);
			serviceClient->sockfd = -1;

			/* Interrupted by a signal: retry with a fresh socket. */
			if (saved_err == EINTR)
				continue;

			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("Could not connect to '%s': %s",
							serviceConfig->title,
							strerror(saved_err))));
		}

		/* Connected; now switch the socket to non-blocking mode. */
		if (!pg_set_noblock(serviceClient->sockfd))
		{
			saved_err = errno;

			close(serviceClient->sockfd);
			serviceClient->sockfd = -1;

			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("Could not set '%s' socket to non-blocking mode: %s",
							serviceConfig->title,
							strerror(saved_err))));
		}

		result = true;

		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep a copy of the message for the client, if it fits. */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		if (complain)
			EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
/*
 * Main entry point for checkpointer process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 *
 * Overview of what the body does: install signal handlers, create a
 * resource owner and a private memory context, establish the sigsetjmp()
 * error recovery point, unblock signals, then loop forever.  Each loop
 * iteration absorbs fsync requests, reacts to the SIGHUP / checkpoint /
 * shutdown flags, performs a checkpoint or restartpoint when one was
 * requested or CheckPointTimeout seconds have passed, and finally sleeps on
 * the process latch.
 *
 * This function does not return: it leaves through proc_exit(0) when
 * shutdown is requested, or exit(1) when the postmaster has died.
 */
void
CheckpointerMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext checkpointer_context;

	/* Publish our pid in shared memory */
	CheckpointerShmem->checkpointer_pid = MyProcPid;

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we deliberately ignore SIGTERM, because during a standard Unix
	 * system shutdown cycle, init will SIGTERM all processes at once.  We
	 * want to wait for the backends to exit, whereupon the postmaster will
	 * tell us it's okay to shut down (via SIGUSR2).
	 */
	pqsignal(SIGHUP, ChkptSigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, ReqCheckpointHandler);		/* request checkpoint */
	pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
	pqsignal(SIGQUIT, chkpt_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, chkpt_sigusr1_handler);
	pqsignal(SIGUSR2, ReqShutdownHandler);		/* request shutdown */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Initialize so that first time-driven event happens at the correct time.
	 */
	last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Checkpointer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	checkpointer_context = AllocSetContextCreate(TopMemoryContext,
												 "Checkpointer",
												 ALLOCSET_DEFAULT_SIZES);
	MemoryContextSwitchTo(checkpointer_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in checkpointer, but we do have LWLocks, buffers, and temp
		 * files.
		 */
		LWLockReleaseAll();
		ConditionVariableCancelSleep();
		pgstat_report_wait_end();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_SMgr();
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Warn any waiting backends that the checkpoint failed.  The failure
		 * counter is bumped and ckpt_done is advanced to ckpt_started under
		 * the spinlock.
		 */
		if (ckpt_active)
		{
			SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
			CheckpointerShmem->ckpt_failed++;
			CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
			SpinLockRelease(&CheckpointerShmem->ckpt_lck);

			ckpt_active = false;
		}

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(checkpointer_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(checkpointer_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Ensure all shared memory values are set correctly for the config. Doing
	 * this here ensures no race conditions from other concurrent updaters.
	 */
	UpdateSharedMemoryConfig();

	/*
	 * Advertise our latch that backends can use to wake us up while we're
	 * sleeping.
	 */
	ProcGlobal->checkpointerLatch = &MyProc->procLatch;

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		do_checkpoint = false;
		int			flags = 0;
		pg_time_t	now;
		int			elapsed_secs;
		int			cur_timeout;
		int			rc;

		/* Clear any already-pending wakeups */
		ResetLatch(MyLatch);

		/*
		 * Process any requests or signals received recently.
		 */
		AbsorbFsyncRequests();

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);

			/*
			 * Checkpointer is the last process to shut down, so we ask it to
			 * hold the keys for a range of other tasks required most of which
			 * have nothing to do with checkpointing at all.
			 *
			 * For various reasons, some config values can change dynamically
			 * so the primary copy of them is held in shared memory to make
			 * sure all backends see the same value.  We make Checkpointer
			 * responsible for updating the shared memory copy if the
			 * parameter setting changes because of SIGHUP.
			 */
			UpdateSharedMemoryConfig();
		}
		if (checkpoint_requested)
		{
			checkpoint_requested = false;
			do_checkpoint = true;
			BgWriterStats.m_requested_checkpoints++;
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Close down the database */
			ShutdownXLOG(0, 0);
			/* Normal exit from the checkpointer is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Force a checkpoint if too much time has elapsed since the last one.
		 * Note that we count a timed checkpoint in stats only when this
		 * occurs without an external request, but we set the CAUSE_TIME flag
		 * bit even if there is also an external request.
		 */
		now = (pg_time_t) time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
		{
			if (!do_checkpoint)
				BgWriterStats.m_timed_checkpoints++;
			do_checkpoint = true;
			flags |= CHECKPOINT_CAUSE_TIME;
		}

		/*
		 * Do a checkpoint if requested.
		 */
		if (do_checkpoint)
		{
			bool		ckpt_performed = false;
			bool		do_restartpoint;

			/*
			 * Check if we should perform a checkpoint or a restartpoint. As a
			 * side-effect, RecoveryInProgress() initializes TimeLineID if
			 * it's not set yet.
			 */
			do_restartpoint = RecoveryInProgress();

			/*
			 * Atomically fetch the request flags to figure out what kind of a
			 * checkpoint we should perform, and increase the started-counter
			 * to acknowledge that we've started a new checkpoint.
			 */
			SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
			flags |= CheckpointerShmem->ckpt_flags;
			CheckpointerShmem->ckpt_flags = 0;
			CheckpointerShmem->ckpt_started++;
			SpinLockRelease(&CheckpointerShmem->ckpt_lck);

			/*
			 * The end-of-recovery checkpoint is a real checkpoint that's
			 * performed while we're still in recovery.
			 */
			if (flags & CHECKPOINT_END_OF_RECOVERY)
				do_restartpoint = false;

			/*
			 * We will warn if (a) too soon since last checkpoint (whatever
			 * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
			 * since the last checkpoint start.  Note in particular that this
			 * implementation will not generate warnings caused by
			 * CheckPointTimeout < CheckPointWarning.
			 */
			if (!do_restartpoint &&
				(flags & CHECKPOINT_CAUSE_XLOG) &&
				elapsed_secs < CheckPointWarning)
				ereport(LOG,
						(errmsg_plural("checkpoints are occurring too frequently (%d second apart)",
				"checkpoints are occurring too frequently (%d seconds apart)",
									   elapsed_secs,
									   elapsed_secs),
						 errhint("Consider increasing the configuration parameter \"max_wal_size\".")));

			/*
			 * Initialize checkpointer-private variables used during
			 * checkpoint.
			 */
			ckpt_active = true;
			if (do_restartpoint)
				ckpt_start_recptr = GetXLogReplayRecPtr(NULL);
			else
				ckpt_start_recptr = GetInsertRecPtr();
			ckpt_start_time = now;
			ckpt_cached_elapsed = 0;

			/*
			 * Do the checkpoint.
			 */
			if (!do_restartpoint)
			{
				CreateCheckPoint(flags);
				ckpt_performed = true;
			}
			else
				ckpt_performed = CreateRestartPoint(flags);

			/*
			 * After any checkpoint, close all smgr files.  This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();

			/*
			 * Indicate checkpoint completion to any waiting backends.
			 */
			SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
			CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
			SpinLockRelease(&CheckpointerShmem->ckpt_lck);

			if (ckpt_performed)
			{
				/*
				 * Note we record the checkpoint start time not end time as
				 * last_checkpoint_time.  This is so that time-driven
				 * checkpoints happen at a predictable spacing.
				 */
				last_checkpoint_time = now;
			}
			else
			{
				/*
				 * We were not able to perform the restartpoint (checkpoints
				 * throw an ERROR in case of error).  Most likely because we
				 * have not received any new checkpoint WAL records since the
				 * last restartpoint. Try again in 15 s.
				 */
				last_checkpoint_time = now - CheckPointTimeout + 15;
			}

			ckpt_active = false;
		}

		/* Check for archive_timeout and switch xlog files if necessary. */
		CheckArchiveTimeout();

		/*
		 * Send off activity statistics to the stats collector.  (The reason
		 * why we re-use bgwriter-related code for this is that the bgwriter
		 * and checkpointer used to be just one process.  It's probably not
		 * worth the trouble to split the stats support into two independent
		 * stats message types.)
		 */
		pgstat_send_bgwriter();

		/*
		 * Sleep until we are signaled or it's time for another checkpoint or
		 * xlog file switch.
		 */
		now = (pg_time_t) time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
			continue;			/* no sleep for us ... */
		cur_timeout = CheckPointTimeout - elapsed_secs;
		if (XLogArchiveTimeout > 0 && !RecoveryInProgress())
		{
			/* Also wake up in time for the next forced xlog switch */
			elapsed_secs = now - last_xlog_switch_time;
			if (elapsed_secs >= XLogArchiveTimeout)
				continue;		/* no sleep for us ... */
			cur_timeout = Min(cur_timeout, XLogArchiveTimeout - elapsed_secs);
		}

		rc = WaitLatch(MyLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   cur_timeout * 1000L /* convert to ms */,
					   WAIT_EVENT_CHECKPOINTER_MAIN);

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			exit(1);
	}
}
/*
 * kafka_consume_main
 *
 * Main function for Kafka consumers running as background workers
 *
 * arg: Oid (passed as a Datum) used as the lookup key into consumer_procs.
 *
 * After connecting to the database and loading the saved consumer state,
 * this starts consuming every partition whose id modulo
 * consumer.parallelism equals this process's partition_group.  Until
 * SIGTERM is received, each round pulls one batch per assigned partition,
 * accumulates all payloads into a single newline-separated buffer, runs the
 * COPY statement over that buffer and saves the consumer offsets.  A failed
 * COPY is logged and the batch is dropped; the worker keeps running.
 */
void
kafka_consume_main(Datum arg)
{
	char		err_msg[512];
	rd_kafka_topic_conf_t *topic_conf;
	rd_kafka_t *kafka;
	rd_kafka_topic_t *topic;
	rd_kafka_message_t **messages;
	const struct rd_kafka_metadata *meta;
	struct rd_kafka_metadata_topic topic_meta;
	rd_kafka_resp_err_t err;
	bool		found;
	Oid			id = (Oid) arg;
	ListCell   *lc;
	KafkaConsumerProc *proc = hash_search(consumer_procs, &id, HASH_FIND, &found);
	KafkaConsumer consumer;
	CopyStmt   *copy;
	int			valid_brokers = 0;
	int			i;
	int			my_partitions = 0;

	if (!found)
		elog(ERROR, "kafka consumer %d not found", id);

	pqsignal(SIGTERM, kafka_consume_main_sigterm);
#define BACKTRACE_SEGFAULTS
#ifdef BACKTRACE_SEGFAULTS
	pqsignal(SIGSEGV, debug_segfault);
#endif

	/* we're now ready to receive signals */
	BackgroundWorkerUnblockSignals();

	/* give this proc access to the database */
	BackgroundWorkerInitializeConnection(NameStr(proc->dbname), NULL);

	/* load saved consumer state */
	StartTransactionCommand();
	load_consumer_state(proc->consumer_id, &consumer);
	copy = get_copy_statement(&consumer);

	topic_conf = rd_kafka_topic_conf_new();
	kafka = rd_kafka_new(RD_KAFKA_CONSUMER, NULL, err_msg, sizeof(err_msg));

	/*
	 * rd_kafka_new() returns NULL on failure and describes the problem in
	 * err_msg; every call below needs a valid handle.
	 */
	if (kafka == NULL)
		elog(ERROR, "failed to create kafka handle: %s", err_msg);

	rd_kafka_set_logger(kafka, logger);

	/*
	 * Add all brokers currently in pipeline_kafka_brokers
	 */
	if (consumer.brokers == NIL)
		elog(ERROR, "no valid brokers were found");

	foreach(lc, consumer.brokers)
		valid_brokers += rd_kafka_brokers_add(kafka, lfirst(lc));

	if (!valid_brokers)
		elog(ERROR, "no valid brokers were found");

	/*
	 * Set up our topic to read from
	 */
	topic = rd_kafka_topic_new(kafka, consumer.topic, topic_conf);
	err = rd_kafka_metadata(kafka, false, topic, &meta, CONSUMER_TIMEOUT);

	if (err != RD_KAFKA_RESP_ERR_NO_ERROR)
		elog(ERROR, "failed to acquire metadata: %s", rd_kafka_err2str(err));

	Assert(meta->topic_cnt == 1);
	topic_meta = meta->topics[0];

	load_consumer_offsets(&consumer, &topic_meta, proc->offset);
	CommitTransactionCommand();

	/*
	 * Begin consuming all partitions that this process is responsible for
	 */
	for (i = 0; i < topic_meta.partition_cnt; i++)
	{
		int			partition = topic_meta.partitions[i].id;

		Assert(partition <= consumer.num_partitions);
		if (partition % consumer.parallelism != proc->partition_group)
			continue;

		elog(LOG, "[kafka consumer] %s <- %s consuming partition %d from offset %ld",
			 consumer.rel->relname, consumer.topic, partition, consumer.offsets[partition]);

		if (rd_kafka_consume_start(topic, partition, consumer.offsets[partition]) == -1)
			elog(ERROR, "failed to start consuming: %s", rd_kafka_err2str(rd_kafka_errno2err(errno)));

		my_partitions++;
	}

	/*
	 * No point doing anything if we don't have any partitions assigned to us
	 */
	if (my_partitions == 0)
	{
		elog(LOG, "[kafka consumer] %s <- %s consumer %d doesn't have any partitions to read from",
			 consumer.rel->relname, consumer.topic, MyProcPid);
		goto done;
	}

	/* Array of message pointers filled in by rd_kafka_consume_batch() */
	messages = palloc0(sizeof(rd_kafka_message_t *) * consumer.batch_size);

	/*
	 * Consume messages until we are terminated
	 */
	while (!got_sigterm)
	{
		int			messages_buffered = 0;
		int			partition;
		StringInfoData buf;
		bool		xact = false;

		for (partition = 0; partition < consumer.num_partitions; partition++)
		{
			ssize_t		num_consumed;

			if (partition % consumer.parallelism != proc->partition_group)
				continue;

			num_consumed = rd_kafka_consume_batch(topic, partition,
												  CONSUMER_TIMEOUT, messages, consumer.batch_size);

			if (num_consumed <= 0)
				continue;

			if (!xact)
			{
				StartTransactionCommand();
				xact = true;

				/*
				 * Set up the buffer once per round.  Re-initializing it for
				 * every partition would throw away what earlier partitions
				 * contributed while their offsets still advance.
				 */
				initStringInfo(&buf);
			}

			for (i = 0; i < num_consumed; i++)
			{
				if (messages[i]->payload != NULL)
				{
					appendBinaryStringInfo(&buf, messages[i]->payload, messages[i]->len);
					/* COPY input is line based: terminate every message */
					if (buf.len > 0 && buf.data[buf.len - 1] != '\n')
						appendStringInfoChar(&buf, '\n');
					messages_buffered++;
				}
				consumer.offsets[partition] = messages[i]->offset;
				rd_kafka_message_destroy(messages[i]);
			}
		}

		/* Nothing arrived on any partition: nap briefly and poll again */
		if (!xact)
		{
			pg_usleep(1 * 1000);
			continue;
		}

		/* we don't want to die in the event of any errors */
		PG_TRY();
		{
			if (messages_buffered)
				execute_copy(copy, &buf);
		}
		PG_CATCH();
		{
			/* Report how many messages were in the batch that failed */
			elog(LOG, "[kafka consumer] %s <- %s failed to process batch, dropped %d message%s:",
				 consumer.rel->relname, consumer.topic, messages_buffered,
				 (messages_buffered == 1 ? "" : "s"));
			EmitErrorReport();
			FlushErrorState();

			AbortCurrentTransaction();
			xact = false;
		}
		PG_END_TRY();

		/* The failed transaction was aborted; offsets need a fresh one */
		if (!xact)
			StartTransactionCommand();

		if (messages_buffered)
			save_consumer_state(&consumer, proc->partition_group);

		CommitTransactionCommand();
	}

done:
	hash_search(consumer_procs, &id, HASH_REMOVE, NULL);

	rd_kafka_topic_destroy(topic);
	rd_kafka_destroy(kafka);
	rd_kafka_wait_destroyed(CONSUMER_TIMEOUT);
}
/*
 * Main entry point for walwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 *
 * Overview of what the body does: install signal handlers, create a
 * resource owner and a private memory context, establish the sigsetjmp()
 * error recovery point, unblock signals, then loop forever calling
 * XLogBackgroundFlush() and sleeping WalWriterDelay milliseconds between
 * rounds.
 *
 * This function does not return: it leaves through proc_exit(0) when
 * shutdown is requested, or exit(1) when the postmaster is no longer alive.
 */
void
WalWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext walwriter_context;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.	(walwriter probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * We have no particular use for SIGINT at the moment, but seems
	 * reasonable to treat like SIGTERM.
	 */
	pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
	pqsignal(SIGINT, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGTERM, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGQUIT, wal_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
	pqsignal(SIGUSR2, SIG_IGN); /* not used */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Create a resource owner to keep track of our resources (not clear that
	 * we need this, but may as well have one).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	walwriter_context = AllocSetContextCreate(TopMemoryContext,
											  "Wal Writer",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(walwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is heavily based on bgwriter.c, q.v.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().	We don't have very many resources to worry
		 * about in walwriter, but we do have LWLocks, and perhaps buffers?
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(walwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(walwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Loop forever
	 */
	for (;;)
	{
		long		udelay;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Process any requests or signals received recently.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (shutdown_requested)
		{
			/* Normal exit from the walwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Do what we're here for...
		 */
		XLogBackgroundFlush();

		/*
		 * Delay until time to do something more, but fall out of delay
		 * reasonably quickly if signaled.
		 *
		 * WalWriterDelay is converted from milliseconds to microseconds and
		 * slept off in chunks of at most one second, re-checking the signal
		 * flags between chunks; the sub-second remainder is slept last.
		 */
		udelay = WalWriterDelay * 1000L;
		while (udelay > 999999L)
		{
			if (got_SIGHUP || shutdown_requested)
				break;
			pg_usleep(1000000L);
			udelay -= 1000000L;
		}
		if (!(got_SIGHUP || shutdown_requested))
			pg_usleep(udelay);
	}
}
/* * Main entry point for bgwriter process * * This is invoked from BootstrapMain, which has already created the basic * execution environment, but not enabled signals yet. */ void BackgroundWriterMain(void) { sigjmp_buf local_sigjmp_buf; MemoryContext bgwriter_context; am_bg_writer = true; /* * If possible, make this process a group leader, so that the postmaster * can signal any child processes too. (bgwriter probably never has any * child processes, but for consistency we make all postmaster child * processes do this.) */ #ifdef HAVE_SETSID if (setsid() < 0) elog(FATAL, "setsid() failed: %m"); #endif /* * Properly accept or ignore signals the postmaster might send us * * SIGUSR1 is presently unused; keep it spare in case someday we want this * process to participate in ProcSignal signalling. */ pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */ pqsignal(SIGINT, SIG_IGN); /* as of 9.2 no longer requests checkpoint */ pqsignal(SIGTERM, ReqShutdownHandler); /* shutdown */ pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */ pqsignal(SIGUSR2, SIG_IGN); /* * Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); pqsignal(SIGTTIN, SIG_DFL); pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); pqsignal(SIGWINCH, SIG_DFL); /* We allow SIGQUIT (quickdie) at all times */ sigdelset(&BlockSig, SIGQUIT); /* * Create a resource owner to keep track of our resources (currently only * buffer pins). */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer"); /* * Create a memory context that we will do all our work in. We do this so * that we can reset the context during error recovery and thereby avoid * possible memory leaks. Formerly this code just ran in * TopMemoryContext, but resetting that would be a really bad idea. 
*/ bgwriter_context = AllocSetContextCreate(TopMemoryContext, "Background Writer", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(bgwriter_context); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Since not using PG_TRY, must reset error stack by hand */ error_context_stack = NULL; /* Prevent interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry * about in bgwriter, but we do have LWLocks, buffers, and temp files. */ LWLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); /* we needn't bother with the other ResourceOwnerRelease phases */ AtEOXact_Buffers(false); AtEOXact_Files(); AtEOXact_HashTables(false); /* * Now return to normal top-level context and clear ErrorContext for * next time. */ MemoryContextSwitchTo(bgwriter_context); FlushErrorState(); /* Flush any leaked data in the top-level context */ MemoryContextResetAndDeleteChildren(bgwriter_context); /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); /* * Sleep at least 1 second after any error. A write error is likely * to be repeated, and we don't want to be filling the error logs as * fast as we can. */ pg_usleep(1000000L); /* * Close all open files after any error. This is helpful on Windows, * where holding deleted files open causes various strange errors. * It's not clear we need it elsewhere, but shouldn't hurt. 
*/ smgrcloseall(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; /* * Unblock signals (they were blocked when the postmaster forked us) */ PG_SETMASK(&UnBlockSig); /* * Use the recovery target timeline ID during recovery */ if (RecoveryInProgress()) ThisTimeLineID = GetRecoveryTargetTLI(); /* * Loop forever */ for (;;) { /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ if (!PostmasterIsAlive()) exit(1); if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); /* update global shmem state for sync rep */ } if (shutdown_requested) { /* * From here on, elog(ERROR) should end with exit(1), not send * control back to the sigsetjmp block above */ ExitOnAnyError = true; /* Normal exit from the bgwriter is here */ proc_exit(0); /* done */ } /* * Do one cycle of dirty-buffer writing. */ BgBufferSync(); /* Nap for the configured time. */ BgWriterNap(); } }
/*
 * ContQuerySchedulerMain
 *		Entry point of the continuous query scheduler process.
 *
 * Sets up the process (signal handlers, PGPROC, memory context, error
 * recovery point), publishes its pid in ContQuerySchedulerShmem and then
 * loops: it starts a proc group for every database in its list that has no
 * entry in proc_table yet, waits on its latch, and reacts to SIGTERM,
 * SIGHUP and SIGUSR2 flags.
 *
 * argc/argv are not referenced in the visible part of the function.
 *
 * NOTE(review): the definition is truncated in this file -- the main loop
 * and the function body are not closed before the next definition starts.
 * The code tokens are left exactly as found.
 */
void
ContQuerySchedulerMain(int argc, char *argv[])
{
	sigjmp_buf	local_sigjmp_buf;
	List	   *dbs = NIL;

	/* we are a postmaster subprocess now */
	IsUnderPostmaster = true;
	am_cont_scheduler = true;

	/* reset MyProcPid */
	MyProcPid = getpid();
	MyPMChildSlot = AssignPostmasterChildSlot();

	/* record Start Time for logging */
	MyStartTime = time(NULL);

	/* Identify myself via ps */
	init_ps_display("continuous query scheduler process", "", "", "");

	ereport(LOG, (errmsg("continuous query scheduler started")));

	/* Optional startup delay; PostAuthDelay is scaled to microseconds here */
	if (PostAuthDelay)
		pg_usleep(PostAuthDelay * 1000000L);

	SetProcessingMode(InitProcessing);

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too. This is only for consistency sake, we
	 * never fork the scheduler process. Instead dynamic bgworkers are used.
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Set up signal handlers.  We operate on databases much like a regular
	 * backend, so we use the same signal handling.  See equivalent code in
	 * tcop/postgres.c.
	 */
	pqsignal(SIGHUP, sighup_handler);
	pqsignal(SIGINT, sigint_handler);
	pqsignal(SIGTERM, sigterm_handler);

	pqsignal(SIGQUIT, quickdie);
	InitializeTimeouts();		/* establishes SIGALRM handler */

	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
	pqsignal(SIGUSR2, sigusr2_handler);
	pqsignal(SIGFPE, FloatExceptionHandler);
	pqsignal(SIGCHLD, SIG_DFL);

	/*
	 * BACKTRACE_SEGFAULTS is defined unconditionally right here, so the
	 * SIGSEGV handler below is always installed.
	 */
#define BACKTRACE_SEGFAULTS
#ifdef BACKTRACE_SEGFAULTS
	pqsignal(SIGSEGV, debug_segfault);
#endif

	/* Early initialization */
	BaseInit();

	/*
	 * Create a per-backend PGPROC struct in shared memory, except in the
	 * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
	 * this before we can use LWLocks (and in the EXEC_BACKEND case we already
	 * had to do some stuff with LWLocks).
	 */
#ifndef EXEC_BACKEND
	InitProcess();
#endif

	/* Initialized with no database name and InvalidOid as the database OID */
	InitPostgres(NULL, InvalidOid, NULL, NULL);

	SetProcessingMode(NormalProcessing);

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.
	 */
	ContQuerySchedulerMemCxt = AllocSetContextCreate(TopMemoryContext,
			"ContQuerySchedulerCtx",
			ALLOCSET_DEFAULT_MINSIZE,
			ALLOCSET_DEFAULT_INITSIZE,
			ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(ContQuerySchedulerMemCxt);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is a stripped down version of PostgresMain error recovery.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Forget any pending QueryCancel or timeout request */
		disable_all_timeouts(false);
		QueryCancelPending = false; /* second to avoid race condition */

		/* Report the error to the server log */
		EmitErrorReport();

		/* Abort the current transaction in order to recover */
		AbortCurrentTransaction();

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(ContQuerySchedulerMemCxt);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(ContQuerySchedulerMemCxt);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  We don't want to be
		 * filling the error logs as fast as we can.
		 */
		pg_usleep(1000000L);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/* must unblock signals before calling rebuild_database_list */
	PG_SETMASK(&UnBlockSig);

	/* Publish our pid in shared memory */
	ContQuerySchedulerShmem->scheduler_pid = MyProcPid;

	/*
	 * (Re)build the database list.  This also runs again after the error
	 * recovery block above, whose context reset precedes this assignment.
	 */
	dbs = get_database_list();

	/* Loop forever */
	for (;;)
	{
		ListCell *lc;
		int rc;

		/* Make sure every listed database has a proc group entry */
		foreach(lc, dbs)
		{
			DatabaseEntry *db_entry = lfirst(lc);
			bool found;
			ContQueryProcGroup *grp = hash_search(ContQuerySchedulerShmem->proc_table, &db_entry->oid, HASH_ENTER, &found);

			/* If we don't have an entry for this dboid, initialize a new one and fire off bg procs */
			if (!found)
			{
				grp->db_oid = db_entry->oid;
				namestrcpy(&grp->db_name, NameStr(db_entry->name));

				start_group(grp);
			}
		}

		/* Allow sinval catchup interrupts while sleeping */
		EnableCatchupInterrupt();

		/*
		 * Wait until the latch is set or the postmaster dies.  No
		 * WL_TIMEOUT is requested, so this call has no naptime of its own;
		 * the original comment states that all the signal handlers wake us
		 * by calling SetLatch.
		 */
		rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, 0);

		ResetLatch(&MyProc->procLatch);

		DisableCatchupInterrupt();

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);

		/* the normal shutdown case */
		if (got_SIGTERM)
			break;

		/* update config? */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);

			/* update tuning parameters, so that they can be read downstream by background processes */
			update_tuning_params();
		}

		/* terminate a proc group? */
		if (got_SIGUSR2)
		{
			HASH_SEQ_STATUS status;
			ContQueryProcGroup *grp;

			got_SIGUSR2 = false;

			/* Scan every proc group and act on those flagged for termination */
			hash_seq_init(&status, ContQuerySchedulerShmem->proc_table);
			while ((grp = (ContQueryProcGroup *) hash_seq_search(&status)) != NULL)
			{
				ListCell *lc;	/* shadows the loop-level lc declared above */

				if (!grp->terminate)
					continue;

				/* Drop the matching database from our list (first match only) */
				foreach(lc, dbs)
				{
					DatabaseEntry *entry = lfirst(lc);
					if (entry->oid == grp->db_oid)
					{
						dbs = list_delete(dbs, entry);
						break;
					}
				}

				terminate_group(grp);
			}
		}
/*
 * WalSendServerDoRequest
 *		Execute one request received by the WAL send server.
 *
 * walSendRequest->command selects the action:
 *
 *	PositionToEnd	 ask the standby for its transaction log end location
 *					 (stored in originalEndLocation).
 *	Catchup			 while in the catching-up state, ship the WAL between the
 *					 standby's end location and the primary's flushed
 *					 location, then enable mirroring.
 *	WriteWalPages	 append the indicated xlog buffer pages to saveBuffer;
 *					 nothing is sent yet.
 *	FlushWalPages	 send the accumulated saveBuffer contents to the standby
 *					 and, if present, the new checkpoint location.
 *	CloseForShutdown perform the normal service shutdown work.
 *
 * Failures talking to the standby are reported through the
 * disableQDMirroring_* routines followed by disconnectMirrorQD_SendClose().
 * Unexpected states, an unknown command, allocation failure and command
 * formatting failure raise ERROR.
 */
static void
WalSendServerDoRequest(WalSendRequest *walSendRequest)
{
	bool		successful;
	struct timeval standbyTimeout;

	WalSendServerGetStandbyTimeout(&standbyTimeout);

	switch (walSendRequest->command)
	{
		case PositionToEnd:
			elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "PositionToEnd");

			successful = write_position_to_end(&originalEndLocation,
											   NULL,
											   &walsend_shutdown_requested);
			if (successful)
				elog(LOG,"Standby master returned transaction log end location %s",
					 XLogLocationToString(&originalEndLocation));
			else
			{
				disableQDMirroring_ConnectionError(
					"Unable to connect to standby master and determine transaction log end location",
					GetStandbyErrorString());
				disconnectMirrorQD_SendClose();
			}
			break;

		case Catchup:
			elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Catchup");

			if (isQDMirroringCatchingUp())
			{
				bool		tooFarBehind = false;

				elog(LOG,"Current master transaction log is flushed through location %s",
					 XLogLocationToString(&walSendRequest->flushedLocation));

				if (XLByteLT(originalEndLocation, walSendRequest->flushedLocation))
				{
					/*
					 * Standby master is behind the primary.  Send catchup WAL.
					 */

					/*
					 * Use a TRY block to catch errors from our attempt to read
					 * the primary's WAL.  Errors from sending to the standby
					 * come up as a boolean return (successful).
					 */
					PG_TRY();
					{
						successful = XLogCatchupQDMirror(
											&originalEndLocation,
											&walSendRequest->flushedLocation,
											&standbyTimeout,
											&walsend_shutdown_requested);
					}
					PG_CATCH();
					{
						/*
						 * Report the error related to reading the primary's
						 * WAL to the server log, but first demote it to
						 * something much less scary.
						 */
						if (!elog_demote(WARNING))
						{
							elog(LOG,"unable to demote error");
							PG_RE_THROW();
						}

						EmitErrorReport();
						FlushErrorState();

						successful = false;
						tooFarBehind = true;
					}
					PG_END_TRY();

					if (successful)
					{
						elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
							 "catchup send from standby end %s through primary flushed location %s",
							 XLogLocationToString(&originalEndLocation),
							 XLogLocationToString2(&walSendRequest->flushedLocation));
					}
				}
				else if (XLByteEQ(originalEndLocation, walSendRequest->flushedLocation))
				{
					elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"Mirror was already caught up");
					successful = true;
				}
				else
				{
					elog(WARNING,"Standby master transaction log location %s is beyond the current master end location %s",
						 XLogLocationToString(&originalEndLocation),
						 XLogLocationToString2(&walSendRequest->flushedLocation));
					successful = false;
				}

				if (successful)
				{
					char		detail[200];
					int			count;

					count = snprintf(
								 detail, sizeof(detail),
								 "Transaction log copied from locations %s through %s to the standby master",
								 XLogLocationToString(&originalEndLocation),
								 XLogLocationToString2(&walSendRequest->flushedLocation));
					if (count >= sizeof(detail))
					{
						ereport(ERROR,
								(errcode(ERRCODE_INTERNAL_ERROR),
								 errmsg("format command string failure")));
					}

					enableQDMirroring("Master mirroring is now synchronized", detail);

					/* Start tracking sends and periodic reporting from here */
					currentEndLocation = walSendRequest->flushedLocation;

					periodicLen = 0;
					periodicLocation = currentEndLocation;
				}
				else
				{
					if (tooFarBehind)
					{
						disableQDMirroring_TooFarBehind(
							"The current master was unable to synchronize the standby master "
							"because the transaction logs on the current master were recycled.  "
							"A gpinitstandby (at an appropriate time) will be necessary to copy "
							"over the whole master database to the standby master so it may be synchronized");
					}
					else
					{
						disableQDMirroring_ConnectionError(
							"Connection to the standby master was lost during transaction log catchup",
							GetStandbyErrorString());
					}
					disconnectMirrorQD_SendClose();
				}
			}
			else if (isQDMirroringDisabled())
			{
				elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master Mirror Send: Master mirroring not catching-up (state is disabled)");
			}
			else
			{
				elog(ERROR,"unexpected master mirroring state %s",
					 QDMirroringStateString());
			}
			break;

		case WriteWalPages:
			if (Debug_print_qd_mirroring)
				elog(LOG, "WriteWalPages");

			if (isQDMirroringEnabled())
			{
				char	   *from;
				Size		nbytes;
				bool		more= false;

				/*
				 * For now, save copy of data until flush.  This could be
				 * optimized.
				 */
				if (saveBuffer == NULL)
				{
					uint32		totalBufferLen = XLOGbuffers * XLOG_BLCKSZ;

					saveBuffer = malloc(totalBufferLen);
					if (saveBuffer == NULL)
						elog(ERROR,"Could not allocate buffer for xlog data (%d bytes)",
							 totalBufferLen);

					saveBufferLen = 0;
				}

				XLogGetBuffer(walSendRequest->startidx, walSendRequest->npages,
							  &from, &nbytes);

				if (saveBufferLen == 0)
				{
					/* First chunk since the last flush: remember where it starts */
					more = false;
					writeLogId = walSendRequest->logId;
					writeLogSeg = walSendRequest->logSeg;
					writeLogOff = walSendRequest->logOff;

					memcpy(saveBuffer, from, nbytes);
					saveBufferLen = nbytes;
				}
				else
				{
					/* Further chunk: append behind what is already saved */
					more = true;
					memcpy(&saveBuffer[saveBufferLen], from, nbytes);
					saveBufferLen += nbytes;
				}

				if (Debug_print_qd_mirroring)
					elog(LOG,
						 "Master Mirror Send: WriteWalPages (%s) startidx %d, npages %d, timeLineID %d, logId %u, logSeg %u, logOff 0x%X, nbytes 0x%X",
						 (more ? "more" : "new"),
						 walSendRequest->startidx,
						 walSendRequest->npages,
						 walSendRequest->timeLineID,
						 walSendRequest->logId,
						 walSendRequest->logSeg,
						 walSendRequest->logOff,
						 (int)nbytes);
			}

			/*
			 * Writing only accumulates data; sending is the job of a separate
			 * FlushWalPages request.  Without this break every write fell
			 * through into the flush code below, so the buffer was emptied
			 * immediately and the "more" path above could never be taken.
			 */
			break;

		case FlushWalPages:
			if (Debug_print_qd_mirroring)
				elog(LOG, "FlushWalPages");

			if (isQDMirroringEnabled())
			{
				char		cmd[MAXFNAMELEN + 50];

				if (saveBufferLen == 0)
					successful = true;		/* nothing buffered, nothing to send */
				else
				{
					if (snprintf(cmd, sizeof(cmd),"xlog %d %d %d %d",
								 writeLogId, writeLogSeg, writeLogOff,
								 (int)saveBufferLen) >= sizeof(cmd))
						elog(ERROR,"could not create cmd for qd mirror logid %d seg %d",
							 writeLogId, writeLogSeg);

					successful = write_qd_sync(cmd, saveBuffer, saveBufferLen,
											   &standbyTimeout,
											   &walsend_shutdown_requested);
					if (successful)
					{
						XLogRecPtr	oldEndLocation;

						oldEndLocation = currentEndLocation;

						/* New end location is the start of what was just sent */
						currentEndLocation.xlogid = writeLogId;
						currentEndLocation.xrecoff = writeLogSeg * XLogSegSize + writeLogOff;
						if (currentEndLocation.xrecoff >= XLogFileSize)
						{
							(currentEndLocation.xlogid)++;
							currentEndLocation.xrecoff = 0;
						}

						if (XLByteLT(oldEndLocation,currentEndLocation))
						{
							periodicLen += saveBufferLen;
							if (periodicLen > periodicReportLen)
							{
								elog(LOG,
									 "Master mirroring periodic report: %d bytes successfully send to standby master for locations %s through %s",
									 periodicLen,
									 XLogLocationToString(&periodicLocation),
									 XLogLocationToString2(&currentEndLocation));

								periodicLen = 0;
								periodicLocation = currentEndLocation;
							}
						}
						else
						{
							if (Debug_print_qd_mirroring)
								elog(LOG,
									 "Send to Master mirror successful.  New end location %s (old %s)",
									 XLogLocationToString(&currentEndLocation),
									 XLogLocationToString2(&oldEndLocation));
						}
					}
					else
					{
						disableQDMirroring_ConnectionError(
							"Connection to the standby master was lost attempting to send new transaction log",
							GetStandbyErrorString());
						disconnectMirrorQD_SendClose();
					}

					/*
					 * Reset so WriteWalPages can fill the buffer again.
					 */
					saveBufferLen = 0;
					writeLogId = 0;
					writeLogSeg = 0;
					writeLogOff = 0;
				}

				if (successful && walSendRequest->haveNewCheckpointLocation)
				{
					uint32		logid;
					uint32		seg;
					uint32		offset;

					elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"New previous checkpoint location %s",
						 XLogLocationToString(&walSendRequest->newCheckpointLocation));
					XLByteToSeg(walSendRequest->newCheckpointLocation, logid, seg);
					offset = walSendRequest->newCheckpointLocation.xrecoff % XLogSegSize;

					if (snprintf(cmd, sizeof(cmd),"new_checkpoint_location %d %d %d",
								 logid, seg, offset) >= sizeof(cmd))
						elog(ERROR,"could not create cmd for qd mirror logid %d seg %d offset %d",
							 logid, seg, offset);

					successful = write_qd_sync(cmd, NULL, 0,
											   NULL, &walsend_shutdown_requested);
					if (successful)
					{
						elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"Send of new checkpoint location to master mirror successful");
					}
					else
					{
						disableQDMirroring_ConnectionError(
							"Connection to the standby master was lost attempting to send new checkpoint location",
							GetStandbyErrorString());
						disconnectMirrorQD_SendClose();
					}
				}
			}
			else if (isQDMirroringDisabled())
			{
				elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master Mirror Send: Master mirroring not enabled");
			}
			else
			{
				elog(ERROR,"unexpected master mirroring state %s",
					 QDMirroringStateString());
			}
			break;

		case CloseForShutdown:
			if (Debug_print_qd_mirroring)
				elog(LOG, "CloseForShutdown");

			/*
			 * Do the work we would normally do when signaled to stop.
			 */
			WalSendServer_ServiceShutdown();
			break;

		default:
			elog(ERROR, "Unknown WalSendRequestCommand %d", walSendRequest->command);
	}
}
/*
 * wd_hb_sender
 *		Fork the heartbeat sender child process.
 *
 * fork_wait_time: seconds the child sleeps before starting work (skipped
 *				   when not positive).
 * hb_if:		   heartbeat interface; its addr and dest_port are the send
 *				   destination, and it is passed to
 *				   wd_create_hb_send_socket() to create the socket.
 *
 * In the parent this returns the child's pid; a fork failure is reported
 * with PANIC.  The child never returns: it sends one heartbeat packet every
 * pool_config->wd_heartbeat_keepalive seconds in an endless loop.  SIGTERM,
 * SIGINT and SIGQUIT are routed to hb_sender_exit.
 */
pid_t
wd_hb_sender(int fork_wait_time, WdHbIf *hb_if)
{
	int			sock;
	pid_t		pid = 0;
	WdHbPacket	pkt;
	WdInfo	   *p = WD_List;
	char		pack_str[WD_MAX_PACKET_STRING];
	int			pack_str_len;
	sigjmp_buf	local_sigjmp_buf;

	pid = fork();
	if (pid != 0)
	{
		/* parent (or failed fork) */
		if (pid == -1)
			ereport(PANIC,
					(errmsg("failed to fork a heartbeat sender child")));
		return pid;
	}

	/* child from here on */
	on_exit_reset();
	processType = PT_HB_SENDER;

	if (fork_wait_time > 0)
	{
		sleep(fork_wait_time);
	}

	POOL_SETMASK(&UnBlockSig);

	signal(SIGTERM, hb_sender_exit);
	signal(SIGINT, hb_sender_exit);
	signal(SIGQUIT, hb_sender_exit);
	signal(SIGCHLD, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGUSR2, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);

	init_ps_display("", "", "", "");

	/* Create per loop iteration memory context */
	ProcessLoopContext = AllocSetContextCreate(TopMemoryContext,
											   "wdhb_sender",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);

	MemoryContextSwitchTo(TopMemoryContext);

	sock = wd_create_hb_send_socket(hb_if);

	set_ps_display("heartbeat sender", false);

	/* Error recovery point: report, clean up, pause, then resume the loop */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		EmitErrorReport();
		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();

		/* keep the normal send cadence after a failure */
		sleep(pool_config->wd_heartbeat_keepalive);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	for(;;)
	{
		MemoryContextSwitchTo(ProcessLoopContext);
		MemoryContextResetAndDeleteChildren(ProcessLoopContext);

		/*
		 * Start from an all-zero packet.  The whole struct is sent as
		 * sizeof(pkt) bytes, but pkt.hash is only filled in when an
		 * authentication key is configured; without this, uninitialized
		 * stack bytes would be put on the wire.
		 */
		memset(&pkt, 0, sizeof(pkt));

		/* contents of packet */
		gettimeofday(&pkt.send_time, NULL);
		strlcpy(pkt.from, pool_config->wd_hostname, sizeof(pkt.from));
		pkt.from_pgpool_port = pool_config->port;

		pkt.status = p->status;

		/* authentication key */
		if (strlen(pool_config->wd_authkey))
		{
			/* calculate hash from packet */
			pack_str_len = packet_to_string_hb(&pkt, pack_str, sizeof(pack_str));
			wd_calc_hash(pack_str, pack_str_len, pkt.hash);
		}

		/* send heartbeat signal */
		wd_hb_send(sock, &pkt, sizeof(pkt), hb_if->addr, hb_if->dest_port);
		ereport(DEBUG1,
				(errmsg("watchdog heartbeat: send heartbeat signal to %s:%d", hb_if->addr, hb_if->dest_port)));

		sleep(pool_config->wd_heartbeat_keepalive);
	}

	/* not reached; kept so every path has a return */
	return pid;
}
/*
 * wd_hb_receiver
 *		Fork the heartbeat receiver child process.
 *
 * fork_wait_time: seconds the child sleeps before starting work (skipped
 *				   when not positive).
 * hb_if:		   heartbeat interface passed to wd_create_hb_recv_socket()
 *				   to create the receiving socket.
 *
 * In the parent this returns the child's pid; a fork failure is reported
 * with PANIC.  The child never returns: it receives heartbeat packets in an
 * endless loop, optionally verifies their hash, and records send/receive
 * times in the matching WD_List entry.  A packet that fails authentication
 * raises ERROR, which is logged by the recovery block before the loop
 * resumes.  SIGTERM, SIGINT and SIGQUIT are routed to hb_receiver_exit.
 */
pid_t
wd_hb_receiver(int fork_wait_time, WdHbIf *hb_if)
{
	int			sock;
	pid_t		pid = 0;
	WdHbPacket	pkt;
	struct timeval tv;
	char		from[WD_MAX_HOST_NAMELEN];
	int			from_pgpool_port;
	char		buf[(MD5_PASSWD_LEN+1)*2];
	char		pack_str[WD_MAX_PACKET_STRING];
	int			pack_str_len;
	sigjmp_buf	local_sigjmp_buf;

	WdInfo	   *p;

	pid = fork();
	if (pid != 0)
	{
		/* parent (or failed fork) */
		if (pid == -1)
			ereport(PANIC,
					(errmsg("failed to fork a heartbeat receiver child")));
		return pid;
	}

	/* child from here on */
	on_exit_reset();
	processType = PT_HB_RECEIVER;

	if (fork_wait_time > 0)
	{
		sleep(fork_wait_time);
	}

	POOL_SETMASK(&UnBlockSig);

	signal(SIGTERM, hb_receiver_exit);
	signal(SIGINT, hb_receiver_exit);
	signal(SIGQUIT, hb_receiver_exit);
	signal(SIGCHLD, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGUSR2, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);

	init_ps_display("", "", "", "");

	/* Create per loop iteration memory context */
	ProcessLoopContext = AllocSetContextCreate(TopMemoryContext,
											   "wdhb_hb_receiver",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);

	MemoryContextSwitchTo(TopMemoryContext);

	sock = wd_create_hb_recv_socket(hb_if);

	set_ps_display("heartbeat receiver", false);

	/* Error recovery point: report, clean up, then resume the loop */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		EmitErrorReport();
		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	for(;;)
	{
		MemoryContextSwitchTo(ProcessLoopContext);
		MemoryContextResetAndDeleteChildren(ProcessLoopContext);

		/* receive heartbeat signal */
		wd_hb_recv(sock, &pkt);

		/*
		 * The packet comes from the network and its string fields are used
		 * with strcmp()/strlcpy() below, both of which scan for a NUL.
		 * Force termination so a malformed packet cannot make them read
		 * past the end of the field.
		 * NOTE(review): assumes pkt.hash, like pkt.from, is an in-struct
		 * char array -- confirm against the WdHbPacket definition.
		 */
		pkt.from[sizeof(pkt.from) - 1] = '\0';
		pkt.hash[sizeof(pkt.hash) - 1] = '\0';

		/* authentication */
		if (strlen(pool_config->wd_authkey))
		{
			/* calculate hash from packet */
			pack_str_len = packet_to_string_hb(&pkt, pack_str, sizeof(pack_str));
			wd_calc_hash(pack_str, pack_str_len, buf);

			if (strcmp(pkt.hash, buf))
				ereport(ERROR,
						(errmsg("watchdog heartbeat receive"),
						 errdetail("authentication failed")));
		}

		/* get current time */
		gettimeofday(&tv, NULL);

		/* who send this packet? */
		strlcpy(from, pkt.from, sizeof(from));
		from_pgpool_port = pkt.from_pgpool_port;

		/* WD_List is scanned up to its WD_END sentinel entry */
		p = WD_List;
		while (p->status != WD_END)
		{
			if (!strcmp(p->hostname, from) && p->pgpool_port == from_pgpool_port)
			{
				/* ignore the packet from down pgpool */
				if (pkt.status == WD_DOWN)
				{
					ereport(DEBUG1,
							(errmsg("watchdog heartbeat: received heartbeat signal from \"%s:%d\" whose status is down. ignored",
									from, from_pgpool_port)));
					break;
				}

				/* this is the first packet or the latest packet */
				if (!WD_TIME_ISSET(p->hb_send_time) ||
					WD_TIME_BEFORE(p->hb_send_time, pkt.send_time))
				{
					ereport(DEBUG1,
							(errmsg("watchdog heartbeat: received heartbeat signal from \"%s:%d\"",
									from, from_pgpool_port)));

					p->hb_send_time = pkt.send_time;
					p->hb_last_recv_time = tv;
				}
				else
				{
					ereport(DEBUG1,
							(errmsg("watchdog heartbeat: received heartbeat signal is older than the latest, ignored")));
				}
				break;
			}
			p++;
		}
	}

	/* not reached; kept so every path has a return */
	return pid;
}
/*
 * ServiceMain
 *		Shared main routine for service processes.
 *
 * serviceCtrl carries the service's configuration (callbacks, ps title) and
 * receives the listener port once it has been set up.  The routine installs
 * signal handlers, runs the optional init callbacks, creates the listener
 * and then enters ServiceListenLoop().  It always ends in proc_exit(0),
 * including after an ERROR, which is logged first.
 */
void
ServiceMain(ServiceCtrl *serviceCtrl)
{
	ServiceConfig *cfg;
	sigjmp_buf	error_exit_point;

	Assert(serviceCtrl != NULL);
	cfg = (ServiceConfig*)serviceCtrl->serviceConfig;
	Assert(cfg != NULL);

	/* We run as a postmaster child: record that and our own pid */
	IsUnderPostmaster = true;
	MyProcPid = getpid();

	/* Drop the on-exit callbacks inherited from the postmaster */
	on_exit_reset();

	/* Show the configured title in ps output */
	init_ps_display(cfg->psTitle, "", "", "");

	/* A service-specific early init replaces the default mode switch */
	if (cfg->ServiceEarlyInit == NULL)
		SetProcessingMode(InitProcessing);
	else
		cfg->ServiceEarlyInit();

	/*
	 * Signal dispositions.  SIGHUP is ignored, so configuration reloads are
	 * not picked up by this process.  SIGINT goes to the statement cancel
	 * handler, SIGTERM and SIGQUIT to the service's die / quick-die
	 * handlers, and SIGUSR2 to the service's own shutdown-request callback.
	 */
	pqsignal(SIGHUP, SIG_IGN);
	pqsignal(SIGINT, StatementCancelHandler);
	pqsignal(SIGTERM, ServiceDie);
	pqsignal(SIGQUIT, ServiceQuickDie);
	pqsignal(SIGALRM, handle_sig_alarm);

	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
	/* async notifies are not listened for; SIGUSR2 requests shutdown */
	pqsignal(SIGUSR2, cfg->ServiceRequestShutdown);
	pqsignal(SIGFPE, FloatExceptionHandler);
	pqsignal(SIGCHLD, SIG_DFL);

	/* Crash signals, where the platform defines them */
#ifdef SIGBUS
	pqsignal(SIGBUS, HandleCrash);
#endif
#ifdef SIGILL
	pqsignal(SIGILL, HandleCrash);
#endif
#ifdef SIGSEGV
	pqsignal(SIGSEGV, HandleCrash);
#endif

	/* Early initialization */
	BaseInit();

	if (cfg->ServicePostgresInit != NULL)
		cfg->ServicePostgresInit();

	SetProcessingMode(NormalProcessing);

	/*
	 * Error exit point: control re-enters here, with a nonzero return from
	 * sigsetjmp, after an error has been thrown.  There is no recovery; the
	 * error is logged and the process goes away.  The original note says
	 * that InitProcess registers a ProcKill callback which cleans up the
	 * necessary state at exit.
	 */
	if (sigsetjmp(error_exit_point, 1) != 0)
	{
		/* No interrupts while we clean up */
		HOLD_INTERRUPTS();

		/* Send the error to the server log */
		EmitErrorReport();

		proc_exit(0);
	}

	/* From here on ereport(ERROR) lands in the block above */
	PG_exception_stack = &error_exit_point;

	PG_SETMASK(&UnBlockSig);

	/* Create the listener and record its port in the control structure */
	serviceCtrl->listenerPort = ServiceListenerSetup(serviceCtrl);

	if (cfg->ServiceInit != NULL)
		cfg->ServiceInit(serviceCtrl->listenerPort);

	/* Serve requests until the loop returns */
	ServiceListenLoop(serviceCtrl);

	proc_exit(0);
}
/*
 * ServiceClientPollRead
 *		Try once to read a complete response from the service socket.
 *
 * serviceClient:		 client whose sockfd is read.
 * response:			 destination buffer of at least responseLen bytes.
 * responseLen:			 exact number of bytes a response must have.
 * pollResponseReceived: set to true when a full response was read, false
 *						 when the read would have blocked or an error
 *						 occurred.
 *
 * Returns true when the poll itself succeeded (whether or not a response
 * was available).  Returns false on a closed connection, a read error or a
 * response of the wrong length; in that case the error is demoted to
 * WARNING, logged, and its message is copied to ClientErrorString (or an
 * empty string if it is missing or does not fit).  If the error cannot be
 * demoted it is re-thrown.
 */
static bool
ServiceClientPollRead(ServiceClient *serviceClient, void* response, int responseLen, bool *pollResponseReceived)
{
	ServiceConfig *serviceConfig;
	int			n;
	int			saved_err;
	char	   *message;
	bool		result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	serviceConfig = serviceClient->serviceConfig;
	Assert(serviceConfig != NULL);
	Assert(response != NULL);
	Assert(pollResponseReceived != NULL);

	/* Give the output flag a defined value on every path, errors included */
	*pollResponseReceived = false;

	PG_TRY();
	{
		SUPPRESS_PANIC();

		/*
		 * Attempt to read the response
		 */
		while (true)
		{
			n = read(serviceClient->sockfd,
					 ((char *)response),
					 responseLen);
			saved_err = errno;	/* capture before anything can overwrite it */

			if (n == 0)
			{
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Connection to '%s' is closed",
								serviceConfig->title)));
			}
			else if (n < 0)
			{
				/*
				 * Nothing available right now.  POSIX lets read() report
				 * this as either EAGAIN or EWOULDBLOCK, so accept both.
				 */
				if (saved_err == EWOULDBLOCK || saved_err == EAGAIN)
				{
					*pollResponseReceived = false;
					break;
				}

				/* interrupted by a signal: simply retry */
				if (saved_err == EINTR)
					continue;

				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Read error from '%s': %s",
								serviceConfig->title,
								strerror(saved_err))));
			}

			/*
			 * A short read is treated as a protocol error.  ereport(ERROR)
			 * does not return, so no explicit return is needed here; a
			 * return from inside PG_TRY would also bypass the cleanup done
			 * by PG_END_TRY and RESTORE_PANIC.
			 */
			if (n != responseLen)
			{
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Expecting message length %d and actual read length was %d from '%s'",
								responseLen,
								n,
								serviceConfig->title)));
			}

			*pollResponseReceived = true;
			break;
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep the message for the caller if it fits, else clear it */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
/*
 * ServiceClientRead
 *		Read exactly responseLen bytes of response from the service socket.
 *
 * serviceClient: client whose sockfd is read.
 * response:	  destination buffer of at least responseLen bytes.
 * responseLen:	  number of bytes to read.
 * timeout:		  maximum time to wait in select() when the socket has no
 *				  data, or NULL to wait without limit.  The caller's struct
 *				  is not modified; a local copy is handed to select().
 *
 * Returns true once all bytes have been read.  Returns false on a closed
 * connection, a read/select error or a timeout; in that case the error is
 * demoted to WARNING, logged, and its message is copied to
 * ClientErrorString (or an empty string if it is missing or does not fit).
 * If the error cannot be demoted it is re-thrown.
 */
static bool
ServiceClientRead(ServiceClient *serviceClient, void* response, int responseLen, struct timeval *timeout)
{
	ServiceConfig *serviceConfig;
	int			n;
	int			bytesRead = 0;
	int			saved_err;
	char	   *message;
	bool		result = false;
	mpp_fd_set	rset;

	/*
	 * Local copy of the timeout: select() may overwrite its timeout
	 * argument with the time remaining.
	 */
	struct timeval rundownTimeout = {0,0};
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	serviceConfig = serviceClient->serviceConfig;
	Assert(serviceConfig != NULL);
	Assert(response != NULL);

	if (timeout != NULL)
		rundownTimeout = *timeout;

	PG_TRY();
	{
		SUPPRESS_PANIC();

		/*
		 * read the response
		 */
		while (bytesRead < responseLen)
		{
			n = read(serviceClient->sockfd,
					 ((char *)response) + bytesRead,
					 responseLen - bytesRead);
			saved_err = errno;	/* capture before anything can overwrite it */

			if (n == 0)
			{
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Connection to '%s' is closed (%d)",
								serviceConfig->title, serviceClient->sockfd)));
			}

			if (n < 0)
			{
				/*
				 * "No data yet" may be reported as EAGAIN or EWOULDBLOCK
				 * (POSIX allows either); neither is a real error, and
				 * neither is EINTR.
				 */
				bool		wouldBlock = (saved_err == EWOULDBLOCK ||
										  saved_err == EAGAIN);

				if (saved_err != EINTR && !wouldBlock)
				{
					ereport(ERROR,
							(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							 errmsg("Read error from '%s': %s (%d)",
									serviceConfig->title,
									strerror(saved_err),
									serviceClient->sockfd)));
				}

				if (wouldBlock)
				{
					/*
					 * We shouldn't really get here since we are dealing with
					 * small messages, but once we've read a bit of data we
					 * need to finish out reading till we get the message (or
					 * error).  Wait until the socket becomes readable.
					 */
					do
					{
						MPP_FD_ZERO(&rset);
						MPP_FD_SET(serviceClient->sockfd, &rset);
						n = select(serviceClient->sockfd + 1, (fd_set *)&rset, NULL, NULL,
								   (timeout == NULL ? NULL : &rundownTimeout));
						if (n == 0)
						{
							/* select() timed out */
							if (timeout != NULL)
							{
								ereport(ERROR,
										(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
										 errmsg("Read from '%s' timed out after %d.%03d seconds",
												serviceConfig->title,
												(int)timeout->tv_sec,
												(int)timeout->tv_usec / 1000)));
							}
						}
						else if (n < 0 && errno == EINTR)
							continue;	/* re-evaluates the loop condition and waits again */
						else if (n < 0)
						{
							saved_err = errno;

							ereport(ERROR,
									(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
									 errmsg("Read error from '%s': %s (%d)",
											serviceConfig->title,
											strerror(saved_err),
											serviceClient->sockfd)));
						}
					}
					while (n < 1);
				}
				/* else saved_err == EINTR */

				/* in both cases: go back and retry the read */
				continue;
			}
			else
				bytesRead += n;
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep the message for the caller if it fits, else clear it */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
/* * Main entry point for walwriter process * * This is invoked from AuxiliaryProcessMain, which has already created the * basic execution environment, but not enabled signals yet. */ void WalWriterMain(void) { sigjmp_buf local_sigjmp_buf; MemoryContext walwriter_context; int left_till_hibernate; bool hibernating; /* * Properly accept or ignore signals the postmaster might send us * * We have no particular use for SIGINT at the moment, but seems * reasonable to treat like SIGTERM. */ pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */ pqsignal(SIGINT, WalShutdownHandler); /* request shutdown */ pqsignal(SIGTERM, WalShutdownHandler); /* request shutdown */ pqsignal(SIGQUIT, wal_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, walwriter_sigusr1_handler); pqsignal(SIGUSR2, SIG_IGN); /* not used */ /* * Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); pqsignal(SIGTTIN, SIG_DFL); pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); pqsignal(SIGWINCH, SIG_DFL); /* We allow SIGQUIT (quickdie) at all times */ sigdelset(&BlockSig, SIGQUIT); /* * Create a resource owner to keep track of our resources (not clear that * we need this, but may as well have one). */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer"); /* * Create a memory context that we will do all our work in. We do this so * that we can reset the context during error recovery and thereby avoid * possible memory leaks. Formerly this code just ran in * TopMemoryContext, but resetting that would be a really bad idea. */ walwriter_context = AllocSetContextCreate(TopMemoryContext, "Wal Writer", ALLOCSET_DEFAULT_SIZES); MemoryContextSwitchTo(walwriter_context); /* * If an exception is encountered, processing resumes here. * * This code is heavily based on bgwriter.c, q.v. 
*/ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Since not using PG_TRY, must reset error stack by hand */ error_context_stack = NULL; /* Prevent interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry * about in walwriter, but we do have LWLocks, and perhaps buffers? */ LWLockReleaseAll(); ConditionVariableCancelSleep(); pgstat_report_wait_end(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); /* we needn't bother with the other ResourceOwnerRelease phases */ AtEOXact_Buffers(false); AtEOXact_SMgr(); AtEOXact_Files(); AtEOXact_HashTables(false); /* * Now return to normal top-level context and clear ErrorContext for * next time. */ MemoryContextSwitchTo(walwriter_context); FlushErrorState(); /* Flush any leaked data in the top-level context */ MemoryContextResetAndDeleteChildren(walwriter_context); /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); /* * Sleep at least 1 second after any error. A write error is likely * to be repeated, and we don't want to be filling the error logs as * fast as we can. */ pg_usleep(1000000L); /* * Close all open files after any error. This is helpful on Windows, * where holding deleted files open causes various strange errors. * It's not clear we need it elsewhere, but shouldn't hurt. */ smgrcloseall(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; /* * Unblock signals (they were blocked when the postmaster forked us) */ PG_SETMASK(&UnBlockSig); /* * Reset hibernation state after any error. */ left_till_hibernate = LOOPS_UNTIL_HIBERNATE; hibernating = false; SetWalWriterSleeping(false); /* * Advertise our latch that backends can use to wake us up while we're * sleeping. 
*/ ProcGlobal->walwriterLatch = &MyProc->procLatch; /* * Loop forever */ for (;;) { long cur_timeout; int rc; /* * Advertise whether we might hibernate in this cycle. We do this * before resetting the latch to ensure that any async commits will * see the flag set if they might possibly need to wake us up, and * that we won't miss any signal they send us. (If we discover work * to do in the last cycle before we would hibernate, the global flag * will be set unnecessarily, but little harm is done.) But avoid * touching the global flag if it doesn't need to change. */ if (hibernating != (left_till_hibernate <= 1)) { hibernating = (left_till_hibernate <= 1); SetWalWriterSleeping(hibernating); } /* Clear any already-pending wakeups */ ResetLatch(MyLatch); /* * Process any requests or signals received recently. */ if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); } if (shutdown_requested) { /* Normal exit from the walwriter is here */ proc_exit(0); /* done */ } /* * Do what we're here for; then, if XLogBackgroundFlush() found useful * work to do, reset hibernation counter. */ if (XLogBackgroundFlush()) left_till_hibernate = LOOPS_UNTIL_HIBERNATE; else if (left_till_hibernate > 0) left_till_hibernate--; /* * Sleep until we are signaled or WalWriterDelay has elapsed. If we * haven't done anything useful for quite some time, lengthen the * sleep time so as to reduce the server's idle power consumption. */ if (left_till_hibernate > 0) cur_timeout = WalWriterDelay; /* in ms */ else cur_timeout = WalWriterDelay * HIBERNATE_FACTOR; rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, cur_timeout, WAIT_EVENT_WAL_WRITER_MAIN); /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ if (rc & WL_POSTMASTER_DEATH) exit(1); } }
void FileRepResetPeer_Main(void) { /* BASIC PROCESS SETUP */ FileRepReset_ConfigureSignals(); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding and comments about how the error * handling works. */ sigjmp_buf local_sigjmp_buf; if (sigsetjmp(local_sigjmp_buf, 1) != 0) { HOLD_INTERRUPTS(); EmitErrorReport(); proc_exit(EXIT_CODE_SHOULD_ENTER_FAULT); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; PG_SETMASK(&UnBlockSig); /** NOW DO THE ACTUAL WORK */ char messageFromPeer[MESSAGE_FROM_PEER_BUF_SIZE]; char resetNumberFromPeer[MESSAGE_FROM_PEER_BUF_SIZE]; char resetNumberThatIndicatesResetComplete[MESSAGE_FROM_PEER_BUF_SIZE]; struct addrinfo *addrList = NULL; char portStr[100]; PrimaryMirrorModeTransitionArguments args = primaryMirrorGetArgumentsFromLocalMemory(); Assert(args.mode == PMModePrimarySegment || args.mode == PMModeMirrorSegment); snprintf(portStr, sizeof(portStr), "%d", args.peerPostmasterPort); if (! 
determineTargetHost(&addrList, args.peerAddress, portStr)) { elog(WARNING, "during reset, unable to look up address for peer host to coordinate reset; " "will transition to fault state."); proc_exit(EXIT_CODE_SHOULD_ENTER_FAULT); } sendMessageToPeerAndExitIfProblem(addrList, "beginPostmasterReset", messageFromPeer, resetNumberThatIndicatesResetComplete); for ( ;; ) { pg_usleep(10 * 1000L); /* 10 ms */ sendMessageToPeerAndExitIfProblem(addrList, "getPostmasterResetStatus", messageFromPeer, resetNumberFromPeer ); if (strequals(messageFromPeer, RESET_STATUS_IS_IN_RESET_PIVOT_POINT)) { if (args.mode == PMModeMirrorSegment) { /** * peer is in the reset pivot point, we can break out of our checking loop and * thus exit with a code telling the postmaster to begin the startup sequence again * * this is only done on the mirror as currently the mirror must execute the startup sequence * before the primary */ elog(DEBUG1, "peer reset: primary peer has reached reset point"); break; } } else if (strequals(messageFromPeer, RESET_STATUS_IS_RUNNING)) { /** it's running -- is it >= than the reset number that indicates reset complete one */ if (strcmp( resetNumberFromPeer, resetNumberThatIndicatesResetComplete) >= 0) { /** yes, the reset is complete and so we can quit and do a restart */ elog(DEBUG1, "peer reset: mirror peer reset is complete"); break; } } } proc_exit(EXIT_CODE_SHOULD_RESTART_SHMEM_CLEANLY); }
/* * Main entry point for bgwriter process * * This is invoked from BootstrapMain, which has already created the basic * execution environment, but not enabled signals yet. */ void BackgroundWriterMain(void) { sigjmp_buf local_sigjmp_buf; MemoryContext bgwriter_context; BgWriterShmem->bgwriter_pid = MyProcPid; am_bg_writer = true; /* * If possible, make this process a group leader, so that the postmaster * can signal any child processes too. (bgwriter probably never has any * child processes, but for consistency we make all postmaster child * processes do this.) */ #ifdef HAVE_SETSID if (setsid() < 0) elog(FATAL, "setsid() failed: %m"); #endif /* * Properly accept or ignore signals the postmaster might send us * * Note: we deliberately ignore SIGTERM, because during a standard Unix * system shutdown cycle, init will SIGTERM all processes at once. We * want to wait for the backends to exit, whereupon the postmaster will * tell us it's okay to shut down (via SIGUSR2). * * SIGUSR1 is presently unused; keep it spare in case someday we want this * process to participate in sinval messaging. */ pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */ pqsignal(SIGINT, ReqCheckpointHandler); /* request checkpoint */ pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */ pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); /* reserve for sinval */ pqsignal(SIGUSR2, ReqShutdownHandler); /* request shutdown */ /* * Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); pqsignal(SIGTTIN, SIG_DFL); pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); pqsignal(SIGWINCH, SIG_DFL); /* We allow SIGQUIT (quickdie) at all times */ #ifdef HAVE_SIGPROCMASK sigdelset(&BlockSig, SIGQUIT); #else BlockSig &= ~(sigmask(SIGQUIT)); #endif /* * Initialize so that first time-driven event happens at the correct time. 
*/ last_checkpoint_time = last_xlog_switch_time = time(NULL); /* * Create a resource owner to keep track of our resources (currently only * buffer pins). */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer"); /* * Create a memory context that we will do all our work in. We do this so * that we can reset the context during error recovery and thereby avoid * possible memory leaks. Formerly this code just ran in * TopMemoryContext, but resetting that would be a really bad idea. */ bgwriter_context = AllocSetContextCreate(TopMemoryContext, "Background Writer", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(bgwriter_context); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Since not using PG_TRY, must reset error stack by hand */ error_context_stack = NULL; /* Prevent interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry * about in bgwriter, but we do have LWLocks, buffers, and temp files. */ LWLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); /* we needn't bother with the other ResourceOwnerRelease phases */ AtEOXact_Buffers(false); AtEOXact_Files(); AtEOXact_HashTables(false); /* Warn any waiting backends that the checkpoint failed. 
*/ if (ckpt_active) { /* use volatile pointer to prevent code rearrangement */ volatile BgWriterShmemStruct *bgs = BgWriterShmem; SpinLockAcquire(&bgs->ckpt_lck); bgs->ckpt_failed++; bgs->ckpt_done = bgs->ckpt_started; SpinLockRelease(&bgs->ckpt_lck); ckpt_active = false; } /* * Now return to normal top-level context and clear ErrorContext for * next time. */ MemoryContextSwitchTo(bgwriter_context); FlushErrorState(); /* Flush any leaked data in the top-level context */ MemoryContextResetAndDeleteChildren(bgwriter_context); /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); /* * Sleep at least 1 second after any error. A write error is likely * to be repeated, and we don't want to be filling the error logs as * fast as we can. */ pg_usleep(1000000L); /* * Close all open files after any error. This is helpful on Windows, * where holding deleted files open causes various strange errors. * It's not clear we need it elsewhere, but shouldn't hurt. */ smgrcloseall(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; /* * Unblock signals (they were blocked when the postmaster forked us) */ PG_SETMASK(&UnBlockSig); /* * Loop forever */ for (;;) { bool do_checkpoint = false; int flags = 0; time_t now; int elapsed_secs; /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ if (!PostmasterIsAlive(true)) exit(1); /* * Process any requests or signals received recently. 
*/ AbsorbFsyncRequests(); if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); } if (checkpoint_requested) { checkpoint_requested = false; do_checkpoint = true; BgWriterStats.m_requested_checkpoints++; } if (shutdown_requested) { /* * From here on, elog(ERROR) should end with exit(1), not send * control back to the sigsetjmp block above */ ExitOnAnyError = true; /* Close down the database */ ShutdownXLOG(0, 0); DumpFreeSpaceMap(0, 0); /* Normal exit from the bgwriter is here */ proc_exit(0); /* done */ } /* * Force a checkpoint if too much time has elapsed since the last one. * Note that we count a timed checkpoint in stats only when this * occurs without an external request, but we set the CAUSE_TIME flag * bit even if there is also an external request. */ now = time(NULL); elapsed_secs = now - last_checkpoint_time; if (elapsed_secs >= CheckPointTimeout) { if (!do_checkpoint) BgWriterStats.m_timed_checkpoints++; do_checkpoint = true; flags |= CHECKPOINT_CAUSE_TIME; } /* * Do a checkpoint if requested, otherwise do one cycle of * dirty-buffer writing. */ if (do_checkpoint) { /* use volatile pointer to prevent code rearrangement */ volatile BgWriterShmemStruct *bgs = BgWriterShmem; /* * Atomically fetch the request flags to figure out what kind of a * checkpoint we should perform, and increase the started-counter * to acknowledge that we've started a new checkpoint. */ SpinLockAcquire(&bgs->ckpt_lck); flags |= bgs->ckpt_flags; bgs->ckpt_flags = 0; bgs->ckpt_started++; SpinLockRelease(&bgs->ckpt_lck); /* * We will warn if (a) too soon since last checkpoint (whatever * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag * since the last checkpoint start. Note in particular that this * implementation will not generate warnings caused by * CheckPointTimeout < CheckPointWarning. 
*/ if ((flags & CHECKPOINT_CAUSE_XLOG) && elapsed_secs < CheckPointWarning) ereport(LOG, (errmsg("checkpoints are occurring too frequently (%d seconds apart)", elapsed_secs), errhint("Consider increasing the configuration parameter \"checkpoint_segments\"."))); /* * Initialize bgwriter-private variables used during checkpoint. */ ckpt_active = true; ckpt_start_recptr = GetInsertRecPtr(); ckpt_start_time = now; ckpt_cached_elapsed = 0; /* * Do the checkpoint. */ CreateCheckPoint(flags); /* * After any checkpoint, close all smgr files. This is so we * won't hang onto smgr references to deleted files indefinitely. */ smgrcloseall(); /* * Indicate checkpoint completion to any waiting backends. */ SpinLockAcquire(&bgs->ckpt_lck); bgs->ckpt_done = bgs->ckpt_started; SpinLockRelease(&bgs->ckpt_lck); ckpt_active = false; /* * Note we record the checkpoint start time not end time as * last_checkpoint_time. This is so that time-driven checkpoints * happen at a predictable spacing. */ last_checkpoint_time = now; } else BgBufferSync(); /* Check for archive_timeout and switch xlog files if necessary. */ CheckArchiveTimeout(); /* Nap for the configured time. */ BgWriterNap(); } }
/** * This method is called after fork of the sweeper process. It sets up signal * handlers and does initialization that is required by a postgres backend. */ NON_EXEC_STATIC void BackoffSweeperMain(int argc, char *argv[]) { sigjmp_buf local_sigjmp_buf; IsUnderPostmaster = true; isSweeperProcess = true; /* Stay away from PMChildSlot */ MyPMChildSlot = -1; /* reset MyProcPid */ MyProcPid = getpid(); /* Lose the postmaster's on-exit routines */ on_exit_reset(); /* Identify myself via ps */ init_ps_display("sweeper process", "", "", ""); SetProcessingMode(InitProcessing); /* * Set up signal handlers. We operate on databases much like a regular * backend, so we use the same signal handling. See equivalent code in * tcop/postgres.c. */ pqsignal(SIGHUP, SIG_IGN); pqsignal(SIGINT, SIG_IGN); pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); pqsignal(SIGTERM, die); pqsignal(SIGQUIT, quickdie); pqsignal(SIGUSR2, BackoffRequestShutdown); pqsignal(SIGFPE, FloatExceptionHandler); pqsignal(SIGCHLD, SIG_DFL); /* * Copied from bgwriter */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Sweeper process"); /* Early initialization */ BaseInit(); /* See InitPostgres()... */ InitProcess(); SetProcessingMode(NormalProcessing); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Prevents interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * We can now go away. Note that because we'll call InitProcess, a * callback will be registered to do ProcKill, which will clean up * necessary state. */ proc_exit(0); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; PG_SETMASK(&UnBlockSig); MyBackendId = InvalidBackendId; /* main loop */ BackoffSweeperLoop(); /* One iteration done, go away */ proc_exit(0); }
/* * AutoVacMain */ NON_EXEC_STATIC void AutoVacMain(int argc, char *argv[]) { ListCell *cell; List *dblist; autovac_dbase *db; TransactionId xidForceLimit; bool for_xid_wrap; sigjmp_buf local_sigjmp_buf; /* we are a postmaster subprocess now */ IsUnderPostmaster = true; am_autovacuum = true; /* MPP-4990: Autovacuum always runs as utility-mode */ Gp_role = GP_ROLE_UTILITY; /* reset MyProcPid */ MyProcPid = getpid(); /* record Start Time for logging */ MyStartTime = time(NULL); /* Identify myself via ps */ init_ps_display("autovacuum process", "", "", ""); SetProcessingMode(InitProcessing); /* * If possible, make this process a group leader, so that the postmaster * can signal any child processes too. (autovacuum probably never has * any child processes, but for consistency we make all postmaster * child processes do this.) */ #ifdef HAVE_SETSID if (setsid() < 0) elog(FATAL, "setsid() failed: %m"); #endif /* * Set up signal handlers. We operate on databases much like a regular * backend, so we use the same signal handling. See equivalent code in * tcop/postgres.c. * * Currently, we don't pay attention to postgresql.conf changes that * happen during a single daemon iteration, so we can ignore SIGHUP. */ pqsignal(SIGHUP, SIG_IGN); /* * SIGINT is used to signal cancelling the current table's vacuum; SIGTERM * means abort and exit cleanly, and SIGQUIT means abandon ship. */ pqsignal(SIGINT, StatementCancelHandler); pqsignal(SIGTERM, die); pqsignal(SIGQUIT, quickdie); pqsignal(SIGALRM, handle_sig_alarm); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, procsignal_sigusr1_handler); /* We don't listen for async notifies */ pqsignal(SIGUSR2, SIG_IGN); pqsignal(SIGFPE, FloatExceptionHandler); pqsignal(SIGCHLD, SIG_DFL); /* Early initialization */ BaseInit(); /* * Create a per-backend PGPROC struct in shared memory, except in the * EXEC_BACKEND case where this was done in SubPostmasterMain. 
We must do * this before we can use LWLocks (and in the EXEC_BACKEND case we already * had to do some stuff with LWLocks). */ #ifndef EXEC_BACKEND InitProcess(); #endif /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Prevents interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * We can now go away. Note that because we called InitProcess, a * callback was registered to do ProcKill, which will clean up * necessary state. */ proc_exit(0); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; PG_SETMASK(&UnBlockSig); /* * Force zero_damaged_pages OFF in the autovac process, even if it is set * in postgresql.conf. We don't really want such a dangerous option being * applied non-interactively. */ SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE); /* Get a list of databases */ dblist = autovac_get_database_list(); /* * Determine the oldest datfrozenxid/relfrozenxid that we will allow * to pass without forcing a vacuum. (This limit can be tightened for * particular tables, but not loosened.) */ recentXid = ReadNewTransactionId(); xidForceLimit = recentXid - autovacuum_freeze_max_age; /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */ if (xidForceLimit < FirstNormalTransactionId) xidForceLimit -= FirstNormalTransactionId; /* * Choose a database to connect to. We pick the database that was least * recently auto-vacuumed, or one that needs vacuuming to prevent Xid * wraparound-related data loss. If any db at risk of wraparound is * found, we pick the one with oldest datfrozenxid, * independently of autovacuum times. * * Note that a database with no stats entry is not considered, except for * Xid wraparound purposes. 
The theory is that if no one has ever * connected to it since the stats were last initialized, it doesn't need * vacuuming. * * XXX This could be improved if we had more info about whether it needs * vacuuming before connecting to it. Perhaps look through the pgstats * data for the database's tables? One idea is to keep track of the * number of new and dead tuples per database in pgstats. However it * isn't clear how to construct a metric that measures that and not cause * starvation for less busy databases. */ db = NULL; for_xid_wrap = false; foreach(cell, dblist) { autovac_dbase *tmp = lfirst(cell); /* Find pgstat entry if any */ tmp->entry = pgstat_fetch_stat_dbentry(tmp->oid); /* Check to see if this one is at risk of wraparound */ if (TransactionIdPrecedes(tmp->frozenxid, xidForceLimit)) { if (db == NULL || TransactionIdPrecedes(tmp->frozenxid, db->frozenxid)) db = tmp; for_xid_wrap = true; continue; } else if (for_xid_wrap) continue; /* ignore not-at-risk DBs */ /* * Otherwise, skip a database with no pgstat entry; it means it * hasn't seen any activity. */ if (!tmp->entry) continue; /* * Remember the db with oldest autovac time. (If we are here, * both tmp->entry and db->entry must be non-null.) */ if (db == NULL || tmp->entry->last_autovac_time < db->entry->last_autovac_time) db = tmp; }
/* * Main entry point for bgwriter process * * This is invoked from AuxiliaryProcessMain, which has already created the * basic execution environment, but not enabled signals yet. */ void BackgroundWriterMain(void) { sigjmp_buf local_sigjmp_buf; MemoryContext bgwriter_context; bool prev_hibernate; /* * Properly accept or ignore signals the postmaster might send us. * * bgwriter doesn't participate in ProcSignal signalling, but a SIGUSR1 * handler is still needed for latch wakeups. */ pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */ pqsignal(SIGINT, SIG_IGN); pqsignal(SIGTERM, ReqShutdownHandler); /* shutdown */ pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, bgwriter_sigusr1_handler); pqsignal(SIGUSR2, SIG_IGN); /* * Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); pqsignal(SIGTTIN, SIG_DFL); pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); pqsignal(SIGWINCH, SIG_DFL); /* We allow SIGQUIT (quickdie) at all times */ sigdelset(&BlockSig, SIGQUIT); /* * Create a resource owner to keep track of our resources (currently only * buffer pins). */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer"); /* * We just started, assume there has been either a shutdown or * end-of-recovery snapshot. */ last_snapshot_ts = GetCurrentTimestamp(); /* * Create a memory context that we will do all our work in. We do this so * that we can reset the context during error recovery and thereby avoid * possible memory leaks. Formerly this code just ran in * TopMemoryContext, but resetting that would be a really bad idea. */ bgwriter_context = AllocSetContextCreate(TopMemoryContext, "Background Writer", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(bgwriter_context); /* * If an exception is encountered, processing resumes here. 
* * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Since not using PG_TRY, must reset error stack by hand */ error_context_stack = NULL; /* Prevent interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry * about in bgwriter, but we do have LWLocks, buffers, and temp files. */ LWLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); /* we needn't bother with the other ResourceOwnerRelease phases */ AtEOXact_Buffers(false); AtEOXact_SMgr(); AtEOXact_Files(); AtEOXact_HashTables(false); /* * Now return to normal top-level context and clear ErrorContext for * next time. */ MemoryContextSwitchTo(bgwriter_context); FlushErrorState(); /* Flush any leaked data in the top-level context */ MemoryContextResetAndDeleteChildren(bgwriter_context); /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); /* * Sleep at least 1 second after any error. A write error is likely * to be repeated, and we don't want to be filling the error logs as * fast as we can. */ pg_usleep(1000000L); /* * Close all open files after any error. This is helpful on Windows, * where holding deleted files open causes various strange errors. * It's not clear we need it elsewhere, but shouldn't hurt. */ smgrcloseall(); /* Report wait end here, when there is no further possibility of wait */ pgstat_report_wait_end(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; /* * Unblock signals (they were blocked when the postmaster forked us) */ PG_SETMASK(&UnBlockSig); /* * Reset hibernation state after any error. 
*/ prev_hibernate = false; /* * Loop forever */ for (;;) { bool can_hibernate; int rc; /* Clear any already-pending wakeups */ ResetLatch(MyLatch); if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); } if (shutdown_requested) { /* * From here on, elog(ERROR) should end with exit(1), not send * control back to the sigsetjmp block above */ ExitOnAnyError = true; /* Normal exit from the bgwriter is here */ proc_exit(0); /* done */ } /* * Do one cycle of dirty-buffer writing. */ can_hibernate = BgBufferSync(); /* * Send off activity statistics to the stats collector */ pgstat_send_bgwriter(); if (FirstCallSinceLastCheckpoint()) { /* * After any checkpoint, close all smgr files. This is so we * won't hang onto smgr references to deleted files indefinitely. */ smgrcloseall(); } /* * Log a new xl_running_xacts every now and then so replication can * get into a consistent state faster (think of suboverflowed * snapshots) and clean up resources (locks, KnownXids*) more * frequently. The costs of this are relatively low, so doing it 4 * times (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine. * * We assume the interval for writing xl_running_xacts is * significantly bigger than BgWriterDelay, so we don't complicate the * overall timeout handling but just assume we're going to get called * often enough even if hibernation mode is active. It's not that * important that log_snap_interval_ms is met strictly. To make sure * we're not waking the disk up unnecessarily on an idle system we * check whether there has been any WAL inserted since the last time * we've logged a running xacts. * * We do this logging in the bgwriter as its the only process that is * run regularly and returns to its mainloop all the time. E.g. * Checkpointer, when active, is barely ever in its mainloop and thus * makes it hard to log regularly. 
*/ if (XLogStandbyInfoActive() && !RecoveryInProgress()) { TimestampTz timeout = 0; TimestampTz now = GetCurrentTimestamp(); timeout = TimestampTzPlusMilliseconds(last_snapshot_ts, LOG_SNAPSHOT_INTERVAL_MS); /* * only log if enough time has passed and some xlog record has * been inserted. */ if (now >= timeout && last_snapshot_lsn != GetXLogInsertRecPtr()) { last_snapshot_lsn = LogStandbySnapshot(); last_snapshot_ts = now; } } /* * Sleep until we are signaled or BgWriterDelay has elapsed. * * Note: the feedback control loop in BgBufferSync() expects that we * will call it every BgWriterDelay msec. While it's not critical for * correctness that that be exact, the feedback loop might misbehave * if we stray too far from that. Hence, avoid loading this process * down with latch events that are likely to happen frequently during * normal operation. */ rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, BgWriterDelay /* ms */ ); /* * If no latch event and BgBufferSync says nothing's happening, extend * the sleep in "hibernation" mode, where we sleep for much longer * than bgwriter_delay says. Fewer wakeups save electricity. When a * backend starts using buffers again, it will wake us up by setting * our latch. Because the extra sleep will persist only as long as no * buffer allocations happen, this should not distort the behavior of * BgBufferSync's control loop too badly; essentially, it will think * that the system-wide idle interval didn't exist. * * There is a race condition here, in that a backend might allocate a * buffer between the time BgBufferSync saw the alloc count as zero * and the time we call StrategyNotifyBgWriter. While it's not * critical that we not hibernate anyway, we try to reduce the odds of * that by only hibernating when BgBufferSync says nothing's happening * for two consecutive cycles. Also, we mitigate any possible * consequences of a missed wakeup by not hibernating forever. 
*/ if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate) { /* Ask for notification at next buffer allocation */ StrategyNotifyBgWriter(MyProc->pgprocno); /* Sleep ... */ rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, BgWriterDelay * HIBERNATE_FACTOR); /* Reset the notification request in case we timed out */ StrategyNotifyBgWriter(-1); } /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ if (rc & WL_POSTMASTER_DEATH) exit(1); prev_hibernate = can_hibernate; } }
/*
 * initsequencer - drive PL/Java start-up as a resumable sequence of stages.
 *
 * The switch below dispatches on the requested stage and then deliberately
 * falls through from each case into the next, recording progress in the
 * file-level variable "initstage" as it goes. A stage that fails either
 * raises an error directly or jumps to check_tolerant at the bottom.
 *
 * Parameters:
 *   is       - stage to start (or resume) from.
 *   tolerant - if true, a failure that reaches check_tolerant simply returns
 *              to the caller (after a WARNING has been logged); if false, an
 *              ERROR is raised there. When pljavaLoadingAsExtension is set,
 *              check_tolerant forces the intolerant behavior regardless.
 *
 * There are a few ways to arrive in the initsequencer.
 * 1. From _PG_init (called exactly once when the library is loaded for ANY
 *    reason).
 *    1a. Because of the command LOAD 'libraryname';
 *        This case can be distinguished because _PG_init will have found the
 *        LOAD command and saved the 'libraryname' in pljavaLoadPath.
 *    1b. Because of a CREATE FUNCTION naming this library. pljavaLoadPath will
 *        be NULL.
 *    1c. By the first actual use of a PL/Java function, causing this library
 *        to be loaded. pljavaLoadPath will be NULL. The called function's Oid
 *        will be available to the call handler once we return from _PG_init,
 *        but it isn't (easily) available here.
 * 2. From the call handler, if initialization isn't complete yet. That can only
 *    mean something failed in the earlier call to _PG_init, and whatever it was
 *    is highly likely to fail again. That may lead to the untidiness of
 *    duplicated diagnostic messages, but for now I like the belt-and-suspenders
 *    approach of making sure the init sequence gets as many chances as possible
 *    to succeed.
 * 3. From a GUC assign hook, if the user has updated a setting that might allow
 *    initialization to succeed. It resumes from where it left off.
 *
 * In all cases, the sequence must progress as far as starting the VM and
 * initializing the PL/Java classes. In all cases except 1a, that's enough,
 * assuming the language handlers and schema have all been set up already (or,
 * in case 1b, the user is intent on setting them up explicitly).
 *
 * In case 1a, we can go ahead and test for, and create, the schema, functions,
 * and language entries as needed, using pljavaLoadPath as the library path
 * if creating the language handler functions. One-stop shopping. (The presence
 * of pljavaLoadPath in any of the other cases, such as resumption by an assign
 * hook, indicates it is really a continuation of case 1a.)
 */
static void initsequencer(enum initstage is, bool tolerant)
{
    JVMOptList optList;
    Invocation ctx;
    jint JNIresult;
    char *greeting;

    switch (is)
    {
    case IS_FORMLESS_VOID:
        initstage = IS_GUCS_REGISTERED;
        /* fall through */

    case IS_GUCS_REGISTERED:
        /*
         * The library name is hard-coded here.
         * NOTE(review): the strdup() result is not checked for NULL.
         */
        libjvmlocation = strdup("libjvm.so");
        initstage = IS_PLJAVA_ENABLED;
        /* fall through */

    case IS_PLJAVA_ENABLED:
        libjvm_handle = pg_dlopen(libjvmlocation);
        if ( NULL == libjvm_handle )
        {
            /*
             * NOTE(review): in PostgreSQL, ereport(ERROR) normally does not
             * return, so the goto below looks unreachable and the
             * check_tolerant bookkeeping would be skipped on this path --
             * confirm that this is intended.
             */
            ereport(ERROR, (
                errmsg("Cannot load libjvm.so library, check that it is available in LD_LIBRARY_PATH"),
                errdetail("%s", (char *)pg_dlerror())));
            goto check_tolerant;
        }
        initstage = IS_CAND_JVMOPENED;
        /* fall through */

    case IS_CAND_JVMOPENED:
        /* Look up the JVM creation entry point in the library just opened. */
        pljava_createvm =
            (jint (JNICALL *)(JavaVM **, void **, void *))
            pg_dlsym(libjvm_handle, "JNI_CreateJavaVM");
        if ( NULL == pljava_createvm )
        {
            /*
             * If it hasn't got the symbol, it can't be the right
             * library, so close/unload it so another can be tried.
             * Format the dlerror string first: dlclose may clobber it.
             */
            char *dle = MemoryContextStrdup(ErrorContext, pg_dlerror());
            pg_dlclose(libjvm_handle);
            /*
             * NOTE(review): IS_CAND_JVMLOCATION has no case label in this
             * switch, so a later call passing that stage would presumably
             * land in the default branch ("unexpected stage") -- confirm.
             * As above, the goto after ereport(ERROR) looks unreachable.
             */
            initstage = IS_CAND_JVMLOCATION;
            ereport(ERROR, (
                errmsg("Cannot start Java VM"),
                errdetail("%s", dle),
                errhint("Check that libjvm.so is available in LD_LIBRARY_PATH")));
            goto check_tolerant;
        }
        initstage = IS_CREATEVM_SYM_FOUND;
        /* fall through */

    case IS_CREATEVM_SYM_FOUND:
        /* Miscellaneous set-up; this stage is passed only once. */
        s_javaLogLevel = INFO;
        checkIntTimeType();
        HashMap_initialize(); /* creates things in TopMemoryContext */
#ifdef PLJAVA_DEBUG
        /* Hard setting for debug. Don't forget to recompile... */
        pljava_debug = 1;
#endif
        initstage = IS_MISC_ONCE_DONE;
        /* fall through */

    case IS_MISC_ONCE_DONE:
        /*
         * Build the JVM option list. This is also the stage the sequence is
         * reset to after a failed VM creation or a failed class load below,
         * so the list is rebuilt on every retry.
         */
        JVMOptList_init(&optList); /* uses CurrentMemoryContext */
        seenVisualVMName = false;
        addUserJVMOptions(&optList);
        if ( ! seenVisualVMName )
            JVMOptList_addVisualVMName(&optList);
        JVMOptList_add(&optList, "vfprintf", (void*)my_vfprintf, true);
#ifndef GCJ
        JVMOptList_add(&optList, "-Xrs", 0, true);
#endif
        effectiveClassPath = getClassPath("-Djava.class.path=");
        if(effectiveClassPath != 0)
        {
            JVMOptList_add(&optList, effectiveClassPath, 0, true);
        }
        initstage = IS_JAVAVM_OPTLIST;
        /* fall through */

    case IS_JAVAVM_OPTLIST:
        JNIresult = initializeJavaVM(&optList); /* frees the optList */
        if( JNI_OK != JNIresult )
        {
            initstage = IS_MISC_ONCE_DONE; /* optList has been freed */
            /* JNIresult is printed with %ld below, so it must fit in a long. */
            StaticAssertStmt(sizeof(jint) <= sizeof(long int),
                "jint wider than long int?!");
            ereport(WARNING,
                (errmsg("failed to create Java virtual machine"),
                 errdetail("JNI_CreateJavaVM returned an error code: %ld",
                    (long int)JNIresult),
                 jvmStartedAtLeastOnce ?
                    errhint("Because an earlier attempt during this session "
                    "did start a VM before failing, this probably means your "
                    "Java runtime environment does not support more than one "
                    "VM creation per session.  You may need to exit this "
                    "session and start a new one.") : 0));
            goto check_tolerant;
        }
        jvmStartedAtLeastOnce = true;
        elog(DEBUG2, "successfully created Java virtual machine");
        initstage = IS_JAVAVM_STARTED;
        /* fall through */

    case IS_JAVAVM_STARTED:
#ifdef USE_PLJAVA_SIGHANDLERS
        pqsignal(SIGINT,  pljavaStatementCancelHandler);
        pqsignal(SIGTERM, pljavaDieHandler);
#endif
        /* Register an on_proc_exit handler that destroys the VM */
        on_proc_exit(_destroyJavaVM, 0);
        initstage = IS_SIGHANDLERS;
        /* fall through */

    case IS_SIGHANDLERS:
        /*
         * Load the PL/Java classes and create the Java session inside a boot
         * invocation context. The stage only advances to IS_PLJAVA_FOUND if
         * the whole PG_TRY body completes.
         */
        Invocation_pushBootContext(&ctx);
        PG_TRY();
        {
            initPLJavaClasses();
            initJavaSession();
            Invocation_popBootContext();
            initstage = IS_PLJAVA_FOUND;
        }
        PG_CATCH();
        {
            MemoryContextSwitchTo(ctx.upperContext); /* leave ErrorContext */
            Invocation_popBootContext();
            initstage = IS_MISC_ONCE_DONE;
            /* We can't stay here... */
            if ( tolerant )
                reLogWithChangedLevel(WARNING); /* so xact is not aborted */
            else
            {
                EmitErrorReport(); /* no more unwinding, just log it */
                /* Seeing an ERROR emitted to the log, without leaving the
                 * transaction aborted, would violate the principle of least
                 * astonishment. But at check_tolerant below, another ERROR will
                 * be thrown immediately, so the transaction effect will be as
                 * expected and this ERROR will contribute information beyond
                 * what is in the generic one thrown down there.
                 */
                FlushErrorState();
            }
        }
        PG_END_TRY();
        if ( IS_PLJAVA_FOUND != initstage )
        {
            /* JVM initialization failed for some reason. Destroy
             * the VM if it exists. Perhaps the user will try
             * fixing the pljava.classpath and make a new attempt.
             */
            ereport(WARNING, (
                errmsg("failed to load initial PL/Java classes"),
                errhint("The most common reason is that \"pljava_classpath\" "
                    "needs to be set, naming the proper \"pljava.jar\" file.")
                ));
            _destroyJavaVM(0, 0);
            goto check_tolerant;
        }
        /* fall through */

    case IS_PLJAVA_FOUND:
        /*
         * Announce the load: at NOTICE when pljavaLoadPath is set (the LOAD
         * case described in the header comment), otherwise only at DEBUG1.
         */
        greeting = InstallHelper_hello();
        ereport(NULL != pljavaLoadPath ? NOTICE : DEBUG1, (
                errmsg("PL/Java loaded"),
                errdetail("versions:\n%s", greeting)));
        pfree(greeting);
        if ( NULL != pljavaLoadPath )
            InstallHelper_groundwork(); /* sqlj schema, language handlers, ...*/
        initstage = IS_COMPLETE;
        /* fall through */

    case IS_COMPLETE:
        pljavaLoadingAsExtension = false;
        if ( alteredSettingsWereNeeded )
        {
            /* Use this StringInfoData to conditionally construct part of the
             * hint string suggesting ALTER DATABASE ... SET ... FROM CURRENT
             * provided the server is >= 9.2 where that will actually work.
             * In 9.3, psprintf appeared, which would make this all simpler,
             * but if 9.3+ were all that had to be supported, this would all
             * be moot anyway. Doing the initStringInfo inside the ereport
             * ensures the string is allocated in ErrorContext and won't leak.
             * Don't remove the extra parens grouping
             * (initStringInfo, appendStringInfo, errhint) ... with the parens,
             * that's a comma expression, which is sequenced; without them, they
             * are just function parameters with evaluation order unknown.
             */
            StringInfoData buf;
#if PG_VERSION_NUM >= 90200
#define MOREHINT \
                appendStringInfo(&buf, \
                    "using ALTER DATABASE %s SET ... FROM CURRENT or ", \
                    pljavaDbName()),
#else
#define MOREHINT
#endif
            ereport(NOTICE, (
                errmsg("PL/Java successfully started after adjusting settings"),
                (initStringInfo(&buf),
                MOREHINT
                errhint("The settings that worked should be saved (%s"
                    "in the \"%s\" file). For a reminder of what has been set, "
                    "try: SELECT name, setting FROM pg_settings WHERE name LIKE"
                    " 'pljava.%%' AND source = 'session'",
                    buf.data,
                    superuser()
                        ? PG_GETCONFIGOPTION("config_file")
                        : "postgresql.conf"))));
#undef MOREHINT
            if ( loadAsExtensionFailed )
            {
                ereport(NOTICE, (errmsg(
                    "PL/Java load successful after failed CREATE EXTENSION"),
                    errdetail(
                    "PL/Java is now installed, but not as an extension."),
                    errhint(
                    "To correct that, either COMMIT or ROLLBACK, make sure "
                    "the working settings are saved, exit this session, and "
                    "in a new session, either: "
                    "1. if committed, run "
                    "\"CREATE EXTENSION pljava FROM unpackaged\", or 2. "
                    "if rolled back, simply \"CREATE EXTENSION pljava\" again."
                    )));
            }
        }
        /* Success: the only path that leaves without visiting check_tolerant. */
        return;

    default:
        /* Any stage value without a case label above ends up here. */
        ereport(ERROR, (
            errmsg("cannot set up PL/Java"),
            errdetail(
                "An unexpected stage was reached in the startup sequence."),
            errhint(
                "Please report the circumstances to the PL/Java maintainers.")
            ));
    }

check_tolerant:
    /*
     * Common failure exit. If the library was being loaded as an extension,
     * remember that the attempt failed and refuse to be tolerant.
     */
    if ( pljavaLoadingAsExtension )
    {
        tolerant = false;
        loadAsExtensionFailed = true;
        pljavaLoadingAsExtension = false;
    }
    if ( !tolerant )
    {
        ereport(ERROR, (
            errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
            errmsg(
                "cannot use PL/Java before successfully completing its setup"),
            errhint(
                "Check the log for messages closely preceding this one, "
                "detailing what step of setup failed and what will be needed, "
                "probably setting one of the \"pljava.\" configuration "
                "variables, to complete the setup. If there is not enough "
                "help in the log, try again with different settings for "
                "\"log_min_messages\" or \"log_error_verbosity\".")));
    }
}
/* fork lifecheck process*/ static pid_t fork_a_lifecheck(int fork_wait_time) { pid_t pid; sigjmp_buf local_sigjmp_buf; pid = fork(); if (pid != 0) { if (pid == -1) ereport(ERROR, (errmsg("failed to fork a lifecheck process"))); return pid; } on_exit_reset(); processType = PT_LIFECHECK; if (fork_wait_time > 0) { sleep(fork_wait_time); } POOL_SETMASK(&UnBlockSig); init_ps_display("", "", "", ""); signal(SIGTERM, wd_exit); signal(SIGINT, wd_exit); signal(SIGQUIT, wd_exit); signal(SIGCHLD, SIG_DFL); signal(SIGHUP, SIG_IGN); signal(SIGPIPE, SIG_IGN); /* Create per loop iteration memory context */ ProcessLoopContext = AllocSetContextCreate(TopMemoryContext, "wd_lifecheck_main_loop", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(TopMemoryContext); set_ps_display("lifecheck",false); /* wait until ready to go */ while (WD_OK != is_wd_lifecheck_ready()) { sleep(pool_config->wd_interval * 10); } ereport(LOG, (errmsg("watchdog: lifecheck started"))); if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Since not using PG_TRY, must reset error stack by hand */ error_context_stack = NULL; EmitErrorReport(); MemoryContextSwitchTo(TopMemoryContext); FlushErrorState(); sleep(pool_config->wd_heartbeat_keepalive); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; /* watchdog loop */ for (;;) { MemoryContextSwitchTo(ProcessLoopContext); MemoryContextResetAndDeleteChildren(ProcessLoopContext); /* pgpool life check */ wd_lifecheck(); sleep(pool_config->wd_interval); } return pid; }
void FileRepSubProcess_Main() { const char *statmsg; MemoryContext fileRepSubProcessMemoryContext; sigjmp_buf local_sigjmp_buf; MyProcPid = getpid(); MyStartTime = time(NULL); /* * Create a PGPROC so we can use LWLocks in FileRep sub-processes. The * routine also register clean up at process exit */ InitAuxiliaryProcess(); InitBufferPoolBackend(); FileRepSubProcess_ConfigureSignals(); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Prevents interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); LWLockReleaseAll(); if (FileRepPrimary_IsResyncManagerOrWorker()) { LockReleaseAll(DEFAULT_LOCKMETHOD, false); } if (FileRepIsBackendSubProcess(fileRepProcessType)) { AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); } /* * We can now go away. Note that because we'll call InitProcess, a * callback will be registered to do ProcKill, which will clean up * necessary state. 
*/ proc_exit(0); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; PG_SETMASK(&UnBlockSig); /* * Identify myself via ps */ statmsg = FileRepProcessTypeToString[fileRepProcessType]; init_ps_display(statmsg, "", "", ""); /* Create the memory context where cross-transaction state is stored */ fileRepSubProcessMemoryContext = AllocSetContextCreate(TopMemoryContext, "filerep subprocess memory context", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(fileRepSubProcessMemoryContext); stateChangeRequestCounter++; FileRepSubProcess_ProcessSignals(); switch (fileRepProcessType) { case FileRepProcessTypePrimarySender: FileRepPrimary_StartSender(); break; case FileRepProcessTypeMirrorReceiver: FileRepMirror_StartReceiver(); break; case FileRepProcessTypeMirrorConsumer: case FileRepProcessTypeMirrorConsumerWriter: case FileRepProcessTypeMirrorConsumerAppendOnly1: FileRepMirror_StartConsumer(); break; case FileRepProcessTypeMirrorSenderAck: FileRepAckMirror_StartSender(); break; case FileRepProcessTypePrimaryReceiverAck: FileRepAckPrimary_StartReceiver(); break; case FileRepProcessTypePrimaryConsumerAck: FileRepAckPrimary_StartConsumer(); break; case FileRepProcessTypePrimaryRecovery: FileRepSubProcess_InitProcess(); /* * At this point, database is starting up and xlog is not yet * replayed. Initializing relcache now is dangerous, a sequential * scan of catalog tables may end up with incorrect hint bits. * E.g. a committed transaction's dirty heap pages made it to disk * but pg_clog update was still in memory and we crashed. If a * tuple inserted by this transaction is read during relcache * initialization, status of the tuple's xmin will be incorrectly * determined as "not commited" from pg_clog. And * HEAP_XMIN_INVALID hint bit will be set, rendering the tuple * perpetually invisible. Relcache initialization must be * deferred to only after all of xlog has been replayed. 
*/ FileRepPrimary_StartRecovery(); ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); break; case FileRepProcessTypeResyncManager: FileRepSubProcess_InitProcess(); FileRepPrimary_StartResyncManager(); ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); break; case FileRepProcessTypeResyncWorker1: case FileRepProcessTypeResyncWorker2: case FileRepProcessTypeResyncWorker3: case FileRepProcessTypeResyncWorker4: FileRepSubProcess_InitProcess(); FileRepPrimary_StartResyncWorker(); ResourceOwnerRelease(CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); break; default: elog(PANIC, "unrecognized process type: %s(%d)", statmsg, fileRepProcessType); break; } switch (FileRepSubProcess_GetState()) { case FileRepStateShutdown: case FileRepStateReady: proc_exit(0); break; default: proc_exit(2); break; } }
/* * FtsProbeMain */ NON_EXEC_STATIC void ftsMain(int argc, char *argv[]) { sigjmp_buf local_sigjmp_buf; char *fullpath; IsUnderPostmaster = true; am_ftsprobe = true; /* Stay away from PMChildSlot */ MyPMChildSlot = -1; /* reset MyProcPid */ MyProcPid = getpid(); /* Lose the postmaster's on-exit routines */ on_exit_reset(); /* Identify myself via ps */ init_ps_display("ftsprobe process", "", "", ""); SetProcessingMode(InitProcessing); /* * reread postgresql.conf if requested */ pqsignal(SIGHUP, sigHupHandler); /* * Presently, SIGINT will lead to autovacuum shutdown, because that's how * we handle ereport(ERROR). It could be improved however. */ pqsignal(SIGINT, ReqFtsFullScan); /* request full-scan */ pqsignal(SIGTERM, die); pqsignal(SIGQUIT, quickdie); /* we don't do any ftsprobe specific cleanup, just use the standard. */ pqsignal(SIGALRM, handle_sig_alarm); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, procsignal_sigusr1_handler); /* We don't listen for async notifies */ pqsignal(SIGUSR2, RequestShutdown); pqsignal(SIGFPE, FloatExceptionHandler); pqsignal(SIGCHLD, SIG_DFL); /* * Copied from bgwriter */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "FTS Probe"); /* Early initialization */ BaseInit(); /* See InitPostgres()... */ InitProcess(); InitBufferPoolBackend(); InitXLOGAccess(); SetProcessingMode(NormalProcessing); /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Prevents interrupts while cleaning up */ HOLD_INTERRUPTS(); /* Report the error to the server log */ EmitErrorReport(); /* * We can now go away. Note that because we'll call InitProcess, a * callback will be registered to do ProcKill, which will clean up * necessary state. */ proc_exit(0); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; PG_SETMASK(&UnBlockSig); /* * Add my PGPROC struct to the ProcArray. 
* * Once I have done this, I am visible to other backends! */ InitProcessPhase2(); /* * Initialize my entry in the shared-invalidation manager's array of * per-backend data. * * Sets up MyBackendId, a unique backend identifier. */ MyBackendId = InvalidBackendId; SharedInvalBackendInit(false); if (MyBackendId > MaxBackends || MyBackendId <= 0) elog(FATAL, "bad backend id: %d", MyBackendId); /* * bufmgr needs another initialization call too */ InitBufferPoolBackend(); /* heap access requires the rel-cache */ RelationCacheInitialize(); InitCatalogCache(); /* * It's now possible to do real access to the system catalogs. * * Load relcache entries for the system catalogs. This must create at * least the minimum set of "nailed-in" cache entries. */ RelationCacheInitializePhase2(); /* * In order to access the catalog, we need a database, and a * tablespace; our access to the heap is going to be slightly * limited, so we'll just use some defaults. */ if (!FindMyDatabase(probeDatabase, &MyDatabaseId, &MyDatabaseTableSpace)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exit", probeDatabase))); /* Now we can mark our PGPROC entry with the database ID */ /* (We assume this is an atomic store so no lock is needed) */ MyProc->databaseId = MyDatabaseId; fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace); SetDatabasePath(fullpath); RelationCacheInitializePhase3(); /* shmem: publish probe pid */ ftsProbeInfo->fts_probePid = MyProcPid; /* main loop */ FtsLoop(); /* One iteration done, go away */ proc_exit(0); }