/* * MPP-220077: real_act_prefix_size should not go beyond ps_buffer_size */ void test__set_ps_display__real_act_prefix_size(void **state) { int len; ps_buffer = (char *) malloc(127 * sizeof(char)); ps_buffer_fixed_size = 79; memset(ps_buffer, 'x', ps_buffer_fixed_size * sizeof(char)); ps_buffer_size = 127; IsUnderPostmaster = true; StrNCpy(ps_host_info, "msa4000125.europe.corp.microsoft.com(57193)", sizeof(ps_host_info)); ps_host_info_size = 0; gp_session_id = 26351; Gp_role = GP_ROLE_DISPATCH; Gp_segment = -1; gp_command_count = 964; currentSliceId = -1; set_ps_display("testing activity", true); assert_true(real_act_prefix_size <= ps_buffer_size); get_real_act_ps_display(&len); assert_true(len >= 0); }
/* Flush the log to disk */ static void XLogWalRcvFlush(void) { if (XLByteLT(LogstreamResult.Flush, LogstreamResult.Write)) { /* use volatile pointer to prevent code rearrangement */ volatile WalRcvData *walrcv = WalRcv; issue_xlog_fsync(recvFile, recvId, recvSeg); LogstreamResult.Flush = LogstreamResult.Write; /* Update shared-memory status */ SpinLockAcquire(&walrcv->mutex); walrcv->latestChunkStart = walrcv->receivedUpto; walrcv->receivedUpto = LogstreamResult.Flush; SpinLockRelease(&walrcv->mutex); /* Report XLOG streaming progress in PS display */ if (update_process_title) { char activitymsg[50]; snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X", LogstreamResult.Write.xlogid, LogstreamResult.Write.xrecoff); set_ps_display(activitymsg, false); } } }
/*
 * PerformAuthentication -- authenticate a remote client
 *
 * port: the connection being authenticated; port->user_name (and
 * port->database_name for non-walsender connections) are logged when
 * Log_connections is set.
 *
 * returns: nothing.  Will not return at all if there's any failure.
 */
static void
PerformAuthentication(Port *port)
{
    /*
     * This should be set already, but let's make sure.  It limits the
     * visibility of log messages until authentication has finished.
     */
    ClientAuthInProgress = true;

#ifdef EXEC_BACKEND

    /*
     * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.conf
     * etcetera from the postmaster, and have to load them ourselves.  Note
     * we are loading them into the startup transaction's memory context,
     * not PostmasterContext, but that shouldn't matter.
     *
     * FIXME: [fork/exec] Ugh.  Is there a way around this overhead?
     *
     * Without the HBA file there is no way to connect to the database, so
     * a load failure is fatal.
     */
    if (!load_hba())
        ereport(FATAL,
                (errmsg("could not load pg_hba.conf")));

    load_ident();
#endif

    /*
     * Guard against a buggy or malicious client that never answers.  Since
     * we're inside a transaction and might do database access, the
     * statement_timeout infrastructure is used for this.
     */
    enable_timeout_after(STATEMENT_TIMEOUT, AuthenticationTimeout * 1000);

    /* Run the authentication exchange; might not return, if failure. */
    ClientAuthentication(port);

    /* Done with authentication.  Disable the timeout, and log if needed. */
    disable_timeout(STATEMENT_TIMEOUT, false);

    if (Log_connections && am_walsender)
        ereport(LOG,
                (errmsg("replication connection authorized: user=%s",
                        port->user_name)));
    else if (Log_connections)
        ereport(LOG,
                (errmsg("connection authorized: user=%s database=%s",
                        port->user_name, port->database_name)));

    set_ps_display("startup", false);

    /* client_min_messages is active from here on */
    ClientAuthInProgress = false;
}
/* * Call this once during subprocess startup to set the identification * values. At this point, the original argv[] array may be overwritten. */ void init_ps_display( const char *username, const char *dbname, const char *host_info, const char *initial_str) { ASSERT(username); ASSERT(dbname); ASSERT(host_info); #ifndef PS_USE_NONE /* no ps display for stand-alone backend */ if (!child) return; /* no ps display if you didn't call save_args() */ if (!save_argv) return; #ifdef PS_USE_CLOBBER_ARGV /* If ps_buffer is a pointer, it might still be null */ if (!ps_buffer) return; #endif // PS_USE_CLOBBER_ARGV /* * Overwrite argv[] to point at appropriate space, if needed */ #ifdef PS_USE_CHANGE_ARGV save_argv[0] = ps_buffer; save_argv[1] = NULL; #endif /* PS_USE_CHANGE_ARGV */ #ifdef PS_USE_CLOBBER_ARGV int i; /* make extra argv slots point at end_of_area (a NUL) */ for (i = 1; i < save_argc; i++) save_argv[i] = ps_buffer + ps_buffer_size; #endif /* PS_USE_CLOBBER_ARGV */ /* * Make fixed prefix of ps display. */ #ifdef PS_USE_SETPROCTITLE /* * apparently setproctitle() already adds a `progname:' prefix to the * ps line */ snprintf(ps_buffer, ps_buffer_size, "%s %s %s ", username, dbname, host_info); #else snprintf(ps_buffer, ps_buffer_size, "postgres: %s %s %s ", username, dbname, host_info); #endif // PS_USE_SETPROCTITLE ps_buffer_cur_len = ps_buffer_fixed_size = strlen(ps_buffer); set_ps_display(initial_str, true); #endif /* not PS_USE_NONE */ }
/* * Check it won't crash in case the ps_buffer overflows. */ void test__set_ps_display(void **state) { ps_buffer = (char *) malloc(64 * sizeof(char)); memset(ps_buffer, 0x7F, 64 * sizeof(char)); ps_buffer_fixed_size = 25; ps_buffer_size = 32; IsUnderPostmaster = true; gp_session_id = 1024; Gp_role = GP_ROLE_DISPATCH; Gp_segment = 24; gp_command_count = 1024; currentSliceId = 40; set_ps_display("testing activity", true); set_ps_display("testing activity", false); assert_true(ps_buffer[32] == 0x7f); }
/*
 * Program entry point.
 *
 * Seeds the random number generator from the current time, emits the header
 * via set_ps_header(HEIGHT, WIDTH), lets process_commands() drive the turtle
 * and pen, then calls set_ps_display().  Always returns 0.
 */
int
main(void)
{
    turtle_t    turtle;
    pen_t       pen_state;

    srand((unsigned int) time(NULL));

    set_ps_header(HEIGHT, WIDTH);
    process_commands(&turtle, &pen_state);
    set_ps_display();

    return 0;
}
/*
 * Program entry point.
 *
 * Seeds the random number generator from the current time, emits the header
 * via set_ps_header(HEIGHT, WIDTH), moves the turtle to (297.5, 420.5),
 * performs a 20000-step random walk, then calls set_ps_display().  Always
 * returns 0.
 */
int
main(void)
{
    turtle_t    turtle;
    pen_t       pen_state;

    srand((unsigned int) time(NULL));

    set_ps_header(HEIGHT, WIDTH);
    turtle_goto(&turtle, 297.5, 420.5);
    turtle_random_walk(&turtle, &pen_state, 20000);
    set_ps_display();

    return 0;
}
/* * worker child main loop */ void do_worker_child(void) { pool_debug("I am %d", getpid()); /* Identify myself via ps */ init_ps_display("", "", "", ""); set_ps_display("worker process", false); /* set up signal handlers */ signal(SIGALRM, SIG_DFL); signal(SIGTERM, my_signal_handler); signal(SIGINT, my_signal_handler); signal(SIGHUP, reload_config_handler); signal(SIGQUIT, my_signal_handler); signal(SIGCHLD, SIG_IGN); signal(SIGUSR1, my_signal_handler); signal(SIGUSR2, SIG_IGN); signal(SIGPIPE, SIG_IGN); /* Initialize my backend status */ pool_initialize_private_backend_status(); /* Initialize per process context */ pool_init_process_context(); for (;;) { CHECK_REQUEST; if (pool_config->sr_check_period <= 0) { sleep(30); } /* * If streaming replication mode, do time lag checking */ if (pool_config->sr_check_period > 0 && MASTER_SLAVE && !strcmp(pool_config->master_slave_sub_mode, MODE_STREAMREP)) { /* Check and establish persistent connections to the backend */ establish_persistent_connection(); /* Do replication time lag checking */ check_replication_time_lag(); /* Discard persistent connections */ discard_persistent_connection(); } sleep(pool_config->sr_check_period); } exit(0); }
/*
 * Python-callable wrapper around set_ps_display().
 *
 * args: argument tuple holding a single string (format "s"), the new title.
 *
 * Returns None on success, or NULL when the arguments cannot be parsed (the
 * exception has then been set by PyArg_ParseTuple).
 */
static PyObject *
spt_setproctitle(PyObject *self /* Not used */, PyObject *args)
{
    const char *new_title;

    if (!PyArg_ParseTuple(args, "s", &new_title))
    {
        return NULL;
    }

    set_ps_display(new_title, true);

    /* Hands back a new reference to None. */
    Py_RETURN_NONE;
}
/* fork lifecheck process*/ static pid_t fork_a_lifecheck(int fork_wait_time) { pid_t pid; pid = fork(); if (pid != 0) { if (pid == -1) pool_error("fork_a_lifecheck: fork() failed."); return pid; } if (fork_wait_time > 0) { sleep(fork_wait_time); } myargv = save_ps_display_args(myargc, myargv); POOL_SETMASK(&UnBlockSig); init_ps_display("", "", "", ""); signal(SIGTERM, wd_exit); signal(SIGINT, wd_exit); signal(SIGQUIT, wd_exit); signal(SIGCHLD, SIG_DFL); signal(SIGHUP, SIG_IGN); signal(SIGPIPE, SIG_IGN); set_ps_display("lifecheck",false); /* wait until ready to go */ while (WD_OK != is_wd_lifecheck_ready()) { sleep(pool_config->wd_interval * 10); } pool_log("watchdog: lifecheck started"); /* watchdog loop */ for (;;) { /* pgpool life check */ wd_lifecheck(); sleep(pool_config->wd_interval); } return pid; }
/*
 * Show ps idle status
 *
 * Builds "<user> <database> <remote_ps_data> idle" from the master
 * connection's startup packet, with " in transaction" appended when the
 * master's transaction state is 'T', and installs it as the process title.
 */
void
pool_ps_idle_display(POOL_CONNECTION_POOL * backend)
{
    char        title[1024];
    StartupPacket *startup = MASTER_CONNECTION(backend)->sp;

    if (MASTER(backend)->tstate != 'T')
        snprintf(title, sizeof(title), "%s %s %s idle",
                 startup->user, startup->database, remote_ps_data);
    else
        snprintf(title, sizeof(title), "%s %s %s idle in transaction",
                 startup->user, startup->database, remote_ps_data);

    set_ps_display(title, false);
}
/* * Flush the log to disk. * * If we're in the midst of dying, it's unwise to do anything that might throw * an error, so we skip sending a reply in that case. */ static void XLogWalRcvFlush(bool dying) { if (XLByteLT(LogstreamResult.Flush, LogstreamResult.Write)) { /* use volatile pointer to prevent code rearrangement */ volatile WalRcvData *walrcv = WalRcv; issue_xlog_fsync(recvFile, recvId, recvSeg); LogstreamResult.Flush = LogstreamResult.Write; /* Update shared-memory status */ SpinLockAcquire(&walrcv->mutex); if (XLByteLT(walrcv->receivedUpto, LogstreamResult.Flush)) { walrcv->latestChunkStart = walrcv->receivedUpto; walrcv->receivedUpto = LogstreamResult.Flush; } SpinLockRelease(&walrcv->mutex); /* Signal the startup process and walsender that new WAL has arrived */ WakeupRecovery(); if (AllowCascadeReplication()) WalSndWakeup(); /* Report XLOG streaming progress in PS display */ if (update_process_title) { char activitymsg[50]; snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X", LogstreamResult.Write.xlogid, LogstreamResult.Write.xrecoff); set_ps_display(activitymsg, false); } /* Also let the master know that we made some progress */ if (!dying) { XLogWalRcvSendReply(); XLogWalRcvSendHSFeedback(); } } }
/* * Call this once during subprocess startup to set the identification * values. At this point, the original argv[] array may be overwritten. */ void init_ps_display(const char *initial_str) { #ifndef PS_USE_NONE /* no ps display if you didn't call save_ps_display_args() */ if (!save_argv) return; #ifdef PS_USE_CLOBBER_ARGV /* If ps_buffer is a pointer, it might still be null */ if (!ps_buffer) return; #endif /* * Overwrite argv[] to point at appropriate space, if needed */ #ifdef PS_USE_CHANGE_ARGV save_argv[0] = ps_buffer; save_argv[1] = NULL; #endif /* PS_USE_CHANGE_ARGV */ #ifdef PS_USE_CLOBBER_ARGV { int i; /* make extra argv slots point at end_of_area (a NUL) */ for (i = 1; i < save_argc; i++) save_argv[i] = ps_buffer + ps_buffer_size; } #endif /* PS_USE_CLOBBER_ARGV */ /* * Make fixed prefix of ps display. */ ps_buffer[0] = '\0'; ps_buffer_fixed_size = strlen(ps_buffer); set_ps_display(initial_str, true); #endif /* not PS_USE_NONE */ }
/* * PgArchiverMain * * The argc/argv parameters are valid only in EXEC_BACKEND case. However, * since we don't use 'em, it hardly matters... */ NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]) { IsUnderPostmaster = true; /* we are a postmaster subprocess now */ MyProcPid = getpid(); /* reset MyProcPid */ /* Lose the postmaster's on-exit routines */ on_exit_reset(); /* * Ignore all signals usually bound to some action in the postmaster, * except for SIGHUP, SIGUSR1 and SIGQUIT. */ pqsignal(SIGHUP, ArchSigHupHandler); pqsignal(SIGINT, SIG_IGN); pqsignal(SIGTERM, SIG_IGN); pqsignal(SIGQUIT, pgarch_exit); pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, pgarch_waken); pqsignal(SIGUSR2, SIG_IGN); pqsignal(SIGCHLD, SIG_DFL); pqsignal(SIGTTIN, SIG_DFL); pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); pqsignal(SIGWINCH, SIG_DFL); PG_SETMASK(&UnBlockSig); /* * Identify myself via ps */ init_ps_display("archiver process", "", ""); set_ps_display(""); pgarch_MainLoop(); exit(0); }
/* * Execute commands from walreceiver, until we enter streaming mode. */ static void WalSndHandshake(void) { StringInfoData input_message; bool replication_started = false; initStringInfo(&input_message); while (!replication_started) { int firstchar; WalSndSetState(WALSNDSTATE_STARTUP); set_ps_display("idle", false); /* Wait for a command to arrive */ firstchar = pq_getbyte(); /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. */ if (!PostmasterIsAlive()) exit(1); /* * Check for any other interesting events that happened while we * slept. */ if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); } if (firstchar != EOF) { /* * Read the message contents. This is expected to be done without * blocking because we've been able to get message type code. */ if (pq_getmessage(&input_message, 0)) firstchar = EOF; /* suitable message already logged */ } /* Handle the very limited subset of commands expected in this phase */ switch (firstchar) { case 'Q': /* Query message */ { const char *query_string; query_string = pq_getmsgstring(&input_message); pq_getmsgend(&input_message); if (HandleReplicationCommand(query_string)) replication_started = true; } break; case 'X': /* standby is closing the connection */ proc_exit(0); case EOF: /* standby disconnected unexpectedly */ ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("unexpected EOF on standby connection"))); proc_exit(0); default: ereport(FATAL, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid standby handshake message type %d", firstchar))); } } }
/*
 * Read up to MAX_SEND_SIZE bytes of WAL that's been flushed to disk,
 * but not yet sent to the client, and buffer it in the libpq output
 * buffer.
 *
 * msgbuf is a work area in which the output message is constructed.  It's
 * passed in just so we can avoid re-palloc'ing the buffer on each cycle.
 * It must be of size 1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE.
 * Layout written here: byte 0 is 'w', followed by the message header,
 * followed by the WAL data itself.
 *
 * If there is no unsent WAL remaining, *caughtup is set to true, otherwise
 * *caughtup is set to false.
 *
 * Side effects: advances the file-level sentPtr, copies it into the shared
 * MyWalSnd entry under its spinlock, and (if update_process_title is set)
 * updates the process title.
 */
static void
XLogSend(char *msgbuf, bool *caughtup)
{
    XLogRecPtr  SendRqstPtr;
    XLogRecPtr  startptr;
    XLogRecPtr  endptr;
    Size        nbytes;
    WalDataMessageHeader msghdr;

    /*
     * Attempt to send all data that's already been written out and fsync'd
     * to disk.  We cannot go further than what's been written out given the
     * current implementation of XLogRead().  And in any case it's unsafe to
     * send WAL that is not securely down to disk on the master: if the
     * master subsequently crashes and restarts, slaves must not have applied
     * any WAL that gets lost on the master.
     *
     * A cascading walsender uses the standby's flush pointer instead of the
     * local one.
     */
    SendRqstPtr = am_cascading_walsender ? GetStandbyFlushRecPtr() :
        GetFlushRecPtr();

    /* Quick exit if nothing to do */
    if (XLByteLE(SendRqstPtr, sentPtr))
    {
        *caughtup = true;
        return;
    }

    /*
     * Figure out how much to send in one message.  If there's no more than
     * MAX_SEND_SIZE bytes to send, send everything.  Otherwise send
     * MAX_SEND_SIZE bytes, but round back to logfile or page boundary.
     *
     * The rounding is not only for performance reasons.  Walreceiver relies
     * on the fact that we never split a WAL record across two messages.
     * Since a long WAL record is split at page boundary into continuation
     * records, page boundary is always a safe cut-off point.  We also assume
     * that SendRqstPtr never points to the middle of a WAL record.
     */
    startptr = sentPtr;
    if (startptr.xrecoff >= XLogFileSize)
    {
        /*
         * crossing a logid boundary, skip the non-existent last log segment
         * in previous logical log file.
         */
        startptr.xlogid += 1;
        startptr.xrecoff = 0;
    }

    endptr = startptr;
    XLByteAdvance(endptr, MAX_SEND_SIZE);
    if (endptr.xlogid != startptr.xlogid)
    {
        /* Don't cross a logfile boundary within one message */
        Assert(endptr.xlogid == startptr.xlogid + 1);
        endptr.xlogid = startptr.xlogid;
        endptr.xrecoff = XLogFileSize;
    }

    /* if we went beyond SendRqstPtr, back off */
    if (XLByteLE(SendRqstPtr, endptr))
    {
        endptr = SendRqstPtr;
        *caughtup = true;
    }
    else
    {
        /* round down to page boundary. */
        endptr.xrecoff -= (endptr.xrecoff % XLOG_BLCKSZ);
        *caughtup = false;
    }

    /*
     * startptr and endptr share the same xlogid at this point (enforced
     * above), so the byte count is just the difference of the offsets.
     */
    nbytes = endptr.xrecoff - startptr.xrecoff;
    Assert(nbytes <= MAX_SEND_SIZE);

    /*
     * OK to read and send the slice.
     */
    msgbuf[0] = 'w';

    /*
     * Read the log directly into the output buffer to avoid extra memcpy
     * calls.
     */
    XLogRead(msgbuf + 1 + sizeof(WalDataMessageHeader), startptr, nbytes);

    /*
     * We fill the message header last so that the send timestamp is taken as
     * late as possible.
     */
    msghdr.dataStart = startptr;
    msghdr.walEnd = SendRqstPtr;
    msghdr.sendTime = GetCurrentTimestamp();

    memcpy(msgbuf + 1, &msghdr, sizeof(WalDataMessageHeader));

    pq_putmessage_noblock('d', msgbuf, 1 + sizeof(WalDataMessageHeader) + nbytes);

    /* Remember how far we have sent. */
    sentPtr = endptr;

    /* Update shared memory status */
    {
        /* use volatile pointer to prevent code rearrangement */
        volatile WalSnd *walsnd = MyWalSnd;

        SpinLockAcquire(&walsnd->mutex);
        walsnd->sentPtr = sentPtr;
        SpinLockRelease(&walsnd->mutex);
    }

    /* Report progress of XLOG streaming in PS display */
    if (update_process_title)
    {
        char        activitymsg[50];

        snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",
                 sentPtr.xlogid, sentPtr.xrecoff);
        set_ps_display(activitymsg, false);
    }

    return;
}
/*
 * Wait for synchronous replication, if requested by user.
 *
 * XactCommitLSN: the commit record's LSN; it is stored in MyProc->waitLSN
 * so that a walsender can tell when this backend may be released.
 *
 * Initially backends start in state SYNC_REP_NOT_WAITING and then
 * change that state to SYNC_REP_WAITING before adding ourselves
 * to the wait queue. During SyncRepWakeQueue() a WALSender changes
 * the state to SYNC_REP_WAIT_COMPLETE once replication is confirmed.
 * This backend then resets its state to SYNC_REP_NOT_WAITING.
 *
 * The wait also ends, with a WARNING, on ProcDiePending or
 * QueryCancelPending, and silently if the postmaster has died.
 */
void
SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
{
    char       *new_status = NULL;
    const char *old_status;

    /*
     * Fast exit if user has not requested sync replication, or
     * there are no sync replication standby names defined.
     * Note that those standbys don't need to be connected.
     */
    if (!SyncRepRequested() || !SyncStandbysDefined())
        return;

    Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));
    Assert(WalSndCtl != NULL);

    /* Reset the latch before adding ourselves to the queue. */
    ResetLatch(&MyProc->waitLatch);

    /*
     * Set our waitLSN so WALSender will know when to wake us, and add
     * ourselves to the queue.
     */
    LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
    Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING);
    if (!WalSndCtl->sync_standbys_defined)
    {
        /*
         * We don't wait for sync rep if WalSndCtl->sync_standbys_defined is
         * not set.  See SyncRepUpdateSyncStandbysDefined.
         */
        LWLockRelease(SyncRepLock);
        return;
    }
    MyProc->waitLSN = XactCommitLSN;
    MyProc->syncRepState = SYNC_REP_WAITING;
    SyncRepQueueInsert();
    Assert(SyncRepQueueIsOrderedByLSN());
    LWLockRelease(SyncRepLock);

    /* Alter ps display to show waiting for sync rep. */
    if (update_process_title)
    {
        int         len;

        old_status = get_ps_display(&len);

        /*
         * 32 extra bytes leave room for " waiting for %X/%X" (13 literal
         * characters, a slash, and two hex numbers -- at most 8 digits each
         * assuming xlogid and xrecoff are 32-bit; TODO confirm), plus one
         * byte for the terminator.
         */
        new_status = (char *) palloc(len + 32 + 1);
        memcpy(new_status, old_status, len);
        sprintf(new_status + len, " waiting for %X/%X",
                XactCommitLSN.xlogid, XactCommitLSN.xrecoff);
        set_ps_display(new_status, false);

        /*
         * Truncate off " waiting ..." so that new_status now holds the
         * original title, ready to be restored at the end.
         */
        new_status[len] = '\0';
    }

    /*
     * Wait for specified LSN to be confirmed.
     *
     * Each proc has its own wait latch, so we perform a normal latch
     * check/wait loop here.
     */
    for (;;)
    {
        int         syncRepState;

        /*
         * Wait on latch for up to 60 seconds. This allows us to
         * check for postmaster death regularly while waiting.
         * Note that timeout here does not necessarily release from loop.
         */
        WaitLatch(&MyProc->waitLatch, 60000000L);

        /* Must reset the latch before testing state. */
        ResetLatch(&MyProc->waitLatch);

        /*
         * Try checking the state without the lock first.  There's no guarantee
         * that we'll read the most up-to-date value, so if it looks like we're
         * still waiting, recheck while holding the lock.  But if it looks like
         * we're done, we must really be done, because once walsender changes
         * the state to SYNC_REP_WAIT_COMPLETE, it will never update it again,
         * so we can't be seeing a stale value in that case.
         */
        syncRepState = MyProc->syncRepState;
        if (syncRepState == SYNC_REP_WAITING)
        {
            LWLockAcquire(SyncRepLock, LW_SHARED);
            syncRepState = MyProc->syncRepState;
            LWLockRelease(SyncRepLock);
        }
        if (syncRepState == SYNC_REP_WAIT_COMPLETE)
            break;

        /*
         * If a wait for synchronous replication is pending, we can neither
         * acknowledge the commit nor raise ERROR or FATAL.  The latter
         * would lead the client to believe that the transaction
         * aborted, which is not true: it's already committed locally.
         * The former is no good either: the client has requested
         * synchronous replication, and is entitled to assume that an
         * acknowledged commit is also replicated, which may not be true.
         * So in this case we issue a WARNING (which some clients may
         * be able to interpret) and shut off further output.  We do NOT
         * reset ProcDiePending, so that the process will die after the
         * commit is cleaned up.
         */
        if (ProcDiePending)
        {
            ereport(WARNING,
                    (errcode(ERRCODE_ADMIN_SHUTDOWN),
                     errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
                     errdetail("The transaction has already committed locally, but may not have been replicated to the standby.")));
            whereToSendOutput = DestNone;
            SyncRepCancelWait();
            break;
        }

        /*
         * It's unclear what to do if a query cancel interrupt arrives.  We
         * can't actually abort at this point, but ignoring the interrupt
         * altogether is not helpful, so we just terminate the wait with
         * a suitable warning.
         */
        if (QueryCancelPending)
        {
            QueryCancelPending = false;
            ereport(WARNING,
                    (errmsg("canceling wait for synchronous replication due to user request"),
                     errdetail("The transaction has already committed locally, but may not have been replicated to the standby.")));
            SyncRepCancelWait();
            break;
        }

        /*
         * If the postmaster dies, we'll probably never get an acknowledgement,
         * because all the wal sender processes will exit.  So just bail out.
         */
        if (!PostmasterIsAlive(true))
        {
            ProcDiePending = true;
            whereToSendOutput = DestNone;
            SyncRepCancelWait();
            break;
        }
    }

    /*
     * WalSender has checked our LSN and has removed us from queue. Clean up
     * state and leave.  It's OK to reset these shared memory fields without
     * holding SyncRepLock, because any walsenders will ignore us anyway when
     * we're not on the queue.
     */
    Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));
    MyProc->syncRepState = SYNC_REP_NOT_WAITING;
    MyProc->waitLSN.xlogid = 0;
    MyProc->waitLSN.xrecoff = 0;

    /* new_status is non-NULL only if the title was changed above. */
    if (new_status)
    {
        /* Reset ps display */
        set_ps_display(new_status, false);
        pfree(new_status);
    }
}
pid_t wd_child(int fork_wait_time) { int sock; int fd; int rtn; pid_t pid = 0; pid = fork(); if (pid != 0) { if (pid == -1) pool_error("wd_child: fork() failed."); return pid; } if (fork_wait_time > 0) { sleep(fork_wait_time); } myargv = save_ps_display_args(myargc, myargv); POOL_SETMASK(&UnBlockSig); signal(SIGTERM, wd_child_exit); signal(SIGINT, wd_child_exit); signal(SIGQUIT, wd_child_exit); signal(SIGCHLD, SIG_IGN); signal(SIGHUP, SIG_IGN); signal(SIGUSR1, SIG_IGN); signal(SIGUSR2, SIG_IGN); signal(SIGPIPE, SIG_IGN); signal(SIGALRM, SIG_IGN); init_ps_display("", "", "", ""); if (WD_List == NULL) { /* memory allocate is not ready */ wd_child_exit(15); } sock = wd_create_recv_socket(WD_MYSELF->wd_port); if (sock < 0) { /* socket create failed */ wd_child_exit(15); } set_ps_display("watchdog", false); /* child loop */ for(;;) { WdPacket buf; fd = wd_accept(sock); if (fd < 0) { continue; } rtn = wd_recv_packet(fd, &buf); if (rtn == WD_OK) { wd_send_response(fd, &buf); } close(fd); } return pid; }
/*
 * This is the main executioner for any query backend that conflicts with
 * recovery processing. Judgement has already been passed on it within
 * a specific rmgr. Here we just issue the orders to the procs. The procs
 * then throw the required error as instructed.
 *
 * waitlist: array of virtual transaction ids, ended by an invalid entry.
 * reason:   passed to CancelVirtualTransaction() for each backend that has
 *           to be cancelled.
 *
 * While waiting, the process title gets " waiting" appended once more than
 * 500 msec have elapsed; the original title is restored before returning.
 */
static void
ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
                                       ProcSignalReason reason)
{
    TimestampTz started_at;
    char       *saved_title = NULL; /* non-NULL once the ps display changed */

    /* Fast exit, to avoid a kernel call if there's no work to be done. */
    if (!VirtualTransactionIdIsValid(*waitlist))
        return;

    started_at = GetCurrentTimestamp();

    /* Walk the list; each pass waits for one virtual transaction to end. */
    for (; VirtualTransactionIdIsValid(*waitlist); waitlist++)
    {
        /* reset standbyWait_us for each xact we wait for */
        standbyWait_us = STANDBY_INITIAL_WAIT_US;

        /* wait until the virtual xid is gone */
        while (!ConditionalVirtualXactLockTableWait(*waitlist))
        {
            /*
             * Report via ps if we have been waiting for more than 500 msec
             * (should that be configurable?)
             */
            if (update_process_title && saved_title == NULL &&
                TimestampDifferenceExceeds(started_at, GetCurrentTimestamp(),
                                           500))
            {
                int         title_len;
                const char *current_title = get_ps_display(&title_len);

                saved_title = (char *) palloc(title_len + 8 + 1);
                memcpy(saved_title, current_title, title_len);
                strcpy(saved_title + title_len, " waiting");
                set_ps_display(saved_title, false);

                /* truncate off " waiting" so the original can be restored */
                saved_title[title_len] = '\0';
            }

            /* Is it time to kill it? */
            if (WaitExceedsMaxStandbyDelay())
            {
                pid_t       victim;

                /*
                 * Now find out who to throw out of the balloon.
                 */
                Assert(VirtualTransactionIdIsValid(*waitlist));
                victim = CancelVirtualTransaction(*waitlist, reason);

                /*
                 * Wait a little bit for it to die so that we avoid flooding
                 * an unresponsive backend when system is heavily loaded.
                 */
                if (victim != 0)
                    pg_usleep(5000L);
            }
        }

        /* The virtual transaction is gone now, wait for the next one */
    }

    /* Reset ps display if we changed it */
    if (saved_title != NULL)
    {
        set_ps_display(saved_title, false);
        pfree(saved_title);
    }
}
/* pcp command processor */ static void pcp_process_command(char tos, char *buf, int buf_len) { if (tos == 'C' || tos == 'd' || tos == 'D' || tos == 'j' || tos == 'J' || tos == 'O' || tos == 'T') { if (Req_info->switching) { if(Req_info->request_queue_tail != Req_info->request_queue_head) { POOL_REQUEST_KIND reqkind; reqkind = Req_info->request[(Req_info->request_queue_head +1) % MAX_REQUEST_QUEUE_SIZE].kind; if (reqkind == NODE_UP_REQUEST) ereport(ERROR, (errmsg("failed to process PCP request at the moment"), errdetail("failback is in progress"))); else if (reqkind == NODE_DOWN_REQUEST) ereport(ERROR, (errmsg("failed to process PCP request at the moment"), errdetail("failover is in progress"))); else if (reqkind == PROMOTE_NODE_REQUEST) ereport(ERROR, (errmsg("failed to process PCP request at the moment"), errdetail("promote node operation is in progress"))); ereport(ERROR, (errmsg("failed to process PCP request at the moment"), errdetail("operation is in progress"))); } } } switch (tos) { case 'A': /* set configuration parameter */ set_ps_display("PCP: processing set configration parameter request", false); process_set_configration_parameter(pcp_frontend,buf,buf_len); break; case 'L': /* node count */ set_ps_display("PCP: processing node count request", false); inform_node_count(pcp_frontend); break; case 'I': /* node info */ set_ps_display("PCP: processing node info request", false); inform_node_info(pcp_frontend, buf); break; case 'N': /* process count */ set_ps_display("PCP: processing process count request", false); inform_process_count(pcp_frontend); break; case 'P': /* process info */ set_ps_display("PCP: processing process info request", false); inform_process_info(pcp_frontend, buf); break; case 'W': /* watchdog info */ set_ps_display("PCP: processing watchdog info request", false); inform_watchdog_info(pcp_frontend, buf); break; case 'D': /* detach node */ case 'd': /* detach node gracefully */ set_ps_display("PCP: processing detach node request", 
false); process_detach_node(pcp_frontend, buf, tos); break; case 'C': /* attach node */ set_ps_display("PCP: processing attach node request", false); process_attach_node(pcp_frontend, buf); break; case 'T': set_ps_display("PCP: processing shutdown request", false); process_shutown_request(pcp_frontend, buf[0]); break; case 'O': /* recovery request */ set_ps_display("PCP: processing recovery request", false); process_recovery_request(pcp_frontend, buf); break; case 'B': /* status request*/ set_ps_display("PCP: processing status request request", false); process_status_request(pcp_frontend); break; case 'J': /* promote node */ case 'j': /* promote node gracefully */ set_ps_display("PCP: processing promote node request", false); process_promote_node(pcp_frontend,buf,tos); break; case 'F': ereport(DEBUG1, (errmsg("PCP processing request, stop online recovery"))); break; case 'X': /* disconnect */ ereport(DEBUG1, (errmsg("PCP processing request, client disconnecting"), errdetail("closing PCP connection, and exiting child"))); pcp_close(pcp_frontend); pcp_frontend = NULL; /* This child has done its part. Rest in peace now */ exit(0); break; default: ereport(FATAL, (errmsg("PCP processing request"), errdetail("unknown PCP packet type \"%c\"",tos))); } }
/* * -------------------------------------------------------------- * ProcessIncomingNotify * * Deal with arriving NOTIFYs from other backends. * This is called either directly from the SIGUSR2 signal handler, * or the next time control reaches the outer idle loop. * Scan pg_listener for arriving notifies, report them to my front end, * and clear the notification field in pg_listener until next time. * * NOTE: since we are outside any transaction, we must create our own. * -------------------------------------------------------------- */ static void ProcessIncomingNotify(void) { Relation lRel; TupleDesc tdesc; ScanKeyData key[1]; HeapScanDesc scan; HeapTuple lTuple, rTuple; Datum value[Natts_pg_listener]; char repl[Natts_pg_listener], nulls[Natts_pg_listener]; bool catchup_enabled; /* Must prevent SIGUSR1 interrupt while I am running */ catchup_enabled = DisableCatchupInterrupt(); if (Trace_notify) elog(DEBUG1, "ProcessIncomingNotify"); set_ps_display("notify interrupt", false); notifyInterruptOccurred = 0; StartTransactionCommand(); lRel = heap_open(ListenerRelationId, ExclusiveLock); tdesc = RelationGetDescr(lRel); /* Scan only entries with my listenerPID */ ScanKeyInit(&key[0], Anum_pg_listener_pid, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(MyProcPid)); scan = heap_beginscan(lRel, SnapshotNow, 1, key); /* Prepare data for rewriting 0 into notification field */ nulls[0] = nulls[1] = nulls[2] = ' '; repl[0] = repl[1] = repl[2] = ' '; repl[Anum_pg_listener_notify - 1] = 'r'; value[0] = value[1] = value[2] = (Datum) 0; value[Anum_pg_listener_notify - 1] = Int32GetDatum(0); while ((lTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_listener listener = (Form_pg_listener) GETSTRUCT(lTuple); char *relname = NameStr(listener->relname); int32 sourcePID = listener->notification; if (sourcePID != 0) { /* Notify the frontend */ if (Trace_notify) elog(DEBUG1, "ProcessIncomingNotify: received %s from %d", relname, (int) sourcePID); 
NotifyMyFrontEnd(relname, sourcePID); /* * Rewrite the tuple with 0 in notification column. * * simple_heap_update is safe here because no one else would have * tried to UNLISTEN us, so there can be no uncommitted changes. */ rTuple = heap_modifytuple(lTuple, tdesc, value, nulls, repl); simple_heap_update(lRel, &lTuple->t_self, rTuple); #ifdef NOT_USED /* currently there are no indexes */ CatalogUpdateIndexes(lRel, rTuple); #endif } } heap_endscan(scan); /* * We do NOT release the lock on pg_listener here; we need to hold it * until end of transaction (which is about to happen, anyway) to ensure * that other backends see our tuple updates when they look. Otherwise, a * transaction started after this one might mistakenly think it doesn't * need to send this backend a new NOTIFY. */ heap_close(lRel, NoLock); CommitTransactionCommand(); /* * Must flush the notify messages to ensure frontend gets them promptly. */ pq_flush(); set_ps_display("idle", false); if (Trace_notify) elog(DEBUG1, "ProcessIncomingNotify: done"); if (catchup_enabled) EnableCatchupInterrupt(); }
/* fork lifecheck process*/ static pid_t fork_a_lifecheck(int fork_wait_time) { pid_t pid; sigjmp_buf local_sigjmp_buf; pid = fork(); if (pid != 0) { if (pid == -1) ereport(ERROR, (errmsg("failed to fork a lifecheck process"))); return pid; } on_exit_reset(); processType = PT_LIFECHECK; if (fork_wait_time > 0) { sleep(fork_wait_time); } POOL_SETMASK(&UnBlockSig); init_ps_display("", "", "", ""); signal(SIGTERM, wd_exit); signal(SIGINT, wd_exit); signal(SIGQUIT, wd_exit); signal(SIGCHLD, SIG_DFL); signal(SIGHUP, SIG_IGN); signal(SIGPIPE, SIG_IGN); /* Create per loop iteration memory context */ ProcessLoopContext = AllocSetContextCreate(TopMemoryContext, "wd_lifecheck_main_loop", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(TopMemoryContext); set_ps_display("lifecheck",false); /* wait until ready to go */ while (WD_OK != is_wd_lifecheck_ready()) { sleep(pool_config->wd_interval * 10); } ereport(LOG, (errmsg("watchdog: lifecheck started"))); if (sigsetjmp(local_sigjmp_buf, 1) != 0) { /* Since not using PG_TRY, must reset error stack by hand */ error_context_stack = NULL; EmitErrorReport(); MemoryContextSwitchTo(TopMemoryContext); FlushErrorState(); sleep(pool_config->wd_heartbeat_keepalive); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; /* watchdog loop */ for (;;) { MemoryContextSwitchTo(ProcessLoopContext); MemoryContextResetAndDeleteChildren(ProcessLoopContext); /* pgpool life check */ wd_lifecheck(); sleep(pool_config->wd_interval); } return pid; }
/* * fork de-escalation process */ pid_t fork_plunging_process(void) { pid_t pid; pid = fork(); if (pid != 0) { if (pid == -1) ereport(NOTICE, (errmsg("failed to fork a de-escalation process"))); return pid; } on_exit_reset(); processType = PT_WATCHDOG_UTILITY; POOL_SETMASK(&UnBlockSig); init_ps_display("", "", "", ""); pool_signal(SIGTERM, wd_exit); pool_signal(SIGINT, wd_exit); pool_signal(SIGQUIT, wd_exit); pool_signal(SIGCHLD, SIG_DFL); pool_signal(SIGHUP, SIG_IGN); pool_signal(SIGPIPE, SIG_IGN); MemoryContextSwitchTo(TopMemoryContext); set_ps_display("watchdog de-escalation", false); ereport(LOG, (errmsg("watchdog: de-escalation started"))); /* * STEP 1 execute de-escalation command provided by user in pgpool conf * file */ if (strlen(pool_config->wd_de_escalation_command)) { int r = system(pool_config->wd_de_escalation_command); if (WIFEXITED(r)) { if (WEXITSTATUS(r) == EXIT_SUCCESS) ereport(LOG, (errmsg("watchdog de-escalation successful"))); else { ereport(WARNING, (errmsg("watchdog de-escalation command failed with exit status: %d", WEXITSTATUS(r)))); } } else { ereport(WARNING, (errmsg("watchdog de-escalation command exit abnormally"))); } } /* * STEP 2 bring down the delegate IP */ if (strlen(pool_config->delegate_IP) != 0) { if (wd_IP_down() != WD_OK) ereport(WARNING, (errmsg("watchdog de-escalation failed to bring down delegate IP"))); } exit(0); }
/* * fork escalation process */ pid_t fork_escalation_process(void) { pid_t pid; pid = fork(); if (pid != 0) { if (pid == -1) ereport(NOTICE, (errmsg("failed to fork a escalation process"))); return pid; } on_exit_reset(); processType = PT_WATCHDOG_UTILITY; POOL_SETMASK(&UnBlockSig); init_ps_display("", "", "", ""); pool_signal(SIGTERM, wd_exit); pool_signal(SIGINT, wd_exit); pool_signal(SIGQUIT, wd_exit); pool_signal(SIGCHLD, SIG_DFL); pool_signal(SIGHUP, SIG_IGN); pool_signal(SIGPIPE, SIG_IGN); MemoryContextSwitchTo(TopMemoryContext); set_ps_display("watchdog escalation", false); ereport(LOG, (errmsg("watchdog: escalation started"))); /* * STEP 1 clear shared memory cache */ if (pool_config->memory_cache_enabled && pool_is_shmem_cache() && pool_config->clear_memqcache_on_escalation) { ereport(LOG, (errmsg("watchdog escalation"), errdetail("clearing all the query cache on shared memory"))); pool_clear_memory_cache(); } /* * STEP 2 execute escalation command provided by user in pgpool conf file */ if (strlen(pool_config->wd_escalation_command)) { int r = system(pool_config->wd_escalation_command); if (WIFEXITED(r)) { if (WEXITSTATUS(r) == EXIT_SUCCESS) ereport(LOG, (errmsg("watchdog escalation successful"))); else { ereport(WARNING, (errmsg("watchdog escalation command failed with exit status: %d", WEXITSTATUS(r)))); } } else { ereport(WARNING, (errmsg("watchdog escalation command exit abnormally"))); } } /* * STEP 3 bring up the delegate IP */ if (strlen(pool_config->delegate_IP) != 0) { if (wd_IP_up() != WD_OK) ereport(WARNING, (errmsg("watchdog escalation failed to acquire delegate IP"))); } exit(0); }
/* * Call this once during subprocess startup to set the identification * values. At this point, the original argv[] array may be overwritten. */ void init_ps_display(const char *username, const char *dbname, const char *host_info, const char *initial_str) { Assert(username); Assert(dbname); Assert(host_info); #ifndef PS_USE_NONE /* no ps display for stand-alone backend */ if (!IsUnderPostmaster) return; /* no ps display if you didn't call save_ps_display_args() */ if (!save_argv) return; #ifdef PS_USE_CLOBBER_ARGV /* If ps_buffer is a pointer, it might still be null */ if (!ps_buffer) return; #endif /* * Overwrite argv[] to point at appropriate space, if needed */ #ifdef PS_USE_CHANGE_ARGV save_argv[0] = ps_buffer; save_argv[1] = NULL; #endif /* PS_USE_CHANGE_ARGV */ #ifdef PS_USE_CLOBBER_ARGV { int i; /* make extra argv slots point at end_of_area (a NUL) */ for (i = 1; i < save_argc; i++) save_argv[i] = ps_buffer + ps_buffer_size; } #endif /* PS_USE_CLOBBER_ARGV */ /* * Make fixed prefix of ps display. */ #ifdef PS_USE_SETPROCTITLE /* * apparently setproctitle() already adds a `progname:' prefix to the ps * line */ #define PROGRAM_NAME_PREFIX "" #else #define PROGRAM_NAME_PREFIX "postgres: " #endif if (*cluster_name == '\0') { snprintf(ps_buffer, ps_buffer_size, PROGRAM_NAME_PREFIX "%s %s %s ", username, dbname, host_info); } else { snprintf(ps_buffer, ps_buffer_size, PROGRAM_NAME_PREFIX "%s: %s %s %s ", cluster_name, username, dbname, host_info); } ps_buffer_cur_len = ps_buffer_fixed_size = strlen(ps_buffer); set_ps_display(initial_str, true); #endif /* not PS_USE_NONE */ }
/*
 * do_accept: wait with select() for a connection request on the listening
 * sockets, accept() it, and return a POOL_CONNECTION for the new socket.
 *
 * unix_fd, inet_fd: listening descriptors. unix_fd is always put in the
 *     read set; inet_fd only when it is non-zero.
 * timeout: select() timeout. When both fields are zero, select() is called
 *     with a NULL timeout. Otherwise the structure may be rewritten below
 *     with the time remaining.
 *
 * Returns the connection created by pool_open() for the accepted socket.
 * Returns NULL when select() times out or fails, when accept() fails, when
 * setsockopt() fails, or when pool_open() fails. Several paths call
 * child_exit(1) instead of returning (see the backend availability checks).
 *
 * Side effects visible here: updates the ps display, destroys the session
 * context, calls connection_count_up(), sets the file-level "accepted"
 * flag, fills remote_host / remote_port / remote_ps_data, and reloads the
 * configuration when got_sighup is set.
 */
static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd, struct timeval *timeout)
{
    fd_set readmask;
    int fds;
    int save_errno;
    SockAddr saddr;
    int fd = 0;
    int afd;
    int inet = 0;
    POOL_CONNECTION *cp;
#ifdef ACCEPT_PERFORMANCE
    struct timeval now1, now2;
    static long atime;
    static int cnt;
#endif
    struct timeval *timeoutval;
    struct timeval tv1, tv2, tmback = {0, 0};

    set_ps_display("wait for connection request", false);

    /* Destroy the session context, just in case one is still around */
    pool_session_context_destroy();

    FD_ZERO(&readmask);
    FD_SET(unix_fd, &readmask);
    if (inet_fd)
        FD_SET(inet_fd, &readmask);

    /* a {0, 0} timeout means "no timeout": pass NULL to select() */
    if (timeout->tv_sec == 0 && timeout->tv_usec == 0)
        timeoutval = NULL;
    else
    {
        timeoutval = timeout;
        /* remember the requested timeout and the start time for the
         * remaining-time computation after select() */
        tmback.tv_sec = timeout->tv_sec;
        tmback.tv_usec = timeout->tv_usec;
        gettimeofday(&tv1, NULL);

#ifdef DEBUG
        pool_log("before select = {%d, %d}", timeoutval->tv_sec, timeoutval->tv_usec);
        pool_log("g:before select = {%d, %d}", tv1.tv_sec, tv1.tv_usec);
#endif
    }

    fds = select(Max(unix_fd, inet_fd)+1, &readmask, NULL, NULL, timeoutval);

    /* keep select()'s errno: the calls below may overwrite it */
    save_errno = errno;
    /* run the backend timer handler if the timer has expired */
    if (backend_timer_expired)
    {
        pool_backend_timer();
        backend_timer_expired = 0;
    }

    /*
     * The following fragment computes the remaining timeout value in a
     * portable way. Linux does this automatically but other platforms do not.
     *
     * NOTE(review): *timeout is only rewritten when the microsecond part of
     * the difference goes negative. When tmback.tv_usec stays >= 0 the
     * caller's timeout is left exactly as select() left it -- confirm this
     * is intended on platforms where select() does not update it.
     */
    if (timeoutval)
    {
        gettimeofday(&tv2, NULL);

        tmback.tv_usec -= tv2.tv_usec - tv1.tv_usec;
        tmback.tv_sec -= tv2.tv_sec - tv1.tv_sec;

        if (tmback.tv_usec < 0)
        {
            /* borrow one second for the negative microsecond part */
            tmback.tv_sec--;
            if (tmback.tv_sec < 0)
            {
                /* nothing left: report a fully expired timeout */
                timeout->tv_sec = 0;
                timeout->tv_usec = 0;
            }
            else
            {
                tmback.tv_usec += 1000000;
                timeout->tv_sec = tmback.tv_sec;
                timeout->tv_usec = tmback.tv_usec;
            }
        }
#ifdef DEBUG
        pool_log("g:after select = {%d, %d}", tv2.tv_sec, tv2.tv_usec);
        pool_log("after select = {%d, %d}", timeout->tv_sec, timeout->tv_usec);
#endif
    }

    errno = save_errno;

    if (fds == -1)
    {
        /* EAGAIN and EINTR are not reported */
        if (errno == EAGAIN || errno == EINTR)
            return NULL;

        /* NOTE(review): this string literal is broken across two physical
         * lines in this copy of the file; left untouched here. */
        pool_error("select() failed. 
reason %s", strerror(errno));
        return NULL;
    }

    /* timeout */
    if (fds == 0)
    {
        return NULL;
    }

    if (FD_ISSET(unix_fd, &readmask))
    {
        fd = unix_fd;
    }

    /* when both sockets are readable the INET one wins */
    if (FD_ISSET(inet_fd, &readmask))
    {
        fd = inet_fd;
        inet++;
    }

    /*
     * Note that some SysV systems do not work here. For those
     * systems, we need some locking mechanism for the fd.
     */
    memset(&saddr, 0, sizeof(saddr));
    saddr.salen = sizeof(saddr.addr);

#ifdef ACCEPT_PERFORMANCE
    gettimeofday(&now1,0);
#endif

 retry_accept:

    /* wait while recovery is in progress */
    while (*InRecovery == 1)
    {
        pause();
    }

    afd = accept(fd, (struct sockaddr *)&saddr.addr, &saddr.salen);

    /* keep accept()'s errno across the timer handling */
    save_errno = errno;
    /* run the backend timer handler if the timer has expired */
    if (backend_timer_expired)
    {
        pool_backend_timer();
        backend_timer_expired = 0;
    }
    errno = save_errno;
    if (afd < 0)
    {
        /* interrupted while recovery is in progress: wait and try again */
        if (errno == EINTR && *InRecovery)
            goto retry_accept;

        /*
         * "Resource temporarily unavailable" (EAGAIN or EWOULDBLOCK)
         * can be silently ignored. And EINTR can be ignored.
         */
        /* NOTE(review): this string literal is broken across two physical
         * lines in this copy of the file; left untouched here. */
        if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
            pool_error("accept() failed. 
reason: %s", strerror(errno));
        return NULL;
    }
#ifdef ACCEPT_PERFORMANCE
    /* accumulate the time spent in accept() and log it every 100 calls */
    gettimeofday(&now2,0);
    atime += (now2.tv_sec - now1.tv_sec)*1000000 + (now2.tv_usec - now1.tv_usec);
    cnt++;
    if (cnt % 100 == 0)
    {
        pool_log("cnt: %d atime: %ld", cnt, atime);
    }
#endif

    /* reload the configuration if a reload was requested */
    if (got_sighup)
    {
        pool_get_config(get_config_file_name(), RELOAD_CONFIG);
        if (pool_config->enable_pool_hba)
        {
            load_hba(get_hba_file_name());
            /* reopen the password file only when one is configured */
            if (strcmp("", pool_config->pool_passwd))
                pool_reopen_passwd_file();
        }
        if (pool_config->parallel_mode)
            pool_memset_system_db_info(system_db_info->info);
        got_sighup = 0;
    }

    /*
     * NOTE(review): the connection counter is incremented here, but the
     * NULL-returning paths further down (setsockopt() / pool_open() failure)
     * do not decrement it in this function -- verify that callers compensate.
     */
    connection_count_up();
    accepted = 1;

    if (pool_config->parallel_mode)
    {
        /*
         * do not accept new connection if any of DB node or SystemDB is down when operating in
         * parallel mode
         */
        int i;

        for (i=0;i<NUM_BACKENDS;i++)
        {
            if (BACKEND_INFO(i).backend_status == CON_DOWN || SYSDB_STATUS == CON_DOWN)
            {
                StartupPacket *sp;
                char *msg = "pgpool is not available in parallel query mode";

                if (SYSDB_STATUS == CON_DOWN)
                    pool_log("Cannot accept() new connection. SystemDB is down");
                else
                    pool_log("Cannot accept() new connection. %d th backend is down", i);

                if ((cp = pool_open(afd)) == NULL)
                {
                    close(afd);
                    child_exit(1);
                }

                /* the startup packet is read to learn which protocol
                 * version the error message must use */
                sp = read_startup_packet(cp);
                if (sp == NULL)
                {
                    /* failed to read the startup packet: close the connection and exit this child */
                    pool_close(cp);
                    child_exit(1);
                }

                pool_debug("do_accept: send error message to frontend");

                if (sp->major == PROTO_MAJOR_V3)
                {
                    char buf[256];

                    if (SYSDB_STATUS == CON_DOWN)
                        snprintf(buf, sizeof(buf), "SystemDB is down");
                    else
                        snprintf(buf, sizeof(buf), "%d th backend is down", i);

                    pool_send_error_message(cp, sp->major, "08S01", msg, buf, ((SYSDB_STATUS == CON_DOWN) ? 
                                            "repair the SystemDB and restart pgpool" : "repair the backend and restart pgpool"), __FILE__, __LINE__);
                }
                else
                {
                    pool_send_error_message(cp, sp->major, 0, msg, "", "", "", 0);
                }
                pool_close(cp);
                child_exit(1);
            }
        }
    }
    else
    {
        /*
         * do not accept new connection if all DB nodes are down when operating in
         * non parallel mode
         */
        int i;
        int found = 0;

        for (i=0;i<NUM_BACKENDS;i++)
        {
            if (VALID_BACKEND(i))
            {
                found = 1;
            }
        }
        if (found == 0)
        {
            pool_log("Cannot accept() new connection. all backends are down");
            child_exit(1);
        }
    }

    pool_debug("I am %d accept fd %d", getpid(), afd);

    /* resolve the peer and build the "host(port)" string kept in remote_ps_data */
    pool_getnameinfo_all(&saddr, remote_host, remote_port);
    snprintf(remote_ps_data, sizeof(remote_ps_data), remote_port[0] == '\0' ? "%s" : "%s(%s)", remote_host, remote_port);

    set_ps_display("accept connection", false);

    /* log who is connecting */
    if (pool_config->log_connections)
    {
        pool_log("connection received: host=%s%s%s", remote_host, remote_port[0] ? " port=" : "", remote_port);
    }

    /* set NODELAY and KEEPALIVE options if INET connection */
    if (inet)
    {
        int on = 1;

        if (setsockopt(afd, IPPROTO_TCP, TCP_NODELAY, (char *) &on, sizeof(on)) < 0)
        {
            pool_error("do_accept: setsockopt() failed: %s", strerror(errno));
            close(afd);
            return NULL;
        }
        if (setsockopt(afd, SOL_SOCKET, SO_KEEPALIVE, (char *) &on, sizeof(on)) < 0)
        {
            pool_error("do_accept: setsockopt() failed: %s", strerror(errno));
            close(afd);
            return NULL;
        }
    }

    if ((cp = pool_open(afd)) == NULL)
    {
        close(afd);
        return NULL;
    }

    /* save ip address for hba */
    memcpy(&cp->raddr, &saddr, sizeof(SockAddr));
    /* the buffer was zeroed before accept(); a family of 0 is recorded as AF_UNIX */
    if (cp->raddr.addr.ss_family == 0)
        cp->raddr.addr.ss_family = AF_UNIX;

    return cp;
}
/* * main entry pont of pcp worker child process */ void pcp_worker_main(int port) { sigjmp_buf local_sigjmp_buf; MemoryContext PCPMemoryContext; int authenticated = 0; char salt[4]; int random_salt = 0; struct timeval uptime; char tos; int rsize; char *buf = NULL; ereport(DEBUG1, (errmsg("I am PCP worker child with pid:%d",getpid()))); /* Identify myself via ps */ init_ps_display("", "", "", ""); gettimeofday(&uptime, NULL); srandom((unsigned int) (getpid() ^ uptime.tv_usec)); /* set up signal handlers */ signal(SIGTERM, die); signal(SIGINT, die); signal(SIGQUIT, die); signal(SIGCHLD, SIG_DFL); signal(SIGUSR2, wakeup_handler_child); signal(SIGUSR1, SIG_IGN); signal(SIGHUP, SIG_IGN); signal(SIGPIPE, SIG_IGN); signal(SIGALRM, SIG_IGN); /* Create per loop iteration memory context */ PCPMemoryContext = AllocSetContextCreate(TopMemoryContext, "PCP_worker_main_loop", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(TopMemoryContext); /* * install the call back for preparation of pcp worker child exit */ on_system_exit(pcp_worker_will_go_down, (Datum)NULL); /* Initialize my backend status */ pool_initialize_private_backend_status(); /* Initialize process context */ pool_init_process_context(); pcp_frontend = pcp_open(port); unset_nonblock(pcp_frontend->fd); if (sigsetjmp(local_sigjmp_buf, 1) != 0) { error_context_stack = NULL; EmitErrorReport(); MemoryContextSwitchTo(TopMemoryContext); FlushErrorState(); } /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; for(;;) { MemoryContextSwitchTo(PCPMemoryContext); MemoryContextResetAndDeleteChildren(PCPMemoryContext); errno = 0; /* read a PCP packet */ do_pcp_read(pcp_frontend, &tos, 1); do_pcp_read(pcp_frontend, &rsize, sizeof(int)); rsize = ntohl(rsize); if ((rsize - sizeof(int)) > 0) { buf = (char *)palloc(rsize - sizeof(int)); do_pcp_read(pcp_frontend, buf, rsize - sizeof(int)); } ereport(DEBUG1, (errmsg("received PCP packet"), errdetail("PCP 
packet type of service '%c'", tos))); if (tos == 'R') /* authentication */ { set_ps_display("PCP: processing authentication", false); process_authentication(pcp_frontend, buf,salt, &random_salt); authenticated = 1; continue; } if (tos == 'M') /* md5 salt */ { set_ps_display("PCP: processing authentication", false); send_md5salt(pcp_frontend, salt); random_salt = 1; continue; } /* is this connection authenticated? if not disconnect immediately*/ if (!authenticated) ereport(FATAL, (errmsg("authentication failed for new PCP connection"), errdetail("connection not authorized"))); /* process a request */ pcp_process_command(tos, buf, rsize); } exit(0); }
/*
 * PerformAuthentication -- authenticate a remote client
 *
 * port: the connection being authenticated; its user_name, database_name
 *      and (with USE_SSL) ssl fields are read for the log message.
 *
 * returns: nothing.  Does not return at all if anything fails.
 */
static void
PerformAuthentication(Port *port)
{
    /* Should be set already; setting it again limits log message visibility */
    ClientAuthInProgress = true;

    /*
     * Under EXEC_BACKEND the contents of pg_hba.conf etcetera were not
     * inherited from the postmaster, so they are loaded here.
     *
     * FIXME: [fork/exec] Ugh.  Is there a way around this overhead?
     */
#ifdef EXEC_BACKEND
    /*
     * Without the HBA file there is no way to connect to the database, so
     * failing to load it is fatal.
     */
    if (!load_hba())
        ereport(FATAL,
                (errmsg("could not load pg_hba.conf")));

    /*
     * Failing to load the IDENT file is tolerated: it only means that
     * authentication methods needing a user name mapping cannot be used.
     * load_ident() has already logged the details, so its result is
     * deliberately ignored.
     */
    (void) load_ident();
#endif

    /*
     * Guard against a buggy or malicious client that never answers during
     * authentication.  We are inside a transaction and might do database
     * access, hence the statement_timeout infrastructure.
     */
    enable_timeout_after(STATEMENT_TIMEOUT, AuthenticationTimeout * 1000);

    /*
     * The authentication exchange itself; might not return on failure.
     */
    ClientAuthentication(port);

    /*
     * Authentication is done.  Disarm the timeout and log if requested.
     */
    disable_timeout(STATEMENT_TIMEOUT, false);

    if (Log_connections)
    {
        if (!am_walsender)
        {
            /* regular connection */
#ifdef USE_SSL
            if (port->ssl)
            {
                ereport(LOG,
                        (errmsg("connection authorized: user=%s database=%s SSL enabled (protocol=%s, cipher=%s, compression=%s)",
                                port->user_name, port->database_name,
                                SSL_get_version(port->ssl),
                                SSL_get_cipher(port->ssl),
                                SSL_get_current_compression(port->ssl) ? _("on") : _("off"))));
            }
            else
#endif
            {
                ereport(LOG,
                        (errmsg("connection authorized: user=%s database=%s",
                                port->user_name, port->database_name)));
            }
        }
        else
        {
            /* replication connection */
#ifdef USE_SSL
            if (port->ssl)
            {
                ereport(LOG,
                        (errmsg("replication connection authorized: user=%s SSL enabled (protocol=%s, cipher=%s, compression=%s)",
                                port->user_name,
                                SSL_get_version(port->ssl),
                                SSL_get_cipher(port->ssl),
                                SSL_get_current_compression(port->ssl) ? _("on") : _("off"))));
            }
            else
#endif
            {
                ereport(LOG,
                        (errmsg("replication connection authorized: user=%s",
                                port->user_name)));
            }
        }
    }

    set_ps_display("startup", false);

    ClientAuthInProgress = false;   /* client_min_messages is active now */
}
/* * pgarch_archiveXlog * * Invokes system(3) to copy one archive file to wherever it should go * * Returns true if successful */ static bool pgarch_archiveXlog(char *xlog) { char xlogarchcmd[MAXPGPATH]; char pathname[MAXPGPATH]; char activitymsg[MAXFNAMELEN + 16]; char *dp; char *endp; const char *sp; int rc; snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog); /* * construct the command to be executed */ dp = xlogarchcmd; endp = xlogarchcmd + MAXPGPATH - 1; *endp = '\0'; for (sp = XLogArchiveCommand; *sp; sp++) { if (*sp == '%') { switch (sp[1]) { case 'p': /* %p: relative path of source file */ sp++; strlcpy(dp, pathname, endp - dp); make_native_path(dp); dp += strlen(dp); break; case 'f': /* %f: filename of source file */ sp++; strlcpy(dp, xlog, endp - dp); dp += strlen(dp); break; case '%': /* convert %% to a single % */ sp++; if (dp < endp) *dp++ = *sp; break; default: /* otherwise treat the % as not special */ if (dp < endp) *dp++ = *sp; break; } } else { if (dp < endp) *dp++ = *sp; } } *dp = '\0'; ereport(DEBUG3, (errmsg_internal("executing archive command \"%s\"", xlogarchcmd))); /* Report archive activity in PS display */ snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog); set_ps_display(activitymsg, false); rc = system(xlogarchcmd); if (rc != 0) { /* * If either the shell itself, or a called command, died on a signal, * abort the archiver. We do this because system() ignores SIGINT and * SIGQUIT while waiting; so a signal is very likely something that * should have interrupted us too. If we overreact it's no big deal, * the postmaster will just start the archiver again. * * Per the Single Unix Spec, shells report exit status > 128 when a * called command died on a signal. */ int lev = (WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128) ? 
FATAL : LOG; if (WIFEXITED(rc)) { ereport(lev, (errmsg("archive command failed with exit code %d", WEXITSTATUS(rc)), errdetail("The failed archive command was: %s", xlogarchcmd))); } else if (WIFSIGNALED(rc)) { #if defined(WIN32) ereport(lev, (errmsg("archive command was terminated by exception 0x%X", WTERMSIG(rc)), errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."), errdetail("The failed archive command was: %s", xlogarchcmd))); #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST ereport(lev, (errmsg("archive command was terminated by signal %d: %s", WTERMSIG(rc), WTERMSIG(rc) < NSIG ? sys_siglist[WTERMSIG(rc)] : "(unknown)"), errdetail("The failed archive command was: %s", xlogarchcmd))); #else ereport(lev, (errmsg("archive command was terminated by signal %d", WTERMSIG(rc)), errdetail("The failed archive command was: %s", xlogarchcmd))); #endif } else { ereport(lev, (errmsg("archive command exited with unrecognized status %d", rc), errdetail("The failed archive command was: %s", xlogarchcmd))); } snprintf(activitymsg, sizeof(activitymsg), "failed on %s", xlog); set_ps_display(activitymsg, false); return false; } ereport(DEBUG1, (errmsg("archived transaction log file \"%s\"", xlog))); snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog); set_ps_display(activitymsg, false); return true; }
/*
 * do_child: main loop of a pgpool child process.
 *
 * unix_fd, inet_fd: listening sockets, handed to do_accept() on every
 *     iteration (inet_fd is treated as "absent" when zero in the
 *     NONE_BLOCK setup below).
 *
 * After one-time initialisation the function loops forever: accept a
 * frontend, read its startup packet (handling cancel and SSL requests),
 * optionally authenticate it, obtain a backend connection (pooled or new),
 * run the query loop, then clean up and wait for the next frontend.
 *
 * The loop is left only through child_exit(): status 1 on errors or when a
 * restart was requested, status 2 when child_life_time expires or
 * child_max_connections is reached.
 */
void do_child(int unix_fd, int inet_fd)
{
    POOL_CONNECTION *frontend;
    POOL_CONNECTION_POOL *backend;
    struct timeval now;
    struct timezone tz;
    struct timeval timeout;
    static int connected;       /* non 0 once a connection from a frontend has been accepted */
    int connections_count = 0;  /* used if child_max_connections > 0 */
    int found;
    char psbuf[NI_MAXHOST + 128];

    pool_debug("I am %d", getpid());

    /* Identify myself via ps */
    init_ps_display("", "", "", "");

    /* set up signal handlers */
    signal(SIGALRM, SIG_DFL);
    signal(SIGTERM, die);
    signal(SIGINT, die);
    signal(SIGHUP, reload_config_handler);
    signal(SIGQUIT, die);
    signal(SIGCHLD, SIG_DFL);
    signal(SIGUSR1, close_idle_connection);
    signal(SIGUSR2, wakeup_handler);
    signal(SIGPIPE, SIG_IGN);

#ifdef NONE_BLOCK
    /* set listen fds to non-blocking */
    pool_set_nonblock(unix_fd);
    if (inet_fd)
    {
        pool_set_nonblock(inet_fd);
    }
#endif

    /* Initialize my backend status */
    pool_initialize_private_backend_status();

    /* Initialize per process context */
    pool_init_process_context();

    /* initialize random seed */
    gettimeofday(&now, &tz);
#if defined(sun) || defined(__sun)
    srand((unsigned int) now.tv_usec);
#else
    srandom((unsigned int) now.tv_usec);
#endif

    /* initialize system db connection */
    init_system_db_connection();

    /* initialize connection pool */
    if (pool_init_cp())
    {
        child_exit(1);
    }

    /*
     * Open pool_passwd in child process.  This is necessary to avoid the
     * file descriptor race condition reported in [pgpool-general: 1141].
     * Skipped when no pool_passwd file name is configured.
     */
    if (strcmp("", pool_config->pool_passwd))
    {
        pool_reopen_passwd_file();
    }

    /* the accept timeout starts out as the configured child life time */
    timeout.tv_sec = pool_config->child_life_time;
    timeout.tv_usec = 0;

    for (;;)
    {
        StartupPacket *sp;

        idle = 1;

        /* pgpool stop request already sent? */
        check_stop_request();

        /* Check if restart request is set because of failback event
         * happened. If so, exit myself with exit code 1 to be
         * restarted by pgpool parent.
         */
        /* NOTE(review): the string literal below is broken across two
         * physical lines in this copy of the file; left untouched here. */
        if (pool_get_my_process_info()->need_to_restart)
        {
            pool_log("do_child: failback event found. 
restart myself.");
            pool_get_my_process_info()->need_to_restart = 0;
            child_exit(1);
        }

        accepted = 0;

        /* perform accept() */
        frontend = do_accept(unix_fd, inet_fd, &timeout);

        if (frontend == NULL)   /* connection request from frontend timed out */
        {
            /*
             * Exit when the life time has run out: only after this child
             * has served at least one frontend, only when child_life_time
             * is enabled, and only when the remaining timeout is zero.
             */
            if (connected && pool_config->child_life_time > 0 &&
                timeout.tv_sec == 0 && timeout.tv_usec == 0)
            {
                pool_debug("child life %d seconds expired", pool_config->child_life_time);
                /*
                 * Doesn't need to call this. child_exit() calls it.
                 * send_frontend_exits();
                 */
                child_exit(2);
            }
            continue;
        }

        /* set frontend fd to blocking */
        pool_unset_nonblock(frontend->fd);

        /* reset busy flag */
        idle = 0;

        /* run the backend timer handler if the timer has expired */
        if (backend_timer_expired)
        {
            pool_backend_timer();
            backend_timer_expired = 0;
        }

        /* read the startup packet */
    retry_startup:
        sp = read_startup_packet(frontend);
        if (sp == NULL)
        {
            /* failed to read the startup packet. return to the accept() loop */
            pool_close(frontend);
            connection_count_down();
            continue;
        }

        /* cancel request? (protocol code 1234.5678) */
        if (sp->major == 1234 && sp->minor == 5678)
        {
            cancel_request((CancelPacket *)sp->startup_packet);

            pool_close(frontend);
            pool_free_startup_packet(sp);
            connection_count_down();
            continue;
        }

        /* SSL request? (protocol code 1234.5679); after negotiating, the
         * real startup packet is read again */
        if (sp->major == 1234 && sp->minor == 5679 && !frontend->ssl_active)
        {
            pool_debug("SSLRequest from client");
            pool_ssl_negotiate_serverclient(frontend);
            goto retry_startup;
        }

        if (pool_config->enable_pool_hba)
        {
            /*
             * do client authentication.
             * Note that ClientAuthentication does not return if frontend
             * was rejected; it simply terminates this process. 
             */
            frontend->protoVersion = sp->major;
            frontend->database = strdup(sp->database);
            if (frontend->database == NULL)
            {
                pool_error("do_child: strdup failed: %s\n", strerror(errno));
                child_exit(1);
            }
            frontend->username = strdup(sp->user);
            if (frontend->username == NULL)
            {
                pool_error("do_child: strdup failed: %s\n", strerror(errno));
                child_exit(1);
            }
            ClientAuthentication(frontend);
        }

        /*
         * Ok, negotiation with frontend has been done. Let's go to the
         * next step.  Connect to backend if there's no existing
         * connection which can be reused by this frontend.
         * Authentication is also done in this step.
         */

        /* Check if restart request is set because of failback event
         * happened. If so, close idle connections to backend and make
         * a new copy of backend status.
         */
        if (pool_get_my_process_info()->need_to_restart)
        {
            pool_log("do_child: failback event found. discard existing connections");
            pool_get_my_process_info()->need_to_restart = 0;
            close_idle_connection(0);
            pool_initialize_private_backend_status();
        }

        /*
         * if there's no connection associated with user and database,
         * we need to connect to the backend and send the startup packet.
         */

        /* look for existing connection */
        found = 0;
        backend = pool_get_cp(sp->user, sp->database, sp->major, 1);

        if (backend != NULL)
        {
            found = 1;

            /* existing connection associated with same user/database/major found.
             * however we should make sure that the startup packet contents are identical.
             * OPTION data and others might be different. 
             */
            if (sp->len != MASTER_CONNECTION(backend)->sp->len)
            {
                pool_debug("do_child: connection exists but startup packet length is not identical");
                found = 0;
            }
            else if(memcmp(sp->startup_packet, MASTER_CONNECTION(backend)->sp->startup_packet, sp->len) != 0)
            {
                pool_debug("do_child: connection exists but startup packet contents is not identical");
                found = 0;
            }

            if (found == 0)
            {
                /* we need to discard existing connection since startup packet is different */
                pool_discard_cp(sp->user, sp->database, sp->major);
                backend = NULL;
            }
        }

        if (backend == NULL)
        {
            /* create a new connection to backend */
            if ((backend = connect_backend(sp, frontend)) == NULL)
            {
                connection_count_down();
                continue;
            }
        }
        else
        {
            /* reuse existing connection */
            /* NOTE(review): unlike the connect_backend() failure path just
             * above, this path continues without connection_count_down() --
             * confirm whether the helper counts down on failure itself. */
            if (!connect_using_existing_connection(frontend, backend, sp))
                continue;
        }

        connected = 1;

        /* show ps status; from here on sp is the startup packet stored
         * with the backend connection */
        sp = MASTER_CONNECTION(backend)->sp;
        snprintf(psbuf, sizeof(psbuf), "%s %s %s idle",
                 sp->user, sp->database, remote_ps_data);
        set_ps_display(psbuf, false);

        /*
         * Initialize per session context
         */
        pool_init_session_context(frontend, backend);

        /* Mark this connection pool is connected from frontend */
        pool_coninfo_set_frontend_connected(pool_get_process_context()->proc_id, pool_pool_index());

        /* query process loop: runs until pool_process_query() returns
         * something other than POOL_CONTINUE */
        for (;;)
        {
            POOL_STATUS status;

            status = pool_process_query(frontend, backend, 0);

            sp = MASTER_CONNECTION(backend)->sp;

            switch (status)
            {
                /* client exits */
                case POOL_END:
                    /*
                     * do not cache connection if:
                     * pool_config->connection_cache == 0 or
                     * database name is template0, template1, postgres or regression
                     */
                    if (pool_config->connection_cache == 0 ||
                        !strcmp(sp->database, "template0") ||
                        !strcmp(sp->database, "template1") ||
                        !strcmp(sp->database, "postgres") ||
                        !strcmp(sp->database, "regression"))
                    {
                        reset_connection();
                        pool_close(frontend);
                        pool_send_frontend_exits(backend);
                        pool_discard_cp(sp->user, sp->database, sp->major);
                    }
                    else
                    {
                        POOL_STATUS status1;

                        /* send reset request to backend */
                        status1 = pool_process_query(frontend, 
                                                     backend, 1);
                        pool_close(frontend);

                        /* if we detect errors on resetting connection, we need to discard
                         * this connection since it might be in unknown status
                         */
                        if (status1 != POOL_CONTINUE)
                        {
                            pool_debug("error in resetting connections. discarding connection pools...");
                            pool_send_frontend_exits(backend);
                            pool_discard_cp(sp->user, sp->database, sp->major);
                        }
                        else
                            pool_connection_pool_timer(backend);
                    }
                    break;

                /* error occurred: log it and exit this child with status 1 */
                case POOL_ERROR:
                    pool_log("do_child: exits with status 1 due to error");
                    child_exit(1);
                    break;

                /* fatal error occurred. just exit myself... */
                case POOL_FATAL:
                    notice_backend_error(1);
                    child_exit(1);
                    break;

                /* not implemented yet; the value returned by do_accept()
                 * is discarded here */
                case POOL_IDLE:
                    do_accept(unix_fd, inet_fd, &timeout);
                    pool_debug("accept while idle");
                    break;

                default:
                    break;
            }

            if (status != POOL_CONTINUE)
                break;
        }

        /* Destroy session context */
        pool_session_context_destroy();

        /* Mark this connection pool is not connected from frontend */
        pool_coninfo_unset_frontend_connected(pool_get_process_context()->proc_id, pool_pool_index());

        accepted = 0;
        connection_count_down();

        /* re-arm the accept timeout with the full child life time */
        timeout.tv_sec = pool_config->child_life_time;
        timeout.tv_usec = 0;

        /* count this session if child_max_connections is enabled */
        if ( pool_config->child_max_connections > 0 )
            connections_count++;

        /* check if maximum connections count for this child reached */
        if ( ( pool_config->child_max_connections > 0 ) &&
             ( connections_count >= pool_config->child_max_connections ) )
        {
            pool_log("child exiting, %d connections reached", pool_config->child_max_connections);
            send_frontend_exits();
            child_exit(2);
        }
    }
    /* placed after the endless loop above, which has no break at this level */
    child_exit(0);
}