/* * START_REPLICATION */ static void StartReplication(StartReplicationCmd *cmd) { StringInfoData buf; /* * Let postmaster know that we're streaming. Once we've declared us as a * WAL sender process, postmaster will let us outlive the bgwriter and * kill us last in the shutdown sequence, so we get a chance to stream all * remaining WAL at shutdown, including the shutdown checkpoint. Note that * there's no going back, and we mustn't write any WAL records after this. */ MarkPostmasterChildWalSender(); SendPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE); /* * Check that we're logging enough information in the WAL for * log-shipping. * * NOTE: This only checks the current value of wal_level. Even if the * current setting is not 'minimal', there can be old WAL in the pg_xlog * directory that was created with 'minimal'. So this is not bulletproof, * the purpose is just to give a user-friendly error message that hints * how to configure the system correctly. */ if (wal_level == WAL_LEVEL_MINIMAL) ereport(FATAL, (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("standby connections not allowed because wal_level=minimal"))); /* * When we first start replication the standby will be behind the primary. * For some applications, for example, synchronous replication, it is * important to have a clear state for this initial catchup mode, so we * can trigger actions when we change streaming state later. We may stay * in this state for a long time, which is exactly why we want to be able * to monitor whether or not we are still here. */ WalSndSetState(WALSNDSTATE_CATCHUP); /* Send a CopyBothResponse message, and start streaming */ pq_beginmessage(&buf, 'W'); pq_sendbyte(&buf, 0); pq_sendint(&buf, 0, 2); pq_endmessage(&buf); pq_flush(); /* * Initialize position to the received one, then the xlog records begin to * be shipped from that position */ sentPtr = cmd->startpoint; }
/* * Rotate log file * * Permission checking for this function is managed through the normal * GRANT system. */ Datum pg_rotate_logfile(PG_FUNCTION_ARGS) { if (!Logging_collector) { ereport(WARNING, (errmsg("rotation not possible because log collection not active"))); PG_RETURN_BOOL(false); } SendPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE); PG_RETURN_BOOL(true); }
/* * SIInsertDataEntry * Add a new invalidation message to the buffer. * * If we are unable to insert the message because the buffer is full, * then clear the buffer and assert the "reset" flag to each backend. * This will cause all the backends to discard *all* invalidatable state. * * Returns true for normal successful insertion, false if had to reset. */ bool SIInsertDataEntry(SISeg *segP, SharedInvalidationMessage *data) { int numMsgs = segP->maxMsgNum - segP->minMsgNum; /* Is the buffer full? */ if (numMsgs >= MAXNUMMESSAGES) { /* * Don't panic just yet: slowest backend might have consumed some * messages but not yet have done SIDelExpiredDataEntries() to advance * minMsgNum. So, make sure minMsgNum is up-to-date. */ SIDelExpiredDataEntries(segP); numMsgs = segP->maxMsgNum - segP->minMsgNum; if (numMsgs >= MAXNUMMESSAGES) { /* Yup, it's definitely full, no choice but to reset */ SISetProcStateInvalid(segP); return false; } } /* * Try to prevent table overflow. When the table is 70% full send a * WAKEN_CHILDREN request to the postmaster. The postmaster will send a * SIGUSR1 signal to all the backends, which will cause sinval.c to read * any pending SI entries. * * This should never happen if all the backends are actively executing * queries, but if a backend is sitting idle then it won't be starting * transactions and so won't be reading SI entries. */ if (numMsgs == (MAXNUMMESSAGES * 70 / 100) && IsUnderPostmaster) { elog(DEBUG4, "SI table is 70%% full, signaling postmaster"); SendPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN); } /* * Insert new message into proper slot of circular buffer */ segP->buffer[segP->maxMsgNum % MAXNUMMESSAGES] = *data; segP->maxMsgNum++; return true; }
/* * Rotate log file */ Datum pg_rotate_logfile(PG_FUNCTION_ARGS) { if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to rotate log files")))); if (!Logging_collector) { ereport(WARNING, (errmsg("rotation not possible because log collection not active"))); PG_RETURN_BOOL(false); } SendPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE); PG_RETURN_BOOL(true); }
/* * Request postmaster to start walreceiver. * * recptr indicates the position where streaming should begin, and conninfo * is a libpq connection string to use. */ void RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo) { /* use volatile pointer to prevent code rearrangement */ volatile WalRcvData *walrcv = WalRcv; pg_time_t now = (pg_time_t) time(NULL); /* * We always start at the beginning of the segment. That prevents a broken * segment (i.e., with no records in the first half of a segment) from * being created by XLOG streaming, which might cause trouble later on if * the segment is e.g archived. */ if (recptr.xrecoff % XLogSegSize != 0) recptr.xrecoff -= recptr.xrecoff % XLogSegSize; SpinLockAcquire(&walrcv->mutex); /* It better be stopped before we try to restart it */ Assert(walrcv->walRcvState == WALRCV_STOPPED); if (conninfo != NULL) strlcpy((char *) walrcv->conninfo, conninfo, MAXCONNINFO); else walrcv->conninfo[0] = '\0'; walrcv->walRcvState = WALRCV_STARTING; walrcv->startTime = now; /* * If this is the first startup of walreceiver, we initialize receivedUpto * and latestChunkStart to receiveStart. */ if (walrcv->receiveStart.xlogid == 0 && walrcv->receiveStart.xrecoff == 0) { walrcv->receivedUpto = recptr; walrcv->latestChunkStart = recptr; } walrcv->receiveStart = recptr; SpinLockRelease(&walrcv->mutex); SendPostmasterSignal(PMSIGNAL_START_WALRECEIVER); }
/* * Rotate log file * * This function is kept to support adminpack 1.0. */ Datum pg_rotate_logfile(PG_FUNCTION_ARGS) { if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to rotate log files with adminpack 1.0"), errhint("Consider using pg_logfile_rotate(), which is part of core, instead.")))); if (!Logging_collector) { ereport(WARNING, (errmsg("rotation not possible because log collection not active"))); PG_RETURN_BOOL(false); } SendPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE); PG_RETURN_BOOL(true); }
/* * START_REPLICATION */ static void StartReplication(StartReplicationCmd *cmd) { StringInfoData buf; /* * Let postmaster know that we're streaming. Once we've declared us as a * WAL sender process, postmaster will let us outlive the bgwriter and * kill us last in the shutdown sequence, so we get a chance to stream all * remaining WAL at shutdown, including the shutdown checkpoint. Note that * there's no going back, and we mustn't write any WAL records after this. */ MarkPostmasterChildWalSender(); SendPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE); /* * When promoting a cascading standby, postmaster sends SIGUSR2 to * any cascading walsenders to kill them. But there is a corner-case where * such walsender fails to receive SIGUSR2 and survives a standby promotion * unexpectedly. This happens when postmaster sends SIGUSR2 before * the walsender marks itself as a WAL sender, because postmaster sends * SIGUSR2 to only the processes marked as a WAL sender. * * To avoid this corner-case, if recovery is NOT in progress even though * the walsender is cascading one, we do the same thing as SIGUSR2 signal * handler does, i.e., set walsender_ready_to_stop to true. Which causes * the walsender to end later. * * When terminating cascading walsenders, usually postmaster writes * the log message announcing the terminations. But there is a race condition * here. If there is no walsender except this process before reaching here, * postmaster thinks that there is no walsender and suppresses that * log message. To handle this case, we always emit that log message here. * This might cause duplicate log messages, but which is less likely to happen, * so it's not worth writing some code to suppress them. */ if (am_cascading_walsender && !RecoveryInProgress()) { ereport(LOG, (errmsg("terminating walsender process to force cascaded standby " "to update timeline and reconnect"))); walsender_ready_to_stop = true; } /* * We assume here that we're logging enough information in the WAL for * log-shipping, since this is checked in PostmasterMain(). * * NOTE: wal_level can only change at shutdown, so in most cases it is * difficult for there to be WAL data that we can still see that was written * at wal_level='minimal'. */ /* * When we first start replication the standby will be behind the primary. * For some applications, for example, synchronous replication, it is * important to have a clear state for this initial catchup mode, so we * can trigger actions when we change streaming state later. We may stay * in this state for a long time, which is exactly why we want to be able * to monitor whether or not we are still here. */ WalSndSetState(WALSNDSTATE_CATCHUP); /* Send a CopyBothResponse message, and start streaming */ pq_beginmessage(&buf, 'W'); pq_sendbyte(&buf, 0); pq_sendint(&buf, 0, 2); pq_endmessage(&buf); pq_flush(); /* * Initialize position to the received one, then the xlog records begin to * be shipped from that position */ sentPtr = cmd->startpoint; }
/* * Request postmaster to start walreceiver. * * recptr indicates the position where streaming should begin, conninfo * is a libpq connection string to use, and slotname is, optionally, the name * of a replication slot to acquire. */ void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo, const char *slotname) { WalRcvData *walrcv = WalRcv; bool launch = false; pg_time_t now = (pg_time_t) time(NULL); /* * We always start at the beginning of the segment. That prevents a broken * segment (i.e., with no records in the first half of a segment) from * being created by XLOG streaming, which might cause trouble later on if * the segment is e.g archived. */ if (recptr % XLogSegSize != 0) recptr -= recptr % XLogSegSize; SpinLockAcquire(&walrcv->mutex); /* It better be stopped if we try to restart it */ Assert(walrcv->walRcvState == WALRCV_STOPPED || walrcv->walRcvState == WALRCV_WAITING); if (conninfo != NULL) strlcpy((char *) walrcv->conninfo, conninfo, MAXCONNINFO); else walrcv->conninfo[0] = '\0'; if (slotname != NULL) strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN); else walrcv->slotname[0] = '\0'; if (walrcv->walRcvState == WALRCV_STOPPED) { launch = true; walrcv->walRcvState = WALRCV_STARTING; } else walrcv->walRcvState = WALRCV_RESTARTING; walrcv->startTime = now; /* * If this is the first startup of walreceiver (on this timeline), * initialize receivedUpto and latestChunkStart to the starting point. */ if (walrcv->receiveStart == 0 || walrcv->receivedTLI != tli) { walrcv->receivedUpto = recptr; walrcv->receivedTLI = tli; walrcv->latestChunkStart = recptr; } walrcv->receiveStart = recptr; walrcv->receiveStartTLI = tli; SpinLockRelease(&walrcv->mutex); if (launch) SendPostmasterSignal(PMSIGNAL_START_WALRECEIVER); else SetLatch(&walrcv->latch); }
/* * We have to cut&paste copde of GetNewTransactionId from varsup because we change way of advancing ShmemVariableCache->nextXid */ TransactionId DtmGetNewTransactionId(bool isSubXact) { TransactionId xid; XTM_INFO("%d: GetNewTransactionId\n", getpid()); /* * Workers synchronize transaction state at the beginning of each parallel * operation, so we can't account for new XIDs after that point. */ if (IsInParallelMode()) elog(ERROR, "cannot assign TransactionIds during a parallel operation"); /* * During bootstrap initialization, we return the special bootstrap * transaction id. */ if (IsBootstrapProcessingMode()) { Assert(!isSubXact); MyPgXact->xid = BootstrapTransactionId; return BootstrapTransactionId; } /* safety check, we should never get this far in a HS slave */ if (RecoveryInProgress()) elog(ERROR, "cannot assign TransactionIds during recovery"); LWLockAcquire(XidGenLock, LW_EXCLUSIVE); xid = DtmGetNextXid(); /*---------- * Check to see if it's safe to assign another XID. This protects against * catastrophic data loss due to XID wraparound. The basic rules are: * * If we're past xidVacLimit, start trying to force autovacuum cycles. * If we're past xidWarnLimit, start issuing warnings. * If we're past xidStopLimit, refuse to execute transactions, unless * we are running in single-user mode (which gives an escape hatch * to the DBA who somehow got past the earlier defenses). * * Note that this coding also appears in GetNewMultiXactId. *---------- */ if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidVacLimit)) { /* * For safety's sake, we release XidGenLock while sending signals, * warnings, etc. This is not so much because we care about * preserving concurrency in this situation, as to avoid any * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. */ TransactionId xidWarnLimit = ShmemVariableCache->xidWarnLimit; TransactionId xidStopLimit = ShmemVariableCache->xidStopLimit; TransactionId xidWrapLimit = ShmemVariableCache->xidWrapLimit; Oid oldest_datoid = ShmemVariableCache->oldestXidDB; LWLockRelease(XidGenLock); /* * To avoid swamping the postmaster with signals, we issue the autovac * request only once per 64K transaction starts. This still gives * plenty of chances before we get into real trouble. */ if (IsUnderPostmaster && (xid % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); if (IsUnderPostmaster && TransactionIdFollowsOrEquals(xid, xidStopLimit)) { char *oldest_datname = get_database_name(oldest_datoid); /* complain even if that DB has disappeared */ if (oldest_datname) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"", oldest_datname), errhint("Stop the postmaster and vacuum that database in single-user mode.\n" "You might also need to commit or roll back old prepared transactions."))); else ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("database is not accepting commands to avoid wraparound data loss in database with OID %u", oldest_datoid), errhint("Stop the postmaster and vacuum that database in single-user mode.\n" "You might also need to commit or roll back old prepared transactions."))); } else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit)) { char *oldest_datname = get_database_name(oldest_datoid); /* complain even if that DB has disappeared */ if (oldest_datname) ereport(WARNING, (errmsg("database \"%s\" must be vacuumed within %u transactions", oldest_datname, xidWrapLimit - xid), errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" "You might also need to commit or roll back old prepared transactions."))); else ereport(WARNING, (errmsg("database with OID %u must be vacuumed within %u transactions", oldest_datoid, xidWrapLimit - xid), errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" "You might also need to commit or roll back old prepared transactions."))); } /* Re-acquire lock and start over */ LWLockAcquire(XidGenLock, LW_EXCLUSIVE); xid = DtmGetNextXid(); } /* * If we are allocating the first XID of a new page of the commit log, * zero out that commit-log page before returning. We must do this while * holding XidGenLock, else another xact could acquire and commit a later * XID before we zero the page. Fortunately, a page of the commit log * holds 32K or more transactions, so we don't have to do this very often. * * Extend pg_subtrans and pg_commit_ts too. */ if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->nextXid)) { ExtendCLOG(xid); ExtendCommitTs(xid); ExtendSUBTRANS(xid); } /* * Now advance the nextXid counter. This must not happen until after we * have successfully completed ExtendCLOG() --- if that routine fails, we * want the next incoming transaction to try it again. We cannot assign * more XIDs until there is CLOG space for them. */ if (xid == ShmemVariableCache->nextXid) TransactionIdAdvance(ShmemVariableCache->nextXid); else Assert(TransactionIdPrecedes(xid, ShmemVariableCache->nextXid)); /* * We must store the new XID into the shared ProcArray before releasing * XidGenLock. This ensures that every active XID older than * latestCompletedXid is present in the ProcArray, which is essential for * correct OldestXmin tracking; see src/backend/access/transam/README. * * XXX by storing xid into MyPgXact without acquiring ProcArrayLock, we * are relying on fetch/store of an xid to be atomic, else other backends * might see a partially-set xid here. But holding both locks at once * would be a nasty concurrency hit. So for now, assume atomicity. * * Note that readers of PGXACT xid fields should be careful to fetch the * value only once, rather than assume they can read a value multiple * times and get the same answer each time. * * The same comments apply to the subxact xid count and overflow fields. * * A solution to the atomic-store problem would be to give each PGXACT its * own spinlock used only for fetching/storing that PGXACT's xid and * related fields. * * If there's no room to fit a subtransaction XID into PGPROC, set the * cache-overflowed flag instead. This forces readers to look in * pg_subtrans to map subtransaction XIDs up to top-level XIDs. There is a * race-condition window, in that the new XID will not appear as running * until its parent link has been placed into pg_subtrans. However, that * will happen before anyone could possibly have a reason to inquire about * the status of the XID, so it seems OK. (Snapshots taken during this * window *will* include the parent XID, so they will deliver the correct * answer later on when someone does have a reason to inquire.) */ { /* * Use volatile pointer to prevent code rearrangement; other backends * could be examining my subxids info concurrently, and we don't want * them to see an invalid intermediate state, such as incrementing * nxids before filling the array entry. Note we are assuming that * TransactionId and int fetch/store are atomic. */ volatile PGPROC *myproc = MyProc; volatile PGXACT *mypgxact = MyPgXact; if (!isSubXact) mypgxact->xid = xid; else { int nxids = mypgxact->nxids; if (nxids < PGPROC_MAX_CACHED_SUBXIDS) { myproc->subxids.xids[nxids] = xid; mypgxact->nxids = nxids + 1; } else mypgxact->overflowed = true; } } LWLockRelease(XidGenLock); return xid; }