Пример #1
0
/*
 * StartReplication -- handle a START_REPLICATION command.
 *
 * Steps, in order: register with the postmaster as a WAL sender, refuse the
 * request when wal_level is 'minimal', enter the CATCHUP state, send the
 * CopyBothResponse ('W') message to the client, and remember
 * cmd->startpoint in sentPtr as the position to start shipping WAL from.
 */
static void
StartReplication(StartReplicationCmd *cmd)
{
    StringInfoData copy_both_msg;

    /*
     * Declare ourselves a WAL sender to the postmaster.  From then on the
     * postmaster lets this process outlive the bgwriter and kills it last
     * during shutdown, which gives us the opportunity to stream every
     * remaining WAL record, the shutdown checkpoint included.  This is a
     * one-way step: no WAL records may be written by us afterwards.
     */
    MarkPostmasterChildWalSender();
    SendPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE);

    /*
     * Refuse to stream if WAL does not carry enough information for
     * log-shipping.
     *
     * Only the current wal_level is examined.  pg_xlog may still hold older
     * WAL generated under 'minimal' even when the setting has since been
     * raised, so this is a courtesy check meant to produce a helpful error
     * message, not a guarantee.
     */
    if (wal_level == WAL_LEVEL_MINIMAL)
    {
        ereport(FATAL,
                (errcode(ERRCODE_CANNOT_CONNECT_NOW),
                 errmsg("standby connections not allowed because wal_level=minimal")));
    }

    /*
     * A freshly connected standby starts out behind the primary.  Some
     * features (synchronous replication, for one) need this initial catchup
     * phase to be an explicit, observable state so that the later switch to
     * streaming can trigger actions.  The phase may last a long time, hence
     * the need to be able to monitor it.
     */
    WalSndSetState(WALSNDSTATE_CATCHUP);

    /*
     * Build and send the CopyBothResponse: message type 'W', one zero byte,
     * then a zero written as a 2-byte integer.  Flush so the client sees it
     * before streaming starts.
     */
    pq_beginmessage(&copy_both_msg, 'W');
    pq_sendbyte(&copy_both_msg, 0);
    pq_sendint(&copy_both_msg, 0, 2);
    pq_endmessage(&copy_both_msg);
    pq_flush();

    /* Shipping of xlog records begins at the position the client asked for */
    sentPtr = cmd->startpoint;
}
Пример #2
0
/*
 * Main loop of walsender process.
 *
 * Each iteration resets the latch, bails out if the postmaster is gone,
 * handles SIGHUP and shutdown requests, processes client replies, sends more
 * WAL when the output buffer is empty, flushes, and finally sleeps on the
 * latch/socket when there is nothing more to do right now.
 *
 * The loop is left via "break" only when flushing to the client fails or the
 * replication timeout expires; in both cases the process then ends through
 * proc_exit(0).  A requested shutdown exits from inside the loop instead.
 * The int return value exists only to satisfy the compiler.
 */
static int
WalSndLoop(void)
{
	char	   *output_message;
	bool		caughtup = false;

	/*
	 * Allocate buffer that will be used for each output message.  We do this
	 * just once to reduce palloc overhead.  The buffer must be made large
	 * enough for maximum-sized messages: one leading byte, the
	 * WalDataMessageHeader, and up to MAX_SEND_SIZE bytes of payload.
	 */
	output_message = palloc(1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE);

	/*
	 * Allocate buffer that will be used for processing reply messages.  As
	 * above, do this just once to reduce palloc overhead.
	 */
	initStringInfo(&reply_message);

	/*
	 * Initialize the last reply timestamp.  The replication timeout below is
	 * measured from this value, so the clock starts now.
	 */
	last_reply_timestamp = GetCurrentTimestamp();

	/* Loop forever, unless we get an error */
	for (;;)
	{
		/*
		 * Clear any already-pending wakeups.  This happens before the
		 * conditions below are examined, so a wakeup arriving afterwards is
		 * still seen by the wait at the bottom of the loop.
		 */
		ResetLatch(&MyWalSnd->latch);

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive())
			exit(1);

		/* Process any requests or signals received recently */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
			SyncRepInitConfig();
		}

		/* Normal exit from the walsender is here */
		if (walsender_shutdown_requested)
		{
			/* Inform the standby that XLOG streaming is done */
			pq_puttextmessage('C', "COPY 0");
			pq_flush();

			proc_exit(0);
		}

		/* Check for input from the client */
		ProcessRepliesIfAny();

		/*
		 * If we don't have any pending data in the output buffer, try to send
		 * some more.  If there is some, we don't bother to call XLogSend
		 * again until we've flushed it ... but we'd better assume we are not
		 * caught up.
		 */
		if (!pq_is_send_pending())
			XLogSend(output_message, &caughtup);
		else
			caughtup = false;

		/* Try to flush pending output to the client; nonzero means failure */
		if (pq_flush_if_writable() != 0)
			break;

		/* If nothing remains to be sent right now ... */
		if (caughtup && !pq_is_send_pending())
		{
			/*
			 * If we're in catchup state, move to streaming.  This is an
			 * important state change for users to know about, since before
			 * this point data loss might occur if the primary dies and we
			 * need to failover to the standby. The state change is also
			 * important for synchronous replication, since commits that
			 * started to wait at that point might wait for some time.
			 */
			if (MyWalSnd->state == WALSNDSTATE_CATCHUP)
			{
				ereport(DEBUG1,
						(errmsg("standby \"%s\" has now caught up with primary",
								application_name)));
				WalSndSetState(WALSNDSTATE_STREAMING);
			}

			/*
			 * When SIGUSR2 arrives, we send any outstanding logs up to the
			 * shutdown checkpoint record (i.e., the latest record) and exit.
			 * This may be a normal termination at shutdown, or a promotion,
			 * the walsender is not sure which.
			 */
			if (walsender_ready_to_stop)
			{
				/* ... let's just be real sure we're caught up ... */
				XLogSend(output_message, &caughtup);
				if (caughtup && !pq_is_send_pending())
				{
					/*
					 * Go straight back to the top of the loop, where the
					 * shutdown request is acted upon.
					 */
					walsender_shutdown_requested = true;
					continue;		/* don't want to wait more */
				}
			}
		}

		/*
		 * We don't block if not caught up, unless there is unsent data
		 * pending in which case we'd better block until the socket is
		 * write-ready.  This test is only needed for the case where XLogSend
		 * loaded a subset of the available data but then pq_flush_if_writable
		 * flushed it all --- we should immediately try to send more.
		 */
		if (caughtup || pq_is_send_pending())
		{
			TimestampTz finish_time = 0;
			long		sleeptime = -1;
			int			wakeEvents;

			/*
			 * Always wake on latch set, postmaster death, or incoming data;
			 * additionally wake on write-readiness only while output is
			 * still queued.
			 */
			wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH |
				WL_SOCKET_READABLE;
			if (pq_is_send_pending())
				wakeEvents |= WL_SOCKET_WRITEABLE;

			/*
			 * Determine time until replication timeout.  When no timeout is
			 * configured, sleeptime stays -1 and WL_TIMEOUT is not requested.
			 */
			if (replication_timeout > 0)
			{
				long		secs;
				int			usecs;

				finish_time = TimestampTzPlusMilliseconds(last_reply_timestamp,
														  replication_timeout);
				TimestampDifference(GetCurrentTimestamp(),
									finish_time, &secs, &usecs);
				/* Convert the remaining time to milliseconds */
				sleeptime = secs * 1000 + usecs / 1000;
				/* Avoid Assert in WaitLatchOrSocket if timeout is past */
				if (sleeptime < 0)
					sleeptime = 0;
				wakeEvents |= WL_TIMEOUT;
			}

			/* Sleep until something happens or replication timeout */
			WaitLatchOrSocket(&MyWalSnd->latch, wakeEvents,
							  MyProcPort->sock, sleeptime);

			/*
			 * Check for replication timeout.  Note we ignore the corner case
			 * possibility that the client replied just as we reached the
			 * timeout ... he's supposed to reply *before* that.
			 */
			if (replication_timeout > 0 &&
				GetCurrentTimestamp() >= finish_time)
			{
				/*
				 * Since typically expiration of replication timeout means
				 * communication problem, we don't send the error message to
				 * the standby.
				 */
				ereport(COMMERROR,
						(errmsg("terminating walsender process due to replication timeout")));
				break;
			}
		}
	}

	/*
	 * Get here on send failure or replication timeout.  Clean up and exit.
	 *
	 * Reset whereToSendOutput to prevent ereport from attempting to send any
	 * more messages to the standby.
	 */
	if (whereToSendOutput == DestRemote)
		whereToSendOutput = DestNone;

	proc_exit(0);
	return 1;					/* keep the compiler quiet */
}
Пример #3
0
/*
 * StartReplication -- handle a START_REPLICATION command.
 *
 * Steps, in order: register with the postmaster as a WAL sender, request our
 * own termination if we are a cascading walsender on a server that is no
 * longer in recovery, enter the CATCHUP state, send the CopyBothResponse
 * ('W') message, and remember cmd->startpoint in sentPtr as the position to
 * start shipping WAL from.
 */
static void
StartReplication(StartReplicationCmd *cmd)
{
	StringInfoData copy_both_msg;

	/*
	 * Declare ourselves a WAL sender to the postmaster.  From then on the
	 * postmaster lets this process outlive the bgwriter and kills it last
	 * during shutdown, which gives us the opportunity to stream every
	 * remaining WAL record, the shutdown checkpoint included.  This is a
	 * one-way step: no WAL records may be written by us afterwards.
	 */
	MarkPostmasterChildWalSender();
	SendPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE);

	/*
	 * Close a race with standby promotion.
	 *
	 * On promotion of a cascading standby the postmaster sends SIGUSR2 to the
	 * cascading walsenders to make them quit, but only to processes already
	 * marked as WAL senders.  A walsender that marks itself just after the
	 * signal went out would never receive it and would wrongly survive the
	 * promotion.
	 *
	 * So if we are a cascading walsender and recovery has already ended, act
	 * as the SIGUSR2 handler would: set walsender_ready_to_stop, which makes
	 * this walsender terminate later on.
	 *
	 * The postmaster normally logs the termination of cascading walsenders,
	 * but it skips that message when it believes none exist, which is what
	 * happens if this process was the only one and had not registered yet.
	 * We therefore always log here.  An occasional duplicate message is
	 * possible but unlikely, and not worth extra code to suppress.
	 */
	if (am_cascading_walsender && !RecoveryInProgress())
	{
		ereport(LOG,
				(errmsg("terminating walsender process to force cascaded standby "
						"to update timeline and reconnect")));
		walsender_ready_to_stop = true;
	}

	/*
	 * No wal_level check is made here: sufficient WAL logging for
	 * log-shipping is assumed because PostmasterMain() verifies it.
	 *
	 * NOTE: wal_level changes only at shutdown, so visible WAL written under
	 * wal_level='minimal' is unlikely in most cases.
	 */

	/*
	 * A freshly connected standby starts out behind the primary.  Some
	 * features (synchronous replication, for one) need this initial catchup
	 * phase to be an explicit, observable state so that the later switch to
	 * streaming can trigger actions.  The phase may last a long time, hence
	 * the need to be able to monitor it.
	 */
	WalSndSetState(WALSNDSTATE_CATCHUP);

	/*
	 * Build and send the CopyBothResponse: message type 'W', one zero byte,
	 * then a zero written as a 2-byte integer.  Flush so the client sees it
	 * before streaming starts.
	 */
	pq_beginmessage(&copy_both_msg, 'W');
	pq_sendbyte(&copy_both_msg, 0);
	pq_sendint(&copy_both_msg, 0, 2);
	pq_endmessage(&copy_both_msg);
	pq_flush();

	/* Shipping of xlog records begins at the position the client asked for */
	sentPtr = cmd->startpoint;
}
Пример #4
0
/*
 * WalSndHandshake -- serve walreceiver commands until streaming begins.
 *
 * Reads one frontend message at a time.  Query ('Q') messages are handed to
 * HandleReplicationCommand(); the function returns as soon as that reports
 * that replication has started.  A terminate ('X') message or EOF ends the
 * process via proc_exit(0); any other message type is reported at FATAL.
 */
static void
WalSndHandshake(void)
{
	StringInfoData cmd_buf;

	initStringInfo(&cmd_buf);

	for (;;)
	{
		int			msgtype;

		/* Advertise that we are idle in the startup phase */
		WalSndSetState(WALSNDSTATE_STARTUP);
		set_ps_display("idle", false);

		/* Block until the first byte (message type) of a command arrives */
		msgtype = pq_getbyte();

		/*
		 * If the postmaster died while we were waiting, quit immediately so
		 * that nobody has to clean up its children by hand.
		 */
		if (!PostmasterIsAlive())
			exit(1);

		/* Pick up a configuration reload requested while we were waiting */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/*
		 * Fetch the body of the message.  Having already received the type
		 * byte, this is not expected to block.  A failure here is treated
		 * like EOF; a suitable message has already been logged.
		 */
		if (msgtype != EOF && pq_getmessage(&cmd_buf, 0))
			msgtype = EOF;

		/* Only a very small set of message types is legal in this phase */
		switch (msgtype)
		{
			case 'Q':			/* Query message */
				{
					const char *command = pq_getmsgstring(&cmd_buf);

					pq_getmsgend(&cmd_buf);

					/* Done once the command has switched us to streaming */
					if (HandleReplicationCommand(command))
						return;
				}
				break;

			case 'X':
				/* standby is closing the connection */
				proc_exit(0);

			case EOF:
				/* standby disconnected unexpectedly */
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("unexpected EOF on standby connection")));
				proc_exit(0);

			default:
				ereport(FATAL,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("invalid standby handshake message type %d", msgtype)));
		}
	}
}
Пример #5
0
/*
 * Main loop of walsender process.
 *
 * Each iteration bails out if the postmaster is gone, handles SIGHUP and
 * shutdown requests, sends more WAL when the output buffer is empty, flushes,
 * handles a pending stop request (SIGUSR2), sleeps on the latch/socket when
 * there is nothing to do, updates the catchup/streaming state, and finally
 * processes replies from the client.
 *
 * The loop is left via "break" only when flushing to the client fails or the
 * replication timeout expires; in both cases the process then ends through
 * proc_exit(0).  A requested shutdown exits from inside the loop instead.
 * The int return value exists only to satisfy the compiler.
 */
static int
WalSndLoop(void)
{
    char	   *output_message;
    bool		caughtup = false;

    /*
     * Allocate buffer that will be used for each output message.  We do this
     * just once to reduce palloc overhead.  The buffer must be made large
     * enough for maximum-sized messages: one leading byte, the
     * WalDataMessageHeader, and up to MAX_SEND_SIZE bytes of payload.
     */
    output_message = palloc(1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE);

    /*
     * Allocate buffer that will be used for processing reply messages.  As
     * above, do this just once to reduce palloc overhead.
     */
    initStringInfo(&reply_message);

    /*
     * Initialize the last reply timestamp.  The replication timeout below is
     * measured from this value, so the clock starts now.
     */
    last_reply_timestamp = GetCurrentTimestamp();

    /* Loop forever, unless we get an error */
    for (;;)
    {
        /*
         * Emergency bailout if postmaster has died.  This is to avoid the
         * necessity for manual cleanup of all postmaster children.
         */
        if (!PostmasterIsAlive(true))
            exit(1);

        /* Process any requests or signals received recently */
        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);
            SyncRepInitConfig();
        }

        /* Normal exit from the walsender is here */
        if (walsender_shutdown_requested)
        {
            /* Inform the standby that XLOG streaming was done */
            pq_puttextmessage('C', "COPY 0");
            pq_flush();

            proc_exit(0);
        }

        /*
         * If we don't have any pending data in the output buffer, try to send
         * some more.
         */
        if (!pq_is_send_pending())
        {
            XLogSend(output_message, &caughtup);

            /*
             * Even if we wrote all the WAL that was available when we started
             * sending, more might have arrived while we were sending this
             * batch. We had the latch set while sending, so we have not
             * received any signals from that time. Let's arm the latch again,
             * and after that check that we're still up-to-date.
             */
            if (caughtup && !pq_is_send_pending())
            {
                ResetLatch(&MyWalSnd->latch);

                XLogSend(output_message, &caughtup);
            }
        }

        /* Flush pending output to the client; nonzero means failure */
        if (pq_flush_if_writable() != 0)
            break;

        /*
         * When SIGUSR2 arrives, we send any outstanding logs up to the
         * shutdown checkpoint record (i.e., the latest record) and exit.
         * The shutdown flag set here is acted upon at the top of the next
         * iteration; the sleep below is skipped once it is set.
         */
        if (walsender_ready_to_stop && !pq_is_send_pending())
        {
            XLogSend(output_message, &caughtup);
            ProcessRepliesIfAny();
            if (caughtup && !pq_is_send_pending())
                walsender_shutdown_requested = true;
        }

        /*
         * Sleep only when there is nothing to send or the output buffer still
         * has to drain, and no SIGHUP or shutdown request is waiting to be
         * handled.
         */
        if ((caughtup || pq_is_send_pending()) &&
                !got_SIGHUP &&
                !walsender_shutdown_requested)
        {
            TimestampTz finish_time = 0;
            long		sleeptime;

            /* Reschedule replication timeout */
            if (replication_timeout > 0)
            {
                long		secs;
                int			usecs;

                finish_time = TimestampTzPlusMilliseconds(last_reply_timestamp,
                              replication_timeout);
                TimestampDifference(GetCurrentTimestamp(),
                                    finish_time, &secs, &usecs);
                /* Remaining time in milliseconds, capped at WalSndDelay */
                sleeptime = secs * 1000 + usecs / 1000;
                if (WalSndDelay < sleeptime)
                    sleeptime = WalSndDelay;
            }
            else
            {
                /*
                 * XXX: Without timeout, we don't really need the periodic
                 * wakeups anymore, WaitLatchOrSocket should reliably wake up
                 * as soon as something interesting happens.
                 */
                sleeptime = WalSndDelay;
            }

            /*
             * Sleep.  NOTE(review): the two boolean arguments presumably
             * select waiting for socket readability (always) and
             * writability (only while output is pending) -- confirm against
             * the WaitLatchOrSocket declaration, including the unit it
             * expects for the timeout.
             */
            WaitLatchOrSocket(&MyWalSnd->latch, MyProcPort->sock,
                              true, pq_is_send_pending(),
                              sleeptime);

            /* Check for replication timeout */
            if (replication_timeout > 0 &&
                    GetCurrentTimestamp() >= finish_time)
            {
                /*
                 * Since typically expiration of replication timeout means
                 * communication problem, we don't send the error message to
                 * the standby.
                 */
                ereport(COMMERROR,
                        (errmsg("terminating walsender process due to replication timeout")));
                break;
            }
        }

        /*
         * If we're in catchup state, see if it's time to move to streaming.
         * This is an important state change for users, since before this
         * point data loss might occur if the primary dies and we need to
         * failover to the standby. The state change is also important for
         * synchronous replication, since commits that started to wait at that
         * point might wait for some time.
         */
        if (MyWalSnd->state == WALSNDSTATE_CATCHUP && caughtup)
        {
            ereport(DEBUG1,
                    (errmsg("standby \"%s\" has now caught up with primary",
                            application_name)));
            WalSndSetState(WALSNDSTATE_STREAMING);
        }

        /* Check for input from the client before starting the next round */
        ProcessRepliesIfAny();
    }

    /*
     * Get here on send failure or replication timeout.  Clean up and exit.
     *
     * Reset whereToSendOutput to prevent ereport from attempting to send any
     * more messages to the standby.
     */
    if (whereToSendOutput == DestRemote)
        whereToSendOutput = DestNone;

    proc_exit(0);
    return 1;					/* keep the compiler quiet */
}
Пример #6
0
/*
 * Main loop of walsender process.
 *
 * Each iteration bails out if the postmaster is gone, handles SIGHUP, stop
 * (SIGUSR2) and shutdown requests, sends WAL via XLogSend(), naps on the
 * latch/socket when the previous round had sent everything, and records
 * whether the standby is caught up.
 *
 * In this version XLogSend() reports failure through a false return value;
 * every such failure breaks out of the loop, after which the process ends
 * through proc_exit(0).  A requested shutdown exits from inside the loop
 * instead.  The int return value exists only to satisfy the compiler.
 */
static int
WalSndLoop(void)
{
	char	   *output_message;
	bool		caughtup = false;

	/*
	 * Allocate buffer that will be used for each output message.  We do this
	 * just once to reduce palloc overhead.  The buffer must be made large
	 * enough for maximum-sized messages: one leading byte, the
	 * WalDataMessageHeader, and up to MAX_SEND_SIZE bytes of payload.
	 */
	output_message = palloc(1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE);

	/* Loop forever, unless we get an error */
	for (;;)
	{
		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/* Process any requests or signals received recently */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/*
		 * When SIGUSR2 arrives, we send all outstanding logs up to the
		 * shutdown checkpoint record (i.e., the latest record) and exit.
		 * Shutdown is requested only once XLogSend reports we are caught up;
		 * otherwise we keep looping and sending.
		 */
		if (walsender_ready_to_stop)
		{
			if (!XLogSend(output_message, &caughtup))
				break;
			if (caughtup)
				walsender_shutdown_requested = true;
		}

		/* Normal exit from the walsender is here */
		if (walsender_shutdown_requested)
		{
			/* Inform the standby that XLOG streaming was done */
			pq_puttextmessage('C', "COPY 0");
			pq_flush();

			proc_exit(0);
		}

		/*
		 * If we had sent all accumulated WAL in last round, nap for the
		 * configured time before retrying.
		 */
		if (caughtup)
		{
			/*
			 * Even if we wrote all the WAL that was available when we started
			 * sending, more might have arrived while we were sending this
			 * batch. We had the latch set while sending, so we have not
			 * received any signals from that time. Let's arm the latch
			 * again, and after that check that we're still up-to-date.
			 */
			ResetLatch(&MyWalSnd->latch);

			if (!XLogSend(output_message, &caughtup))
				break;

			/*
			 * Nap only if still caught up and no signal-driven work (config
			 * reload, stop or shutdown request) is waiting to be handled.
			 */
			if (caughtup && !got_SIGHUP && !walsender_ready_to_stop && !walsender_shutdown_requested)
			{
				/*
				 * XXX: We don't really need the periodic wakeups anymore,
				 * WaitLatchOrSocket should reliably wake up as soon as
				 * something interesting happens.
				 */

				/*
				 * Sleep.  NOTE(review): the multiplication by 1000 presumably
				 * converts WalSndDelay from milliseconds to the microseconds
				 * this WaitLatchOrSocket variant expects -- confirm against
				 * its declaration.
				 */
				WaitLatchOrSocket(&MyWalSnd->latch, MyProcPort->sock,
								  WalSndDelay * 1000L);
			}

			/* Check if the connection was closed */
			CheckClosedConnection();
		}
		else
		{
			/* Attempt to send the log once every loop */
			if (!XLogSend(output_message, &caughtup))
				break;
		}

		/*
		 * Update our state to indicate if we're behind or not.  Unlike a
		 * one-way transition, this is recomputed every iteration, so the
		 * state can move back from STREAMING to CATCHUP.
		 */
		WalSndSetState(caughtup ? WALSNDSTATE_STREAMING : WALSNDSTATE_CATCHUP);
	}

	/*
	 * Get here on send failure.  Clean up and exit.
	 *
	 * Reset whereToSendOutput to prevent ereport from attempting to send any
	 * more messages to the standby.
	 */
	if (whereToSendOutput == DestRemote)
		whereToSendOutput = DestNone;

	proc_exit(0);
	return 1;					/* keep the compiler quiet */
}