예제 #1
0
파일: startup.c 프로젝트: Brar/postgres
/* Handle SIGHUP and SIGTERM signals of startup process */
void
HandleStartupProcInterrupts(void)
{
	/*
	 * Check if we were requested to re-read config file.
	 */
	if (got_SIGHUP)
	{
		got_SIGHUP = false;
		ProcessConfigFile(PGC_SIGHUP);
	}

	/*
	 * Check if we were requested to exit without finishing recovery.
	 */
	if (shutdown_requested)
		proc_exit(1);

	/*
	 * Emergency bailout if postmaster has died.  This is to avoid the
	 * necessity for manual cleanup of all postmaster children.
	 */
	if (IsUnderPostmaster && !PostmasterIsAlive())
		exit(1);
}
예제 #2
0
/*
 * TaskTrackerRunning checks if the task tracker process is running. To do this,
 * the function checks if the task tracker is configured to start up, and infers
 * from shared memory that the tracker hasn't received a shut down request.
 */
static bool
TaskTrackerRunning(void)
{
	WorkerTask *workerTask = NULL;
	bool postmasterAlive = true;
	bool taskTrackerRunning = true;

	/* if postmaster shut down, infer task tracker shut down from it */
	postmasterAlive = PostmasterIsAlive();
	if (!postmasterAlive)
	{
		return false;
	}

	/*
	 * When the task tracker receives a termination signal, it inserts a special
	 * marker task to the shared hash. We need to look up this marker task since
	 * the postmaster doesn't send a terminate signal to running backends.
	 */
	LWLockAcquire(&WorkerTasksSharedState->taskHashLock, LW_SHARED);

	workerTask = WorkerTasksHashFind(RESERVED_JOB_ID, SHUTDOWN_MARKER_TASK_ID);
	if (workerTask != NULL)
	{
		taskTrackerRunning = false;
	}

	LWLockRelease(&WorkerTasksSharedState->taskHashLock);

	return taskTrackerRunning;
}
예제 #3
0
/*
 * pgarch_ArchiverCopyLoop
 *
 * Archives all outstanding xlogs then returns
 */
static void
pgarch_ArchiverCopyLoop(void)
{
	char		xlog[MAX_XFN_CHARS + 1];

	if (!XLogArchiveCommandSet())
	{
		ereport(WARNING,
		   (errmsg("archive_mode enabled, yet archive_command is not set")));
		/* can't do anything if no command ... */
		return;
	}

	/*
	 * loop through all xlogs with archive_status of .ready and archive
	 * them...mostly we expect this to be a single file, though it is possible
	 * some backend will add files onto the list of those that need archiving
	 * while we are still copying earlier archives
	 */
	while (pgarch_readyXlog(xlog))
	{
		int			failures = 0;

		for (;;)
		{
			/*
			 * Do not initiate any more archive commands after receiving
			 * SIGTERM, nor after the postmaster has died unexpectedly. The
			 * first condition is to try to keep from having init SIGKILL the
			 * command, and the second is to avoid conflicts with another
			 * archiver spawned by a newer postmaster.
			 */
			if (got_SIGTERM || !PostmasterIsAlive(true))
				return;

			if (pgarch_archiveXlog(xlog))
			{
				/* successful */
				pgarch_archiveDone(xlog);
				break;			/* out of inner retry loop */
			}
			else
			{
				if (++failures >= NUM_ARCHIVE_RETRIES)
				{
					ereport(WARNING,
							(errmsg("transaction log file \"%s\" could not be archived: too many failures",
									xlog)));
					return;		/* give up archiving for now */
				}
				pg_usleep(1000000L);	/* wait a bit before retrying */
			}
		}
	}
}
예제 #4
0
inline void body() {

        /*
         * We run the copy loop immediately upon entry, in case there are
         * unarchived files left over from a previous database run (or maybe
         * the archiver died unexpectedly).  After that we wait for a signal
         * or timeout before doing more.
         */
        wakend = true;

        while(1)
        {
                /* Check for config update */
                if (got_SIGHUP>0)
                {
                        got_SIGHUP = 0;
                        ProcessConfigFile(PGC_SIGHUP);
			__rho_3_ = XLogArchivingActive();
			//	assume(__rho_3_>0);
			int tt = __rho_3_;
			  if (tt<=0)
                                break;                  /* user wants us to shut down */
                }
                /* Do what we're here for */
                if (wakend>0)
                {
                        wakend = 0;
                        pgarch_ArchiverCopyLoop();
			__rho_4_ = time(NULL);
                        last_copy_time = __rho_4_;
                }
                /*
                 * There shouldn't be anything for the archiver to do except to
                 * wait for a signal, ... however, the archiver exists to
                 * protect our data, so she wakes up occasionally to allow
                 * herself to be proactive. In particular this avoids getting
                 * stuck if a signal arrives just before we sleep.
                 */
                if (wakend<=0)
                {
		  //pg_usleep(PGARCH_AUTOWAKE_INTERVAL * 1000000L);

                        curtime = time(NULL);
                        if ((curtime - last_copy_time) >=
			     PGARCH_AUTOWAKE_INTERVAL)
                                wakend = true;
                }
		__rho_5_ = PostmasterIsAlive(true);
		dummy =  __rho_5_;
		if (dummy<=0) break;
        }

  while(1) { dummy=dummy; } L_return: return 0;
}
예제 #5
0
/*
 * pgarch_MainLoop
 *
 * Main loop for archiver
 */
static void
pgarch_MainLoop(void)
{
	time_t		last_copy_time = 0;

	/*
	 * We run the copy loop immediately upon entry, in case there are
	 * unarchived files left over from a previous database run (or maybe the
	 * archiver died unexpectedly).  After that we wait for a signal or
	 * timeout before doing more.
	 */
	wakened = true;

	do
	{
		/* Check for config update */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
			if (!XLogArchivingActive())
				break;			/* user wants us to shut down */
		}

		/* Do what we're here for */
		if (wakened)
		{
			wakened = false;
			pgarch_ArchiverCopyLoop();
			last_copy_time = time(NULL);
		}

		/*
		 * There shouldn't be anything for the archiver to do except to wait
		 * for a signal ... however, the archiver exists to protect our data,
		 * so she wakes up occasionally to allow herself to be proactive.
		 *
		 * On some platforms, signals won't interrupt the sleep.  To ensure we
		 * respond reasonably promptly when someone signals us, break down the
		 * sleep into 1-second increments, and check for interrupts after each
		 * nap.
		 */
		while (!(wakened || got_SIGHUP))
		{
			time_t		curtime;

			pg_usleep(1000000L);
			curtime = time(NULL);
			if ((unsigned int) (curtime - last_copy_time) >=
				(unsigned int) PGARCH_AUTOWAKE_INTERVAL)
				wakened = true;
		}
	} while (PostmasterIsAlive(true));
}
예제 #6
0
/*
 * pgarch_MainLoop
 *
 * Main loop for archiver
 */
static void
pgarch_MainLoop(void)
{
	time_t		last_copy_time = 0;
	time_t		curtime;

	/*
	 * We run the copy loop immediately upon entry, in case there are
	 * unarchived files left over from a previous database run (or maybe the
	 * archiver died unexpectedly).  After that we wait for a signal or
	 * timeout before doing more.
	 */
	wakened = true;

	do
	{

		/* Check for config update */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
			if (!XLogArchivingActive())
				break;			/* user wants us to shut down */
		}

		/* Do what we're here for */
		if (wakened)
		{
			wakened = false;
			pgarch_ArchiverCopyLoop();
			last_copy_time = time(NULL);
		}

		/*
		 * There shouldn't be anything for the archiver to do except to wait
		 * for a signal, ... however, the archiver exists to protect our data,
		 * so she wakes up occasionally to allow herself to be proactive. In
		 * particular this avoids getting stuck if a signal arrives just
		 * before we sleep.
		 */
		if (!wakened)
		{
			pg_usleep(PGARCH_AUTOWAKE_INTERVAL * 1000000L);

			curtime = time(NULL);
			if ((unsigned int) (curtime - last_copy_time) >=
				(unsigned int) PGARCH_AUTOWAKE_INTERVAL)
				wakened = true;
		}
	} while (PostmasterIsAlive(true));
}
예제 #7
0
/**
 * Main loop of the sender process. It wakes up every
 * gp_perfmon_segment_interval ms to send segment
 * information to perfmon
 */
static void
SegmentInfoSenderLoop(void)
{
	int rc;
	int counter;

	for (counter = 0;; counter += SEGMENT_INFO_LOOP_SLEEP_MS)
	{
		CHECK_FOR_INTERRUPTS();

		if (senderShutdownRequested)
		{
			break;
		}

		/* no need to live on if postmaster has died */
		if (!PostmasterIsAlive())
			exit(1);

		if (cluster_state_collect_hook)
			cluster_state_collect_hook();

		if (gp_enable_gpperfmon && counter >= gp_perfmon_segment_interval)
		{
			SegmentInfoSender();
			counter = 0;
		}

		/* Sleep a while. */
		rc = WaitLatch(&MyProc->procLatch,
				WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
				SEGMENT_INFO_LOOP_SLEEP_MS);
		ResetLatch(&MyProc->procLatch);

		/* emergency bailout if postmaster has died */
		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);
	} /* end server loop */

	return;
}
예제 #8
0
/*
 * pgarch_ArchiverCopyLoop
 *
 * Archives all outstanding xlogs then returns
 */
static void
pgarch_ArchiverCopyLoop(void)
{
	char		xlog[MAX_XFN_CHARS + 1];

	/*
	 * loop through all xlogs with archive_status of .ready and archive
	 * them...mostly we expect this to be a single file, though it is possible
	 * some backend will add files onto the list of those that need archiving
	 * while we are still copying earlier archives
	 */
	while (pgarch_readyXlog(xlog))
	{
		int			failures = 0;

		for (;;)
		{
			/* Abandon processing if we notice our postmaster has died */
			if (!PostmasterIsAlive(true))
				return;

			if (pgarch_archiveXlog(xlog))
			{
				/* successful */
				pgarch_archiveDone(xlog);
				break;			/* out of inner retry loop */
			}
			else
			{
				if (++failures >= NUM_ARCHIVE_RETRIES)
				{
					ereport(WARNING,
							(errmsg("transaction log file \"%s\" could not be archived: too many failures",
									xlog)));
					return;		/* give up archiving for now */
				}
				pg_usleep(1000000L);	/* wait a bit before retrying */
			}
		}
	}
}
예제 #9
0
파일: backoff.c 프로젝트: adam8157/gpdb
/**
 * Main loop of the sweeper process. It wakes up once in a while, marks backends as active
 * or not and re-calculates CPU usage among active backends.
 */
void
BackoffSweeperLoop(void)
{
	for (;;)
	{
		CHECK_FOR_INTERRUPTS();

		if (sweeperShutdownRequested)
			break;

		/* no need to live on if postmaster has died */
		if (!PostmasterIsAlive())
			exit(1);

		if (gp_enable_resqueue_priority)
			BackoffSweeper();

		Assert(gp_resqueue_priority_sweeper_interval > 0.0);
		/* Sleep a while. */
		pg_usleep(gp_resqueue_priority_sweeper_interval * 1000.0);
	}							/* end server loop */

	return;
}
예제 #10
0
파일: latch.c 프로젝트: Gordiychuk/postgres
/*
 * Wait using linux's epoll_wait(2).
 *
 * This is the preferrable wait method, as several readiness notifications are
 * delivered, without having to iterate through all of set->events. The return
 * epoll_event struct contain a pointer to our events, making association
 * easy.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	int			rc;
	WaitEvent  *cur_event;
	struct epoll_event *cur_epoll_event;

	/* Sleep */
	rc = epoll_wait(set->epoll_fd, set->epoll_ret_events,
					nevents, cur_timeout);

	/* Check return code */
	if (rc < 0)
	{
		/* EINTR is okay, otherwise complain */
		if (errno != EINTR)
		{
			waiting = false;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("epoll_wait() failed: %m")));
		}
		return 0;
	}
	else if (rc == 0)
	{
		/* timeout exceeded */
		return -1;
	}

	/*
	 * At least one event occurred, iterate over the returned epoll events
	 * until they're either all processed, or we've returned all the events
	 * the caller desired.
	 */
	for (cur_epoll_event = set->epoll_ret_events;
		 cur_epoll_event < (set->epoll_ret_events + rc) &&
		 returned_events < nevents;
		 cur_epoll_event++)
	{
		/* epoll's data pointer is set to the associated WaitEvent */
		cur_event = (WaitEvent *) cur_epoll_event->data.ptr;

		occurred_events->pos = cur_event->pos;
		occurred_events->user_data = cur_event->user_data;
		occurred_events->events = 0;

		if (cur_event->events == WL_LATCH_SET &&
			cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
		{
			/* There's data in the self-pipe, clear it. */
			drainSelfPipe();

			if (set->latch->is_set)
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_LATCH_SET;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events == WL_POSTMASTER_DEATH &&
				 cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
		{
			/*
			 * We expect an EPOLLHUP when the remote end is closed, but
			 * because we don't expect the pipe to become readable or to have
			 * any errors either, treat those cases as postmaster death, too.
			 *
			 * As explained in the WAIT_USE_SELECT implementation, select(2)
			 * may spuriously return. Be paranoid about that here too, a
			 * spurious WL_POSTMASTER_DEATH would be painful.
			 */
			if (!PostmasterIsAlive())
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_POSTMASTER_DEATH;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
		{
			Assert(cur_event->fd != PGINVALID_SOCKET);

			if ((cur_event->events & WL_SOCKET_READABLE) &&
				(cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP)))
			{
				/* data available in socket, or EOF */
				occurred_events->events |= WL_SOCKET_READABLE;
			}

			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
				(cur_epoll_event->events & (EPOLLOUT | EPOLLERR | EPOLLHUP)))
			{
				/* writable, or EOF */
				occurred_events->events |= WL_SOCKET_WRITEABLE;
			}

			if (occurred_events->events != 0)
			{
				occurred_events->fd = cur_event->fd;
				occurred_events++;
				returned_events++;
			}
		}
	}

	return returned_events;
}
예제 #11
0
파일: latch.c 프로젝트: Gordiychuk/postgres
/*
 * Wait using Windows' WaitForMultipleObjects().
 *
 * Unfortunately this will only ever return a single readiness notification at
 * a time.  Note that while the official documentation for
 * WaitForMultipleObjects is ambiguous about multiple events being "consumed"
 * with a single bWaitAll = FALSE call,
 * https://blogs.msdn.microsoft.com/oldnewthing/20150409-00/?p=44273 confirms
 * that only one event is "consumed".
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	DWORD		rc;
	WaitEvent  *cur_event;

	/*
	 * Sleep.
	 *
	 * Need to wait for ->nevents + 1, because signal handle is in [0].
	 */
	rc = WaitForMultipleObjects(set->nevents + 1, set->handles, FALSE,
								cur_timeout);

	/* Check return code */
	if (rc == WAIT_FAILED)
		elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
			 GetLastError());
	else if (rc == WAIT_TIMEOUT)
	{
		/* timeout exceeded */
		return -1;
	}

	if (rc == WAIT_OBJECT_0)
	{
		/* Service newly-arrived signals */
		pgwin32_dispatch_queued_signals();
		return 0;				/* retry */
	}

	/*
	 * With an offset of one, due to the always present pgwin32_signal_event,
	 * the handle offset directly corresponds to a wait event.
	 */
	cur_event = (WaitEvent *) &set->events[rc - WAIT_OBJECT_0 - 1];

	occurred_events->pos = cur_event->pos;
	occurred_events->user_data = cur_event->user_data;
	occurred_events->events = 0;

	if (cur_event->events == WL_LATCH_SET)
	{
		if (!ResetEvent(set->latch->event))
			elog(ERROR, "ResetEvent failed: error code %lu", GetLastError());

		if (set->latch->is_set)
		{
			occurred_events->fd = PGINVALID_SOCKET;
			occurred_events->events = WL_LATCH_SET;
			occurred_events++;
			returned_events++;
		}
	}
	else if (cur_event->events == WL_POSTMASTER_DEATH)
	{
		/*
		 * Postmaster apparently died.  Since the consequences of falsely
		 * returning WL_POSTMASTER_DEATH could be pretty unpleasant, we take
		 * the trouble to positively verify this with PostmasterIsAlive(),
		 * even though there is no known reason to think that the event could
		 * be falsely set on Windows.
		 */
		if (!PostmasterIsAlive())
		{
			occurred_events->fd = PGINVALID_SOCKET;
			occurred_events->events = WL_POSTMASTER_DEATH;
			occurred_events++;
			returned_events++;
		}
	}
	else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
	{
		WSANETWORKEVENTS resEvents;
		HANDLE		handle = set->handles[cur_event->pos + 1];

		Assert(cur_event->fd);

		occurred_events->fd = cur_event->fd;

		ZeroMemory(&resEvents, sizeof(resEvents));
		if (WSAEnumNetworkEvents(cur_event->fd, handle, &resEvents) != 0)
			elog(ERROR, "failed to enumerate network events: error code %u",
				 WSAGetLastError());
		if ((cur_event->events & WL_SOCKET_READABLE) &&
			(resEvents.lNetworkEvents & FD_READ))
		{
			/* data available in socket */
			occurred_events->events |= WL_SOCKET_READABLE;
		}
		if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
			(resEvents.lNetworkEvents & FD_WRITE))
		{
			/* writeable */
			occurred_events->events |= WL_SOCKET_WRITEABLE;
		}
		if (resEvents.lNetworkEvents & FD_CLOSE)
		{
			/* EOF */
			if (cur_event->events & WL_SOCKET_READABLE)
				occurred_events->events |= WL_SOCKET_READABLE;
			if (cur_event->events & WL_SOCKET_WRITEABLE)
				occurred_events->events |= WL_SOCKET_WRITEABLE;
		}

		if (occurred_events->events != 0)
		{
			occurred_events++;
			returned_events++;
		}
	}

	return returned_events;
}
예제 #12
0
파일: latch.c 프로젝트: Gordiychuk/postgres
/*
 * Wait using select(2).
 *
 * XXX: On at least older linux kernels select(), in violation of POSIX,
 * doesn't reliably return a socket as writable if closed - but we rely on
 * that. So far all the known cases of this problem are on platforms that also
 * provide a poll() implementation without that bug.  If we find one where
 * that's not the case, we'll need to add a workaround.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	int			rc;
	WaitEvent  *cur_event;
	fd_set		input_mask;
	fd_set		output_mask;
	int			hifd;
	struct timeval tv;
	struct timeval *tvp = NULL;

	FD_ZERO(&input_mask);
	FD_ZERO(&output_mask);

	/*
	 * Prepare input/output masks. We do so every loop iteration as there's no
	 * entirely portable way to copy fd_sets.
	 */
	for (cur_event = set->events;
		 cur_event < (set->events + set->nevents);
		 cur_event++)
	{
		if (cur_event->events == WL_LATCH_SET)
			FD_SET(cur_event->fd, &input_mask);
		else if (cur_event->events == WL_POSTMASTER_DEATH)
			FD_SET(cur_event->fd, &input_mask);
		else
		{
			Assert(cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));
			if (cur_event->events == WL_SOCKET_READABLE)
				FD_SET(cur_event->fd, &input_mask);
			else if (cur_event->events == WL_SOCKET_WRITEABLE)
				FD_SET(cur_event->fd, &output_mask);
		}

		if (cur_event->fd > hifd)
			hifd = cur_event->fd;
	}

	/* Sleep */
	if (cur_timeout >= 0)
	{
		tv.tv_sec = cur_timeout / 1000L;
		tv.tv_usec = (cur_timeout % 1000L) * 1000L;
		tvp = &tv;
	}
	rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);

	/* Check return code */
	if (rc < 0)
	{
		/* EINTR is okay, otherwise complain */
		if (errno != EINTR)
		{
			waiting = false;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("select() failed: %m")));
		}
		return 0;				/* retry */
	}
	else if (rc == 0)
	{
		/* timeout exceeded */
		return -1;
	}

	/*
	 * To associate events with select's masks, we have to check the status of
	 * the file descriptors associated with an event; by looping through all
	 * events.
	 */
	for (cur_event = set->events;
		 cur_event < (set->events + set->nevents)
		 && returned_events < nevents;
		 cur_event++)
	{
		occurred_events->pos = cur_event->pos;
		occurred_events->user_data = cur_event->user_data;
		occurred_events->events = 0;

		if (cur_event->events == WL_LATCH_SET &&
			FD_ISSET(cur_event->fd, &input_mask))
		{
			/* There's data in the self-pipe, clear it. */
			drainSelfPipe();

			if (set->latch->is_set)
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_LATCH_SET;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events == WL_POSTMASTER_DEATH &&
				 FD_ISSET(cur_event->fd, &input_mask))
		{
			/*
			 * According to the select(2) man page on Linux, select(2) may
			 * spuriously return and report a file descriptor as readable,
			 * when it's not; and presumably so can poll(2).  It's not clear
			 * that the relevant cases would ever apply to the postmaster
			 * pipe, but since the consequences of falsely returning
			 * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
			 * trouble to positively verify EOF with PostmasterIsAlive().
			 */
			if (!PostmasterIsAlive())
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_POSTMASTER_DEATH;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
		{
			Assert(cur_event->fd != PGINVALID_SOCKET);

			if ((cur_event->events & WL_SOCKET_READABLE) &&
				FD_ISSET(cur_event->fd, &input_mask))
			{
				/* data available in socket, or EOF */
				occurred_events->events |= WL_SOCKET_READABLE;
			}

			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
				FD_ISSET(cur_event->fd, &output_mask))
			{
				/* socket is writeable, or EOF */
				occurred_events->events |= WL_SOCKET_WRITEABLE;
			}

			if (occurred_events->events != 0)
			{
				occurred_events->fd = cur_event->fd;
				occurred_events++;
				returned_events++;
			}
		}
	}
	return returned_events;
}
예제 #13
0
파일: latch.c 프로젝트: Gordiychuk/postgres
/*
 * Wait using poll(2).
 *
 * This allows to receive readiness notifications for several events at once,
 * but requires iterating through all of set->pollfds.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	int			rc;
	WaitEvent  *cur_event;
	struct pollfd *cur_pollfd;

	/* Sleep */
	rc = poll(set->pollfds, set->nevents, (int) cur_timeout);

	/* Check return code */
	if (rc < 0)
	{
		/* EINTR is okay, otherwise complain */
		if (errno != EINTR)
		{
			waiting = false;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("poll() failed: %m")));
		}
		return 0;
	}
	else if (rc == 0)
	{
		/* timeout exceeded */
		return -1;
	}

	for (cur_event = set->events, cur_pollfd = set->pollfds;
		 cur_event < (set->events + set->nevents) &&
		 returned_events < nevents;
		 cur_event++, cur_pollfd++)
	{
		/* no activity on this FD, skip */
		if (cur_pollfd->revents == 0)
			continue;

		occurred_events->pos = cur_event->pos;
		occurred_events->user_data = cur_event->user_data;
		occurred_events->events = 0;

		if (cur_event->events == WL_LATCH_SET &&
			(cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
		{
			/* There's data in the self-pipe, clear it. */
			drainSelfPipe();

			if (set->latch->is_set)
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_LATCH_SET;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events == WL_POSTMASTER_DEATH &&
			 (cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
		{
			/*
			 * We expect an POLLHUP when the remote end is closed, but because
			 * we don't expect the pipe to become readable or to have any
			 * errors either, treat those cases as postmaster death, too.
			 *
			 * As explained in the WAIT_USE_SELECT implementation, select(2)
			 * may spuriously return. Be paranoid about that here too, a
			 * spurious WL_POSTMASTER_DEATH would be painful.
			 */
			if (!PostmasterIsAlive())
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_POSTMASTER_DEATH;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
		{
			int			errflags = POLLHUP | POLLERR | POLLNVAL;

			Assert(cur_event->fd >= PGINVALID_SOCKET);

			if ((cur_event->events & WL_SOCKET_READABLE) &&
				(cur_pollfd->revents & (POLLIN | errflags)))
			{
				/* data available in socket, or EOF */
				occurred_events->events |= WL_SOCKET_READABLE;
			}

			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
				(cur_pollfd->revents & (POLLOUT | errflags)))
			{
				/* writeable, or EOF */
				occurred_events->events |= WL_SOCKET_WRITEABLE;
			}

			if (occurred_events->events != 0)
			{
				occurred_events->fd = cur_event->fd;
				occurred_events++;
				returned_events++;
			}
		}
	}
	return returned_events;
}
예제 #14
0
파일: syncrep.c 프로젝트: agentm/postgres
/*
 * Wait for synchronous replication, if requested by user.
 *
 * Initially backends start in state SYNC_REP_NOT_WAITING and then
 * change that state to SYNC_REP_WAITING before adding ourselves
 * to the wait queue. During SyncRepWakeQueue() a WALSender changes
 * the state to SYNC_REP_WAIT_COMPLETE once replication is confirmed.
 * This backend then resets its state to SYNC_REP_NOT_WAITING.
 */
void
SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
{
	char 		*new_status = NULL;
	const char *old_status;

	/*
	 * Fast exit if user has not requested sync replication, or
	 * there are no sync replication standby names defined.
	 * Note that those standbys don't need to be connected.
	 */
	if (!SyncRepRequested() || !SyncStandbysDefined())
		return;

	Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));
	Assert(WalSndCtl != NULL);

	/* Reset the latch before adding ourselves to the queue. */
	ResetLatch(&MyProc->waitLatch);

	/*
	 * Set our waitLSN so WALSender will know when to wake us, and add
	 * ourselves to the queue.
	 */
	LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
	Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING);
	if (!WalSndCtl->sync_standbys_defined)
	{
		/*
		 * We don't wait for sync rep if WalSndCtl->sync_standbys_defined is
		 * not set.  See SyncRepUpdateSyncStandbysDefined.
		 */
		LWLockRelease(SyncRepLock);
		return;
	}
	MyProc->waitLSN = XactCommitLSN;
	MyProc->syncRepState = SYNC_REP_WAITING;
	SyncRepQueueInsert();
	Assert(SyncRepQueueIsOrderedByLSN());
	LWLockRelease(SyncRepLock);

	/* Alter ps display to show waiting for sync rep. */
	if (update_process_title)
	{
		int			len;

		old_status = get_ps_display(&len);
		new_status = (char *) palloc(len + 32 + 1);
		memcpy(new_status, old_status, len);
		sprintf(new_status + len, " waiting for %X/%X",
				XactCommitLSN.xlogid, XactCommitLSN.xrecoff);
		set_ps_display(new_status, false);
		new_status[len] = '\0'; /* truncate off " waiting ..." */
	}

	/*
	 * Wait for specified LSN to be confirmed.
	 *
	 * Each proc has its own wait latch, so we perform a normal latch
	 * check/wait loop here.
	 */
	for (;;)
	{
		int syncRepState;

		/*
		 * Wait on latch for up to 60 seconds. This allows us to
		 * check for postmaster death regularly while waiting.
		 * Note that timeout here does not necessarily release from loop.
		 */
		WaitLatch(&MyProc->waitLatch, 60000000L);

		/* Must reset the latch before testing state. */
		ResetLatch(&MyProc->waitLatch);

		/*
		 * Try checking the state without the lock first.  There's no guarantee
		 * that we'll read the most up-to-date value, so if it looks like we're
		 * still waiting, recheck while holding the lock.  But if it looks like
		 * we're done, we must really be done, because once walsender changes
		 * the state to SYNC_REP_WAIT_COMPLETE, it will never update it again,
		 * so we can't be seeing a stale value in that case.
		 */
		syncRepState = MyProc->syncRepState;
		if (syncRepState == SYNC_REP_WAITING)
		{
			LWLockAcquire(SyncRepLock, LW_SHARED);
			syncRepState = MyProc->syncRepState;
			LWLockRelease(SyncRepLock);
		}
		if (syncRepState == SYNC_REP_WAIT_COMPLETE)
			break;

		/*
		 * If a wait for synchronous replication is pending, we can neither
		 * acknowledge the commit nor raise ERROR or FATAL.  The latter
		 * would lead the client to believe that that the transaction
		 * aborted, which is not true: it's already committed locally.
		 * The former is no good either: the client has requested
		 * synchronous replication, and is entitled to assume that an
		 * acknowledged commit is also replicated, which may not be true.
		 * So in this case we issue a WARNING (which some clients may
		 * be able to interpret) and shut off further output.  We do NOT
		 * reset ProcDiePending, so that the process will die after the
		 * commit is cleaned up.
		 */
		if (ProcDiePending)
		{
			ereport(WARNING,
					(errcode(ERRCODE_ADMIN_SHUTDOWN),
					 errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
					 errdetail("The transaction has already committed locally, but may not have been replicated to the standby.")));
			whereToSendOutput = DestNone;
			SyncRepCancelWait();
			break;
		}

		/*
		 * It's unclear what to do if a query cancel interrupt arrives.  We
		 * can't actually abort at this point, but ignoring the interrupt
		 * altogether is not helpful, so we just terminate the wait with
		 * a suitable warning.
		 */
		if (QueryCancelPending)
		{
			QueryCancelPending = false;
			ereport(WARNING,
					(errmsg("canceling wait for synchronous replication due to user request"),
					 errdetail("The transaction has already committed locally, but may not have been replicated to the standby.")));
			SyncRepCancelWait();
			break;
		}

		/*
		 * If the postmaster dies, we'll probably never get an acknowledgement,
		 * because all the wal sender processes will exit.  So just bail out.
		 */
		if (!PostmasterIsAlive(true))
		{
			ProcDiePending = true;
			whereToSendOutput = DestNone;
			SyncRepCancelWait();
			break;
		}
	}

	/*
	 * WalSender has checked our LSN and has removed us from queue. Clean up
	 * state and leave.  It's OK to reset these shared memory fields without
	 * holding SyncRepLock, because any walsenders will ignore us anyway when
	 * we're not on the queue.
	 */
	Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));
	MyProc->syncRepState = SYNC_REP_NOT_WAITING;
	MyProc->waitLSN.xlogid = 0;
	MyProc->waitLSN.xrecoff = 0;

	if (new_status)
	{
		/* Reset ps display */
		set_ps_display(new_status, false);
		pfree(new_status);
	}
}
예제 #15
0
/* Main loop of walsender process */
static int
WalSndLoop(void)
{
	char	   *output_message;
	bool		caughtup = false;

	/*
	 * Allocate buffer that will be used for each output message.  We do this
	 * just once to reduce palloc overhead.  The buffer must be made large
	 * enough for maximum-sized messages.
	 */
	output_message = palloc(1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE);

	/* Loop forever, unless we get an error */
	for (;;)
	{
		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/* Process any requests or signals received recently */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/*
		 * When SIGUSR2 arrives, we send all outstanding logs up to the
		 * shutdown checkpoint record (i.e., the latest record) and exit.
		 */
		if (ready_to_stop)
		{
			if (!XLogSend(output_message, &caughtup))
				break;
			if (caughtup)
				shutdown_requested = true;
		}

		/* Normal exit from the walsender is here */
		if (shutdown_requested)
		{
			/* Inform the standby that XLOG streaming was done */
			pq_puttextmessage('C', "COPY 0");
			pq_flush();

			proc_exit(0);
		}

		/*
		 * If we had sent all accumulated WAL in last round, nap for the
		 * configured time before retrying.
		 */
		if (caughtup)
		{
			/*
			 * Even if we wrote all the WAL that was available when we started
			 * sending, more might have arrived while we were sending this
			 * batch. We had the latch set while sending, so we have not
			 * received any signals from that time. Let's arm the latch
			 * again, and after that check that we're still up-to-date.
			 */
			ResetLatch(&MyWalSnd->latch);

			if (!XLogSend(output_message, &caughtup))
				break;
			if (caughtup && !got_SIGHUP && !ready_to_stop && !shutdown_requested)
			{
				/*
				 * XXX: We don't really need the periodic wakeups anymore,
				 * WaitLatchOrSocket should reliably wake up as soon as
				 * something interesting happens.
				 */

				/* Sleep */
				WaitLatchOrSocket(&MyWalSnd->latch, MyProcPort->sock,
								  WalSndDelay * 1000L);
			}

			/* Check if the connection was closed */
			CheckClosedConnection();
		}
		else
		{
			/* Attempt to send the log once every loop */
			if (!XLogSend(output_message, &caughtup))
				break;
		}
	}

	/*
	 * Get here on send failure.  Clean up and exit.
	 *
	 * Reset whereToSendOutput to prevent ereport from attempting to send any
	 * more messages to the standby.
	 */
	if (whereToSendOutput == DestRemote)
		whereToSendOutput = DestNone;

	proc_exit(0);
	return 1;					/* keep the compiler quiet */
}
예제 #16
0
/*
 *  FileRepSubProcess_ProcessSignals()
 *
 */
bool
FileRepSubProcess_ProcessSignals()
{
	bool		processExit = false;

	if (reloadConfigFile)
	{
		reloadConfigFile = false;
		ProcessConfigFile(PGC_SIGHUP);

		FileRep_SetFileRepRetry();
	}

	if (shutdownRequested)
	{
		SegmentState_e segmentState;

		getPrimaryMirrorStatusCodes(NULL, &segmentState, NULL, NULL);

		shutdownRequested = false;

		if (segmentState == SegmentStateShutdownFilerepBackends)
		{
			processExit = FileRepIsBackendSubProcess(fileRepProcessType);
			FileRepSubProcess_SetState(FileRepStateShutdownBackends);
		}
		else
		{
			processExit = true;
			FileRepSubProcess_SetState(FileRepStateShutdown);
		}
	}

	/*
	 * Immediate shutdown if postmaster or main filerep process (parent) is
	 * not alive to avoid manual cleanup.
	 */
	if (!PostmasterIsAlive(false /* amDirectChild */ ) || !ParentProcIsAlive())
	{
		quickdie_impl();
	}

	for (;;)
	{
		/* check to see if change required */
		sig_atomic_t curStateChangeRequestCounter = stateChangeRequestCounter;

		if (curStateChangeRequestCounter == lastChangeRequestProcessCounterValue)
			break;
		lastChangeRequestProcessCounterValue = curStateChangeRequestCounter;

		/* do the change in local memory */
		getFileRepRoleAndState(&fileRepRole, &segmentState, &dataState, NULL, NULL);
		switch (segmentState)
		{

			case SegmentStateNotInitialized:
				FileRepSubProcess_SetState(FileRepStateNotInitialized);
				break;

			case SegmentStateInitialization:
				FileRepSubProcess_SetState(FileRepStateInitialization);
				break;

			case SegmentStateInResyncTransition:
				FileRepSubProcess_SetState(FileRepStateInitialization);
				break;

			case SegmentStateInChangeTrackingTransition:
			case SegmentStateInSyncTransition:
				/* fileRepState remains Ready */
				break;

			case SegmentStateChangeTrackingDisabled:
			case SegmentStateReady:
				FileRepSubProcess_SetState(FileRepStateReady);
				break;

			case SegmentStateFault:
				FileRepSubProcess_SetState(FileRepStateFault);
				break;

			case SegmentStateShutdownFilerepBackends:
				if (fileRepRole == FileRepPrimaryRole)
				{
					FileRepSubProcess_SetState(FileRepStateShutdownBackends);
				}
				else
				{
					processExit = true;
					FileRepSubProcess_SetState(FileRepStateShutdown);
				}
				break;

			case SegmentStateImmediateShutdown:
			case SegmentStateShutdown:
				processExit = true;
				FileRepSubProcess_SetState(FileRepStateShutdown);
				break;

			default:
				Assert(0);
				break;
		} //switch ()

				if (processExit == true)
				{
					FileRep_IpcSignalAll();
				}
	}

	return (processExit);
}
예제 #17
0
/* Main loop of walsender process */
static int
WalSndLoop(void)
{
	char	   *output_message;
	bool		caughtup = false;

	/*
	 * Allocate buffer that will be used for each output message.  We do this
	 * just once to reduce palloc overhead.  The buffer must be made large
	 * enough for maximum-sized messages.
	 */
	output_message = palloc(1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE);

	/*
	 * Allocate buffer that will be used for processing reply messages.  As
	 * above, do this just once to reduce palloc overhead.
	 */
	initStringInfo(&reply_message);

	/* Initialize the last reply timestamp */
	last_reply_timestamp = GetCurrentTimestamp();

	/* Loop forever, unless we get an error */
	for (;;)
	{
		/* Clear any already-pending wakeups */
		ResetLatch(&MyWalSnd->latch);

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive())
			exit(1);

		/* Process any requests or signals received recently */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
			SyncRepInitConfig();
		}

		/* Normal exit from the walsender is here */
		if (walsender_shutdown_requested)
		{
			/* Inform the standby that XLOG streaming is done */
			pq_puttextmessage('C', "COPY 0");
			pq_flush();

			proc_exit(0);
		}

		/* Check for input from the client */
		ProcessRepliesIfAny();

		/*
		 * If we don't have any pending data in the output buffer, try to send
		 * some more.  If there is some, we don't bother to call XLogSend
		 * again until we've flushed it ... but we'd better assume we are not
		 * caught up.
		 */
		if (!pq_is_send_pending())
			XLogSend(output_message, &caughtup);
		else
			caughtup = false;

		/* Try to flush pending output to the client */
		if (pq_flush_if_writable() != 0)
			break;

		/* If nothing remains to be sent right now ... */
		if (caughtup && !pq_is_send_pending())
		{
			/*
			 * If we're in catchup state, move to streaming.  This is an
			 * important state change for users to know about, since before
			 * this point data loss might occur if the primary dies and we
			 * need to failover to the standby. The state change is also
			 * important for synchronous replication, since commits that
			 * started to wait at that point might wait for some time.
			 */
			if (MyWalSnd->state == WALSNDSTATE_CATCHUP)
			{
				ereport(DEBUG1,
						(errmsg("standby \"%s\" has now caught up with primary",
								application_name)));
				WalSndSetState(WALSNDSTATE_STREAMING);
			}

			/*
			 * When SIGUSR2 arrives, we send any outstanding logs up to the
			 * shutdown checkpoint record (i.e., the latest record) and exit.
			 * This may be a normal termination at shutdown, or a promotion,
			 * the walsender is not sure which.
			 */
			if (walsender_ready_to_stop)
			{
				/* ... let's just be real sure we're caught up ... */
				XLogSend(output_message, &caughtup);
				if (caughtup && !pq_is_send_pending())
				{
					walsender_shutdown_requested = true;
					continue;		/* don't want to wait more */
				}
			}
		}

		/*
		 * We don't block if not caught up, unless there is unsent data
		 * pending in which case we'd better block until the socket is
		 * write-ready.  This test is only needed for the case where XLogSend
		 * loaded a subset of the available data but then pq_flush_if_writable
		 * flushed it all --- we should immediately try to send more.
		 */
		if (caughtup || pq_is_send_pending())
		{
			TimestampTz finish_time = 0;
			long		sleeptime = -1;
			int			wakeEvents;

			wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH |
				WL_SOCKET_READABLE;
			if (pq_is_send_pending())
				wakeEvents |= WL_SOCKET_WRITEABLE;

			/* Determine time until replication timeout */
			if (replication_timeout > 0)
			{
				long		secs;
				int			usecs;

				finish_time = TimestampTzPlusMilliseconds(last_reply_timestamp,
														  replication_timeout);
				TimestampDifference(GetCurrentTimestamp(),
									finish_time, &secs, &usecs);
				sleeptime = secs * 1000 + usecs / 1000;
				/* Avoid Assert in WaitLatchOrSocket if timeout is past */
				if (sleeptime < 0)
					sleeptime = 0;
				wakeEvents |= WL_TIMEOUT;
			}

			/* Sleep until something happens or replication timeout */
			WaitLatchOrSocket(&MyWalSnd->latch, wakeEvents,
							  MyProcPort->sock, sleeptime);

			/*
			 * Check for replication timeout.  Note we ignore the corner case
			 * possibility that the client replied just as we reached the
			 * timeout ... he's supposed to reply *before* that.
			 */
			if (replication_timeout > 0 &&
				GetCurrentTimestamp() >= finish_time)
			{
				/*
				 * Since typically expiration of replication timeout means
				 * communication problem, we don't send the error message to
				 * the standby.
				 */
				ereport(COMMERROR,
						(errmsg("terminating walsender process due to replication timeout")));
				break;
			}
		}
	}

	/*
	 * Get here on send failure.  Clean up and exit.
	 *
	 * Reset whereToSendOutput to prevent ereport from attempting to send any
	 * more messages to the standby.
	 */
	if (whereToSendOutput == DestRemote)
		whereToSendOutput = DestNone;

	proc_exit(0);
	return 1;					/* keep the compiler quiet */
}
예제 #18
0
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 */
void
BackgroundWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext bgwriter_context;

	BgWriterShmem->bgwriter_pid = MyProcPid;
	am_bg_writer = true;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.	(bgwriter probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we deliberately ignore SIGTERM, because during a standard Unix
	 * system shutdown cycle, init will SIGTERM all processes at once.	We
	 * want to wait for the backends to exit, whereupon the postmaster will
	 * tell us it's okay to shut down (via SIGUSR2).
	 *
	 * SIGUSR1 is presently unused; keep it spare in case someday we want this
	 * process to participate in sinval messaging.
	 */
	pqsignal(SIGHUP, BgSigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, ReqCheckpointHandler);		/* request checkpoint */
	pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN); /* reserve for sinval */
	pqsignal(SIGUSR2, ReqShutdownHandler);		/* request shutdown */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
#ifdef HAVE_SIGPROCMASK
	sigdelset(&BlockSig, SIGQUIT);
#else
	BlockSig &= ~(sigmask(SIGQUIT));
#endif

	/*
	 * Initialize so that first time-driven event happens at the correct time.
	 */
	last_checkpoint_time = last_xlog_switch_time = time(NULL);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	bgwriter_context = AllocSetContextCreate(TopMemoryContext,
											 "Background Writer",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(bgwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().	We don't have very many resources to worry
		 * about in bgwriter, but we do have LWLocks, buffers, and temp files.
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/* Warn any waiting backends that the checkpoint failed. */
		if (ckpt_active)
		{
			/* use volatile pointer to prevent code rearrangement */
			volatile BgWriterShmemStruct *bgs = BgWriterShmem;

			SpinLockAcquire(&bgs->ckpt_lck);
			bgs->ckpt_failed++;
			bgs->ckpt_done = bgs->ckpt_started;
			SpinLockRelease(&bgs->ckpt_lck);

			ckpt_active = false;
		}

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(bgwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(bgwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		do_checkpoint = false;
		int			flags = 0;
		time_t		now;
		int			elapsed_secs;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Process any requests or signals received recently.
		 */
		AbsorbFsyncRequests();

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (checkpoint_requested)
		{
			checkpoint_requested = false;
			do_checkpoint = true;
			BgWriterStats.m_requested_checkpoints++;
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Close down the database */
			ShutdownXLOG(0, 0);
			DumpFreeSpaceMap(0, 0);
			/* Normal exit from the bgwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Force a checkpoint if too much time has elapsed since the last one.
		 * Note that we count a timed checkpoint in stats only when this
		 * occurs without an external request, but we set the CAUSE_TIME flag
		 * bit even if there is also an external request.
		 */
		now = time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
		{
			if (!do_checkpoint)
				BgWriterStats.m_timed_checkpoints++;
			do_checkpoint = true;
			flags |= CHECKPOINT_CAUSE_TIME;
		}

		/*
		 * Do a checkpoint if requested, otherwise do one cycle of
		 * dirty-buffer writing.
		 */
		if (do_checkpoint)
		{
			/* use volatile pointer to prevent code rearrangement */
			volatile BgWriterShmemStruct *bgs = BgWriterShmem;

			/*
			 * Atomically fetch the request flags to figure out what kind of a
			 * checkpoint we should perform, and increase the started-counter
			 * to acknowledge that we've started a new checkpoint.
			 */
			SpinLockAcquire(&bgs->ckpt_lck);
			flags |= bgs->ckpt_flags;
			bgs->ckpt_flags = 0;
			bgs->ckpt_started++;
			SpinLockRelease(&bgs->ckpt_lck);

			/*
			 * We will warn if (a) too soon since last checkpoint (whatever
			 * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
			 * since the last checkpoint start.  Note in particular that this
			 * implementation will not generate warnings caused by
			 * CheckPointTimeout < CheckPointWarning.
			 */
			if ((flags & CHECKPOINT_CAUSE_XLOG) &&
				elapsed_secs < CheckPointWarning)
				ereport(LOG,
						(errmsg("checkpoints are occurring too frequently (%d seconds apart)",
								elapsed_secs),
						 errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));

			/*
			 * Initialize bgwriter-private variables used during checkpoint.
			 */
			ckpt_active = true;
			ckpt_start_recptr = GetInsertRecPtr();
			ckpt_start_time = now;
			ckpt_cached_elapsed = 0;

			/*
			 * Do the checkpoint.
			 */
			CreateCheckPoint(flags);

			/*
			 * After any checkpoint, close all smgr files.	This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();

			/*
			 * Indicate checkpoint completion to any waiting backends.
			 */
			SpinLockAcquire(&bgs->ckpt_lck);
			bgs->ckpt_done = bgs->ckpt_started;
			SpinLockRelease(&bgs->ckpt_lck);

			ckpt_active = false;

			/*
			 * Note we record the checkpoint start time not end time as
			 * last_checkpoint_time.  This is so that time-driven checkpoints
			 * happen at a predictable spacing.
			 */
			last_checkpoint_time = now;
		}
		else
			BgBufferSync();

		/* Check for archive_timeout and switch xlog files if necessary. */
		CheckArchiveTimeout();

		/* Nap for the configured time. */
		BgWriterNap();
	}
}
int MainHandlerLoop_RMSEG(void)
{
	int 		res 	  = FUNC_RETURN_OK;
	uint64_t    curtime   = 0;
	int			errorcode = FUNC_RETURN_OK;
	char		errorbuf[1024];

	while( DRMGlobalInstance->ResManagerMainKeepRun ) {

		if (!PostmasterIsAlive(true)) {
			DRMGlobalInstance->ResManagerMainKeepRun = false;
			elog(LOG, "Postmaster is not alive, resource manager exits");
			break;
		}

		/* PART1. Handle socket server inputs. */
		res = processAllCommFileDescs();
		if ( res != FUNC_RETURN_OK ) {
			/*
			 * The possible error here is the failure of poll(), we won't keep
			 * running HAWQ RM any longer, graceful quit is requested.
			 */
			DRMGlobalInstance->ResManagerMainKeepRun = false;
			elog(LOG, "System error cause resource manager not possible to track "
					  "network communications.");
		}

		/* PART2. Handle all BE submitted requests. */
		processSubmittedRequests();

		/* PART3. Fresh local host info and send IMAlive message to resource
		 * 		  manager server.											  */
		curtime = gettime_microsec();
		if ( DRMGlobalInstance->LocalHostStat == NULL ||
			 curtime - DRMGlobalInstance->LocalHostLastUpdateTime >
			 SEGMENT_HOSTCHECK_INTERVAL ) {
			refreshLocalHostInstance();
			checkLocalPostmasterStatus();
		}

		if ( DRMGlobalInstance->SendIMAlive ) {
			 if (DRMGlobalInstance->LocalHostStat != NULL &&
			     curtime - DRMGlobalInstance->HeartBeatLastSentTime >
			     SEGMENT_HEARTBEAT_INTERVAL ) {
				 sendIMAlive(&errorcode, errorbuf, sizeof(errorbuf));
				 DRMGlobalInstance->HeartBeatLastSentTime = gettime_microsec();
			 }
		}

		/* PART4. Send responses back to the clients. */
		sendResponseToClients();

		/* PART5. Resource enforcement work thread quit */
		if (g_enforcement_thread_quited) {
			elog(ERROR, "Resource enforcement thread quited");
		}
	}

	elog(RMLOG, "Resource manager main event handler exits.");

	return res;
}
예제 #20
0
파일: walwriter.c 프로젝트: GisKook/Gis
/*
 * Main entry point for walwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 */
void
WalWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext walwriter_context;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.	(walwriter probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * We have no particular use for SIGINT at the moment, but seems
	 * reasonable to treat like SIGTERM.
	 */
	pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
	pqsignal(SIGINT, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGTERM, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGQUIT, wal_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
	pqsignal(SIGUSR2, SIG_IGN); /* not used */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Create a resource owner to keep track of our resources (not clear that
	 * we need this, but may as well have one).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	walwriter_context = AllocSetContextCreate(TopMemoryContext,
											  "Wal Writer",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(walwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is heavily based on bgwriter.c, q.v.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().	We don't have very many resources to worry
		 * about in walwriter, but we do have LWLocks, and perhaps buffers?
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(walwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(walwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Loop forever
	 */
	for (;;)
	{
		long		udelay;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Process any requests or signals received recently.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (shutdown_requested)
		{
			/* Normal exit from the walwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Do what we're here for...
		 */
		XLogBackgroundFlush();

		/*
		 * Delay until time to do something more, but fall out of delay
		 * reasonably quickly if signaled.
		 */
		udelay = WalWriterDelay * 1000L;
		while (udelay > 999999L)
		{
			if (got_SIGHUP || shutdown_requested)
				break;
			pg_usleep(1000000L);
			udelay -= 1000000L;
		}
		if (!(got_SIGHUP || shutdown_requested))
			pg_usleep(udelay);
	}
}
예제 #21
0
/*
 * pgarch_MainLoop
 *
 * Main loop for archiver
 */
static void
pgarch_MainLoop(void)
{
	time_t		last_copy_time = 0;
	bool		time_to_stop;

	/*
	 * We run the copy loop immediately upon entry, in case there are
	 * unarchived files left over from a previous database run (or maybe the
	 * archiver died unexpectedly).  After that we wait for a signal or
	 * timeout before doing more.
	 */
	wakened = true;

	do
	{
		/* When we get SIGUSR2, we do one more archive cycle, then exit */
		time_to_stop = ready_to_stop;

		/* Check for config update */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/*
		 * If we've gotten SIGTERM, we normally just sit and do nothing until
		 * SIGUSR2 arrives.  However, that means a random SIGTERM would
		 * disable archiving indefinitely, which doesn't seem like a good
		 * idea.  If more than 60 seconds pass since SIGTERM, exit anyway, so
		 * that the postmaster can start a new archiver if needed.
		 */
		if (got_SIGTERM)
		{
			time_t		curtime = time(NULL);

			if (last_sigterm_time == 0)
				last_sigterm_time = curtime;
			else if ((unsigned int) (curtime - last_sigterm_time) >=
					 (unsigned int) 60)
				break;
		}

		/* Do what we're here for */
		if (wakened || time_to_stop)
		{
			wakened = false;
			pgarch_ArchiverCopyLoop();
			last_copy_time = time(NULL);
		}

		/*
		 * There shouldn't be anything for the archiver to do except to wait
		 * for a signal ... however, the archiver exists to protect our data,
		 * so she wakes up occasionally to allow herself to be proactive.
		 *
		 * On some platforms, signals won't interrupt the sleep.  To ensure we
		 * respond reasonably promptly when someone signals us, break down the
		 * sleep into 1-second increments, and check for interrupts after each
		 * nap.
		 */
		while (!(wakened || ready_to_stop || got_SIGHUP ||
				 !PostmasterIsAlive(true)))
		{
			time_t		curtime;

			pg_usleep(1000000L);
			curtime = time(NULL);
			if ((unsigned int) (curtime - last_copy_time) >=
				(unsigned int) PGARCH_AUTOWAKE_INTERVAL)
				wakened = true;
		}

		/*
		 * The archiver quits either when the postmaster dies (not expected)
		 * or after completing one more archiving cycle after receiving
		 * SIGUSR2.
		 */
	} while (PostmasterIsAlive(true) && !time_to_stop);
}
예제 #22
0
/* Main entry point for walreceiver process */
void
WalReceiverMain(void)
{
	char		conninfo[MAXCONNINFO];
	XLogRecPtr	startpoint;

	/* use volatile pointer to prevent code rearrangement */
	volatile WalRcvData *walrcv = WalRcv;

	am_walreceiver = true;

	/*
	 * WalRcv should be set up already (if we are a backend, we inherit this
	 * by fork() or EXEC_BACKEND mechanism from the postmaster).
	 */
	Assert(walrcv != NULL);

	/*
	 * Mark walreceiver as running in shared memory.
	 *
	 * Do this as early as possible, so that if we fail later on, we'll set
	 * state to STOPPED. If we die before this, the startup process will keep
	 * waiting for us to start up, until it times out.
	 */
	SpinLockAcquire(&walrcv->mutex);
	Assert(walrcv->pid == 0);
	switch (walrcv->walRcvState)
	{
		case WALRCV_STOPPING:
			/* If we've already been requested to stop, don't start up. */
			walrcv->walRcvState = WALRCV_STOPPED;
			/* fall through */

		case WALRCV_STOPPED:
			SpinLockRelease(&walrcv->mutex);
			proc_exit(1);
			break;

		case WALRCV_STARTING:
			/* The usual case */
			break;

		case WALRCV_RUNNING:
			/* Shouldn't happen */
			elog(PANIC, "walreceiver still running according to shared memory state");
	}
	/* Advertise our PID so that the startup process can kill us */
	walrcv->pid = MyProcPid;
	walrcv->walRcvState = WALRCV_RUNNING;

	/* Fetch information required to start streaming */
	strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
	startpoint = walrcv->receivedUpto;
	SpinLockRelease(&walrcv->mutex);

	/* Arrange to clean up at walreceiver exit */
	on_shmem_exit(WalRcvDie, 0);

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.	(walreceiver probably never has
	 * any child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/* Properly accept or ignore signals the postmaster might send us */
	pqsignal(SIGHUP, WalRcvSigHupHandler);		/* set flag to read config
												 * file */
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGTERM, WalRcvShutdownHandler);	/* request shutdown */
	pqsignal(SIGQUIT, WalRcvQuickDieHandler);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN);
	pqsignal(SIGUSR2, SIG_IGN);

	/* Reset some signals that are accepted by postmaster but not here */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/* Load the libpq-specific functions */
	load_file("libpqwalreceiver", false);
	if (walrcv_connect == NULL || walrcv_receive == NULL ||
		walrcv_disconnect == NULL)
		elog(ERROR, "libpqwalreceiver didn't initialize correctly");

	/*
	 * Create a resource owner to keep track of our resources (not clear that
	 * we need this, but may as well have one).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Receiver");

	/* Unblock signals (they were blocked when the postmaster forked us) */
	PG_SETMASK(&UnBlockSig);

	/* Establish the connection to the primary for XLOG streaming */
	EnableWalRcvImmediateExit();
	walrcv_connect(conninfo, startpoint);
	DisableWalRcvImmediateExit();

	/* Loop until end-of-streaming or error */
	for (;;)
	{
		unsigned char type;
		char	   *buf;
		int			len;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Exit walreceiver if we're not in recovery. This should not happen,
		 * but cross-check the status here.
		 */
		if (!RecoveryInProgress())
			ereport(FATAL,
					(errmsg("cannot continue WAL streaming, recovery has already ended")));

		/* Process any requests or signals received recently */
		ProcessWalRcvInterrupts();

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/* Wait a while for data to arrive */
		if (walrcv_receive(NAPTIME_PER_CYCLE, &type, &buf, &len))
		{
			/* Accept the received data, and process it */
			XLogWalRcvProcessMsg(type, buf, len);

			/* Receive any more data we can without sleeping */
			while (walrcv_receive(0, &type, &buf, &len))
				XLogWalRcvProcessMsg(type, buf, len);

			/*
			 * If we've written some records, flush them to disk and let the
			 * startup process know about them.
			 */
			XLogWalRcvFlush();
		}
	}
}
예제 #23
0
/*
 * Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
 * conditions.
 *
 * When waiting on a socket, WL_SOCKET_READABLE *must* be included in
 * 'wakeEvents'; WL_SOCKET_WRITEABLE is optional.  The reason for this is
 * that EOF and error conditions are reported only via WL_SOCKET_READABLE.
 */
int
WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
				  long timeout)
{
	int			result = 0;
	int			rc;
	instr_time	start_time,
				cur_time;
	long		cur_timeout;

#ifdef HAVE_POLL
	struct pollfd pfds[3];
	int			nfds;
#else
	struct timeval tv,
			   *tvp;
	fd_set		input_mask;
	fd_set		output_mask;
	int			hifd;
#endif

	/* Ignore WL_SOCKET_* events if no valid socket is given */
	if (sock == PGINVALID_SOCKET)
		wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);

	Assert(wakeEvents != 0);	/* must have at least one wake event */
	/* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */
	Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE);

	if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
		elog(ERROR, "cannot wait on a latch owned by another process");

	/*
	 * Initialize timeout if requested.  We must record the current time so
	 * that we can determine the remaining timeout if the poll() or select()
	 * is interrupted.	(On some platforms, select() will update the contents
	 * of "tv" for us, but unfortunately we can't rely on that.)
	 */
	if (wakeEvents & WL_TIMEOUT)
	{
		INSTR_TIME_SET_CURRENT(start_time);
		Assert(timeout >= 0 && timeout <= INT_MAX);
		cur_timeout = timeout;

#ifndef HAVE_POLL
		tv.tv_sec = cur_timeout / 1000L;
		tv.tv_usec = (cur_timeout % 1000L) * 1000L;
		tvp = &tv;
#endif
	}
	else
	{
		cur_timeout = -1;

#ifndef HAVE_POLL
		tvp = NULL;
#endif
	}

	waiting = true;
	do
	{
		/*
		 * Clear the pipe, then check if the latch is set already. If someone
		 * sets the latch between this and the poll()/select() below, the
		 * setter will write a byte to the pipe (or signal us and the signal
		 * handler will do that), and the poll()/select() will return
		 * immediately.
		 *
		 * Note: we assume that the kernel calls involved in drainSelfPipe()
		 * and SetLatch() will provide adequate synchronization on machines
		 * with weak memory ordering, so that we cannot miss seeing is_set if
		 * the signal byte is already in the pipe when we drain it.
		 */
		drainSelfPipe();

		if ((wakeEvents & WL_LATCH_SET) && latch->is_set)
		{
			result |= WL_LATCH_SET;

			/*
			 * Leave loop immediately, avoid blocking again. We don't attempt
			 * to report any other events that might also be satisfied.
			 */
			break;
		}

		/* Must wait ... we use poll(2) if available, otherwise select(2) */
#ifdef HAVE_POLL
		nfds = 0;
		if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
		{
			/* socket, if used, is always in pfds[0] */
			pfds[0].fd = sock;
			pfds[0].events = 0;
			if (wakeEvents & WL_SOCKET_READABLE)
				pfds[0].events |= POLLIN;
			if (wakeEvents & WL_SOCKET_WRITEABLE)
				pfds[0].events |= POLLOUT;
			pfds[0].revents = 0;
			nfds++;
		}

		pfds[nfds].fd = selfpipe_readfd;
		pfds[nfds].events = POLLIN;
		pfds[nfds].revents = 0;
		nfds++;

		if (wakeEvents & WL_POSTMASTER_DEATH)
		{
			/* postmaster fd, if used, is always in pfds[nfds - 1] */
			pfds[nfds].fd = postmaster_alive_fds[POSTMASTER_FD_WATCH];
			pfds[nfds].events = POLLIN;
			pfds[nfds].revents = 0;
			nfds++;
		}

		/* Sleep */
		rc = poll(pfds, nfds, (int) cur_timeout);

		/* Check return code */
		if (rc < 0)
		{
			/* EINTR is okay, otherwise complain */
			if (errno != EINTR)
			{
				waiting = false;
				ereport(ERROR,
						(errcode_for_socket_access(),
						 errmsg("poll() failed: %m")));
			}
		}
		else if (rc == 0)
		{
			/* timeout exceeded */
			if (wakeEvents & WL_TIMEOUT)
				result |= WL_TIMEOUT;
		}
		else
		{
			/* at least one event occurred, so check revents values */
			if ((wakeEvents & WL_SOCKET_READABLE) &&
				(pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
			{
				/* data available in socket, or EOF/error condition */
				result |= WL_SOCKET_READABLE;
			}
			if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
				(pfds[0].revents & POLLOUT))
			{
				result |= WL_SOCKET_WRITEABLE;
			}

			/*
			 * We expect a POLLHUP when the remote end is closed, but because
			 * we don't expect the pipe to become readable or to have any
			 * errors either, treat those cases as postmaster death, too.
			 */
			if ((wakeEvents & WL_POSTMASTER_DEATH) &&
				(pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
			{
				/*
				 * According to the select(2) man page on Linux, select(2) may
				 * spuriously return and report a file descriptor as readable,
				 * when it's not; and presumably so can poll(2).  It's not
				 * clear that the relevant cases would ever apply to the
				 * postmaster pipe, but since the consequences of falsely
				 * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
				 * we take the trouble to positively verify EOF with
				 * PostmasterIsAlive().
				 */
				if (!PostmasterIsAlive())
					result |= WL_POSTMASTER_DEATH;
			}
		}
#else							/* !HAVE_POLL */

		FD_ZERO(&input_mask);
		FD_ZERO(&output_mask);

		FD_SET(selfpipe_readfd, &input_mask);
		hifd = selfpipe_readfd;

		if (wakeEvents & WL_POSTMASTER_DEATH)
		{
			FD_SET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask);
			if (postmaster_alive_fds[POSTMASTER_FD_WATCH] > hifd)
				hifd = postmaster_alive_fds[POSTMASTER_FD_WATCH];
		}

		if (wakeEvents & WL_SOCKET_READABLE)
		{
			FD_SET(sock, &input_mask);
			if (sock > hifd)
				hifd = sock;
		}

		if (wakeEvents & WL_SOCKET_WRITEABLE)
		{
			FD_SET(sock, &output_mask);
			if (sock > hifd)
				hifd = sock;
		}

		/* Sleep */
		rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);

		/* Check return code */
		if (rc < 0)
		{
			/* EINTR is okay, otherwise complain */
			if (errno != EINTR)
			{
				waiting = false;
				ereport(ERROR,
						(errcode_for_socket_access(),
						 errmsg("select() failed: %m")));
			}
		}
		else if (rc == 0)
		{
			/* timeout exceeded */
			if (wakeEvents & WL_TIMEOUT)
				result |= WL_TIMEOUT;
		}
		else
		{
			/* at least one event occurred, so check masks */
			if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
			{
				/* data available in socket, or EOF */
				result |= WL_SOCKET_READABLE;
			}
			if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
			{
				result |= WL_SOCKET_WRITEABLE;
			}
			if ((wakeEvents & WL_POSTMASTER_DEATH) &&
			FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
			{
				/*
				 * According to the select(2) man page on Linux, select(2) may
				 * spuriously return and report a file descriptor as readable,
				 * when it's not; and presumably so can poll(2).  It's not
				 * clear that the relevant cases would ever apply to the
				 * postmaster pipe, but since the consequences of falsely
				 * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
				 * we take the trouble to positively verify EOF with
				 * PostmasterIsAlive().
				 */
				if (!PostmasterIsAlive())
					result |= WL_POSTMASTER_DEATH;
			}
		}
#endif   /* HAVE_POLL */

		/* If we're not done, update cur_timeout for next iteration */
		if (result == 0 && cur_timeout >= 0)
		{
			INSTR_TIME_SET_CURRENT(cur_time);
			INSTR_TIME_SUBTRACT(cur_time, start_time);
			cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
			if (cur_timeout < 0)
				cur_timeout = 0;

#ifndef HAVE_POLL
			tv.tv_sec = cur_timeout / 1000L;
			tv.tv_usec = (cur_timeout % 1000L) * 1000L;
#endif
		}
	} while (result == 0);
	waiting = false;

	return result;
}
예제 #24
0
/* Main loop of walsender process */
static int
WalSndLoop(void)
{
    char	   *output_message;
    bool		caughtup = false;

    /*
     * Allocate buffer that will be used for each output message.  We do this
     * just once to reduce palloc overhead.  The buffer must be made large
     * enough for maximum-sized messages.
     */
    output_message = palloc(1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE);

    /*
     * Allocate buffer that will be used for processing reply messages.  As
     * above, do this just once to reduce palloc overhead.
     */
    initStringInfo(&reply_message);

    /* Initialize the last reply timestamp */
    last_reply_timestamp = GetCurrentTimestamp();

    /* Loop forever, unless we get an error */
    for (;;)
    {
        /*
         * Emergency bailout if postmaster has died.  This is to avoid the
         * necessity for manual cleanup of all postmaster children.
         */
        if (!PostmasterIsAlive(true))
            exit(1);

        /* Process any requests or signals received recently */
        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);
            SyncRepInitConfig();
        }

        /* Normal exit from the walsender is here */
        if (walsender_shutdown_requested)
        {
            /* Inform the standby that XLOG streaming was done */
            pq_puttextmessage('C', "COPY 0");
            pq_flush();

            proc_exit(0);
        }

        /*
         * If we don't have any pending data in the output buffer, try to send
         * some more.
         */
        if (!pq_is_send_pending())
        {
            XLogSend(output_message, &caughtup);

            /*
             * Even if we wrote all the WAL that was available when we started
             * sending, more might have arrived while we were sending this
             * batch. We had the latch set while sending, so we have not
             * received any signals from that time. Let's arm the latch again,
             * and after that check that we're still up-to-date.
             */
            if (caughtup && !pq_is_send_pending())
            {
                ResetLatch(&MyWalSnd->latch);

                XLogSend(output_message, &caughtup);
            }
        }

        /* Flush pending output to the client */
        if (pq_flush_if_writable() != 0)
            break;

        /*
         * When SIGUSR2 arrives, we send any outstanding logs up to the
         * shutdown checkpoint record (i.e., the latest record) and exit.
         */
        if (walsender_ready_to_stop && !pq_is_send_pending())
        {
            XLogSend(output_message, &caughtup);
            ProcessRepliesIfAny();
            if (caughtup && !pq_is_send_pending())
                walsender_shutdown_requested = true;
        }

        if ((caughtup || pq_is_send_pending()) &&
                !got_SIGHUP &&
                !walsender_shutdown_requested)
        {
            TimestampTz finish_time = 0;
            long		sleeptime;

            /* Reschedule replication timeout */
            if (replication_timeout > 0)
            {
                long		secs;
                int			usecs;

                finish_time = TimestampTzPlusMilliseconds(last_reply_timestamp,
                              replication_timeout);
                TimestampDifference(GetCurrentTimestamp(),
                                    finish_time, &secs, &usecs);
                sleeptime = secs * 1000 + usecs / 1000;
                if (WalSndDelay < sleeptime)
                    sleeptime = WalSndDelay;
            }
            else
            {
                /*
                 * XXX: Without timeout, we don't really need the periodic
                 * wakeups anymore, WaitLatchOrSocket should reliably wake up
                 * as soon as something interesting happens.
                 */
                sleeptime = WalSndDelay;
            }

            /* Sleep */
            WaitLatchOrSocket(&MyWalSnd->latch, MyProcPort->sock,
                              true, pq_is_send_pending(),
                              sleeptime);

            /* Check for replication timeout */
            if (replication_timeout > 0 &&
                    GetCurrentTimestamp() >= finish_time)
            {
                /*
                 * Since typically expiration of replication timeout means
                 * communication problem, we don't send the error message to
                 * the standby.
                 */
                ereport(COMMERROR,
                        (errmsg("terminating walsender process due to replication timeout")));
                break;
            }
        }

        /*
         * If we're in catchup state, see if its time to move to streaming.
         * This is an important state change for users, since before this
         * point data loss might occur if the primary dies and we need to
         * failover to the standby. The state change is also important for
         * synchronous replication, since commits that started to wait at that
         * point might wait for some time.
         */
        if (MyWalSnd->state == WALSNDSTATE_CATCHUP && caughtup)
        {
            ereport(DEBUG1,
                    (errmsg("standby \"%s\" has now caught up with primary",
                            application_name)));
            WalSndSetState(WALSNDSTATE_STREAMING);
        }

        ProcessRepliesIfAny();
    }

    /*
     * Get here on send failure.  Clean up and exit.
     *
     * Reset whereToSendOutput to prevent ereport from attempting to send any
     * more messages to the standby.
     */
    if (whereToSendOutput == DestRemote)
        whereToSendOutput = DestNone;

    proc_exit(0);
    return 1;					/* keep the compiler quiet */
}
예제 #25
0
파일: win32_latch.c 프로젝트: botp/postgres
int
WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
				  long timeout)
{
	DWORD		rc;
	HANDLE		events[4];
	HANDLE		latchevent;
	HANDLE		sockevent = WSA_INVALID_EVENT;
	int			numevents;
	int			result = 0;
	int			pmdeath_eventno = 0;

	/* Ignore WL_SOCKET_* events if no valid socket is given */
	if (sock == PGINVALID_SOCKET)
		wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);

	Assert(wakeEvents != 0);	/* must have at least one wake event */
	/* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */
	Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE);

	if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
		elog(ERROR, "cannot wait on a latch owned by another process");

	/* Convert timeout to form used by WaitForMultipleObjects() */
	if (wakeEvents & WL_TIMEOUT)
		Assert(timeout >= 0);
	else
		timeout = INFINITE;

	/*
	 * Construct an array of event handles for WaitforMultipleObjects().
	 *
	 * Note: pgwin32_signal_event should be first to ensure that it will be
	 * reported when multiple events are set.  We want to guarantee that
	 * pending signals are serviced.
	 */
	latchevent = latch->event;

	events[0] = pgwin32_signal_event;
	events[1] = latchevent;
	numevents = 2;
	if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
	{
		/* Need an event object to represent events on the socket */
		int			flags = 0;

		if (wakeEvents & WL_SOCKET_READABLE)
			flags |= (FD_READ | FD_CLOSE);
		if (wakeEvents & WL_SOCKET_WRITEABLE)
			flags |= FD_WRITE;

		sockevent = WSACreateEvent();
		if (sockevent == WSA_INVALID_EVENT)
			elog(ERROR, "failed to create event for socket: error code %u",
				 WSAGetLastError());
		if (WSAEventSelect(sock, sockevent, flags) != 0)
			elog(ERROR, "failed to set up event for socket: error code %u",
				 WSAGetLastError());

		events[numevents++] = sockevent;
	}
	if (wakeEvents & WL_POSTMASTER_DEATH)
	{
		pmdeath_eventno = numevents;
		events[numevents++] = PostmasterHandle;
	}

	/* Ensure that signals are serviced even if latch is already set */
	pgwin32_dispatch_queued_signals();

	do
	{
		/*
		 * Reset the event, and check if the latch is set already. If someone
		 * sets the latch between this and the WaitForMultipleObjects() call
		 * below, the setter will set the event and WaitForMultipleObjects()
		 * will return immediately.
		 */
		if (!ResetEvent(latchevent))
			elog(ERROR, "ResetEvent failed: error code %lu", GetLastError());

		if ((wakeEvents & WL_LATCH_SET) && latch->is_set)
		{
			result |= WL_LATCH_SET;

			/*
			 * Leave loop immediately, avoid blocking again. We don't attempt
			 * to report any other events that might also be satisfied.
			 */
			break;
		}

		rc = WaitForMultipleObjects(numevents, events, FALSE, timeout);

		if (rc == WAIT_FAILED)
			elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
				 GetLastError());
		else if (rc == WAIT_TIMEOUT)
		{
			result |= WL_TIMEOUT;
		}
		else if (rc == WAIT_OBJECT_0)
		{
			/* Service newly-arrived signals */
			pgwin32_dispatch_queued_signals();
		}
		else if (rc == WAIT_OBJECT_0 + 1)
		{
			/* Latch is set, we'll handle that on next iteration of loop */
		}
		else if ((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) &&
				 rc == WAIT_OBJECT_0 + 2)		/* socket is at event slot 2 */
		{
			WSANETWORKEVENTS resEvents;

			ZeroMemory(&resEvents, sizeof(resEvents));
			if (WSAEnumNetworkEvents(sock, sockevent, &resEvents) != 0)
				elog(ERROR, "failed to enumerate network events: error code %u",
					 WSAGetLastError());
			if ((wakeEvents & WL_SOCKET_READABLE) &&
				(resEvents.lNetworkEvents & (FD_READ | FD_CLOSE)))
			{
				result |= WL_SOCKET_READABLE;
			}
			if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
				(resEvents.lNetworkEvents & FD_WRITE))
			{
				result |= WL_SOCKET_WRITEABLE;
			}
		}
		else if ((wakeEvents & WL_POSTMASTER_DEATH) &&
				 rc == WAIT_OBJECT_0 + pmdeath_eventno)
		{
			/*
			 * Postmaster apparently died.	Since the consequences of falsely
			 * returning WL_POSTMASTER_DEATH could be pretty unpleasant, we
			 * take the trouble to positively verify this with
			 * PostmasterIsAlive(), even though there is no known reason to
			 * think that the event could be falsely set on Windows.
			 */
			if (!PostmasterIsAlive())
				result |= WL_POSTMASTER_DEATH;
		}
		else
			elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc);
	}
	while (result == 0);

	/* Clean up the event object we created for the socket */
	if (sockevent != WSA_INVALID_EVENT)
	{
		WSAEventSelect(sock, NULL, 0);
		WSACloseEvent(sockevent);
	}

	return result;
}
예제 #26
0
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 */
void
BackgroundWriterMain(void)
{
    sigjmp_buf	local_sigjmp_buf;
    MemoryContext bgwriter_context;

    am_bg_writer = true;

    /*
     * If possible, make this process a group leader, so that the postmaster
     * can signal any child processes too.	(bgwriter probably never has any
     * child processes, but for consistency we make all postmaster child
     * processes do this.)
     */
#ifdef HAVE_SETSID
    if (setsid() < 0)
        elog(FATAL, "setsid() failed: %m");
#endif

    /*
     * Properly accept or ignore signals the postmaster might send us
     *
     * SIGUSR1 is presently unused; keep it spare in case someday we want this
     * process to participate in ProcSignal signalling.
     */
    pqsignal(SIGHUP, BgSigHupHandler);	/* set flag to read config file */
    pqsignal(SIGINT, SIG_IGN);			/* as of 9.2 no longer requests checkpoint */
    pqsignal(SIGTERM, ReqShutdownHandler); 	/* shutdown */
    pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
    pqsignal(SIGALRM, SIG_IGN);
    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR1, SIG_IGN);			/* reserve for ProcSignal */
    pqsignal(SIGUSR2, SIG_IGN);

    /*
     * Reset some signals that are accepted by postmaster but not here
     */
    pqsignal(SIGCHLD, SIG_DFL);
    pqsignal(SIGTTIN, SIG_DFL);
    pqsignal(SIGTTOU, SIG_DFL);
    pqsignal(SIGCONT, SIG_DFL);
    pqsignal(SIGWINCH, SIG_DFL);

    /* We allow SIGQUIT (quickdie) at all times */
    sigdelset(&BlockSig, SIGQUIT);

    /*
     * Create a resource owner to keep track of our resources (currently only
     * buffer pins).
     */
    CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

    /*
     * Create a memory context that we will do all our work in.  We do this so
     * that we can reset the context during error recovery and thereby avoid
     * possible memory leaks.  Formerly this code just ran in
     * TopMemoryContext, but resetting that would be a really bad idea.
     */
    bgwriter_context = AllocSetContextCreate(TopMemoryContext,
                       "Background Writer",
                       ALLOCSET_DEFAULT_MINSIZE,
                       ALLOCSET_DEFAULT_INITSIZE,
                       ALLOCSET_DEFAULT_MAXSIZE);
    MemoryContextSwitchTo(bgwriter_context);

    /*
     * If an exception is encountered, processing resumes here.
     *
     * See notes in postgres.c about the design of this coding.
     */
    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        /* Since not using PG_TRY, must reset error stack by hand */
        error_context_stack = NULL;

        /* Prevent interrupts while cleaning up */
        HOLD_INTERRUPTS();

        /* Report the error to the server log */
        EmitErrorReport();

        /*
         * These operations are really just a minimal subset of
         * AbortTransaction().	We don't have very many resources to worry
         * about in bgwriter, but we do have LWLocks, buffers, and temp files.
         */
        LWLockReleaseAll();
        AbortBufferIO();
        UnlockBuffers();
        /* buffer pins are released here: */
        ResourceOwnerRelease(CurrentResourceOwner,
                             RESOURCE_RELEASE_BEFORE_LOCKS,
                             false, true);
        /* we needn't bother with the other ResourceOwnerRelease phases */
        AtEOXact_Buffers(false);
        AtEOXact_Files();
        AtEOXact_HashTables(false);

        /*
         * Now return to normal top-level context and clear ErrorContext for
         * next time.
         */
        MemoryContextSwitchTo(bgwriter_context);
        FlushErrorState();

        /* Flush any leaked data in the top-level context */
        MemoryContextResetAndDeleteChildren(bgwriter_context);

        /* Now we can allow interrupts again */
        RESUME_INTERRUPTS();

        /*
         * Sleep at least 1 second after any error.  A write error is likely
         * to be repeated, and we don't want to be filling the error logs as
         * fast as we can.
         */
        pg_usleep(1000000L);

        /*
         * Close all open files after any error.  This is helpful on Windows,
         * where holding deleted files open causes various strange errors.
         * It's not clear we need it elsewhere, but shouldn't hurt.
         */
        smgrcloseall();
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    /*
     * Unblock signals (they were blocked when the postmaster forked us)
     */
    PG_SETMASK(&UnBlockSig);

    /*
     * Use the recovery target timeline ID during recovery
     */
    if (RecoveryInProgress())
        ThisTimeLineID = GetRecoveryTargetTLI();

    /*
     * Loop forever
     */
    for (;;)
    {
        /*
         * Emergency bailout if postmaster has died.  This is to avoid the
         * necessity for manual cleanup of all postmaster children.
         */
        if (!PostmasterIsAlive())
            exit(1);

        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);
            /* update global shmem state for sync rep */
        }
        if (shutdown_requested)
        {
            /*
             * From here on, elog(ERROR) should end with exit(1), not send
             * control back to the sigsetjmp block above
             */
            ExitOnAnyError = true;
            /* Normal exit from the bgwriter is here */
            proc_exit(0);		/* done */
        }

        /*
         * Do one cycle of dirty-buffer writing.
         */
        BgBufferSync();

        /* Nap for the configured time. */
        BgWriterNap();
    }
}
예제 #27
0
/*
 * Execute commands from walreceiver, until we enter streaming mode.
 */
static void
WalSndHandshake(void)
{
	StringInfoData input_message;
	bool		replication_started = false;

	initStringInfo(&input_message);

	while (!replication_started)
	{
		int			firstchar;

		WalSndSetState(WALSNDSTATE_STARTUP);
		set_ps_display("idle", false);

		/* Wait for a command to arrive */
		firstchar = pq_getbyte();

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive())
			exit(1);

		/*
		 * Check for any other interesting events that happened while we
		 * slept.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		if (firstchar != EOF)
		{
			/*
			 * Read the message contents. This is expected to be done without
			 * blocking because we've been able to get message type code.
			 */
			if (pq_getmessage(&input_message, 0))
				firstchar = EOF;	/* suitable message already logged */
		}

		/* Handle the very limited subset of commands expected in this phase */
		switch (firstchar)
		{
			case 'Q':			/* Query message */
				{
					const char *query_string;

					query_string = pq_getmsgstring(&input_message);
					pq_getmsgend(&input_message);

					if (HandleReplicationCommand(query_string))
						replication_started = true;
				}
				break;

			case 'X':
				/* standby is closing the connection */
				proc_exit(0);

			case EOF:
				/* standby disconnected unexpectedly */
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("unexpected EOF on standby connection")));
				proc_exit(0);

			default:
				ereport(FATAL,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("invalid standby handshake message type %d", firstchar)));
		}
	}
}
예제 #28
0
/*
 * Execute commands from walreceiver, until we enter streaming mode.
 */
static void
WalSndHandshake(void)
{
	StringInfoData input_message;
	bool		replication_started = false;

	initStringInfo(&input_message);

	while (!replication_started)
	{
		int			firstchar;

		/* Wait for a command to arrive */
		firstchar = pq_getbyte();

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Check for any other interesting events that happened while we
		 * slept.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		if (firstchar != EOF)
		{
			/*
			 * Read the message contents. This is expected to be done without
			 * blocking because we've been able to get message type code.
			 */
			if (pq_getmessage(&input_message, 0))
				firstchar = EOF;	/* suitable message already logged */
		}

		/* Handle the very limited subset of commands expected in this phase */
		switch (firstchar)
		{
			case 'Q':			/* Query message */
				{
					const char *query_string;
					XLogRecPtr	recptr;

					query_string = pq_getmsgstring(&input_message);
					pq_getmsgend(&input_message);

					if (strcmp(query_string, "IDENTIFY_SYSTEM") == 0)
					{
						StringInfoData buf;
						char		sysid[32];
						char		tli[11];

						/*
						 * Reply with a result set with one row, two columns.
						 * First col is system ID, and second is timeline ID
						 */

						snprintf(sysid, sizeof(sysid), UINT64_FORMAT,
								 GetSystemIdentifier());
						snprintf(tli, sizeof(tli), "%u", ThisTimeLineID);

						/* Send a RowDescription message */
						pq_beginmessage(&buf, 'T');
						pq_sendint(&buf, 2, 2); /* 2 fields */

						/* first field */
						pq_sendstring(&buf, "systemid");		/* col name */
						pq_sendint(&buf, 0, 4); /* table oid */
						pq_sendint(&buf, 0, 2); /* attnum */
						pq_sendint(&buf, TEXTOID, 4);	/* type oid */
						pq_sendint(&buf, -1, 2);		/* typlen */
						pq_sendint(&buf, 0, 4); /* typmod */
						pq_sendint(&buf, 0, 2); /* format code */

						/* second field */
						pq_sendstring(&buf, "timeline");		/* col name */
						pq_sendint(&buf, 0, 4); /* table oid */
						pq_sendint(&buf, 0, 2); /* attnum */
						pq_sendint(&buf, INT4OID, 4);	/* type oid */
						pq_sendint(&buf, 4, 2); /* typlen */
						pq_sendint(&buf, 0, 4); /* typmod */
						pq_sendint(&buf, 0, 2); /* format code */
						pq_endmessage(&buf);

						/* Send a DataRow message */
						pq_beginmessage(&buf, 'D');
						pq_sendint(&buf, 2, 2); /* # of columns */
						pq_sendint(&buf, strlen(sysid), 4);		/* col1 len */
						pq_sendbytes(&buf, (char *) &sysid, strlen(sysid));
						pq_sendint(&buf, strlen(tli), 4);		/* col2 len */
						pq_sendbytes(&buf, (char *) tli, strlen(tli));
						pq_endmessage(&buf);

						/* Send CommandComplete and ReadyForQuery messages */
						EndCommand("SELECT", DestRemote);
						ReadyForQuery(DestRemote);
						/* ReadyForQuery did pq_flush for us */
					}
					else if (sscanf(query_string, "START_REPLICATION %X/%X",
									&recptr.xlogid, &recptr.xrecoff) == 2)
					{
						StringInfoData buf;

						/*
						 * Check that we're logging enough information in the
						 * WAL for log-shipping.
						 *
						 * NOTE: This only checks the current value of
						 * wal_level. Even if the current setting is not
						 * 'minimal', there can be old WAL in the pg_xlog
						 * directory that was created with 'minimal'. So this
						 * is not bulletproof, the purpose is just to give a
						 * user-friendly error message that hints how to
						 * configure the system correctly.
						 */
						if (wal_level == WAL_LEVEL_MINIMAL)
							ereport(FATAL,
									(errcode(ERRCODE_CANNOT_CONNECT_NOW),
									 errmsg("standby connections not allowed because wal_level=minimal")));

						/* Send a CopyOutResponse message, and start streaming */
						pq_beginmessage(&buf, 'H');
						pq_sendbyte(&buf, 0);
						pq_sendint(&buf, 0, 2);
						pq_endmessage(&buf);
						pq_flush();

						/*
						 * Initialize position to the received one, then the
						 * xlog records begin to be shipped from that position
						 */
						sentPtr = recptr;

						/* break out of the loop */
						replication_started = true;
					}
					else
					{
						ereport(FATAL,
								(errcode(ERRCODE_PROTOCOL_VIOLATION),
								 errmsg("invalid standby query string: %s", query_string)));
					}
					break;
				}

			case 'X':
				/* standby is closing the connection */
				proc_exit(0);

			case EOF:
				/* standby disconnected unexpectedly */
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("unexpected EOF on standby connection")));
				proc_exit(0);

			default:
				ereport(FATAL,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("invalid standby handshake message type %d", firstchar)));
		}
	}
}
예제 #29
0
파일: pgarch.c 프로젝트: phpadmin/postgres
/*
 * pgarch_MainLoop
 *
 * Main loop for archiver
 */
static void
pgarch_MainLoop(void)
{
	pg_time_t	last_copy_time = 0;
	bool		time_to_stop;

	/*
	 * We run the copy loop immediately upon entry, in case there are
	 * unarchived files left over from a previous database run (or maybe the
	 * archiver died unexpectedly).  After that we wait for a signal or
	 * timeout before doing more.
	 */
	wakened = true;

	/*
	 * There shouldn't be anything for the archiver to do except to wait
	 * for a signal ... however, the archiver exists to protect our data,
	 * so she wakes up occasionally to allow herself to be proactive.
	 */
	do
	{
		ResetLatch(&mainloop_latch);

		/* When we get SIGUSR2, we do one more archive cycle, then exit */
		time_to_stop = ready_to_stop;

		/* Check for config update */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/*
		 * If we've gotten SIGTERM, we normally just sit and do nothing until
		 * SIGUSR2 arrives.  However, that means a random SIGTERM would
		 * disable archiving indefinitely, which doesn't seem like a good
		 * idea.  If more than 60 seconds pass since SIGTERM, exit anyway, so
		 * that the postmaster can start a new archiver if needed.
		 */
		if (got_SIGTERM)
		{
			time_t		curtime = time(NULL);

			if (last_sigterm_time == 0)
				last_sigterm_time = curtime;
			else if ((unsigned int) (curtime - last_sigterm_time) >=
					 (unsigned int) 60)
				break;
		}

		/* Do what we're here for */
		if (wakened || time_to_stop)
		{
			wakened = false;
			pgarch_ArchiverCopyLoop();
			last_copy_time = time(NULL);
		}

		/*
		 * Sleep until a signal is received, or until a poll is forced by
		 * PGARCH_AUTOWAKE_INTERVAL having passed since last_copy_time, or
		 * until postmaster dies.
		 */
		if (!time_to_stop) /* Don't wait during last iteration */
		{
			pg_time_t curtime = (pg_time_t) time(NULL);
			int		timeout;

			timeout = PGARCH_AUTOWAKE_INTERVAL - (curtime - last_copy_time);
			if (timeout > 0)
			{
				int rc;
				rc = WaitLatch(&mainloop_latch,
							   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
							   timeout * 1000000L);
				if (rc & WL_TIMEOUT)
					wakened = true;
			}
			else
				wakened = true;
		}

		/*
		 * The archiver quits either when the postmaster dies (not expected)
		 * or after completing one more archiving cycle after receiving
		 * SIGUSR2.
		 */
	} while (PostmasterIsAlive() && !time_to_stop);
}
예제 #30
0
/*
 * Wait for synchronous replication, if requested by user.
 *
 * Initially backends start in state SYNC_REP_NOT_WAITING and then
 * change that state to SYNC_REP_WAITING before adding ourselves
 * to the wait queue. During SyncRepWakeQueue() a WALSender changes
 * the state to SYNC_REP_WAIT_COMPLETE once replication is confirmed.
 * This backend then resets its state to SYNC_REP_NOT_WAITING.
 */
void
SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
{
	char	   *new_status = NULL;
	const char *old_status;
	int			mode = SyncRepWaitMode;

	/*
	 * Fast exit if user has not requested sync replication, or there are no
	 * sync replication standby names defined. Note that those standbys don't
	 * need to be connected.
	 */
	if (!SyncRepRequested() || !SyncStandbysDefined())
		return;

	Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));
	Assert(WalSndCtl != NULL);

	LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
	Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING);

	/*
	 * We don't wait for sync rep if WalSndCtl->sync_standbys_defined is not
	 * set.  See SyncRepUpdateSyncStandbysDefined.
	 *
	 * Also check that the standby hasn't already replied. Unlikely race
	 * condition but we'll be fetching that cache line anyway so its likely to
	 * be a low cost check.
	 */
	if (!WalSndCtl->sync_standbys_defined ||
		XLByteLE(XactCommitLSN, WalSndCtl->lsn[mode]))
	{
		LWLockRelease(SyncRepLock);
		return;
	}

	/*
	 * Set our waitLSN so WALSender will know when to wake us, and add
	 * ourselves to the queue.
	 */
	MyProc->waitLSN = XactCommitLSN;
	MyProc->syncRepState = SYNC_REP_WAITING;
	SyncRepQueueInsert(mode);
	Assert(SyncRepQueueIsOrderedByLSN(mode));
	LWLockRelease(SyncRepLock);

	/* Alter ps display to show waiting for sync rep. */
	if (update_process_title)
	{
		int			len;

		old_status = get_ps_display(&len);
		new_status = (char *) palloc(len + 32 + 1);
		memcpy(new_status, old_status, len);
		sprintf(new_status + len, " waiting for %X/%X",
				XactCommitLSN.xlogid, XactCommitLSN.xrecoff);
		set_ps_display(new_status, false);
		new_status[len] = '\0'; /* truncate off " waiting ..." */
	}

	/*
	 * Wait for specified LSN to be confirmed.
	 *
	 * Each proc has its own wait latch, so we perform a normal latch
	 * check/wait loop here.
	 */
	for (;;)
	{
		int			syncRepState;

		/* Must reset the latch before testing state. */
		ResetLatch(&MyProc->procLatch);

		/*
		 * Try checking the state without the lock first.  There's no
		 * guarantee that we'll read the most up-to-date value, so if it looks
		 * like we're still waiting, recheck while holding the lock.  But if
		 * it looks like we're done, we must really be done, because once
		 * walsender changes the state to SYNC_REP_WAIT_COMPLETE, it will
		 * never update it again, so we can't be seeing a stale value in that
		 * case.
		 *
		 * Note: on machines with weak memory ordering, the acquisition of the
		 * lock is essential to avoid race conditions: we cannot be sure the
		 * sender's state update has reached main memory until we acquire the
		 * lock.  We could get rid of this dance if SetLatch/ResetLatch
		 * contained memory barriers.
		 */
		syncRepState = MyProc->syncRepState;
		if (syncRepState == SYNC_REP_WAITING)
		{
			LWLockAcquire(SyncRepLock, LW_SHARED);
			syncRepState = MyProc->syncRepState;
			LWLockRelease(SyncRepLock);
		}
		if (syncRepState == SYNC_REP_WAIT_COMPLETE)
			break;

		/*
		 * If a wait for synchronous replication is pending, we can neither
		 * acknowledge the commit nor raise ERROR or FATAL.  The latter would
		 * lead the client to believe that that the transaction aborted, which
		 * is not true: it's already committed locally. The former is no good
		 * either: the client has requested synchronous replication, and is
		 * entitled to assume that an acknowledged commit is also replicated,
		 * which might not be true. So in this case we issue a WARNING (which
		 * some clients may be able to interpret) and shut off further output.
		 * We do NOT reset ProcDiePending, so that the process will die after
		 * the commit is cleaned up.
		 */
		if (ProcDiePending)
		{
			ereport(WARNING,
					(errcode(ERRCODE_ADMIN_SHUTDOWN),
					 errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
					 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
			whereToSendOutput = DestNone;
			SyncRepCancelWait();
			break;
		}

		/*
		 * It's unclear what to do if a query cancel interrupt arrives.  We
		 * can't actually abort at this point, but ignoring the interrupt
		 * altogether is not helpful, so we just terminate the wait with a
		 * suitable warning.
		 */
		if (QueryCancelPending)
		{
			QueryCancelPending = false;
			ereport(WARNING,
					(errmsg("canceling wait for synchronous replication due to user request"),
					 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
			SyncRepCancelWait();
			break;
		}

		/*
		 * If the postmaster dies, we'll probably never get an
		 * acknowledgement, because all the wal sender processes will exit. So
		 * just bail out.
		 */
		if (!PostmasterIsAlive())
		{
			ProcDiePending = true;
			whereToSendOutput = DestNone;
			SyncRepCancelWait();
			break;
		}

		/*
		 * Wait on latch.  Any condition that should wake us up will set the
		 * latch, so no need for timeout.
		 */
		WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1);
	}

	/*
	 * WalSender has checked our LSN and has removed us from queue. Clean up
	 * state and leave.  It's OK to reset these shared memory fields without
	 * holding SyncRepLock, because any walsenders will ignore us anyway when
	 * we're not on the queue.
	 */
	Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));
	MyProc->syncRepState = SYNC_REP_NOT_WAITING;
	MyProc->waitLSN.xlogid = 0;
	MyProc->waitLSN.xrecoff = 0;

	if (new_status)
	{
		/* Reset ps display */
		set_ps_display(new_status, false);
		pfree(new_status);
	}
}