static void
WalSendServer_ServiceShutdown(void)
{
	PG_TRY();
	{
		disableQDMirroring_ShutDown();
		if (disconnectMirrorQD_SendClose())
			elog(LOG,"Master mirror disconnected");
	}
	PG_CATCH();
	{
		/* 
		 * Report the error related to reading the primary's WAL
		 * to the server log 
		 */
	    if (!elog_demote(NOTICE))
    	{
    		elog(LOG,"unable to demote error");
        	PG_RE_THROW();
    	}
		EmitErrorReport();
		FlushErrorState();
	}
	PG_END_TRY();
}
Beispiel #2
0
/*
 * Used by clients to send a request to a service.
 */
bool
ServiceClientSendRequest(ServiceClient *serviceClient, void* request, int requestLen)
{
	ServiceConfig *serviceConfig;
	char        *message;
	bool		result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	Assert(request != NULL);

	PG_TRY();
	{
		SUPPRESS_PANIC();

		serviceConfig = serviceClient->serviceConfig;
		if (serviceConfig == NULL)
		{
			ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						    errmsg("Not connected to '%s'",
						           serviceConfig->title)));
		}
		if (requestLen != serviceClient->serviceConfig->requestLen)
		{
			ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							 errmsg("Expecting request length %d and actual length is %d for '%s'",
				 					serviceClient->serviceConfig->requestLen, requestLen,
				 					serviceConfig->title)));
		}

		result = ServiceClientWrite(serviceClient, request, requestLen);
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();
		/* Report the error to the server log */
	    if (!elog_demote(WARNING))
    	{
    		elog(LOG,"unable to demote error");
        	PG_RE_THROW();
    	}

		message = elog_message();
 		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;

}
static bool WalSendServer_ServiceRequest(ServiceCtrl *serviceCtrl, int sockfd, uint8 *request)
{
	WalSendRequest *walSendRequest = (WalSendRequest*)request;
	WalSendResponse walSendResponse;
	bool result = false;

	/* 
	 * Use a TRY block to catch unexpected errors that bubble up to this level
	 * and disable QD mirroring.
	 */
	PG_TRY();
	{
		if (Debug_print_qd_mirroring)
			elog(LOG, "request command %d = '%s'",
			     walSendRequest->command, 
			     WalSendRequestCommandToString(walSendRequest->command));

		WalSendServerDoRequest(walSendRequest);

		/*
		 * Currently, all requests need a response.
		 */
		walSendResponse.ok = true;

		result = ServiceProcessRespond(serviceCtrl, sockfd, (uint8*)&walSendResponse, sizeof(walSendResponse));
	}
	PG_CATCH();
	{
		/* 
		 * Report the unexpected error.
		 */
		EmitErrorReport();
		FlushErrorState();

		disableQDMirroring_UnexpectedError(
			"An unexpected error encountered.  Please report this problem to Greenplum");

		result = false;
	}
	PG_END_TRY();

	return result;
}
Beispiel #4
0
static void *
exec_func(void *arg)
{
	unsigned long rtn = (unsigned long)WD_NG;
	MemoryContext oldContext =  CurrentMemoryContext;
	WdThreadInfo* thread_arg = (WdThreadInfo*) arg;
	Assert(thread_arg != NULL);
	
	PG_TRY();
	{
		rtn = thread_arg->start_routine(thread_arg->arg);
	}
	PG_CATCH();
	{
		/* ignore the error message */
		EmitErrorReport();
		MemoryContextSwitchTo(oldContext);
		FlushErrorState();
	}
	PG_END_TRY();
	return rtn;
}
Beispiel #5
0
pid_t
wd_child(int fork_wait_time)
{
	int sock;
	volatile int fd;
	int rtn;
	pid_t pid = 0;
	sigjmp_buf	local_sigjmp_buf;

	pid = fork();
	if (pid != 0)
	{
		if (pid == -1)
			ereport(PANIC,
					(errmsg("failed to fork a watchdog process")));

		return pid;
	}

	on_exit_reset();
	processType = PT_WATCHDOG;

	if (fork_wait_time > 0)
	{
		sleep(fork_wait_time);
	}

	POOL_SETMASK(&UnBlockSig);

	signal(SIGTERM, wd_child_exit);
	signal(SIGINT, wd_child_exit);
	signal(SIGQUIT, wd_child_exit);
	signal(SIGCHLD, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGUSR2, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);

	init_ps_display("", "", "", "");

	if (WD_List == NULL)
	{
		/* memory allocate is not ready */
		wd_child_exit(15);
	}
	/* Create per loop iteration memory context */
	ProcessLoopContext = AllocSetContextCreate(TopMemoryContext,
											   "wd_child_main_loop",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);
	
	MemoryContextSwitchTo(TopMemoryContext);


	sock = wd_create_recv_socket(WD_MYSELF->wd_port);

	if (sock < 0)
	{
		/* socket create failed */
		wd_child_exit(15);
	}

	set_ps_display("watchdog", false);

	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		if(fd > 0)
			close(fd);

		error_context_stack = NULL;
		
		EmitErrorReport();
		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();
	}
	
	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/* child loop */
	for(;;)
	{
		MemoryContextSwitchTo(ProcessLoopContext);
		MemoryContextResetAndDeleteChildren(ProcessLoopContext);
		fd = -1;
		WdPacket buf;

		fd = wd_accept(sock);
		if (fd < 0)
		{
			continue;
		}
		rtn = wd_recv_packet(fd, &buf);
		if (rtn == WD_OK)
		{
			wd_send_response(fd, &buf);
		}
		close(fd);
	}
	return pid;
}
Beispiel #6
0
/* 
 * main entry pont of pcp worker child process
 */
void
pcp_worker_main(int port)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext PCPMemoryContext;
	int authenticated = 0;

	char salt[4];
	int random_salt = 0;
	struct timeval uptime;
	char tos;
	int rsize;
	char *buf = NULL;

	ereport(DEBUG1,
			(errmsg("I am PCP worker child with pid:%d",getpid())));

	/* Identify myself via ps */
	init_ps_display("", "", "", "");

	gettimeofday(&uptime, NULL);
	srandom((unsigned int) (getpid() ^ uptime.tv_usec));

	/* set up signal handlers */
	signal(SIGTERM, die);
	signal(SIGINT, die);
	signal(SIGQUIT, die);
	signal(SIGCHLD, SIG_DFL);
	signal(SIGUSR2, wakeup_handler_child);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);
	/* Create per loop iteration memory context */
	PCPMemoryContext = AllocSetContextCreate(TopMemoryContext,
											 "PCP_worker_main_loop",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);

	MemoryContextSwitchTo(TopMemoryContext);

	/*
	 * install the call back for preparation of pcp worker child exit
	 */
	on_system_exit(pcp_worker_will_go_down, (Datum)NULL);

	/* Initialize my backend status */
	pool_initialize_private_backend_status();
	
	/* Initialize process context */
	pool_init_process_context();

	pcp_frontend = pcp_open(port);
	unset_nonblock(pcp_frontend->fd);

	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		error_context_stack = NULL;
		EmitErrorReport();

		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();
	}
	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;
	
	for(;;)
	{
		MemoryContextSwitchTo(PCPMemoryContext);
		MemoryContextResetAndDeleteChildren(PCPMemoryContext);

		errno = 0;

		/* read a PCP packet */
		do_pcp_read(pcp_frontend, &tos, 1);
		do_pcp_read(pcp_frontend, &rsize, sizeof(int));

		rsize = ntohl(rsize);
		if ((rsize - sizeof(int)) > 0)
		{
			buf = (char *)palloc(rsize - sizeof(int));
			do_pcp_read(pcp_frontend, buf, rsize - sizeof(int));
		}

		ereport(DEBUG1,
			(errmsg("received PCP packet"),
				 errdetail("PCP packet type of service '%c'", tos)));

		if (tos == 'R') /* authentication */
		{
			set_ps_display("PCP: processing authentication", false);
			process_authentication(pcp_frontend, buf,salt, &random_salt);
			authenticated = 1;
			continue;
		}
		if (tos == 'M') /* md5 salt */
		{
			set_ps_display("PCP: processing authentication", false);
			send_md5salt(pcp_frontend, salt);
			random_salt = 1;
			continue;
		}
		/* is this connection authenticated? if not disconnect immediately*/
		if (!authenticated)
			ereport(FATAL,
				(errmsg("authentication failed for new PCP connection"),
					 errdetail("connection not authorized")));

		/* process a request */
		pcp_process_command(tos, buf, rsize);
	}
	exit(0);
}
Beispiel #7
0
/*
 * ContinuousQueryWorkerStartup
 *
 * Launches a CQ worker, which continuously generates partial query results to send
 * back to the combiner process.
 */
void
ContinuousQueryWorkerRun(Portal portal, ContinuousViewState *state, QueryDesc *queryDesc, ResourceOwner owner)
{
	EState	   *estate = NULL;
	DestReceiver *dest;
	CmdType		operation;
	MemoryContext oldcontext;
	int timeoutms = state->maxwaitms;
	MemoryContext runcontext;
	CQProcEntry *entry = GetCQProcEntry(MyCQId);
	ResourceOwner cqowner = ResourceOwnerCreate(NULL, "CQResourceOwner");
	bool savereadonly = XactReadOnly;

	cq_stat_initialize(state->viewid, MyProcPid);

	dest = CreateDestReceiver(DestCombiner);
	SetCombinerDestReceiverParams(dest, MyCQId);

	/* workers only need read-only transactions */
	XactReadOnly = true;

	runcontext = AllocSetContextCreate(TopMemoryContext, "CQRunContext",
			ALLOCSET_DEFAULT_MINSIZE,
			ALLOCSET_DEFAULT_INITSIZE,
			ALLOCSET_DEFAULT_MAXSIZE);

	elog(LOG, "\"%s\" worker %d running", queryDesc->plannedstmt->cq_target->relname, MyProcPid);
	MarkWorkerAsRunning(MyCQId, MyWorkerId);
	pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText);

	TupleBufferInitLatch(WorkerTupleBuffer, MyCQId, MyWorkerId, &MyProc->procLatch);

	oldcontext = MemoryContextSwitchTo(runcontext);

retry:
	PG_TRY();
	{
		bool xact_commit = true;
		TimestampTz last_process = GetCurrentTimestamp();
		TimestampTz last_commit = GetCurrentTimestamp();

		start_executor(queryDesc, runcontext, cqowner);

		CurrentResourceOwner = cqowner;

		estate = queryDesc->estate;
		operation = queryDesc->operation;

		/*
		 * Initialize context that lives for the duration of a single iteration
		 * of the main worker loop
		 */
		CQExecutionContext = AllocSetContextCreate(estate->es_query_cxt, "CQExecutionContext",
				ALLOCSET_DEFAULT_MINSIZE,
				ALLOCSET_DEFAULT_INITSIZE,
				ALLOCSET_DEFAULT_MAXSIZE);

		estate->es_lastoid = InvalidOid;

		/*
		 * Startup combiner receiver
		 */
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

		for (;;)
		{
			if (!TupleBufferHasUnreadSlots())
			{
				if (TimestampDifferenceExceeds(last_process, GetCurrentTimestamp(), state->emptysleepms))
				{
					/* force stats flush */
					cq_stat_report(true);

					pgstat_report_activity(STATE_IDLE, queryDesc->sourceText);
					TupleBufferWait(WorkerTupleBuffer, MyCQId, MyWorkerId);
					pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText);
				}
				else
					pg_usleep(Min(WAIT_SLEEP_MS, state->emptysleepms) * 1000);
			}

			TupleBufferResetNotify(WorkerTupleBuffer, MyCQId, MyWorkerId);

			if (xact_commit)
				StartTransactionCommand();

			set_snapshot(estate, cqowner);
			CurrentResourceOwner = cqowner;
			MemoryContextSwitchTo(estate->es_query_cxt);

			estate->es_processed = 0;
			estate->es_filtered = 0;

			/*
			 * Run plan on a microbatch
			 */
			ExecutePlan(estate, queryDesc->planstate, operation,
					true, 0, timeoutms, ForwardScanDirection, dest);

			IncrementCQExecutions(1);
			TupleBufferClearPinnedSlots();

			if (state->long_xact)
			{
				if (TimestampDifferenceExceeds(last_commit, GetCurrentTimestamp(), LONG_RUNNING_XACT_DURATION))
					xact_commit = true;
				else
					xact_commit = false;
			}

			unset_snapshot(estate, cqowner);
			if (xact_commit)
			{
				CommitTransactionCommand();
				last_commit = GetCurrentTimestamp();
			}

			MemoryContextResetAndDeleteChildren(CQExecutionContext);
			MemoryContextSwitchTo(runcontext);
			CurrentResourceOwner = cqowner;

			if (estate->es_processed || estate->es_filtered)
			{
				/*
				 * If the CV query is such that the select does not return any tuples
				 * ex: select id where id=99; and id=99 does not exist, then this reset
				 * will fail. What will happen is that the worker will block at the latch for every
				 * allocated slot, TILL a cv returns a non-zero tuple, at which point
				 * the worker will resume a simple sleep for the threshold time.
				 */
				last_process = GetCurrentTimestamp();

				/*
				 * Send stats to the collector
				 */
				cq_stat_report(false);
			}

			/* Has the CQ been deactivated? */
			if (!entry->active)
			{
				if (ActiveSnapshotSet())
					unset_snapshot(estate, cqowner);
				if (IsTransactionState())
					CommitTransactionCommand();
				break;
			}
		}

		CurrentResourceOwner = cqowner;

		/*
		 * The cleanup functions below expect these things to be registered
		 */
		RegisterSnapshotOnOwner(estate->es_snapshot, cqowner);
		RegisterSnapshotOnOwner(queryDesc->snapshot, cqowner);
		RegisterSnapshotOnOwner(queryDesc->crosscheck_snapshot, cqowner);

		/* cleanup */
		ExecutorFinish(queryDesc);
		ExecutorEnd(queryDesc);
		FreeQueryDesc(queryDesc);
	}
	PG_CATCH();
	{
		EmitErrorReport();
		FlushErrorState();

		/* Since the worker is read-only, we can simply commit the transaction. */
		if (ActiveSnapshotSet())
			unset_snapshot(estate, cqowner);
		if (IsTransactionState())
			CommitTransactionCommand();

		TupleBufferUnpinAllPinnedSlots();
		TupleBufferClearReaders();

		/* This resets the es_query_ctx and in turn the CQExecutionContext */
		MemoryContextResetAndDeleteChildren(runcontext);

		IncrementCQErrors(1);

		if (continuous_query_crash_recovery)
			goto retry;
	}
	PG_END_TRY();

	(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);
	MemoryContextDelete(runcontext);

	XactReadOnly = savereadonly;

	/*
	 * Remove proc-level stats
	 */
	cq_stat_report(true);
	cq_stat_send_purge(state->viewid, MyProcPid, CQ_STAT_WORKER);

	CurrentResourceOwner = owner;
}
Beispiel #8
0
static bool
ServiceDoConnect(ServiceConfig *serviceConfig, int listenerPort, ServiceClient *serviceClient, bool complain)
{
	int  n;
	struct sockaddr_in addr;
	int saved_err;
	char        *message;
	bool		result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	PG_TRY();
	{
		SUPPRESS_PANIC();

		for (;;)
		{
			/*
			 * Open a connection to the service.
			 */
			serviceClient->sockfd = socket(AF_INET, SOCK_STREAM, 0);

			addr.sin_family = AF_INET;
			addr.sin_port = htons(listenerPort);
			addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

			if ((n = connect(serviceClient->sockfd, (struct sockaddr *)&addr, sizeof(addr))) < 0)
			{
				saved_err = errno;

				close(serviceClient->sockfd);
				serviceClient->sockfd = -1;

				if (errno == EINTR)
					continue;

				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								errmsg("Could not connect to '%s': %s",
									   serviceConfig->title,
									   strerror(saved_err))));
			}
			else
			{
				//success. we're done here!
				break;
			}
		}

		/* make socket non-blocking BEFORE we connect. */
		if (!pg_set_noblock(serviceClient->sockfd))
		{
			saved_err = errno;

			close(serviceClient->sockfd);
			serviceClient->sockfd = -1;
			ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							errmsg("Could not set '%s' socket to non-blocking mode: %s",
								   serviceConfig->title,
								   strerror(saved_err))));
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
	    if (!elog_demote(WARNING))
    	{
    		elog(LOG,"unable to demote error");
        	PG_RE_THROW();
    	}

		message = elog_message();
 		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		if (complain)
			EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
Beispiel #9
0
/*
 * Main entry point for checkpointer process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 */
void
CheckpointerMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext checkpointer_context;

	CheckpointerShmem->checkpointer_pid = MyProcPid;

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we deliberately ignore SIGTERM, because during a standard Unix
	 * system shutdown cycle, init will SIGTERM all processes at once.  We
	 * want to wait for the backends to exit, whereupon the postmaster will
	 * tell us it's okay to shut down (via SIGUSR2).
	 */
	pqsignal(SIGHUP, ChkptSigHupHandler);		/* set flag to read config
												 * file */
	pqsignal(SIGINT, ReqCheckpointHandler);		/* request checkpoint */
	pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
	pqsignal(SIGQUIT, chkpt_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, chkpt_sigusr1_handler);
	pqsignal(SIGUSR2, ReqShutdownHandler);		/* request shutdown */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Initialize so that first time-driven event happens at the correct time.
	 */
	last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Checkpointer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	checkpointer_context = AllocSetContextCreate(TopMemoryContext,
												 "Checkpointer",
												 ALLOCSET_DEFAULT_SIZES);
	MemoryContextSwitchTo(checkpointer_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in checkpointer, but we do have LWLocks, buffers, and temp
		 * files.
		 */
		LWLockReleaseAll();
		ConditionVariableCancelSleep();
		pgstat_report_wait_end();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_SMgr();
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/* Warn any waiting backends that the checkpoint failed. */
		if (ckpt_active)
		{
			SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
			CheckpointerShmem->ckpt_failed++;
			CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
			SpinLockRelease(&CheckpointerShmem->ckpt_lck);

			ckpt_active = false;
		}

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(checkpointer_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(checkpointer_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Ensure all shared memory values are set correctly for the config. Doing
	 * this here ensures no race conditions from other concurrent updaters.
	 */
	UpdateSharedMemoryConfig();

	/*
	 * Advertise our latch that backends can use to wake us up while we're
	 * sleeping.
	 */
	ProcGlobal->checkpointerLatch = &MyProc->procLatch;

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		do_checkpoint = false;
		int			flags = 0;
		pg_time_t	now;
		int			elapsed_secs;
		int			cur_timeout;
		int			rc;

		/* Clear any already-pending wakeups */
		ResetLatch(MyLatch);

		/*
		 * Process any requests or signals received recently.
		 */
		AbsorbFsyncRequests();

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);

			/*
			 * Checkpointer is the last process to shut down, so we ask it to
			 * hold the keys for a range of other tasks required most of which
			 * have nothing to do with checkpointing at all.
			 *
			 * For various reasons, some config values can change dynamically
			 * so the primary copy of them is held in shared memory to make
			 * sure all backends see the same value.  We make Checkpointer
			 * responsible for updating the shared memory copy if the
			 * parameter setting changes because of SIGHUP.
			 */
			UpdateSharedMemoryConfig();
		}
		if (checkpoint_requested)
		{
			checkpoint_requested = false;
			do_checkpoint = true;
			BgWriterStats.m_requested_checkpoints++;
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Close down the database */
			ShutdownXLOG(0, 0);
			/* Normal exit from the checkpointer is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Force a checkpoint if too much time has elapsed since the last one.
		 * Note that we count a timed checkpoint in stats only when this
		 * occurs without an external request, but we set the CAUSE_TIME flag
		 * bit even if there is also an external request.
		 */
		now = (pg_time_t) time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
		{
			if (!do_checkpoint)
				BgWriterStats.m_timed_checkpoints++;
			do_checkpoint = true;
			flags |= CHECKPOINT_CAUSE_TIME;
		}

		/*
		 * Do a checkpoint if requested.
		 */
		if (do_checkpoint)
		{
			bool		ckpt_performed = false;
			bool		do_restartpoint;

			/*
			 * Check if we should perform a checkpoint or a restartpoint. As a
			 * side-effect, RecoveryInProgress() initializes TimeLineID if
			 * it's not set yet.
			 */
			do_restartpoint = RecoveryInProgress();

			/*
			 * Atomically fetch the request flags to figure out what kind of a
			 * checkpoint we should perform, and increase the started-counter
			 * to acknowledge that we've started a new checkpoint.
			 */
			SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
			flags |= CheckpointerShmem->ckpt_flags;
			CheckpointerShmem->ckpt_flags = 0;
			CheckpointerShmem->ckpt_started++;
			SpinLockRelease(&CheckpointerShmem->ckpt_lck);

			/*
			 * The end-of-recovery checkpoint is a real checkpoint that's
			 * performed while we're still in recovery.
			 */
			if (flags & CHECKPOINT_END_OF_RECOVERY)
				do_restartpoint = false;

			/*
			 * We will warn if (a) too soon since last checkpoint (whatever
			 * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
			 * since the last checkpoint start.  Note in particular that this
			 * implementation will not generate warnings caused by
			 * CheckPointTimeout < CheckPointWarning.
			 */
			if (!do_restartpoint &&
				(flags & CHECKPOINT_CAUSE_XLOG) &&
				elapsed_secs < CheckPointWarning)
				ereport(LOG,
						(errmsg_plural("checkpoints are occurring too frequently (%d second apart)",
				"checkpoints are occurring too frequently (%d seconds apart)",
									   elapsed_secs,
									   elapsed_secs),
						 errhint("Consider increasing the configuration parameter \"max_wal_size\".")));

			/*
			 * Initialize checkpointer-private variables used during
			 * checkpoint.
			 */
			ckpt_active = true;
			if (do_restartpoint)
				ckpt_start_recptr = GetXLogReplayRecPtr(NULL);
			else
				ckpt_start_recptr = GetInsertRecPtr();
			ckpt_start_time = now;
			ckpt_cached_elapsed = 0;

			/*
			 * Do the checkpoint.
			 */
			if (!do_restartpoint)
			{
				CreateCheckPoint(flags);
				ckpt_performed = true;
			}
			else
				ckpt_performed = CreateRestartPoint(flags);

			/*
			 * After any checkpoint, close all smgr files.  This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();

			/*
			 * Indicate checkpoint completion to any waiting backends.
			 */
			SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
			CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
			SpinLockRelease(&CheckpointerShmem->ckpt_lck);

			if (ckpt_performed)
			{
				/*
				 * Note we record the checkpoint start time not end time as
				 * last_checkpoint_time.  This is so that time-driven
				 * checkpoints happen at a predictable spacing.
				 */
				last_checkpoint_time = now;
			}
			else
			{
				/*
				 * We were not able to perform the restartpoint (checkpoints
				 * throw an ERROR in case of error).  Most likely because we
				 * have not received any new checkpoint WAL records since the
				 * last restartpoint. Try again in 15 s.
				 */
				last_checkpoint_time = now - CheckPointTimeout + 15;
			}

			ckpt_active = false;
		}

		/* Check for archive_timeout and switch xlog files if necessary. */
		CheckArchiveTimeout();

		/*
		 * Send off activity statistics to the stats collector.  (The reason
		 * why we re-use bgwriter-related code for this is that the bgwriter
		 * and checkpointer used to be just one process.  It's probably not
		 * worth the trouble to split the stats support into two independent
		 * stats message types.)
		 */
		pgstat_send_bgwriter();

		/*
		 * Sleep until we are signaled or it's time for another checkpoint or
		 * xlog file switch.
		 */
		now = (pg_time_t) time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
			continue;			/* no sleep for us ... */
		cur_timeout = CheckPointTimeout - elapsed_secs;
		if (XLogArchiveTimeout > 0 && !RecoveryInProgress())
		{
			elapsed_secs = now - last_xlog_switch_time;
			if (elapsed_secs >= XLogArchiveTimeout)
				continue;		/* no sleep for us ... */
			cur_timeout = Min(cur_timeout, XLogArchiveTimeout - elapsed_secs);
		}

		rc = WaitLatch(MyLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   cur_timeout * 1000L /* convert to ms */,
					   WAIT_EVENT_CHECKPOINTER_MAIN);

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			exit(1);
	}
}
Beispiel #10
0
/*
 * kafka_consume_main
 *
 * Main function for Kafka consumers running as background workers
 */
void
kafka_consume_main(Datum arg)
{
	char err_msg[512];
	rd_kafka_topic_conf_t *topic_conf;
	rd_kafka_t *kafka;
	rd_kafka_topic_t *topic;
	rd_kafka_message_t **messages;
	const struct rd_kafka_metadata *meta;
	struct rd_kafka_metadata_topic topic_meta;
	rd_kafka_resp_err_t err;
	bool found;
	Oid id = (Oid) arg;
	ListCell *lc;
	KafkaConsumerProc *proc = hash_search(consumer_procs, &id, HASH_FIND, &found);
	KafkaConsumer consumer;
	CopyStmt *copy;
	int valid_brokers = 0;
	int i;
	int my_partitions = 0;

	if (!found)
		elog(ERROR, "kafka consumer %d not found", id);

	pqsignal(SIGTERM, kafka_consume_main_sigterm);
#define BACKTRACE_SEGFAULTS
#ifdef BACKTRACE_SEGFAULTS
	pqsignal(SIGSEGV, debug_segfault);
#endif

	/* we're now ready to receive signals */
	BackgroundWorkerUnblockSignals();

	/* give this proc access to the database */
	BackgroundWorkerInitializeConnection(NameStr(proc->dbname), NULL);

	/* load saved consumer state */
	StartTransactionCommand();
	load_consumer_state(proc->consumer_id, &consumer);
	copy = get_copy_statement(&consumer);

	topic_conf = rd_kafka_topic_conf_new();
	kafka = rd_kafka_new(RD_KAFKA_CONSUMER, NULL, err_msg, sizeof(err_msg));
	rd_kafka_set_logger(kafka, logger);

	/*
	 * Add all brokers currently in pipeline_kafka_brokers
	 */
	if (consumer.brokers == NIL)
		elog(ERROR, "no valid brokers were found");

	foreach(lc, consumer.brokers)
		valid_brokers += rd_kafka_brokers_add(kafka, lfirst(lc));

	if (!valid_brokers)
		elog(ERROR, "no valid brokers were found");

	/*
	 * Set up our topic to read from
	 */
	topic = rd_kafka_topic_new(kafka, consumer.topic, topic_conf);
	err = rd_kafka_metadata(kafka, false, topic, &meta, CONSUMER_TIMEOUT);

	if (err != RD_KAFKA_RESP_ERR_NO_ERROR)
		elog(ERROR, "failed to acquire metadata: %s", rd_kafka_err2str(err));

	Assert(meta->topic_cnt == 1);
	topic_meta = meta->topics[0];

	load_consumer_offsets(&consumer, &topic_meta, proc->offset);
	CommitTransactionCommand();

	/*
	* Begin consuming all partitions that this process is responsible for
	*/
	for (i = 0; i < topic_meta.partition_cnt; i++)
	{
		int partition = topic_meta.partitions[i].id;

		Assert(partition <= consumer.num_partitions);
		if (partition % consumer.parallelism != proc->partition_group)
			continue;

		elog(LOG, "[kafka consumer] %s <- %s consuming partition %d from offset %ld",
				consumer.rel->relname, consumer.topic, partition, consumer.offsets[partition]);

		if (rd_kafka_consume_start(topic, partition, consumer.offsets[partition]) == -1)
			elog(ERROR, "failed to start consuming: %s", rd_kafka_err2str(rd_kafka_errno2err(errno)));

		my_partitions++;
	}

	/*
	* No point doing anything if we don't have any partitions assigned to us
	*/
	if (my_partitions == 0)
	{
		elog(LOG, "[kafka consumer] %s <- %s consumer %d doesn't have any partitions to read from",
				consumer.rel->relname, consumer.topic, MyProcPid);
		goto done;
	}

	messages = palloc0(sizeof(rd_kafka_message_t) * consumer.batch_size);

	/*
	 * Consume messages until we are terminated
	 */
	while (!got_sigterm)
	{
		ssize_t num_consumed;
		int i;
		int messages_buffered = 0;
		int partition;
		StringInfoData buf;
		bool xact = false;

		for (partition = 0; partition < consumer.num_partitions; partition++)
		{
			if (partition % consumer.parallelism != proc->partition_group)
				continue;

			num_consumed = rd_kafka_consume_batch(topic, partition,
					CONSUMER_TIMEOUT, messages, consumer.batch_size);

			if (num_consumed <= 0)
				continue;

			if (!xact)
			{
				StartTransactionCommand();
				xact = true;
			}

			initStringInfo(&buf);
			for (i = 0; i < num_consumed; i++)
			{
				if (messages[i]->payload != NULL)
				{
					appendBinaryStringInfo(&buf, messages[i]->payload, messages[i]->len);
					if (buf.len > 0 && buf.data[buf.len - 1] != '\n')
						appendStringInfoChar(&buf, '\n');
					messages_buffered++;
				}
				consumer.offsets[partition] = messages[i]->offset;
				rd_kafka_message_destroy(messages[i]);
			}
		}

		if (!xact)
		{
			pg_usleep(1 * 1000);
			continue;
		}

		/* we don't want to die in the event of any errors */
		PG_TRY();
		{
			if (messages_buffered)
				execute_copy(copy, &buf);
		}
		PG_CATCH();
		{
			elog(LOG, "[kafka consumer] %s <- %s failed to process batch, dropped %d message%s:",
					consumer.rel->relname, consumer.topic, (int) num_consumed, (num_consumed == 1 ? "" : "s"));
			EmitErrorReport();
			FlushErrorState();

			AbortCurrentTransaction();
			xact = false;
		}
		PG_END_TRY();

		if (!xact)
			StartTransactionCommand();

		if (messages_buffered)
			save_consumer_state(&consumer, proc->partition_group);

		CommitTransactionCommand();
	}

done:

	hash_search(consumer_procs, &id, HASH_REMOVE, NULL);

	rd_kafka_topic_destroy(topic);
	rd_kafka_destroy(kafka);
	rd_kafka_wait_destroyed(CONSUMER_TIMEOUT);
}
Beispiel #11
0
/*
 * Main entry point for walwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 */
void
WalWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext walwriter_context;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.	(walwriter probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * We have no particular use for SIGINT at the moment, but seems
	 * reasonable to treat like SIGTERM.
	 */
	pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
	pqsignal(SIGINT, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGTERM, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGQUIT, wal_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
	pqsignal(SIGUSR2, SIG_IGN); /* not used */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Create a resource owner to keep track of our resources (not clear that
	 * we need this, but may as well have one).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	walwriter_context = AllocSetContextCreate(TopMemoryContext,
											  "Wal Writer",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(walwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is heavily based on bgwriter.c, q.v.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().	We don't have very many resources to worry
		 * about in walwriter, but we do have LWLocks, and perhaps buffers?
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(walwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(walwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Loop forever
	 */
	for (;;)
	{
		long		udelay;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Process any requests or signals received recently.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (shutdown_requested)
		{
			/* Normal exit from the walwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Do what we're here for...
		 */
		XLogBackgroundFlush();

		/*
		 * Delay until time to do something more, but fall out of delay
		 * reasonably quickly if signaled.
		 */
		udelay = WalWriterDelay * 1000L;
		while (udelay > 999999L)
		{
			if (got_SIGHUP || shutdown_requested)
				break;
			pg_usleep(1000000L);
			udelay -= 1000000L;
		}
		if (!(got_SIGHUP || shutdown_requested))
			pg_usleep(udelay);
	}
}
Beispiel #12
0
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 */
void
BackgroundWriterMain(void)
{
    sigjmp_buf	local_sigjmp_buf;
    MemoryContext bgwriter_context;

    am_bg_writer = true;

    /*
     * If possible, make this process a group leader, so that the postmaster
     * can signal any child processes too.	(bgwriter probably never has any
     * child processes, but for consistency we make all postmaster child
     * processes do this.)
     */
#ifdef HAVE_SETSID
    if (setsid() < 0)
        elog(FATAL, "setsid() failed: %m");
#endif

    /*
     * Properly accept or ignore signals the postmaster might send us
     *
     * SIGUSR1 is presently unused; keep it spare in case someday we want this
     * process to participate in ProcSignal signalling.
     */
    pqsignal(SIGHUP, BgSigHupHandler);	/* set flag to read config file */
    pqsignal(SIGINT, SIG_IGN);			/* as of 9.2 no longer requests checkpoint */
    pqsignal(SIGTERM, ReqShutdownHandler); 	/* shutdown */
    pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
    pqsignal(SIGALRM, SIG_IGN);
    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR1, SIG_IGN);			/* reserve for ProcSignal */
    pqsignal(SIGUSR2, SIG_IGN);

    /*
     * Reset some signals that are accepted by postmaster but not here
     */
    pqsignal(SIGCHLD, SIG_DFL);
    pqsignal(SIGTTIN, SIG_DFL);
    pqsignal(SIGTTOU, SIG_DFL);
    pqsignal(SIGCONT, SIG_DFL);
    pqsignal(SIGWINCH, SIG_DFL);

    /* We allow SIGQUIT (quickdie) at all times */
    sigdelset(&BlockSig, SIGQUIT);

    /*
     * Create a resource owner to keep track of our resources (currently only
     * buffer pins).
     */
    CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

    /*
     * Create a memory context that we will do all our work in.  We do this so
     * that we can reset the context during error recovery and thereby avoid
     * possible memory leaks.  Formerly this code just ran in
     * TopMemoryContext, but resetting that would be a really bad idea.
     */
    bgwriter_context = AllocSetContextCreate(TopMemoryContext,
                       "Background Writer",
                       ALLOCSET_DEFAULT_MINSIZE,
                       ALLOCSET_DEFAULT_INITSIZE,
                       ALLOCSET_DEFAULT_MAXSIZE);
    MemoryContextSwitchTo(bgwriter_context);

    /*
     * If an exception is encountered, processing resumes here.
     *
     * See notes in postgres.c about the design of this coding.
     */
    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        /* Since not using PG_TRY, must reset error stack by hand */
        error_context_stack = NULL;

        /* Prevent interrupts while cleaning up */
        HOLD_INTERRUPTS();

        /* Report the error to the server log */
        EmitErrorReport();

        /*
         * These operations are really just a minimal subset of
         * AbortTransaction().	We don't have very many resources to worry
         * about in bgwriter, but we do have LWLocks, buffers, and temp files.
         */
        LWLockReleaseAll();
        AbortBufferIO();
        UnlockBuffers();
        /* buffer pins are released here: */
        ResourceOwnerRelease(CurrentResourceOwner,
                             RESOURCE_RELEASE_BEFORE_LOCKS,
                             false, true);
        /* we needn't bother with the other ResourceOwnerRelease phases */
        AtEOXact_Buffers(false);
        AtEOXact_Files();
        AtEOXact_HashTables(false);

        /*
         * Now return to normal top-level context and clear ErrorContext for
         * next time.
         */
        MemoryContextSwitchTo(bgwriter_context);
        FlushErrorState();

        /* Flush any leaked data in the top-level context */
        MemoryContextResetAndDeleteChildren(bgwriter_context);

        /* Now we can allow interrupts again */
        RESUME_INTERRUPTS();

        /*
         * Sleep at least 1 second after any error.  A write error is likely
         * to be repeated, and we don't want to be filling the error logs as
         * fast as we can.
         */
        pg_usleep(1000000L);

        /*
         * Close all open files after any error.  This is helpful on Windows,
         * where holding deleted files open causes various strange errors.
         * It's not clear we need it elsewhere, but shouldn't hurt.
         */
        smgrcloseall();
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    /*
     * Unblock signals (they were blocked when the postmaster forked us)
     */
    PG_SETMASK(&UnBlockSig);

    /*
     * Use the recovery target timeline ID during recovery
     */
    if (RecoveryInProgress())
        ThisTimeLineID = GetRecoveryTargetTLI();

    /*
     * Loop forever
     */
    for (;;)
    {
        /*
         * Emergency bailout if postmaster has died.  This is to avoid the
         * necessity for manual cleanup of all postmaster children.
         */
        if (!PostmasterIsAlive())
            exit(1);

        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);
            /* update global shmem state for sync rep */
        }
        if (shutdown_requested)
        {
            /*
             * From here on, elog(ERROR) should end with exit(1), not send
             * control back to the sigsetjmp block above
             */
            ExitOnAnyError = true;
            /* Normal exit from the bgwriter is here */
            proc_exit(0);		/* done */
        }

        /*
         * Do one cycle of dirty-buffer writing.
         */
        BgBufferSync();

        /* Nap for the configured time. */
        BgWriterNap();
    }
}
Beispiel #13
0
void
ContQuerySchedulerMain(int argc, char *argv[])
{
	sigjmp_buf local_sigjmp_buf;
	List *dbs = NIL;

	/* we are a postmaster subprocess now */
	IsUnderPostmaster = true;
	am_cont_scheduler = true;

	/* reset MyProcPid */
	MyProcPid = getpid();
	MyPMChildSlot = AssignPostmasterChildSlot();

	/* record Start Time for logging */
	MyStartTime = time(NULL);

	/* Identify myself via ps */
	init_ps_display("continuous query scheduler process", "", "", "");

	ereport(LOG, (errmsg("continuous query scheduler started")));

	if (PostAuthDelay)
		pg_usleep(PostAuthDelay * 1000000L);

	SetProcessingMode(InitProcessing);

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too. This is only for consistency sake, we
	 * never fork the scheduler process. Instead dynamic bgworkers are used.
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Set up signal handlers.  We operate on databases much like a regular
	 * backend, so we use the same signal handling.  See equivalent code in
	 * tcop/postgres.c.
	 */
	pqsignal(SIGHUP, sighup_handler);
	pqsignal(SIGINT, sigint_handler);
	pqsignal(SIGTERM, sigterm_handler);

	pqsignal(SIGQUIT, quickdie);
	InitializeTimeouts(); /* establishes SIGALRM handler */

	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
	pqsignal(SIGUSR2, sigusr2_handler);
	pqsignal(SIGFPE, FloatExceptionHandler);
	pqsignal(SIGCHLD, SIG_DFL);

#define BACKTRACE_SEGFAULTS
#ifdef BACKTRACE_SEGFAULTS
	pqsignal(SIGSEGV, debug_segfault);
#endif

	/* Early initialization */
	BaseInit();

	/*
	 * Create a per-backend PGPROC struct in shared memory, except in the
	 * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
	 * this before we can use LWLocks (and in the EXEC_BACKEND case we already
	 * had to do some stuff with LWLocks).
	 */
#ifndef EXEC_BACKEND
	InitProcess();
#endif

	InitPostgres(NULL, InvalidOid, NULL, NULL);

	SetProcessingMode(NormalProcessing);

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.
	 */
	ContQuerySchedulerMemCxt = AllocSetContextCreate(TopMemoryContext,
			"ContQuerySchedulerCtx",
			ALLOCSET_DEFAULT_MINSIZE,
			ALLOCSET_DEFAULT_INITSIZE,
			ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(ContQuerySchedulerMemCxt);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is a stripped down version of PostgresMain error recovery.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Forget any pending QueryCancel or timeout request */
		disable_all_timeouts(false);
		QueryCancelPending = false; /* second to avoid race condition */

		/* Report the error to the server log */
		EmitErrorReport();

		/* Abort the current transaction in order to recover */
		AbortCurrentTransaction();

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(ContQuerySchedulerMemCxt);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(ContQuerySchedulerMemCxt);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  We don't want to be
		 * filling the error logs as fast as we can.
		 */
		pg_usleep(1000000L);
	}
	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/* must unblock signals before calling rebuild_database_list */
	PG_SETMASK(&UnBlockSig);

	ContQuerySchedulerShmem->scheduler_pid = MyProcPid;

	dbs = get_database_list();

	/* Loop forever */
	for (;;)
	{
		ListCell *lc;
		int rc;

		foreach(lc, dbs)
		{
			DatabaseEntry *db_entry = lfirst(lc);
			bool found;
			ContQueryProcGroup *grp = hash_search(ContQuerySchedulerShmem->proc_table, &db_entry->oid, HASH_ENTER, &found);

			/* If we don't have an entry for this dboid, initialize a new one and fire off bg procs */
			if (!found)
			{
				grp->db_oid = db_entry->oid;
				namestrcpy(&grp->db_name, NameStr(db_entry->name));

				start_group(grp);
			}
		}

		/* Allow sinval catchup interrupts while sleeping */
		EnableCatchupInterrupt();

		/*
		 * Wait until naptime expires or we get some type of signal (all the
		 * signal handlers will wake us by calling SetLatch).
		 */
		rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, 0);

		ResetLatch(&MyProc->procLatch);

		DisableCatchupInterrupt();

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);

		/* the normal shutdown case */
		if (got_SIGTERM)
			break;

		/* update config? */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);

			/* update tuning parameters, so that they can be read downstream by background processes */
			update_tuning_params();
		}

		/* terminate a proc group? */
		if (got_SIGUSR2)
		{
			HASH_SEQ_STATUS status;
			ContQueryProcGroup *grp;

			got_SIGUSR2 = false;

			hash_seq_init(&status, ContQuerySchedulerShmem->proc_table);
			while ((grp = (ContQueryProcGroup *) hash_seq_search(&status)) != NULL)
			{
				ListCell *lc;

				if (!grp->terminate)
					continue;

				foreach(lc, dbs)
				{
					DatabaseEntry *entry = lfirst(lc);
					if (entry->oid == grp->db_oid)
					{
						dbs = list_delete(dbs, entry);
						break;
					}
				}

				terminate_group(grp);
			}
		}
static void
WalSendServerDoRequest(WalSendRequest *walSendRequest)
{
	bool successful;
	struct timeval standbyTimeout;

	WalSendServerGetStandbyTimeout(&standbyTimeout);
	
	switch (walSendRequest->command)
	{
	case PositionToEnd:
		elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "PositionToEnd");

		successful = write_position_to_end(&originalEndLocation,
			                               NULL, &walsend_shutdown_requested);
		if (successful)
			elog(LOG,"Standby master returned transaction log end location %s",
				 XLogLocationToString(&originalEndLocation));
		else
		{
			disableQDMirroring_ConnectionError(
				"Unable to connect to standby master and determine transaction log end location",
				GetStandbyErrorString());
			disconnectMirrorQD_SendClose();
		}
		break;
		
	case Catchup:
		elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Catchup");

        if (isQDMirroringCatchingUp())
    	{
    		bool tooFarBehind = false;

			elog(LOG,"Current master transaction log is flushed through location %s",
				 XLogLocationToString(&walSendRequest->flushedLocation));
			
			if (XLByteLT(originalEndLocation, walSendRequest->flushedLocation))
			{
				/*
				 * Standby master is behind the primary.  Send catchup WAL.
				 */
				 
				/* 
				 * Use a TRY block to catch errors from our attempt to read
				 * the primary's WAL.  Errors from sending to the standby
				 * come up as a boolean return (successful).
				 */
				PG_TRY();
				{
					successful = XLogCatchupQDMirror(
									&originalEndLocation, 
									&walSendRequest->flushedLocation,
									&standbyTimeout,
									&walsend_shutdown_requested);
				}
				PG_CATCH();
				{
					/* 
					 * Report the error related to reading the primary's WAL
					 * to the server log 
					 */
					 
					/* 
					 * But first demote the error to something much less
					 * scary.
					 */
				    if (!elog_demote(WARNING))
			    	{
			    		elog(LOG,"unable to demote error");
			        	PG_RE_THROW();
			    	}
					
					EmitErrorReport();
					FlushErrorState();

					successful = false;
					tooFarBehind = true;
				}
				PG_END_TRY();
				
				if (successful)
				{
					elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
						 "catchup send from standby end %s through primary flushed location %s",
						 XLogLocationToString(&originalEndLocation),
						 XLogLocationToString2(&walSendRequest->flushedLocation));
				}

			}
			else if (XLByteEQ(originalEndLocation, walSendRequest->flushedLocation))
			{
				elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"Mirror was already caught up");
				successful = true;
			}
			else
			{
				elog(WARNING,"Standby master transaction log location %s is beyond the current master end location %s",
				     XLogLocationToString(&originalEndLocation),
				     XLogLocationToString2(&walSendRequest->flushedLocation));
				successful = false;
			}
			
			if (successful)
			{
				char detail[200];
				int count;
				
				count = snprintf(
							 detail, sizeof(detail),
							 "Transaction log copied from locations %s through %s to the standby master",
						     XLogLocationToString(&originalEndLocation),
						     XLogLocationToString2(&walSendRequest->flushedLocation));
				if (count >= sizeof(detail))
				{
					ereport(ERROR,
							(errcode(ERRCODE_INTERNAL_ERROR),
							 errmsg("format command string failure")));
				}

				enableQDMirroring("Master mirroring is now synchronized", detail);

				currentEndLocation = walSendRequest->flushedLocation;

				periodicLen = 0;
				periodicLocation = currentEndLocation;
			}
			else
			{
				if (tooFarBehind)
				{
					disableQDMirroring_TooFarBehind(
						"The current master was unable to synchronize the standby master "
						"because the transaction logs on the current master were recycled.  "
						"A gpinitstandby (at an appropriate time) will be necessary to copy "
						"over the whole master database to the standby master so it may be synchronized");
				}
				else
				{
					disableQDMirroring_ConnectionError(
						"Connection to the standby master was lost during transaction log catchup",
						GetStandbyErrorString());
				}
				disconnectMirrorQD_SendClose();
			}
		}
		else if (isQDMirroringDisabled())
		{
			elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master Mirror Send: Master mirroring not catching-up (state is disabled)");
		}
		else
		{
			elog(ERROR,"unexpected master mirroring state %s",
				 QDMirroringStateString());
		}
		
		break;
		
	case WriteWalPages:
		if (Debug_print_qd_mirroring)
			elog(LOG, "WriteWalPages");
		
        if (isQDMirroringEnabled())
        {
			char	   *from;
			Size		nbytes;
			bool		more= false;

			/*
			 * For now, save copy of data until flush.  This could be
			 * optimized.
			 */
			if (saveBuffer == NULL)
			{
				uint32 totalBufferLen = XLOGbuffers * XLOG_BLCKSZ;
				
				saveBuffer = malloc(totalBufferLen);
				if (saveBuffer == NULL)
					elog(ERROR,"Could not allocate buffer for xlog data (%d bytes)",
					     totalBufferLen);
				
				saveBufferLen = 0;
			}

			XLogGetBuffer(walSendRequest->startidx, walSendRequest->npages,
				          &from, &nbytes);

			if (saveBufferLen == 0)
			{
				more = false;
				writeLogId = walSendRequest->logId;
				writeLogSeg = walSendRequest->logSeg;
				writeLogOff = walSendRequest->logOff;

				memcpy(saveBuffer, from, nbytes);
				saveBufferLen = nbytes;
			}
			else
			{
				more = true;
				memcpy(&saveBuffer[saveBufferLen], from, nbytes);
				saveBufferLen += nbytes;
			}
			
			if (Debug_print_qd_mirroring)
				elog(LOG,
					 "Master Mirror Send: WriteWalPages (%s) startidx %d, npages %d, timeLineID %d, logId %u, logSeg %u, logOff 0x%X, nbytes 0x%X",
					 (more ? "more" : "new"),
					 walSendRequest->startidx,
					 walSendRequest->npages,
					 walSendRequest->timeLineID,
					 walSendRequest->logId,
					 walSendRequest->logSeg,
					 walSendRequest->logOff,
					 (int)nbytes);
    	}

	case FlushWalPages:
		if (Debug_print_qd_mirroring)
			elog(LOG, "FlushWalPages");
		
        if (isQDMirroringEnabled())
        {
			char 		cmd[MAXFNAMELEN + 50];

			if (saveBufferLen == 0)
				successful = true;
			else
			{
				if (snprintf(cmd, sizeof(cmd),"xlog %d %d %d %d", 
							 writeLogId, writeLogSeg, writeLogOff, 
							 (int)saveBufferLen) >= sizeof(cmd))
					elog(ERROR,"could not create cmd for qd mirror logid %d seg %d", 
					     writeLogId, writeLogSeg);
				
				successful = write_qd_sync(cmd, saveBuffer, saveBufferLen, 
							               &standbyTimeout,
							               &walsend_shutdown_requested);
				if (successful)
				{
					XLogRecPtr oldEndLocation;
					
					oldEndLocation = currentEndLocation;

					currentEndLocation.xlogid = writeLogId;
					currentEndLocation.xrecoff = writeLogSeg * XLogSegSize + writeLogOff;
					if (currentEndLocation.xrecoff >= XLogFileSize)
					{
						(currentEndLocation.xlogid)++;
						currentEndLocation.xrecoff = 0;
					}

					if (XLByteLT(oldEndLocation,currentEndLocation))
					{
						periodicLen += saveBufferLen;
						if (periodicLen > periodicReportLen)
						{
							elog(LOG,
								 "Master mirroring periodic report: %d bytes successfully send to standby master for locations %s through %s",
								 periodicLen,
								 XLogLocationToString(&periodicLocation),
								 XLogLocationToString2(&currentEndLocation));

							periodicLen = 0;
							periodicLocation = currentEndLocation;
						}
					}
					else
					{
						if (Debug_print_qd_mirroring)
							elog(LOG,
							     "Send to Master mirror successful.  New end location %s (old %s)",
							     XLogLocationToString(&currentEndLocation),
							     XLogLocationToString2(&oldEndLocation));
					}
				}
				else
				{
					disableQDMirroring_ConnectionError(
						"Connection to the standby master was lost attempting to send new transaction log",
						GetStandbyErrorString());
					disconnectMirrorQD_SendClose();
				}

				/*
				 * Reset so WriteWalPages can fill the buffer again.
				 */
				saveBufferLen = 0;
				writeLogId = 0;
				writeLogSeg = 0;
				writeLogOff = 0;
			}
			
			if (successful && walSendRequest->haveNewCheckpointLocation)
			{
				uint32 logid;
				uint32 seg;
				uint32 offset;
				
	        	elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"New previous checkpoint location %s",
				     XLogLocationToString(&walSendRequest->newCheckpointLocation));
				XLByteToSeg(walSendRequest->newCheckpointLocation, logid, seg);
				offset = walSendRequest->newCheckpointLocation.xrecoff % XLogSegSize;
				
				if (snprintf(cmd, sizeof(cmd),"new_checkpoint_location %d %d %d", 
							 logid, seg, offset) >= sizeof(cmd))
					elog(ERROR,"could not create cmd for qd mirror logid %d seg %d offset %d", 
					     logid, seg, offset);
				
				successful = write_qd_sync(cmd, NULL, 0, 
					                       NULL, &walsend_shutdown_requested);
				if (successful)
				{
					elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"Send of new checkpoint location to master mirror successful");
				}
				else
				{
					disableQDMirroring_ConnectionError(
						"Connection to the standby master was lost attempting to send new checkpoint location",
						GetStandbyErrorString());
					disconnectMirrorQD_SendClose();
				}
			}
			
        }
		else if (isQDMirroringDisabled())
		{
			elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master Mirror Send: Master mirroring not enabled");
		}
		else
		{
			elog(ERROR,"unexpected master mirroring state %s",
				 QDMirroringStateString());
		}
		
		break;

	case CloseForShutdown:
		if (Debug_print_qd_mirroring)
			elog(LOG, "CloseForShutdown");

		/*
		 * Do the work we would normally do when signaled to stop.
		 */
		WalSendServer_ServiceShutdown();
		break;

	default:
		elog(ERROR, "Unknown WalSendRequestCommand %d", walSendRequest->command);
	}

}
Beispiel #15
0
/* fork heartbeat sender child */
pid_t
wd_hb_sender(int fork_wait_time, WdHbIf *hb_if)
{
	int sock;
	pid_t pid = 0;
	WdHbPacket pkt;
	WdInfo * p = WD_List;
	char pack_str[WD_MAX_PACKET_STRING];
	int pack_str_len;
	sigjmp_buf	local_sigjmp_buf;

	pid = fork();
	if (pid != 0)
	{
		if (pid == -1)
			ereport(PANIC,
					(errmsg("failed to fork a heartbeat sender child")));
		return pid;
	}

	on_exit_reset();
	processType = PT_HB_SENDER;

	if (fork_wait_time > 0)
	{
		sleep(fork_wait_time);
	}

	POOL_SETMASK(&UnBlockSig);

	signal(SIGTERM, hb_sender_exit);
	signal(SIGINT, hb_sender_exit);
	signal(SIGQUIT, hb_sender_exit);
	signal(SIGCHLD, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGUSR2, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);

	init_ps_display("", "", "", "");
	/* Create per loop iteration memory context */
	ProcessLoopContext = AllocSetContextCreate(TopMemoryContext,
											   "wdhb_sender",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);
	
	MemoryContextSwitchTo(TopMemoryContext);

	sock = wd_create_hb_send_socket(hb_if);

	set_ps_display("heartbeat sender", false);

	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;
		
		EmitErrorReport();
		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();
		sleep(pool_config->wd_heartbeat_keepalive);
	}
	
	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	for(;;)
	{
		MemoryContextSwitchTo(ProcessLoopContext);
		MemoryContextResetAndDeleteChildren(ProcessLoopContext);

		/* contents of packet */
		gettimeofday(&pkt.send_time, NULL);
		strlcpy(pkt.from, pool_config->wd_hostname, sizeof(pkt.from));
		pkt.from_pgpool_port = pool_config->port;

		pkt.status = p->status;

		/* authentication key */
		if (strlen(pool_config->wd_authkey))
		{
			/* calculate hash from packet */
			pack_str_len = packet_to_string_hb(&pkt, pack_str, sizeof(pack_str));
			wd_calc_hash(pack_str, pack_str_len, pkt.hash);
		}

		/* send heartbeat signal */
		wd_hb_send(sock, &pkt, sizeof(pkt), hb_if->addr, hb_if->dest_port);
		ereport(DEBUG1,
				(errmsg("watchdog heartbeat: send heartbeat signal to %s:%d", hb_if->addr, hb_if->dest_port)));
		sleep(pool_config->wd_heartbeat_keepalive);
	}

	return pid;
}
Beispiel #16
0
/* fork heartbeat receiver child */
pid_t
wd_hb_receiver(int fork_wait_time, WdHbIf *hb_if)
{
	int sock;
	pid_t pid = 0;
	WdHbPacket pkt;
	struct timeval tv;
	char from[WD_MAX_HOST_NAMELEN];
	int from_pgpool_port;
	char buf[(MD5_PASSWD_LEN+1)*2];
	char pack_str[WD_MAX_PACKET_STRING];
	int pack_str_len;
	sigjmp_buf	local_sigjmp_buf;


	WdInfo * p;

	pid = fork();
	if (pid != 0)
	{
		if (pid == -1)
			ereport(PANIC,
					(errmsg("failed to fork a heartbeat receiver child")));
		return pid;
	}

	on_exit_reset();
	processType = PT_HB_RECEIVER;

	if (fork_wait_time > 0)
	{
		sleep(fork_wait_time);
	}

	POOL_SETMASK(&UnBlockSig);

	signal(SIGTERM, hb_receiver_exit);
	signal(SIGINT, hb_receiver_exit);
	signal(SIGQUIT, hb_receiver_exit);
	signal(SIGCHLD, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGUSR2, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);

	init_ps_display("", "", "", "");
	/* Create per loop iteration memory context */
	ProcessLoopContext = AllocSetContextCreate(TopMemoryContext,
											   "wdhb_hb_receiver",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);
	
	MemoryContextSwitchTo(TopMemoryContext);

	sock = wd_create_hb_recv_socket(hb_if);

	set_ps_display("heartbeat receiver", false);

	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;
		
		EmitErrorReport();
		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();
	}
	
	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	for(;;)
	{
		MemoryContextSwitchTo(ProcessLoopContext);
		MemoryContextResetAndDeleteChildren(ProcessLoopContext);

		/* receive heartbeat signal */
		wd_hb_recv(sock, &pkt);
			/* authentication */
		if (strlen(pool_config->wd_authkey))
		{
			/* calculate hash from packet */
			pack_str_len = packet_to_string_hb(&pkt, pack_str, sizeof(pack_str));
			wd_calc_hash(pack_str, pack_str_len, buf);

			if (strcmp(pkt.hash, buf))
				ereport(ERROR,
					(errmsg("watchdog heartbeat receive"),
						 errdetail("authentication failed")));
		}

		/* get current time */
		gettimeofday(&tv, NULL);

		/* who send this packet? */
		strlcpy(from, pkt.from, sizeof(from));
		from_pgpool_port = pkt.from_pgpool_port;

		p = WD_List;
		while (p->status != WD_END)
		{
			if (!strcmp(p->hostname, from) && p->pgpool_port == from_pgpool_port)
			{
				/* ignore the packet from down pgpool */
				if (pkt.status == WD_DOWN)
				{
					ereport(DEBUG1,
						(errmsg("watchdog heartbeat: received heartbeat signal from \"%s:%d\" whose status is down. ignored",
									from, from_pgpool_port)));
					break;
				}

				/* this is the first packet or the latest packet */
				if (!WD_TIME_ISSET(p->hb_send_time) ||
					WD_TIME_BEFORE(p->hb_send_time, pkt.send_time))
				{
					ereport(DEBUG1,
							(errmsg("watchdog heartbeat: received heartbeat signal from \"%s:%d\"",
									from, from_pgpool_port)));

					p->hb_send_time = pkt.send_time;
					p->hb_last_recv_time = tv;
				}
				else
				{
					ereport(DEBUG1,
							(errmsg("watchdog heartbeat: received heartbeat signal is older than the latest, ignored")));
				}
				break;
			}
			p++;
		}

	}

	return pid;
}
Beispiel #17
0
/*
 * Common service main.
 */
void
ServiceMain(ServiceCtrl *serviceCtrl)
{
	ServiceConfig *serviceConfig;

	sigjmp_buf	local_sigjmp_buf;

	Assert(serviceCtrl != NULL);
	serviceConfig = (ServiceConfig*)serviceCtrl->serviceConfig;
	Assert(serviceConfig != NULL);

	IsUnderPostmaster = true;

	/* reset MyProcPid */
	MyProcPid = getpid();


	/* Lose the postmaster's on-exit routines */
	on_exit_reset();

	/* Identify myself via ps */
	init_ps_display(serviceConfig->psTitle, "", "", "");

	if (serviceConfig->ServiceEarlyInit != NULL)
	{
		serviceConfig->ServiceEarlyInit();
	}
	else
	{
		SetProcessingMode(InitProcessing);
	}

	/*
	 * Set up signal handlers.	We operate on databases much like a regular
	 * backend, so we use the same signal handling.  See equivalent code in
	 * tcop/postgres.c.
	 *
	 * Currently, we don't pay attention to postgresql.conf changes that
	 * happen during a single daemon iteration, so we can ignore SIGHUP.
	 */
	pqsignal(SIGHUP, SIG_IGN);

	/*
	 * Presently, SIGINT will lead to autovacuum shutdown, because that's how
	 * we handle ereport(ERROR).  It could be improved however.
	 */
	pqsignal(SIGINT, StatementCancelHandler);
	pqsignal(SIGTERM, ServiceDie);
	pqsignal(SIGQUIT, ServiceQuickDie);
	pqsignal(SIGALRM, handle_sig_alarm);

	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
	/* We don't listen for async notifies */
	pqsignal(SIGUSR2, serviceConfig->ServiceRequestShutdown);
	pqsignal(SIGFPE, FloatExceptionHandler);
	pqsignal(SIGCHLD, SIG_DFL);

#ifdef SIGBUS
    pqsignal(SIGBUS, HandleCrash);
#endif
#ifdef SIGILL
    pqsignal(SIGILL, HandleCrash);
#endif
#ifdef SIGSEGV
    pqsignal(SIGSEGV, HandleCrash);
#endif


	/* Early initialization */
	BaseInit();

	if (serviceConfig->ServicePostgresInit != NULL)
	{
		serviceConfig->ServicePostgresInit();
	}

	SetProcessingMode(NormalProcessing);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * We can now go away.	Note that because we'll call InitProcess, a
		 * callback will be registered to do ProcKill, which will clean up
		 * necessary state.
		 */
		proc_exit(0);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	PG_SETMASK(&UnBlockSig);

	/* set up a listener port and put it in shmem*/
	serviceCtrl->listenerPort = ServiceListenerSetup(serviceCtrl);

	if (serviceConfig->ServiceInit != NULL)
	{
		serviceConfig->ServiceInit(serviceCtrl->listenerPort);
	}

	/* listen loop */
	ServiceListenLoop(serviceCtrl);

	proc_exit(0);
}
Beispiel #18
0
static bool
ServiceClientPollRead(ServiceClient *serviceClient, void* response, int responseLen, bool *pollResponseReceived)
{
	ServiceConfig 	*serviceConfig;
	int				n;
	int				saved_err;
	char            *message;
	bool		    result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	serviceConfig = serviceClient->serviceConfig;
	Assert(serviceConfig != NULL);
	Assert(response != NULL);

	PG_TRY();
	{
		SUPPRESS_PANIC();

		/*
		 * Attempt to read the response
		 */
		while (true)
		{
			n = read(serviceClient->sockfd,
				     ((char *)response),
				     responseLen);
			saved_err = errno;

			if (n == 0)
			{
				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								 errmsg("Connection to '%s' is closed",
								         serviceConfig->title)));
			}
			else if (n < 0)
			{
				if (saved_err == EWOULDBLOCK)
				{
					*pollResponseReceived = false;
					break;
				}

				if (saved_err == EINTR)
					continue;

				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								 errmsg("Read error from '%s': %s",
								        serviceConfig->title,
								        strerror(saved_err))));
			}

			if (n != responseLen)
			{
				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								 errmsg("Expecting message length %d and actual read length was %d from '%s'",
					 			        responseLen, n,
					 			        serviceConfig->title)));
				return false;
			}

			*pollResponseReceived = true;
			break;
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
	    if (!elog_demote(WARNING))
    	{
    		elog(LOG,"unable to demote error");
        	PG_RE_THROW();
    	}

		message = elog_message();
 		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
Beispiel #19
0
static bool
ServiceClientRead(ServiceClient *serviceClient, void* response, int responseLen, struct timeval *timeout)
{
	ServiceConfig *serviceConfig;
	int		n;
	int	   bytesRead = 0;
	int			saved_err;
	char        *message;
	bool		result = false;
	mpp_fd_set	rset;
	struct timeval rundownTimeout = {0,0};
	// Use local variable since select modifies
	// the timeout parameter with remaining time.
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	serviceConfig = serviceClient->serviceConfig;
	Assert(serviceConfig != NULL);
	Assert(response != NULL);
	if (timeout != NULL)
		rundownTimeout = *timeout;

	PG_TRY();
	{
		SUPPRESS_PANIC();

		/*
		 * read the response
		 */
		while (bytesRead < responseLen)
		{
			n = read(serviceClient->sockfd,
				     ((char *)response) + bytesRead,
				     responseLen - bytesRead);
			saved_err = errno;

			if (n == 0)
			{
				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								errmsg("Connection to '%s' is closed (%d)",
									   serviceConfig->title, serviceClient->sockfd)));
			}

			if (n < 0)
			{
				if (saved_err != EINTR && saved_err != EWOULDBLOCK)
				{
					ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
									errmsg("Read error from '%s': %s (%d)",
										   serviceConfig->title,
										   strerror(saved_err), serviceClient->sockfd)));
				}

				if (saved_err == EWOULDBLOCK)
				{
					/* we shouldn't really get here since we are dealing with
					 * small messages, but once we've read a bit of data we
					 * need to finish out reading till we get the message (or error)
					 */
					do
					{
						MPP_FD_ZERO(&rset);
						MPP_FD_SET(serviceClient->sockfd, &rset);
						n = select(serviceClient->sockfd + 1, (fd_set *)&rset, NULL, NULL, (timeout == NULL ? NULL : &rundownTimeout));
						if (n == 0)
						{
							if (timeout != NULL)
							{
								ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
												errmsg("Read from '%s' timed out after %d.%03d seconds",
													   serviceConfig->title,
													   (int)timeout->tv_sec,
													   (int)timeout->tv_usec / 1000)));
							}
						}
						else if (n < 0 && errno == EINTR)
							continue;
						else if (n < 0)
						{
							saved_err = errno;

							ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
											errmsg("Read error from '%s': %s (%d)",
												   serviceConfig->title,
												   strerror(saved_err), serviceClient->sockfd)));
						}
					}
					while (n < 1);
				}
				/* else saved_err == EINTR */

				continue;
			}
			else
				bytesRead += n;
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
	    if (!elog_demote(WARNING))
    	{
    		elog(LOG,"unable to demote error");
        	PG_RE_THROW();
    	}

		message = elog_message();
 		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
Beispiel #20
0
/*
 * Main entry point for walwriter process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 */
void
WalWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext walwriter_context;
	int			left_till_hibernate;
	bool		hibernating;

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * We have no particular use for SIGINT at the moment, but seems
	 * reasonable to treat like SIGTERM.
	 */
	pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
	pqsignal(SIGINT, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGTERM, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGQUIT, wal_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, walwriter_sigusr1_handler);
	pqsignal(SIGUSR2, SIG_IGN); /* not used */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Create a resource owner to keep track of our resources (not clear that
	 * we need this, but may as well have one).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	walwriter_context = AllocSetContextCreate(TopMemoryContext,
											  "Wal Writer",
											  ALLOCSET_DEFAULT_SIZES);
	MemoryContextSwitchTo(walwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is heavily based on bgwriter.c, q.v.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in walwriter, but we do have LWLocks, and perhaps buffers?
		 */
		LWLockReleaseAll();
		ConditionVariableCancelSleep();
		pgstat_report_wait_end();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_SMgr();
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(walwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(walwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Reset hibernation state after any error.
	 */
	left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
	hibernating = false;
	SetWalWriterSleeping(false);

	/*
	 * Advertise our latch that backends can use to wake us up while we're
	 * sleeping.
	 */
	ProcGlobal->walwriterLatch = &MyProc->procLatch;

	/*
	 * Loop forever
	 */
	for (;;)
	{
		long		cur_timeout;
		int			rc;

		/*
		 * Advertise whether we might hibernate in this cycle.  We do this
		 * before resetting the latch to ensure that any async commits will
		 * see the flag set if they might possibly need to wake us up, and
		 * that we won't miss any signal they send us.  (If we discover work
		 * to do in the last cycle before we would hibernate, the global flag
		 * will be set unnecessarily, but little harm is done.)  But avoid
		 * touching the global flag if it doesn't need to change.
		 */
		if (hibernating != (left_till_hibernate <= 1))
		{
			hibernating = (left_till_hibernate <= 1);
			SetWalWriterSleeping(hibernating);
		}

		/* Clear any already-pending wakeups */
		ResetLatch(MyLatch);

		/*
		 * Process any requests or signals received recently.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (shutdown_requested)
		{
			/* Normal exit from the walwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Do what we're here for; then, if XLogBackgroundFlush() found useful
		 * work to do, reset hibernation counter.
		 */
		if (XLogBackgroundFlush())
			left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
		else if (left_till_hibernate > 0)
			left_till_hibernate--;

		/*
		 * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
		 * haven't done anything useful for quite some time, lengthen the
		 * sleep time so as to reduce the server's idle power consumption.
		 */
		if (left_till_hibernate > 0)
			cur_timeout = WalWriterDelay;		/* in ms */
		else
			cur_timeout = WalWriterDelay * HIBERNATE_FACTOR;

		rc = WaitLatch(MyLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   cur_timeout,
					   WAIT_EVENT_WAL_WRITER_MAIN);

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			exit(1);
	}
}
void FileRepResetPeer_Main(void)
{
	/* BASIC PROCESS SETUP */

	FileRepReset_ConfigureSignals();

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding and comments about how the error
	 * handling works.
	 */
	sigjmp_buf		local_sigjmp_buf;
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		HOLD_INTERRUPTS();
		EmitErrorReport();
		proc_exit(EXIT_CODE_SHOULD_ENTER_FAULT);
	}
	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;
	PG_SETMASK(&UnBlockSig);


	/** NOW DO THE ACTUAL WORK */ 
	char messageFromPeer[MESSAGE_FROM_PEER_BUF_SIZE];
	char resetNumberFromPeer[MESSAGE_FROM_PEER_BUF_SIZE];
	char resetNumberThatIndicatesResetComplete[MESSAGE_FROM_PEER_BUF_SIZE];
	struct addrinfo *addrList = NULL;
	char portStr[100];

	PrimaryMirrorModeTransitionArguments args = primaryMirrorGetArgumentsFromLocalMemory();
	Assert(args.mode == PMModePrimarySegment || args.mode == PMModeMirrorSegment);

	snprintf(portStr, sizeof(portStr), "%d", args.peerPostmasterPort);
	if (! determineTargetHost(&addrList, args.peerAddress, portStr))
	{
		elog(WARNING, "during reset, unable to look up address for peer host to coordinate reset; "
				"will transition to fault state.");
		proc_exit(EXIT_CODE_SHOULD_ENTER_FAULT);
	}

	sendMessageToPeerAndExitIfProblem(addrList, "beginPostmasterReset", messageFromPeer,
		resetNumberThatIndicatesResetComplete);

	for ( ;; )
	{
		pg_usleep(10 * 1000L); /* 10 ms */
		sendMessageToPeerAndExitIfProblem(addrList, "getPostmasterResetStatus", messageFromPeer, resetNumberFromPeer );
		if (strequals(messageFromPeer, RESET_STATUS_IS_IN_RESET_PIVOT_POINT))
		{
			if (args.mode == PMModeMirrorSegment)
			{
				/**
				 * peer is in the reset pivot point, we can break out of our checking loop and
				 *   thus exit with a code telling the postmaster to begin the startup sequence again
				 *
				 * this is only done on the mirror as currently the mirror must execute the startup sequence
				 *   before the primary
				 */
				elog(DEBUG1, "peer reset: primary peer has reached reset point");
				break;
			}
		}
		else if (strequals(messageFromPeer, RESET_STATUS_IS_RUNNING))
		{
			/** it's running -- is it >= than the reset number that indicates reset complete one */
			if (strcmp( resetNumberFromPeer, resetNumberThatIndicatesResetComplete) >= 0)
			{
				/** yes, the reset is complete and so we can quit and do a restart */
				elog(DEBUG1, "peer reset: mirror peer reset is complete");
				break;
			}
		}
	}

	proc_exit(EXIT_CODE_SHOULD_RESTART_SHMEM_CLEANLY);
}
Beispiel #22
0
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 */
void
BackgroundWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext bgwriter_context;

	BgWriterShmem->bgwriter_pid = MyProcPid;
	am_bg_writer = true;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.	(bgwriter probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we deliberately ignore SIGTERM, because during a standard Unix
	 * system shutdown cycle, init will SIGTERM all processes at once.	We
	 * want to wait for the backends to exit, whereupon the postmaster will
	 * tell us it's okay to shut down (via SIGUSR2).
	 *
	 * SIGUSR1 is presently unused; keep it spare in case someday we want this
	 * process to participate in sinval messaging.
	 */
	pqsignal(SIGHUP, BgSigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, ReqCheckpointHandler);		/* request checkpoint */
	pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN); /* reserve for sinval */
	pqsignal(SIGUSR2, ReqShutdownHandler);		/* request shutdown */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
#ifdef HAVE_SIGPROCMASK
	sigdelset(&BlockSig, SIGQUIT);
#else
	BlockSig &= ~(sigmask(SIGQUIT));
#endif

	/*
	 * Initialize so that first time-driven event happens at the correct time.
	 */
	last_checkpoint_time = last_xlog_switch_time = time(NULL);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	bgwriter_context = AllocSetContextCreate(TopMemoryContext,
											 "Background Writer",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(bgwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().	We don't have very many resources to worry
		 * about in bgwriter, but we do have LWLocks, buffers, and temp files.
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/* Warn any waiting backends that the checkpoint failed. */
		if (ckpt_active)
		{
			/* use volatile pointer to prevent code rearrangement */
			volatile BgWriterShmemStruct *bgs = BgWriterShmem;

			SpinLockAcquire(&bgs->ckpt_lck);
			bgs->ckpt_failed++;
			bgs->ckpt_done = bgs->ckpt_started;
			SpinLockRelease(&bgs->ckpt_lck);

			ckpt_active = false;
		}

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(bgwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(bgwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		do_checkpoint = false;
		int			flags = 0;
		time_t		now;
		int			elapsed_secs;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Process any requests or signals received recently.
		 */
		AbsorbFsyncRequests();

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (checkpoint_requested)
		{
			checkpoint_requested = false;
			do_checkpoint = true;
			BgWriterStats.m_requested_checkpoints++;
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Close down the database */
			ShutdownXLOG(0, 0);
			DumpFreeSpaceMap(0, 0);
			/* Normal exit from the bgwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Force a checkpoint if too much time has elapsed since the last one.
		 * Note that we count a timed checkpoint in stats only when this
		 * occurs without an external request, but we set the CAUSE_TIME flag
		 * bit even if there is also an external request.
		 */
		now = time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
		{
			if (!do_checkpoint)
				BgWriterStats.m_timed_checkpoints++;
			do_checkpoint = true;
			flags |= CHECKPOINT_CAUSE_TIME;
		}

		/*
		 * Do a checkpoint if requested, otherwise do one cycle of
		 * dirty-buffer writing.
		 */
		if (do_checkpoint)
		{
			/* use volatile pointer to prevent code rearrangement */
			volatile BgWriterShmemStruct *bgs = BgWriterShmem;

			/*
			 * Atomically fetch the request flags to figure out what kind of a
			 * checkpoint we should perform, and increase the started-counter
			 * to acknowledge that we've started a new checkpoint.
			 */
			SpinLockAcquire(&bgs->ckpt_lck);
			flags |= bgs->ckpt_flags;
			bgs->ckpt_flags = 0;
			bgs->ckpt_started++;
			SpinLockRelease(&bgs->ckpt_lck);

			/*
			 * We will warn if (a) too soon since last checkpoint (whatever
			 * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
			 * since the last checkpoint start.  Note in particular that this
			 * implementation will not generate warnings caused by
			 * CheckPointTimeout < CheckPointWarning.
			 */
			if ((flags & CHECKPOINT_CAUSE_XLOG) &&
				elapsed_secs < CheckPointWarning)
				ereport(LOG,
						(errmsg("checkpoints are occurring too frequently (%d seconds apart)",
								elapsed_secs),
						 errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));

			/*
			 * Initialize bgwriter-private variables used during checkpoint.
			 */
			ckpt_active = true;
			ckpt_start_recptr = GetInsertRecPtr();
			ckpt_start_time = now;
			ckpt_cached_elapsed = 0;

			/*
			 * Do the checkpoint.
			 */
			CreateCheckPoint(flags);

			/*
			 * After any checkpoint, close all smgr files.	This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();

			/*
			 * Indicate checkpoint completion to any waiting backends.
			 */
			SpinLockAcquire(&bgs->ckpt_lck);
			bgs->ckpt_done = bgs->ckpt_started;
			SpinLockRelease(&bgs->ckpt_lck);

			ckpt_active = false;

			/*
			 * Note we record the checkpoint start time not end time as
			 * last_checkpoint_time.  This is so that time-driven checkpoints
			 * happen at a predictable spacing.
			 */
			last_checkpoint_time = now;
		}
		else
			BgBufferSync();

		/* Check for archive_timeout and switch xlog files if necessary. */
		CheckArchiveTimeout();

		/* Nap for the configured time. */
		BgWriterNap();
	}
}
Beispiel #23
0
/**
 * This method is called after fork of the sweeper process. It sets up signal
 * handlers and does initialization that is required by a postgres backend.
 */
NON_EXEC_STATIC void
BackoffSweeperMain(int argc, char *argv[])
{
	sigjmp_buf	local_sigjmp_buf;

	IsUnderPostmaster = true;
	isSweeperProcess = true;

	/* Stay away from PMChildSlot */
	MyPMChildSlot = -1;

	/* reset MyProcPid */
	MyProcPid = getpid();

	/* Lose the postmaster's on-exit routines */
	on_exit_reset();

	/* Identify myself via ps */
	init_ps_display("sweeper process", "", "", "");

	SetProcessingMode(InitProcessing);

	/*
	 * Set up signal handlers.  We operate on databases much like a regular
	 * backend, so we use the same signal handling.  See equivalent code in
	 * tcop/postgres.c.
	 */
	pqsignal(SIGHUP, SIG_IGN);
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN);

	pqsignal(SIGTERM, die);
	pqsignal(SIGQUIT, quickdie);
	pqsignal(SIGUSR2, BackoffRequestShutdown);

	pqsignal(SIGFPE, FloatExceptionHandler);
	pqsignal(SIGCHLD, SIG_DFL);

	/*
	 * Copied from bgwriter
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Sweeper process");

	/* Early initialization */
	BaseInit();

	/* See InitPostgres()... */
	InitProcess();

	SetProcessingMode(NormalProcessing);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * We can now go away.  Note that because we'll call InitProcess, a
		 * callback will be registered to do ProcKill, which will clean up
		 * necessary state.
		 */
		proc_exit(0);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	PG_SETMASK(&UnBlockSig);

	MyBackendId = InvalidBackendId;

	/* main loop */
	BackoffSweeperLoop();

	/* One iteration done, go away */
	proc_exit(0);
}
Beispiel #24
0
/*
 * AutoVacMain
 */
NON_EXEC_STATIC void
AutoVacMain(int argc, char *argv[])
{
    ListCell   *cell;
    List	   *dblist;
    autovac_dbase *db;
    TransactionId xidForceLimit;
    bool		for_xid_wrap;
    sigjmp_buf	local_sigjmp_buf;

    /* we are a postmaster subprocess now */
    IsUnderPostmaster = true;
    am_autovacuum = true;

    /* MPP-4990: Autovacuum always runs as utility-mode */
    Gp_role = GP_ROLE_UTILITY;

    /* reset MyProcPid */
    MyProcPid = getpid();

    /* record Start Time for logging */
    MyStartTime = time(NULL);

    /* Identify myself via ps */
    init_ps_display("autovacuum process", "", "", "");

    SetProcessingMode(InitProcessing);

    /*
     * If possible, make this process a group leader, so that the postmaster
     * can signal any child processes too.  (autovacuum probably never has
     * any child processes, but for consistency we make all postmaster
     * child processes do this.)
     */
#ifdef HAVE_SETSID
    if (setsid() < 0)
        elog(FATAL, "setsid() failed: %m");
#endif

    /*
     * Set up signal handlers.	We operate on databases much like a regular
     * backend, so we use the same signal handling.  See equivalent code in
     * tcop/postgres.c.
     *
     * Currently, we don't pay attention to postgresql.conf changes that
     * happen during a single daemon iteration, so we can ignore SIGHUP.
     */
    pqsignal(SIGHUP, SIG_IGN);

    /*
     * SIGINT is used to signal cancelling the current table's vacuum; SIGTERM
     * means abort and exit cleanly, and SIGQUIT means abandon ship.
     */
    pqsignal(SIGINT, StatementCancelHandler);
    pqsignal(SIGTERM, die);
    pqsignal(SIGQUIT, quickdie);
    pqsignal(SIGALRM, handle_sig_alarm);

    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR1, procsignal_sigusr1_handler);
    /* We don't listen for async notifies */
    pqsignal(SIGUSR2, SIG_IGN);
    pqsignal(SIGFPE, FloatExceptionHandler);
    pqsignal(SIGCHLD, SIG_DFL);

    /* Early initialization */
    BaseInit();

    /*
     * Create a per-backend PGPROC struct in shared memory, except in the
     * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
     * this before we can use LWLocks (and in the EXEC_BACKEND case we already
     * had to do some stuff with LWLocks).
     */
#ifndef EXEC_BACKEND
    InitProcess();
#endif

    /*
     * If an exception is encountered, processing resumes here.
     *
     * See notes in postgres.c about the design of this coding.
     */
    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        /* Prevents interrupts while cleaning up */
        HOLD_INTERRUPTS();

        /* Report the error to the server log */
        EmitErrorReport();

        /*
         * We can now go away.	Note that because we called InitProcess, a
         * callback was registered to do ProcKill, which will clean up
         * necessary state.
         */
        proc_exit(0);
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    PG_SETMASK(&UnBlockSig);

    /*
     * Force zero_damaged_pages OFF in the autovac process, even if it is set
     * in postgresql.conf.	We don't really want such a dangerous option being
     * applied non-interactively.
     */
    SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);

    /* Get a list of databases */
    dblist = autovac_get_database_list();

    /*
     * Determine the oldest datfrozenxid/relfrozenxid that we will allow
     * to pass without forcing a vacuum.  (This limit can be tightened for
     * particular tables, but not loosened.)
     */
    recentXid = ReadNewTransactionId();
    xidForceLimit = recentXid - autovacuum_freeze_max_age;
    /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */
    if (xidForceLimit < FirstNormalTransactionId)
        xidForceLimit -= FirstNormalTransactionId;

    /*
     * Choose a database to connect to.  We pick the database that was least
     * recently auto-vacuumed, or one that needs vacuuming to prevent Xid
     * wraparound-related data loss.  If any db at risk of wraparound is
     * found, we pick the one with oldest datfrozenxid,
     * independently of autovacuum times.
     *
     * Note that a database with no stats entry is not considered, except for
     * Xid wraparound purposes.  The theory is that if no one has ever
     * connected to it since the stats were last initialized, it doesn't need
     * vacuuming.
     *
     * XXX This could be improved if we had more info about whether it needs
     * vacuuming before connecting to it.  Perhaps look through the pgstats
     * data for the database's tables?  One idea is to keep track of the
     * number of new and dead tuples per database in pgstats.  However it
     * isn't clear how to construct a metric that measures that and not cause
     * starvation for less busy databases.
     */
    db = NULL;
    for_xid_wrap = false;
    foreach(cell, dblist)
    {
        autovac_dbase *tmp = lfirst(cell);

        /* Find pgstat entry if any */
        tmp->entry = pgstat_fetch_stat_dbentry(tmp->oid);

        /* Check to see if this one is at risk of wraparound */
        if (TransactionIdPrecedes(tmp->frozenxid, xidForceLimit))
        {
            if (db == NULL ||
                    TransactionIdPrecedes(tmp->frozenxid, db->frozenxid))
                db = tmp;
            for_xid_wrap = true;
            continue;
        }
        else if (for_xid_wrap)
            continue;			/* ignore not-at-risk DBs */

        /*
         * Otherwise, skip a database with no pgstat entry; it means it
         * hasn't seen any activity.
         */
        if (!tmp->entry)
            continue;

        /*
         * Remember the db with oldest autovac time.  (If we are here,
         * both tmp->entry and db->entry must be non-null.)
         */
        if (db == NULL ||
                tmp->entry->last_autovac_time < db->entry->last_autovac_time)
            db = tmp;
    }
Beispiel #25
0
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 */
void
BackgroundWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext bgwriter_context;
	bool		prev_hibernate;

	/*
	 * Properly accept or ignore signals the postmaster might send us.
	 *
	 * bgwriter doesn't participate in ProcSignal signalling, but a SIGUSR1
	 * handler is still needed for latch wakeups.
	 */
	pqsignal(SIGHUP, BgSigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGTERM, ReqShutdownHandler);		/* shutdown */
	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, bgwriter_sigusr1_handler);
	pqsignal(SIGUSR2, SIG_IGN);

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

	/*
	 * We just started, assume there has been either a shutdown or
	 * end-of-recovery snapshot.
	 */
	last_snapshot_ts = GetCurrentTimestamp();

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	bgwriter_context = AllocSetContextCreate(TopMemoryContext,
											 "Background Writer",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(bgwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in bgwriter, but we do have LWLocks, buffers, and temp files.
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_SMgr();
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(bgwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(bgwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();

		/* Report wait end here, when there is no further possibility of wait */
		pgstat_report_wait_end();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Reset hibernation state after any error.
	 */
	prev_hibernate = false;

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		can_hibernate;
		int			rc;

		/* Clear any already-pending wakeups */
		ResetLatch(MyLatch);

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Normal exit from the bgwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Do one cycle of dirty-buffer writing.
		 */
		can_hibernate = BgBufferSync();

		/*
		 * Send off activity statistics to the stats collector
		 */
		pgstat_send_bgwriter();

		if (FirstCallSinceLastCheckpoint())
		{
			/*
			 * After any checkpoint, close all smgr files.  This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();
		}

		/*
		 * Log a new xl_running_xacts every now and then so replication can
		 * get into a consistent state faster (think of suboverflowed
		 * snapshots) and clean up resources (locks, KnownXids*) more
		 * frequently. The costs of this are relatively low, so doing it 4
		 * times (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine.
		 *
		 * We assume the interval for writing xl_running_xacts is
		 * significantly bigger than BgWriterDelay, so we don't complicate the
		 * overall timeout handling but just assume we're going to get called
		 * often enough even if hibernation mode is active. It's not that
		 * important that log_snap_interval_ms is met strictly. To make sure
		 * we're not waking the disk up unnecessarily on an idle system we
		 * check whether there has been any WAL inserted since the last time
		 * we've logged a running xacts.
		 *
		 * We do this logging in the bgwriter as its the only process that is
		 * run regularly and returns to its mainloop all the time. E.g.
		 * Checkpointer, when active, is barely ever in its mainloop and thus
		 * makes it hard to log regularly.
		 */
		if (XLogStandbyInfoActive() && !RecoveryInProgress())
		{
			TimestampTz timeout = 0;
			TimestampTz now = GetCurrentTimestamp();

			timeout = TimestampTzPlusMilliseconds(last_snapshot_ts,
												  LOG_SNAPSHOT_INTERVAL_MS);

			/*
			 * only log if enough time has passed and some xlog record has
			 * been inserted.
			 */
			if (now >= timeout &&
				last_snapshot_lsn != GetXLogInsertRecPtr())
			{
				last_snapshot_lsn = LogStandbySnapshot();
				last_snapshot_ts = now;
			}
		}

		/*
		 * Sleep until we are signaled or BgWriterDelay has elapsed.
		 *
		 * Note: the feedback control loop in BgBufferSync() expects that we
		 * will call it every BgWriterDelay msec.  While it's not critical for
		 * correctness that that be exact, the feedback loop might misbehave
		 * if we stray too far from that.  Hence, avoid loading this process
		 * down with latch events that are likely to happen frequently during
		 * normal operation.
		 */
		rc = WaitLatch(MyLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   BgWriterDelay /* ms */ );

		/*
		 * If no latch event and BgBufferSync says nothing's happening, extend
		 * the sleep in "hibernation" mode, where we sleep for much longer
		 * than bgwriter_delay says.  Fewer wakeups save electricity.  When a
		 * backend starts using buffers again, it will wake us up by setting
		 * our latch.  Because the extra sleep will persist only as long as no
		 * buffer allocations happen, this should not distort the behavior of
		 * BgBufferSync's control loop too badly; essentially, it will think
		 * that the system-wide idle interval didn't exist.
		 *
		 * There is a race condition here, in that a backend might allocate a
		 * buffer between the time BgBufferSync saw the alloc count as zero
		 * and the time we call StrategyNotifyBgWriter.  While it's not
		 * critical that we not hibernate anyway, we try to reduce the odds of
		 * that by only hibernating when BgBufferSync says nothing's happening
		 * for two consecutive cycles.  Also, we mitigate any possible
		 * consequences of a missed wakeup by not hibernating forever.
		 */
		if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate)
		{
			/* Ask for notification at next buffer allocation */
			StrategyNotifyBgWriter(MyProc->pgprocno);
			/* Sleep ... */
			rc = WaitLatch(MyLatch,
						   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
						   BgWriterDelay * HIBERNATE_FACTOR);
			/* Reset the notification request in case we timed out */
			StrategyNotifyBgWriter(-1);
		}

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			exit(1);

		prev_hibernate = can_hibernate;
	}
}
Beispiel #26
0
/*
 * There are a few ways to arrive in the initsequencer.
 * 1. From _PG_init (called exactly once when the library is loaded for ANY
 *    reason).
 *    1a. Because of the command LOAD 'libraryname';
 *        This case can be distinguished because _PG_init will have found the
 *        LOAD command and saved the 'libraryname' in pljavaLoadPath.
 *    1b. Because of a CREATE FUNCTION naming this library. pljavaLoadPath will
 *        be NULL.
 *    1c. By the first actual use of a PL/Java function, causing this library
 *        to be loaded. pljavaLoadPath will be NULL. The called function's Oid
 *        will be available to the call handler once we return from _PG_init,
 *        but it isn't (easily) available here.
 * 2. From the call handler, if initialization isn't complete yet. That can only
 *    mean something failed in the earlier call to _PG_init, and whatever it was
 *    is highly likely to fail again. That may lead to the untidyness of
 *    duplicated diagnostic messages, but for now I like the belt-and-suspenders
 *    approach of making sure the init sequence gets as many chances as possible
 *    to succeed.
 * 3. From a GUC assign hook, if the user has updated a setting that might allow
 *    initialization to succeed. It resumes from where it left off.
 *
 * In all cases, the sequence must progress as far as starting the VM and
 * initializing the PL/Java classes. In all cases except 1a, that's enough,
 * assuming the language handlers and schema have all been set up already (or,
 * in case 1b, the user is intent on setting them up explicitly).
 *
 * In case 1a, we can go ahead and test for, and create, the schema, functions,
 * and language entries as needed, using pljavaLoadPath as the library path
 * if creating the language handler functions. One-stop shopping. (The presence
 * of pljavaLoadPath in any of the other cases, such as resumption by an assign
 * hook, indicates it is really a continuation of case 1a.)
 */
static void initsequencer(enum initstage is, bool tolerant)
{
	JVMOptList optList;
	Invocation ctx;
	jint JNIresult;
	char *greeting;

	switch (is)
	{
	case IS_FORMLESS_VOID:
		initstage = IS_GUCS_REGISTERED;

	case IS_GUCS_REGISTERED:
		libjvmlocation = strdup("libjvm.so");

		initstage = IS_PLJAVA_ENABLED;

	case IS_PLJAVA_ENABLED:
		libjvm_handle = pg_dlopen(libjvmlocation);
		if ( NULL == libjvm_handle )
		{
			ereport(ERROR, (
				errmsg("Cannot load libjvm.so library, check that it is available in LD_LIBRARY_PATH"),
				errdetail("%s", (char *)pg_dlerror())));
			goto check_tolerant;
		}
		initstage = IS_CAND_JVMOPENED;

	case IS_CAND_JVMOPENED:
		pljava_createvm =
			(jint (JNICALL *)(JavaVM **, void **, void *))
			pg_dlsym(libjvm_handle, "JNI_CreateJavaVM");
		if ( NULL == pljava_createvm )
		{
			/*
			 * If it hasn't got the symbol, it can't be the right
			 * library, so close/unload it so another can be tried.
			 * Format the dlerror string first: dlclose may clobber it.
			 */
			char *dle = MemoryContextStrdup(ErrorContext, pg_dlerror());
			pg_dlclose(libjvm_handle);
			initstage = IS_CAND_JVMLOCATION;
			ereport(ERROR, (
				errmsg("Cannot start Java VM"),
				errdetail("%s", dle),
				errhint("Check that libjvm.so is available in LD_LIBRARY_PATH")));
			goto check_tolerant;
		}
		initstage = IS_CREATEVM_SYM_FOUND;

	case IS_CREATEVM_SYM_FOUND:
		s_javaLogLevel = INFO;
		checkIntTimeType();
		HashMap_initialize(); /* creates things in TopMemoryContext */
#ifdef PLJAVA_DEBUG
		/* Hard setting for debug. Don't forget to recompile...
		 */
		pljava_debug = 1;
#endif
		initstage = IS_MISC_ONCE_DONE;

	case IS_MISC_ONCE_DONE:
		JVMOptList_init(&optList); /* uses CurrentMemoryContext */
		seenVisualVMName = false;
		addUserJVMOptions(&optList);
		if ( ! seenVisualVMName )
			JVMOptList_addVisualVMName(&optList);
		JVMOptList_add(&optList, "vfprintf", (void*)my_vfprintf, true);
#ifndef GCJ
		JVMOptList_add(&optList, "-Xrs", 0, true);
#endif
		effectiveClassPath = getClassPath("-Djava.class.path=");
		if(effectiveClassPath != 0)
		{
			JVMOptList_add(&optList, effectiveClassPath, 0, true);
		}
		initstage = IS_JAVAVM_OPTLIST;

	case IS_JAVAVM_OPTLIST:
		JNIresult = initializeJavaVM(&optList); /* frees the optList */
		if( JNI_OK != JNIresult )
		{
			initstage = IS_MISC_ONCE_DONE; /* optList has been freed */
			StaticAssertStmt(sizeof(jint) <= sizeof(long int),
				"jint wider than long int?!");
			ereport(WARNING,
				(errmsg("failed to create Java virtual machine"),
				 errdetail("JNI_CreateJavaVM returned an error code: %ld",
					(long int)JNIresult),
				 jvmStartedAtLeastOnce ?
					errhint("Because an earlier attempt during this session "
					"did start a VM before failing, this probably means your "
					"Java runtime environment does not support more than one "
					"VM creation per session.  You may need to exit this "
					"session and start a new one.") : 0));
			goto check_tolerant;
		}
		jvmStartedAtLeastOnce = true;
		elog(DEBUG2, "successfully created Java virtual machine");
		initstage = IS_JAVAVM_STARTED;

	case IS_JAVAVM_STARTED:
#ifdef USE_PLJAVA_SIGHANDLERS
		pqsignal(SIGINT,  pljavaStatementCancelHandler);
		pqsignal(SIGTERM, pljavaDieHandler);
#endif
		/* Register an on_proc_exit handler that destroys the VM
		 */
		on_proc_exit(_destroyJavaVM, 0);
		initstage = IS_SIGHANDLERS;

	case IS_SIGHANDLERS:
		Invocation_pushBootContext(&ctx);
		PG_TRY();
		{
			initPLJavaClasses();
			initJavaSession();
			Invocation_popBootContext();
			initstage = IS_PLJAVA_FOUND;
		}
		PG_CATCH();
		{
			MemoryContextSwitchTo(ctx.upperContext); /* leave ErrorContext */
			Invocation_popBootContext();
			initstage = IS_MISC_ONCE_DONE;
			/* We can't stay here...
			 */
			if ( tolerant )
				reLogWithChangedLevel(WARNING); /* so xact is not aborted */
			else
			{
				EmitErrorReport(); /* no more unwinding, just log it */
				/* Seeing an ERROR emitted to the log, without leaving the
				 * transaction aborted, would violate the principle of least
				 * astonishment. But at check_tolerant below, another ERROR will
				 * be thrown immediately, so the transaction effect will be as
				 * expected and this ERROR will contribute information beyond
				 * what is in the generic one thrown down there.
				 */
				FlushErrorState();
			}
		}
		PG_END_TRY();
		if ( IS_PLJAVA_FOUND != initstage )
		{
			/* JVM initialization failed for some reason. Destroy
			 * the VM if it exists. Perhaps the user will try
			 * fixing the pljava.classpath and make a new attempt.
			 */
			ereport(WARNING, (
				errmsg("failed to load initial PL/Java classes"),
				errhint("The most common reason is that \"pljava_classpath\" "
					"needs to be set, naming the proper \"pljava.jar\" file.")
					));
			_destroyJavaVM(0, 0);
			goto check_tolerant;
		}

	case IS_PLJAVA_FOUND:
		greeting = InstallHelper_hello();
		ereport(NULL != pljavaLoadPath ? NOTICE : DEBUG1, (
				errmsg("PL/Java loaded"),
				errdetail("versions:\n%s", greeting)));
		pfree(greeting);
		if ( NULL != pljavaLoadPath )
			InstallHelper_groundwork(); /* sqlj schema, language handlers, ...*/
		initstage = IS_COMPLETE;

	case IS_COMPLETE:
		pljavaLoadingAsExtension = false;
		if ( alteredSettingsWereNeeded )
		{
			/* Use this StringInfoData to conditionally construct part of the
			 * hint string suggesting ALTER DATABASE ... SET ... FROM CURRENT
			 * provided the server is >= 9.2 where that will actually work.
			 * In 9.3, psprintf appeared, which would make this all simpler,
			 * but if 9.3+ were all that had to be supported, this would all
			 * be moot anyway. Doing the initStringInfo inside the ereport
			 * ensures the string is allocated in ErrorContext and won't leak.
			 * Don't remove the extra parens grouping
			 * (initStringInfo, appendStringInfo, errhint) ... with the parens,
			 * that's a comma expression, which is sequenced; without them, they
			 * are just function parameters with evaluation order unknown.
			 */
			StringInfoData buf;
#if PG_VERSION_NUM >= 90200
#define MOREHINT \
				appendStringInfo(&buf, \
					"using ALTER DATABASE %s SET ... FROM CURRENT or ", \
					pljavaDbName()),
#else
#define MOREHINT
#endif
			ereport(NOTICE, (
				errmsg("PL/Java successfully started after adjusting settings"),
				(initStringInfo(&buf),
				MOREHINT
				errhint("The settings that worked should be saved (%s"
					"in the \"%s\" file). For a reminder of what has been set, "
					"try: SELECT name, setting FROM pg_settings WHERE name LIKE"
					" 'pljava.%%' AND source = 'session'",
					buf.data,
					superuser()
						? PG_GETCONFIGOPTION("config_file")
						: "postgresql.conf"))));
#undef MOREHINT
			if ( loadAsExtensionFailed )
			{
				ereport(NOTICE, (errmsg(
					"PL/Java load successful after failed CREATE EXTENSION"),
					errdetail(
					"PL/Java is now installed, but not as an extension."),
					errhint(
					"To correct that, either COMMIT or ROLLBACK, make sure "
					"the working settings are saved, exit this session, and "
					"in a new session, either: "
					"1. if committed, run "
					"\"CREATE EXTENSION pljava FROM unpackaged\", or 2. "
					"if rolled back, simply \"CREATE EXTENSION pljava\" again."
					)));
			}
		}
		return;

	default:
		ereport(ERROR, (
			errmsg("cannot set up PL/Java"),
			errdetail(
				"An unexpected stage was reached in the startup sequence."),
			errhint(
				"Please report the circumstances to the PL/Java maintainers.")
			));
	}

check_tolerant:
	if ( pljavaLoadingAsExtension )
	{
		tolerant = false;
		loadAsExtensionFailed = true;
		pljavaLoadingAsExtension = false;
	}
	if ( !tolerant )
	{
		ereport(ERROR, (
			errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
			errmsg(
				"cannot use PL/Java before successfully completing its setup"),
			errhint(
				"Check the log for messages closely preceding this one, "
				"detailing what step of setup failed and what will be needed, "
				"probably setting one of the \"pljava.\" configuration "
				"variables, to complete the setup. If there is not enough "
				"help in the log, try again with different settings for "
				"\"log_min_messages\" or \"log_error_verbosity\".")));
	}
}
Beispiel #27
0
/* fork lifecheck process*/
static pid_t
fork_a_lifecheck(int fork_wait_time)
{
	pid_t pid;
	sigjmp_buf	local_sigjmp_buf;

	pid = fork();
	if (pid != 0)
	{
		if (pid == -1)
			ereport(ERROR,
					(errmsg("failed to fork a lifecheck process")));
		return pid;
	}
    on_exit_reset();
	processType = PT_LIFECHECK;

	if (fork_wait_time > 0) {
		sleep(fork_wait_time);
	}

	POOL_SETMASK(&UnBlockSig);

	init_ps_display("", "", "", "");

	signal(SIGTERM, wd_exit);	
	signal(SIGINT, wd_exit);	
	signal(SIGQUIT, wd_exit);	
	signal(SIGCHLD, SIG_DFL);
	signal(SIGHUP, SIG_IGN);	
	signal(SIGPIPE, SIG_IGN);

	/* Create per loop iteration memory context */
	ProcessLoopContext = AllocSetContextCreate(TopMemoryContext,
											   "wd_lifecheck_main_loop",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);
	
	MemoryContextSwitchTo(TopMemoryContext);

	set_ps_display("lifecheck",false);

	/* wait until ready to go */
	while (WD_OK != is_wd_lifecheck_ready())
	{
		sleep(pool_config->wd_interval * 10);
	}
	ereport(LOG,
			(errmsg("watchdog: lifecheck started")));

	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;
		
		EmitErrorReport();
		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();
		sleep(pool_config->wd_heartbeat_keepalive);
	}
	
	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/* watchdog loop */
	for (;;)
	{
		MemoryContextSwitchTo(ProcessLoopContext);
		MemoryContextResetAndDeleteChildren(ProcessLoopContext);

		/* pgpool life check */
		wd_lifecheck();
		sleep(pool_config->wd_interval);
	}

	return pid;
}
void
FileRepSubProcess_Main()
{
	const char *statmsg;

	MemoryContext fileRepSubProcessMemoryContext;

	sigjmp_buf	local_sigjmp_buf;

	MyProcPid = getpid();

	MyStartTime = time(NULL);

	/*
	 * Create a PGPROC so we can use LWLocks in FileRep sub-processes.  The
	 * routine also register clean up at process exit
	 */
	InitAuxiliaryProcess();

	InitBufferPoolBackend();

	FileRepSubProcess_ConfigureSignals();

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		LWLockReleaseAll();

		if (FileRepPrimary_IsResyncManagerOrWorker())
		{
			LockReleaseAll(DEFAULT_LOCKMETHOD, false);
		}

		if (FileRepIsBackendSubProcess(fileRepProcessType))
		{
			AbortBufferIO();
			UnlockBuffers();

			/* buffer pins are released here: */
			ResourceOwnerRelease(CurrentResourceOwner,
								 RESOURCE_RELEASE_BEFORE_LOCKS,
								 false, true);
		}

		/*
		 * We can now go away.	Note that because we'll call InitProcess, a
		 * callback will be registered to do ProcKill, which will clean up
		 * necessary state.
		 */
		proc_exit(0);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	PG_SETMASK(&UnBlockSig);

	/*
	 * Identify myself via ps
	 */

	statmsg = FileRepProcessTypeToString[fileRepProcessType];

	init_ps_display(statmsg, "", "", "");

	/* Create the memory context where cross-transaction state is stored */
	fileRepSubProcessMemoryContext = AllocSetContextCreate(TopMemoryContext,
														   "filerep subprocess memory context",
														   ALLOCSET_DEFAULT_MINSIZE,
														   ALLOCSET_DEFAULT_INITSIZE,
														   ALLOCSET_DEFAULT_MAXSIZE);

	MemoryContextSwitchTo(fileRepSubProcessMemoryContext);

	stateChangeRequestCounter++;

	FileRepSubProcess_ProcessSignals();

	switch (fileRepProcessType)
	{
		case FileRepProcessTypePrimarySender:
			FileRepPrimary_StartSender();
			break;

		case FileRepProcessTypeMirrorReceiver:
			FileRepMirror_StartReceiver();
			break;

		case FileRepProcessTypeMirrorConsumer:
		case FileRepProcessTypeMirrorConsumerWriter:
		case FileRepProcessTypeMirrorConsumerAppendOnly1:
			FileRepMirror_StartConsumer();
			break;

		case FileRepProcessTypeMirrorSenderAck:
			FileRepAckMirror_StartSender();
			break;

		case FileRepProcessTypePrimaryReceiverAck:
			FileRepAckPrimary_StartReceiver();
			break;

		case FileRepProcessTypePrimaryConsumerAck:
			FileRepAckPrimary_StartConsumer();
			break;

		case FileRepProcessTypePrimaryRecovery:
			FileRepSubProcess_InitProcess();

			/*
			 * At this point, database is starting up and xlog is not yet
			 * replayed.  Initializing relcache now is dangerous, a sequential
			 * scan of catalog tables may end up with incorrect hint bits.
			 * E.g. a committed transaction's dirty heap pages made it to disk
			 * but pg_clog update was still in memory and we crashed.  If a
			 * tuple inserted by this transaction is read during relcache
			 * initialization, status of the tuple's xmin will be incorrectly
			 * determined as "not commited" from pg_clog. And
			 * HEAP_XMIN_INVALID hint bit will be set, rendering the tuple
			 * perpetually invisible.  Relcache initialization must be
			 * deferred to only after all of xlog has been replayed.
			 */
			FileRepPrimary_StartRecovery();

			ResourceOwnerRelease(CurrentResourceOwner,
								 RESOURCE_RELEASE_BEFORE_LOCKS,
								 false, true);
			break;

		case FileRepProcessTypeResyncManager:
			FileRepSubProcess_InitProcess();
			FileRepPrimary_StartResyncManager();

			ResourceOwnerRelease(CurrentResourceOwner,
								 RESOURCE_RELEASE_BEFORE_LOCKS,
								 false, true);
			break;

		case FileRepProcessTypeResyncWorker1:
		case FileRepProcessTypeResyncWorker2:
		case FileRepProcessTypeResyncWorker3:
		case FileRepProcessTypeResyncWorker4:
			FileRepSubProcess_InitProcess();
			FileRepPrimary_StartResyncWorker();

			ResourceOwnerRelease(CurrentResourceOwner,
								 RESOURCE_RELEASE_BEFORE_LOCKS,
								 false, true);
			break;

		default:
			elog(PANIC, "unrecognized process type: %s(%d)",
				 statmsg, fileRepProcessType);
			break;
	}

	switch (FileRepSubProcess_GetState())
	{
		case FileRepStateShutdown:
		case FileRepStateReady:
			proc_exit(0);
			break;

		default:
			proc_exit(2);
			break;
	}
}
Beispiel #29
0
Datei: fts.c Projekt: LJoNe/gpdb
/*
 * FtsProbeMain
 */
NON_EXEC_STATIC void
ftsMain(int argc, char *argv[])
{
	sigjmp_buf	local_sigjmp_buf;
	char	   *fullpath;

	IsUnderPostmaster = true;
	am_ftsprobe = true;

	/* Stay away from PMChildSlot */
	MyPMChildSlot = -1;

	/* reset MyProcPid */
	MyProcPid = getpid();
	
	/* Lose the postmaster's on-exit routines */
	on_exit_reset();

	/* Identify myself via ps */
	init_ps_display("ftsprobe process", "", "", "");

	SetProcessingMode(InitProcessing);

	/*
	 * reread postgresql.conf if requested
	 */
	pqsignal(SIGHUP, sigHupHandler);

	/*
	 * Presently, SIGINT will lead to autovacuum shutdown, because that's how
	 * we handle ereport(ERROR).  It could be improved however.
	 */
	pqsignal(SIGINT, ReqFtsFullScan);		/* request full-scan */
	pqsignal(SIGTERM, die);
	pqsignal(SIGQUIT, quickdie); /* we don't do any ftsprobe specific cleanup, just use the standard. */
	pqsignal(SIGALRM, handle_sig_alarm);

	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
	/* We don't listen for async notifies */
	pqsignal(SIGUSR2, RequestShutdown);
	pqsignal(SIGFPE, FloatExceptionHandler);
	pqsignal(SIGCHLD, SIG_DFL);

	/*
	 * Copied from bgwriter
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "FTS Probe");

	/* Early initialization */
	BaseInit();

	/* See InitPostgres()... */
	InitProcess();	
	InitBufferPoolBackend();
	InitXLOGAccess();

	SetProcessingMode(NormalProcessing);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * We can now go away.	Note that because we'll call InitProcess, a
		 * callback will be registered to do ProcKill, which will clean up
		 * necessary state.
		 */
		proc_exit(0);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	PG_SETMASK(&UnBlockSig);

	/*
	 * Add my PGPROC struct to the ProcArray.
	 *
	 * Once I have done this, I am visible to other backends!
	 */
	InitProcessPhase2();

	/*
	 * Initialize my entry in the shared-invalidation manager's array of
	 * per-backend data.
	 *
	 * Sets up MyBackendId, a unique backend identifier.
	 */
	MyBackendId = InvalidBackendId;

	SharedInvalBackendInit(false);

	if (MyBackendId > MaxBackends || MyBackendId <= 0)
		elog(FATAL, "bad backend id: %d", MyBackendId);

	/*
	 * bufmgr needs another initialization call too
	 */
	InitBufferPoolBackend();

	/* heap access requires the rel-cache */
	RelationCacheInitialize();
	InitCatalogCache();

	/*
	 * It's now possible to do real access to the system catalogs.
	 *
	 * Load relcache entries for the system catalogs.  This must create at
	 * least the minimum set of "nailed-in" cache entries.
	 */
	RelationCacheInitializePhase2();

	/*
	 * In order to access the catalog, we need a database, and a
	 * tablespace; our access to the heap is going to be slightly
	 * limited, so we'll just use some defaults.
	 */
	if (!FindMyDatabase(probeDatabase, &MyDatabaseId, &MyDatabaseTableSpace))
		ereport(FATAL,
				(errcode(ERRCODE_UNDEFINED_DATABASE),
				 errmsg("database \"%s\" does not exit", probeDatabase)));

	/* Now we can mark our PGPROC entry with the database ID */
	/* (We assume this is an atomic store so no lock is needed) */
	MyProc->databaseId = MyDatabaseId;

	fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);

	SetDatabasePath(fullpath);

	RelationCacheInitializePhase3();

	/* shmem: publish probe pid */
	ftsProbeInfo->fts_probePid = MyProcPid;

	/* main loop */
	FtsLoop();

	/* One iteration done, go away */
	proc_exit(0);
}