Esempio n. 1
0
/*
 * Initialize workspace for a worker process: create the schema if it doesn't
 * already exist.
 */
static void
initialize_worker_spi(worktable *table)
{
	int			ret;
	int			ntup;
	bool		isnull;
	StringInfoData buf;

	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	SPI_connect();
	PushActiveSnapshot(GetTransactionSnapshot());
	pgstat_report_activity(STATE_RUNNING, "initializing spi_worker schema");

	/* XXX could we use CREATE SCHEMA IF NOT EXISTS? */
	initStringInfo(&buf);
	appendStringInfo(&buf, "select count(*) from pg_namespace where nspname = '%s'",
					 table->schema);

	ret = SPI_execute(buf.data, true, 0);
	if (ret != SPI_OK_SELECT)
		elog(FATAL, "SPI_execute failed: error code %d", ret);

	if (SPI_processed != 1)
		elog(FATAL, "not a singleton result");

	ntup = DatumGetInt64(SPI_getbinval(SPI_tuptable->vals[0],
									   SPI_tuptable->tupdesc,
									   1, &isnull));
	if (isnull)
		elog(FATAL, "null result");

	if (ntup == 0)
	{
		resetStringInfo(&buf);
		appendStringInfo(&buf,
						 "CREATE SCHEMA \"%s\" "
						 "CREATE TABLE \"%s\" ("
			   "		type text CHECK (type IN ('total', 'delta')), "
						 "		value	integer)"
				  "CREATE UNIQUE INDEX \"%s_unique_total\" ON \"%s\" (type) "
						 "WHERE type = 'total'",
					   table->schema, table->name, table->name, table->name);

		/* set statement start time */
		SetCurrentStatementStartTimestamp();

		ret = SPI_execute(buf.data, false, 0);

		if (ret != SPI_OK_UTILITY)
			elog(FATAL, "failed to create my schema");
	}

	SPI_finish();
	PopActiveSnapshot();
	CommitTransactionCommand();
	pgstat_report_activity(STATE_IDLE, NULL);
}
Esempio n. 2
0
static void
execute_pg_settings_logger(config_log_objects *objects) {

	int		ret;
	bool	isnull;
	StringInfoData	buf;


	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	SPI_connect();
	PushActiveSnapshot(GetTransactionSnapshot());
	pgstat_report_activity(STATE_RUNNING, "executing configuration logger function");

	initStringInfo(&buf);

	appendStringInfo(
		&buf,
		"SELECT %s.%s()",
		config_log_schema,
		objects->function_name
		);

	ret = SPI_execute(buf.data, false, 0);
	if (ret != SPI_OK_SELECT)
	{
		elog(FATAL, "SPI_execute failed: error code %d", ret);
	}

	if (SPI_processed != 1)
	{
		elog(FATAL, "not a singleton result");
	}

	log_info("pg_settings_logger() executed");

	if(DatumGetBool(SPI_getbinval(SPI_tuptable->vals[0],
				  SPI_tuptable->tupdesc,
				  1, &isnull)))
	{
		log_info("Configuration changes recorded");
	}
	else
	{
		log_info("No configuration changes detected");
	}

   	SPI_finish();
	PopActiveSnapshot();
	CommitTransactionCommand();
	pgstat_report_activity(STATE_IDLE, NULL);
}
Esempio n. 3
0
void worker_main(Datum arg)
{
	int 			ret;
	StringInfoData 	buf;
	uint32 			segment = UInt32GetDatum(arg);

	/* Setup signal handlers */
	pqsignal(SIGHUP, worker_sighup);
	pqsignal(SIGTERM, worker_sigterm);

	/* Allow signals */
	BackgroundWorkerUnblockSignals();

	initialize_worker(segment);

	/* Connect to the database */
	BackgroundWorkerInitializeConnection(job->datname, job->rolname);

	elog(LOG, "%s initialized running job id %d", MyBgworkerEntry->bgw_name, job->job_id);
	pgstat_report_appname(MyBgworkerEntry->bgw_name);

	/* Initialize the query text */
	initStringInfo(&buf);
	appendStringInfo(&buf,
					"SELECT * FROM %s.%s(%d, NULL)",
					job_run_function.schema,
					job_run_function.name,
					job->job_id);

	/* Initialize the SPI subsystem */
	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	SPI_connect();
	PushActiveSnapshot(GetTransactionSnapshot());
	pgstat_report_activity(STATE_RUNNING, buf.data);

	SetCurrentStatementStartTimestamp();

	/* And run the query */
	ret = SPI_execute(buf.data, true, 0);
	if (ret < 0)
		elog(FATAL, "errors while executing %s", buf.data);

	/* Commmit the transaction */
	SPI_finish();
	PopActiveSnapshot();
	CommitTransactionCommand();
	pgstat_report_activity(STATE_IDLE, NULL);

	proc_exit(0);
}
Esempio n. 4
0
static void
cq_bgproc_main(Datum arg)
{
	void (*run) (void);
	int default_priority = getpriority(PRIO_PROCESS, MyProcPid);
	int priority;

	MyContQueryProc = (ContQueryProc *) DatumGetPointer(arg);

	BackgroundWorkerUnblockSignals();
	BackgroundWorkerInitializeConnection(NameStr(MyContQueryProc->group->db_name), NULL);

	/* if we got a cancel signal in prior command, quit */
	CHECK_FOR_INTERRUPTS();

	MyContQueryProc->latch = &MyProc->procLatch;

	ereport(LOG, (errmsg("continuous query process \"%s\" running with pid %d", GetContQueryProcName(MyContQueryProc), MyProcPid)));
	pgstat_report_activity(STATE_RUNNING, GetContQueryProcName(MyContQueryProc));

	/*
	 * The BackgroundWorkerHandle's slot is always in the range [0, max_worker_processes)
	 * and will be unique for any background process being run. We use this knowledge to
	 * assign our continuous query processes's a unique ID that fits within any TupleBuffer's
	 * waiters Bitmapset.
	 */
	MyContQueryProc->id = MyContQueryProc->handle.slot;

	/*
	 * Be nice!
	 *
	 * More is less here. A higher number indicates a lower scheduling priority.
	 */
	priority = Max(default_priority, MAX_PRIORITY - ceil(continuous_query_proc_priority * (MAX_PRIORITY - default_priority)));
	priority = nice(priority);

	switch (MyContQueryProc->type)
	{
	case Combiner:
		am_cont_combiner = true;
		run = &ContinuousQueryCombinerMain;
		break;
	case Worker:
		am_cont_worker = true;
		run = &ContinuousQueryWorkerMain;
		break;
	default:
		ereport(ERROR, (errmsg("continuous queries can only be run as worker or combiner processes")));
	}

	/* initialize process level CQ stats */
	cq_stat_init(&MyProcCQStats, 0, MyProcPid);

	run();

	/* purge proc level CQ stats */
	cq_stat_send_purge(0, MyProcPid, IsContQueryWorkerProcess() ? CQ_STAT_WORKER : CQ_STAT_COMBINER);
}
Esempio n. 5
0
/*
 *  Ensure that the environment is sane.  
 *    This involves checking the Postgresql version, and if in network mode
 *      also establishing a connection to a receiver.
*/
int ensure_valid_environment(void) {
  StringInfoData buf;
  int     retval;
	char* pgversion;
	
  SPITupleTable *coltuptable;
 
  SetCurrentStatementStartTimestamp();
  StartTransactionCommand();
  SPI_connect();
  PushActiveSnapshot(GetTransactionSnapshot());
  
  /* Ensure compatible version */
  pgstat_report_activity(STATE_RUNNING, "verifying compatible postgres version");

  initStringInfo(&buf);
  appendStringInfo(&buf, 
    "select version();"
  );
  retval = SPI_execute(buf.data, false, 0);
  if (retval != SPI_OK_SELECT) {
    elog(FATAL, "Unable to query postgres version %d", retval);
    SPI_finish();
		PopActiveSnapshot();
		CommitTransactionCommand();
  	return 1;  
  }
  
	coltuptable = SPI_tuptable;
	pgversion = SPI_getvalue(coltuptable->vals[0], coltuptable->tupdesc, 1);
	
  if(strstr(pgversion, "PostgreSQL 9.3") == NULL) {
    elog(FATAL, "Unsupported Postgresql version");
    SPI_finish();
		PopActiveSnapshot();
		CommitTransactionCommand();
  	return 1;
	}
  
	SPI_finish();
	PopActiveSnapshot();
	CommitTransactionCommand();
  
  /*
   * Attempt to establish a connection if the output mode is network.
   */
  if (strcmp(output_mode, "network") == 0) {
		retval = establish_connection();
		if (retval == 2) {
			elog(LOG, "Error : Failed to connect to antenna please check domain is available from host.");
		}
	}
	
	//TODO verify logging directory is accessible when csv mode.
	
  elog(LOG, "Pgsampler Initialized");
  return 0;
}
Esempio n. 6
0
/*
 * Handle BEGIN message.
 */
static void
apply_handle_begin(StringInfo s)
{
	LogicalRepBeginData begin_data;

	logicalrep_read_begin(s, &begin_data);

	remote_final_lsn = begin_data.final_lsn;

	in_remote_transaction = true;

	pgstat_report_activity(STATE_RUNNING, NULL);
}
Esempio n. 7
0
/*
 * Handle COMMIT message.
 *
 * TODO, support tracking of multiple origins
 */
static void
apply_handle_commit(StringInfo s)
{
	LogicalRepCommitData commit_data;

	logicalrep_read_commit(s, &commit_data);

	Assert(commit_data.commit_lsn == remote_final_lsn);

	/* The synchronization worker runs in single transaction. */
	if (IsTransactionState() && !am_tablesync_worker())
	{
		/*
		 * Update origin state so we can restart streaming from correct
		 * position in case of crash.
		 */
		replorigin_session_origin_lsn = commit_data.end_lsn;
		replorigin_session_origin_timestamp = commit_data.committime;

		CommitTransactionCommand();
		pgstat_report_stat(false);

		store_flush_position(commit_data.end_lsn);
	}
	else
	{
		/* Process any invalidation messages that might have accumulated. */
		AcceptInvalidationMessages();
		maybe_reread_subscription();
	}

	in_remote_transaction = false;

	/* Process any tables that are being synchronized in parallel. */
	process_syncing_tables(commit_data.end_lsn);

	pgstat_report_activity(STATE_IDLE, NULL);
}
Esempio n. 8
0
void
worker_spi_main(Datum main_arg)
{
	int			index = DatumGetInt32(main_arg);
	worktable  *table;
	StringInfoData buf;
	char		name[20];

	table = palloc(sizeof(worktable));
	sprintf(name, "schema%d", index);
	table->schema = pstrdup(name);
	table->name = pstrdup("counted");

	/* Establish signal handlers before unblocking signals. */
	pqsignal(SIGHUP, worker_spi_sighup);
	pqsignal(SIGTERM, worker_spi_sigterm);

	/* We're now ready to receive signals */
	BackgroundWorkerUnblockSignals();

	/* Connect to our database */
	BackgroundWorkerInitializeConnection("postgres", NULL);

	elog(LOG, "%s initialized with %s.%s",
		 MyBgworkerEntry->bgw_name, table->schema, table->name);
	initialize_worker_spi(table);

	/*
	 * Quote identifiers passed to us.  Note that this must be done after
	 * initialize_worker_spi, because that routine assumes the names are not
	 * quoted.
	 *
	 * Note some memory might be leaked here.
	 */
	table->schema = quote_identifier(table->schema);
	table->name = quote_identifier(table->name);

	initStringInfo(&buf);
	appendStringInfo(&buf,
					 "WITH deleted AS (DELETE "
					 "FROM %s.%s "
					 "WHERE type = 'delta' RETURNING value), "
					 "total AS (SELECT coalesce(sum(value), 0) as sum "
					 "FROM deleted) "
					 "UPDATE %s.%s "
					 "SET value = %s.value + total.sum "
					 "FROM total WHERE type = 'total' "
					 "RETURNING %s.value",
					 table->schema, table->name,
					 table->schema, table->name,
					 table->name,
					 table->name);

	/*
	 * Main loop: do this until the SIGTERM handler tells us to terminate
	 */
	while (!got_sigterm)
	{
		int			ret;
		int			rc;

		/*
		 * Background workers mustn't call usleep() or any direct equivalent:
		 * instead, they may wait on their process latch, which sleeps as
		 * necessary, but is awakened if postmaster dies.  That way the
		 * background process goes away immediately in an emergency.
		 */
		rc = WaitLatch(&MyProc->procLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   worker_spi_naptime * 1000L);
		ResetLatch(&MyProc->procLatch);

		/* emergency bailout if postmaster has died */
		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);

		/*
		 * In case of a SIGHUP, just reload the configuration.
		 */
		if (got_sighup)
		{
			got_sighup = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/*
		 * Start a transaction on which we can run queries.  Note that each
		 * StartTransactionCommand() call should be preceded by a
		 * SetCurrentStatementStartTimestamp() call, which sets both the time
		 * for the statement we're about the run, and also the transaction
		 * start time.  Also, each other query sent to SPI should probably be
		 * preceded by SetCurrentStatementStartTimestamp(), so that statement
		 * start time is always up to date.
		 *
		 * The SPI_connect() call lets us run queries through the SPI manager,
		 * and the PushActiveSnapshot() call creates an "active" snapshot
		 * which is necessary for queries to have MVCC data to work on.
		 *
		 * The pgstat_report_activity() call makes our activity visible
		 * through the pgstat views.
		 */
		SetCurrentStatementStartTimestamp();
		StartTransactionCommand();
		SPI_connect();
		PushActiveSnapshot(GetTransactionSnapshot());
		pgstat_report_activity(STATE_RUNNING, buf.data);

		/* We can now execute queries via SPI */
		ret = SPI_execute(buf.data, false, 0);

		if (ret != SPI_OK_UPDATE_RETURNING)
			elog(FATAL, "cannot select from table %s.%s: error code %d",
				 table->schema, table->name, ret);

		if (SPI_processed > 0)
		{
			bool		isnull;
			int32		val;

			val = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
											  SPI_tuptable->tupdesc,
											  1, &isnull));
			if (!isnull)
				elog(LOG, "%s: count in %s.%s is now %d",
					 MyBgworkerEntry->bgw_name,
					 table->schema, table->name, val);
		}

		/*
		 * And finish our transaction.
		 */
		SPI_finish();
		PopActiveSnapshot();
		CommitTransactionCommand();
		pgstat_report_activity(STATE_IDLE, NULL);
	}

	proc_exit(1);
}
Esempio n. 9
0
/*
 * This is the function called from the main polling loop.
 *
 * It calls command functions to get strings of data, and sends them to the server.
 *
*/
int collect_and_send_metrics(int cycle) {
	int retval;
	char* command;
	
	StringInfoData commands;
	
	pgstat_report_activity(STATE_RUNNING, "Collecting metrics");
	
	initStringInfo(&commands);
	/*
	 * Populate first cycle command data.	These are executed on the first cycle
	 *	 of a restart.	The bgworker restars every N cycles, as listed at the bottom
	 *	 of the main loop in pgsampler.c.	
	 */
	if (cycle == 0) {
	
		command = restart_gucs();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("restart_gucs", command);			
		pfree(command);

		command = relation_info();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_class", command);
		pfree(command);

		command = database_list();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("databases", command);
		pfree(command);
		
		command = column_info();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("columns", command);
		pfree(command);

		command = index_info();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("indexes", command);
		pfree(command);

		command = column_stats();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_column", command);
		pfree(command);

		command = db_stats();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_database", command);
		pfree(command);

	}

	/* HEARTBEAT */
	if (cycle % heartbeat_seconds == 0) {
		command = heartbeat();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("heartbeats", command);
		pfree(command);
	}

	/* SYSTEM INFO */
	if (cycle % system_seconds == 0) {
		command = system_info();	
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_system", command);
		pfree(command);

		command = fs_info();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_filesystem", command);
		pfree(command);
	}

	/* */
	if (cycle % activity_seconds == 0) {
		command = activity_stats(); 
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_activity", command);
		pfree(command);
	}

	if (cycle % replication_seconds == 0) {
		command = replication_stats(); 
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_replication", command);
		pfree(command);
	}

	/*	*/
	if (cycle % bgwriter_seconds == 0) {
		command = bgwriter_stats();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_bgwriter", command);
		pfree(command);
	}
	
	if (cycle % guc_seconds == 0) {
		command = transient_gucs();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("transient_gucs", command);
		pfree(command);
	}
	
	if (cycle % statements_seconds == 0) {
		command = stat_statements();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_statements", command);
		pfree(command);
	}

	/* */	
	if (cycle % relation_seconds == 0) {
		command = table_stats();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_table", command);
		pfree(command);
		
		command = index_stats();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_index", command);
		pfree(command);
		
		command = table_io_stats();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("statio_user_tables", command);
		pfree(command);

		command = index_io_stats();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("statio_user_indexes", command);
		pfree(command);

		command = function_stats();
		appendStringInfoString(&commands, command);
		if (strcmp(output_mode, "csv") == 0) 
			write_to_csv("stat_function", command);
		pfree(command);
	}



	/* Send / Write metrics based on output_mode */
	if (strcmp(output_mode, "network") == 0) {
		pgstat_report_activity(STATE_RUNNING, "Sending metrics to antenna");
		retval = send_data(commands.data);
		if (retval == NO_DATA_SENT) { //close socket and retry establishing connection and sending data.
		 	// elog(LOG, "reseting..."); //just a note to say reseting socket
			if (sockfd != 0) 
				shutdown(sockfd, SHUT_RDWR);

			sockfd = 0;
			retval = send_data(commands.data); // we ignore success or failure here.	drops data if fails.
		}
	}

	return 0;
}
Esempio n. 10
0
void
worker_test_main(Datum main_arg)
{
	dsm_segment *seg;
	volatile test_shm_mq_header *hdr;
	PGPROC *registrant;

	pqsignal(SIGHUP,  handle_sighup);
	pqsignal(SIGTERM, handle_sigterm);

	BackgroundWorkerUnblockSignals();

	printf("worker_test_main: %d\n", DatumGetInt32(main_arg));

	CurrentResourceOwner = ResourceOwnerCreate(NULL, "worker test");

	seg = dsm_attach(DatumGetInt32(main_arg));

	if (seg == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("unable to map dynamic shared memory segment")));

	hdr = dsm_segment_address(seg);

	/* 開始 */
	SpinLockAcquire(&hdr->mutex);
	hdr->workers_ready++;
	hdr->workers_attached++;
	SpinLockRelease(&hdr->mutex);

	registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);
	if (registrant == NULL)
	{
		elog(DEBUG1, "registrant backend has exited prematurely");
		proc_exit(1);
	}
	SetLatch(&registrant->procLatch);

	/* Do the work */

	BackgroundWorkerInitializeConnection(hdr->dbname, NULL);

	printf("DSM: %p\n", dsm_segment_address);

#if 0
	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	SPI_connect();
	PushActiveSnapshot(GetTransactionSnapshot());
	pgstat_report_activity(STATE_RUNNING, "initializing spi_worker schema");	

	SPI_finish();
	PopActiveSnapshot();
	CommitTransactionCommand();
	pgstat_report_activity(STATE_IDLE, NULL);
#endif

	dsm_detach(seg);

	proc_exit(0);
}
Esempio n. 11
0
/*
 * ContinuousQueryWorkerStartup
 *
 * Launches a CQ worker, which continuously generates partial query results to send
 * back to the combiner process.
 */
void
ContinuousQueryWorkerRun(Portal portal, ContinuousViewState *state, QueryDesc *queryDesc, ResourceOwner owner)
{
	EState	   *estate = NULL;
	DestReceiver *dest;
	CmdType		operation;
	MemoryContext oldcontext;
	int timeoutms = state->maxwaitms;
	MemoryContext runcontext;
	CQProcEntry *entry = GetCQProcEntry(MyCQId);
	ResourceOwner cqowner = ResourceOwnerCreate(NULL, "CQResourceOwner");
	bool savereadonly = XactReadOnly;

	cq_stat_initialize(state->viewid, MyProcPid);

	dest = CreateDestReceiver(DestCombiner);
	SetCombinerDestReceiverParams(dest, MyCQId);

	/* workers only need read-only transactions */
	XactReadOnly = true;

	runcontext = AllocSetContextCreate(TopMemoryContext, "CQRunContext",
			ALLOCSET_DEFAULT_MINSIZE,
			ALLOCSET_DEFAULT_INITSIZE,
			ALLOCSET_DEFAULT_MAXSIZE);

	elog(LOG, "\"%s\" worker %d running", queryDesc->plannedstmt->cq_target->relname, MyProcPid);
	MarkWorkerAsRunning(MyCQId, MyWorkerId);
	pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText);

	TupleBufferInitLatch(WorkerTupleBuffer, MyCQId, MyWorkerId, &MyProc->procLatch);

	oldcontext = MemoryContextSwitchTo(runcontext);

retry:
	PG_TRY();
	{
		bool xact_commit = true;
		TimestampTz last_process = GetCurrentTimestamp();
		TimestampTz last_commit = GetCurrentTimestamp();

		start_executor(queryDesc, runcontext, cqowner);

		CurrentResourceOwner = cqowner;

		estate = queryDesc->estate;
		operation = queryDesc->operation;

		/*
		 * Initialize context that lives for the duration of a single iteration
		 * of the main worker loop
		 */
		CQExecutionContext = AllocSetContextCreate(estate->es_query_cxt, "CQExecutionContext",
				ALLOCSET_DEFAULT_MINSIZE,
				ALLOCSET_DEFAULT_INITSIZE,
				ALLOCSET_DEFAULT_MAXSIZE);

		estate->es_lastoid = InvalidOid;

		/*
		 * Startup combiner receiver
		 */
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

		for (;;)
		{
			if (!TupleBufferHasUnreadSlots())
			{
				if (TimestampDifferenceExceeds(last_process, GetCurrentTimestamp(), state->emptysleepms))
				{
					/* force stats flush */
					cq_stat_report(true);

					pgstat_report_activity(STATE_IDLE, queryDesc->sourceText);
					TupleBufferWait(WorkerTupleBuffer, MyCQId, MyWorkerId);
					pgstat_report_activity(STATE_RUNNING, queryDesc->sourceText);
				}
				else
					pg_usleep(Min(WAIT_SLEEP_MS, state->emptysleepms) * 1000);
			}

			TupleBufferResetNotify(WorkerTupleBuffer, MyCQId, MyWorkerId);

			if (xact_commit)
				StartTransactionCommand();

			set_snapshot(estate, cqowner);
			CurrentResourceOwner = cqowner;
			MemoryContextSwitchTo(estate->es_query_cxt);

			estate->es_processed = 0;
			estate->es_filtered = 0;

			/*
			 * Run plan on a microbatch
			 */
			ExecutePlan(estate, queryDesc->planstate, operation,
					true, 0, timeoutms, ForwardScanDirection, dest);

			IncrementCQExecutions(1);
			TupleBufferClearPinnedSlots();

			if (state->long_xact)
			{
				if (TimestampDifferenceExceeds(last_commit, GetCurrentTimestamp(), LONG_RUNNING_XACT_DURATION))
					xact_commit = true;
				else
					xact_commit = false;
			}

			unset_snapshot(estate, cqowner);
			if (xact_commit)
			{
				CommitTransactionCommand();
				last_commit = GetCurrentTimestamp();
			}

			MemoryContextResetAndDeleteChildren(CQExecutionContext);
			MemoryContextSwitchTo(runcontext);
			CurrentResourceOwner = cqowner;

			if (estate->es_processed || estate->es_filtered)
			{
				/*
				 * If the CV query is such that the select does not return any tuples
				 * ex: select id where id=99; and id=99 does not exist, then this reset
				 * will fail. What will happen is that the worker will block at the latch for every
				 * allocated slot, TILL a cv returns a non-zero tuple, at which point
				 * the worker will resume a simple sleep for the threshold time.
				 */
				last_process = GetCurrentTimestamp();

				/*
				 * Send stats to the collector
				 */
				cq_stat_report(false);
			}

			/* Has the CQ been deactivated? */
			if (!entry->active)
			{
				if (ActiveSnapshotSet())
					unset_snapshot(estate, cqowner);
				if (IsTransactionState())
					CommitTransactionCommand();
				break;
			}
		}

		CurrentResourceOwner = cqowner;

		/*
		 * The cleanup functions below expect these things to be registered
		 */
		RegisterSnapshotOnOwner(estate->es_snapshot, cqowner);
		RegisterSnapshotOnOwner(queryDesc->snapshot, cqowner);
		RegisterSnapshotOnOwner(queryDesc->crosscheck_snapshot, cqowner);

		/* cleanup */
		ExecutorFinish(queryDesc);
		ExecutorEnd(queryDesc);
		FreeQueryDesc(queryDesc);
	}
	PG_CATCH();
	{
		EmitErrorReport();
		FlushErrorState();

		/* Since the worker is read-only, we can simply commit the transaction. */
		if (ActiveSnapshotSet())
			unset_snapshot(estate, cqowner);
		if (IsTransactionState())
			CommitTransactionCommand();

		TupleBufferUnpinAllPinnedSlots();
		TupleBufferClearReaders();

		/* This resets the es_query_ctx and in turn the CQExecutionContext */
		MemoryContextResetAndDeleteChildren(runcontext);

		IncrementCQErrors(1);

		if (continuous_query_crash_recovery)
			goto retry;
	}
	PG_END_TRY();

	(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);
	MemoryContextDelete(runcontext);

	XactReadOnly = savereadonly;

	/*
	 * Remove proc-level stats
	 */
	cq_stat_report(true);
	cq_stat_send_purge(state->viewid, MyProcPid, CQ_STAT_WORKER);

	CurrentResourceOwner = owner;
}
Esempio n. 12
0
static void
kill_idle_main(Datum main_arg)
{
	StringInfoData buf;

	/* Register functions for SIGTERM/SIGHUP management */
	pqsignal(SIGHUP, kill_idle_sighup);
	pqsignal(SIGTERM, kill_idle_sigterm);

	/* We're now ready to receive signals */
	BackgroundWorkerUnblockSignals();

	/* Connect to a database */
	BackgroundWorkerInitializeConnection("postgres", NULL);

	/* Build query for process */
	initStringInfo(&buf);
	kill_idle_build_query(&buf);

	while (!got_sigterm)
	{
		int rc, ret, i;

		/* Wait necessary amount of time */
		rc = WaitLatch(&MyProc->procLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   kill_max_idle_time * 1000L,
					   PG_WAIT_EXTENSION);
		ResetLatch(&MyProc->procLatch);

		/* Emergency bailout if postmaster has died */
		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);

		/* Process signals */
		if (got_sighup)
		{
			int old_interval;
			/* Save old value of kill interval */
			old_interval = kill_max_idle_time;

			/* Process config file */
			ProcessConfigFile(PGC_SIGHUP);
			got_sighup = false;
			ereport(LOG, (errmsg("bgworker kill_idle signal: processed SIGHUP")));

			/* Rebuild query if necessary */
			if (old_interval != kill_max_idle_time)
			{
				resetStringInfo(&buf);
				initStringInfo(&buf);
				kill_idle_build_query(&buf);
			}
		}

		if (got_sigterm)
		{
			/* Simply exit */
			ereport(LOG, (errmsg("bgworker kill_idle signal: processed SIGTERM")));
			proc_exit(0);
		}

		/* Process idle connection kill */
		SetCurrentStatementStartTimestamp();
		StartTransactionCommand();
		SPI_connect();
		PushActiveSnapshot(GetTransactionSnapshot());
		pgstat_report_activity(STATE_RUNNING, buf.data);

		/* Statement start time */
		SetCurrentStatementStartTimestamp();

		/* Execute query */
		ret = SPI_execute(buf.data, false, 0);

		/* Some error handling */
		if (ret != SPI_OK_SELECT)
			elog(FATAL, "Error when trying to kill idle connections");

		/* Do some processing and log stuff disconnected */
		for (i = 0; i < SPI_processed; i++)
		{
			int32 pidValue;
			bool isnull;
			char *datname = NULL;
			char *usename = NULL;
			char *client_addr = NULL;

			/* Fetch values */
			pidValue = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i],
												   SPI_tuptable->tupdesc,
												   1, &isnull));
			usename = DatumGetCString(SPI_getbinval(SPI_tuptable->vals[i],
													SPI_tuptable->tupdesc,
													3, &isnull));
			datname = DatumGetCString(SPI_getbinval(SPI_tuptable->vals[i],
													SPI_tuptable->tupdesc,
													4, &isnull));
			client_addr = DatumGetCString(SPI_getbinval(SPI_tuptable->vals[i],
														SPI_tuptable->tupdesc,
														5, &isnull));

			/* Log what has been disconnected */
			elog(LOG, "Disconnected idle connection: PID %d %s/%s/%s",
				 pidValue, datname ? datname : "none",
				 usename ? usename : "none",
				 client_addr ? client_addr : "none");
		}

		SPI_finish();
		PopActiveSnapshot();
		CommitTransactionCommand();
		pgstat_report_activity(STATE_IDLE, NULL);
	}

	/* No problems, so clean exit */
	proc_exit(0);
}
Esempio n. 13
0
/*
 * worker logic
 */
void
wed_worker_main(Datum main_arg)
{
	StringInfoData buf;
    
    
	/* Establish signal handlers before unblocking signals. */
	pqsignal(SIGHUP, wed_worker_sighup);
	pqsignal(SIGTERM, wed_worker_sigterm);

	/* We're now ready to receive signals */
	BackgroundWorkerUnblockSignals();

	/* Connect to our database */
	BackgroundWorkerInitializeConnection(wed_worker_db_name, NULL);

	elog(LOG, "%s initialized in: %s",
		 MyBgworkerEntry->bgw_name, wed_worker_db_name);

	initStringInfo(&buf);
	appendStringInfo(&buf, "SELECT trcheck()");

	/*
	 * Main loop: do this until the SIGTERM handler tells us to terminate
	 */
	while (!got_sigterm)
	{
		int			ret;
		int			rc;

		/*
		 * Background workers mustn't call usleep() or any direct equivalent:
		 * instead, they may wait on their process latch, which sleeps as
		 * necessary, but is awakened if postmaster dies.  That way the
		 * background process goes away immediately in an emergency.
		 */
		rc = WaitLatch(&MyProc->procLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   wed_worker_naptime * 1000L);
		ResetLatch(&MyProc->procLatch);

		/* emergency bailout if postmaster has died */
		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);

		/*
		 * In case of a SIGHUP, just reload the configuration.
		 */
		if (got_sighup)
		{
			got_sighup = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/*
		 * Start a transaction on which we can run queries.  Note that each
		 * StartTransactionCommand() call should be preceded by a
		 * SetCurrentStatementStartTimestamp() call, which sets both the time
		 * for the statement we're about the run, and also the transaction
		 * start time.  Also, each other query sent to SPI should probably be
		 * preceded by SetCurrentStatementStartTimestamp(), so that statement
		 * start time is always up to date.
		 *
		 * The SPI_connect() call lets us run queries through the SPI manager,
		 * and the PushActiveSnapshot() call creates an "active" snapshot
		 * which is necessary for queries to have MVCC data to work on.
		 *
		 * The pgstat_report_activity() call makes our activity visible
		 * through the pgstat views.
		 */
		SetCurrentStatementStartTimestamp();
		StartTransactionCommand();
		SPI_connect();
		PushActiveSnapshot(GetTransactionSnapshot());
		pgstat_report_activity(STATE_RUNNING, buf.data);

		/* We can now execute queries via SPI */
		ret = SPI_execute(buf.data, false, 0);

		if (ret != SPI_OK_SELECT)
			elog(FATAL, "stored procedure trcheck() not found: error code %d", ret);

        elog(LOG, "%s : trcheck() done !", MyBgworkerEntry->bgw_name);
		/*
		 * And finish our transaction.
		 */
		SPI_finish();
		PopActiveSnapshot();
		CommitTransactionCommand();
		pgstat_report_activity(STATE_IDLE, NULL);
	}

	proc_exit(1);
}
Esempio n. 14
0
static config_log_objects *
initialize_objects(void)
{
    config_log_objects *objects;

	int		ret;
	int		ntup;
	bool	isnull;
	StringInfoData	buf;

	objects = palloc(sizeof(config_log_objects));

	objects->table_name = pstrdup("pg_settings_log");
	objects->function_name = pstrdup("pg_settings_logger");

	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	SPI_connect();
	PushActiveSnapshot(GetTransactionSnapshot());
	pgstat_report_activity(STATE_RUNNING, "Verifying config log objects");

	initStringInfo(&buf);
	appendStringInfo(
		&buf,
		"SELECT COUNT(*)\
           FROM information_schema.tables\
          WHERE table_schema='%s'\
            AND table_name ='%s'\
            AND table_type='BASE TABLE'",
		config_log_schema,
		objects->table_name
		);

	ret = SPI_execute(buf.data, true, 0);
	if (ret != SPI_OK_SELECT)
	{
         ereport(FATAL,
           (errmsg("SPI_execute failed: SPI error code %d", ret)
            ));
	}

    /* This should never happen */
	if (SPI_processed != 1)
	{
		elog(FATAL, "not a singleton result");
	}

	ntup = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
					   SPI_tuptable->tupdesc,
					   1, &isnull));

    /* This should never happen */
	if (isnull)
	{
		elog(FATAL, "null result");
	}

	if (ntup == 0)
	{
        ereport(FATAL,
          (
            errmsg("Expected config log table '%s.%s' not found", config_log_schema,
              objects->table_name),
            errhint("Ensure superuser search_path includes the schema used by config_log; "
              "check config_log.* GUC settings")
           ));
	}

	/* check function pg_settings_logger() exists */

	resetStringInfo(&buf);

	appendStringInfo(
		&buf,
		"SELECT COUNT(*) FROM pg_catalog.pg_proc p \
     INNER JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace \
          WHERE p.proname='%s' \
            AND n.nspname='%s' \
            AND p.pronargs = 0",
		objects->function_name,
		config_log_schema
		);

	ret = SPI_execute(buf.data, true, 0);
	if (ret != SPI_OK_SELECT)
	{
         ereport(FATAL,
           (errmsg("SPI_execute failed: SPI error code %d", ret)));
	}

	if (SPI_processed != 1)
	{
		elog(FATAL, "not a singleton result");
	}

	ntup = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
									   SPI_tuptable->tupdesc,
									   1, &isnull));
	if (isnull)
	{
		elog(FATAL, "null result");
	}

	if (ntup == 0)
	{
        ereport(FATAL,
          (
            errmsg("Expected config log function '%s.%s' not found", config_log_schema,
              objects->function_name),
            errhint("Ensure superuser search_path includes the schema used by config_log; "
              "check config_log.* GUC settings")
           ));
	}

   	SPI_finish();
	PopActiveSnapshot();
	CommitTransactionCommand();
	pgstat_report_activity(STATE_IDLE, NULL);

	log_info("initialized, database objects validated");

	/* execute pg_settings_logger() here to catch any settings which have changed after server restart */
	execute_pg_settings_logger(objects);

	return objects;
}
Esempio n. 15
0
static void
SaveBuffers(void)
{
	int						i;
	int						num_buffers;
	int						log_level		= DEBUG3;
	SavedBuffer			   *saved_buffers;
	volatile BufferDesc	   *bufHdr;			// XXX: Do we really need volatile here?
	FILE				   *file			= NULL;
	int						database_counter= 0;
	Oid						prev_database	= InvalidOid;
	Oid						prev_filenode	= InvalidOid;
	ForkNumber				prev_forknum	= InvalidForkNumber;
	BlockNumber				prev_blocknum	= InvalidBlockNumber;
	BlockNumber				range_counter	= 0;
	const char			   *savefile_path;

	/*
	 * XXX: If the memory request fails, ask for a smaller memory chunk, and use
	 * it to create chunks of save-files, and make the workers read those chunks.
	 *
	 * This is not a concern as of now, so deferred; there's at least one other
	 * place that allocates (NBuffers * (much_bigger_struct)), so this seems to
	 * be an acceptable practice.
	 */

	saved_buffers = (SavedBuffer *) palloc(sizeof(SavedBuffer) * NBuffers);

	/* Lock the buffer partitions for reading. */
	for (i = 0; i < NUM_BUFFER_PARTITIONS; ++i)
		LWLockAcquire(FirstBufMappingLock + i, LW_SHARED);

	/* Scan and save a list of valid buffers. */
	for (num_buffers = 0, i = 0, bufHdr = BufferDescriptors; i < NBuffers; ++i, ++bufHdr)
	{
		/* Lock each buffer header before inspecting. */
		LockBufHdr(bufHdr);

		/* Skip invalid buffers */
		if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_TAG_VALID))
		{
			saved_buffers[num_buffers].database	= bufHdr->tag.rnode.dbNode;
			saved_buffers[num_buffers].filenode	= bufHdr->tag.rnode.relNode;
			saved_buffers[num_buffers].forknum	= bufHdr->tag.forkNum;
			saved_buffers[num_buffers].blocknum	= bufHdr->tag.blockNum;

			++num_buffers;
		}

		UnlockBufHdr(bufHdr);
	}

	/* Unlock the buffer partitions in reverse order, to avoid a deadlock. */
	for (i = NUM_BUFFER_PARTITIONS - 1; i >= 0; --i)
		LWLockRelease(FirstBufMappingLock + i);

	/*
	 * Sort the list, so that we can optimize the storage of these buffers.
	 *
	 * The side-effect of this storage optimization is that when reading the
	 * blocks back from relation forks, it leads to sequential reads, which
	 * improve the restore speeds quite considerably as compared to random reads
	 * from different blocks all over the data directory.
	 */
	pg_qsort(saved_buffers, num_buffers, sizeof(SavedBuffer), SavedBufferCmp);

	/* Connect to the database and start a transaction for database name lookups. */
	BackgroundWorkerInitializeConnection(guc_default_database, NULL);
	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	PushActiveSnapshot(GetTransactionSnapshot());
	pgstat_report_activity(STATE_RUNNING, "saving buffers");

	for (i = 0; i < num_buffers; ++i)
	{
		int j;
		SavedBuffer *buf = &saved_buffers[i];

		if (i == 0)
		{
			/*
			 * Special case for global objects. The sort brings them to the
			 * front of the list.
			 */

			/* Make sure the first buffer we save belongs to global object. */
			Assert(buf->database == InvalidOid);

			/*
			 * Database number (and save-file name) 1 is reserverd for storing
			 * list of buffers of global objects.
			 */
			database_counter = 1;

			savefile_path = getSavefileName(database_counter);
			file = fileOpen(savefile_path, PG_BINARY_W);
			writeDBName("", file, savefile_path);

			prev_database = buf->database;
		}

		if (buf->database != prev_database)
		{
			char *dbname;

			/*
			 * We are beginning to process a different database than the
			 * previous one; close the save-file of previous database, and open
			 * a new one.
			 */
			++database_counter;

			dbname = get_database_name(buf->database);

			Assert(dbname != NULL);

			if (file != NULL)
				fileClose(file, savefile_path);

			savefile_path = getSavefileName(database_counter);
			file = fileOpen(savefile_path, PG_BINARY_W);
			writeDBName(dbname, file, savefile_path);

			pfree(dbname);

			/* Reset trackers appropriately */
			prev_database	= buf->database;
			prev_filenode	= InvalidOid;
			prev_forknum	= InvalidForkNumber;
			prev_blocknum	= InvalidBlockNumber;
			range_counter	= 0;
		}

		if (buf->filenode != prev_filenode)
		{
			/* We're beginning to process a new relation; emit a record for it. */
			fileWrite("r", 1, file, savefile_path);
			fileWrite(&(buf->filenode), sizeof(Oid), file, savefile_path);

			/* Reset trackers appropriately */
			prev_filenode	= buf->filenode;
			prev_forknum	= InvalidForkNumber;
			prev_blocknum	= InvalidBlockNumber;
			range_counter	= 0;
		}

		if (buf->forknum != prev_forknum)
		{
			/*
			 * We're beginning to process a new fork of this relation; add a
			 * record for it.
			 */
			fileWrite("f", 1, file, savefile_path);
			fileWrite(&(buf->forknum), sizeof(ForkNumber), file, savefile_path);

			/* Reset trackers appropriately */
			prev_forknum	= buf->forknum;
			prev_blocknum	= InvalidBlockNumber;
			range_counter	= 0;
		}

		ereport(log_level,
				(errmsg("writer: writing block db %d filenode %d forknum %d blocknum %d",
						database_counter, prev_filenode, prev_forknum, buf->blocknum)));

		fileWrite("b", 1, file, savefile_path);
		fileWrite(&(buf->blocknum), sizeof(BlockNumber), file, savefile_path);

		prev_blocknum = buf->blocknum;

		/*
		 * If a continuous range of blocks follows this block, then emit one
		 * entry for the range, instead of one for each block.
		 */
		range_counter = 0;

		for ( j = i+1; j < num_buffers; ++j)
		{
			SavedBuffer *tmp = &saved_buffers[j];

			if (tmp->database		== prev_database
				&& tmp->filenode	== prev_filenode
				&& tmp->forknum		== prev_forknum
				&& tmp->blocknum	== (prev_blocknum + range_counter + 1))
			{
				++range_counter;
			}
		}

		if (range_counter != 0)
		{
			ereport(log_level,
				(errmsg("writer: writing range db %d filenode %d forknum %d blocknum %d range %d",
						database_counter, prev_filenode, prev_forknum, prev_blocknum, range_counter)));

			fileWrite("N", 1, file, savefile_path);
			fileWrite(&range_counter, sizeof(range_counter), file, savefile_path);

			i += range_counter;
		}
	}

	ereport(LOG,
			(errmsg("Buffer Saver: saved metadata of %d blocks", num_buffers)));

	Assert(file != NULL);
	fileClose(file, savefile_path);

	pfree(saved_buffers);

	PopActiveSnapshot();
	CommitTransactionCommand();
	pgstat_report_activity(STATE_IDLE, NULL);
}
Esempio n. 16
0
static void
ReadBlocks(int filenum)
{
	FILE	   *file;
	char		record_type;
	char	   *dbname;
	Oid			record_filenode;
	ForkNumber	record_forknum;
	BlockNumber	record_blocknum;
	BlockNumber	record_range;

	int			log_level		= DEBUG3;
	Oid			relOid			= InvalidOid;
	Relation	rel				= NULL;
	bool		skip_relation	= false;
	bool		skip_fork		= false;
	bool		skip_block		= false;
	BlockNumber	nblocks			= 0;
	BlockNumber	blocks_restored	= 0;
	const char *filepath;

	/*
	 * If this condition changes, then this code, and the code in the writer
	 * will need to be changed; especially the format specifiers in log and
	 * error messages.
	 */
	StaticAssertStmt(MaxBlockNumber == 0xFFFFFFFE, "Code may need review.");

	filepath = getSavefileName(filenum);
	file = fileOpen(filepath, PG_BINARY_R);
	dbname = readDBName(file, filepath);

	/*
	 * When restoring global objects, the dbname is zero-length string, and non-
	 * zero length otherwise. And filenum is never expected to be smaller than 1.
	 */
	Assert(filenum >= 1);
	Assert(filenum == 1 ? strlen(dbname) == 0 : strlen(dbname) > 0);

	/* To restore the global objects, use default database */
	BackgroundWorkerInitializeConnection(filenum == 1 ? guc_default_database : dbname, NULL);
	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	SPI_connect();
	PushActiveSnapshot(GetTransactionSnapshot());
	pgstat_report_activity(STATE_RUNNING, "restoring buffers");

	/*
	 * Note that in case of a read error, we will leak relcache entry that we may
	 * currently have open. In case of EOF, we close the relation after the loop.
	 */
	while (fileRead(&record_type, 1, file, true, filepath))
	{
		/*
		 * If we want to process the signals, this seems to be the best place
		 * to do it. Generally the backends refrain from processing config file
		 * while in transaction, but that's more for the fear of allowing GUC
		 * changes to affect expression evaluation, causing different results
		 * for the same expression in a transaction. Since this worker is not
		 * processing any queries, it is okay to process the config file here.
		 *
		 * Even though it's okay to process SIGHUP here, doing so doesn't add
		 * any value. The only reason we might want to process config file here
		 * would be to allow the user to interrupt the BlockReader's operation
		 * by changing this extenstion's GUC parameter. But the user can do that
		 * anyway, using SIGTERM or pg_terminate_backend().
		 */

		/* Stop processing the save-file if the Postmaster wants us to die. */
		if (got_sigterm)
			break;

		ereport(log_level,
				(errmsg("record type %x - %c", record_type, record_type)));

		switch (record_type)
		{
			case 'r':
			{
				/* Close the previous relation, if any. */
				if (rel)
				{
					relation_close(rel, AccessShareLock);
					rel = NULL;
				}

				record_forknum = InvalidForkNumber;
				record_blocknum = InvalidBlockNumber;
				nblocks = 0;

				fileRead(&record_filenode, sizeof(Oid), file, false, filepath);

				relOid = GetRelOid(record_filenode);

				ereport(log_level, (errmsg("processing filenode %u, relation %u",
										record_filenode, relOid)));
				/*
				 * If the relation has been rewritten/dropped since we saved it,
				 * just skip it and process the next relation.
				 */
				if (relOid == InvalidOid)
					skip_relation = true;
				else
				{
					skip_relation = false;

					/* Open the relation */
					rel = relation_open(relOid, AccessShareLock);
					RelationOpenSmgr(rel);
				}
			}
			break;
			case 'f':
			{
				record_blocknum = InvalidBlockNumber;
				nblocks = 0;

				fileRead(&record_forknum, sizeof(ForkNumber), file, false, filepath);

				if (skip_relation)
					continue;

				if (rel == NULL)
					ereport(ERROR,
							(errmsg("found a fork record without a preceeding relation record")));

				ereport(log_level, (errmsg("processing fork %d", record_forknum)));

				if (!smgrexists(rel->rd_smgr, record_forknum))
					skip_fork = true;
				else
				{
					skip_fork = false;

					nblocks = RelationGetNumberOfBlocksInFork(rel, record_forknum);
				}
			}
			break;
			case 'b':
			{
				if (record_forknum == InvalidForkNumber)
					ereport(ERROR,
							(errmsg("found a block record without a preceeding fork record")));

				fileRead(&record_blocknum, sizeof(BlockNumber), file, false, filepath);

				if (skip_relation || skip_fork)
					continue;

				/*
				 * Don't try to read past the file; the file may have been shrunk
				 * by a vaccum/truncate operation.
				 */
				if (record_blocknum >= nblocks)
				{
					ereport(log_level,
							(errmsg("reader %d skipping block filenode %u forknum %d blocknum %u",
									filenum, record_filenode, record_forknum, record_blocknum)));

					skip_block = true;
					continue;
				}
				else
				{
					Buffer	buf;

					skip_block = false;

					ereport(log_level,
							(errmsg("reader %d reading block filenode %u forknum %d blocknum %u",
									filenum, record_filenode, record_forknum, record_blocknum)));

					buf = ReadBufferExtended(rel, record_forknum, record_blocknum, RBM_NORMAL, NULL);
					ReleaseBuffer(buf);

					++blocks_restored;
				}
			}
			break;
			case 'N':
			{
				BlockNumber block;

				Assert(record_blocknum != InvalidBlockNumber);

				if (record_blocknum == InvalidBlockNumber)
					ereport(ERROR,
							(errmsg("found a block range record without a preceeding block record")));

				fileRead(&record_range, sizeof(int), file, false, filepath);

				if (skip_relation || skip_fork || skip_block)
					continue;

				ereport(log_level,
						(errmsg("reader %d reading range filenode %u forknum %d blocknum %u range %u",
								filenum, record_filenode, record_forknum, record_blocknum, record_range)));

				for (block = record_blocknum + 1; block <= (record_blocknum + record_range); ++block)
				{
					Buffer	buf;

					/*
					* Don't try to read past the file; the file may have been
					* shrunk by a vaccum operation.
					*/
					if (block >= nblocks)
					{
						ereport(log_level,
								(errmsg("reader %d skipping block range filenode %u forknum %d start %u end %u",
										filenum, record_filenode, record_forknum,
										block, record_blocknum + record_range)));

						break;
					}

					buf = ReadBufferExtended(rel, record_forknum, block, RBM_NORMAL, NULL);
					ReleaseBuffer(buf);

					++blocks_restored;
				}
			}
			break;
			default:
			{
				ereport(ERROR,
						(errmsg("found unexpected save-file marker %x - %c)", record_type, record_type)));
				Assert(false);
			}
			break;
		}
	}

	if (rel)
		relation_close(rel, AccessShareLock);

	ereport(LOG,
			(errmsg("Block Reader %d: restored %u blocks",
					filenum, blocks_restored)));

	SPI_finish();
	PopActiveSnapshot();
	CommitTransactionCommand();
	pgstat_report_activity(STATE_IDLE, NULL);

	fileClose(file, filepath);

	/* Remove the save-file */
	if (remove(filepath) != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				errmsg("error removing file \"%s\" : %m", filepath)));
}