예제 #1
0
void
test_get_format_name(void **state)
{
	char	   *formatName = get_format_name('t');

	assert_string_equal(formatName, TextFormatName);

	formatName = get_format_name('c');
	assert_string_equal(formatName, TextFormatName);

	formatName = get_format_name('b');
	assert_string_equal(formatName, GpdbWritableFormatName);

	MemoryContext old_context = CurrentMemoryContext;

	PG_TRY();
	{
		formatName = get_format_name('x');
		assert_false("Expected Exception");
	}
	PG_CATCH();
	{
		ErrorData  *edata;

		MemoryContextSwitchTo(old_context);
		edata = CopyErrorData();
		FlushErrorState();
		assert_string_equal(edata->message, "Unable to get format name for format code: x");
	}
	PG_END_TRY();
}
예제 #2
0
파일: plpy_spi.c 프로젝트: pguyot/postgres
void
PLy_spi_subtransaction_abort(MemoryContext oldcontext, ResourceOwner oldowner)
{
	ErrorData  *edata;
	PLyExceptionEntry *entry;
	PyObject   *exc;

	/* Save error info */
	MemoryContextSwitchTo(oldcontext);
	edata = CopyErrorData();
	FlushErrorState();

	/* Abort the inner transaction */
	RollbackAndReleaseCurrentSubTransaction();
	MemoryContextSwitchTo(oldcontext);
	CurrentResourceOwner = oldowner;

	/*
	 * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will have
	 * left us in a disconnected state.  We need this hack to return to
	 * connected state.
	 */
	SPI_restore_connection();

	/* Look up the correct exception */
	entry = hash_search(PLy_spi_exceptions, &(edata->sqlerrcode),
						HASH_FIND, NULL);
	/* We really should find it, but just in case have a fallback */
	Assert(entry != NULL);
	exc = entry ? entry->exc : PLy_exc_spi_error;
	/* Make Python raise the exception */
	PLy_spi_exception_set(exc, edata);
	FreeErrorData(edata);
}
예제 #3
0
static void
WalSendServer_ServiceShutdown(void)
{
	PG_TRY();
	{
		disableQDMirroring_ShutDown();
		if (disconnectMirrorQD_SendClose())
			elog(LOG,"Master mirror disconnected");
	}
	PG_CATCH();
	{
		/* 
		 * Report the error related to reading the primary's WAL
		 * to the server log 
		 */
	    if (!elog_demote(NOTICE))
    	{
    		elog(LOG,"unable to demote error");
        	PG_RE_THROW();
    	}
		EmitErrorReport();
		FlushErrorState();
	}
	PG_END_TRY();
}
예제 #4
0
/*
 * Used by clients to send a request to a service.
 */
bool
ServiceClientSendRequest(ServiceClient *serviceClient, void* request, int requestLen)
{
	ServiceConfig *serviceConfig;
	char        *message;
	bool		result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	Assert(request != NULL);

	PG_TRY();
	{
		SUPPRESS_PANIC();

		serviceConfig = serviceClient->serviceConfig;
		if (serviceConfig == NULL)
		{
			ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						    errmsg("Not connected to '%s'",
						           serviceConfig->title)));
		}
		if (requestLen != serviceClient->serviceConfig->requestLen)
		{
			ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							 errmsg("Expecting request length %d and actual length is %d for '%s'",
				 					serviceClient->serviceConfig->requestLen, requestLen,
				 					serviceConfig->title)));
		}

		result = ServiceClientWrite(serviceClient, request, requestLen);
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();
		/* Report the error to the server log */
	    if (!elog_demote(WARNING))
    	{
    		elog(LOG,"unable to demote error");
        	PG_RE_THROW();
    	}

		message = elog_message();
 		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;

}
예제 #5
0
static bool const_record_walker(Node *node, pgssConstLocations *jstate)
{
	bool result;

	if (node == NULL) return false;

	if (IsA(node, A_Const))
	{
		RecordConstLocation(jstate, castNode(A_Const, node)->location);
	}
	else if (IsA(node, ParamRef))
	{
		/* Track the highest ParamRef number */
		if (((ParamRef *) node)->number > jstate->highest_extern_param_id)
			jstate->highest_extern_param_id = castNode(ParamRef, node)->number;
	}
	else if (IsA(node, DefElem))
	{
		return const_record_walker((Node *) ((DefElem *) node)->arg, jstate);
	}
	else if (IsA(node, RawStmt))
	{
		return const_record_walker((Node *) ((RawStmt *) node)->stmt, jstate);
	}
	else if (IsA(node, VariableSetStmt))
	{
		return const_record_walker((Node *) ((VariableSetStmt *) node)->args, jstate);
	}
	else if (IsA(node, CopyStmt))
	{
		return const_record_walker((Node *) ((CopyStmt *) node)->query, jstate);
	}
	else if (IsA(node, ExplainStmt))
	{
		return const_record_walker((Node *) ((ExplainStmt *) node)->query, jstate);
	}
	else if (IsA(node, AlterRoleStmt))
	{
		return const_record_walker((Node *) ((AlterRoleStmt *) node)->options, jstate);
	}
	else if (IsA(node, DeclareCursorStmt))
	{
		return const_record_walker((Node *) ((DeclareCursorStmt *) node)->query, jstate);
	}

	PG_TRY();
	{
		result = raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
	}
	PG_CATCH();
	{
		FlushErrorState();
		result = false;
	}
	PG_END_TRY();

	return result;
}
예제 #6
0
PgQueryNormalizeResult pg_query_normalize(const char* input)
{
	MemoryContext ctx = NULL;
	PgQueryNormalizeResult result = {0};

	ctx = pg_query_enter_memory_context("pg_query_normalize");

	PG_TRY();
	{
		List *tree;
		pgssConstLocations jstate;
		int query_len;

		/* Parse query */
		tree = raw_parser(input);

		/* Set up workspace for constant recording */
		jstate.clocations_buf_size = 32;
		jstate.clocations = (pgssLocationLen *)
			palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
		jstate.clocations_count = 0;
		jstate.highest_extern_param_id = 0;

		/* Walk tree and record const locations */
		const_record_walker((Node *) tree, &jstate);

		/* Normalize query */
		query_len = (int) strlen(input);
		result.normalized_query = strdup(generate_normalized_query(&jstate, input, 0, &query_len, PG_UTF8));
	}
	PG_CATCH();
	{
		ErrorData* error_data;
		PgQueryError* error;

		MemoryContextSwitchTo(ctx);
		error_data = CopyErrorData();

		error = malloc(sizeof(PgQueryError));
		error->message   = strdup(error_data->message);
		error->filename  = strdup(error_data->filename);
		error->lineno    = error_data->lineno;
		error->cursorpos = error_data->cursorpos;

		result.error = error;
		FlushErrorState();
	}
	PG_END_TRY();

	pg_query_exit_memory_context(ctx);

	return result;
}
static bool const_record_walker(Node *node, pgssConstLocations *jstate)
{
	bool result;

	if (node == NULL) return false;

	if ((IsA(node, A_Const) && ((A_Const *) node)->location >= 0) || (IsA(node, DefElem) && ((DefElem *) node)->location >= 0))
	{
		/* enlarge array if needed */
		if (jstate->clocations_count >= jstate->clocations_buf_size)
		{
			jstate->clocations_buf_size *= 2;
			jstate->clocations = (pgssLocationLen *)
				repalloc(jstate->clocations,
						 jstate->clocations_buf_size *
						 sizeof(pgssLocationLen));
		}
		jstate->clocations[jstate->clocations_count].location = IsA(node, DefElem) ? ((DefElem *) node)->location : ((A_Const *) node)->location;
		/* initialize lengths to -1 to simplify fill_in_constant_lengths */
		jstate->clocations[jstate->clocations_count].length = -1;
		jstate->clocations_count++;
	}
	else if (IsA(node, VariableSetStmt))
	{
		return const_record_walker((Node *) ((VariableSetStmt *) node)->args, jstate);
	}
	else if (IsA(node, CopyStmt))
	{
		return const_record_walker((Node *) ((CopyStmt *) node)->query, jstate);
	}
	else if (IsA(node, ExplainStmt))
	{
		return const_record_walker((Node *) ((ExplainStmt *) node)->query, jstate);
	}
	else if (IsA(node, AlterRoleStmt))
	{
		return const_record_walker((Node *) ((AlterRoleStmt *) node)->options, jstate);
	}

	PG_TRY();
	{
		result = raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
	}
	PG_CATCH();
	{
		FlushErrorState();
		result = false;
	}
	PG_END_TRY();

	return result;
}
예제 #8
0
inline
void
AbstractionLayer::Allocator::free(void *inPtr) const {
    if (inPtr == NULL)
        return;
        
    /*
     * See allocate(const size_t, const std::nothrow_t&) why we disable
     * processing of interrupts.
     */
    HOLD_INTERRUPTS();
    PG_TRY(); {
        pfree(unaligned(inPtr));
    } PG_CATCH(); {
        FlushErrorState();
    } PG_END_TRY();
    RESUME_INTERRUPTS();
}
예제 #9
0
static bool WalSendServer_ServiceRequest(ServiceCtrl *serviceCtrl, int sockfd, uint8 *request)
{
	WalSendRequest *walSendRequest = (WalSendRequest*)request;
	WalSendResponse walSendResponse;
	bool result = false;

	/* 
	 * Use a TRY block to catch unexpected errors that bubble up to this level
	 * and disable QD mirroring.
	 */
	PG_TRY();
	{
		if (Debug_print_qd_mirroring)
			elog(LOG, "request command %d = '%s'",
			     walSendRequest->command, 
			     WalSendRequestCommandToString(walSendRequest->command));

		WalSendServerDoRequest(walSendRequest);

		/*
		 * Currently, all requests need a response.
		 */
		walSendResponse.ok = true;

		result = ServiceProcessRespond(serviceCtrl, sockfd, (uint8*)&walSendResponse, sizeof(walSendResponse));
	}
	PG_CATCH();
	{
		/* 
		 * Report the unexpected error.
		 */
		EmitErrorReport();
		FlushErrorState();

		disableQDMirroring_UnexpectedError(
			"An unexpected error encountered.  Please report this problem to Greenplum");

		result = false;
	}
	PG_END_TRY();

	return result;
}
예제 #10
0
/*
 * run_pg_parse_json:
 *
 * Wrap pg_parse_json in order to restore InterruptHoldoffCount when parse
 * error occured.
 *
 * Returns true when parse completed. False for unexpected end of string.
 */
bool
run_pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
{
	MemoryContext ccxt = CurrentMemoryContext;
	uint32 saved_IntrHoldoffCount;

	/*
	 * "ereport(ERROR.." occurs on error in pg_parse_json resets
	 * InterruptHoldoffCount to zero, so we must save the value before calling
	 * json parser to restore it on parse error. See errfinish().
	 */
	saved_IntrHoldoffCount = InterruptHoldoffCount;

	PG_TRY();
	{
		pg_parse_json(lex, sem);
	}
	PG_CATCH();
	{
		ErrorData *errdata;
		MemoryContext ecxt;

		InterruptHoldoffCount = saved_IntrHoldoffCount;

		ecxt = MemoryContextSwitchTo(ccxt);
		errdata = CopyErrorData();
		
		if (errdata->sqlerrcode == ERRCODE_INVALID_TEXT_REPRESENTATION)
		{
			FlushErrorState();
			return false;
		}
		else
		{
			MemoryContextSwitchTo(ecxt);
			PG_RE_THROW();
		}
	}
	PG_END_TRY();

	return true;
}
예제 #11
0
/*
 * norm_yylex: core_yylex with replacing some tokens.
 */
static int
norm_yylex(char *str, core_YYSTYPE *yylval, YYLTYPE *yylloc, core_yyscan_t yyscanner)
{
	int tok;

	PG_TRY();
	{
		tok = core_yylex(yylval, yylloc, yyscanner);
	}
	PG_CATCH();
	{
		/*
		 * Error might occur during parsing quoted tokens that chopped
		 * halfway. Just ignore the rest of this query even if there might
		 * be other reasons for parsing to fail.
		 */
		FlushErrorState();
		return -1;
	}
	PG_END_TRY();
	
	/*
	 * '?' alone is assumed to be an IDENT.  If there's a real
	 * operator '?', this should be confused but there's hardly be.
	 */
	if (tok == Op && str[*yylloc] == '?' &&
		strchr(OPCHARS, str[*yylloc + 1]) == NULL)
		tok = SCONST;

	/*
	 * Replace tokens with '=' if the operator is consists of two or
	 * more opchars only. Assuming that opchars do not compose a token
	 * with non-opchars, check the first char only is sufficient.
	 */
	if (tok == Op && strchr(OPCHARS, str[*yylloc]) != NULL)
		tok = '=';

	return tok;
}
예제 #12
0
static void *
exec_func(void *arg)
{
	unsigned long rtn = (unsigned long)WD_NG;
	MemoryContext oldContext =  CurrentMemoryContext;
	WdThreadInfo* thread_arg = (WdThreadInfo*) arg;
	Assert(thread_arg != NULL);
	
	PG_TRY();
	{
		rtn = thread_arg->start_routine(thread_arg->arg);
	}
	PG_CATCH();
	{
		/* ignore the error message */
		EmitErrorReport();
		MemoryContextSwitchTo(oldContext);
		FlushErrorState();
	}
	PG_END_TRY();
	return rtn;
}
예제 #13
0
static javaFdwExecutionState* javaGetOptions(Oid foreigntableid) {
	ForeignTable	*f_table;
	ForeignServer	*f_server;
	UserMapping	*f_mapping;
	List		*options;
	ListCell	*cell;
    jobject obj, res, serverClass;
    jmethodID serverConstructor;
    jclass cl;
    javaFdwExecutionState *state;
    char *classname = NULL;
    
	options = NIL;
	f_table = GetForeignTable(foreigntableid);
	options = list_concat(options, f_table->options);

	f_server = GetForeignServer(f_table->serverid);	
	options = list_concat(options, f_server->options);

	PG_TRY();
	{
		f_mapping = GetUserMapping(GetUserId(), f_table->serverid);
		options = list_concat(options, f_mapping->options);
	}
	PG_CATCH();
	{
		FlushErrorState();
		/* DO NOTHING HERE */
	}
	PG_END_TRY();
	

	foreach(cell, options) {
		DefElem *def = (DefElem *) lfirst(cell);
        if (strcmp(def->defname, "class") == 0) {
            classname = (char *) defGetString(def);
        }
    }
예제 #14
0
static void
lint_func_beg( PLpgSQL_execstate * estate, PLpgSQL_function * func )
{
	const char *err_text = estate->err_text;

	if (plpgsql_lint_enable)
	{
		int i;
		PLpgSQL_rec *saved_records;
		PLpgSQL_var *saved_vars;
		MemoryContext oldcontext;
		ResourceOwner oldowner;

		/*
		 * inside control a rec and vars variables are modified, so we should to save their
		 * content
		 */
		saved_records = palloc(sizeof(PLpgSQL_rec) * estate->ndatums);
		saved_vars = palloc(sizeof(PLpgSQL_var) * estate->ndatums);

		for (i = 0; i < estate->ndatums; i++)
		{
			if (estate->datums[i]->dtype == PLPGSQL_DTYPE_REC)
			{
				PLpgSQL_rec *rec = (PLpgSQL_rec *) estate->datums[i];

				saved_records[i].tup = rec->tup;
				saved_records[i].tupdesc = rec->tupdesc;
				saved_records[i].freetup = rec->freetup;
				saved_records[i].freetupdesc = rec->freetupdesc;

				/* don't release a original tupdesc and original tup */
				rec->freetup = false;
				rec->freetupdesc = false;
			}
			else if (estate->datums[i]->dtype == PLPGSQL_DTYPE_VAR)
			{
				PLpgSQL_var *var = (PLpgSQL_var *) estate->datums[i];

				saved_vars[i].value = var->value;
				saved_vars[i].isnull = var->isnull;
				saved_vars[i].freeval = var->freeval;

				var->freeval = false;
			}
		}

		estate->err_text = NULL;

		/*
		 * Raised exception should be trapped in outer functtion. Protection
		 * against outer trap is QUERY_CANCELED exception. 
		 */
		oldcontext = CurrentMemoryContext;
		oldowner = CurrentResourceOwner;

		PG_TRY();
		{
			lint_stmt(estate, func, (PLpgSQL_stmt *) func->action);
		}
		PG_CATCH();
		{
			ErrorData  *edata;

			/* Save error info */
			MemoryContextSwitchTo(oldcontext);
			edata = CopyErrorData();
			FlushErrorState();
			CurrentResourceOwner = oldowner;

			edata->sqlerrcode = ERRCODE_QUERY_CANCELED;
			ReThrowError(edata);
		}
		PG_END_TRY();

		estate->err_text = err_text;
		estate->err_stmt = NULL;

		/* return back a original rec variables */
		for (i = 0; i < estate->ndatums; i++)
		{
			if (estate->datums[i]->dtype == PLPGSQL_DTYPE_REC)
			{
				PLpgSQL_rec *rec = (PLpgSQL_rec *) estate->datums[i];

				if (rec->freetupdesc)
					FreeTupleDesc(rec->tupdesc);

				rec->tup = saved_records[i].tup;
				rec->tupdesc = saved_records[i].tupdesc;
				rec->freetup = saved_records[i].freetup;
				rec->freetupdesc = saved_records[i].freetupdesc;
			}
			else if (estate->datums[i]->dtype == PLPGSQL_DTYPE_VAR)
			{
				PLpgSQL_var *var = (PLpgSQL_var *) estate->datums[i];

				var->value = saved_vars[i].value;
				var->isnull = saved_vars[i].isnull;
				var->freeval = saved_vars[i].freeval;
			}
		}

		pfree(saved_records);
		pfree(saved_vars);
	}
}
예제 #15
0
/*
 * initSuffixTree  - create suffix tree from file. Function converts
 * UTF8-encoded file into current encoding.
 */
static SuffixChar *
initSuffixTree(char *filename)
{
	SuffixChar *volatile rootSuffixTree = NULL;
	MemoryContext ccxt = CurrentMemoryContext;
	tsearch_readline_state trst;
	volatile bool skip;

	filename = get_tsearch_config_filename(filename, "rules");
	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open unaccent file \"%s\": %m",
						filename)));

	do
	{
		char		src[4096];
		char		trg[4096];
		int			srclen;
		int			trglen;
		char	   *line = NULL;

		skip = true;

		PG_TRY();
		{
			/*
			 * pg_do_encoding_conversion() (called by tsearch_readline()) will
			 * emit exception if it finds untranslatable characters in current
			 * locale. We just skip such characters.
			 */
			while ((line = tsearch_readline(&trst)) != NULL)
			{
				if (sscanf(line, "%s\t%s\n", src, trg) != 2)
					continue;

				srclen = strlen(src);
				trglen = strlen(trg);

				rootSuffixTree = placeChar(rootSuffixTree,
										   (unsigned char *) src, srclen,
										   trg, trglen);
				skip = false;
				pfree(line);
			}
		}
		PG_CATCH();
		{
			ErrorData  *errdata;
			MemoryContext ecxt;

			ecxt = MemoryContextSwitchTo(ccxt);
			errdata = CopyErrorData();
			if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER)
			{
				FlushErrorState();
			}
			else
			{
				MemoryContextSwitchTo(ecxt);
				PG_RE_THROW();
			}
		}
		PG_END_TRY();
	}
	while (skip);

	tsearch_readline_end(&trst);

	return rootSuffixTree;
}
예제 #16
0
/*
 * kafka_consume_main
 *
 * Main function for Kafka consumers running as background workers
 */
void
kafka_consume_main(Datum arg)
{
	char err_msg[512];
	rd_kafka_topic_conf_t *topic_conf;
	rd_kafka_t *kafka;
	rd_kafka_topic_t *topic;
	rd_kafka_message_t **messages;
	const struct rd_kafka_metadata *meta;
	struct rd_kafka_metadata_topic topic_meta;
	rd_kafka_resp_err_t err;
	bool found;
	Oid id = (Oid) arg;
	ListCell *lc;
	KafkaConsumerProc *proc = hash_search(consumer_procs, &id, HASH_FIND, &found);
	KafkaConsumer consumer;
	CopyStmt *copy;
	int valid_brokers = 0;
	int i;
	int my_partitions = 0;

	if (!found)
		elog(ERROR, "kafka consumer %d not found", id);

	pqsignal(SIGTERM, kafka_consume_main_sigterm);
#define BACKTRACE_SEGFAULTS
#ifdef BACKTRACE_SEGFAULTS
	pqsignal(SIGSEGV, debug_segfault);
#endif

	/* we're now ready to receive signals */
	BackgroundWorkerUnblockSignals();

	/* give this proc access to the database */
	BackgroundWorkerInitializeConnection(NameStr(proc->dbname), NULL);

	/* load saved consumer state */
	StartTransactionCommand();
	load_consumer_state(proc->consumer_id, &consumer);
	copy = get_copy_statement(&consumer);

	topic_conf = rd_kafka_topic_conf_new();
	kafka = rd_kafka_new(RD_KAFKA_CONSUMER, NULL, err_msg, sizeof(err_msg));
	rd_kafka_set_logger(kafka, logger);

	/*
	 * Add all brokers currently in pipeline_kafka_brokers
	 */
	if (consumer.brokers == NIL)
		elog(ERROR, "no valid brokers were found");

	foreach(lc, consumer.brokers)
		valid_brokers += rd_kafka_brokers_add(kafka, lfirst(lc));

	if (!valid_brokers)
		elog(ERROR, "no valid brokers were found");

	/*
	 * Set up our topic to read from
	 */
	topic = rd_kafka_topic_new(kafka, consumer.topic, topic_conf);
	err = rd_kafka_metadata(kafka, false, topic, &meta, CONSUMER_TIMEOUT);

	if (err != RD_KAFKA_RESP_ERR_NO_ERROR)
		elog(ERROR, "failed to acquire metadata: %s", rd_kafka_err2str(err));

	Assert(meta->topic_cnt == 1);
	topic_meta = meta->topics[0];

	load_consumer_offsets(&consumer, &topic_meta, proc->offset);
	CommitTransactionCommand();

	/*
	* Begin consuming all partitions that this process is responsible for
	*/
	for (i = 0; i < topic_meta.partition_cnt; i++)
	{
		int partition = topic_meta.partitions[i].id;

		Assert(partition <= consumer.num_partitions);
		if (partition % consumer.parallelism != proc->partition_group)
			continue;

		elog(LOG, "[kafka consumer] %s <- %s consuming partition %d from offset %ld",
				consumer.rel->relname, consumer.topic, partition, consumer.offsets[partition]);

		if (rd_kafka_consume_start(topic, partition, consumer.offsets[partition]) == -1)
			elog(ERROR, "failed to start consuming: %s", rd_kafka_err2str(rd_kafka_errno2err(errno)));

		my_partitions++;
	}

	/*
	* No point doing anything if we don't have any partitions assigned to us
	*/
	if (my_partitions == 0)
	{
		elog(LOG, "[kafka consumer] %s <- %s consumer %d doesn't have any partitions to read from",
				consumer.rel->relname, consumer.topic, MyProcPid);
		goto done;
	}

	messages = palloc0(sizeof(rd_kafka_message_t) * consumer.batch_size);

	/*
	 * Consume messages until we are terminated
	 */
	while (!got_sigterm)
	{
		ssize_t num_consumed;
		int i;
		int messages_buffered = 0;
		int partition;
		StringInfoData buf;
		bool xact = false;

		for (partition = 0; partition < consumer.num_partitions; partition++)
		{
			if (partition % consumer.parallelism != proc->partition_group)
				continue;

			num_consumed = rd_kafka_consume_batch(topic, partition,
					CONSUMER_TIMEOUT, messages, consumer.batch_size);

			if (num_consumed <= 0)
				continue;

			if (!xact)
			{
				StartTransactionCommand();
				xact = true;
			}

			initStringInfo(&buf);
			for (i = 0; i < num_consumed; i++)
			{
				if (messages[i]->payload != NULL)
				{
					appendBinaryStringInfo(&buf, messages[i]->payload, messages[i]->len);
					if (buf.len > 0 && buf.data[buf.len - 1] != '\n')
						appendStringInfoChar(&buf, '\n');
					messages_buffered++;
				}
				consumer.offsets[partition] = messages[i]->offset;
				rd_kafka_message_destroy(messages[i]);
			}
		}

		if (!xact)
		{
			pg_usleep(1 * 1000);
			continue;
		}

		/* we don't want to die in the event of any errors */
		PG_TRY();
		{
			if (messages_buffered)
				execute_copy(copy, &buf);
		}
		PG_CATCH();
		{
			elog(LOG, "[kafka consumer] %s <- %s failed to process batch, dropped %d message%s:",
					consumer.rel->relname, consumer.topic, (int) num_consumed, (num_consumed == 1 ? "" : "s"));
			EmitErrorReport();
			FlushErrorState();

			AbortCurrentTransaction();
			xact = false;
		}
		PG_END_TRY();

		if (!xact)
			StartTransactionCommand();

		if (messages_buffered)
			save_consumer_state(&consumer, proc->partition_group);

		CommitTransactionCommand();
	}

done:

	hash_search(consumer_procs, &id, HASH_REMOVE, NULL);

	rd_kafka_topic_destroy(topic);
	rd_kafka_destroy(kafka);
	rd_kafka_wait_destroyed(CONSUMER_TIMEOUT);
}
예제 #17
0
/*
 * A function having everything to do with logging, which ought to be factored
 * out one day to make a start on the Thoughts-on-logging wiki ideas.
 */
static void reLogWithChangedLevel(int level)
{
	ErrorData *edata = CopyErrorData();
	int sqlstate = edata->sqlerrcode;
	int category = ERRCODE_TO_CATEGORY(sqlstate);
	FlushErrorState();
	if ( WARNING > level )
	{
		if ( ERRCODE_SUCCESSFUL_COMPLETION != category )
			sqlstate = ERRCODE_SUCCESSFUL_COMPLETION;
	}
	else if ( WARNING == level )
	{
		if ( ERRCODE_WARNING != category && ERRCODE_NO_DATA != category )
			sqlstate = ERRCODE_WARNING;
	}
	else if ( ERRCODE_WARNING == category || ERRCODE_NO_DATA == category ||
		ERRCODE_SUCCESSFUL_COMPLETION == category )
		sqlstate = ERRCODE_INTERNAL_ERROR;
#if PG_VERSION_NUM >= 90500
	edata->elevel = level;
	edata->sqlerrcode = sqlstate;
	PG_TRY();
	{
		ThrowErrorData(edata);
	}
	PG_CATCH();
	{
		FreeErrorData(edata); /* otherwise this wouldn't happen in ERROR case */
		PG_RE_THROW();
	}
	PG_END_TRY();
	FreeErrorData(edata);
#else
	if (!errstart(level, edata->filename, edata->lineno,
				  edata->funcname, NULL))
	{
		FreeErrorData(edata);
		return;
	}

	errcode(sqlstate);
	if (edata->message)
		errmsg("%s", edata->message);
	if (edata->detail)
		errdetail("%s", edata->detail);
	if (edata->detail_log)
		errdetail_log("%s", edata->detail_log);
	if (edata->hint)
		errhint("%s", edata->hint);
	if (edata->context)
		errcontext("%s", edata->context); /* this may need to be trimmed */
#if PG_VERSION_NUM >= 90300
	if (edata->schema_name)
		err_generic_string(PG_DIAG_SCHEMA_NAME, edata->schema_name);
	if (edata->table_name)
		err_generic_string(PG_DIAG_TABLE_NAME, edata->table_name);
	if (edata->column_name)
		err_generic_string(PG_DIAG_COLUMN_NAME, edata->column_name);
	if (edata->datatype_name)
		err_generic_string(PG_DIAG_DATATYPE_NAME, edata->datatype_name);
	if (edata->constraint_name)
		err_generic_string(PG_DIAG_CONSTRAINT_NAME, edata->constraint_name);
#endif
	if (edata->internalquery)
		internalerrquery(edata->internalquery);

	FreeErrorData(edata);
	errfinish(0);
#endif
}
예제 #18
0
/*
 * There are a few ways to arrive in the initsequencer.
 * 1. From _PG_init (called exactly once when the library is loaded for ANY
 *    reason).
 *    1a. Because of the command LOAD 'libraryname';
 *        This case can be distinguished because _PG_init will have found the
 *        LOAD command and saved the 'libraryname' in pljavaLoadPath.
 *    1b. Because of a CREATE FUNCTION naming this library. pljavaLoadPath will
 *        be NULL.
 *    1c. By the first actual use of a PL/Java function, causing this library
 *        to be loaded. pljavaLoadPath will be NULL. The called function's Oid
 *        will be available to the call handler once we return from _PG_init,
 *        but it isn't (easily) available here.
 * 2. From the call handler, if initialization isn't complete yet. That can only
 *    mean something failed in the earlier call to _PG_init, and whatever it was
 *    is highly likely to fail again. That may lead to the untidyness of
 *    duplicated diagnostic messages, but for now I like the belt-and-suspenders
 *    approach of making sure the init sequence gets as many chances as possible
 *    to succeed.
 * 3. From a GUC assign hook, if the user has updated a setting that might allow
 *    initialization to succeed. It resumes from where it left off.
 *
 * In all cases, the sequence must progress as far as starting the VM and
 * initializing the PL/Java classes. In all cases except 1a, that's enough,
 * assuming the language handlers and schema have all been set up already (or,
 * in case 1b, the user is intent on setting them up explicitly).
 *
 * In case 1a, we can go ahead and test for, and create, the schema, functions,
 * and language entries as needed, using pljavaLoadPath as the library path
 * if creating the language handler functions. One-stop shopping. (The presence
 * of pljavaLoadPath in any of the other cases, such as resumption by an assign
 * hook, indicates it is really a continuation of case 1a.)
 */
static void initsequencer(enum initstage is, bool tolerant)
{
	JVMOptList optList;
	Invocation ctx;
	jint JNIresult;
	char *greeting;

	switch (is)
	{
	case IS_FORMLESS_VOID:
		initstage = IS_GUCS_REGISTERED;

	case IS_GUCS_REGISTERED:
		libjvmlocation = strdup("libjvm.so");

		initstage = IS_PLJAVA_ENABLED;

	case IS_PLJAVA_ENABLED:
		libjvm_handle = pg_dlopen(libjvmlocation);
		if ( NULL == libjvm_handle )
		{
			ereport(ERROR, (
				errmsg("Cannot load libjvm.so library, check that it is available in LD_LIBRARY_PATH"),
				errdetail("%s", (char *)pg_dlerror())));
			goto check_tolerant;
		}
		initstage = IS_CAND_JVMOPENED;

	case IS_CAND_JVMOPENED:
		pljava_createvm =
			(jint (JNICALL *)(JavaVM **, void **, void *))
			pg_dlsym(libjvm_handle, "JNI_CreateJavaVM");
		if ( NULL == pljava_createvm )
		{
			/*
			 * If it hasn't got the symbol, it can't be the right
			 * library, so close/unload it so another can be tried.
			 * Format the dlerror string first: dlclose may clobber it.
			 */
			char *dle = MemoryContextStrdup(ErrorContext, pg_dlerror());
			pg_dlclose(libjvm_handle);
			initstage = IS_CAND_JVMLOCATION;
			ereport(ERROR, (
				errmsg("Cannot start Java VM"),
				errdetail("%s", dle),
				errhint("Check that libjvm.so is available in LD_LIBRARY_PATH")));
			goto check_tolerant;
		}
		initstage = IS_CREATEVM_SYM_FOUND;

	case IS_CREATEVM_SYM_FOUND:
		s_javaLogLevel = INFO;
		checkIntTimeType();
		HashMap_initialize(); /* creates things in TopMemoryContext */
#ifdef PLJAVA_DEBUG
		/* Hard setting for debug. Don't forget to recompile...
		 */
		pljava_debug = 1;
#endif
		initstage = IS_MISC_ONCE_DONE;

	case IS_MISC_ONCE_DONE:
		JVMOptList_init(&optList); /* uses CurrentMemoryContext */
		seenVisualVMName = false;
		addUserJVMOptions(&optList);
		if ( ! seenVisualVMName )
			JVMOptList_addVisualVMName(&optList);
		JVMOptList_add(&optList, "vfprintf", (void*)my_vfprintf, true);
#ifndef GCJ
		JVMOptList_add(&optList, "-Xrs", 0, true);
#endif
		effectiveClassPath = getClassPath("-Djava.class.path=");
		if(effectiveClassPath != 0)
		{
			JVMOptList_add(&optList, effectiveClassPath, 0, true);
		}
		initstage = IS_JAVAVM_OPTLIST;

	case IS_JAVAVM_OPTLIST:
		JNIresult = initializeJavaVM(&optList); /* frees the optList */
		if( JNI_OK != JNIresult )
		{
			initstage = IS_MISC_ONCE_DONE; /* optList has been freed */
			StaticAssertStmt(sizeof(jint) <= sizeof(long int),
				"jint wider than long int?!");
			ereport(WARNING,
				(errmsg("failed to create Java virtual machine"),
				 errdetail("JNI_CreateJavaVM returned an error code: %ld",
					(long int)JNIresult),
				 jvmStartedAtLeastOnce ?
					errhint("Because an earlier attempt during this session "
					"did start a VM before failing, this probably means your "
					"Java runtime environment does not support more than one "
					"VM creation per session.  You may need to exit this "
					"session and start a new one.") : 0));
			goto check_tolerant;
		}
		jvmStartedAtLeastOnce = true;
		elog(DEBUG2, "successfully created Java virtual machine");
		initstage = IS_JAVAVM_STARTED;

	case IS_JAVAVM_STARTED:
#ifdef USE_PLJAVA_SIGHANDLERS
		pqsignal(SIGINT,  pljavaStatementCancelHandler);
		pqsignal(SIGTERM, pljavaDieHandler);
#endif
		/* Register an on_proc_exit handler that destroys the VM
		 */
		on_proc_exit(_destroyJavaVM, 0);
		initstage = IS_SIGHANDLERS;

	case IS_SIGHANDLERS:
		Invocation_pushBootContext(&ctx);
		PG_TRY();
		{
			initPLJavaClasses();
			initJavaSession();
			Invocation_popBootContext();
			initstage = IS_PLJAVA_FOUND;
		}
		PG_CATCH();
		{
			MemoryContextSwitchTo(ctx.upperContext); /* leave ErrorContext */
			Invocation_popBootContext();
			initstage = IS_MISC_ONCE_DONE;
			/* We can't stay here...
			 */
			if ( tolerant )
				reLogWithChangedLevel(WARNING); /* so xact is not aborted */
			else
			{
				EmitErrorReport(); /* no more unwinding, just log it */
				/* Seeing an ERROR emitted to the log, without leaving the
				 * transaction aborted, would violate the principle of least
				 * astonishment. But at check_tolerant below, another ERROR will
				 * be thrown immediately, so the transaction effect will be as
				 * expected and this ERROR will contribute information beyond
				 * what is in the generic one thrown down there.
				 */
				FlushErrorState();
			}
		}
		PG_END_TRY();
		if ( IS_PLJAVA_FOUND != initstage )
		{
			/* JVM initialization failed for some reason. Destroy
			 * the VM if it exists. Perhaps the user will try
			 * fixing the pljava.classpath and make a new attempt.
			 */
			ereport(WARNING, (
				errmsg("failed to load initial PL/Java classes"),
				errhint("The most common reason is that \"pljava_classpath\" "
					"needs to be set, naming the proper \"pljava.jar\" file.")
					));
			_destroyJavaVM(0, 0);
			goto check_tolerant;
		}

	case IS_PLJAVA_FOUND:
		greeting = InstallHelper_hello();
		ereport(NULL != pljavaLoadPath ? NOTICE : DEBUG1, (
				errmsg("PL/Java loaded"),
				errdetail("versions:\n%s", greeting)));
		pfree(greeting);
		if ( NULL != pljavaLoadPath )
			InstallHelper_groundwork(); /* sqlj schema, language handlers, ...*/
		initstage = IS_COMPLETE;

	case IS_COMPLETE:
		pljavaLoadingAsExtension = false;
		if ( alteredSettingsWereNeeded )
		{
			/* Use this StringInfoData to conditionally construct part of the
			 * hint string suggesting ALTER DATABASE ... SET ... FROM CURRENT
			 * provided the server is >= 9.2 where that will actually work.
			 * In 9.3, psprintf appeared, which would make this all simpler,
			 * but if 9.3+ were all that had to be supported, this would all
			 * be moot anyway. Doing the initStringInfo inside the ereport
			 * ensures the string is allocated in ErrorContext and won't leak.
			 * Don't remove the extra parens grouping
			 * (initStringInfo, appendStringInfo, errhint) ... with the parens,
			 * that's a comma expression, which is sequenced; without them, they
			 * are just function parameters with evaluation order unknown.
			 */
			StringInfoData buf;
#if PG_VERSION_NUM >= 90200
#define MOREHINT \
				appendStringInfo(&buf, \
					"using ALTER DATABASE %s SET ... FROM CURRENT or ", \
					pljavaDbName()),
#else
#define MOREHINT
#endif
			ereport(NOTICE, (
				errmsg("PL/Java successfully started after adjusting settings"),
				(initStringInfo(&buf),
				MOREHINT
				errhint("The settings that worked should be saved (%s"
					"in the \"%s\" file). For a reminder of what has been set, "
					"try: SELECT name, setting FROM pg_settings WHERE name LIKE"
					" 'pljava.%%' AND source = 'session'",
					buf.data,
					superuser()
						? PG_GETCONFIGOPTION("config_file")
						: "postgresql.conf"))));
#undef MOREHINT
			if ( loadAsExtensionFailed )
			{
				ereport(NOTICE, (errmsg(
					"PL/Java load successful after failed CREATE EXTENSION"),
					errdetail(
					"PL/Java is now installed, but not as an extension."),
					errhint(
					"To correct that, either COMMIT or ROLLBACK, make sure "
					"the working settings are saved, exit this session, and "
					"in a new session, either: "
					"1. if committed, run "
					"\"CREATE EXTENSION pljava FROM unpackaged\", or 2. "
					"if rolled back, simply \"CREATE EXTENSION pljava\" again."
					)));
			}
		}
		return;

	default:
		ereport(ERROR, (
			errmsg("cannot set up PL/Java"),
			errdetail(
				"An unexpected stage was reached in the startup sequence."),
			errhint(
				"Please report the circumstances to the PL/Java maintainers.")
			));
	}

check_tolerant:
	if ( pljavaLoadingAsExtension )
	{
		tolerant = false;
		loadAsExtensionFailed = true;
		pljavaLoadingAsExtension = false;
	}
	if ( !tolerant )
	{
		ereport(ERROR, (
			errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
			errmsg(
				"cannot use PL/Java before successfully completing its setup"),
			errhint(
				"Check the log for messages closely preceding this one, "
				"detailing what step of setup failed and what will be needed, "
				"probably setting one of the \"pljava.\" configuration "
				"variables, to complete the setup. If there is not enough "
				"help in the log, try again with different settings for "
				"\"log_min_messages\" or \"log_error_verbosity\".")));
	}
}
예제 #19
0
/*
 * Main entry point for checkpointer process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 */
void
CheckpointerMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext checkpointer_context;

	CheckpointerShmem->checkpointer_pid = MyProcPid;

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we deliberately ignore SIGTERM, because during a standard Unix
	 * system shutdown cycle, init will SIGTERM all processes at once.  We
	 * want to wait for the backends to exit, whereupon the postmaster will
	 * tell us it's okay to shut down (via SIGUSR2).
	 */
	pqsignal(SIGHUP, ChkptSigHupHandler);		/* set flag to read config
												 * file */
	pqsignal(SIGINT, ReqCheckpointHandler);		/* request checkpoint */
	pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
	pqsignal(SIGQUIT, chkpt_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, chkpt_sigusr1_handler);
	pqsignal(SIGUSR2, ReqShutdownHandler);		/* request shutdown */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Initialize so that first time-driven event happens at the correct time.
	 */
	last_checkpoint_time = last_xlog_switch_time = (pg_time_t) time(NULL);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Checkpointer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	checkpointer_context = AllocSetContextCreate(TopMemoryContext,
												 "Checkpointer",
												 ALLOCSET_DEFAULT_SIZES);
	MemoryContextSwitchTo(checkpointer_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().  We don't have very many resources to worry
		 * about in checkpointer, but we do have LWLocks, buffers, and temp
		 * files.
		 */
		LWLockReleaseAll();
		ConditionVariableCancelSleep();
		pgstat_report_wait_end();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_SMgr();
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/* Warn any waiting backends that the checkpoint failed. */
		if (ckpt_active)
		{
			SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
			CheckpointerShmem->ckpt_failed++;
			CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
			SpinLockRelease(&CheckpointerShmem->ckpt_lck);

			ckpt_active = false;
		}

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(checkpointer_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(checkpointer_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Ensure all shared memory values are set correctly for the config. Doing
	 * this here ensures no race conditions from other concurrent updaters.
	 */
	UpdateSharedMemoryConfig();

	/*
	 * Advertise our latch that backends can use to wake us up while we're
	 * sleeping.
	 */
	ProcGlobal->checkpointerLatch = &MyProc->procLatch;

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		do_checkpoint = false;
		int			flags = 0;
		pg_time_t	now;
		int			elapsed_secs;
		int			cur_timeout;
		int			rc;

		/* Clear any already-pending wakeups */
		ResetLatch(MyLatch);

		/*
		 * Process any requests or signals received recently.
		 */
		AbsorbFsyncRequests();

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);

			/*
			 * Checkpointer is the last process to shut down, so we ask it to
			 * hold the keys for a range of other tasks required most of which
			 * have nothing to do with checkpointing at all.
			 *
			 * For various reasons, some config values can change dynamically
			 * so the primary copy of them is held in shared memory to make
			 * sure all backends see the same value.  We make Checkpointer
			 * responsible for updating the shared memory copy if the
			 * parameter setting changes because of SIGHUP.
			 */
			UpdateSharedMemoryConfig();
		}
		if (checkpoint_requested)
		{
			checkpoint_requested = false;
			do_checkpoint = true;
			BgWriterStats.m_requested_checkpoints++;
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Close down the database */
			ShutdownXLOG(0, 0);
			/* Normal exit from the checkpointer is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Force a checkpoint if too much time has elapsed since the last one.
		 * Note that we count a timed checkpoint in stats only when this
		 * occurs without an external request, but we set the CAUSE_TIME flag
		 * bit even if there is also an external request.
		 */
		now = (pg_time_t) time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
		{
			if (!do_checkpoint)
				BgWriterStats.m_timed_checkpoints++;
			do_checkpoint = true;
			flags |= CHECKPOINT_CAUSE_TIME;
		}

		/*
		 * Do a checkpoint if requested.
		 */
		if (do_checkpoint)
		{
			bool		ckpt_performed = false;
			bool		do_restartpoint;

			/*
			 * Check if we should perform a checkpoint or a restartpoint. As a
			 * side-effect, RecoveryInProgress() initializes TimeLineID if
			 * it's not set yet.
			 */
			do_restartpoint = RecoveryInProgress();

			/*
			 * Atomically fetch the request flags to figure out what kind of a
			 * checkpoint we should perform, and increase the started-counter
			 * to acknowledge that we've started a new checkpoint.
			 */
			SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
			flags |= CheckpointerShmem->ckpt_flags;
			CheckpointerShmem->ckpt_flags = 0;
			CheckpointerShmem->ckpt_started++;
			SpinLockRelease(&CheckpointerShmem->ckpt_lck);

			/*
			 * The end-of-recovery checkpoint is a real checkpoint that's
			 * performed while we're still in recovery.
			 */
			if (flags & CHECKPOINT_END_OF_RECOVERY)
				do_restartpoint = false;

			/*
			 * We will warn if (a) too soon since last checkpoint (whatever
			 * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
			 * since the last checkpoint start.  Note in particular that this
			 * implementation will not generate warnings caused by
			 * CheckPointTimeout < CheckPointWarning.
			 */
			if (!do_restartpoint &&
				(flags & CHECKPOINT_CAUSE_XLOG) &&
				elapsed_secs < CheckPointWarning)
				ereport(LOG,
						(errmsg_plural("checkpoints are occurring too frequently (%d second apart)",
				"checkpoints are occurring too frequently (%d seconds apart)",
									   elapsed_secs,
									   elapsed_secs),
						 errhint("Consider increasing the configuration parameter \"max_wal_size\".")));

			/*
			 * Initialize checkpointer-private variables used during
			 * checkpoint.
			 */
			ckpt_active = true;
			if (do_restartpoint)
				ckpt_start_recptr = GetXLogReplayRecPtr(NULL);
			else
				ckpt_start_recptr = GetInsertRecPtr();
			ckpt_start_time = now;
			ckpt_cached_elapsed = 0;

			/*
			 * Do the checkpoint.
			 */
			if (!do_restartpoint)
			{
				CreateCheckPoint(flags);
				ckpt_performed = true;
			}
			else
				ckpt_performed = CreateRestartPoint(flags);

			/*
			 * After any checkpoint, close all smgr files.  This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();

			/*
			 * Indicate checkpoint completion to any waiting backends.
			 */
			SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
			CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
			SpinLockRelease(&CheckpointerShmem->ckpt_lck);

			if (ckpt_performed)
			{
				/*
				 * Note we record the checkpoint start time not end time as
				 * last_checkpoint_time.  This is so that time-driven
				 * checkpoints happen at a predictable spacing.
				 */
				last_checkpoint_time = now;
			}
			else
			{
				/*
				 * We were not able to perform the restartpoint (checkpoints
				 * throw an ERROR in case of error).  Most likely because we
				 * have not received any new checkpoint WAL records since the
				 * last restartpoint. Try again in 15 s.
				 */
				last_checkpoint_time = now - CheckPointTimeout + 15;
			}

			ckpt_active = false;
		}

		/* Check for archive_timeout and switch xlog files if necessary. */
		CheckArchiveTimeout();

		/*
		 * Send off activity statistics to the stats collector.  (The reason
		 * why we re-use bgwriter-related code for this is that the bgwriter
		 * and checkpointer used to be just one process.  It's probably not
		 * worth the trouble to split the stats support into two independent
		 * stats message types.)
		 */
		pgstat_send_bgwriter();

		/*
		 * Sleep until we are signaled or it's time for another checkpoint or
		 * xlog file switch.
		 */
		now = (pg_time_t) time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
			continue;			/* no sleep for us ... */
		cur_timeout = CheckPointTimeout - elapsed_secs;
		if (XLogArchiveTimeout > 0 && !RecoveryInProgress())
		{
			elapsed_secs = now - last_xlog_switch_time;
			if (elapsed_secs >= XLogArchiveTimeout)
				continue;		/* no sleep for us ... */
			cur_timeout = Min(cur_timeout, XLogArchiveTimeout - elapsed_secs);
		}

		rc = WaitLatch(MyLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   cur_timeout * 1000L /* convert to ms */,
					   WAIT_EVENT_CHECKPOINTER_MAIN);

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (rc & WL_POSTMASTER_DEATH)
			exit(1);
	}
}
예제 #20
0
/* 
 * main entry pont of pcp worker child process
 */
void
pcp_worker_main(int port)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext PCPMemoryContext;
	int authenticated = 0;

	char salt[4];
	int random_salt = 0;
	struct timeval uptime;
	char tos;
	int rsize;
	char *buf = NULL;

	ereport(DEBUG1,
			(errmsg("I am PCP worker child with pid:%d",getpid())));

	/* Identify myself via ps */
	init_ps_display("", "", "", "");

	gettimeofday(&uptime, NULL);
	srandom((unsigned int) (getpid() ^ uptime.tv_usec));

	/* set up signal handlers */
	signal(SIGTERM, die);
	signal(SIGINT, die);
	signal(SIGQUIT, die);
	signal(SIGCHLD, SIG_DFL);
	signal(SIGUSR2, wakeup_handler_child);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);
	/* Create per loop iteration memory context */
	PCPMemoryContext = AllocSetContextCreate(TopMemoryContext,
											 "PCP_worker_main_loop",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);

	MemoryContextSwitchTo(TopMemoryContext);

	/*
	 * install the call back for preparation of pcp worker child exit
	 */
	on_system_exit(pcp_worker_will_go_down, (Datum)NULL);

	/* Initialize my backend status */
	pool_initialize_private_backend_status();
	
	/* Initialize process context */
	pool_init_process_context();

	pcp_frontend = pcp_open(port);
	unset_nonblock(pcp_frontend->fd);

	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		error_context_stack = NULL;
		EmitErrorReport();

		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();
	}
	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;
	
	for(;;)
	{
		MemoryContextSwitchTo(PCPMemoryContext);
		MemoryContextResetAndDeleteChildren(PCPMemoryContext);

		errno = 0;

		/* read a PCP packet */
		do_pcp_read(pcp_frontend, &tos, 1);
		do_pcp_read(pcp_frontend, &rsize, sizeof(int));

		rsize = ntohl(rsize);
		if ((rsize - sizeof(int)) > 0)
		{
			buf = (char *)palloc(rsize - sizeof(int));
			do_pcp_read(pcp_frontend, buf, rsize - sizeof(int));
		}

		ereport(DEBUG1,
			(errmsg("received PCP packet"),
				 errdetail("PCP packet type of service '%c'", tos)));

		if (tos == 'R') /* authentication */
		{
			set_ps_display("PCP: processing authentication", false);
			process_authentication(pcp_frontend, buf,salt, &random_salt);
			authenticated = 1;
			continue;
		}
		if (tos == 'M') /* md5 salt */
		{
			set_ps_display("PCP: processing authentication", false);
			send_md5salt(pcp_frontend, salt);
			random_salt = 1;
			continue;
		}
		/* is this connection authenticated? if not disconnect immediately*/
		if (!authenticated)
			ereport(FATAL,
				(errmsg("authentication failed for new PCP connection"),
					 errdetail("connection not authorized")));

		/* process a request */
		pcp_process_command(tos, buf, rsize);
	}
	exit(0);
}
예제 #21
0
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 */
void
BackgroundWriterMain(void)
{
    sigjmp_buf	local_sigjmp_buf;
    MemoryContext bgwriter_context;

    am_bg_writer = true;

    /*
     * If possible, make this process a group leader, so that the postmaster
     * can signal any child processes too.	(bgwriter probably never has any
     * child processes, but for consistency we make all postmaster child
     * processes do this.)
     */
#ifdef HAVE_SETSID
    if (setsid() < 0)
        elog(FATAL, "setsid() failed: %m");
#endif

    /*
     * Properly accept or ignore signals the postmaster might send us
     *
     * SIGUSR1 is presently unused; keep it spare in case someday we want this
     * process to participate in ProcSignal signalling.
     */
    pqsignal(SIGHUP, BgSigHupHandler);	/* set flag to read config file */
    pqsignal(SIGINT, SIG_IGN);			/* as of 9.2 no longer requests checkpoint */
    pqsignal(SIGTERM, ReqShutdownHandler); 	/* shutdown */
    pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
    pqsignal(SIGALRM, SIG_IGN);
    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR1, SIG_IGN);			/* reserve for ProcSignal */
    pqsignal(SIGUSR2, SIG_IGN);

    /*
     * Reset some signals that are accepted by postmaster but not here
     */
    pqsignal(SIGCHLD, SIG_DFL);
    pqsignal(SIGTTIN, SIG_DFL);
    pqsignal(SIGTTOU, SIG_DFL);
    pqsignal(SIGCONT, SIG_DFL);
    pqsignal(SIGWINCH, SIG_DFL);

    /* We allow SIGQUIT (quickdie) at all times */
    sigdelset(&BlockSig, SIGQUIT);

    /*
     * Create a resource owner to keep track of our resources (currently only
     * buffer pins).
     */
    CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

    /*
     * Create a memory context that we will do all our work in.  We do this so
     * that we can reset the context during error recovery and thereby avoid
     * possible memory leaks.  Formerly this code just ran in
     * TopMemoryContext, but resetting that would be a really bad idea.
     */
    bgwriter_context = AllocSetContextCreate(TopMemoryContext,
                       "Background Writer",
                       ALLOCSET_DEFAULT_MINSIZE,
                       ALLOCSET_DEFAULT_INITSIZE,
                       ALLOCSET_DEFAULT_MAXSIZE);
    MemoryContextSwitchTo(bgwriter_context);

    /*
     * If an exception is encountered, processing resumes here.
     *
     * See notes in postgres.c about the design of this coding.
     */
    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        /* Since not using PG_TRY, must reset error stack by hand */
        error_context_stack = NULL;

        /* Prevent interrupts while cleaning up */
        HOLD_INTERRUPTS();

        /* Report the error to the server log */
        EmitErrorReport();

        /*
         * These operations are really just a minimal subset of
         * AbortTransaction().	We don't have very many resources to worry
         * about in bgwriter, but we do have LWLocks, buffers, and temp files.
         */
        LWLockReleaseAll();
        AbortBufferIO();
        UnlockBuffers();
        /* buffer pins are released here: */
        ResourceOwnerRelease(CurrentResourceOwner,
                             RESOURCE_RELEASE_BEFORE_LOCKS,
                             false, true);
        /* we needn't bother with the other ResourceOwnerRelease phases */
        AtEOXact_Buffers(false);
        AtEOXact_Files();
        AtEOXact_HashTables(false);

        /*
         * Now return to normal top-level context and clear ErrorContext for
         * next time.
         */
        MemoryContextSwitchTo(bgwriter_context);
        FlushErrorState();

        /* Flush any leaked data in the top-level context */
        MemoryContextResetAndDeleteChildren(bgwriter_context);

        /* Now we can allow interrupts again */
        RESUME_INTERRUPTS();

        /*
         * Sleep at least 1 second after any error.  A write error is likely
         * to be repeated, and we don't want to be filling the error logs as
         * fast as we can.
         */
        pg_usleep(1000000L);

        /*
         * Close all open files after any error.  This is helpful on Windows,
         * where holding deleted files open causes various strange errors.
         * It's not clear we need it elsewhere, but shouldn't hurt.
         */
        smgrcloseall();
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    /*
     * Unblock signals (they were blocked when the postmaster forked us)
     */
    PG_SETMASK(&UnBlockSig);

    /*
     * Use the recovery target timeline ID during recovery
     */
    if (RecoveryInProgress())
        ThisTimeLineID = GetRecoveryTargetTLI();

    /*
     * Loop forever
     */
    for (;;)
    {
        /*
         * Emergency bailout if postmaster has died.  This is to avoid the
         * necessity for manual cleanup of all postmaster children.
         */
        if (!PostmasterIsAlive())
            exit(1);

        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);
            /* update global shmem state for sync rep */
        }
        if (shutdown_requested)
        {
            /*
             * From here on, elog(ERROR) should end with exit(1), not send
             * control back to the sigsetjmp block above
             */
            ExitOnAnyError = true;
            /* Normal exit from the bgwriter is here */
            proc_exit(0);		/* done */
        }

        /*
         * Do one cycle of dirty-buffer writing.
         */
        BgBufferSync();

        /* Nap for the configured time. */
        BgWriterNap();
    }
}
예제 #22
0
/**
 * @brief Read the next tuple from parser.
 * @param rd  [in/out] reader
 * @return type
 */
HeapTuple
ReaderNext(Reader *rd)
{
	HeapTuple		tuple;
	MemoryContext	ccxt;
	bool			eof;
	Parser		   *parser = rd->parser;

	ccxt = CurrentMemoryContext;

	eof = false;
	do
	{
		tuple = NULL;
		parser->parsing_field = -1;

		PG_TRY();
		{
			tuple = ParserRead(parser, &rd->checker);
			if (tuple == NULL)
				eof = true;
			else
			{
				tuple = CheckerTuple(&rd->checker, tuple,
									 &parser->parsing_field);
				CheckerConstraints(&rd->checker, tuple, &parser->parsing_field);
			}
		}
		PG_CATCH();
		{
			ErrorData	   *errdata;
			MemoryContext	ecxt;
			char		   *message;
			StringInfoData	buf;

			if (parser->parsing_field < 0)
				PG_RE_THROW();	/* should not ignore */

			ecxt = MemoryContextSwitchTo(ccxt);
			errdata = CopyErrorData();

			/* We cannot ignore query aborts. */
			switch (errdata->sqlerrcode)
			{
				case ERRCODE_ADMIN_SHUTDOWN:
				case ERRCODE_QUERY_CANCELED:
					MemoryContextSwitchTo(ecxt);
					PG_RE_THROW();
					break;
			}

			/* Absorb parse errors. */
			rd->parse_errors++;
			if (errdata->message)
				message = pstrdup(errdata->message);
			else
				message = "<no error message>";
			FlushErrorState();
			FreeErrorData(errdata);

			initStringInfo(&buf);
			appendStringInfo(&buf, "Parse error Record " int64_FMT
				": Input Record " int64_FMT ": Rejected",
				rd->parse_errors, parser->count);

			if (parser->parsing_field > 0)
				appendStringInfo(&buf, " - column %d", parser->parsing_field);

			appendStringInfo(&buf, ". %s\n", message);

			LoggerLog(WARNING, buf.data);

			/* Terminate if PARSE_ERRORS has been reached. */
			if (rd->parse_errors > rd->max_parse_errors)
			{
				eof = true;
				LoggerLog(WARNING,
					"Maximum parse error count exceeded - " int64_FMT
					" error(s) found in input file\n",
					rd->parse_errors);
			}

			/* output parse bad file. */
			if (rd->parse_fp == NULL)
				if ((rd->parse_fp = AllocateFile(rd->parse_badfile, "w")) == NULL)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not open parse bad file \"%s\": %m",
									rd->parse_badfile)));

			ParserDumpRecord(parser, rd->parse_fp, rd->parse_badfile);

			MemoryContextReset(ccxt);
			// Without the below line, the regression tests shows the different result on debug-build mode.
			tuple = NULL;
		}
		PG_END_TRY();

	} while (!eof && !tuple);

	BULKLOAD_PROFILE(&prof_reader_parser);
	return tuple;
}
예제 #23
0
파일: utility.c 프로젝트: asdliuquan/orafce
static char*
dbms_utility_format_call_stack(char mode)
{
	MemoryContext oldcontext = CurrentMemoryContext;
	ErrorData *edata;
	ErrorContextCallback *econtext;
	StringInfo   sinfo;

#if PG_VERSION_NUM >= 80400
	errstart(ERROR, __FILE__, __LINE__, PG_FUNCNAME_MACRO, TEXTDOMAIN);
#else
	errstart(ERROR, __FILE__, __LINE__, PG_FUNCNAME_MACRO);
#endif

	MemoryContextSwitchTo(oldcontext);

	for (econtext = error_context_stack;
		 econtext != NULL;
		 econtext = econtext->previous)
		(*econtext->callback) (econtext->arg);

	edata = CopyErrorData();

	FlushErrorState();

	/* Now I wont to parse edata->context to more traditional format */
	/* I am not sure about order */

	sinfo = makeStringInfo();

	switch (mode)
	{
		case 'o':
			appendStringInfoString(sinfo, "----- PL/pgSQL Call Stack -----\n");
			appendStringInfoString(sinfo, "  object     line  object\n");
			appendStringInfoString(sinfo, "  handle   number  name\n");
			break;
	}

	if (edata->context)
	{
		char *start = edata->context;
		while (*start)
		{
			char *oname =  "anonymous object";
			char *line  = "";
			char *eol = strchr(start, '\n');
			Oid fnoid = InvalidOid;

			/* first, solve multilines */
			if (eol)
				*eol = '\0';

			/* first know format */
			if (strncmp(start, "PL/pgSQL function ",18) == 0)
			{
				char *p1, *p2;

				if ((p1 = strstr(start, "function \"")))
				{
					p1 += strlen("function \"");

					if ((p2 = strchr(p1, '"')))
					{
						*p2++ = '\0';
						oname = p1;
						start = p2;
					}
				}
				else if ((p1 = strstr(start, "function ")))
				{
					p1 += strlen("function ");

					if ((p2 = strchr(p1, ')')))
					{
						char c = *++p2;
						*p2 = '\0';

						oname = pstrdup(p1);
						fnoid = DatumGetObjectId(DirectFunctionCall1(regprocedurein,
							CStringGetDatum(oname)));
						*p2 = c;
						start = p2;
					}
				}


				if ((p1 = strstr(start, "line ")))
				{
					int p2i;
					char c;

					p1 += strlen("line ");
					p2i = strspn(p1, "0123456789");

					/* safe separator */
					c = p1[p2i];

					p1[p2i] = '\0';
					line = pstrdup(p1);
					p1[p2i] = c;

					start = p1 + p2i;
				}
			}

			switch (mode)
			{
				case 'o':
					appendStringInfo(sinfo, "%8x    %5s  function %s", (int)fnoid, line, oname);
					break;

				case 'p':
					appendStringInfo(sinfo, "%8d    %5s  function %s", (int)fnoid, line, oname);
					break;

				case 's':
					appendStringInfo(sinfo, "%d,%s,%s", (int)fnoid, line, oname);
					break;
			}

			if (eol)
			{
				start = eol + 1;
				appendStringInfoChar(sinfo, '\n');
			}
			else
				break;
		}

	}

	return sinfo->data;
}
예제 #24
0
SV *
plperl_spi_query(char *query)
{
	SV		   *cursor;

	/*
	 * Execute the query inside a sub-transaction, so we can cope with errors
	 * sanely
	 */
	MemoryContext oldcontext = CurrentMemoryContext;
	ResourceOwner oldowner = CurrentResourceOwner;

	BeginInternalSubTransaction(NULL);
	/* Want to run inside function's memory context */
	MemoryContextSwitchTo(oldcontext);

	PG_TRY();
	{
		void	   *plan;
		Portal		portal = NULL;

		/* Create a cursor for the query */
		plan = SPI_prepare(query, 0, NULL);
		if (plan)
			portal = SPI_cursor_open(NULL, plan, NULL, NULL, false);
		if (portal)
			cursor = newSVpv(portal->name, 0);
		else
			cursor = newSV(0);

		/* Commit the inner transaction, return to outer xact context */
		ReleaseCurrentSubTransaction();
		MemoryContextSwitchTo(oldcontext);
		CurrentResourceOwner = oldowner;

		/*
		 * AtEOSubXact_SPI() should not have popped any SPI context, but just
		 * in case it did, make sure we remain connected.
		 */
		SPI_restore_connection();
	}
	PG_CATCH();
	{
		ErrorData  *edata;

		/* Save error info */
		MemoryContextSwitchTo(oldcontext);
		edata = CopyErrorData();
		FlushErrorState();

		/* Abort the inner transaction */
		RollbackAndReleaseCurrentSubTransaction();
		MemoryContextSwitchTo(oldcontext);
		CurrentResourceOwner = oldowner;

		/*
		 * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
		 * have left us in a disconnected state.  We need this hack to return
		 * to connected state.
		 */
		SPI_restore_connection();

		/* Punt the error to Perl */
		croak("%s", edata->message);

		/* Can't get here, but keep compiler quiet */
		return NULL;
	}
	PG_END_TRY();

	return cursor;
}
예제 #25
0
inline
void *
AbstractionLayer::Allocator::internalAllocate(void *inPtr, const size_t inSize) const {
    // Avoid warning that inPtr is not used if R == NewAllocation
    (void) inPtr;
    
    void *ptr;
    bool errorOccurred = false;
    MemoryContext oldContext = NULL;
    MemoryContext aggContext = NULL;

    if (F == dbal::ReturnNULL) {
        /*
         * HOLD_INTERRUPTS() and RESUME_INTERRUPTS() only change the value of a
         * global variable but have no other side effects. In particular, they
         * do not call CHECK_INTERRUPTS(). Hence, we are save to use these
         * macros outside of a PG_TRY() block.
         */
        HOLD_INTERRUPTS();
    }
        
    PG_TRY(); {
        if (MC == dbal::AggregateContext) {
            if (!AggCheckCallContext(fcinfo, &aggContext))
                errorOccurred = true;
            else {
                oldContext = MemoryContextSwitchTo(aggContext);
                ptr = (R == Reallocation) ? internalRePalloc<ZM>(inPtr, inSize)
                                          : internalPalloc<ZM>(inSize);
                MemoryContextSwitchTo(oldContext);
            }
        } else {
            ptr = R ? internalRePalloc<ZM>(inPtr, inSize)
                    : internalPalloc<ZM>(inSize);
        }
    } PG_CATCH(); {
        if (F == dbal::ReturnNULL) {
            /*
             * This cannot be due to an interrupt, so it's reasonably safe
             * to assume that the PG exception was a pure memory-allocation
             * issue. We ignore the error and flush the error state.
             * Flushing is necessary for leaving the error state (e.g., the memory
             * context is restored).
             */
            FlushErrorState();
            ptr = NULL;
        } else {
            /*
             * PostgreSQL error messages can be stacked. So, it doesn't hurt to add
             * our own message. After unwinding the C++ stack, the PostgreSQL
             * exception will be re-thrown into the PostgreSQL C code.
             *
             * Throwing C++ exceptions inside a PG_CATCH block is not problematic
             * per se, but it is good practise to keep the exception mechanisms clearly
             * separated.
             */

            errorOccurred = true;
        }
    } PG_END_TRY();

    if (errorOccurred) {
        PG_TRY(); {
            // Clean up after ourselves
            if (oldContext != NULL)
                MemoryContextSwitchTo(oldContext);
        } PG_CATCH(); {
            if (F == dbal::ReturnNULL) {
                // We tried to clean up after ourselves. If this fails, we can
                // only ignore the issue.
                FlushErrorState();
            } 
            // Else do nothing. We will add a bad-allocation exception on top of
            // the existing PostgreSQL exception stack.
        } PG_END_TRY();
    }
    
    if (errorOccurred || !ptr)
        // We do not want to interleave PG exceptions and C++ exceptions.
        throw std::bad_alloc();

    if (F == dbal::ReturnNULL) {
        RESUME_INTERRUPTS();
    }
   
    return ptr;
}
예제 #26
0
static void
WalSendServerDoRequest(WalSendRequest *walSendRequest)
{
	bool successful;
	struct timeval standbyTimeout;

	WalSendServerGetStandbyTimeout(&standbyTimeout);
	
	switch (walSendRequest->command)
	{
	case PositionToEnd:
		elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "PositionToEnd");

		successful = write_position_to_end(&originalEndLocation,
			                               NULL, &walsend_shutdown_requested);
		if (successful)
			elog(LOG,"Standby master returned transaction log end location %s",
				 XLogLocationToString(&originalEndLocation));
		else
		{
			disableQDMirroring_ConnectionError(
				"Unable to connect to standby master and determine transaction log end location",
				GetStandbyErrorString());
			disconnectMirrorQD_SendClose();
		}
		break;
		
	case Catchup:
		elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Catchup");

        if (isQDMirroringCatchingUp())
    	{
    		bool tooFarBehind = false;

			elog(LOG,"Current master transaction log is flushed through location %s",
				 XLogLocationToString(&walSendRequest->flushedLocation));
			
			if (XLByteLT(originalEndLocation, walSendRequest->flushedLocation))
			{
				/*
				 * Standby master is behind the primary.  Send catchup WAL.
				 */
				 
				/* 
				 * Use a TRY block to catch errors from our attempt to read
				 * the primary's WAL.  Errors from sending to the standby
				 * come up as a boolean return (successful).
				 */
				PG_TRY();
				{
					successful = XLogCatchupQDMirror(
									&originalEndLocation, 
									&walSendRequest->flushedLocation,
									&standbyTimeout,
									&walsend_shutdown_requested);
				}
				PG_CATCH();
				{
					/* 
					 * Report the error related to reading the primary's WAL
					 * to the server log 
					 */
					 
					/* 
					 * But first demote the error to something much less
					 * scary.
					 */
				    if (!elog_demote(WARNING))
			    	{
			    		elog(LOG,"unable to demote error");
			        	PG_RE_THROW();
			    	}
					
					EmitErrorReport();
					FlushErrorState();

					successful = false;
					tooFarBehind = true;
				}
				PG_END_TRY();
				
				if (successful)
				{
					elog((Debug_print_qd_mirroring ? LOG : DEBUG5),
						 "catchup send from standby end %s through primary flushed location %s",
						 XLogLocationToString(&originalEndLocation),
						 XLogLocationToString2(&walSendRequest->flushedLocation));
				}

			}
			else if (XLByteEQ(originalEndLocation, walSendRequest->flushedLocation))
			{
				elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"Mirror was already caught up");
				successful = true;
			}
			else
			{
				elog(WARNING,"Standby master transaction log location %s is beyond the current master end location %s",
				     XLogLocationToString(&originalEndLocation),
				     XLogLocationToString2(&walSendRequest->flushedLocation));
				successful = false;
			}
			
			if (successful)
			{
				char detail[200];
				int count;
				
				count = snprintf(
							 detail, sizeof(detail),
							 "Transaction log copied from locations %s through %s to the standby master",
						     XLogLocationToString(&originalEndLocation),
						     XLogLocationToString2(&walSendRequest->flushedLocation));
				if (count >= sizeof(detail))
				{
					ereport(ERROR,
							(errcode(ERRCODE_INTERNAL_ERROR),
							 errmsg("format command string failure")));
				}

				enableQDMirroring("Master mirroring is now synchronized", detail);

				currentEndLocation = walSendRequest->flushedLocation;

				periodicLen = 0;
				periodicLocation = currentEndLocation;
			}
			else
			{
				if (tooFarBehind)
				{
					disableQDMirroring_TooFarBehind(
						"The current master was unable to synchronize the standby master "
						"because the transaction logs on the current master were recycled.  "
						"A gpinitstandby (at an appropriate time) will be necessary to copy "
						"over the whole master database to the standby master so it may be synchronized");
				}
				else
				{
					disableQDMirroring_ConnectionError(
						"Connection to the standby master was lost during transaction log catchup",
						GetStandbyErrorString());
				}
				disconnectMirrorQD_SendClose();
			}
		}
		else if (isQDMirroringDisabled())
		{
			elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master Mirror Send: Master mirroring not catching-up (state is disabled)");
		}
		else
		{
			elog(ERROR,"unexpected master mirroring state %s",
				 QDMirroringStateString());
		}
		
		break;
		
	case WriteWalPages:
		if (Debug_print_qd_mirroring)
			elog(LOG, "WriteWalPages");
		
        if (isQDMirroringEnabled())
        {
			char	   *from;
			Size		nbytes;
			bool		more= false;

			/*
			 * For now, save copy of data until flush.  This could be
			 * optimized.
			 */
			if (saveBuffer == NULL)
			{
				uint32 totalBufferLen = XLOGbuffers * XLOG_BLCKSZ;
				
				saveBuffer = malloc(totalBufferLen);
				if (saveBuffer == NULL)
					elog(ERROR,"Could not allocate buffer for xlog data (%d bytes)",
					     totalBufferLen);
				
				saveBufferLen = 0;
			}

			XLogGetBuffer(walSendRequest->startidx, walSendRequest->npages,
				          &from, &nbytes);

			if (saveBufferLen == 0)
			{
				more = false;
				writeLogId = walSendRequest->logId;
				writeLogSeg = walSendRequest->logSeg;
				writeLogOff = walSendRequest->logOff;

				memcpy(saveBuffer, from, nbytes);
				saveBufferLen = nbytes;
			}
			else
			{
				more = true;
				memcpy(&saveBuffer[saveBufferLen], from, nbytes);
				saveBufferLen += nbytes;
			}
			
			if (Debug_print_qd_mirroring)
				elog(LOG,
					 "Master Mirror Send: WriteWalPages (%s) startidx %d, npages %d, timeLineID %d, logId %u, logSeg %u, logOff 0x%X, nbytes 0x%X",
					 (more ? "more" : "new"),
					 walSendRequest->startidx,
					 walSendRequest->npages,
					 walSendRequest->timeLineID,
					 walSendRequest->logId,
					 walSendRequest->logSeg,
					 walSendRequest->logOff,
					 (int)nbytes);
    	}

	case FlushWalPages:
		if (Debug_print_qd_mirroring)
			elog(LOG, "FlushWalPages");
		
        if (isQDMirroringEnabled())
        {
			char 		cmd[MAXFNAMELEN + 50];

			if (saveBufferLen == 0)
				successful = true;
			else
			{
				if (snprintf(cmd, sizeof(cmd),"xlog %d %d %d %d", 
							 writeLogId, writeLogSeg, writeLogOff, 
							 (int)saveBufferLen) >= sizeof(cmd))
					elog(ERROR,"could not create cmd for qd mirror logid %d seg %d", 
					     writeLogId, writeLogSeg);
				
				successful = write_qd_sync(cmd, saveBuffer, saveBufferLen, 
							               &standbyTimeout,
							               &walsend_shutdown_requested);
				if (successful)
				{
					XLogRecPtr oldEndLocation;
					
					oldEndLocation = currentEndLocation;

					currentEndLocation.xlogid = writeLogId;
					currentEndLocation.xrecoff = writeLogSeg * XLogSegSize + writeLogOff;
					if (currentEndLocation.xrecoff >= XLogFileSize)
					{
						(currentEndLocation.xlogid)++;
						currentEndLocation.xrecoff = 0;
					}

					if (XLByteLT(oldEndLocation,currentEndLocation))
					{
						periodicLen += saveBufferLen;
						if (periodicLen > periodicReportLen)
						{
							elog(LOG,
								 "Master mirroring periodic report: %d bytes successfully send to standby master for locations %s through %s",
								 periodicLen,
								 XLogLocationToString(&periodicLocation),
								 XLogLocationToString2(&currentEndLocation));

							periodicLen = 0;
							periodicLocation = currentEndLocation;
						}
					}
					else
					{
						if (Debug_print_qd_mirroring)
							elog(LOG,
							     "Send to Master mirror successful.  New end location %s (old %s)",
							     XLogLocationToString(&currentEndLocation),
							     XLogLocationToString2(&oldEndLocation));
					}
				}
				else
				{
					disableQDMirroring_ConnectionError(
						"Connection to the standby master was lost attempting to send new transaction log",
						GetStandbyErrorString());
					disconnectMirrorQD_SendClose();
				}

				/*
				 * Reset so WriteWalPages can fill the buffer again.
				 */
				saveBufferLen = 0;
				writeLogId = 0;
				writeLogSeg = 0;
				writeLogOff = 0;
			}
			
			if (successful && walSendRequest->haveNewCheckpointLocation)
			{
				uint32 logid;
				uint32 seg;
				uint32 offset;
				
	        	elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"New previous checkpoint location %s",
				     XLogLocationToString(&walSendRequest->newCheckpointLocation));
				XLByteToSeg(walSendRequest->newCheckpointLocation, logid, seg);
				offset = walSendRequest->newCheckpointLocation.xrecoff % XLogSegSize;
				
				if (snprintf(cmd, sizeof(cmd),"new_checkpoint_location %d %d %d", 
							 logid, seg, offset) >= sizeof(cmd))
					elog(ERROR,"could not create cmd for qd mirror logid %d seg %d offset %d", 
					     logid, seg, offset);
				
				successful = write_qd_sync(cmd, NULL, 0, 
					                       NULL, &walsend_shutdown_requested);
				if (successful)
				{
					elog((Debug_print_qd_mirroring ? LOG : DEBUG5),"Send of new checkpoint location to master mirror successful");
				}
				else
				{
					disableQDMirroring_ConnectionError(
						"Connection to the standby master was lost attempting to send new checkpoint location",
						GetStandbyErrorString());
					disconnectMirrorQD_SendClose();
				}
			}
			
        }
		else if (isQDMirroringDisabled())
		{
			elog((Debug_print_qd_mirroring ? LOG : DEBUG5), "Master Mirror Send: Master mirroring not enabled");
		}
		else
		{
			elog(ERROR,"unexpected master mirroring state %s",
				 QDMirroringStateString());
		}
		
		break;

	case CloseForShutdown:
		if (Debug_print_qd_mirroring)
			elog(LOG, "CloseForShutdown");

		/*
		 * Do the work we would normally do when signaled to stop.
		 */
		WalSendServer_ServiceShutdown();
		break;

	default:
		elog(ERROR, "Unknown WalSendRequestCommand %d", walSendRequest->command);
	}

}
예제 #27
0
HV *
plperl_spi_exec(char *query, int limit)
{
	HV		   *ret_hv;

	/*
	 * Execute the query inside a sub-transaction, so we can cope with errors
	 * sanely
	 */
	MemoryContext oldcontext = CurrentMemoryContext;
	ResourceOwner oldowner = CurrentResourceOwner;

	BeginInternalSubTransaction(NULL);
	/* Want to run inside function's memory context */
	MemoryContextSwitchTo(oldcontext);

	PG_TRY();
	{
		int			spi_rv;

		spi_rv = SPI_execute(query, plperl_current_prodesc->fn_readonly,
							 limit);
		ret_hv = plperl_spi_execute_fetch_result(SPI_tuptable, SPI_processed,
												 spi_rv);

		/* Commit the inner transaction, return to outer xact context */
		ReleaseCurrentSubTransaction();
		MemoryContextSwitchTo(oldcontext);
		CurrentResourceOwner = oldowner;

		/*
		 * AtEOSubXact_SPI() should not have popped any SPI context, but just
		 * in case it did, make sure we remain connected.
		 */
		SPI_restore_connection();
	}
	PG_CATCH();
	{
		ErrorData  *edata;

		/* Save error info */
		MemoryContextSwitchTo(oldcontext);
		edata = CopyErrorData();
		FlushErrorState();

		/* Abort the inner transaction */
		RollbackAndReleaseCurrentSubTransaction();
		MemoryContextSwitchTo(oldcontext);
		CurrentResourceOwner = oldowner;

		/*
		 * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
		 * have left us in a disconnected state.  We need this hack to return
		 * to connected state.
		 */
		SPI_restore_connection();

		/* Punt the error to Perl */
		croak("%s", edata->message);

		/* Can't get here, but keep compiler quiet */
		return NULL;
	}
	PG_END_TRY();

	return ret_hv;
}
예제 #28
0
파일: walwriter.c 프로젝트: GisKook/Gis
/*
 * Main entry point for walwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 */
void
WalWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext walwriter_context;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.	(walwriter probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * We have no particular use for SIGINT at the moment, but seems
	 * reasonable to treat like SIGTERM.
	 */
	pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
	pqsignal(SIGINT, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGTERM, WalShutdownHandler);		/* request shutdown */
	pqsignal(SIGQUIT, wal_quickdie);	/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
	pqsignal(SIGUSR2, SIG_IGN); /* not used */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
	sigdelset(&BlockSig, SIGQUIT);

	/*
	 * Create a resource owner to keep track of our resources (not clear that
	 * we need this, but may as well have one).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	walwriter_context = AllocSetContextCreate(TopMemoryContext,
											  "Wal Writer",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(walwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is heavily based on bgwriter.c, q.v.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().	We don't have very many resources to worry
		 * about in walwriter, but we do have LWLocks, and perhaps buffers?
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(walwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(walwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Loop forever
	 */
	for (;;)
	{
		long		udelay;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Process any requests or signals received recently.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (shutdown_requested)
		{
			/* Normal exit from the walwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Do what we're here for...
		 */
		XLogBackgroundFlush();

		/*
		 * Delay until time to do something more, but fall out of delay
		 * reasonably quickly if signaled.
		 */
		udelay = WalWriterDelay * 1000L;
		while (udelay > 999999L)
		{
			if (got_SIGHUP || shutdown_requested)
				break;
			pg_usleep(1000000L);
			udelay -= 1000000L;
		}
		if (!(got_SIGHUP || shutdown_requested))
			pg_usleep(udelay);
	}
}
예제 #29
0
SV *
plperl_spi_fetchrow(char *cursor)
{
	SV		   *row;

	/*
	 * Execute the FETCH inside a sub-transaction, so we can cope with errors
	 * sanely
	 */
	MemoryContext oldcontext = CurrentMemoryContext;
	ResourceOwner oldowner = CurrentResourceOwner;

	BeginInternalSubTransaction(NULL);
	/* Want to run inside function's memory context */
	MemoryContextSwitchTo(oldcontext);

	PG_TRY();
	{
		Portal		p = SPI_cursor_find(cursor);

		if (!p)
			row = newSV(0);
		else
		{
			SPI_cursor_fetch(p, true, 1);
			if (SPI_processed == 0)
			{
				SPI_cursor_close(p);
				row = newSV(0);
			}
			else
			{
				row = plperl_hash_from_tuple(SPI_tuptable->vals[0],
											 SPI_tuptable->tupdesc);
			}
			SPI_freetuptable(SPI_tuptable);
		}

		/* Commit the inner transaction, return to outer xact context */
		ReleaseCurrentSubTransaction();
		MemoryContextSwitchTo(oldcontext);
		CurrentResourceOwner = oldowner;

		/*
		 * AtEOSubXact_SPI() should not have popped any SPI context, but just
		 * in case it did, make sure we remain connected.
		 */
		SPI_restore_connection();
	}
	PG_CATCH();
	{
		ErrorData  *edata;

		/* Save error info */
		MemoryContextSwitchTo(oldcontext);
		edata = CopyErrorData();
		FlushErrorState();

		/* Abort the inner transaction */
		RollbackAndReleaseCurrentSubTransaction();
		MemoryContextSwitchTo(oldcontext);
		CurrentResourceOwner = oldowner;

		/*
		 * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
		 * have left us in a disconnected state.  We need this hack to return
		 * to connected state.
		 */
		SPI_restore_connection();

		/* Punt the error to Perl */
		croak("%s", edata->message);

		/* Can't get here, but keep compiler quiet */
		return NULL;
	}
	PG_END_TRY();

	return row;
}
예제 #30
0
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from BootstrapMain, which has already created the basic
 * execution environment, but not enabled signals yet.
 */
void
BackgroundWriterMain(void)
{
	sigjmp_buf	local_sigjmp_buf;
	MemoryContext bgwriter_context;

	BgWriterShmem->bgwriter_pid = MyProcPid;
	am_bg_writer = true;

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.	(bgwriter probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Properly accept or ignore signals the postmaster might send us
	 *
	 * Note: we deliberately ignore SIGTERM, because during a standard Unix
	 * system shutdown cycle, init will SIGTERM all processes at once.	We
	 * want to wait for the backends to exit, whereupon the postmaster will
	 * tell us it's okay to shut down (via SIGUSR2).
	 *
	 * SIGUSR1 is presently unused; keep it spare in case someday we want this
	 * process to participate in sinval messaging.
	 */
	pqsignal(SIGHUP, BgSigHupHandler);	/* set flag to read config file */
	pqsignal(SIGINT, ReqCheckpointHandler);		/* request checkpoint */
	pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN); /* reserve for sinval */
	pqsignal(SIGUSR2, ReqShutdownHandler);		/* request shutdown */

	/*
	 * Reset some signals that are accepted by postmaster but not here
	 */
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);

	/* We allow SIGQUIT (quickdie) at all times */
#ifdef HAVE_SIGPROCMASK
	sigdelset(&BlockSig, SIGQUIT);
#else
	BlockSig &= ~(sigmask(SIGQUIT));
#endif

	/*
	 * Initialize so that first time-driven event happens at the correct time.
	 */
	last_checkpoint_time = last_xlog_switch_time = time(NULL);

	/*
	 * Create a resource owner to keep track of our resources (currently only
	 * buffer pins).
	 */
	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.  Formerly this code just ran in
	 * TopMemoryContext, but resetting that would be a really bad idea.
	 */
	bgwriter_context = AllocSetContextCreate(TopMemoryContext,
											 "Background Writer",
											 ALLOCSET_DEFAULT_MINSIZE,
											 ALLOCSET_DEFAULT_INITSIZE,
											 ALLOCSET_DEFAULT_MAXSIZE);
	MemoryContextSwitchTo(bgwriter_context);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevent interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
		 * These operations are really just a minimal subset of
		 * AbortTransaction().	We don't have very many resources to worry
		 * about in bgwriter, but we do have LWLocks, buffers, and temp files.
		 */
		LWLockReleaseAll();
		AbortBufferIO();
		UnlockBuffers();
		/* buffer pins are released here: */
		ResourceOwnerRelease(CurrentResourceOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, true);
		/* we needn't bother with the other ResourceOwnerRelease phases */
		AtEOXact_Buffers(false);
		AtEOXact_Files();
		AtEOXact_HashTables(false);

		/* Warn any waiting backends that the checkpoint failed. */
		if (ckpt_active)
		{
			/* use volatile pointer to prevent code rearrangement */
			volatile BgWriterShmemStruct *bgs = BgWriterShmem;

			SpinLockAcquire(&bgs->ckpt_lck);
			bgs->ckpt_failed++;
			bgs->ckpt_done = bgs->ckpt_started;
			SpinLockRelease(&bgs->ckpt_lck);

			ckpt_active = false;
		}

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(bgwriter_context);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(bgwriter_context);

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/*
		 * Sleep at least 1 second after any error.  A write error is likely
		 * to be repeated, and we don't want to be filling the error logs as
		 * fast as we can.
		 */
		pg_usleep(1000000L);

		/*
		 * Close all open files after any error.  This is helpful on Windows,
		 * where holding deleted files open causes various strange errors.
		 * It's not clear we need it elsewhere, but shouldn't hurt.
		 */
		smgrcloseall();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/*
	 * Unblock signals (they were blocked when the postmaster forked us)
	 */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Loop forever
	 */
	for (;;)
	{
		bool		do_checkpoint = false;
		int			flags = 0;
		time_t		now;
		int			elapsed_secs;

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/*
		 * Process any requests or signals received recently.
		 */
		AbsorbFsyncRequests();

		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
		if (checkpoint_requested)
		{
			checkpoint_requested = false;
			do_checkpoint = true;
			BgWriterStats.m_requested_checkpoints++;
		}
		if (shutdown_requested)
		{
			/*
			 * From here on, elog(ERROR) should end with exit(1), not send
			 * control back to the sigsetjmp block above
			 */
			ExitOnAnyError = true;
			/* Close down the database */
			ShutdownXLOG(0, 0);
			DumpFreeSpaceMap(0, 0);
			/* Normal exit from the bgwriter is here */
			proc_exit(0);		/* done */
		}

		/*
		 * Force a checkpoint if too much time has elapsed since the last one.
		 * Note that we count a timed checkpoint in stats only when this
		 * occurs without an external request, but we set the CAUSE_TIME flag
		 * bit even if there is also an external request.
		 */
		now = time(NULL);
		elapsed_secs = now - last_checkpoint_time;
		if (elapsed_secs >= CheckPointTimeout)
		{
			if (!do_checkpoint)
				BgWriterStats.m_timed_checkpoints++;
			do_checkpoint = true;
			flags |= CHECKPOINT_CAUSE_TIME;
		}

		/*
		 * Do a checkpoint if requested, otherwise do one cycle of
		 * dirty-buffer writing.
		 */
		if (do_checkpoint)
		{
			/* use volatile pointer to prevent code rearrangement */
			volatile BgWriterShmemStruct *bgs = BgWriterShmem;

			/*
			 * Atomically fetch the request flags to figure out what kind of a
			 * checkpoint we should perform, and increase the started-counter
			 * to acknowledge that we've started a new checkpoint.
			 */
			SpinLockAcquire(&bgs->ckpt_lck);
			flags |= bgs->ckpt_flags;
			bgs->ckpt_flags = 0;
			bgs->ckpt_started++;
			SpinLockRelease(&bgs->ckpt_lck);

			/*
			 * We will warn if (a) too soon since last checkpoint (whatever
			 * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
			 * since the last checkpoint start.  Note in particular that this
			 * implementation will not generate warnings caused by
			 * CheckPointTimeout < CheckPointWarning.
			 */
			if ((flags & CHECKPOINT_CAUSE_XLOG) &&
				elapsed_secs < CheckPointWarning)
				ereport(LOG,
						(errmsg("checkpoints are occurring too frequently (%d seconds apart)",
								elapsed_secs),
						 errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));

			/*
			 * Initialize bgwriter-private variables used during checkpoint.
			 */
			ckpt_active = true;
			ckpt_start_recptr = GetInsertRecPtr();
			ckpt_start_time = now;
			ckpt_cached_elapsed = 0;

			/*
			 * Do the checkpoint.
			 */
			CreateCheckPoint(flags);

			/*
			 * After any checkpoint, close all smgr files.	This is so we
			 * won't hang onto smgr references to deleted files indefinitely.
			 */
			smgrcloseall();

			/*
			 * Indicate checkpoint completion to any waiting backends.
			 */
			SpinLockAcquire(&bgs->ckpt_lck);
			bgs->ckpt_done = bgs->ckpt_started;
			SpinLockRelease(&bgs->ckpt_lck);

			ckpt_active = false;

			/*
			 * Note we record the checkpoint start time not end time as
			 * last_checkpoint_time.  This is so that time-driven checkpoints
			 * happen at a predictable spacing.
			 */
			last_checkpoint_time = now;
		}
		else
			BgBufferSync();

		/* Check for archive_timeout and switch xlog files if necessary. */
		CheckArchiveTimeout();

		/* Nap for the configured time. */
		BgWriterNap();
	}
}