Пример #1
0
/*
 * do_bdr_unregister()
 *
 * Delete a node's record from the repmgr metadata of a BDR cluster.
 *
 * The node to remove is runtime_options.node_id when that is set, otherwise
 * the local node (config_file_options.node_id). The deletion is performed in
 * its own transaction, and the outcome (success or failure) is recorded as a
 * "bdr_unregister" event.
 *
 * Exits with ERR_BAD_CONFIG when the local configuration or database is not
 * usable for BDR, or when no record exists for the target node.
 */
void
do_bdr_unregister(void)
{
	PGconn	   *conn = NULL;
	ExtensionStatus extension_status = REPMGR_UNKNOWN;
	int			target_node_id = UNKNOWN_NODE_ID;
	t_node_info node_info = T_NODE_INFO_INITIALIZER;
	RecordStatus record_status = RECORD_NOT_FOUND;
	bool		node_record_deleted = false;
	PQExpBufferData event_details;
	char	   *dbname;

	/* sanity-check configuration for BDR-compatability */

	if (config_file_options.replication_type != REPLICATION_TYPE_BDR)
	{
		log_error(_("cannot run BDR UNREGISTER on a non-BDR node"));
		exit(ERR_BAD_CONFIG);
	}

	dbname = pg_malloc0(MAXLEN);

	if (dbname == NULL)
	{
		log_error(_("unable to allocate memory; terminating."));
		exit(ERR_OUT_OF_MEMORY);
	}

	/* store the database name for use in the error messages below */
	get_conninfo_value(config_file_options.conninfo, "dbname", dbname);

	conn = establish_db_connection(config_file_options.conninfo, true);

	if (!is_bdr_db(conn, NULL))
	{
		log_error(_("database \"%s\" is not BDR-enabled"), dbname);
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	extension_status = get_repmgr_extension_status(conn, NULL);
	if (extension_status != REPMGR_INSTALLED)
	{
		log_error(_("repmgr is not installed on database \"%s\""), dbname);
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	/* the database name is not needed beyond this point */
	pfree(dbname);

	if (!is_bdr_repmgr(conn))
	{
		log_error(_("repmgr metadatabase contains records for non-BDR nodes"));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/* an explicitly provided node ID takes precedence over the local node */
	if (runtime_options.node_id != UNKNOWN_NODE_ID)
		target_node_id = runtime_options.node_id;
	else
		target_node_id = config_file_options.node_id;


	/* Check a record for the target node exists */
	record_status = get_node_record(conn, target_node_id, &node_info);

	if (record_status != RECORD_FOUND)
	{
		log_error(_("no record found for node %i"), target_node_id);
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * Only allocate the event buffer once all early-exit checks have passed,
	 * so none of those paths leaves it unreleased.
	 */
	initPQExpBuffer(&event_details);

	begin_transaction(conn);

	log_debug("unregistering node %i", target_node_id);

	node_record_deleted = delete_node_record(conn, target_node_id);

	if (node_record_deleted == false)
	{
		appendPQExpBuffer(&event_details,
						  "unable to delete node record for node \"%s\" (ID: %i)",
						  node_info.node_name,
						  target_node_id);
		rollback_transaction(conn);
	}
	else
	{
		appendPQExpBuffer(&event_details,
						  "node record deleted for node \"%s\" (ID: %i)",
						  node_info.node_name,
						  target_node_id);
		commit_transaction(conn);
	}


	/*
	 * Log the event; the success flag reflects whether the record was
	 * actually deleted, so a failed attempt is not recorded as a success.
	 */
	create_event_notification(
							  conn,
							  &config_file_options,
							  config_file_options.node_id,
							  "bdr_unregister",
							  node_record_deleted,
							  event_details.data);

	PQfinish(conn);

	if (node_record_deleted == true)
	{
		log_notice(_("bdr node \"%s\" (ID: %i) successfully unregistered"),
				   node_info.node_name, target_node_id);
	}
	else
	{
		log_error(_("unable to unregister bdr node \"%s\" (ID: %i)"),
				  node_info.node_name, target_node_id);
	}

	termPQExpBuffer(&event_details);

	return;
}
Пример #2
0
/*
 * reload_config()
 *
 * Re-read the configuration file and merge any changed settings into
 * *orig_options.
 *
 * The new configuration is validated first; if any check fails a warning is
 * logged, *orig_options is left untouched and false is returned. The cluster
 * name, node ID and node name may not be changed by a reload. If the
 * conninfo string has changed, it is tested by opening (and immediately
 * closing) a connection with it.
 *
 * Returns true if at least one setting in *orig_options was changed,
 * false otherwise (including when the new configuration was rejected).
 */
bool
reload_config(t_configuration_options *orig_options)
{
	PGconn	   *conn = NULL;
	t_configuration_options new_options;
	bool	  config_changed = false;

	/*
	 * Re-read the configuration file: repmgr.conf
	 */
	log_info(_("reloading configuration file and updating repmgr tables\n"));

	parse_config(&new_options);
	if (new_options.node == -1)
	{
		log_warning(_("unable to parse new configuration, retaining current configuration\n"));
		return false;
	}

	if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
	{
		log_warning(_("unable to change cluster name, retaining current configuration\n"));
		return false;
	}

	if (new_options.node != orig_options->node)
	{
		log_warning(_("unable to change node ID, retaining current configuration\n"));
		return false;
	}

	if (strcmp(new_options.node_name, orig_options->node_name) != 0)
	{
		log_warning(_("unable to change standby name, keeping current configuration\n"));
		return false;
	}

	if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER)
	{
		log_warning(_("new value for 'failover' must be 'automatic' or 'manual'\n"));
		return false;
	}

	if (new_options.master_response_timeout <= 0)
	{
		log_warning(_("new value for 'master_response_timeout' must be greater than zero\n"));
		return false;
	}

	if (new_options.reconnect_attempts < 0)
	{
		log_warning(_("new value for 'reconnect_attempts' must be zero or greater\n"));
		return false;
	}

	if (new_options.reconnect_interval < 0)
	{
		log_warning(_("new value for 'reconnect_interval' must be zero or greater\n"));
		return false;
	}

	if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
	{
		/* Test conninfo string */
		conn = establish_db_connection(new_options.conninfo, false);
		if (!conn || (PQstatus(conn) != CONNECTION_OK))
		{
			log_warning(_("'conninfo' string is not valid, retaining current configuration\n"));

			/*
			 * A connection object may have been allocated even though the
			 * connection attempt failed; release it so it is not leaked on
			 * every rejected reload.
			 */
			if (conn != NULL)
				PQfinish(conn);

			return false;
		}
		PQfinish(conn);
	}

	/*
	 * No configuration problems detected - copy any changed values
	 *
	 * NB: keep these in the same order as in config.h to make it easier
	 * to manage them
	 */

	/*
	 * cluster_name
	 *
	 * (the validation above already rejects a changed cluster_name, node or
	 * node_name, so those three comparisons are not expected to be true
	 * here; they are kept so the list mirrors config.h)
	 */
	if (strcmp(orig_options->cluster_name, new_options.cluster_name) != 0)
	{
		strcpy(orig_options->cluster_name, new_options.cluster_name);
		config_changed = true;
	}

	/* conninfo */
	if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
	{
		strcpy(orig_options->conninfo, new_options.conninfo);
		config_changed = true;
	}

	/* node */
	if (orig_options->node != new_options.node)
	{
		orig_options->node = new_options.node;
		config_changed = true;
	}

	/* failover */
	if (orig_options->failover != new_options.failover)
	{
		orig_options->failover = new_options.failover;
		config_changed = true;
	}

	/* priority */
	if (orig_options->priority != new_options.priority)
	{
		orig_options->priority = new_options.priority;
		config_changed = true;
	}

	/* node_name */
	if (strcmp(orig_options->node_name, new_options.node_name) != 0)
	{
		strcpy(orig_options->node_name, new_options.node_name);
		config_changed = true;
	}

	/* promote_command */
	if (strcmp(orig_options->promote_command, new_options.promote_command) != 0)
	{
		strcpy(orig_options->promote_command, new_options.promote_command);
		config_changed = true;
	}

	/* follow_command */
	if (strcmp(orig_options->follow_command, new_options.follow_command) != 0)
	{
		strcpy(orig_options->follow_command, new_options.follow_command);
		config_changed = true;
	}

	/*
	 * XXX These ones can change with a simple SIGHUP?
	 *
	 * strcpy (orig_options->loglevel, new_options.loglevel); strcpy
	 * (orig_options->logfacility, new_options.logfacility);
	 *
	 * logger_shutdown(); XXX do we have progname here ? logger_init(progname,
	 * orig_options.loglevel, orig_options.logfacility);
	 */

	/* rsync_options */
	if (strcmp(orig_options->rsync_options, new_options.rsync_options) != 0)
	{
		strcpy(orig_options->rsync_options, new_options.rsync_options);
		config_changed = true;
	}

	/* ssh_options */
	if (strcmp(orig_options->ssh_options, new_options.ssh_options) != 0)
	{
		strcpy(orig_options->ssh_options, new_options.ssh_options);
		config_changed = true;
	}

	/* master_response_timeout */
	if (orig_options->master_response_timeout != new_options.master_response_timeout)
	{
		orig_options->master_response_timeout = new_options.master_response_timeout;
		config_changed = true;
	}

	/* reconnect_attempts */
	if (orig_options->reconnect_attempts != new_options.reconnect_attempts)
	{
		orig_options->reconnect_attempts = new_options.reconnect_attempts;
		config_changed = true;
	}

	/* reconnect_interval */
	if (orig_options->reconnect_interval != new_options.reconnect_interval)
	{
		orig_options->reconnect_interval = new_options.reconnect_interval;
		config_changed = true;
	}

	/* pg_ctl_options */
	if (strcmp(orig_options->pg_ctl_options, new_options.pg_ctl_options) != 0)
	{
		strcpy(orig_options->pg_ctl_options, new_options.pg_ctl_options);
		config_changed = true;
	}

	/* pg_basebackup_options */
	if (strcmp(orig_options->pg_basebackup_options, new_options.pg_basebackup_options) != 0)
	{
		strcpy(orig_options->pg_basebackup_options, new_options.pg_basebackup_options);
		config_changed = true;
	}

	/* monitor_interval_secs */
	if (orig_options->monitor_interval_secs != new_options.monitor_interval_secs)
	{
		orig_options->monitor_interval_secs = new_options.monitor_interval_secs;
		config_changed = true;
	}

	/* retry_promote_interval_secs */
	if (orig_options->retry_promote_interval_secs != new_options.retry_promote_interval_secs)
	{
		orig_options->retry_promote_interval_secs = new_options.retry_promote_interval_secs;
		config_changed = true;
	}

	/* use_replication_slots */
	if (orig_options->use_replication_slots != new_options.use_replication_slots)
	{
		orig_options->use_replication_slots = new_options.use_replication_slots;
		config_changed = true;
	}

	if (config_changed == true)
	{
		log_debug(_("reload_config(): configuration has changed\n"));
	}
	else
	{
		log_debug(_("reload_config(): configuration has not changed\n"));
	}

	return config_changed;
}
Пример #3
0
/*
 * get_master_connection()
 *
 * Locate the current master of "cluster" by connecting to each non-witness
 * node listed in the repl_nodes table (queried through standby_conn) and
 * asking it whether it is in recovery.
 *
 * standby_conn         - open connection used to read the node list
 * cluster              - cluster name used to filter repl_nodes
 * master_id            - if not NULL, receives the master's node ID, or
 *                        NODE_NOT_FOUND if no master was found
 * master_conninfo_out  - if not NULL, receives the conninfo string of the
 *                        node most recently checked (the master's, when one
 *                        is found); must be able to hold MAXCONNINFO bytes
 *
 * Returns an open connection to the master (the caller must close it), or
 * NULL if the node list could not be read or no node reported that it is
 * not in recovery.
 */
PGconn *
get_master_connection(PGconn *standby_conn, char *cluster,
					  int *master_id, char *master_conninfo_out)
{
	PGconn	   *master_conn = NULL;
	PGresult   *res1;
	PGresult   *res2;
	char		sqlquery[QUERY_STR_LEN];
	char		master_conninfo_stack[MAXCONNINFO];
	char	   *master_conninfo = &*master_conninfo_stack;

	int			i,
				node_id;

	if (master_id != NULL)
	{
		*master_id = NODE_NOT_FOUND;
	}

	/*
	 * If the caller wanted to get a copy of the connection info string, sub
	 * out the local stack pointer for the pointer passed by the caller.
	 */
	if (master_conninfo_out != NULL)
		master_conninfo = master_conninfo_out;

	/* find all nodes belonging to this cluster */
	log_info(_("finding node list for cluster '%s'\n"),
			 cluster);

	sqlquery_snprintf(sqlquery,
					  "SELECT id, conninfo "
					  "  FROM %s.repl_nodes "
					  " WHERE cluster = '%s' "
					  "   AND type != 'witness' ",
					  get_repmgr_schema_quoted(standby_conn),
					  cluster);

	res1 = PQexec(standby_conn, sqlquery);
	if (PQresultStatus(res1) != PGRES_TUPLES_OK)
	{
		log_err(_("unable to retrieve node records: %s\n"),
				PQerrorMessage(standby_conn));
		PQclear(res1);
		return NULL;
	}

	for (i = 0; i < PQntuples(res1); i++)
	{
		/* initialize with the values of the current node being processed */
		node_id = atoi(PQgetvalue(res1, i, 0));
		strncpy(master_conninfo, PQgetvalue(res1, i, 1), MAXCONNINFO);
		/* strncpy() does not terminate the string if the source is too long */
		master_conninfo[MAXCONNINFO - 1] = '\0';
		log_info(_("checking role of cluster node '%i'\n"),
				 node_id);
		master_conn = establish_db_connection(master_conninfo, false);

		if (PQstatus(master_conn) != CONNECTION_OK)
		{
			/* release the failed connection object before trying the next node */
			if (master_conn != NULL)
				PQfinish(master_conn);
			continue;
		}

		/*
		 * Can't use the is_standby() function here because on error that
		 * function closes the connection passed and exits.  This still needs
		 * to close master_conn first.
		 */
		res2 = PQexec(master_conn, "SELECT pg_catalog.pg_is_in_recovery()");

		if (PQresultStatus(res2) != PGRES_TUPLES_OK)
		{
			log_err(_("unable to retrieve recovery state from this node: %s\n"),
					PQerrorMessage(master_conn));
			PQclear(res2);
			PQfinish(master_conn);
			continue;
		}

		/* if false, this is the master */
		if (strcmp(PQgetvalue(res2, 0, 0), "f") == 0)
		{
			PQclear(res2);
			PQclear(res1);
			log_debug(_("get_master_connection(): current master node is %i\n"), node_id);

			if (master_id != NULL)
			{
				*master_id = node_id;
			}

			return master_conn;
		}
		else
		{
			/* if it is a standby, clear info */
			PQclear(res2);
			PQfinish(master_conn);
		}
	}

	/*
	 * If we finish this loop without finding a master then we don't have
	 * the info or the master has failed (or we reached max_connections or
	 * superuser_reserved_connections, anything else I'm missing?).
	 *
	 * Probably we will need to check the error to know if we need to start
	 * failover procedure or just fix some situation on the standby.
	 */
	PQclear(res1);
	return NULL;
}
Пример #4
0
/*
 * do_bdr_register()
 *
 * As each BDR node is its own primary, registering a BDR node
 * will create the repmgr metadata schema if necessary.
 *
 * Steps, in order:
 *  - verify the local configuration and database are BDR (2.x) and that
 *    "node_name" matches the local BDR node name;
 *  - create the repmgr extension if it is not yet installed;
 *  - if the local repmgr node table is empty, copy the node records from
 *    the first other BDR node which has repmgr installed;
 *  - add the repmgr tables to a BDR replication set;
 *  - create the local node record, or update it when -F/--force was given.
 *
 * Exits with ERR_BAD_CONFIG on any precondition failure and ERR_DB_QUERY if
 * the node record could not be created or updated.
 */
void
do_bdr_register(void)
{
	PGconn	   *conn = NULL;
	BdrNodeInfoList bdr_nodes = T_BDR_NODE_INFO_LIST_INITIALIZER;
	ExtensionStatus extension_status = REPMGR_UNKNOWN;
	t_node_info node_info = T_NODE_INFO_INITIALIZER;
	RecordStatus record_status = RECORD_NOT_FOUND;
	PQExpBufferData event_details;
	bool		success = true;
	char	   *dbname = NULL;

	/* sanity-check configuration for BDR-compatability */
	if (config_file_options.replication_type != REPLICATION_TYPE_BDR)
	{
		log_error(_("cannot run BDR REGISTER on a non-BDR node"));
		exit(ERR_BAD_CONFIG);
	}

	dbname = pg_malloc0(MAXLEN);

	if (dbname == NULL)
	{
		log_error(_("unable to allocate memory; terminating."));
		exit(ERR_OUT_OF_MEMORY);
	}

	/* store the database name for use in the messages below */
	get_conninfo_value(config_file_options.conninfo, "dbname", dbname);

	conn = establish_db_connection(config_file_options.conninfo, true);

	if (!is_bdr_db(conn, NULL))
	{
		log_error(_("database \"%s\" is not BDR-enabled"), dbname);
		log_hint(_("when using repmgr with BDR, the repmgr schema must be stored in the BDR database"));
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	/* Check that there are at most 2 BDR nodes */
	get_all_bdr_node_records(conn, &bdr_nodes);

	if (bdr_nodes.node_count == 0)
	{
		log_error(_("database \"%s\" is BDR-enabled but no BDR nodes were found"), dbname);
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	/* BDR 2 implementation is for 2 nodes only */
	if (get_bdr_version_num() < 3 && bdr_nodes.node_count > 2)
	{
		log_error(_("repmgr can only support BDR 2.x clusters with 2 nodes"));
		log_detail(_("this BDR cluster has %i nodes"), bdr_nodes.node_count);
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	if (get_bdr_version_num() > 2)
	{
		log_error(_("\"repmgr bdr register\" is for BDR 2.x only"));
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}


	/* check for a matching BDR node */
	{
		PQExpBufferData bdr_local_node_name;
		bool		node_match = false;

		initPQExpBuffer(&bdr_local_node_name);
		node_match = bdr_node_name_matches(conn, config_file_options.node_name, &bdr_local_node_name);

		if (node_match == false)
		{
			if (strlen(bdr_local_node_name.data))
			{
				log_error(_("local node BDR node name is \"%s\", expected: \"%s\""),
						  bdr_local_node_name.data,
						  config_file_options.node_name);
				log_hint(_("\"node_name\" in repmgr.conf must match \"node_name\" in bdr.bdr_nodes"));
			}
			else
			{
				log_error(_("local node does not report BDR node name"));
				log_hint(_("ensure this is an active BDR node"));
			}

			PQfinish(conn);
			pfree(dbname);
			termPQExpBuffer(&bdr_local_node_name);
			exit(ERR_BAD_CONFIG);
		}

		termPQExpBuffer(&bdr_local_node_name);
	}

	/* check whether repmgr extension exists, and there are no non-BDR nodes registered */
	extension_status = get_repmgr_extension_status(conn, NULL);

	if (extension_status == REPMGR_UNKNOWN)
	{
		log_error(_("unable to determine status of \"repmgr\" extension in database \"%s\""),
				  dbname);
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	if (extension_status == REPMGR_UNAVAILABLE)
	{
		log_error(_("\"repmgr\" extension is not available"));
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	if (extension_status == REPMGR_INSTALLED)
	{
		if (!is_bdr_repmgr(conn))
		{
			log_error(_("repmgr metadatabase contains records for non-BDR nodes"));
			PQfinish(conn);
			pfree(dbname);
			exit(ERR_BAD_CONFIG);
		}
	}
	else
	{
		log_debug("creating repmgr extension in database \"%s\"", dbname);

		begin_transaction(conn);

		if (!create_repmgr_extension(conn))
		{
			log_error(_("unable to create repmgr extension - see preceding error message(s); aborting"));
			rollback_transaction(conn);
			pfree(dbname);
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}

		commit_transaction(conn);
	}

	/* the database name is not needed beyond this point */
	pfree(dbname);

	if (bdr_node_has_repmgr_set(conn, config_file_options.node_name) == false)
	{
		log_debug("bdr_node_has_repmgr_set() = false");
		bdr_node_set_repmgr_set(conn, config_file_options.node_name);
	}

	/*
	 * before adding the extension tables to the replication set, if any other
	 * BDR nodes exist, populate repmgr.nodes with a copy of existing entries
	 *
	 * currently we won't copy the contents of any other tables
	 *
	 */
	{
		NodeInfoList local_node_records = T_NODE_INFO_LIST_INITIALIZER;

		(void) get_all_node_records(conn, &local_node_records);

		if (local_node_records.node_count == 0)
		{
			/* named distinctly so it does not shadow the function-level list */
			BdrNodeInfoList peer_bdr_nodes = T_BDR_NODE_INFO_LIST_INITIALIZER;
			BdrNodeInfoListCell *bdr_cell = NULL;

			get_all_bdr_node_records(conn, &peer_bdr_nodes);

			if (peer_bdr_nodes.node_count == 0)
			{
				log_error(_("unable to retrieve any BDR node records"));
				log_detail("%s", PQerrorMessage(conn));
				PQfinish(conn);
				exit(ERR_BAD_CONFIG);
			}

			for (bdr_cell = peer_bdr_nodes.head; bdr_cell; bdr_cell = bdr_cell->next)
			{
				PGconn	   *bdr_node_conn = NULL;
				NodeInfoList existing_nodes = T_NODE_INFO_LIST_INITIALIZER;
				NodeInfoListCell *cell = NULL;
				ExtensionStatus other_node_extension_status = REPMGR_UNKNOWN;

				/*
				 * skip the local node
				 *
				 * node_info has not been populated yet at this point, so the
				 * comparison uses the configured node name, which was
				 * verified above to match the local BDR node name.
				 */
				if (strncmp(config_file_options.node_name, bdr_cell->node_info->node_name, sizeof(node_info.node_name)) == 0)
				{
					continue;
				}

				log_debug("connecting to BDR node \"%s\" (conninfo: \"%s\")",
						  bdr_cell->node_info->node_name,
						  bdr_cell->node_info->node_local_dsn);
				bdr_node_conn = establish_db_connection_quiet(bdr_cell->node_info->node_local_dsn);

				if (PQstatus(bdr_node_conn) != CONNECTION_OK)
				{
					/* release the failed connection object before moving on */
					if (bdr_node_conn != NULL)
						PQfinish(bdr_node_conn);
					continue;
				}

				/* check repmgr schema exists, skip if not */
				other_node_extension_status = get_repmgr_extension_status(bdr_node_conn, NULL);

				if (other_node_extension_status != REPMGR_INSTALLED)
				{
					PQfinish(bdr_node_conn);
					continue;
				}

				(void) get_all_node_records(bdr_node_conn, &existing_nodes);

				for (cell = existing_nodes.head; cell; cell = cell->next)
				{
					log_debug("creating record for node \"%s\" (ID: %i)",
							  cell->node_info->node_name, cell->node_info->node_id);
					create_node_record(conn, "bdr register", cell->node_info);
				}

				/* records are only copied from the first usable node */
				PQfinish(bdr_node_conn);
				break;
			}
		}
	}

	/* Add the repmgr extension tables to a replication set */

	if (get_bdr_version_num() < 3)
	{
		add_extension_tables_to_bdr_replication_set(conn);
	}
	else
	{
		/*
		 * NOTE: the version check near the top of this function exits for
		 * any BDR version above 2, so this branch is not expected to be
		 * reached at present.
		 */

		/* this is the only table we need to replicate */
		char *replication_set = get_default_bdr_replication_set(conn);

		/*
		 * this probably won't happen, but we need to be sure we're using
		 * the replication set metadata correctly...
		 */
		if (replication_set == NULL)
		{
			log_error(_("unable to retrieve default BDR replication set"));
			log_hint(_("see preceding messages"));
			log_debug("check query in get_default_bdr_replication_set()");
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}

		if (is_table_in_bdr_replication_set(conn, "nodes", replication_set) == false)
		{
			add_table_to_bdr_replication_set(conn, "nodes", replication_set);
		}

		pfree(replication_set);
	}

	initPQExpBuffer(&event_details);

	begin_transaction(conn);

	/*
	 * we'll check if a record exists (even if the schema was just created),
	 * as there's a faint chance of a race condition
	 */

	record_status = get_node_record(conn, config_file_options.node_id, &node_info);

	/*
	 * Validate an existing record *before* node_info is overwritten with the
	 * configured values; afterwards node_info.node_name would always equal
	 * the configured name and the name check could never fail.
	 */
	if (record_status == RECORD_FOUND)
	{
		/*
		 * At this point we will have established there are no non-BDR
		 * records, so no need to verify the node type
		 */
		if (!runtime_options.force)
		{
			log_error(_("this node is already registered"));
			log_hint(_("use -F/--force to overwrite the existing node record"));
			rollback_transaction(conn);
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}

		/*
		 * don't permit changing the node name - this must match the BDR node
		 * name set when the node was registered.
		 */

		if (strncmp(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name)) != 0)
		{
			log_error(_("a record for node %i is already registered with node_name \"%s\""),
					  config_file_options.node_id, node_info.node_name);
			log_hint(_("node_name configured in repmgr.conf is \"%s\""), config_file_options.node_name);

			rollback_transaction(conn);
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}
	}

	/* Update internal node record */

	node_info.type = BDR;
	node_info.node_id = config_file_options.node_id;
	node_info.upstream_node_id = NO_UPSTREAM_NODE;
	node_info.active = true;
	node_info.priority = config_file_options.priority;

	/* strncpy() does not terminate the target if the source is too long */
	strncpy(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name));
	node_info.node_name[sizeof(node_info.node_name) - 1] = '\0';
	strncpy(node_info.location, config_file_options.location, sizeof(node_info.location));
	node_info.location[sizeof(node_info.location) - 1] = '\0';
	strncpy(node_info.conninfo, config_file_options.conninfo, sizeof(node_info.conninfo));
	node_info.conninfo[sizeof(node_info.conninfo) - 1] = '\0';

	if (record_status == RECORD_FOUND)
	{
		/* overwrite the existing record (-F/--force was verified above) */
		bool		node_updated = update_node_record(conn, "bdr register", &node_info);

		if (node_updated == true)
		{
			appendPQExpBuffer(&event_details, _("node record updated for node \"%s\" (%i)"),
							  config_file_options.node_name, config_file_options.node_id);
			log_verbose(LOG_NOTICE, "%s", event_details.data);
		}
		else
		{
			success = false;
		}

	}
	else
	{
		/* create new node record */
		bool		node_created = create_node_record(conn, "bdr register", &node_info);

		if (node_created == true)
		{
			appendPQExpBuffer(&event_details,
							  _("node record created for node \"%s\" (ID: %i)"),
							  config_file_options.node_name, config_file_options.node_id);
			log_notice("%s", event_details.data);
		}
		else
		{
			success = false;
		}
	}

	if (success == false)
	{
		rollback_transaction(conn);
		PQfinish(conn);
		exit(ERR_DB_QUERY);
	}

	commit_transaction(conn);
	/* Log the event */
	create_event_notification(
							  conn,
							  &config_file_options,
							  config_file_options.node_id,
							  "bdr_register",
							  true,
							  event_details.data);

	termPQExpBuffer(&event_details);

	PQfinish(conn);

	log_notice(_("BDR node %i registered (conninfo: %s)"),
			   config_file_options.node_id, config_file_options.conninfo);

	return;
}
Пример #5
0
/*
 * get_upstream_connection()
 *
 * Returns connection to node's upstream node
 *
 * standby_conn          - open connection used to look up the upstream node
 * cluster, node_id      - identify the node whose upstream is wanted
 * upstream_node_id_ptr  - if not NULL, receives the upstream node's ID
 * upstream_conninfo_out - if not NULL, receives the upstream node's conninfo
 *                         string; must be able to hold MAXCONNINFO bytes
 *
 * Returns an open connection (the caller must close it), or NULL if the
 * lookup query failed, no upstream record exists, or the connection to the
 * upstream node could not be established.
 *
 * NOTE: will attempt to connect even if node is marked as inactive
 */
PGconn *
get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
						int *upstream_node_id_ptr, char *upstream_conninfo_out)
{
	PGconn	   *upstream_conn = NULL;
	PGresult   *res;
	char		sqlquery[QUERY_STR_LEN];
	char		upstream_conninfo_stack[MAXCONNINFO];
	char	   *upstream_conninfo = &*upstream_conninfo_stack;

	/*
	 * If the caller wanted to get a copy of the connection info string, sub
	 * out the local stack pointer for the pointer passed by the caller.
	 */
	if (upstream_conninfo_out != NULL)
		upstream_conninfo = upstream_conninfo_out;

	/* result columns: 0 = conninfo, 1 = name, 2 = id */
	sqlquery_snprintf(sqlquery,
					  "    SELECT un.conninfo, un.name, un.id "
					  "      FROM %s.repl_nodes un "
					  "INNER JOIN %s.repl_nodes n "
					  "        ON (un.id = n.upstream_node_id AND un.cluster = n.cluster)"
					  "     WHERE n.cluster = '%s' "
					  "       AND n.id = %i ",
					  get_repmgr_schema_quoted(standby_conn),
					  get_repmgr_schema_quoted(standby_conn),
					  cluster,
					  node_id);

	log_debug("get_upstream_connection(): %s\n", sqlquery);

	res = PQexec(standby_conn, sqlquery);

	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("unable to get conninfo for upstream server: %s\n"),
				PQerrorMessage(standby_conn));
		PQclear(res);
		return NULL;
	}

	if (!PQntuples(res))
	{
		log_notice(_("no record found for upstream server"));
		PQclear(res);
		return NULL;
	}

	strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO);
	/* strncpy() does not terminate the string if the source is too long */
	upstream_conninfo[MAXCONNINFO - 1] = '\0';

	/* the node ID is the third column (un.id), not the name column */
	if (upstream_node_id_ptr != NULL)
		*upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 2));

	PQclear(res);

	log_debug("conninfo is: '%s'\n", upstream_conninfo);
	upstream_conn = establish_db_connection(upstream_conninfo, false);

	if (PQstatus(upstream_conn) != CONNECTION_OK)
	{
		log_err(_("unable to connect to upstream node: %s\n"),
				PQerrorMessage(upstream_conn));

		/* release the failed connection object; the caller only sees NULL */
		if (upstream_conn != NULL)
			PQfinish(upstream_conn);

		return NULL;
	}

	return upstream_conn;
}
Пример #6
0
/*
 * do_failover()
 *
 * Decide which node should become the new primary and run either the
 * configured promote command (if that node is this one) or the configured
 * follow command (if it is another node).
 *
 * Outline:
 *  1. read the registered nodes (ordered by priority, id) and note which
 *     of them can be connected to;
 *  2. give up if fewer than half of the registered nodes are visible;
 *  3. collect the last received xlog location of every visible non-witness
 *     node, and publish this node's own location via
 *     update_shared_memory();
 *  4. wait until each visible node reports a location through
 *     repmgr_get_last_standby_location();
 *  5. choose the ready, visible, non-witness node with the highest xlog
 *     location (the earliest in priority order wins a tie) and run the
 *     promote or follow command.
 *
 * On success sets failover_done and re-establishes my_local_conn. Most
 * failures end the process via terminate().
 */
static void
do_failover(void)
{
	/* capacity of the on-stack node table below */
	enum { MAX_FAILOVER_NODES = 50 };

	PGresult   *res;
	char		sqlquery[QUERY_STR_LEN];

	int			total_nodes = 0;
	int			visible_nodes = 0;
	int			ready_nodes = 0;

	bool		find_best = false;

	int			i;
	int			r;

	uint32		uxlogid;
	uint32		uxrecoff;
	XLogRecPtr	xlog_recptr;

	char		last_wal_standby_applied[MAXLEN];

	PGconn	   *node_conn = NULL;

	/*
	 * will get info about up to MAX_FAILOVER_NODES nodes, which seems to be
	 * large enough for most scenarios
	 */
	t_node_info nodes[MAX_FAILOVER_NODES];

	/* initialize to keep compiler quiet */
	t_node_info best_candidate = {-1, "", InvalidXLogRecPtr, false, false, false};

	/* get a list of standby nodes, including myself */
	sqlquery_snprintf(sqlquery, "SELECT id, conninfo, witness "
					  "  FROM %s.repl_nodes "
					  " WHERE cluster = '%s' "
					  " ORDER BY priority, id ",
					  repmgr_schema, local_options.cluster_name);

	res = PQexec(my_local_conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("Can't get nodes' info: %s\n"), PQerrorMessage(my_local_conn));
		PQclear(res);
		terminate(ERR_DB_QUERY);
	}

	/*
	 * total nodes that are registered
	 */
	total_nodes = PQntuples(res);
	log_debug(_("%s: there are %d nodes registered\n"), progname, total_nodes);

	/*
	 * Never index past the end of nodes[]: if more nodes are registered than
	 * the table can hold, only the first ones (in priority order) are
	 * considered.
	 */
	if (total_nodes > MAX_FAILOVER_NODES)
	{
		log_err(_("%s: %d nodes are registered but only the first %d will be considered\n"),
				progname, total_nodes, MAX_FAILOVER_NODES);
		total_nodes = MAX_FAILOVER_NODES;
	}

	/*
	 * Build an array with the nodes and indicate which ones are visible and
	 * ready
	 */
	for (i = 0; i < total_nodes; i++)
	{
		nodes[i].node_id = atoi(PQgetvalue(res, i, 0));
		strncpy(nodes[i].conninfo_str, PQgetvalue(res, i, 1), MAXLEN);
		/* strncpy() does not terminate the string if the source is too long */
		nodes[i].conninfo_str[MAXLEN - 1] = '\0';
		nodes[i].is_witness = (strcmp(PQgetvalue(res, i, 2), "t") == 0) ? true : false;

		/*
		 * Initialize on false so if we can't reach this node we know that
		 * later
		 */
		nodes[i].is_visible = false;
		nodes[i].is_ready = false;

		XLAssignValue(nodes[i].xlog_location, 0, 0);

		log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"),
				  progname, nodes[i].node_id, nodes[i].conninfo_str,
				  (nodes[i].is_witness) ? "true" : "false");

		node_conn = establish_db_connection(nodes[i].conninfo_str, false);

		/* if we can't see the node just skip it */
		if (PQstatus(node_conn) != CONNECTION_OK)
		{
			if (node_conn != NULL)
				PQfinish(node_conn);

			continue;
		}

		visible_nodes++;
		nodes[i].is_visible = true;

		PQfinish(node_conn);
	}
	PQclear(res);

	log_debug(_("Total nodes counted: registered=%d, visible=%d\n"),
			  total_nodes, visible_nodes);

	/*
	 * am i on the group that should keep alive? if i see less than half of
	 * total_nodes then i should do nothing
	 */
	if (visible_nodes < (total_nodes / 2.0))
	{
		log_err(_("Can't reach most of the nodes.\n"
				  "Let the other standby servers decide which one will be the primary.\n"
		"Manual action will be needed to readd this node to the cluster.\n"));
		terminate(ERR_FAILOVER_FAIL);
	}

	/* Query all the nodes to determine which ones are ready */
	for (i = 0; i < total_nodes; i++)
	{
		/* if the node is not visible, skip it */
		if (!nodes[i].is_visible)
			continue;

		if (nodes[i].is_witness)
			continue;

		node_conn = establish_db_connection(nodes[i].conninfo_str, false);

		/*
		 * XXX This shouldn't happen, if this happens it means this is a major
		 * problem maybe network outages? anyway, is better for a human to
		 * react
		 */
		if (PQstatus(node_conn) != CONNECTION_OK)
		{
			log_err(_("It seems new problems are arising, manual intervention is needed\n"));
			if (node_conn != NULL)
				PQfinish(node_conn);
			terminate(ERR_FAILOVER_FAIL);
		}

		uxlogid = 0;
		uxrecoff = 0;

		sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
		res = PQexec(node_conn, sqlquery);
		if (PQresultStatus(res) != PGRES_TUPLES_OK)
		{
			log_info(_("Can't get node's last standby location: %s\n"),
					 PQerrorMessage(node_conn));
			log_info(_("Connection details: %s\n"), nodes[i].conninfo_str);
			PQclear(res);
			PQfinish(node_conn);
			terminate(ERR_FAILOVER_FAIL);
		}

		if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
			log_info(_("could not parse transaction log location \"%s\"\n"),
					 PQgetvalue(res, 0, 0));

		log_debug("XLog position of node %d: log id=%u (%X), offset=%u (%X)\n",
				  nodes[i].node_id, uxlogid, uxlogid, uxrecoff, uxrecoff);

		/* If position is 0/0, error */
		if (uxlogid == 0 && uxrecoff == 0)
		{
			PQclear(res);
			PQfinish(node_conn);
			log_info(_("InvalidXLogRecPtr detected in a standby\n"));
			terminate(ERR_FAILOVER_FAIL);
		}

		XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff);

		PQclear(res);
		PQfinish(node_conn);
	}

	/* last we get info about this node, and update shared memory */
	sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
	res = PQexec(my_local_conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("PQexec failed: %s.\nReport an invalid value to not be "
				  " considered as new primary and exit.\n"),
				PQerrorMessage(my_local_conn));
		PQclear(res);
		sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
		update_shared_memory(last_wal_standby_applied);
		terminate(ERR_DB_QUERY);
	}

	/* write last location in shared memory */
	update_shared_memory(PQgetvalue(res, 0, 0));
	PQclear(res);

	for (i = 0; i < total_nodes; i++)
	{
		/*
		 * NOTE(review): a node which keeps reporting 0/0 is re-queried
		 * immediately, with no delay between attempts.
		 */
		while (!nodes[i].is_ready)
		{
			/*
			 * the witness will always be masked as ready if it's still not
			 * marked that way and avoid a useless query
			 */
			if (nodes[i].is_witness)
			{
				if (!nodes[i].is_ready)
				{
					nodes[i].is_ready = true;
					ready_nodes++;
				}
				break;
			}

			/* if the node is not visible, skip it */
			if (!nodes[i].is_visible)
				break;

			/* if the node is ready there is nothing to check, skip it too */
			if (nodes[i].is_ready)
				break;

			node_conn = establish_db_connection(nodes[i].conninfo_str, false);

			/*
			 * XXX This shouldn't happen, if this happens it means this is a
			 * major problem maybe network outages? anyway, is better for a
			 * human to react
			 */
			if (PQstatus(node_conn) != CONNECTION_OK)
			{
				/* XXX */
				log_info(_("At this point, it could be some race conditions "
						"that are acceptable, assume the node is restarting "
						   "and starting failover procedure\n"));

				/* release the failed connection object before moving on */
				if (node_conn != NULL)
					PQfinish(node_conn);
				break;
			}

			uxlogid = 0;
			uxrecoff = 0;

			sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()",
							  repmgr_schema);
			res = PQexec(node_conn, sqlquery);
			if (PQresultStatus(res) != PGRES_TUPLES_OK)
			{
				log_err(_("PQexec failed: %s.\nReport an invalid value to not"
						  "be considered as new primary and exit.\n"),
						PQerrorMessage(node_conn));
				PQclear(res);
				PQfinish(node_conn);
				terminate(ERR_DB_QUERY);
			}

			if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
			{
				log_info(_("could not parse transaction log location \"%s\"\n"),
						 PQgetvalue(res, 0, 0));

				/* we can't do anything but fail at this point... */
				if (*PQgetvalue(res, 0, 0) == '\0')
				{
					log_crit("Whoops, seems as if shared_preload_libraries=repmgr_funcs is not set!\n");
					PQclear(res);
					PQfinish(node_conn);
					exit(ERR_BAD_CONFIG);
				}
			}


			PQclear(res);
			PQfinish(node_conn);
			/* If position is 0/0, keep checking */
			if (uxlogid == 0 && uxrecoff == 0)
				continue;

			XLAssignValue(xlog_recptr, uxlogid, uxrecoff);

			/* keep the more advanced of the two locations seen for this node */
			if (XLByteLT(nodes[i].xlog_location, xlog_recptr))
			{
				XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff);
			}

			log_debug("Last XLog position of node %d: log id=%u (%X), offset=%u (%X)\n",
					  nodes[i].node_id, uxlogid, uxlogid,
					  uxrecoff, uxrecoff);

			ready_nodes++;
			nodes[i].is_ready = true;
		}
	}

	/* Close the connection to this server */
	PQfinish(my_local_conn);
	my_local_conn = NULL;

	/*
	 * determine which one is the best candidate to promote to primary
	 */
	for (i = 0; i < total_nodes; i++)
	{
		/* witness is never a good candidate */
		if (nodes[i].is_witness)
			continue;

		if (!nodes[i].is_ready || !nodes[i].is_visible)
			continue;

		if (!find_best)
		{
			/*
			 * start with the first ready node, and then move on to the next
			 * one
			 */
			best_candidate.node_id = nodes[i].node_id;
			XLAssign(best_candidate.xlog_location, nodes[i].xlog_location);
			best_candidate.is_ready = nodes[i].is_ready;
			best_candidate.is_witness = nodes[i].is_witness;
			find_best = true;
		}

		/* we use the macros provided by xlogdefs.h to compare XLogRecPtr */

		/*
		 * Nodes are retrieved ordered by priority, so if the current best
		 * candidate is lower than the next node's wal location then assign
		 * next node as the new best candidate.
		 */
		if (XLByteLT(best_candidate.xlog_location, nodes[i].xlog_location))
		{
			best_candidate.node_id = nodes[i].node_id;
			XLAssign(best_candidate.xlog_location, nodes[i].xlog_location);
			best_candidate.is_ready = nodes[i].is_ready;
			best_candidate.is_witness = nodes[i].is_witness;
		}
	}

	/* once we know who is the best candidate, promote it */
	if (find_best && (best_candidate.node_id == local_options.node))
	{
		if (best_candidate.is_witness)
		{
			log_err(_("%s: Node selected as new master is a witness. Can't be promoted.\n"),
					progname);
			terminate(ERR_FAILOVER_FAIL);
		}

		/* wait */
		sleep(5);

		if (verbose)
			log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"),
					 progname);
		log_debug(_("promote command is: \"%s\"\n"),
				  local_options.promote_command);

		/* flush pending log output before the external command runs */
		if (log_type == REPMGR_STDERR && *local_options.logfile)
		{
			fflush(stderr);
		}

		r = system(local_options.promote_command);
		if (r != 0)
		{
			log_err(_("%s: promote command failed. You could check and try it manually.\n"),
					progname);
			terminate(ERR_BAD_CONFIG);
		}
	}
	else if (find_best)
	{
		/* wait */
		sleep(10);

		if (verbose)
			log_info(_("%s: Node %d is the best candidate to be the new primary, we should follow it...\n"),
					 progname, best_candidate.node_id);
		log_debug(_("follow command is: \"%s\"\n"), local_options.follow_command);

		/*
		 * New Primary need some time to be promoted. The follow command
		 * should take care of that.
		 */
		if (log_type == REPMGR_STDERR && *local_options.logfile)
		{
			fflush(stderr);
		}

		r = system(local_options.follow_command);
		if (r != 0)
		{
			log_err(_("%s: follow command failed. You could check and try it manually.\n"),
					progname);
			terminate(ERR_BAD_CONFIG);
		}
	}
	else
	{
		log_err(_("%s: Did not find candidates. You should check and try manually.\n"),
				progname);
		terminate(ERR_FAILOVER_FAIL);
	}

	/* to force it to re-calculate mode and master node */
	failover_done = true;

	/* and reconnect to the local database */
	my_local_conn = establish_db_connection(local_options.conninfo, true);
}
Пример #7
0
/*
 * main()
 *
 * Entry point of the monitoring daemon. In order, it:
 *  - handles --help / --version when given as the first argument;
 *  - parses the command-line options;
 *  - optionally daemonizes and creates a PID file;
 *  - reads the configuration file and initialises logging;
 *  - connects to the local database and checks its version;
 *  - enters the main loop, which determines the local node's mode
 *    (witness, standby or primary) and runs the matching monitoring loop
 *    until `failover_done` is set, then re-evaluates the mode.
 *
 * The outer loop is `while (true)`, so the process only ends through
 * terminate()/exit(); the cleanup code after the loop is not reached.
 */
int
main(int argc, char **argv)
{
	static struct option long_options[] =
	{
		{"config-file", required_argument, NULL, 'f'},
		{"verbose", no_argument, NULL, 'v'},
		{"monitoring-history", no_argument, NULL, 'm'},
		{"daemonize", no_argument, NULL, 'd'},
		{"pid-file", required_argument, NULL, 'p'},
		{NULL, 0, NULL, 0}
	};

	int			optindex;
	int			c,
				ret;
	bool		daemonize = false;
	FILE	   *fd;

	char		standby_version[MAXVERSIONSTR],
			   *ret_ver;

	progname = get_progname(argv[0]);

	/* --help and --version are only recognised as the first argument */
	if (argc > 1)
	{
		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
		{
			help(progname);
			exit(SUCCESS);
		}
		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
		{
			printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION);
			exit(SUCCESS);
		}
	}

	/*
	 * The short-option string must agree with long_options: only -f and -p
	 * take an argument.  "-v" is a plain flag; declaring it as "v:" would
	 * make getopt swallow the following command-line word as its argument.
	 */
	while ((c = getopt_long(argc, argv, "f:vmdp:", long_options, &optindex)) != -1)
	{
		switch (c)
		{
			case 'f':
				config_file = optarg;
				break;
			case 'v':
				verbose = true;
				break;
			case 'm':
				monitoring_history = true;
				break;
			case 'd':
				daemonize = true;
				break;
			case 'p':
				pid_file = optarg;
				break;
			default:
				usage();
				exit(ERR_BAD_CONFIG);
		}
	}

	if (daemonize)
	{
		do_daemonize();
	}

	if (pid_file)
	{
		check_and_create_pid_file(pid_file);
	}

#ifndef WIN32
	setup_event_handlers();
#endif

	/*
	 * Read the configuration file: repmgr.conf
	 */
	parse_config(config_file, &local_options);
	if (local_options.node == -1)
	{
		log_err(_("Node information is missing. "
				  "Check the configuration file, or provide one if you have not done so.\n"));
		terminate(ERR_BAD_CONFIG);
	}

	/* Detach stdin and stdout; failures are reported but are not fatal */
	fd = freopen("/dev/null", "r", stdin);
	if (fd == NULL)
	{
		fprintf(stderr, "error reopening stdin to '/dev/null': %s",
				strerror(errno));
	}

	fd = freopen("/dev/null", "w", stdout);
	if (fd == NULL)
	{
		fprintf(stderr, "error reopening stdout to '/dev/null': %s",
				strerror(errno));
	}

	logger_init(&local_options, progname, local_options.loglevel,
				local_options.logfacility);
	if (verbose)
		logger_min_verbose(LOG_INFO);

	/* When logging goes to syslog, stderr is redirected to /dev/null too */
	if (log_type == REPMGR_SYSLOG)
	{
		fd = freopen("/dev/null", "w", stderr);

		if (fd == NULL)
		{
			fprintf(stderr, "error reopening stderr to '/dev/null': %s",
					strerror(errno));
		}
	}

	/* Schema name is the default prefix followed by the cluster name */
	xsnprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX,
			 local_options.cluster_name);

	log_info(_("%s Connecting to database '%s'\n"), progname,
			 local_options.conninfo);
	my_local_conn = establish_db_connection(local_options.conninfo, true);

	/* should be v9 or better */
	log_info(_("%s Connected to database, checking its state\n"), progname);
	ret_ver = pg_version(my_local_conn, standby_version);
	if (ret_ver == NULL || strcmp(standby_version, "") == 0)
	{
		if (ret_ver != NULL)
			log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"),
					progname);
		terminate(ERR_BAD_CONFIG);
	}


	/*
	 * MAIN LOOP. This loop cycles once per failover and at startup.
	 * Prerequisites: my_local_conn must already hold an active connection;
	 * no master connection is required yet.
	 */
	do
	{
		/*
		 * Set my server mode, establish a connection to primary and start
		 * monitor
		 */
		ret = is_witness(my_local_conn, repmgr_schema,
						 local_options.cluster_name, local_options.node);

		if (ret == 1)
			my_local_mode = WITNESS_MODE;
		else if (ret == 0)
		{
			ret = is_standby(my_local_conn);

			if (ret == 1)
				my_local_mode = STANDBY_MODE;
			else if (ret == 0)	/* is the master */
				my_local_mode = PRIMARY_MODE;
		}

		/*
		 * XXX we did this before changing is_standby() to return int; we
		 * should not exit at this point, but for now we do until we have a
		 * better strategy
		 */
		if (ret == -1)
			terminate(1);

		switch (my_local_mode)
		{
			case PRIMARY_MODE:
				/* On the primary, the "primary" connection is the local one */
				primary_options.node = local_options.node;
				strncpy(primary_options.conninfo, local_options.conninfo,
						MAXLEN);
				primary_conn = my_local_conn;

				check_cluster_configuration(my_local_conn);
				check_node_configuration();

				if (reload_config(config_file, &local_options))
				{
					PQfinish(my_local_conn);
					my_local_conn = establish_db_connection(local_options.conninfo, true);
					primary_conn = my_local_conn;
					update_registration();
				}

				log_info(_("%s Starting continuous primary connection check\n"),
						 progname);

				/*
				 * Check that primary is still alive, and standbys are
				 * sending info
				 */

				/*
				 * Every local_options.monitor_interval_secs seconds, do
				 * master checks XXX Check that standbys are sending info
				 */
				do
				{
					if (check_connection(primary_conn, "master"))
					{
						/*
						 * CheckActiveStandbiesConnections();
						 * CheckInactiveStandbies();
						 */
						sleep(local_options.monitor_interval_secs);
					}
					else
					{
						/*
						 * XXX May we do something more verbose ?
						 */
						terminate(1);
					}

					if (got_SIGHUP)
					{
						/*
						 * if we can reload, then could need to change
						 * my_local_conn
						 */
						if (reload_config(config_file, &local_options))
						{
							PQfinish(my_local_conn);
							my_local_conn = establish_db_connection(local_options.conninfo, true);
							primary_conn = my_local_conn;

							/* Re-point stderr at the configured log file */
							if (*local_options.logfile)
							{
								fd = freopen(local_options.logfile, "a", stderr);
								if (fd == NULL)
								{
									fprintf(stderr, "error reopening stderr to '%s': %s",
									 local_options.logfile, strerror(errno));
								}

							}

							update_registration();
						}
						got_SIGHUP = false;
					}
				} while (!failover_done);
				break;

			case WITNESS_MODE:
			case STANDBY_MODE:
				/* I need the id of the primary as well as a connection to it */
				log_info(_("%s Connecting to primary for cluster '%s'\n"),
						 progname, local_options.cluster_name);
				primary_conn = get_master_connection(my_local_conn, repmgr_schema,
												  local_options.cluster_name,
												&primary_options.node, NULL);
				if (primary_conn == NULL)
				{
					terminate(ERR_BAD_CONFIG);
				}

				check_cluster_configuration(my_local_conn);
				check_node_configuration();

				if (reload_config(config_file, &local_options))
				{
					PQfinish(my_local_conn);
					my_local_conn = establish_db_connection(local_options.conninfo, true);
					update_registration();
				}

				/*
				 * Every local_options.monitor_interval_secs seconds, do
				 * checks
				 */
				if (my_local_mode == WITNESS_MODE)
				{
					log_info(_("%s Starting continuous witness node monitoring\n"),
							 progname);
				}
				else if (my_local_mode == STANDBY_MODE)
				{
					log_info(_("%s Starting continuous standby node monitoring\n"),
							 progname);
				}

				do
				{
					if (my_local_mode == WITNESS_MODE)
						witness_monitor();
					else if (my_local_mode == STANDBY_MODE)
						standby_monitor();
					sleep(local_options.monitor_interval_secs);

					if (got_SIGHUP)
					{
						/*
						 * if we can reload, then could need to change
						 * my_local_conn
						 */
						if (reload_config(config_file, &local_options))
						{
							PQfinish(my_local_conn);
							my_local_conn = establish_db_connection(local_options.conninfo, true);
							update_registration();
						}
						got_SIGHUP = false;
					}
				} while (!failover_done);
				break;
			default:
				log_err(_("%s: Unrecognized mode for node %d\n"), progname,
						local_options.node);
		}

		/* Reset the flag so the next pass re-evaluates mode and master */
		failover_done = false;

	} while (true);

	/* Not reached: the loop above never exits normally */

	/* close the connection to the database and cleanup */
	close_connections();

	/* Shuts down logging system */
	logger_shutdown();

	return 0;
}
Пример #8
0
/*
 * reload_config()
 *
 * This is only called by repmgrd after receiving a SIGHUP or when a monitoring
 * loop is started up; it therefore only needs to reload options required
 * by repmgrd, which are as follows:
 *
 * changeable options:
 * - failover
 * - follow_command
 * - logfacility
 * - logfile
 * - loglevel
 * - master_response_timeout
 * - monitor_interval_secs
 * - priority
 * - promote_command
 * - reconnect_attempts
 * - reconnect_interval
 * - retry_promote_interval_secs
 * - witness_repl_nodes_sync_interval_secs
 *
 * non-changeable options:
 * - cluster_name
 * - conninfo
 * - node
 * - node_name
 *
 * A changed cluster_name, node or node_name causes the reload to be
 * rejected.  A changed conninfo is test-connected and rejected if the
 * connection fails; otherwise the reload proceeds, but the new conninfo
 * value is not copied into orig_options.
 *
 * Parameters:
 *   orig_options - configuration currently in use; changed values are
 *                  written into it in place.
 *
 * Returns true if at least one non-logging option changed; false if the
 * new configuration was rejected, nothing changed, or only logging options
 * (logfacility, logfile, loglevel) changed.  When logging options change,
 * the logger is restarted with the new settings.
 *
 * extract with something like:
 *	 grep local_options\\. repmgrd.c | perl -n -e '/local_options\.([\w_]+)/ && print qq|$1\n|;' | sort | uniq
 */
bool
reload_config(t_configuration_options *orig_options)
{
	PGconn	   *conn;
	t_configuration_options new_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
	bool	  config_changed = false;
	bool	  log_config_changed = false;

	/*
	 * NOTE(review): this list is static, so entries recorded by an earlier
	 * failed parse appear to persist across calls; unless _parse_config()
	 * resets it, one bad reload would make every later reload fail - confirm.
	 */
	static ItemList config_errors = { NULL, NULL };

	/*
	 * Re-read the configuration file: repmgr.conf
	 */
	log_info(_("reloading configuration file\n"));

	_parse_config(&new_options, &config_errors);

	if (config_errors.head != NULL)
	{
		/* XXX dump errors to log */
		log_warning(_("unable to parse new configuration, retaining current configuration\n"));
		return false;
	}

	/* The following options cannot be changed */
	if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
	{
		log_warning(_("cluster_name cannot be changed, retaining current configuration\n"));
		return false;
	}

	if (new_options.node != orig_options->node)
	{
		log_warning(_("node ID cannot be changed, retaining current configuration\n"));
		return false;
	}

	if (strcmp(new_options.node_name, orig_options->node_name) != 0)
	{
		log_warning(_("node_name cannot be changed, keeping current configuration\n"));
		return false;
	}

	if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
	{
		/* Test conninfo string works */
		conn = establish_db_connection(new_options.conninfo, false);
		if (!conn || (PQstatus(conn) != CONNECTION_OK))
		{
			log_warning(_("'conninfo' string is not valid, retaining current configuration\n"));

			/*
			 * A PGconn returned for a failed attempt still owns memory and
			 * must be released, otherwise every bad reload leaks one.
			 */
			if (conn != NULL)
				PQfinish(conn);
			return false;
		}
		PQfinish(conn);
	}

	/*
	 * No configuration problems detected - copy any changed values
	 *
	 * NB: keep these in the same order as in config.h to make it easier
	 * to manage them
	 */

	/* failover */
	if (orig_options->failover != new_options.failover)
	{
		orig_options->failover = new_options.failover;
		config_changed = true;
	}

	/* follow_command */
	if (strcmp(orig_options->follow_command, new_options.follow_command) != 0)
	{
		strcpy(orig_options->follow_command, new_options.follow_command);
		config_changed = true;
	}

	/* master_response_timeout */
	if (orig_options->master_response_timeout != new_options.master_response_timeout)
	{
		orig_options->master_response_timeout = new_options.master_response_timeout;
		config_changed = true;
	}

	/* monitor_interval_secs */
	if (orig_options->monitor_interval_secs != new_options.monitor_interval_secs)
	{
		orig_options->monitor_interval_secs = new_options.monitor_interval_secs;
		config_changed = true;
	}

	/* priority */
	if (orig_options->priority != new_options.priority)
	{
		orig_options->priority = new_options.priority;
		config_changed = true;
	}

	/* promote_command */
	if (strcmp(orig_options->promote_command, new_options.promote_command) != 0)
	{
		strcpy(orig_options->promote_command, new_options.promote_command);
		config_changed = true;
	}

	/* reconnect_attempts */
	if (orig_options->reconnect_attempts != new_options.reconnect_attempts)
	{
		orig_options->reconnect_attempts = new_options.reconnect_attempts;
		config_changed = true;
	}

	/* reconnect_interval */
	if (orig_options->reconnect_interval != new_options.reconnect_interval)
	{
		orig_options->reconnect_interval = new_options.reconnect_interval;
		config_changed = true;
	}

	/* retry_promote_interval_secs */
	if (orig_options->retry_promote_interval_secs != new_options.retry_promote_interval_secs)
	{
		orig_options->retry_promote_interval_secs = new_options.retry_promote_interval_secs;
		config_changed = true;
	}


	/* witness_repl_nodes_sync_interval_secs */
	if (orig_options->witness_repl_nodes_sync_interval_secs != new_options.witness_repl_nodes_sync_interval_secs)
	{
		orig_options->witness_repl_nodes_sync_interval_secs = new_options.witness_repl_nodes_sync_interval_secs;
		config_changed = true;
	}

	/*
	 * Handle changes to logging configuration; these are tracked separately
	 * and do not affect the return value.
	 */
	if (strcmp(orig_options->logfacility, new_options.logfacility) != 0)
	{
		strcpy(orig_options->logfacility, new_options.logfacility);
		log_config_changed = true;
	}

	if (strcmp(orig_options->logfile, new_options.logfile) != 0)
	{
		strcpy(orig_options->logfile, new_options.logfile);
		log_config_changed = true;
	}


	if (strcmp(orig_options->loglevel, new_options.loglevel) != 0)
	{
		strcpy(orig_options->loglevel, new_options.loglevel);
		log_config_changed = true;
	}

	if (log_config_changed == true)
	{
		log_notice(_("restarting logging with changed parameters\n"));
		logger_shutdown();
		logger_init(orig_options, progname());
	}

	if (config_changed == true)
	{
		log_notice(_("configuration file reloaded with changed parameters\n"));
	}
	/*
	 * if logging configuration changed, don't say the configuration didn't
	 * change, as it clearly has.
	 */
	else if (log_config_changed == false)
	{
		log_info(_("configuration has not changed\n"));
	}

	return config_changed;
}
Пример #9
0
/*
 * reload_config()
 *
 * Re-read the configuration file and, if the new settings pass validation,
 * copy them into the configuration currently in use.
 *
 * The reload is rejected (current configuration kept) when:
 *  - the parsed node value is -1;
 *  - cluster_name, node or node_name differ from the current values;
 *  - failover is neither MANUAL_FAILOVER nor AUTOMATIC_FAILOVER;
 *  - master_response_timeout is not greater than zero;
 *  - reconnect_attempts or reconnect_intvl is negative;
 *  - a test connection using the new conninfo string fails.
 *
 * Parameters:
 *   config_file  - path of the configuration file to parse.
 *   orig_options - configuration currently in use; overwritten in place
 *                  when the new configuration is accepted.
 *
 * Returns true if the new configuration was accepted and copied, false
 * otherwise.  Logging options (loglevel, logfacility) are not reloaded.
 */
bool
reload_config(char *config_file, t_configuration_options * orig_options)
{
	PGconn	   *conn;

	t_configuration_options new_options;

	/*
	 * Re-read the configuration file: repmgr.conf
	 */
	log_info(_("Reloading configuration file and updating repmgr tables\n"));
	parse_config(config_file, &new_options);
	if (new_options.node == -1)
	{
		log_warning(_("Cannot load new configuration, will keep current one.\n"));
		return false;
	}

	/* Identity of the node must not change across a reload */
	if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
	{
		log_warning(_("Cannot change cluster name, will keep current configuration.\n"));
		return false;
	}

	if (new_options.node != orig_options->node)
	{
		log_warning(_("Cannot change node number, will keep current configuration.\n"));
		return false;
	}

	if (strcmp(new_options.node_name, orig_options->node_name) != 0)
	{
		log_warning(_("Cannot change standby name, will keep current configuration.\n"));
		return false;
	}

	/* Range checks on the values that may change */
	if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER)
	{
		log_warning(_("New value for failover is not valid. Should be MANUAL or AUTOMATIC.\n"));
		return false;
	}

	if (new_options.master_response_timeout <= 0)
	{
		log_warning(_("New value for master_response_timeout is not valid. Should be greater than zero.\n"));
		return false;
	}

	if (new_options.reconnect_attempts < 0)
	{
		log_warning(_("New value for reconnect_attempts is not valid. Should be greater or equal than zero.\n"));
		return false;
	}

	if (new_options.reconnect_intvl < 0)
	{
		log_warning(_("New value for reconnect_interval is not valid. Should be greater or equal than zero.\n"));
		return false;
	}

	/* Test conninfo string */
	conn = establish_db_connection(new_options.conninfo, false);
	if (!conn || (PQstatus(conn) != CONNECTION_OK))
	{
		log_warning(_("conninfo string is not valid, will keep current configuration.\n"));

		/*
		 * A PGconn returned for a failed attempt still owns memory and must
		 * be released, otherwise every bad reload leaks one.
		 */
		if (conn != NULL)
			PQfinish(conn);
		return false;
	}
	PQfinish(conn);

	/* Configuration seems ok, will load new values */
	strcpy(orig_options->cluster_name, new_options.cluster_name);
	orig_options->node = new_options.node;
	strcpy(orig_options->conninfo, new_options.conninfo);
	orig_options->failover = new_options.failover;
	orig_options->priority = new_options.priority;
	strcpy(orig_options->node_name, new_options.node_name);
	strcpy(orig_options->promote_command, new_options.promote_command);
	strcpy(orig_options->follow_command, new_options.follow_command);
	strcpy(orig_options->rsync_options, new_options.rsync_options);
	strcpy(orig_options->ssh_options, new_options.ssh_options);
	orig_options->master_response_timeout = new_options.master_response_timeout;
	orig_options->reconnect_attempts = new_options.reconnect_attempts;
	orig_options->reconnect_intvl = new_options.reconnect_intvl;

	/*
	 * XXX These ones can change with a simple SIGHUP?
	 *
	 * strcpy (orig_options->loglevel, new_options.loglevel); strcpy
	 * (orig_options->logfacility, new_options.logfacility);
	 *
	 * logger_shutdown(); XXX do we have progname here ? logger_init(progname,
	 * orig_options.loglevel, orig_options.logfacility);
	 */

	return true;
}