示例#1
0
文件: repmgrd.c 项目: klando/repmgr
static void
checkNodeConfiguration(char *conninfo)
{
	PGresult   *res;

	/*
	 * Check if we have my node information in repl_nodes
	 */
	log_info(_("%s Checking node %d in cluster '%s'\n"),
	         progname, local_options.node, local_options.cluster_name);
	sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes "
	                  " WHERE id = %d AND cluster = '%s' ",
	                  repmgr_schema, local_options.node,
	                  local_options.cluster_name);

	res = PQexec(myLocalConn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("PQexec failed: %s\n"), PQerrorMessage(myLocalConn));
		PQclear(res);
		CloseConnections();
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * If there isn't any results then we have not configured this node yet
	 * in repmgr, if that is the case we will insert the node to the cluster,
	 * except if it is a witness
	 */
	if (PQntuples(res) == 0)
	{
		PQclear(res);

		if (myLocalMode == WITNESS_MODE)
		{
			log_err(_("The witness is not configured\n"));
			CloseConnections();
			exit(ERR_BAD_CONFIG);
		}

		/* Adding the node */
		log_info(_("%s Adding node %d to cluster '%s'\n"),
		         progname, local_options.node, local_options.cluster_name);
		sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes "
		                  "VALUES (%d, '%s', '%s', 'f')",
		                  repmgr_schema, local_options.node,
		                  local_options.cluster_name,
		                  local_options.conninfo);

		if (!PQexec(primaryConn, sqlquery))
		{
			log_err(_("Cannot insert node details, %s\n"),
			        PQerrorMessage(primaryConn));
			CloseConnections();
			exit(ERR_BAD_CONFIG);
		}
	}
	PQclear(res);
}
示例#2
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/**
 * Typed variant of guc_set: compares a server setting against a value with
 * both sides cast to the named PostgreSQL datatype, so the comparison is
 * carried out on that type.
 *
 * Returns 1 when pg_settings yields a matching row, 0 when it yields none,
 * and -1 (after logging) when the query itself fails.
 */
int
guc_set_typed(PGconn *conn, const char *parameter, const char *op,
			  const char *value, const char *datatype)
{
	char		query[QUERY_STR_LEN];
	PGresult   *result;
	int			outcome;

	sqlquery_snprintf(query,
					  "SELECT true FROM pg_settings "
					  " WHERE name = '%s' AND setting::%s %s '%s'::%s",
					  parameter, datatype, op, value, datatype);

	result = PQexec(conn, query);

	if (PQresultStatus(result) == PGRES_TUPLES_OK)
	{
		/* a row means the condition holds, no row means it does not */
		outcome = (PQntuples(result) == 0) ? 0 : 1;
	}
	else
	{
		log_err(_("GUC setting check PQexec failed: %s"),
				PQerrorMessage(conn));
		outcome = -1;
	}

	PQclear(result);

	return outcome;
}
示例#3
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * Report whether pg_namespace contains the schema named by
 * get_repmgr_schema().
 *
 * Returns true when at least one row comes back; false when the schema is
 * absent (logged at debug level) or when the query fails (logged as an
 * error).
 */
bool
check_cluster_schema(PGconn *conn)
{
	char		query[QUERY_STR_LEN];
	PGresult   *result;
	bool		schema_found = false;

	sqlquery_snprintf(query,
					  "SELECT 1 FROM pg_namespace WHERE nspname = '%s'",
					  get_repmgr_schema());

	log_debug(_("check_cluster_schema(): %s\n"), query);
	result = PQexec(conn, query);

	if (PQresultStatus(result) != PGRES_TUPLES_OK)
	{
		log_err(_("check_cluster_schema(): unable to check cluster schema: %s\n"),
				PQerrorMessage(conn));
	}
	else if (PQntuples(result) == 0)
	{
		/* schema doesn't exist */
		log_debug(_("check_cluster_schema(): schema '%s' doesn't exist\n"), get_repmgr_schema());
	}
	else
	{
		schema_found = true;
	}

	PQclear(result);

	return schema_found;
}
示例#4
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * Set upstream_node_id of the repl_nodes row identified by cluster_name and
 * this_node_id to new_upstream_node_id.
 *
 * Returns true when the UPDATE completes with PGRES_COMMAND_OK, false
 * (after logging the connection's error message) otherwise.
 */
bool
update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id)
{
	char		query[QUERY_STR_LEN];
	PGresult   *result;
	bool		updated;

	log_debug(_("update_node_record_set_upstream(): Updating node %i's upstream node to %i\n"), this_node_id, new_upstream_node_id);

	sqlquery_snprintf(query,
					  "  UPDATE %s.repl_nodes "
					  "     SET upstream_node_id = %i "
					  "   WHERE cluster = '%s' "
					  "     AND id = %i ",
					  get_repmgr_schema_quoted(conn),
					  new_upstream_node_id,
					  cluster_name,
					  this_node_id);

	result = PQexec(conn, query);
	updated = (PQresultStatus(result) == PGRES_COMMAND_OK);

	if (!updated)
	{
		log_err(_("Unable to set new upstream node id: %s\n"),
				PQerrorMessage(conn));
	}

	PQclear(result);

	return updated;
}
示例#5
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * Put the server into backup mode by calling pg_start_backup() with a label
 * of the form 'repmgr_standby_clone_<seconds since the epoch>'.
 *
 * conn               connection the backup is started on
 * first_wal_segment  optional output buffer; when non-NULL it receives the
 *                    file name pg_xlogfile_name() reports for the value
 *                    returned by pg_start_backup().  At most MAXLEN bytes
 *                    (terminator included) are written.
 *                    NOTE(review): this assumes callers provide a buffer of
 *                    at least MAXLEN bytes - confirm against the call sites.
 * fast_checkpoint    forwarded as the second argument of pg_start_backup()
 *
 * Returns true on success, false (after logging) when the query fails.
 */
bool
start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint)
{
	char		sqlquery[QUERY_STR_LEN];
	PGresult   *res;

	/* time_t is not guaranteed to be long, so convert it to match %ld */
	sqlquery_snprintf(sqlquery,
					  "SELECT pg_catalog.pg_xlogfile_name(pg_catalog.pg_start_backup('repmgr_standby_clone_%ld', %s))",
					  (long) time(NULL),
					  fast_checkpoint ? "TRUE" : "FALSE");

	log_debug(_("standby clone: %s\n"), sqlquery);

	res = PQexec(conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("unable to start backup: %s\n"), PQerrorMessage(conn));
		PQclear(res);
		return false;
	}

	if (first_wal_segment != NULL)
	{
		/*
		 * Write into the caller's buffer.  Assigning a new allocation to the
		 * parameter would only change this function's copy of the pointer:
		 * the caller would never see the value and the memory would leak.
		 */
		xsnprintf(first_wal_segment, MAXLEN, "%s", PQgetvalue(res, 0, 0));
	}

	PQclear(res);

	return true;
}
示例#6
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * Delete the repl_nodes row whose id equals node.
 *
 * When action is non-NULL it is used as a prefix for a debug log line
 * showing the statement.  Returns true when the DELETE finishes with
 * PGRES_COMMAND_OK; otherwise a warning is logged and false is returned.
 */
bool
delete_node_record(PGconn *conn, int node, char *action)
{
	PGresult   *result;
	char		query[QUERY_STR_LEN];
	bool		deleted;

	sqlquery_snprintf(query,
					  "DELETE FROM %s.repl_nodes "
					  " WHERE id = %d",
					  get_repmgr_schema_quoted(conn),
					  node);

	if (action != NULL)
		log_debug(_("%s: %s\n"), action, query);

	result = PQexec(conn, query);
	deleted = (result != NULL && PQresultStatus(result) == PGRES_COMMAND_OK);

	if (!deleted)
	{
		log_warning(_("Unable to delete node record: %s\n"),
					PQerrorMessage(conn));
	}

	/* runs on both outcomes, including a NULL result, as before */
	PQclear(result);

	return deleted;
}
示例#7
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * End backup mode by calling pg_stop_backup().
 *
 * conn              connection the backup was started on
 * last_wal_segment  optional output buffer; when non-NULL it receives the
 *                   file name pg_xlogfile_name() reports for the value
 *                   returned by pg_stop_backup().  At most MAXLEN bytes
 *                   (terminator included) are written.
 *                   NOTE(review): this assumes callers provide a buffer of
 *                   at least MAXLEN bytes - confirm against the call sites.
 *
 * Returns true on success, false (after logging) when the query fails.
 */
bool
stop_backup(PGconn *conn, char *last_wal_segment)
{
	char		sqlquery[QUERY_STR_LEN];
	PGresult   *res;

	sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_xlogfile_name(pg_catalog.pg_stop_backup())");

	res = PQexec(conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("unable to stop backup: %s\n"), PQerrorMessage(conn));
		PQclear(res);
		return false;
	}

	if (last_wal_segment != NULL)
	{
		/*
		 * Write into the caller's buffer.  Assigning a new allocation to the
		 * parameter would only change this function's copy of the pointer:
		 * the caller would never see the value and the memory would leak.
		 */
		xsnprintf(last_wal_segment, MAXLEN, "%s", PQgetvalue(res, 0, 0));
	}

	PQclear(res);

	return true;
}
示例#8
0
/*
 * Confirm that the repl_nodes table of the configured repmgr schema can be
 * found through conn.
 *
 * A failed query exits with ERR_DB_QUERY; a successful query returning no
 * row exits with ERR_BAD_CONFIG.  Both paths release the result and close
 * all connections first.  The primary argument is not used.
 */
static void
checkClusterConfiguration(PGconn *conn, PGconn *primary)
{
	PGresult   *res;
	int			exit_code;

	log_info(_("%s Checking cluster configuration with schema '%s'\n"),
	         progname, repmgr_schema);
	sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_class "
	                  " WHERE oid = '%s.repl_nodes'::regclass",
	                  repmgr_schema);
	res = PQexec(conn, sqlquery);

	if (PQresultStatus(res) == PGRES_TUPLES_OK)
	{
		if (PQntuples(res) != 0)
		{
			/* table located: nothing more to check */
			PQclear(res);
			return;
		}

		/*
		 * No rows: either no primary node has been configured in repmgr yet
		 * or the connection string points at the wrong database.
		 *
		 * XXX if we are the primary, should we try to create the tables
		 * needed?
		 */
		log_err("The replication cluster is not configured\n");
		exit_code = ERR_BAD_CONFIG;
	}
	else
	{
		log_err("PQexec failed: %s\n", PQerrorMessage(conn));
		exit_code = ERR_DB_QUERY;
	}

	PQclear(res);
	CloseConnections();
	exit(exit_code);
}
示例#9
0
/*
 * Test a server setting: returns true when pg_settings has a row for
 * parameter whose setting satisfies "setting <op> '<value>'", false when
 * it has none.
 *
 * A failed query is fatal: the error is logged, the connection is closed
 * and the process exits with ERR_DB_QUERY.
 */
bool
guc_setted(PGconn *conn, const char *parameter, const char *op,
           const char *value)
{
	char		query[QUERY_STR_LEN];
	PGresult	*result;
	bool		matched;

	sqlquery_snprintf(query, "SELECT true FROM pg_settings "
	                  " WHERE name = '%s' AND setting %s '%s'",
	                  parameter, op, value);

	result = PQexec(conn, query);
	if (PQresultStatus(result) != PGRES_TUPLES_OK)
	{
		log_err(_("GUC setting check PQexec failed: %s"),
		        PQerrorMessage(conn));
		PQclear(result);
		PQfinish(conn);
		exit(ERR_DB_QUERY);
	}

	matched = (PQntuples(result) != 0);
	PQclear(result);

	return matched;
}
示例#10
0
/*
 * Return the summed size of all databases, formatted by the server's
 * pg_size_pretty().
 *
 * The returned pointer refers to a static buffer inside this function: it
 * stays valid after the query result has been released, but its contents
 * are overwritten by the next call, so callers that need to keep the value
 * must copy it.
 *
 * A failed query is fatal: the error is logged, the connection is closed
 * and the process exits with ERR_DB_QUERY.
 */
const char *
get_cluster_size(PGconn *conn)
{
	PGresult	*res;
	static char	 size[MAXLEN];
	char		 sqlquery[QUERY_STR_LEN];

	sqlquery_snprintf(
	    sqlquery,
	    "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) "
	    "	 FROM pg_database ");

	res = PQexec(conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("Get cluster size PQexec failed: %s"),
		        PQerrorMessage(conn));
		PQclear(res);
		PQfinish(conn);
		exit(ERR_DB_QUERY);
	}

	/*
	 * PQgetvalue() returns storage owned by the result, which PQclear()
	 * frees; copy the text out first so the caller never receives a
	 * dangling pointer.
	 */
	strncpy(size, PQgetvalue(res, 0, 0), MAXLEN - 1);
	size[MAXLEN - 1] = '\0';

	PQclear(res);
	return size;
}
示例#11
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * Fetch the summed size of all databases, formatted by the server's
 * pg_size_pretty(), into size.
 *
 * The value is copied with strncpy() using a length of MAXLEN.  Returns
 * true on success; on a NULL or failed result an error is logged and false
 * is returned without touching size.
 */
bool
get_cluster_size(PGconn *conn, char *size)
{
	char		query[QUERY_STR_LEN];
	PGresult   *result;
	bool		fetched = false;

	sqlquery_snprintf(
					  query,
				 "SELECT pg_catalog.pg_size_pretty(SUM(pg_catalog.pg_database_size(oid))::bigint) "
					  "	 FROM pg_database ");

	result = PQexec(conn, query);

	if (result != NULL && PQresultStatus(result) == PGRES_TUPLES_OK)
	{
		strncpy(size, PQgetvalue(result, 0, 0), MAXLEN);
		fetched = true;
	}
	else
	{
		log_err(_("get_cluster_size(): PQexec failed: %s"),
				PQerrorMessage(conn));
	}

	PQclear(result);
	return fetched;
}
示例#12
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * Check whether the server behind conn is reachable.
 *
 * The connection status is inspected and, when it is CONNECTION_OK, a
 * "SELECT 1" is sent with PQsendQuery().  If any step fails, the connection
 * is reset once with PQreset() and the whole sequence is repeated; a second
 * failure makes the function return false.
 *
 * timeout is passed through to cancel_query() and
 * wait_connection_availability().  The result of the "SELECT 1" is not
 * fetched inside this function.
 *
 * Returns true when the query was sent and the following wait reported 1,
 * false after two failed attempts.
 */
bool
is_pgup(PGconn *conn, int timeout)
{
	char		sqlquery[QUERY_STR_LEN];

	/* Becomes true once a reset has been tried; the next failure is final */
	bool		twice = false;

	/* Runs at most twice: the initial attempt and one retry after a reset */
	for (;;)
	{
		if (PQstatus(conn) != CONNECTION_OK)
		{
			if (twice)
				return false;
			PQreset(conn);		/* reconnect */
			twice = true;
		}
		else
		{
			/*
			 * Send a SELECT 1 just to check if the connection is OK.
			 * Before sending, cancel_query() and
			 * wait_connection_availability() must both succeed -
			 * presumably to clear any statement still in flight on this
			 * connection; verify against those helpers.
			 */
			if (!cancel_query(conn, timeout))
				goto failed;
			if (wait_connection_availability(conn, timeout) != 1)
				goto failed;

			sqlquery_snprintf(sqlquery, "SELECT 1");
			if (PQsendQuery(conn, sqlquery) == 0)
			{
				log_warning(_("PQsendQuery: Query could not be sent to primary. %s\n"),
							PQerrorMessage(conn));
				goto failed;
			}
			if (wait_connection_availability(conn, timeout) != 1)
				goto failed;

			/* every step succeeded: leave the loop and report success */
			break;

	failed:

			/*
			 * we need to retry, because we might just have lost the
			 * connection once
			 */
			if (twice)
				return false;
			PQreset(conn);		/* reconnect */
			twice = true;
		}
	}
	return true;
}
示例#13
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * Look up a server setting in pg_settings and copy its value into output.
 *
 * conn     connection to query
 * setting  name of the setting to fetch
 * output   destination buffer; up to MAXLEN bytes are written, so it must
 *          be at least that large.  The copy is always NUL-terminated.
 *
 * Returns true only when a row whose name equals setting was returned and
 * its value was copied.  Returns false when the query fails or when no
 * matching row exists; in both cases output is left untouched.
 */
bool
get_pg_setting(PGconn *conn, const char *setting, char *output)
{
	char		sqlquery[QUERY_STR_LEN];
	PGresult   *res;
	int			i;
	/* start pessimistic so that "no row returned" is reported as failure */
	bool		success = false;

	sqlquery_snprintf(sqlquery,
					  "SELECT name, setting "
					  " FROM pg_settings WHERE name = '%s'",
					  setting);

	log_debug(_("get_pg_setting(): %s\n"), sqlquery);

	res = PQexec(conn, sqlquery);

	if (res == NULL || PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("get_pg_setting() - PQexec failed: %s"),
				PQerrorMessage(conn));
		PQclear(res);
		return false;
	}

	for (i = 0; i < PQntuples(res); i++)
	{
		if (strcmp(PQgetvalue(res, i, 0), setting) == 0)
		{
			/* strncpy alone does not terminate a value that fills the buffer */
			strncpy(output, PQgetvalue(res, i, 1), MAXLEN - 1);
			output[MAXLEN - 1] = '\0';
			success = true;
			break;
		}
		else
		{
			log_err(_("unknown parameter: %s"), PQgetvalue(res, i, 0));
		}
	}

	/* output is only meaningful, and only logged, after a successful copy */
	if (success == true)
	{
		log_debug(_("get_pg_setting(): returned value is '%s'\n"), output);
	}

	PQclear(res);

	return success;
}
示例#14
0
文件: repmgrd.c 项目: klando/repmgr
/*
 * Refresh this node's row in repl_nodes on the primary with the conninfo
 * and priority currently held in local_options.
 *
 * If the UPDATE does not complete with PGRES_COMMAND_OK the error is
 * logged, the result is released, all connections are closed and the
 * process exits with ERR_DB_CON.
 */
static void
update_registration(void)
{
	PGresult *res;

	sqlquery_snprintf(sqlquery, "UPDATE %s.repl_nodes "
	                  "   SET conninfo = '%s', "
	                  "       priority = %d "
	                  " WHERE id = %d",
	                  repmgr_schema, local_options.conninfo, local_options.priority, local_options.node);

	res = PQexec(primaryConn, sqlquery);
	if (PQresultStatus(res) != PGRES_COMMAND_OK)
	{
		log_err(_("Cannot update registration: %s\n"), PQerrorMessage(primaryConn));
		/* release the result before shutting down, as the other checks do */
		PQclear(res);
		CloseConnections();
		exit(ERR_DB_CON);
	}
	PQclear(res);
}
示例#15
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * Look up the id of the node recorded as the active master of the given
 * cluster.
 *
 * Returns that id, or NODE_NOT_FOUND when the query fails or returns no
 * row.  Only the value stored in repl_nodes is reported; whether the node
 * is actually reachable is not checked here.
 */
int
get_master_node_id(PGconn *conn, char *cluster)
{
	PGresult   *result;
	char		query[QUERY_STR_LEN];
	int			master_id = NODE_NOT_FOUND;

	sqlquery_snprintf(query,
					  "SELECT id              "
					  "  FROM %s.repl_nodes   "
					  " WHERE cluster = '%s'  "
					  "   AND type = 'master' "
					  "   AND active IS TRUE  ",
					  get_repmgr_schema_quoted(conn),
					  cluster);

	result = PQexec(conn, query);

	if (PQresultStatus(result) != PGRES_TUPLES_OK)
	{
		log_err(_("get_master_node_id(): query failed\n%s\n"),
				PQerrorMessage(conn));
	}
	else if (PQntuples(result) == 0)
	{
		log_warning(_("get_master_node_id(): no active primary found\n"));
	}
	else
	{
		/* first column of the first row holds the node id */
		master_id = atoi(PQgetvalue(result, 0, 0));
	}

	PQclear(result);

	return master_id;
}
示例#16
0
/*
 * Report whether the repl_nodes row for (cluster, node_id) has its witness
 * column set.
 *
 * Returns true only when exactly one row comes back and its value is "t".
 * A failed query is fatal: the error is logged, the connection is closed
 * and the process exits with ERR_DB_QUERY.
 */
bool
is_witness(PGconn *conn, char *schema, char *cluster, int node_id)
{
	char		query[QUERY_STR_LEN];
	PGresult   *rows;
	bool		witness_flag;

	sqlquery_snprintf(query, "SELECT witness from %s.repl_nodes where cluster = '%s' and id = %d",
	                  schema, cluster, node_id);

	rows = PQexec(conn, query);
	if (PQresultStatus(rows) != PGRES_TUPLES_OK)
	{
		log_err(_("Can't query server mode: %s"), PQerrorMessage(conn));
		PQclear(rows);
		PQfinish(conn);
		exit(ERR_DB_QUERY);
	}

	/* the value is only read when there is exactly one row */
	witness_flag = (PQntuples(rows) == 1 &&
	                strcmp(PQgetvalue(rows, 0, 0), "t") == 0);

	PQclear(rows);
	return witness_flag;
}
示例#17
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * Issue "SET <config_param> TO TRUE|FALSE" on conn, choosing the keyword
 * from state.
 *
 * Returns true when the command completes with PGRES_COMMAND_OK, false
 * (after logging) otherwise.
 */
bool
set_config_bool(PGconn *conn, const char *config_param, bool state)
{
	PGresult   *result;
	char		query[QUERY_STR_LEN];
	const char *keyword = state ? "TRUE" : "FALSE";
	bool		applied;

	sqlquery_snprintf(query,
					  "SET %s TO %s",
					  config_param,
					  keyword);

	result = PQexec(conn, query);
	applied = (PQresultStatus(result) == PGRES_COMMAND_OK);

	if (!applied)
		log_err("unable to set '%s': %s\n", config_param, PQerrorMessage(conn));

	PQclear(result);

	return applied;
}
示例#18
0
文件: repmgrd.c 项目: klando/repmgr
/*
 * Insert monitor info, this is basically the time and xlog replayed,
 * applied on standby and current xlog location in primary.
 * Also do the math to see how far are we in bytes for being uptodate
 *
 * Steps performed on each call:
 *   1. Check the primary connection; if it is not usable, either look for
 *      a newly promoted node (MANUAL_FAILOVER) or run do_failover()
 *      (AUTOMATIC_FAILOVER).
 *   2. Exit with ERR_PROMOTED if the local node is no longer a standby.
 *   3. Read the timestamp and the received/replayed WAL locations locally
 *      and the current WAL location from the primary.
 *   4. Send an INSERT into repl_monitor to the primary asynchronously; its
 *      result is not read here.
 *
 * A failing SELECT in step 3 is logged and the function simply returns so
 * that the next monitoring round can try again.
 */
static void
StandbyMonitor(void)
{
	PGresult *res;
	char monitor_standby_timestamp[MAXLEN];
	char last_wal_primary_location[MAXLEN];
	char last_wal_standby_received[MAXLEN];
	char last_wal_standby_applied[MAXLEN];

	unsigned long long int lsn_primary;
	unsigned long long int lsn_standby_received;
	unsigned long long int lsn_standby_applied;

	int	connection_retries;

	/*
	 * Check if the master is still available, if after 5 minutes of retries
	 * we cannot reconnect, try to get a new master.
	 */
	CheckPrimaryConnection(); // this take up to NUM_RETRY * SLEEP_RETRY seconds

	if (PQstatus(primaryConn) != CONNECTION_OK)
	{
		if (local_options.failover == MANUAL_FAILOVER)
		{
			log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n"));

			/*
			 * The pointer is about to be replaced, so release the dead
			 * connection first; otherwise its libpq resources are leaked.
			 */
			PQfinish(primaryConn);
			primaryConn = NULL;

			for (connection_retries = 0; connection_retries < 6; connection_retries++)
			{
				primaryConn = getMasterConnection(myLocalConn, repmgr_schema, local_options.node,
				                                  local_options.cluster_name, &primary_options.node, NULL);
				if (PQstatus(primaryConn) == CONNECTION_OK)
				{
					/* Connected, we can continue the process so break the loop */
					log_err(_("Connected to node %d, continue monitoring.\n"), primary_options.node);
					break;
				}
				else
				{
					log_err(_("We haven't found a new master, waiting before retry...\n"));
					/* wait 5 minutes before retries, after 6 failures (30 minutes) we stop trying */
					sleep(300);
				}
			}

			if (PQstatus(primaryConn) != CONNECTION_OK)
			{
				log_err(_("We couldn't reconnect for long enough, exiting...\n"));
				exit(ERR_DB_CON);
			}
		}
		else if (local_options.failover == AUTOMATIC_FAILOVER)
		{
			/*
			 * When we returns from this function we will have a new primary and
			 * a new primaryConn
			 */
			do_failover();
		}
	}

	/* Check if we still are a standby, we could have been promoted */
	if (!is_standby(myLocalConn))
	{
		log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
		CloseConnections();
		exit(ERR_PROMOTED);
	}

	/*
	 * first check if there is a command being executed,
	 * and if that is the case, cancel the query so i can
	 * insert the current record
	 */
	if (PQisBusy(primaryConn) == 1)
		CancelQuery();

	/* Get local xlog info */
	sqlquery_snprintf(
	    sqlquery,
	    "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
	    "pg_last_xlog_replay_location()");

	res = PQexec(myLocalConn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("PQexec failed: %s\n"), PQerrorMessage(myLocalConn));
		PQclear(res);
		/* if there is any error just let it be and retry in next loop */
		return;
	}

	/*
	 * strncpy does not terminate the destination when the source fills it,
	 * so copy one byte less and terminate explicitly.
	 */
	strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN - 1);
	monitor_standby_timestamp[MAXLEN - 1] = '\0';
	strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN - 1);
	last_wal_standby_received[MAXLEN - 1] = '\0';
	strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN - 1);
	last_wal_standby_applied[MAXLEN - 1] = '\0';
	PQclear(res);

	/* Get primary xlog info */
	sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location() ");

	res = PQexec(primaryConn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("PQexec failed: %s\n"), PQerrorMessage(primaryConn));
		PQclear(res);
		return;
	}

	strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN - 1);
	last_wal_primary_location[MAXLEN - 1] = '\0';
	PQclear(res);

	/* Calculate the lag */
	lsn_primary = walLocationToBytes(last_wal_primary_location);
	lsn_standby_received = walLocationToBytes(last_wal_standby_received);
	lsn_standby_applied = walLocationToBytes(last_wal_standby_applied);

	/*
	 * Build the SQL to execute on primary.  The lag values are converted to
	 * long long so that the arguments match the %lld conversions.
	 */
	sqlquery_snprintf(sqlquery,
	                  "INSERT INTO %s.repl_monitor "
	                  "VALUES(%d, %d, '%s'::timestamp with time zone, "
	                  " '%s', '%s', "
	                  " %lld, %lld)", repmgr_schema,
	                  primary_options.node, local_options.node, monitor_standby_timestamp,
	                  last_wal_primary_location,
	                  last_wal_standby_received,
	                  (long long) (lsn_primary - lsn_standby_received),
	                  (long long) (lsn_standby_received - lsn_standby_applied));

	/*
	 * Execute the query asynchronously, but don't check for a result. We
	 * will check the result next time we pause for a monitor step.
	 */
	if (PQsendQuery(primaryConn, sqlquery) == 0)
		log_warning(_("Query could not be sent to primary. %s\n"),
		            PQerrorMessage(primaryConn));
}
示例#19
0
/*
 * Get a connection to the master by reading repl_nodes, connecting to each
 * non-witness node of the cluster in turn and asking it whether it is in
 * recovery.
 *
 * standby_conn         connection used to read the node list
 * schema               repmgr schema name; it is quoted as an identifier
 *                      before being placed in the query
 * cluster              cluster name to filter repl_nodes on
 * master_id            output: id of the node currently being examined; it
 *                      is reset to -1 each time a node turns out to be a
 *                      standby
 * master_conninfo_out  may be NULL.  If it is non-NULL, it is assumed to
 *                      point to allocated memory of MAXCONNINFO in length,
 *                      and the connection string of the node being examined
 *                      is placed there.
 *
 * Returns an open connection to the first node that reports
 * pg_is_in_recovery() = false, or NULL when no such node could be reached.
 * If the node list itself cannot be read, standby_conn is closed and the
 * process exits with ERR_DB_QUERY.
 */
PGconn *
getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
                    int *master_id, char *master_conninfo_out)
{
	PGconn		*master_conn	 = NULL;
	PGresult	*res1;
	PGresult	*res2;
	char		 sqlquery[QUERY_STR_LEN];
	char		 master_conninfo_stack[MAXCONNINFO];
	char		*master_conninfo = master_conninfo_stack;
	char		 schema_quoted[MAXLEN];

	int		 i;

	/*
	 * If the caller wanted to get a copy of the connection info string, sub
	 * out the local stack pointer for the pointer passed by the caller.
	 */
	if (master_conninfo_out != NULL)
		master_conninfo = master_conninfo_out;

	/*
	 * XXX: This is copied in at least two other procedures
	 *
	 * Assemble the quoted schema name
	 */
	{
		char *identifier = PQescapeIdentifier(standby_conn, schema,
		                                      strlen(schema));

		maxlen_snprintf(schema_quoted, "%s", identifier);
		PQfreemem(identifier);
	}

	/* find all nodes belonging to this cluster */
	log_info(_("finding node list for cluster '%s'\n"),
	         cluster);

	sqlquery_snprintf(sqlquery, "SELECT id, conninfo FROM %s.repl_nodes "
	                  " WHERE cluster = '%s' and not witness",
	                  schema_quoted, cluster);

	res1 = PQexec(standby_conn, sqlquery);
	if (PQresultStatus(res1) != PGRES_TUPLES_OK)
	{
		log_err(_("Can't get nodes info: %s\n"),
		        PQerrorMessage(standby_conn));
		PQclear(res1);
		PQfinish(standby_conn);
		exit(ERR_DB_QUERY);
	}

	for (i = 0; i < PQntuples(res1); i++)
	{
		/* initialize with the values of the current node being processed */
		*master_id = atoi(PQgetvalue(res1, i, 0));

		/*
		 * strncpy does not terminate the destination when the source fills
		 * it, so copy one byte less and terminate explicitly.
		 */
		strncpy(master_conninfo, PQgetvalue(res1, i, 1), MAXCONNINFO - 1);
		master_conninfo[MAXCONNINFO - 1] = '\0';

		log_info(_("checking role of cluster node '%s'\n"),
		         master_conninfo);
		master_conn = establishDBConnection(master_conninfo, false);

		if (PQstatus(master_conn) != CONNECTION_OK)
		{
			/*
			 * A failed connection attempt still leaves a PGconn object
			 * behind that must be released; PQfinish accepts NULL too.
			 */
			PQfinish(master_conn);
			master_conn = NULL;
			continue;
		}

		/*
		 * Can't use the is_standby() function here because on error that
		 * function closes the connection passed and exits.  This still
		 * needs to close master_conn first.
		 */
		res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()");

		if (PQresultStatus(res2) != PGRES_TUPLES_OK)
		{
			log_err(_("Can't get recovery state from this node: %s\n"),
			        PQerrorMessage(master_conn));
			PQclear(res2);
			PQfinish(master_conn);
			continue;
		}

		/* if false, this is the master */
		if (strcmp(PQgetvalue(res2, 0, 0), "f") == 0)
		{
			PQclear(res2);
			PQclear(res1);
			return master_conn;
		}
		else
		{
			/* if it is a standby clear info */
			PQclear(res2);
			PQfinish(master_conn);
			*master_id = -1;
		}
	}

	/* If we finish this loop without finding a master then
	 * we don't have the info or the master has failed (or we
	 * reached max_connections or superuser_reserved_connections,
	 * anything else I'm missing?).
	 *
	 * Probably we will need to check the error to know if we need
	 * to start failover procedure or just fix some situation on the
	 * standby.
	 */
	PQclear(res1);
	return NULL;
}
示例#20
0
/*
 * One monitoring round for a witness node.
 *
 * The primary connection is checked first; if it is not usable the process
 * terminates.  When monitoring history is enabled, the local server's
 * CURRENT_TIMESTAMP is read and an INSERT into repl_monitor is sent to the
 * primary asynchronously; its result is not read here.
 *
 * Failures while cancelling the previous query, waiting for the connection
 * or reading the timestamp simply end this round.
 */
static void
witness_monitor(void)
{
	char		monitor_witness_timestamp[MAXLEN];
	PGresult   *res;
	char		sqlquery[QUERY_STR_LEN];

	/*
	 * Check if the master is still available, if after 5 minutes of retries
	 * we cannot reconnect, return false.
	 *
	 * This takes up to local_options.reconnect_attempts *
	 * local_options.reconnect_intvl seconds.
	 */
	check_connection(primary_conn, "master");

	if (PQstatus(primary_conn) != CONNECTION_OK)
	{
		/*
		 * If we can't reconnect, just exit... XXX we need to make witness
		 * connect to the new master
		 */
		terminate(0);
	}

	/* Fast path for the case where no history is requested */
	if (!monitoring_history)
		return;

	/*
	 * Cancel any query that is still being executed, so i can insert the
	 * current record
	 */
	if (!cancel_query(primary_conn, local_options.master_response_timeout))
		return;
	if (wait_connection_availability(primary_conn,
								 local_options.master_response_timeout) != 1)
		return;

	/* Get the local server's current timestamp */
	sqlquery_snprintf(sqlquery, "SELECT CURRENT_TIMESTAMP ");

	res = PQexec(my_local_conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("PQexec failed: %s\n"), PQerrorMessage(my_local_conn));
		PQclear(res);
		/* if there is any error just let it be and retry in next loop */
		return;
	}

	/*
	 * Bounded copy: the value comes from the server, so never write past
	 * the end of the fixed-size buffer, and always terminate it.
	 */
	strncpy(monitor_witness_timestamp, PQgetvalue(res, 0, 0), MAXLEN - 1);
	monitor_witness_timestamp[MAXLEN - 1] = '\0';
	PQclear(res);

	/*
	 * Build the SQL to execute on primary
	 */
	sqlquery_snprintf(sqlquery,
					  "INSERT INTO %s.repl_monitor "
					  "VALUES(%d, %d, '%s'::timestamp with time zone, "
					  " pg_current_xlog_location(), null,  "
					  " 0, 0)",
					  repmgr_schema, primary_options.node, local_options.node,
					  monitor_witness_timestamp);

	/*
	 * Execute the query asynchronously, but don't check for a result. We will
	 * check the result next time we pause for a monitor step.
	 */
	log_debug("witness_monitor: %s\n", sqlquery);
	if (PQsendQuery(primary_conn, sqlquery) == 0)
		log_warning(_("Query could not be sent to primary. %s\n"),
					PQerrorMessage(primary_conn));
}
示例#21
0
/*
 * Insert monitor info, this is basically the time and xlog replayed,
 * applied on standby and current xlog location in primary.
 * Also do the math to see how far are we in bytes for being uptodate
 *
 * Steps performed on each call:
 *   1. Check the primary and the local connections; terminate if the local
 *      node cannot be reached.
 *   2. If the primary connection is not usable, close it and either look
 *      for a newly promoted node (MANUAL_FAILOVER) or run do_failover() and
 *      return (AUTOMATIC_FAILOVER).
 *   3. Confirm the local node is still a standby, reconnecting as long as
 *      is_standby() reports -1.
 *   4. When monitoring history is enabled, read the local and primary WAL
 *      positions and send an INSERT into repl_monitor to the primary
 *      asynchronously; its result is not read here.
 */
static void
standby_monitor(void)
{
	PGresult   *res;
	char		monitor_standby_timestamp[MAXLEN];
	char		last_wal_primary_location[MAXLEN];
	char		last_wal_standby_received[MAXLEN];
	char		last_wal_standby_applied[MAXLEN];
	char		last_wal_standby_applied_timestamp[MAXLEN];
	char		sqlquery[QUERY_STR_LEN];

	unsigned long long int lsn_primary;
	unsigned long long int lsn_standby_received;
	unsigned long long int lsn_standby_applied;

	int			connection_retries,
				ret;
	bool		did_retry = false;

	/*
	 * Check if the master is still available, if after 5 minutes of retries
	 * we cannot reconnect, try to get a new master.
	 *
	 * This takes up to local_options.reconnect_attempts *
	 * local_options.reconnect_intvl seconds.
	 */
	check_connection(primary_conn, "master");

	if (!check_connection(my_local_conn, "standby"))
	{
		log_err("Failed to connect to local node, exiting!\n");
		terminate(1);
	}

	if (PQstatus(primary_conn) != CONNECTION_OK)
	{
		PQfinish(primary_conn);
		primary_conn = NULL;

		if (local_options.failover == MANUAL_FAILOVER)
		{
			log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n"));
			for (connection_retries = 0; connection_retries < 6; connection_retries++)
			{
				primary_conn = get_master_connection(my_local_conn, repmgr_schema,
					local_options.cluster_name, &primary_options.node, NULL);
				if (PQstatus(primary_conn) == CONNECTION_OK)
				{
					/*
					 * Connected, we can continue the process so break the
					 * loop
					 */
					log_err(_("Connected to node %d, continue monitoring.\n"),
							primary_options.node);
					break;
				}
				else
				{
					log_err(_("We haven't found a new master, waiting before retry...\n"));

					/*
					 * wait local_options.retry_promote_interval_secs
					 * seconds before retrying; after 6 failures (6 *
					 * local_options.retry_promote_interval_secs seconds)
					 * we stop trying
					 */
					sleep(local_options.retry_promote_interval_secs);
				}
			}

			if (PQstatus(primary_conn) != CONNECTION_OK)
			{
				log_err(_("We couldn't reconnect for long enough, exiting...\n"));
				terminate(ERR_DB_CON);
			}
		}
		else if (local_options.failover == AUTOMATIC_FAILOVER)
		{
			/*
			 * When we returns from this function we will have a new primary
			 * and a new primary_conn
			 */
			do_failover();
			return;
		}
	}

	/* Check if we still are a standby, we could have been promoted */
	do
	{
		ret = is_standby(my_local_conn);

		switch (ret)
		{
			case 0:
				log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
				terminate(1);
				break;

			case -1:
				log_err(_("Standby node disappeared, trying to reconnect...\n"));
				did_retry = true;

				if (!check_connection(my_local_conn, "standby"))
				{
					terminate(0);
				}

				break;
		}
	} while (ret == -1);

	if (did_retry)
	{
		log_info(_("standby connection got back up again!\n"));
	}

	/* Fast path for the case where no history is requested */
	if (!monitoring_history)
		return;

	/*
	 * Cancel any query that is still being executed, so i can insert the
	 * current record
	 */
	if (!cancel_query(primary_conn, local_options.master_response_timeout))
		return;
	if (wait_connection_availability(primary_conn, local_options.master_response_timeout) != 1)
		return;

	/* Get local xlog info */
	sqlquery_snprintf(
					  sqlquery,
				"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
		  "pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp()");

	res = PQexec(my_local_conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("PQexec failed: %s\n"), PQerrorMessage(my_local_conn));
		PQclear(res);
		/* if there is any error just let it be and retry in next loop */
		return;
	}

	/*
	 * strncpy does not terminate the destination when the source fills it,
	 * so copy one byte less and terminate explicitly.
	 */
	strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN - 1);
	monitor_standby_timestamp[MAXLEN - 1] = '\0';
	strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN - 1);
	last_wal_standby_received[MAXLEN - 1] = '\0';
	strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN - 1);
	last_wal_standby_applied[MAXLEN - 1] = '\0';
	strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN - 1);
	last_wal_standby_applied_timestamp[MAXLEN - 1] = '\0';
	PQclear(res);

	/* Get primary xlog info */
	sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location() ");

	res = PQexec(primary_conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("PQexec failed: %s\n"), PQerrorMessage(primary_conn));
		PQclear(res);
		return;
	}

	strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN - 1);
	last_wal_primary_location[MAXLEN - 1] = '\0';
	PQclear(res);

	/* Calculate the lag */
	lsn_primary = wal_location_to_bytes(last_wal_primary_location);
	lsn_standby_received = wal_location_to_bytes(last_wal_standby_received);
	lsn_standby_applied = wal_location_to_bytes(last_wal_standby_applied);

	/*
	 * Build the SQL to execute on primary.  The lag values are converted to
	 * long long so that the arguments match the %lld conversions.
	 */
	sqlquery_snprintf(sqlquery,
					  "INSERT INTO %s.repl_monitor "
					  "VALUES(%d, %d, '%s'::timestamp with time zone, "
					  " '%s'::timestamp with time zone, '%s', '%s', "
					  " %lld, %lld)", repmgr_schema,
		 primary_options.node, local_options.node, monitor_standby_timestamp,
					  last_wal_standby_applied_timestamp,
					  last_wal_primary_location,
					  last_wal_standby_received,
					  (long long) (lsn_primary - lsn_standby_received),
					  (long long) (lsn_standby_received - lsn_standby_applied));

	/*
	 * Execute the query asynchronously, but don't check for a result. We will
	 * check the result next time we pause for a monitor step.
	 */
	log_debug("standby_monitor: %s\n", sqlquery);
	if (PQsendQuery(primary_conn, sqlquery) == 0)
		log_warning(_("Query could not be sent to primary. %s\n"),
					PQerrorMessage(primary_conn));
}
示例#22
0
static void
do_failover(void)
{
	PGresult   *res;
	char		sqlquery[QUERY_STR_LEN];

	int			total_nodes = 0;
	int			visible_nodes = 0;
	int			ready_nodes = 0;

	bool		find_best = false;

	int			i;
	int			r;

	uint32		uxlogid;
	uint32		uxrecoff;
	XLogRecPtr	xlog_recptr;

	char		last_wal_standby_applied[MAXLEN];

	PGconn	   *node_conn = NULL;

	/*
	 * will get info about until 50 nodes, which seems to be large enough for
	 * most scenarios
	 */
	t_node_info nodes[50];

	/* initialize to keep compiler quiet */
	t_node_info best_candidate = {-1, "", InvalidXLogRecPtr, false, false, false};

	/* get a list of standby nodes, including myself */
	sprintf(sqlquery, "SELECT id, conninfo, witness "
			"  FROM %s.repl_nodes "
			" WHERE cluster = '%s' "
			" ORDER BY priority, id ",
			repmgr_schema, local_options.cluster_name);

	res = PQexec(my_local_conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("Can't get nodes' info: %s\n"), PQerrorMessage(my_local_conn));
		PQclear(res);
		terminate(ERR_DB_QUERY);
	}

	/*
	 * total nodes that are registered
	 */
	total_nodes = PQntuples(res);
	log_debug(_("%s: there are %d nodes registered\n"), progname, total_nodes);

	/*
	 * Build an array with the nodes and indicate which ones are visible and
	 * ready
	 */
	for (i = 0; i < total_nodes; i++)
	{
		nodes[i].node_id = atoi(PQgetvalue(res, i, 0));
		strncpy(nodes[i].conninfo_str, PQgetvalue(res, i, 1), MAXLEN);
		nodes[i].is_witness = (strcmp(PQgetvalue(res, i, 2), "t") == 0) ? true : false;

		/*
		 * Initialize on false so if we can't reach this node we know that
		 * later
		 */
		nodes[i].is_visible = false;
		nodes[i].is_ready = false;

		XLAssignValue(nodes[i].xlog_location, 0, 0);

		log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"),
				  progname, nodes[i].node_id, nodes[i].conninfo_str,
				  (nodes[i].is_witness) ? "true" : "false");

		node_conn = establish_db_connection(nodes[i].conninfo_str, false);

		/* if we can't see the node just skip it */
		if (PQstatus(node_conn) != CONNECTION_OK)
		{
			if (node_conn != NULL)
				PQfinish(node_conn);

			continue;
		}

		visible_nodes++;
		nodes[i].is_visible = true;

		PQfinish(node_conn);
	}
	PQclear(res);

	log_debug(_("Total nodes counted: registered=%d, visible=%d\n"),
			  total_nodes, visible_nodes);

	/*
	 * am i on the group that should keep alive? if i see less than half of
	 * total_nodes then i should do nothing
	 */
	if (visible_nodes < (total_nodes / 2.0))
	{
		log_err(_("Can't reach most of the nodes.\n"
				  "Let the other standby servers decide which one will be the primary.\n"
		"Manual action will be needed to readd this node to the cluster.\n"));
		terminate(ERR_FAILOVER_FAIL);
	}

	/* Query all the nodes to determine which ones are ready */
	for (i = 0; i < total_nodes; i++)
	{
		/* if the node is not visible, skip it */
		if (!nodes[i].is_visible)
			continue;

		if (nodes[i].is_witness)
			continue;

		node_conn = establish_db_connection(nodes[i].conninfo_str, false);

		/*
		 * XXX This shouldn't happen, if this happens it means this is a major
		 * problem maybe network outages? anyway, is better for a human to
		 * react
		 */
		if (PQstatus(node_conn) != CONNECTION_OK)
		{
			log_err(_("It seems new problems are arising, manual intervention is needed\n"));
			terminate(ERR_FAILOVER_FAIL);
		}

		uxlogid = 0;
		uxrecoff = 0;

		sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
		res = PQexec(node_conn, sqlquery);
		if (PQresultStatus(res) != PGRES_TUPLES_OK)
		{
			log_info(_("Can't get node's last standby location: %s\n"),
					 PQerrorMessage(node_conn));
			log_info(_("Connection details: %s\n"), nodes[i].conninfo_str);
			PQclear(res);
			PQfinish(node_conn);
			terminate(ERR_FAILOVER_FAIL);
		}

		if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
			log_info(_("could not parse transaction log location \"%s\"\n"),
					 PQgetvalue(res, 0, 0));

		log_debug("XLog position of node %d: log id=%u (%X), offset=%u (%X)\n",
				  nodes[i].node_id, uxlogid, uxlogid, uxrecoff, uxrecoff);

		/* If position is 0/0, error */
		if (uxlogid == 0 && uxrecoff == 0)
		{
			PQclear(res);
			PQfinish(node_conn);
			log_info(_("InvalidXLogRecPtr detected in a standby\n"));
			terminate(ERR_FAILOVER_FAIL);
		}

		XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff);

		PQclear(res);
		PQfinish(node_conn);
	}

	/* last we get info about this node, and update shared memory */
	sprintf(sqlquery, "SELECT pg_last_xlog_receive_location()");
	res = PQexec(my_local_conn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("PQexec failed: %s.\nReport an invalid value to not be "
				  " considered as new primary and exit.\n"),
				PQerrorMessage(my_local_conn));
		PQclear(res);
		sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
		update_shared_memory(last_wal_standby_applied);
		terminate(ERR_DB_QUERY);
	}

	/* write last location in shared memory */
	update_shared_memory(PQgetvalue(res, 0, 0));
	PQclear(res);

	for (i = 0; i < total_nodes; i++)
	{
		while (!nodes[i].is_ready)
		{
			/*
			 * the witness will always be masked as ready if it's still not
			 * marked that way and avoid a useless query
			 */
			if (nodes[i].is_witness)
			{
				if (!nodes[i].is_ready)
				{
					nodes[i].is_ready = true;
					ready_nodes++;
				}
				break;
			}

			/* if the node is not visible, skip it */
			if (!nodes[i].is_visible)
				break;

			/* if the node is ready there is nothing to check, skip it too */
			if (nodes[i].is_ready)
				break;

			node_conn = establish_db_connection(nodes[i].conninfo_str, false);

			/*
			 * XXX This shouldn't happen, if this happens it means this is a
			 * major problem maybe network outages? anyway, is better for a
			 * human to react
			 */
			if (PQstatus(node_conn) != CONNECTION_OK)
			{
				/* XXX */
				log_info(_("At this point, it could be some race conditions "
						"that are acceptable, assume the node is restarting "
						   "and starting failover procedure\n"));
				break;
			}

			uxlogid = 0;
			uxrecoff = 0;

			sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()",
							  repmgr_schema);
			res = PQexec(node_conn, sqlquery);
			if (PQresultStatus(res) != PGRES_TUPLES_OK)
			{
				log_err(_("PQexec failed: %s.\nReport an invalid value to not"
						  "be considered as new primary and exit.\n"),
						PQerrorMessage(node_conn));
				PQclear(res);
				PQfinish(node_conn);
				terminate(ERR_DB_QUERY);
			}

			if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
			{
				log_info(_("could not parse transaction log location \"%s\"\n"),
						 PQgetvalue(res, 0, 0));

				/* we can't do anything but fail at this point... */
				if (*PQgetvalue(res, 0, 0) == '\0')
				{
					log_crit("Whoops, seems as if shared_preload_libraries=repmgr_funcs is not set!\n");
					exit(ERR_BAD_CONFIG);
				}
			}


			PQclear(res);
			PQfinish(node_conn);
			/* If position is 0/0, keep checking */
			if (uxlogid == 0 && uxrecoff == 0)
				continue;

			XLAssignValue(xlog_recptr, uxlogid, uxrecoff);

			if (XLByteLT(nodes[i].xlog_location, xlog_recptr))
			{
				XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff);
			}

			log_debug("Last XLog position of node %d: log id=%u (%X), offset=%u (%X)\n",
					  nodes[i].node_id, uxlogid, uxlogid,
					  uxrecoff, uxrecoff);

			ready_nodes++;
			nodes[i].is_ready = true;
		}
	}

	/* Close the connection to this server */
	PQfinish(my_local_conn);
	my_local_conn = NULL;

	/*
	 * determine which one is the best candidate to promote to primary
	 */
	for (i = 0; i < total_nodes; i++)
	{
		/* witness is never a good candidate */
		if (nodes[i].is_witness)
			continue;

		if (!nodes[i].is_ready || !nodes[i].is_visible)
			continue;

		if (!find_best)
		{
			/*
			 * start with the first ready node, and then move on to the next
			 * one
			 */
			best_candidate.node_id = nodes[i].node_id;
			XLAssign(best_candidate.xlog_location, nodes[i].xlog_location);
			best_candidate.is_ready = nodes[i].is_ready;
			best_candidate.is_witness = nodes[i].is_witness;
			find_best = true;
		}

		/* we use the macros provided by xlogdefs.h to compare XLogRecPtr */

		/*
		 * Nodes are retrieved ordered by priority, so if the current best
		 * candidate is lower than the next node's wal location then assign
		 * next node as the new best candidate.
		 */
		if (XLByteLT(best_candidate.xlog_location, nodes[i].xlog_location))
		{
			best_candidate.node_id = nodes[i].node_id;
			XLAssign(best_candidate.xlog_location, nodes[i].xlog_location);
			best_candidate.is_ready = nodes[i].is_ready;
			best_candidate.is_witness = nodes[i].is_witness;
		}
	}

	/* once we know who is the best candidate, promote it */
	if (find_best && (best_candidate.node_id == local_options.node))
	{
		if (best_candidate.is_witness)
		{
			log_err(_("%s: Node selected as new master is a witness. Can't be promoted.\n"),
					progname);
			terminate(ERR_FAILOVER_FAIL);
		}

		/* wait */
		sleep(5);

		if (verbose)
			log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"),
					 progname);
		log_debug(_("promote command is: \"%s\"\n"),
				  local_options.promote_command);

		if (log_type == REPMGR_STDERR && *local_options.logfile)
		{
			fflush(stderr);
		}

		r = system(local_options.promote_command);
		if (r != 0)
		{
			log_err(_("%s: promote command failed. You could check and try it manually.\n"),
					progname);
			terminate(ERR_BAD_CONFIG);
		}
	}
	else if (find_best)
	{
		/* wait */
		sleep(10);

		if (verbose)
			log_info(_("%s: Node %d is the best candidate to be the new primary, we should follow it...\n"),
					 progname, best_candidate.node_id);
		log_debug(_("follow command is: \"%s\"\n"), local_options.follow_command);

		/*
		 * New Primary need some time to be promoted. The follow command
		 * should take care of that.
		 */
		if (log_type == REPMGR_STDERR && *local_options.logfile)
		{
			fflush(stderr);
		}

		r = system(local_options.follow_command);
		if (r != 0)
		{
			log_err(_("%s: follow command failed. You could check and try it manually.\n"),
					progname);
			terminate(ERR_BAD_CONFIG);
		}
	}
	else
	{
		log_err(_("%s: Did not find candidates. You should check and try manually.\n"),
				progname);
		terminate(ERR_FAILOVER_FAIL);
	}

	/* to force it to re-calculate mode and master node */
	failover_done = true;

	/* and reconnect to the local database */
	my_local_conn = establish_db_connection(local_options.conninfo, true);
}
示例#23
0
/*
 * MonitorExecute()
 *
 * Record one monitoring sample on the primary: the standby's current
 * timestamp, the primary's current xlog location, the standby's last
 * received location, and the two lags in bytes (primary vs. received,
 * received vs. replayed).
 *
 * Before sampling, the connection to the primary is verified: up to 15
 * resets 20 seconds apart, then up to 6 searches for a newly promoted
 * master 300 seconds apart. The process exits with ERR_DB_CON if no master
 * can be reached, with ERR_PROMOTED if this node is no longer a standby,
 * and with ERR_DB_QUERY if writing the sample fails. A failure to *read*
 * the locations only skips this sample.
 *
 * When only_one_entry_desired is set, a single row per primary/standby pair
 * is maintained: the first pass inserts a row and deletes older ones, later
 * passes update that row in place.
 */
static void
MonitorExecute(void)
{
	PGresult *res;
	char monitor_standby_timestamp[MAXLEN];
	char last_wal_primary_location[MAXLEN];
	char last_wal_standby_received[MAXLEN];
	char last_wal_standby_applied[MAXLEN];

	unsigned long long int lsn_primary;
	unsigned long long int lsn_standby_received;
	unsigned long long int lsn_standby_applied;

	/* signed copies of the lags, matching the %lld conversions below */
	long long replication_lag;
	long long apply_lag;

	int	connection_retries;

	/*
	 * Check if the master is still available, if after 5 minutes of retries
	 * we cannot reconnect, try to get a new master.
	 */
	for (connection_retries = 0; connection_retries < 15; connection_retries++)
	{
		if (PQstatus(primaryConn) != CONNECTION_OK)
		{
			log_warning(_("Connection to master has been lost, trying to recover...\n"));
			/* wait 20 seconds between retries */
			sleep(20);

			PQreset(primaryConn);
		}
		else
		{
			if (connection_retries > 0)
			{
				log_notice(_("Connection to master has been restored, continue monitoring.\n"));
			}
			break;
		}
	}
	if (PQstatus(primaryConn) != CONNECTION_OK)
	{
		log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n"));
		for (connection_retries = 0; connection_retries < 6; connection_retries++)
		{
			primaryConn = getMasterConnection(myLocalConn, local_options.node,
			                                  local_options.cluster_name, &primary_options.node,NULL);
			if (PQstatus(primaryConn) == CONNECTION_OK)
			{
				/* Connected, we can continue the process so break the loop */
				log_err(_("Connected to node %d, continue monitoring.\n"), primary_options.node);
				break;
			}
			else
			{
				log_err(_("We haven't found a new master, waiting before retry...\n"));
				/* wait 5 minutes before retries, after 6 failures (30 minutes) we stop trying */
				sleep(300);
			}
		}
	}
	if (PQstatus(primaryConn) != CONNECTION_OK)
	{
		log_err(_("We couldn't reconnect for long enough, exiting...\n"));
		exit(ERR_DB_CON);
	}

	/* Check if we still are a standby, we could have been promoted */
	if (!is_standby(myLocalConn))
	{
		log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
		CloseConnections();
		exit(ERR_PROMOTED);
	}

	/*
	 * first check if there is a command being executed,
	 * and if that is the case, cancel the query so i can
	 * insert the current record
	 */
	if (PQisBusy(primaryConn) == 1)
		CancelQuery();

	/* Get local xlog info */
	sqlquery_snprintf(
	    sqlquery,
	    "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
	    "pg_last_xlog_replay_location()");

	res = PQexec(myLocalConn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err("PQexec failed: %s\n", PQerrorMessage(myLocalConn));
		PQclear(res);
		/* if there is any error just let it be and retry in next loop */
		return;
	}

	/* strncpy() does not terminate on truncation, so terminate explicitly */
	strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
	monitor_standby_timestamp[MAXLEN - 1] = '\0';
	strncpy(last_wal_standby_received , PQgetvalue(res, 0, 1), MAXLEN);
	last_wal_standby_received[MAXLEN - 1] = '\0';
	strncpy(last_wal_standby_applied , PQgetvalue(res, 0, 2), MAXLEN);
	last_wal_standby_applied[MAXLEN - 1] = '\0';
	PQclear(res);

	/* Get primary xlog info */
	sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location() ");

	res = PQexec(primaryConn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err("PQexec failed: %s\n", PQerrorMessage(primaryConn));
		PQclear(res);
		return;
	}

	strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN);
	last_wal_primary_location[MAXLEN - 1] = '\0';
	PQclear(res);

	/* Calculate the lag */
	lsn_primary = walLocationToBytes(last_wal_primary_location);
	lsn_standby_received = walLocationToBytes(last_wal_standby_received);
	lsn_standby_applied = walLocationToBytes(last_wal_standby_applied);

	replication_lag = (long long) (lsn_primary - lsn_standby_received);
	apply_lag = (long long) (lsn_standby_received - lsn_standby_applied);

	if (only_one_entry && only_one_entry_desired)
	{
		/*
		 * Refresh the single existing row in place.
		 *
		 * NOTE(review): only primary_node, standby_node and
		 * last_monitor_time are named elsewhere in this function; the
		 * remaining column names are assumed from the repl_monitor table
		 * definition - confirm against the installed schema.
		 */
		sqlquery_snprintf(sqlquery,
		                  "UPDATE %s.repl_monitor "
		                  "SET last_monitor_time = '%s'::timestamp with time zone, "
		                  " last_wal_primary_location = '%s', "
		                  " last_wal_standby_location = '%s', "
		                  " replication_lag = %lld, "
		                  " apply_lag = %lld "
		                  "WHERE primary_node=%d AND standby_node=%d", repmgr_schema,
		                  monitor_standby_timestamp,
		                  last_wal_primary_location,
		                  last_wal_standby_received,
		                  replication_lag,
		                  apply_lag,
		                  primary_options.node, local_options.node);
		res = PQexec(primaryConn, sqlquery);

		/* a statement that returns no rows reports PGRES_COMMAND_OK */
		if (PQresultStatus(res) != PGRES_COMMAND_OK)
		{
			log_err("PQexec failed: %s\n", PQerrorMessage(primaryConn));
			PQclear(res);
			CloseConnections();
			exit(ERR_DB_QUERY);
		}

		/*
		 * PQcmdTuples() gives the number of affected rows as a string.
		 * Anything other than exactly one row means the single-row state is
		 * gone; fall back to insert-and-prune on the next pass.
		 */
		if (strcmp(PQcmdTuples(res), "1") != 0)
		{
			only_one_entry = false;
		}
		PQclear(res);
	}
	else
	{
		/*
		 * Build and send insert
		 */
		sqlquery_snprintf(sqlquery,
		                  "INSERT INTO %s.repl_monitor "
		                  "VALUES(%d, %d, '%s'::timestamp with time zone, "
		                  " '%s', '%s', "
		                  " %lld, %lld)", repmgr_schema,
		                  primary_options.node, local_options.node, monitor_standby_timestamp,
		                  last_wal_primary_location,
		                  last_wal_standby_received,
		                  replication_lag,
		                  apply_lag);
		res = PQexec(primaryConn, sqlquery);
		if (PQresultStatus(res) != PGRES_COMMAND_OK)
		{
			log_err("PQexec failed: %s\n", PQerrorMessage(primaryConn));
			PQclear(res);
			CloseConnections();
			exit(ERR_DB_QUERY);
		}
		PQclear(res);

		if (only_one_entry_desired)
		{
			/*
			 * Drop every older sample of this primary/standby pair so that
			 * only the row inserted above remains.
			 */
			sqlquery_snprintf(sqlquery,
			                  "DELETE FROM %s.repl_monitor "
			                  "WHERE primary_node=%d AND standby_node=%d AND last_monitor_time < '%s'::timestamp with time zone",
			                  repmgr_schema, primary_options.node, local_options.node, monitor_standby_timestamp);
			res = PQexec(primaryConn, sqlquery);
			if (PQresultStatus(res) != PGRES_COMMAND_OK)
			{
				log_err("PQexec failed: %s\n", PQerrorMessage(primaryConn));
				PQclear(res);
				CloseConnections();
				exit(ERR_DB_QUERY);
			}
			PQclear(res);
			only_one_entry = true;
		}
	}
}
示例#24
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * copy_configuration()
 *
 * Copy records in master's `repl_nodes` table to witness database
 *
 * This is used by `repmgr` when setting up the witness database, and
 * `repmgrd` after a failover event occurs
 *
 * The witness table is truncated first, then every row read from the master
 * is re-created with create_node_record(), using `cluster_name` as the
 * cluster of each row.
 *
 * Returns true when all rows were copied, false as soon as any step fails
 * (the reason is written to stderr). Rows copied before a failure are left
 * in place.
 */
bool
copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
{
	/* same buffer size as every other query builder in this file */
	char		sqlquery[QUERY_STR_LEN];
	PGresult   *res;
	int			i;

	sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));
	log_debug("copy_configuration: %s\n", sqlquery);
	res = PQexec(witnessconn, sqlquery);
	if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
	{
		fprintf(stderr, "Cannot clean node details in the witness, %s\n",
				PQerrorMessage(witnessconn));
		PQclear(res);
		return false;
	}

	/* release the TRUNCATE result before `res` is reused for the SELECT */
	PQclear(res);

	sqlquery_snprintf(sqlquery,
					  "SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
					  get_repmgr_schema_quoted(masterconn));
	res = PQexec(masterconn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		fprintf(stderr, "Can't get configuration from master: %s\n",
				PQerrorMessage(masterconn));
		PQclear(res);
		return false;
	}

	for (i = 0; i < PQntuples(res); i++)
	{
		bool node_record_created;

		/* column 4 of the SELECT above is the node's conninfo string */
		char *conninfo = PQgetvalue(res, i, 4);

		log_debug(_("copy_configuration(): %s\n"), conninfo);

		/*
		 * Empty upstream_node_id / slot_name values are passed on as
		 * "no upstream node" / "no slot".
		 */
		node_record_created = create_node_record(witnessconn,
												 "copy_configuration",
												 atoi(PQgetvalue(res, i, 0)),
												 PQgetvalue(res, i, 1),
												 strlen(PQgetvalue(res, i, 2))
												   ? atoi(PQgetvalue(res, i, 2))
												   : NO_UPSTREAM_NODE,
												 cluster_name,
												 PQgetvalue(res, i, 3),
												 conninfo,
												 atoi(PQgetvalue(res, i, 5)),
												 strlen(PQgetvalue(res, i, 6))
													? PQgetvalue(res, i, 6)
													: NULL
												 );

		if (node_record_created == false)
		{
			fprintf(stderr, "Unable to copy node record to witness database: %s\n",
					PQerrorMessage(witnessconn));
			PQclear(res);
			return false;
		}
	}
	PQclear(res);

	return true;
}
示例#25
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * create_node_record()
 *
 * Create an entry in the `repl_nodes` table.
 *
 * All values are sent as query parameters rather than being spliced into
 * the SQL text, so strings containing quotes (a conninfo such as
 * "host='node1'", for instance) are stored verbatim.
 *
 * upstream_node: id of the upstream node, or NO_UPSTREAM_NODE. In the
 *                latter case a node of type "standby" gets the id returned
 *                by get_master_node_id(); any other type gets NULL.
 * slot_name:     NULL or an empty string stores NULL.
 * action:        if not NULL, used as prefix when the statement is logged
 *                at debug level.
 *
 * Returns true if the row was inserted, false otherwise (a warning with the
 * server's error message is logged).
 *
 * XXX we should pass the record parameters as a struct.
 */
bool
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name)
{
	char		sqlquery[QUERY_STR_LEN];
	char		node_id_buf[MAXLEN];
	char		upstream_node_id[MAXLEN];
	char		priority_buf[MAXLEN];
	const char *values[8];
	PGresult   *res;

	/* integers travel as text; the server converts them to the column type */
	maxlen_snprintf(node_id_buf, "%i", node);
	maxlen_snprintf(priority_buf, "%i", priority);

	values[0] = node_id_buf;
	values[1] = type;
	values[2] = upstream_node_id;
	values[3] = cluster_name;
	values[4] = node_name;
	values[5] = conninfo;
	/* a NULL entry in the parameter array is sent as SQL NULL */
	values[6] = (slot_name != NULL && slot_name[0]) ? slot_name : NULL;
	values[7] = priority_buf;

	if (upstream_node == NO_UPSTREAM_NODE)
	{
		/*
		 * No explicit upstream node id provided for standby - attempt to
		 * get primary node id
		 */
		if (strcmp(type, "standby") == 0)
		{
			int primary_node_id = get_master_node_id(conn, cluster_name);
			maxlen_snprintf(upstream_node_id, "%i", primary_node_id);
		}
		else
		{
			values[2] = NULL;
		}
	}
	else
	{
		maxlen_snprintf(upstream_node_id, "%i", upstream_node);
	}

	sqlquery_snprintf(sqlquery,
					  "INSERT INTO %s.repl_nodes "
					  "       (id, type, upstream_node_id, cluster, "
					  "        name, conninfo, slot_name, priority) "
					  "VALUES ($1, $2, $3, $4, $5, $6, $7, $8) ",
					  get_repmgr_schema_quoted(conn));

	if (action != NULL)
	{
		log_debug(_("%s: %s\n"), action, sqlquery);
	}

	/* all parameters in text format, types inferred from the target columns */
	res = PQexecParams(conn,
					   sqlquery,
					   8,
					   NULL,
					   values,
					   NULL,
					   NULL,
					   0);
	if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
	{
		log_warning(_("Unable to create node record: %s\n"),
					PQerrorMessage(conn));
		PQclear(res);
		return false;
	}

	PQclear(res);

	return true;
}
示例#26
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * Expand an event notification command template into `dst`.
 *
 * Recognised placeholders: %n (node id), %e (event type), %d (details,
 * expands to nothing when `details` is NULL), %s ("1" or "0") and
 * %t (event timestamp). A '%' followed by any other character is copied
 * unchanged. Output that does not fit is silently truncated; `dst` is always
 * NUL-terminated. `dst_size` must be at least 1.
 */
static void
expand_event_notification_command(char *dst, size_t dst_size, const char *command_template, int node_id, const char *event, bool successful, const char *details, const char *event_timestamp)
{
	const char *src_ptr;
	char	   *dst_ptr = dst;
	/* last byte of the buffer is reserved for the terminator */
	char	   *end_ptr = dst + dst_size - 1;

	*end_ptr = '\0';

	for (src_ptr = command_template; *src_ptr; src_ptr++)
	{
		if (*src_ptr != '%')
		{
			if (dst_ptr < end_ptr)
				*dst_ptr++ = *src_ptr;
			continue;
		}

		switch (src_ptr[1])
		{
			case 'n':
				/* %n: node id */
				src_ptr++;
				snprintf(dst_ptr, end_ptr - dst_ptr, "%i", node_id);
				dst_ptr += strlen(dst_ptr);
				break;
			case 'e':
				/* %e: event type */
				src_ptr++;
				strlcpy(dst_ptr, event, end_ptr - dst_ptr);
				dst_ptr += strlen(dst_ptr);
				break;
			case 'd':
				/* %d: details */
				src_ptr++;
				if (details != NULL)
				{
					strlcpy(dst_ptr, details, end_ptr - dst_ptr);
					dst_ptr += strlen(dst_ptr);
				}
				break;
			case 's':
				/* %s: successful */
				src_ptr++;
				strlcpy(dst_ptr, successful ? "1" : "0", end_ptr - dst_ptr);
				dst_ptr += strlen(dst_ptr);
				break;
			case 't':
				/* %t: timestamp */
				src_ptr++;
				strlcpy(dst_ptr, event_timestamp, end_ptr - dst_ptr);
				dst_ptr += strlen(dst_ptr);
				break;
			default:
				/* otherwise treat the % as not special */
				if (dst_ptr < end_ptr)
					*dst_ptr++ = *src_ptr;
				break;
		}
	}

	*dst_ptr = '\0';
}

/*
 * create_event_record()
 *
 * Record an event in the `repl_events` table and, if configured, run the
 * event notification command.
 *
 * conn:       connection to write the record with; NULL skips the database
 *             write (the notification command may still run).
 * options:    supplies event_notification_command and the optional
 *             event_notifications whitelist.
 * node_id, event, successful, details: the values recorded and made
 *             available to the notification command; `details` may be NULL.
 *
 * Returns false if the record could not be written or the notification
 * command returned non-zero, true otherwise.
 */
bool
create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details)
{
	char		sqlquery[QUERY_STR_LEN];
	PGresult   *res;
	char		event_timestamp[MAXLEN] = "";
	bool		success = true;

	/* Only attempt to write a record if a connection handle was provided.
	   Also check that the repmgr schema has been properly initialised - if
	   not it means no configuration file was provided, which can happen with
	   e.g. `repmgr standby clone`, and we won't know which schema to write to.
	 */
	if (conn != NULL && strcmp(repmgr_schema, DEFAULT_REPMGR_SCHEMA_PREFIX) != 0)
	{
		/* the node id is sent in binary format, hence network byte order */
		int n_node_id = htonl(node_id);
		char *t_successful = successful ? "TRUE" : "FALSE";

		const char *values[4] = { (char *)&n_node_id,
								  event,
								  t_successful,
								  details
					  			};

		/* lengths are only consulted for binary-format parameters */
		int lengths[4] = { sizeof(n_node_id),
						   0,
						   0,
						   0
			  			 };

		int binary[4] = {1, 0, 0, 0};

		sqlquery_snprintf(sqlquery,
						  " INSERT INTO %s.repl_events ( "
						  "             node_id, "
						  "             event, "
						  "             successful, "
						  "             details "
						  "            ) "
						  "      VALUES ($1, $2, $3, $4) "
						  "   RETURNING event_timestamp ",
						  get_repmgr_schema_quoted(conn));

		res = PQexecParams(conn,
						   sqlquery,
						   4,
						   NULL,
						   values,
						   lengths,
						   binary,
						   0);

		if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
		{

			log_warning(_("Unable to create event record: %s\n"),
						PQerrorMessage(conn));

			success = false;

		}
		else
		{
			/* Store timestamp to send to the notification command */
			strncpy(event_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
			/* strncpy() does not terminate the destination on truncation */
			event_timestamp[MAXLEN - 1] = '\0';
			log_debug(_("Event timestamp is: %s\n"), event_timestamp);
		}

		PQclear(res);
	}

	/*
	 * If no database connection provided, or the query failed, generate a
	 * current timestamp ourselves. This isn't quite the same
	 * format as PostgreSQL, but is close enough for diagnostic use.
	 */
	if (!strlen(event_timestamp))
	{
		time_t now;
		struct tm *now_tm;

		time(&now);
		now_tm = localtime(&now);

		/* localtime() can fail; leave the timestamp empty in that case */
		if (now_tm != NULL)
			strftime(event_timestamp, MAXLEN, "%Y-%m-%d %H:%M:%S%z", now_tm);
	}

	/* an event notification command was provided - parse and execute it */
	if (strlen(options->event_notification_command))
	{
		char		parsed_command[MAXPGPATH];
		int	   	    r;

		/*
		 * If configuration option 'event_notifications' was provided,
		 * check if this event is one of the ones listed; if not listed,
		 * don't execute the notification script.
		 *
		 * (If 'event_notifications' was not provided, we assume the script
		 * should be executed for all events).
		 */
		if (options->event_notifications.head != NULL)
		{
			EventNotificationListCell *cell;
			bool notify_ok = false;

			for (cell = options->event_notifications.head; cell; cell = cell->next)
			{
				if (strcmp(event, cell->event_type) == 0)
				{
					notify_ok = true;
					break;
				}
			}

			/*
			 * Event type not found in the 'event_notifications' list - return early
			 */
			if (notify_ok == false)
			{
				log_debug(_("Not executing notification script for event type '%s'\n"), event);
				return success;
			}
		}

		/*
		 * NOTE(review): the substituted values are not shell-escaped before
		 * the command is handed to system(); event details containing shell
		 * metacharacters will be interpreted by the shell.
		 */
		expand_event_notification_command(parsed_command, MAXPGPATH,
										  options->event_notification_command,
										  node_id, event, successful,
										  details, event_timestamp);

		log_debug(_("Executing: %s\n"), parsed_command);

		r = system(parsed_command);
		if (r != 0)
		{
			log_warning(_("Unable to execute event notification command\n"));
			success = false;
		}
	}

	return success;
}
示例#27
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * create_replication_slot()
 *
 * Make sure a physical replication slot called `slot_name` is available on
 * the server behind `conn`.
 *
 * Returns true if the slot was created, or if it already exists as an
 * inactive physical slot (it is then reused as-is). Returns false if the
 * slot exists but is not a physical slot, exists and is active, or if
 * either query fails; the reason is logged.
 */
bool
create_replication_slot(PGconn *conn, char *slot_name)
{
	char		sqlquery[QUERY_STR_LEN];
	PGresult   *res;

	/*
	 * Check whether slot exists already; if it exists and is active, that
	 * means another active standby is using it, which creates an error situation;
	 * if not we can reuse it as-is
	 */

	sqlquery_snprintf(sqlquery,
					  "SELECT active, slot_type "
                      "  FROM pg_replication_slots "
					  " WHERE slot_name = '%s' ",
					  slot_name);

	res = PQexec(conn, sqlquery);
	if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("unable to query pg_replication_slots: %s\n"),
				PQerrorMessage(conn));
		PQclear(res);
		return false;
	}

	if (PQntuples(res))
	{
		if (strcmp(PQgetvalue(res, 0, 1), "physical") != 0)
		{
			log_err(_("Slot '%s' exists and is not a physical slot\n"),
					slot_name);
			PQclear(res);

			/* stop here: `res` is no longer valid and the slot is unusable */
			return false;
		}
		if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
		{
			PQclear(res);
			log_debug(_("Replication slot '%s' exists but is inactive; reusing\n"),
						slot_name);

			return true;
		}
		PQclear(res);
		log_err(_("Slot '%s' already exists as an active slot\n"),
				slot_name);
		return false;
	}

	/* the lookup result is not needed once we know the slot does not exist */
	PQclear(res);

	sqlquery_snprintf(sqlquery,
					  "SELECT * FROM pg_create_physical_replication_slot('%s')",
					  slot_name);

	log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);

	res = PQexec(conn, sqlquery);
	if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("unable to create slot '%s' on the primary node: %s\n"),
				slot_name,
				PQerrorMessage(conn));
		PQclear(res);
		return false;
	}

	PQclear(res);
	return true;
}
示例#28
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * get_master_connection()
 *
 * Find the current master of `cluster` and return an open connection to it.
 *
 * Every non-witness node listed in repl_nodes (read through `standby_conn`)
 * is connected to in turn and asked pg_is_in_recovery(); the first node
 * that answers "false" is taken to be the master.
 *
 * master_id:           if not NULL, receives the master's node id, or
 *                      NODE_NOT_FOUND when no master is found.
 * master_conninfo_out: if not NULL, used as the buffer the conninfo strings
 *                      are copied into (MAXCONNINFO bytes are written), so
 *                      on success it holds the master's conninfo.
 *
 * Returns the connection (to be closed by the caller with PQfinish()), or
 * NULL if the node list could not be read or no reachable node is a master.
 */
PGconn *
get_master_connection(PGconn *standby_conn, char *cluster,
					  int *master_id, char *master_conninfo_out)
{
	PGconn	   *master_conn = NULL;
	PGresult   *res1;
	PGresult   *res2;
	char		sqlquery[QUERY_STR_LEN];
	char		master_conninfo_stack[MAXCONNINFO];
	char	   *master_conninfo = &*master_conninfo_stack;

	int			i,
				node_id;

	if (master_id != NULL)
	{
		*master_id = NODE_NOT_FOUND;
	}

	/*
	 * If the caller wanted to get a copy of the connection info string, sub
	 * out the local stack pointer for the pointer passed by the caller.
	 */
	if (master_conninfo_out != NULL)
		master_conninfo = master_conninfo_out;

	/* find all nodes belonging to this cluster */
	log_info(_("finding node list for cluster '%s'\n"),
			 cluster);

	sqlquery_snprintf(sqlquery,
					  "SELECT id, conninfo "
					  "  FROM %s.repl_nodes "
					  " WHERE cluster = '%s' "
					  "   AND type != 'witness' ",
					  get_repmgr_schema_quoted(standby_conn),
					  cluster);

	res1 = PQexec(standby_conn, sqlquery);
	if (PQresultStatus(res1) != PGRES_TUPLES_OK)
	{
		log_err(_("unable to retrieve node records: %s\n"),
				PQerrorMessage(standby_conn));
		PQclear(res1);
		return NULL;
	}

	for (i = 0; i < PQntuples(res1); i++)
	{
		/* initialize with the values of the current node being processed */
		node_id = atoi(PQgetvalue(res1, i, 0));
		strncpy(master_conninfo, PQgetvalue(res1, i, 1), MAXCONNINFO);
		/* strncpy() does not terminate the destination on truncation */
		master_conninfo[MAXCONNINFO - 1] = '\0';
		log_info(_("checking role of cluster node '%i'\n"),
				 node_id);
		master_conn = establish_db_connection(master_conninfo, false);

		if (PQstatus(master_conn) != CONNECTION_OK)
		{
			/* a failed attempt still has to be released */
			if (master_conn != NULL)
				PQfinish(master_conn);
			continue;
		}

		/*
		 * Can't use the is_standby() function here because on error that
		 * function closes the connection passed and exits.  This still needs
		 * to close master_conn first.
		 */
		res2 = PQexec(master_conn, "SELECT pg_catalog.pg_is_in_recovery()");

		if (PQresultStatus(res2) != PGRES_TUPLES_OK)
		{
			log_err(_("unable to retrieve recovery state from this node: %s\n"),
					PQerrorMessage(master_conn));
			PQclear(res2);
			PQfinish(master_conn);
			continue;
		}

		/* if false, this is the master */
		if (strcmp(PQgetvalue(res2, 0, 0), "f") == 0)
		{
			PQclear(res2);
			PQclear(res1);
			log_debug(_("get_master_connection(): current master node is %i\n"), node_id);

			if (master_id != NULL)
			{
				*master_id = node_id;
			}

			return master_conn;
		}
		else
		{
			/* if it is a standby, clear info */
			PQclear(res2);
			PQfinish(master_conn);
		}
	}

	/*
	 * If we finish this loop without finding a master then we doesn't have
	 * the info or the master has failed (or we reached max_connections or
	 * superuser_reserved_connections, anything else I'm missing?).
	 *
	 * Probably we will need to check the error to know if we need to start
	 * failover procedure or just fix some situation on the standby.
	 */
	PQclear(res1);
	return NULL;
}
示例#29
0
文件: dbutils.c 项目: Bazoozoo/repmgr
/*
 * get_upstream_connection()
 *
 * Returns connection to node's upstream node
 *
 * The upstream node of `node_id` in `cluster` is looked up in repl_nodes
 * through `standby_conn`, then connected to.
 *
 * upstream_node_id_ptr:  if not NULL, receives the upstream node's id.
 * upstream_conninfo_out: if not NULL, receives the upstream node's conninfo
 *                        string (MAXCONNINFO bytes are written).
 *
 * Returns the open connection (to be closed by the caller with PQfinish()),
 * or NULL if the lookup fails, no upstream record exists, or the upstream
 * node cannot be connected to.
 *
 * NOTE: will attempt to connect even if node is marked as inactive
 */
PGconn *
get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
						int *upstream_node_id_ptr, char *upstream_conninfo_out)
{
	PGconn	   *upstream_conn = NULL;
	PGresult   *res;
	char		sqlquery[QUERY_STR_LEN];
	char		upstream_conninfo_stack[MAXCONNINFO];
	char	   *upstream_conninfo = &*upstream_conninfo_stack;

	/*
	 * If the caller wanted to get a copy of the connection info string, sub
	 * out the local stack pointer for the pointer passed by the caller.
	 */
	if (upstream_conninfo_out != NULL)
		upstream_conninfo = upstream_conninfo_out;

	/* result columns: 0 = conninfo, 1 = name, 2 = id */
	sqlquery_snprintf(sqlquery,
					  "    SELECT un.conninfo, un.name, un.id "
					  "      FROM %s.repl_nodes un "
					  "INNER JOIN %s.repl_nodes n "
					  "        ON (un.id = n.upstream_node_id AND un.cluster = n.cluster)"
					  "     WHERE n.cluster = '%s' "
					  "       AND n.id = %i ",
					  get_repmgr_schema_quoted(standby_conn),
					  get_repmgr_schema_quoted(standby_conn),
					  cluster,
					  node_id);

	log_debug("get_upstream_connection(): %s\n", sqlquery);

	res = PQexec(standby_conn, sqlquery);

	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("unable to get conninfo for upstream server: %s\n"),
				PQerrorMessage(standby_conn));
		PQclear(res);
		return NULL;
	}

	if (!PQntuples(res))
	{
		log_notice(_("no record found for upstream server\n"));
		PQclear(res);
		return NULL;
	}

	strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO);
	/* strncpy() does not terminate the destination on truncation */
	upstream_conninfo[MAXCONNINFO - 1] = '\0';

	/* the id is the third selected column; the second one is the name */
	if (upstream_node_id_ptr != NULL)
		*upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 2));

	PQclear(res);

	log_debug("conninfo is: '%s'\n", upstream_conninfo);
	upstream_conn = establish_db_connection(upstream_conninfo, false);

	if (PQstatus(upstream_conn) != CONNECTION_OK)
	{
		log_err(_("unable to connect to upstream node: %s\n"),
				PQerrorMessage(upstream_conn));

		/* a failed attempt still has to be released */
		if (upstream_conn != NULL)
			PQfinish(upstream_conn);
		return NULL;
	}

	return upstream_conn;
}
示例#30
0
文件: repmgrd.c 项目: klando/repmgr
/*
 * do_failover()
 *
 * Take part in electing a new primary and then run the configured promote
 * or follow command.
 *
 * Steps:
 *  1. publish this node's last replayed WAL location through
 *     update_shared_memory();
 *  2. sleep SLEEP_MONITOR + 1 seconds so the other repmgrd processes can
 *     publish theirs;
 *  3. connect to every standby listed in repl_status and read the location
 *     it published;
 *  4. exit if fewer than half of the registered nodes could be queried;
 *  5. pick the best candidate by WAL location, run promote_command if it is
 *     this node, follow_command otherwise.
 *
 * The process exits on query failure, lost quorum, missing candidate or a
 * failing promote/follow command. myLocalConn is closed during the election
 * and re-established before returning.
 */
static void
do_failover(void)
{
	PGresult *res1;
	PGresult *res2;
	char 	sqlquery[8192];

	int		total_nodes = 0;
	int		visible_nodes = 0;
	bool	find_best = false;

	int		i;
	int		r;
	int		standby_count;

	int 	node;
	char	nodeConninfo[MAXLEN];

	unsigned int uxlogid;
	unsigned int uxrecoff;
	char last_wal_standby_applied[MAXLEN];

	PGconn	*nodeConn = NULL;

	/*
	 * will get info about until 50 nodes,
	 * which seems to be large enough for most scenarios
	 */
	nodeInfo nodes[50];
	nodeInfo best_candidate;
	const int max_nodes = (int) (sizeof(nodes) / sizeof(nodes[0]));

	/* first we get info about this node, and update shared memory */
	sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_replay_location()");
	res1 = PQexec(myLocalConn, sqlquery);
	if (PQresultStatus(res1) != PGRES_TUPLES_OK)
	{
		log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(myLocalConn));
		PQclear(res1);
		sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
		update_shared_memory(last_wal_standby_applied);
		exit(ERR_DB_QUERY);
	}

	/* write last location in shared memory */
	update_shared_memory(PQgetvalue(res1, 0, 0));

	/* res1 is reused for the next query, so release this result first */
	PQclear(res1);

	/*
	 * we sleep the monitor time + one second
	 * we bet it should be enough for other repmgrd to update their own data
	 */
	sleep(SLEEP_MONITOR + 1);

	/* get a list of standby nodes, including myself */
	sqlquery_snprintf(sqlquery, "SELECT id, conninfo "
	                  "  FROM %s.repl_nodes "
	                  " WHERE id IN (SELECT standby_node FROM %s.repl_status) "
	                  "   AND cluster = '%s' "
	                  " ORDER BY priority ",
	                  repmgr_schema, repmgr_schema, local_options.cluster_name);

	res1 = PQexec(myLocalConn, sqlquery);
	if (PQresultStatus(res1) != PGRES_TUPLES_OK)
	{
		log_err(_("Can't get nodes info: %s\n"), PQerrorMessage(myLocalConn));
		PQclear(res1);
		PQfinish(myLocalConn);
		exit(ERR_DB_QUERY);
	}

	/* never index past the end of nodes[] */
	standby_count = PQntuples(res1);
	if (standby_count > max_nodes)
	{
		log_err(_("Found %d standby nodes, only the first %d will be considered\n"),
		        standby_count, max_nodes);
		standby_count = max_nodes;
	}

	/* ask for the locations */
	for (i = 0; i < standby_count; i++)
	{
		node = atoi(PQgetvalue(res1, i, 0));
		/* Initialize on false so if we can't reach this node we know that later */
		nodes[i].is_ready = false;

		/* strncpy() does not terminate on truncation, do it explicitly */
		strncpy(nodeConninfo, PQgetvalue(res1, i, 1), MAXLEN - 1);
		nodeConninfo[MAXLEN - 1] = '\0';

		nodeConn = establishDBConnection(nodeConninfo, false);
		/* if we can't see the node just skip it */
		if (PQstatus(nodeConn) != CONNECTION_OK)
		{
			/* a failed connection object still has to be released */
			PQfinish(nodeConn);
			continue;
		}

		sqlquery_snprintf(sqlquery, "SELECT repmgr_get_last_standby_location()");
		res2 = PQexec(nodeConn, sqlquery);
		if (PQresultStatus(res2) != PGRES_TUPLES_OK)
		{
			log_info(_("Can't get node's last standby location: %s\n"), PQerrorMessage(nodeConn));
			log_info(_("Connection details: %s\n"), nodeConninfo);
			PQclear(res2);
			PQfinish(nodeConn);
			continue;
		}

		/* the node answered, so it counts towards the quorum */
		visible_nodes++;

		if (sscanf(PQgetvalue(res2, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
		{
			/*
			 * Without a parsed location there is nothing valid to compare,
			 * so leave the node marked as not ready instead of storing
			 * uninitialised or stale values for it.
			 */
			log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res2, 0, 0));
			PQclear(res2);
			PQfinish(nodeConn);
			continue;
		}

		nodes[i].nodeId = node;
		nodes[i].xlog_location.xlogid = uxlogid;
		nodes[i].xlog_location.xrecoff = uxrecoff;
		nodes[i].is_ready = true;

		PQclear(res2);
		PQfinish(nodeConn);
	}
	PQclear(res1);
	/* Close the connection to this server */
	PQfinish(myLocalConn);

	/*
	 * total nodes that are registered, include master which is a node but was
	 * not counted because it's not a standby
	 */
	total_nodes = standby_count + 1;

	/*
	 * am i on the group that should keep alive?
	 * if i see less than half of total_nodes then i should do nothing
	 */
	if (visible_nodes < (total_nodes / 2.0))
	{
		log_err(_("Can't reach most of the nodes.\n"
		          "Let the other standby servers decide which one will be the primary.\n"
		          "Manual action will be needed to readd this node to the cluster.\n"));
		exit(ERR_FAILOVER_FAIL);
	}

	/*
	 * determine which one is the best candidate to promote to primary
	 */
	for (i = 0; i < standby_count; i++)
	{
		if (!nodes[i].is_ready)
			continue;
		else if (!find_best)
		{
			/* start with the first ready node, and then move on to the next one */
			best_candidate.nodeId                = nodes[i].nodeId;
			best_candidate.xlog_location.xlogid  = nodes[i].xlog_location.xlogid;
			best_candidate.xlog_location.xrecoff = nodes[i].xlog_location.xrecoff;
			best_candidate.is_ready              = nodes[i].is_ready;
			find_best = true;
		}

		/* we use the macros provided by xlogdefs.h to compare XLogPtr */
		/*
		 * Nodes are retrieved ordered by priority, so if the current
		 * best candidate is lower or equal to the next node's wal location
		 * then assign next node as the new best candidate.
		 *
		 * NOTE(review): with "lower or equal", a later row with the same
		 * location replaces an earlier one - confirm this is the intended
		 * tie-break for the priority ordering.
		 */
		if (XLByteLE(best_candidate.xlog_location, nodes[i].xlog_location))
		{
			best_candidate.nodeId                = nodes[i].nodeId;
			best_candidate.xlog_location.xlogid  = nodes[i].xlog_location.xlogid;
			best_candidate.xlog_location.xrecoff = nodes[i].xlog_location.xrecoff;
			best_candidate.is_ready              = nodes[i].is_ready;
		}
	}

	/* once we know who is the best candidate, promote it */
	if (find_best && (best_candidate.nodeId == local_options.node))
	{
		if (verbose)
			log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"),
			         progname);
		log_debug(_("promote command is: \"%s\"\n"), local_options.promote_command);
		r = system(local_options.promote_command);
		if (r != 0)
		{
			log_err(_("%s: promote command failed. You could check and try it manually.\n"), progname);
			exit(ERR_BAD_CONFIG);
		}
	}
	else if (find_best)
	{
		if (verbose)
			log_info(_("%s: Node %d is the best candidate to be the new primary, we should follow it...\n"),
			         progname, best_candidate.nodeId);
		log_debug(_("follow command is: \"%s\"\n"), local_options.follow_command);
		/*
		 * New Primary need some time to be promoted.
		 * The follow command should take care of that.
		 */
		r = system(local_options.follow_command);
		if (r != 0)
		{
			log_err(_("%s: follow command failed. You could check and try it manually.\n"), progname);
			exit(ERR_BAD_CONFIG);
		}
	}
	else
	{
		log_err(_("%s: Did not find candidates. You should check and try manually.\n"), progname);
		exit(ERR_FAILOVER_FAIL);
	}

	/* and reconnect to the local database */
	myLocalConn = establishDBConnection(local_options.conninfo, true);
}