Ejemplo n.º 1
0
/*
 * do_bdr_register()
 *
 * As each BDR node is its own primary, registering a BDR node
 * will create the repmgr metadata schema if necessary.
 */
void
do_bdr_register(void)
{
	PGconn	   *conn = NULL;
	BdrNodeInfoList bdr_nodes = T_BDR_NODE_INFO_LIST_INITIALIZER;
	ExtensionStatus extension_status = REPMGR_UNKNOWN;
	t_node_info node_info = T_NODE_INFO_INITIALIZER;
	RecordStatus record_status = RECORD_NOT_FOUND;
	PQExpBufferData event_details;
	bool		success = true;
	char	   *dbname = NULL;

	/* sanity-check configuration for BDR-compatability */
	if (config_file_options.replication_type != REPLICATION_TYPE_BDR)
	{
		log_error(_("cannot run BDR REGISTER on a non-BDR node"));
		exit(ERR_BAD_CONFIG);
	}

	dbname = pg_malloc0(MAXLEN);

	if (dbname == NULL)
	{
		log_error(_("unable to allocate memory; terminating."));
		exit(ERR_OUT_OF_MEMORY);
	}

	/* store the database name for future reference */
	get_conninfo_value(config_file_options.conninfo, "dbname", dbname);

	conn = establish_db_connection(config_file_options.conninfo, true);

	if (!is_bdr_db(conn, NULL))
	{
		log_error(_("database \"%s\" is not BDR-enabled"), dbname);
		log_hint(_("when using repmgr with BDR, the repmgr schema must be stored in the BDR database"));
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	/* Check that there are at most 2 BDR nodes */
	get_all_bdr_node_records(conn, &bdr_nodes);

	if (bdr_nodes.node_count == 0)
	{
		log_error(_("database \"%s\" is BDR-enabled but no BDR nodes were found"), dbname);
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	/* BDR 2 implementation is for 2 nodes only */
	if (get_bdr_version_num() < 3 && bdr_nodes.node_count > 2)
	{
		log_error(_("repmgr can only support BDR 2.x clusters with 2 nodes"));
		log_detail(_("this BDR cluster has %i nodes"), bdr_nodes.node_count);
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	if (get_bdr_version_num() > 2)
	{
		log_error(_("\"repmgr bdr register\" is for BDR 2.x only"));
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}


	/* check for a matching BDR node */
	{
		PQExpBufferData bdr_local_node_name;
		bool		node_match = false;

		initPQExpBuffer(&bdr_local_node_name);
		node_match = bdr_node_name_matches(conn, config_file_options.node_name, &bdr_local_node_name);

		if (node_match == false)
		{
			if (strlen(bdr_local_node_name.data))
			{
				log_error(_("local node BDR node name is \"%s\", expected: \"%s\""),
						  bdr_local_node_name.data,
						  config_file_options.node_name);
				log_hint(_("\"node_name\" in repmgr.conf must match \"node_name\" in bdr.bdr_nodes"));
			}
			else
			{
				log_error(_("local node does not report BDR node name"));
				log_hint(_("ensure this is an active BDR node"));
			}

			PQfinish(conn);
			pfree(dbname);
			termPQExpBuffer(&bdr_local_node_name);
			exit(ERR_BAD_CONFIG);
		}

		termPQExpBuffer(&bdr_local_node_name);
	}

	/* check whether repmgr extension exists, and there are no non-BDR nodes registered */
	extension_status = get_repmgr_extension_status(conn, NULL);

	if (extension_status == REPMGR_UNKNOWN)
	{
		log_error(_("unable to determine status of \"repmgr\" extension in database \"%s\""),
				  dbname);
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	if (extension_status == REPMGR_UNAVAILABLE)
	{
		log_error(_("\"repmgr\" extension is not available"));
		PQfinish(conn);
		pfree(dbname);
		exit(ERR_BAD_CONFIG);
	}

	if (extension_status == REPMGR_INSTALLED)
	{
		if (!is_bdr_repmgr(conn))
		{
			log_error(_("repmgr metadatabase contains records for non-BDR nodes"));
			PQfinish(conn);
			pfree(dbname);
			exit(ERR_BAD_CONFIG);
		}
	}
	else
	{
		log_debug("creating repmgr extension in database \"%s\"", dbname);

		begin_transaction(conn);

		if (!create_repmgr_extension(conn))
		{
			log_error(_("unable to create repmgr extension - see preceding error message(s); aborting"));
			rollback_transaction(conn);
			pfree(dbname);
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}

		commit_transaction(conn);
	}

	pfree(dbname);

	if (bdr_node_has_repmgr_set(conn, config_file_options.node_name) == false)
	{
		log_debug("bdr_node_has_repmgr_set() = false");
		bdr_node_set_repmgr_set(conn, config_file_options.node_name);
	}

	/*
	 * before adding the extension tables to the replication set, if any other
	 * BDR nodes exist, populate repmgr.nodes with a copy of existing entries
	 *
	 * currently we won't copy the contents of any other tables
	 *
	 */
	{
		NodeInfoList local_node_records = T_NODE_INFO_LIST_INITIALIZER;

		(void) get_all_node_records(conn, &local_node_records);

		if (local_node_records.node_count == 0)
		{
			BdrNodeInfoList bdr_nodes = T_BDR_NODE_INFO_LIST_INITIALIZER;
			BdrNodeInfoListCell *bdr_cell = NULL;

			get_all_bdr_node_records(conn, &bdr_nodes);

			if (bdr_nodes.node_count == 0)
			{
				log_error(_("unable to retrieve any BDR node records"));
				log_detail("%s", PQerrorMessage(conn));
				PQfinish(conn);
				exit(ERR_BAD_CONFIG);
			}

			for (bdr_cell = bdr_nodes.head; bdr_cell; bdr_cell = bdr_cell->next)
			{
				PGconn	   *bdr_node_conn = NULL;
				NodeInfoList existing_nodes = T_NODE_INFO_LIST_INITIALIZER;
				NodeInfoListCell *cell = NULL;
				ExtensionStatus other_node_extension_status = REPMGR_UNKNOWN;

				/* skip the local node */
				if (strncmp(node_info.node_name, bdr_cell->node_info->node_name, sizeof(node_info.node_name)) == 0)
				{
					continue;
				}

				log_debug("connecting to BDR node \"%s\" (conninfo: \"%s\")",
						  bdr_cell->node_info->node_name,
						  bdr_cell->node_info->node_local_dsn);
				bdr_node_conn = establish_db_connection_quiet(bdr_cell->node_info->node_local_dsn);

				if (PQstatus(bdr_node_conn) != CONNECTION_OK)
				{
					continue;
				}

				/* check repmgr schema exists, skip if not */
				other_node_extension_status = get_repmgr_extension_status(bdr_node_conn, NULL);

				if (other_node_extension_status != REPMGR_INSTALLED)
				{
					continue;
				}

				(void) get_all_node_records(bdr_node_conn, &existing_nodes);

				for (cell = existing_nodes.head; cell; cell = cell->next)
				{
					log_debug("creating record for node \"%s\" (ID: %i)",
							  cell->node_info->node_name, cell->node_info->node_id);
					create_node_record(conn, "bdr register", cell->node_info);
				}

				PQfinish(bdr_node_conn);
				break;
			}
		}
	}

	/* Add the repmgr extension tables to a replication set */

	if (get_bdr_version_num() < 3)
	{
		add_extension_tables_to_bdr_replication_set(conn);
	}
	else
	{
		/* this is the only table we need to replicate */
		char *replication_set = get_default_bdr_replication_set(conn);

		/*
		 * this probably won't happen, but we need to be sure we're using
		 * the replication set metadata correctly...
		 */
		if (conn == NULL)
		{
			log_error(_("unable to retrieve default BDR replication set"));
			log_hint(_("see preceding messages"));
			log_debug("check query in get_default_bdr_replication_set()");
			exit(ERR_BAD_CONFIG);
		}

		if (is_table_in_bdr_replication_set(conn, "nodes", replication_set) == false)
		{
			add_table_to_bdr_replication_set(conn, "nodes", replication_set);
		}

		pfree(replication_set);
	}

	initPQExpBuffer(&event_details);

	begin_transaction(conn);

	/*
	 * we'll check if a record exists (even if the schema was just created),
	 * as there's a faint chance of a race condition
	 */

	record_status = get_node_record(conn, config_file_options.node_id, &node_info);

	/* Update internal node record */

	node_info.type = BDR;
	node_info.node_id = config_file_options.node_id;
	node_info.upstream_node_id = NO_UPSTREAM_NODE;
	node_info.active = true;
	node_info.priority = config_file_options.priority;

	strncpy(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name));
	strncpy(node_info.location, config_file_options.location, sizeof(node_info.location));
	strncpy(node_info.conninfo, config_file_options.conninfo, sizeof(node_info.conninfo));

	if (record_status == RECORD_FOUND)
	{
		bool		node_updated = false;

		/*
		 * At this point we will have established there are no non-BDR
		 * records, so no need to verify the node type
		 */
		if (!runtime_options.force)
		{
			log_error(_("this node is already registered"));
			log_hint(_("use -F/--force to overwrite the existing node record"));
			rollback_transaction(conn);
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}

		/*
		 * don't permit changing the node name - this must match the BDR node
		 * name set when the node was registered.
		 */

		if (strncmp(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name)) != 0)
		{
			log_error(_("a record for node %i is already registered with node_name \"%s\""),
					  config_file_options.node_id, node_info.node_name);
			log_hint(_("node_name configured in repmgr.conf is \"%s\""), config_file_options.node_name);

			rollback_transaction(conn);
			PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}

		node_updated = update_node_record(conn, "bdr register", &node_info);

		if (node_updated == true)
		{
			appendPQExpBuffer(&event_details, _("node record updated for node \"%s\" (%i)"),
							  config_file_options.node_name, config_file_options.node_id);
			log_verbose(LOG_NOTICE, "%s", event_details.data);
		}
		else
		{
			success = false;
		}

	}
	else
	{
		/* create new node record */
		bool		node_created = create_node_record(conn, "bdr register", &node_info);

		if (node_created == true)
		{
			appendPQExpBuffer(&event_details,
							  _("node record created for node \"%s\" (ID: %i)"),
							  config_file_options.node_name, config_file_options.node_id);
			log_notice("%s", event_details.data);
		}
		else
		{
			success = false;
		}
	}

	if (success == false)
	{
		rollback_transaction(conn);
		PQfinish(conn);
		exit(ERR_DB_QUERY);
	}

	commit_transaction(conn);
	/* Log the event */
	create_event_notification(
							  conn,
							  &config_file_options,
							  config_file_options.node_id,
							  "bdr_register",
							  true,
							  event_details.data);

	termPQExpBuffer(&event_details);

	PQfinish(conn);

	log_notice(_("BDR node %i registered (conninfo: %s)"),
			   config_file_options.node_id, config_file_options.conninfo);

	return;
}
Ejemplo n.º 2
0
/*
 * _build_single_nodeline_info - From the slurm.conf reader, build table,
 * 	and set values
 * RET 0 if no error, error code otherwise
 * Note: Operates on common variables
 *	default_node_record - default node configuration values
 */
static int _build_single_nodeline_info(slurm_conf_node_t *node_ptr,
				       struct config_record *config_ptr)
{
	int error_code = SLURM_SUCCESS;
	struct node_record *node_rec = NULL;
	hostlist_t address_list = NULL;
	hostlist_t alias_list = NULL;
	hostlist_t hostname_list = NULL;
	hostlist_t port_list = NULL;
	char *address = NULL;
	char *alias = NULL;
	char *hostname = NULL;
	char *port_str = NULL;
	int state_val = NODE_STATE_UNKNOWN;
	int address_count, alias_count, hostname_count, port_count;
	uint16_t port = 0;

	if (node_ptr->state != NULL) {
		state_val = state_str2int(node_ptr->state, node_ptr->nodenames);
		if (state_val == NO_VAL)
			goto cleanup;
	}

	if ((address_list = hostlist_create(node_ptr->addresses)) == NULL) {
		fatal("Unable to create NodeAddr list from %s",
		      node_ptr->addresses);
		error_code = errno;
		goto cleanup;
	}
	if ((alias_list = hostlist_create(node_ptr->nodenames)) == NULL) {
		fatal("Unable to create NodeName list from %s",
		      node_ptr->nodenames);
		error_code = errno;
		goto cleanup;
	}
	if ((hostname_list = hostlist_create(node_ptr->hostnames)) == NULL) {
		fatal("Unable to create NodeHostname list from %s",
		      node_ptr->hostnames);
		error_code = errno;
		goto cleanup;
	}
	if (node_ptr->port_str && node_ptr->port_str[0] &&
	    (node_ptr->port_str[0] != '[') &&
	    (strchr(node_ptr->port_str, '-') ||
	     strchr(node_ptr->port_str, ','))) {
		xstrfmtcat(port_str, "[%s]", node_ptr->port_str);
		port_list = hostlist_create(port_str);
		xfree(port_str);
	} else {
		port_list = hostlist_create(node_ptr->port_str);
	}
	if (port_list == NULL) {
		error("Unable to create Port list from %s",
		      node_ptr->port_str);
		error_code = errno;
		goto cleanup;
	}

	/* some sanity checks */
	address_count  = hostlist_count(address_list);
	alias_count    = hostlist_count(alias_list);
	hostname_count = hostlist_count(hostname_list);
	port_count     = hostlist_count(port_list);
#ifdef HAVE_FRONT_END
	if ((hostname_count != alias_count) && (hostname_count != 1)) {
		error("NodeHostname count must equal that of NodeName "
		      "records of there must be no more than one");
		goto cleanup;
	}
	if ((address_count != alias_count) && (address_count != 1)) {
		error("NodeAddr count must equal that of NodeName "
		      "records of there must be no more than one");
		goto cleanup;
	}
#else
#ifdef MULTIPLE_SLURMD
	if ((address_count != alias_count) && (address_count != 1)) {
		error("NodeAddr count must equal that of NodeName "
		      "records of there must be no more than one");
		goto cleanup;
	}
#else
	if (address_count < alias_count) {
		error("At least as many NodeAddr are required as NodeName");
		goto cleanup;
	}
	if (hostname_count < alias_count) {
		error("At least as many NodeHostname are required "
		      "as NodeName");
		goto cleanup;
	}
#endif	/* MULTIPLE_SLURMD */
#endif	/* HAVE_FRONT_END */
	if ((port_count != alias_count) && (port_count > 1)) {
		error("Port count must equal that of NodeName "
		      "records or there must be no more than one");
		goto cleanup;
	}

	/* now build the individual node structures */
	while ((alias = hostlist_shift(alias_list))) {
		if (address_count > 0) {
			address_count--;
			if (address)
				free(address);
			address = hostlist_shift(address_list);
		}
		if (hostname_count > 0) {
			hostname_count--;
			if (hostname)
				free(hostname);
			hostname = hostlist_shift(hostname_list);
		}
		if (port_count > 0) {
			int port_int;
			port_count--;
			if (port_str)
				free(port_str);
			port_str = hostlist_shift(port_list);
			port_int = atoi(port_str);
			if ((port_int <= 0) || (port_int > 0xffff))
				fatal("Invalid Port %s", node_ptr->port_str);
			port = port_int;
		}
		/* find_node_record locks this to get the
		 * alias so we need to unlock */
		node_rec = find_node_record(alias);

		if (node_rec == NULL) {
			node_rec = create_node_record(config_ptr, alias);
			if ((state_val != NO_VAL) &&
			    (state_val != NODE_STATE_UNKNOWN))
				node_rec->node_state = state_val;
			node_rec->last_response = (time_t) 0;
			node_rec->comm_name = xstrdup(address);
			node_rec->node_hostname = xstrdup(hostname);
			node_rec->port      = port;
			node_rec->weight    = node_ptr->weight;
			node_rec->features  = xstrdup(node_ptr->feature);
			node_rec->reason    = xstrdup(node_ptr->reason);
		} else {
			/* FIXME - maybe should be fatal? */
			error("Reconfiguration for node %s, ignoring!", alias);
		}
		free(alias);
	}

	/* free allocated storage */
cleanup:
	if (address)
		free(address);
	if (hostname)
		free(hostname);
	if (port_str)
		free(port_str);
	if (address_list)
		hostlist_destroy(address_list);
	if (alias_list)
		hostlist_destroy(alias_list);
	if (hostname_list)
		hostlist_destroy(hostname_list);
	if (port_list)
		hostlist_destroy(port_list);
	return error_code;
}
Ejemplo n.º 3
0
/*
 * copy_configuration()
 *
 * Copy records in master's `repl_nodes` table to witness database
 *
 * This is used by `repmgr` when setting up the witness database, and
 * `repmgrd` after a failover event occurs
 */
bool
copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
{
	char		sqlquery[MAXLEN];
	PGresult   *res;
	int			i;

	sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));
	log_debug("copy_configuration: %s\n", sqlquery);
	res = PQexec(witnessconn, sqlquery);
	if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
	{
		fprintf(stderr, "Cannot clean node details in the witness, %s\n",
				PQerrorMessage(witnessconn));
		return false;
	}

	sqlquery_snprintf(sqlquery,
					  "SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
					  get_repmgr_schema_quoted(masterconn));
	res = PQexec(masterconn, sqlquery);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		fprintf(stderr, "Can't get configuration from master: %s\n",
				PQerrorMessage(masterconn));
		PQclear(res);
		return false;
	}

	for (i = 0; i < PQntuples(res); i++)
	{
		bool node_record_created;
		char *witness = PQgetvalue(res, i, 4);

		log_debug(_("copy_configuration(): %s\n"), witness);

		node_record_created = create_node_record(witnessconn,
												 "copy_configuration",
												 atoi(PQgetvalue(res, i, 0)),
												 PQgetvalue(res, i, 1),
												 strlen(PQgetvalue(res, i, 2))
												   ? atoi(PQgetvalue(res, i, 2))
												   : NO_UPSTREAM_NODE,
												 cluster_name,
												 PQgetvalue(res, i, 3),
												 PQgetvalue(res, i, 4),
												 atoi(PQgetvalue(res, i, 5)),
												 strlen(PQgetvalue(res, i, 6))
													? PQgetvalue(res, i, 6)
													: NULL
												 );

		if (node_record_created == false)
		{
			fprintf(stderr, "Unable to copy node record to witness database: %s\n",
					PQerrorMessage(witnessconn));
			return false;
		}
	}
	PQclear(res);

	return true;
}