void do_bdr_unregister(void) { PGconn *conn = NULL; ExtensionStatus extension_status = REPMGR_UNKNOWN; int target_node_id = UNKNOWN_NODE_ID; t_node_info node_info = T_NODE_INFO_INITIALIZER; RecordStatus record_status = RECORD_NOT_FOUND; bool node_record_deleted = false; PQExpBufferData event_details; char *dbname; /* sanity-check configuration for BDR-compatability */ if (config_file_options.replication_type != REPLICATION_TYPE_BDR) { log_error(_("cannot run BDR UNREGISTER on a non-BDR node")); exit(ERR_BAD_CONFIG); } dbname = pg_malloc0(MAXLEN); if (dbname == NULL) { log_error(_("unable to allocate memory; terminating.")); exit(ERR_OUT_OF_MEMORY); } /* store the database name for future reference */ get_conninfo_value(config_file_options.conninfo, "dbname", dbname); conn = establish_db_connection(config_file_options.conninfo, true); if (!is_bdr_db(conn, NULL)) { log_error(_("database \"%s\" is not BDR-enabled"), dbname); PQfinish(conn); pfree(dbname); exit(ERR_BAD_CONFIG); } extension_status = get_repmgr_extension_status(conn, NULL); if (extension_status != REPMGR_INSTALLED) { log_error(_("repmgr is not installed on database \"%s\""), dbname); PQfinish(conn); pfree(dbname); exit(ERR_BAD_CONFIG); } pfree(dbname); if (!is_bdr_repmgr(conn)) { log_error(_("repmgr metadatabase contains records for non-BDR nodes")); PQfinish(conn); exit(ERR_BAD_CONFIG); } initPQExpBuffer(&event_details); if (runtime_options.node_id != UNKNOWN_NODE_ID) target_node_id = runtime_options.node_id; else target_node_id = config_file_options.node_id; /* Check node exists and is really a BDR node */ record_status = get_node_record(conn, target_node_id, &node_info); if (record_status != RECORD_FOUND) { log_error(_("no record found for node %i"), target_node_id); PQfinish(conn); exit(ERR_BAD_CONFIG); } begin_transaction(conn); log_debug("unregistering node %i", target_node_id); node_record_deleted = delete_node_record(conn, target_node_id); if (node_record_deleted == false) { 
appendPQExpBuffer(&event_details, "unable to delete node record for node \"%s\" (ID: %i)", node_info.node_name, target_node_id); rollback_transaction(conn); } else { appendPQExpBuffer(&event_details, "node record deleted for node \"%s\" (ID: %i)", node_info.node_name, target_node_id); commit_transaction(conn); } /* Log the event */ create_event_notification( conn, &config_file_options, config_file_options.node_id, "bdr_unregister", true, event_details.data); PQfinish(conn); log_notice(_("bdr node \"%s\" (ID: %i) successfully unregistered"), node_info.node_name, target_node_id); termPQExpBuffer(&event_details); return; }
/*
 * reload_config()
 *
 * Re-read the configuration file and copy every changed setting into
 * *orig_options.
 *
 * The new configuration is rejected as a whole (nothing is copied and
 * false is returned) when it cannot be parsed, when it tries to change the
 * cluster name, node ID or node name, when 'failover',
 * 'master_response_timeout', 'reconnect_attempts' or 'reconnect_interval'
 * hold invalid values, or when a changed 'conninfo' string cannot be used
 * to open a connection.
 *
 * Returns true if at least one setting was changed, false otherwise.
 */
bool
reload_config(t_configuration_options *orig_options)
{
	PGconn	   *conn;
	t_configuration_options new_options;
	bool		config_changed = false;

	/*
	 * Re-read the configuration file: repmgr.conf
	 */
	log_info(_("reloading configuration file and updating repmgr tables\n"));

	parse_config(&new_options);

	/* a node ID of -1 indicates the file could not be parsed */
	if (new_options.node == -1)
	{
		log_warning(_("unable to parse new configuration, retaining current configuration\n"));
		return false;
	}

	/* settings which identify the node must never change on reload */
	if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
	{
		log_warning(_("unable to change cluster name, retaining current configuration\n"));
		return false;
	}

	if (new_options.node != orig_options->node)
	{
		log_warning(_("unable to change node ID, retaining current configuration\n"));
		return false;
	}

	if (strcmp(new_options.node_name, orig_options->node_name) != 0)
	{
		log_warning(_("unable to change standby name, keeping current configuration\n"));
		return false;
	}

	/* range checks on the new values */
	if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER)
	{
		log_warning(_("new value for 'failover' must be 'automatic' or 'manual'\n"));
		return false;
	}

	if (new_options.master_response_timeout <= 0)
	{
		log_warning(_("new value for 'master_response_timeout' must be greater than zero\n"));
		return false;
	}

	if (new_options.reconnect_attempts < 0)
	{
		log_warning(_("new value for 'reconnect_attempts' must be zero or greater\n"));
		return false;
	}

	if (new_options.reconnect_interval < 0)
	{
		log_warning(_("new value for 'reconnect_interval' must be zero or greater\n"));
		return false;
	}

	if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
	{
		/* Test conninfo string */
		conn = establish_db_connection(new_options.conninfo, false);
		if (!conn || (PQstatus(conn) != CONNECTION_OK))
		{
			log_warning(_("'conninfo' string is not valid, retaining current configuration\n"));

			/* a failed connection attempt still owns a handle; release it */
			if (conn != NULL)
				PQfinish(conn);

			return false;
		}
		PQfinish(conn);
	}

	/*
	 * No configuration problems detected - copy any changed values
	 *
	 * NB: keep these in the same order as in config.h to make it easier to
	 * manage them
	 */

	/* cluster_name */
	if (strcmp(orig_options->cluster_name, new_options.cluster_name) != 0)
	{
		strcpy(orig_options->cluster_name, new_options.cluster_name);
		config_changed = true;
	}

	/* conninfo */
	if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
	{
		strcpy(orig_options->conninfo, new_options.conninfo);
		config_changed = true;
	}

	/* node */
	if (orig_options->node != new_options.node)
	{
		orig_options->node = new_options.node;
		config_changed = true;
	}

	/* failover */
	if (orig_options->failover != new_options.failover)
	{
		orig_options->failover = new_options.failover;
		config_changed = true;
	}

	/* priority */
	if (orig_options->priority != new_options.priority)
	{
		orig_options->priority = new_options.priority;
		config_changed = true;
	}

	/* node_name */
	if (strcmp(orig_options->node_name, new_options.node_name) != 0)
	{
		strcpy(orig_options->node_name, new_options.node_name);
		config_changed = true;
	}

	/* promote_command */
	if (strcmp(orig_options->promote_command, new_options.promote_command) != 0)
	{
		strcpy(orig_options->promote_command, new_options.promote_command);
		config_changed = true;
	}

	/* follow_command */
	if (strcmp(orig_options->follow_command, new_options.follow_command) != 0)
	{
		strcpy(orig_options->follow_command, new_options.follow_command);
		config_changed = true;
	}

	/*
	 * XXX loglevel and logfacility are not reloaded here. Could these change
	 * with a simple SIGHUP? Doing so would mean copying both values, then
	 * shutting down and re-initialising the logger, which needs progname.
	 */

	/* rsync_options */
	if (strcmp(orig_options->rsync_options, new_options.rsync_options) != 0)
	{
		strcpy(orig_options->rsync_options, new_options.rsync_options);
		config_changed = true;
	}

	/* ssh_options */
	if (strcmp(orig_options->ssh_options, new_options.ssh_options) != 0)
	{
		strcpy(orig_options->ssh_options, new_options.ssh_options);
		config_changed = true;
	}

	/* master_response_timeout */
	if (orig_options->master_response_timeout != new_options.master_response_timeout)
	{
		orig_options->master_response_timeout = new_options.master_response_timeout;
		config_changed = true;
	}

	/* reconnect_attempts */
	if (orig_options->reconnect_attempts != new_options.reconnect_attempts)
	{
		orig_options->reconnect_attempts = new_options.reconnect_attempts;
		config_changed = true;
	}

	/* reconnect_interval */
	if (orig_options->reconnect_interval != new_options.reconnect_interval)
	{
		orig_options->reconnect_interval = new_options.reconnect_interval;
		config_changed = true;
	}

	/* pg_ctl_options */
	if (strcmp(orig_options->pg_ctl_options, new_options.pg_ctl_options) != 0)
	{
		strcpy(orig_options->pg_ctl_options, new_options.pg_ctl_options);
		config_changed = true;
	}

	/* pg_basebackup_options */
	if (strcmp(orig_options->pg_basebackup_options, new_options.pg_basebackup_options) != 0)
	{
		strcpy(orig_options->pg_basebackup_options, new_options.pg_basebackup_options);
		config_changed = true;
	}

	/* monitor_interval_secs */
	if (orig_options->monitor_interval_secs != new_options.monitor_interval_secs)
	{
		orig_options->monitor_interval_secs = new_options.monitor_interval_secs;
		config_changed = true;
	}

	/* retry_promote_interval_secs */
	if (orig_options->retry_promote_interval_secs != new_options.retry_promote_interval_secs)
	{
		orig_options->retry_promote_interval_secs = new_options.retry_promote_interval_secs;
		config_changed = true;
	}

	/* use_replication_slots */
	if (orig_options->use_replication_slots != new_options.use_replication_slots)
	{
		orig_options->use_replication_slots = new_options.use_replication_slots;
		config_changed = true;
	}

	if (config_changed == true)
	{
		log_debug(_("reload_config(): configuration has changed\n"));
	}
	else
	{
		log_debug(_("reload_config(): configuration has not changed\n"));
	}

	return config_changed;
}
/*
 * get_master_connection()
 *
 * Connect in turn to each non-witness node registered for "cluster" and
 * return a connection to the first one which reports that it is not in
 * recovery, i.e. the current master.
 *
 * standby_conn:        connection used to read the node list
 * cluster:             cluster name to look up
 * master_id:           if not NULL, receives the master's node ID, or
 *                      NODE_NOT_FOUND if no master was found
 * master_conninfo_out: if not NULL, receives the conninfo string of the
 *                      node being checked; up to MAXCONNINFO bytes are
 *                      written, so the buffer must be at least that large.
 *                      On success it holds the master's conninfo.
 *
 * Returns an open connection which the caller must close with PQfinish(),
 * or NULL if the node list could not be read or no master was reachable.
 */
PGconn *
get_master_connection(PGconn *standby_conn, char *cluster,
					  int *master_id, char *master_conninfo_out)
{
	PGconn	   *master_conn = NULL;
	PGresult   *res1;
	PGresult   *res2;
	char		sqlquery[QUERY_STR_LEN];
	char		master_conninfo_stack[MAXCONNINFO];
	char	   *master_conninfo = master_conninfo_stack;

	int			i,
				node_id;

	if (master_id != NULL)
	{
		*master_id = NODE_NOT_FOUND;
	}

	/*
	 * If the caller wanted to get a copy of the connection info string, sub
	 * out the local stack buffer for the buffer passed by the caller.
	 */
	if (master_conninfo_out != NULL)
		master_conninfo = master_conninfo_out;

	/* find all nodes belonging to this cluster */
	log_info(_("finding node list for cluster '%s'\n"),
			 cluster);

	sqlquery_snprintf(sqlquery,
					  "SELECT id, conninfo "
					  " FROM %s.repl_nodes "
					  " WHERE cluster = '%s' "
					  " AND type != 'witness' ",
					  get_repmgr_schema_quoted(standby_conn),
					  cluster);

	res1 = PQexec(standby_conn, sqlquery);
	if (PQresultStatus(res1) != PGRES_TUPLES_OK)
	{
		log_err(_("unable to retrieve node records: %s\n"),
				PQerrorMessage(standby_conn));
		PQclear(res1);
		return NULL;
	}

	for (i = 0; i < PQntuples(res1); i++)
	{
		/* initialize with the values of the current node being processed */
		node_id = atoi(PQgetvalue(res1, i, 0));

		/* strncpy() does not terminate on truncation, so do it explicitly */
		strncpy(master_conninfo, PQgetvalue(res1, i, 1), MAXCONNINFO - 1);
		master_conninfo[MAXCONNINFO - 1] = '\0';

		log_info(_("checking role of cluster node '%i'\n"),
				 node_id);
		master_conn = establish_db_connection(master_conninfo, false);

		if (PQstatus(master_conn) != CONNECTION_OK)
		{
			/* a failed connection attempt still owns a handle; release it */
			if (master_conn != NULL)
				PQfinish(master_conn);
			continue;
		}

		/*
		 * Can't use the is_standby() function here because on error that
		 * function closes the connection passed and exits.  This still needs
		 * to close master_conn first.
		 */
		res2 = PQexec(master_conn, "SELECT pg_catalog.pg_is_in_recovery()");

		if (PQresultStatus(res2) != PGRES_TUPLES_OK)
		{
			log_err(_("unable to retrieve recovery state from this node: %s\n"),
					PQerrorMessage(master_conn));
			PQclear(res2);
			PQfinish(master_conn);
			continue;
		}

		/* if false, this is the master */
		if (strcmp(PQgetvalue(res2, 0, 0), "f") == 0)
		{
			PQclear(res2);
			PQclear(res1);
			log_debug(_("get_master_connection(): current master node is %i\n"), node_id);

			if (master_id != NULL)
			{
				*master_id = node_id;
			}

			return master_conn;
		}

		/* it is a standby: discard the result and the connection */
		PQclear(res2);
		PQfinish(master_conn);
	}

	/*
	 * If we finish this loop without finding a master then we either don't
	 * have the info or the master has failed (or we reached max_connections
	 * or superuser_reserved_connections, or something else went wrong).
	 *
	 * Probably we will need to check the error to know if we need to start
	 * the failover procedure or just fix some situation on the standby.
	 */
	PQclear(res1);
	return NULL;
}
/* * do_bdr_register() * * As each BDR node is its own primary, registering a BDR node * will create the repmgr metadata schema if necessary. */ void do_bdr_register(void) { PGconn *conn = NULL; BdrNodeInfoList bdr_nodes = T_BDR_NODE_INFO_LIST_INITIALIZER; ExtensionStatus extension_status = REPMGR_UNKNOWN; t_node_info node_info = T_NODE_INFO_INITIALIZER; RecordStatus record_status = RECORD_NOT_FOUND; PQExpBufferData event_details; bool success = true; char *dbname = NULL; /* sanity-check configuration for BDR-compatability */ if (config_file_options.replication_type != REPLICATION_TYPE_BDR) { log_error(_("cannot run BDR REGISTER on a non-BDR node")); exit(ERR_BAD_CONFIG); } dbname = pg_malloc0(MAXLEN); if (dbname == NULL) { log_error(_("unable to allocate memory; terminating.")); exit(ERR_OUT_OF_MEMORY); } /* store the database name for future reference */ get_conninfo_value(config_file_options.conninfo, "dbname", dbname); conn = establish_db_connection(config_file_options.conninfo, true); if (!is_bdr_db(conn, NULL)) { log_error(_("database \"%s\" is not BDR-enabled"), dbname); log_hint(_("when using repmgr with BDR, the repmgr schema must be stored in the BDR database")); PQfinish(conn); pfree(dbname); exit(ERR_BAD_CONFIG); } /* Check that there are at most 2 BDR nodes */ get_all_bdr_node_records(conn, &bdr_nodes); if (bdr_nodes.node_count == 0) { log_error(_("database \"%s\" is BDR-enabled but no BDR nodes were found"), dbname); PQfinish(conn); pfree(dbname); exit(ERR_BAD_CONFIG); } /* BDR 2 implementation is for 2 nodes only */ if (get_bdr_version_num() < 3 && bdr_nodes.node_count > 2) { log_error(_("repmgr can only support BDR 2.x clusters with 2 nodes")); log_detail(_("this BDR cluster has %i nodes"), bdr_nodes.node_count); PQfinish(conn); pfree(dbname); exit(ERR_BAD_CONFIG); } if (get_bdr_version_num() > 2) { log_error(_("\"repmgr bdr register\" is for BDR 2.x only")); PQfinish(conn); pfree(dbname); exit(ERR_BAD_CONFIG); } /* check for a matching BDR node */ 
{ PQExpBufferData bdr_local_node_name; bool node_match = false; initPQExpBuffer(&bdr_local_node_name); node_match = bdr_node_name_matches(conn, config_file_options.node_name, &bdr_local_node_name); if (node_match == false) { if (strlen(bdr_local_node_name.data)) { log_error(_("local node BDR node name is \"%s\", expected: \"%s\""), bdr_local_node_name.data, config_file_options.node_name); log_hint(_("\"node_name\" in repmgr.conf must match \"node_name\" in bdr.bdr_nodes")); } else { log_error(_("local node does not report BDR node name")); log_hint(_("ensure this is an active BDR node")); } PQfinish(conn); pfree(dbname); termPQExpBuffer(&bdr_local_node_name); exit(ERR_BAD_CONFIG); } termPQExpBuffer(&bdr_local_node_name); } /* check whether repmgr extension exists, and there are no non-BDR nodes registered */ extension_status = get_repmgr_extension_status(conn, NULL); if (extension_status == REPMGR_UNKNOWN) { log_error(_("unable to determine status of \"repmgr\" extension in database \"%s\""), dbname); PQfinish(conn); pfree(dbname); exit(ERR_BAD_CONFIG); } if (extension_status == REPMGR_UNAVAILABLE) { log_error(_("\"repmgr\" extension is not available")); PQfinish(conn); pfree(dbname); exit(ERR_BAD_CONFIG); } if (extension_status == REPMGR_INSTALLED) { if (!is_bdr_repmgr(conn)) { log_error(_("repmgr metadatabase contains records for non-BDR nodes")); PQfinish(conn); pfree(dbname); exit(ERR_BAD_CONFIG); } } else { log_debug("creating repmgr extension in database \"%s\"", dbname); begin_transaction(conn); if (!create_repmgr_extension(conn)) { log_error(_("unable to create repmgr extension - see preceding error message(s); aborting")); rollback_transaction(conn); pfree(dbname); PQfinish(conn); exit(ERR_BAD_CONFIG); } commit_transaction(conn); } pfree(dbname); if (bdr_node_has_repmgr_set(conn, config_file_options.node_name) == false) { log_debug("bdr_node_has_repmgr_set() = false"); bdr_node_set_repmgr_set(conn, config_file_options.node_name); } /* * before adding the 
extension tables to the replication set, if any other * BDR nodes exist, populate repmgr.nodes with a copy of existing entries * * currently we won't copy the contents of any other tables * */ { NodeInfoList local_node_records = T_NODE_INFO_LIST_INITIALIZER; (void) get_all_node_records(conn, &local_node_records); if (local_node_records.node_count == 0) { BdrNodeInfoList bdr_nodes = T_BDR_NODE_INFO_LIST_INITIALIZER; BdrNodeInfoListCell *bdr_cell = NULL; get_all_bdr_node_records(conn, &bdr_nodes); if (bdr_nodes.node_count == 0) { log_error(_("unable to retrieve any BDR node records")); log_detail("%s", PQerrorMessage(conn)); PQfinish(conn); exit(ERR_BAD_CONFIG); } for (bdr_cell = bdr_nodes.head; bdr_cell; bdr_cell = bdr_cell->next) { PGconn *bdr_node_conn = NULL; NodeInfoList existing_nodes = T_NODE_INFO_LIST_INITIALIZER; NodeInfoListCell *cell = NULL; ExtensionStatus other_node_extension_status = REPMGR_UNKNOWN; /* skip the local node */ if (strncmp(node_info.node_name, bdr_cell->node_info->node_name, sizeof(node_info.node_name)) == 0) { continue; } log_debug("connecting to BDR node \"%s\" (conninfo: \"%s\")", bdr_cell->node_info->node_name, bdr_cell->node_info->node_local_dsn); bdr_node_conn = establish_db_connection_quiet(bdr_cell->node_info->node_local_dsn); if (PQstatus(bdr_node_conn) != CONNECTION_OK) { continue; } /* check repmgr schema exists, skip if not */ other_node_extension_status = get_repmgr_extension_status(bdr_node_conn, NULL); if (other_node_extension_status != REPMGR_INSTALLED) { continue; } (void) get_all_node_records(bdr_node_conn, &existing_nodes); for (cell = existing_nodes.head; cell; cell = cell->next) { log_debug("creating record for node \"%s\" (ID: %i)", cell->node_info->node_name, cell->node_info->node_id); create_node_record(conn, "bdr register", cell->node_info); } PQfinish(bdr_node_conn); break; } } } /* Add the repmgr extension tables to a replication set */ if (get_bdr_version_num() < 3) { 
add_extension_tables_to_bdr_replication_set(conn); } else { /* this is the only table we need to replicate */ char *replication_set = get_default_bdr_replication_set(conn); /* * this probably won't happen, but we need to be sure we're using * the replication set metadata correctly... */ if (conn == NULL) { log_error(_("unable to retrieve default BDR replication set")); log_hint(_("see preceding messages")); log_debug("check query in get_default_bdr_replication_set()"); exit(ERR_BAD_CONFIG); } if (is_table_in_bdr_replication_set(conn, "nodes", replication_set) == false) { add_table_to_bdr_replication_set(conn, "nodes", replication_set); } pfree(replication_set); } initPQExpBuffer(&event_details); begin_transaction(conn); /* * we'll check if a record exists (even if the schema was just created), * as there's a faint chance of a race condition */ record_status = get_node_record(conn, config_file_options.node_id, &node_info); /* Update internal node record */ node_info.type = BDR; node_info.node_id = config_file_options.node_id; node_info.upstream_node_id = NO_UPSTREAM_NODE; node_info.active = true; node_info.priority = config_file_options.priority; strncpy(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name)); strncpy(node_info.location, config_file_options.location, sizeof(node_info.location)); strncpy(node_info.conninfo, config_file_options.conninfo, sizeof(node_info.conninfo)); if (record_status == RECORD_FOUND) { bool node_updated = false; /* * At this point we will have established there are no non-BDR * records, so no need to verify the node type */ if (!runtime_options.force) { log_error(_("this node is already registered")); log_hint(_("use -F/--force to overwrite the existing node record")); rollback_transaction(conn); PQfinish(conn); exit(ERR_BAD_CONFIG); } /* * don't permit changing the node name - this must match the BDR node * name set when the node was registered. 
*/ if (strncmp(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name)) != 0) { log_error(_("a record for node %i is already registered with node_name \"%s\""), config_file_options.node_id, node_info.node_name); log_hint(_("node_name configured in repmgr.conf is \"%s\""), config_file_options.node_name); rollback_transaction(conn); PQfinish(conn); exit(ERR_BAD_CONFIG); } node_updated = update_node_record(conn, "bdr register", &node_info); if (node_updated == true) { appendPQExpBuffer(&event_details, _("node record updated for node \"%s\" (%i)"), config_file_options.node_name, config_file_options.node_id); log_verbose(LOG_NOTICE, "%s", event_details.data); } else { success = false; } } else { /* create new node record */ bool node_created = create_node_record(conn, "bdr register", &node_info); if (node_created == true) { appendPQExpBuffer(&event_details, _("node record created for node \"%s\" (ID: %i)"), config_file_options.node_name, config_file_options.node_id); log_notice("%s", event_details.data); } else { success = false; } } if (success == false) { rollback_transaction(conn); PQfinish(conn); exit(ERR_DB_QUERY); } commit_transaction(conn); /* Log the event */ create_event_notification( conn, &config_file_options, config_file_options.node_id, "bdr_register", true, event_details.data); termPQExpBuffer(&event_details); PQfinish(conn); log_notice(_("BDR node %i registered (conninfo: %s)"), config_file_options.node_id, config_file_options.conninfo); return; }
/*
 * get_upstream_connection()
 *
 * Returns connection to node's upstream node
 *
 * standby_conn:          connection used to look up the upstream node
 * cluster:               cluster name
 * node_id:               ID of the node whose upstream is wanted
 * upstream_node_id_ptr:  if not NULL, receives the upstream node's ID
 * upstream_conninfo_out: if not NULL, receives the upstream node's conninfo
 *                        string; up to MAXCONNINFO bytes are written, so
 *                        the buffer must be at least that large
 *
 * Returns NULL if the lookup query fails, no upstream record exists, or the
 * connection attempt fails.
 *
 * NOTE: will attempt to connect even if node is marked as inactive
 */
PGconn *
get_upstream_connection(PGconn *standby_conn, char *cluster, int node_id,
						int *upstream_node_id_ptr, char *upstream_conninfo_out)
{
	PGconn	   *upstream_conn = NULL;
	PGresult   *res;
	char		sqlquery[QUERY_STR_LEN];
	char		upstream_conninfo_stack[MAXCONNINFO];
	char	   *upstream_conninfo = upstream_conninfo_stack;

	/*
	 * If the caller wanted to get a copy of the connection info string, sub
	 * out the local stack buffer for the buffer passed by the caller.
	 */
	if (upstream_conninfo_out != NULL)
		upstream_conninfo = upstream_conninfo_out;

	/* result columns: 0 = conninfo, 1 = name, 2 = id */
	sqlquery_snprintf(sqlquery,
					  " SELECT un.conninfo, un.name, un.id "
					  " FROM %s.repl_nodes un "
					  "INNER JOIN %s.repl_nodes n "
					  " ON (un.id = n.upstream_node_id AND un.cluster = n.cluster)"
					  " WHERE n.cluster = '%s' "
					  " AND n.id = %i ",
					  get_repmgr_schema_quoted(standby_conn),
					  get_repmgr_schema_quoted(standby_conn),
					  cluster,
					  node_id);

	log_debug("get_upstream_connection(): %s\n", sqlquery);

	res = PQexec(standby_conn, sqlquery);

	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_err(_("unable to get conninfo for upstream server: %s\n"),
				PQerrorMessage(standby_conn));
		PQclear(res);
		return NULL;
	}

	if (!PQntuples(res))
	{
		log_notice(_("no record found for upstream server"));
		PQclear(res);
		return NULL;
	}

	/* strncpy() does not terminate on truncation, so do it explicitly */
	strncpy(upstream_conninfo, PQgetvalue(res, 0, 0), MAXCONNINFO - 1);
	upstream_conninfo[MAXCONNINFO - 1] = '\0';

	/* the node ID is the third column; the second one is the node name */
	if (upstream_node_id_ptr != NULL)
		*upstream_node_id_ptr = atoi(PQgetvalue(res, 0, 2));

	PQclear(res);

	log_debug("conninfo is: '%s'\n", upstream_conninfo);
	upstream_conn = establish_db_connection(upstream_conninfo, false);

	if (PQstatus(upstream_conn) != CONNECTION_OK)
	{
		/*
		 * NOTE(review): the failed connection handle is not released before
		 * returning NULL, so the caller cannot free it - confirm whether it
		 * should be closed here.
		 */
		log_err(_("unable to connect to upstream node: %s\n"),
				PQerrorMessage(upstream_conn));
		return NULL;
	}

	return upstream_conn;
}
static void do_failover(void) { PGresult *res; char sqlquery[QUERY_STR_LEN]; int total_nodes = 0; int visible_nodes = 0; int ready_nodes = 0; bool find_best = false; int i; int r; uint32 uxlogid; uint32 uxrecoff; XLogRecPtr xlog_recptr; char last_wal_standby_applied[MAXLEN]; PGconn *node_conn = NULL; /* * will get info about until 50 nodes, which seems to be large enough for * most scenarios */ t_node_info nodes[50]; /* initialize to keep compiler quiet */ t_node_info best_candidate = {-1, "", InvalidXLogRecPtr, false, false, false}; /* get a list of standby nodes, including myself */ sprintf(sqlquery, "SELECT id, conninfo, witness " " FROM %s.repl_nodes " " WHERE cluster = '%s' " " ORDER BY priority, id ", repmgr_schema, local_options.cluster_name); res = PQexec(my_local_conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_err(_("Can't get nodes' info: %s\n"), PQerrorMessage(my_local_conn)); PQclear(res); terminate(ERR_DB_QUERY); } /* * total nodes that are registered */ total_nodes = PQntuples(res); log_debug(_("%s: there are %d nodes registered\n"), progname, total_nodes); /* * Build an array with the nodes and indicate which ones are visible and * ready */ for (i = 0; i < total_nodes; i++) { nodes[i].node_id = atoi(PQgetvalue(res, i, 0)); strncpy(nodes[i].conninfo_str, PQgetvalue(res, i, 1), MAXLEN); nodes[i].is_witness = (strcmp(PQgetvalue(res, i, 2), "t") == 0) ? true : false; /* * Initialize on false so if we can't reach this node we know that * later */ nodes[i].is_visible = false; nodes[i].is_ready = false; XLAssignValue(nodes[i].xlog_location, 0, 0); log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"), progname, nodes[i].node_id, nodes[i].conninfo_str, (nodes[i].is_witness) ? 
"true" : "false"); node_conn = establish_db_connection(nodes[i].conninfo_str, false); /* if we can't see the node just skip it */ if (PQstatus(node_conn) != CONNECTION_OK) { if (node_conn != NULL) PQfinish(node_conn); continue; } visible_nodes++; nodes[i].is_visible = true; PQfinish(node_conn); } PQclear(res); log_debug(_("Total nodes counted: registered=%d, visible=%d\n"), total_nodes, visible_nodes); /* * am i on the group that should keep alive? if i see less than half of * total_nodes then i should do nothing */ if (visible_nodes < (total_nodes / 2.0)) { log_err(_("Can't reach most of the nodes.\n" "Let the other standby servers decide which one will be the primary.\n" "Manual action will be needed to readd this node to the cluster.\n")); terminate(ERR_FAILOVER_FAIL); } /* Query all the nodes to determine which ones are ready */ for (i = 0; i < total_nodes; i++) { /* if the node is not visible, skip it */ if (!nodes[i].is_visible) continue; if (nodes[i].is_witness) continue; node_conn = establish_db_connection(nodes[i].conninfo_str, false); /* * XXX This shouldn't happen, if this happens it means this is a major * problem maybe network outages? 
anyway, is better for a human to * react */ if (PQstatus(node_conn) != CONNECTION_OK) { log_err(_("It seems new problems are arising, manual intervention is needed\n")); terminate(ERR_FAILOVER_FAIL); } uxlogid = 0; uxrecoff = 0; sqlquery_snprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); res = PQexec(node_conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_info(_("Can't get node's last standby location: %s\n"), PQerrorMessage(node_conn)); log_info(_("Connection details: %s\n"), nodes[i].conninfo_str); PQclear(res); PQfinish(node_conn); terminate(ERR_FAILOVER_FAIL); } if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0)); log_debug("XLog position of node %d: log id=%u (%X), offset=%u (%X)\n", nodes[i].node_id, uxlogid, uxlogid, uxrecoff, uxrecoff); /* If position is 0/0, error */ if (uxlogid == 0 && uxrecoff == 0) { PQclear(res); PQfinish(node_conn); log_info(_("InvalidXLogRecPtr detected in a standby\n")); terminate(ERR_FAILOVER_FAIL); } XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff); PQclear(res); PQfinish(node_conn); } /* last we get info about this node, and update shared memory */ sprintf(sqlquery, "SELECT pg_last_xlog_receive_location()"); res = PQexec(my_local_conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_err(_("PQexec failed: %s.\nReport an invalid value to not be " " considered as new primary and exit.\n"), PQerrorMessage(my_local_conn)); PQclear(res); sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0); update_shared_memory(last_wal_standby_applied); terminate(ERR_DB_QUERY); } /* write last location in shared memory */ update_shared_memory(PQgetvalue(res, 0, 0)); PQclear(res); for (i = 0; i < total_nodes; i++) { while (!nodes[i].is_ready) { /* * the witness will always be masked as ready if it's still not * marked that way and avoid a useless query */ if (nodes[i].is_witness) { if 
(!nodes[i].is_ready) { nodes[i].is_ready = true; ready_nodes++; } break; } /* if the node is not visible, skip it */ if (!nodes[i].is_visible) break; /* if the node is ready there is nothing to check, skip it too */ if (nodes[i].is_ready) break; node_conn = establish_db_connection(nodes[i].conninfo_str, false); /* * XXX This shouldn't happen, if this happens it means this is a * major problem maybe network outages? anyway, is better for a * human to react */ if (PQstatus(node_conn) != CONNECTION_OK) { /* XXX */ log_info(_("At this point, it could be some race conditions " "that are acceptable, assume the node is restarting " "and starting failover procedure\n")); break; } uxlogid = 0; uxrecoff = 0; sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema); res = PQexec(node_conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_err(_("PQexec failed: %s.\nReport an invalid value to not" "be considered as new primary and exit.\n"), PQerrorMessage(node_conn)); PQclear(res); PQfinish(node_conn); terminate(ERR_DB_QUERY); } if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2) { log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0)); /* we can't do anything but fail at this point... 
*/ if (*PQgetvalue(res, 0, 0) == '\0') { log_crit("Whoops, seems as if shared_preload_libraries=repmgr_funcs is not set!\n"); exit(ERR_BAD_CONFIG); } } PQclear(res); PQfinish(node_conn); /* If position is 0/0, keep checking */ if (uxlogid == 0 && uxrecoff == 0) continue; XLAssignValue(xlog_recptr, uxlogid, uxrecoff); if (XLByteLT(nodes[i].xlog_location, xlog_recptr)) { XLAssignValue(nodes[i].xlog_location, uxlogid, uxrecoff); } log_debug("Last XLog position of node %d: log id=%u (%X), offset=%u (%X)\n", nodes[i].node_id, uxlogid, uxlogid, uxrecoff, uxrecoff); ready_nodes++; nodes[i].is_ready = true; } } /* Close the connection to this server */ PQfinish(my_local_conn); my_local_conn = NULL; /* * determine which one is the best candidate to promote to primary */ for (i = 0; i < total_nodes; i++) { /* witness is never a good candidate */ if (nodes[i].is_witness) continue; if (!nodes[i].is_ready || !nodes[i].is_visible) continue; if (!find_best) { /* * start with the first ready node, and then move on to the next * one */ best_candidate.node_id = nodes[i].node_id; XLAssign(best_candidate.xlog_location, nodes[i].xlog_location); best_candidate.is_ready = nodes[i].is_ready; best_candidate.is_witness = nodes[i].is_witness; find_best = true; } /* we use the macros provided by xlogdefs.h to compare XLogRecPtr */ /* * Nodes are retrieved ordered by priority, so if the current best * candidate is lower than the next node's wal location then assign * next node as the new best candidate. */ if (XLByteLT(best_candidate.xlog_location, nodes[i].xlog_location)) { best_candidate.node_id = nodes[i].node_id; XLAssign(best_candidate.xlog_location, nodes[i].xlog_location); best_candidate.is_ready = nodes[i].is_ready; best_candidate.is_witness = nodes[i].is_witness; } } /* once we know who is the best candidate, promote it */ if (find_best && (best_candidate.node_id == local_options.node)) { if (best_candidate.is_witness) { log_err(_("%s: Node selected as new master is a witness. 
Can't be promoted.\n"), progname); terminate(ERR_FAILOVER_FAIL); } /* wait */ sleep(5); if (verbose) log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"), progname); log_debug(_("promote command is: \"%s\"\n"), local_options.promote_command); if (log_type == REPMGR_STDERR && *local_options.logfile) { fflush(stderr); } r = system(local_options.promote_command); if (r != 0) { log_err(_("%s: promote command failed. You could check and try it manually.\n"), progname); terminate(ERR_BAD_CONFIG); } } else if (find_best) { /* wait */ sleep(10); if (verbose) log_info(_("%s: Node %d is the best candidate to be the new primary, we should follow it...\n"), progname, best_candidate.node_id); log_debug(_("follow command is: \"%s\"\n"), local_options.follow_command); /* * New Primary need some time to be promoted. The follow command * should take care of that. */ if (log_type == REPMGR_STDERR && *local_options.logfile) { fflush(stderr); } r = system(local_options.follow_command); if (r != 0) { log_err(_("%s: follow command failed. You could check and try it manually.\n"), progname); terminate(ERR_BAD_CONFIG); } } else { log_err(_("%s: Did not find candidates. You should check and try manually.\n"), progname); terminate(ERR_FAILOVER_FAIL); } /* to force it to re-calculate mode and master node */ failover_done = true; /* and reconnect to the local database */ my_local_conn = establish_db_connection(local_options.conninfo, true); }
int main(int argc, char **argv) { static struct option long_options[] = { {"config-file", required_argument, NULL, 'f'}, {"verbose", no_argument, NULL, 'v'}, {"monitoring-history", no_argument, NULL, 'm'}, {"daemonize", no_argument, NULL, 'd'}, {"pid-file", required_argument, NULL, 'p'}, {NULL, 0, NULL, 0} }; int optindex; int c, ret; bool daemonize = false; FILE *fd; char standby_version[MAXVERSIONSTR], *ret_ver; progname = get_progname(argv[0]); if (argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { help(progname); exit(SUCCESS); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION); exit(SUCCESS); } } while ((c = getopt_long(argc, argv, "f:v:mdp:", long_options, &optindex)) != -1) { switch (c) { case 'f': config_file = optarg; break; case 'v': verbose = true; break; case 'm': monitoring_history = true; break; case 'd': daemonize = true; break; case 'p': pid_file = optarg; break; default: usage(); exit(ERR_BAD_CONFIG); } } if (daemonize) { do_daemonize(); } if (pid_file) { check_and_create_pid_file(pid_file); } #ifndef WIN32 setup_event_handlers(); #endif /* * Read the configuration file: repmgr.conf */ parse_config(config_file, &local_options); if (local_options.node == -1) { log_err(_("Node information is missing. 
" "Check the configuration file, or provide one if you have not done so.\n")); terminate(ERR_BAD_CONFIG); } fd = freopen("/dev/null", "r", stdin); if (fd == NULL) { fprintf(stderr, "error reopening stdin to '/dev/null': %s", strerror(errno)); } fd = freopen("/dev/null", "w", stdout); if (fd == NULL) { fprintf(stderr, "error reopening stdout to '/dev/null': %s", strerror(errno)); } logger_init(&local_options, progname, local_options.loglevel, local_options.logfacility); if (verbose) logger_min_verbose(LOG_INFO); if (log_type == REPMGR_SYSLOG) { fd = freopen("/dev/null", "w", stderr); if (fd == NULL) { fprintf(stderr, "error reopening stderr to '/dev/null': %s", strerror(errno)); } } xsnprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name); log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo); my_local_conn = establish_db_connection(local_options.conninfo, true); /* should be v9 or better */ log_info(_("%s Connected to database, checking its state\n"), progname); ret_ver = pg_version(my_local_conn, standby_version); if (ret_ver == NULL || strcmp(standby_version, "") == 0) { if (ret_ver != NULL) log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); terminate(ERR_BAD_CONFIG); } /* * MAIN LOOP This loops cicles once per failover and at startup * Requisites: - my_local_conn needs to be already setted with an active * connection - no master connection */ do { /* * Set my server mode, establish a connection to primary and start * monitor */ ret = is_witness(my_local_conn, repmgr_schema, local_options.cluster_name, local_options.node); if (ret == 1) my_local_mode = WITNESS_MODE; else if (ret == 0) { ret = is_standby(my_local_conn); if (ret == 1) my_local_mode = STANDBY_MODE; else if (ret == 0) /* is the master */ my_local_mode = PRIMARY_MODE; } /* * XXX we did this before changing is_standby() to return int; we * should not exit at this point, but for now we do until we have a * 
better strategy */ if (ret == -1) terminate(1); switch (my_local_mode) { case PRIMARY_MODE: primary_options.node = local_options.node; strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); primary_conn = my_local_conn; check_cluster_configuration(my_local_conn); check_node_configuration(); if (reload_config(config_file, &local_options)) { PQfinish(my_local_conn); my_local_conn = establish_db_connection(local_options.conninfo, true); primary_conn = my_local_conn; update_registration(); } log_info(_("%s Starting continuous primary connection check\n"), progname); /* * Check that primary is still alive, and standbies are * sending info */ /* * Every local_options.monitor_interval_secs seconds, do * master checks XXX Check that standbies are sending info */ do { if (check_connection(primary_conn, "master")) { /* * CheckActiveStandbiesConnections(); * CheckInactiveStandbies(); */ sleep(local_options.monitor_interval_secs); } else { /* * XXX May we do something more verbose ? */ terminate(1); } if (got_SIGHUP) { /* * if we can reload, then could need to change * my_local_conn */ if (reload_config(config_file, &local_options)) { PQfinish(my_local_conn); my_local_conn = establish_db_connection(local_options.conninfo, true); primary_conn = my_local_conn; if (*local_options.logfile) { FILE *fd; fd = freopen(local_options.logfile, "a", stderr); if (fd == NULL) { fprintf(stderr, "error reopening stderr to '%s': %s", local_options.logfile, strerror(errno)); } } update_registration(); } got_SIGHUP = false; } } while (!failover_done); break; case WITNESS_MODE: case STANDBY_MODE: /* I need the id of the primary as well as a connection to it */ log_info(_("%s Connecting to primary for cluster '%s'\n"), progname, local_options.cluster_name); primary_conn = get_master_connection(my_local_conn, repmgr_schema, local_options.cluster_name, &primary_options.node, NULL); if (primary_conn == NULL) { terminate(ERR_BAD_CONFIG); } check_cluster_configuration(my_local_conn); 
check_node_configuration(); if (reload_config(config_file, &local_options)) { PQfinish(my_local_conn); my_local_conn = establish_db_connection(local_options.conninfo, true); update_registration(); } /* * Every local_options.monitor_interval_secs seconds, do * checks */ if (my_local_mode == WITNESS_MODE) { log_info(_("%s Starting continuous witness node monitoring\n"), progname); } else if (my_local_mode == STANDBY_MODE) { log_info(_("%s Starting continuous standby node monitoring\n"), progname); } do { if (my_local_mode == WITNESS_MODE) witness_monitor(); else if (my_local_mode == STANDBY_MODE) standby_monitor(); sleep(local_options.monitor_interval_secs); if (got_SIGHUP) { /* * if we can reload, then could need to change * my_local_conn */ if (reload_config(config_file, &local_options)) { PQfinish(my_local_conn); my_local_conn = establish_db_connection(local_options.conninfo, true); update_registration(); } got_SIGHUP = false; } } while (!failover_done); break; default: log_err(_("%s: Unrecognized mode for node %d\n"), progname, local_options.node); } failover_done = false; } while (true); /* close the connection to the database and cleanup */ close_connections(); /* Shuts down logging system */ logger_shutdown(); return 0; }
/*
 * reload_config()
 *
 * This is only called by repmgrd after receiving a SIGHUP or when a monitoring
 * loop is started up; it therefore only needs to reload options required
 * by repmgrd, which are as follows:
 *
 * changeable options:
 * - failover
 * - follow_command
 * - logfacility
 * - logfile
 * - loglevel
 * - master_response_timeout
 * - monitor_interval_secs
 * - priority
 * - promote_command
 * - reconnect_attempts
 * - reconnect_interval
 * - retry_promote_interval_secs
 * - witness_repl_nodes_sync_interval_secs
 *
 * non-changeable options:
 * - cluster_name
 * - conninfo
 * - node
 * - node_name
 *
 * extract with something like:
 *	 grep local_options\\. repmgrd.c | perl -n -e '/local_options\.([\w_]+)/ && print qq|$1\n|;' | sort | uniq
 *
 * The configuration is re-read into a scratch structure and validated;
 * only if every check passes are changed values copied into *orig_options.
 *
 * Returns true if at least one non-logging option changed, false otherwise
 * (including when the new configuration was rejected, and when only logging
 * options changed - in that case the logger is still restarted).
 *
 * Note: a changed "conninfo" is only tested for connectivity here; this
 * function does not copy it into *orig_options.
 */
bool
reload_config(t_configuration_options *orig_options)
{
	PGconn	   *conn;
	t_configuration_options new_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
	bool		config_changed = false;
	bool		log_config_changed = false;

	/*
	 * Deliberately not static: nothing in this function resets the list, so
	 * with static storage any error recorded by one reload attempt would
	 * still be present on the next one and cause it to be rejected too.
	 * NOTE(review): assumes _parse_config() does not keep the pointer after
	 * returning - confirm.
	 */
	ItemList	config_errors = { NULL, NULL };

	/*
	 * Re-read the configuration file: repmgr.conf
	 */
	log_info(_("reloading configuration file\n"));

	_parse_config(&new_options, &config_errors);

	if (config_errors.head != NULL)
	{
		/* XXX dump errors to log */
		log_warning(_("unable to parse new configuration, retaining current configuration\n"));
		return false;
	}

	/* The following options cannot be changed */

	if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
	{
		log_warning(_("cluster_name cannot be changed, retaining current configuration\n"));
		return false;
	}

	if (new_options.node != orig_options->node)
	{
		log_warning(_("node ID cannot be changed, retaining current configuration\n"));
		return false;
	}

	if (strcmp(new_options.node_name, orig_options->node_name) != 0)
	{
		log_warning(_("node_name cannot be changed, keeping current configuration\n"));
		return false;
	}

	if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
	{
		/* Test conninfo string works */
		conn = establish_db_connection(new_options.conninfo, false);
		if (!conn || (PQstatus(conn) != CONNECTION_OK))
		{
			log_warning(_("'conninfo' string is not valid, retaining current configuration\n"));

			/* a failed connection attempt still owns a PGconn object */
			if (conn != NULL)
				PQfinish(conn);

			return false;
		}
		PQfinish(conn);
	}

	/*
	 * No configuration problems detected - copy any changed values
	 *
	 * NB: keep these in the same order as in config.h to make it easier
	 * to manage them
	 */

	/* failover */
	if (orig_options->failover != new_options.failover)
	{
		orig_options->failover = new_options.failover;
		config_changed = true;
	}

	/* follow_command */
	if (strcmp(orig_options->follow_command, new_options.follow_command) != 0)
	{
		strcpy(orig_options->follow_command, new_options.follow_command);
		config_changed = true;
	}

	/* master_response_timeout */
	if (orig_options->master_response_timeout != new_options.master_response_timeout)
	{
		orig_options->master_response_timeout = new_options.master_response_timeout;
		config_changed = true;
	}

	/* monitor_interval_secs */
	if (orig_options->monitor_interval_secs != new_options.monitor_interval_secs)
	{
		orig_options->monitor_interval_secs = new_options.monitor_interval_secs;
		config_changed = true;
	}

	/* priority */
	if (orig_options->priority != new_options.priority)
	{
		orig_options->priority = new_options.priority;
		config_changed = true;
	}

	/* promote_command */
	if (strcmp(orig_options->promote_command, new_options.promote_command) != 0)
	{
		strcpy(orig_options->promote_command, new_options.promote_command);
		config_changed = true;
	}

	/* reconnect_attempts */
	if (orig_options->reconnect_attempts != new_options.reconnect_attempts)
	{
		orig_options->reconnect_attempts = new_options.reconnect_attempts;
		config_changed = true;
	}

	/* reconnect_interval */
	if (orig_options->reconnect_interval != new_options.reconnect_interval)
	{
		orig_options->reconnect_interval = new_options.reconnect_interval;
		config_changed = true;
	}

	/* retry_promote_interval_secs */
	if (orig_options->retry_promote_interval_secs != new_options.retry_promote_interval_secs)
	{
		orig_options->retry_promote_interval_secs = new_options.retry_promote_interval_secs;
		config_changed = true;
	}

	/* witness_repl_nodes_sync_interval_secs */
	if (orig_options->witness_repl_nodes_sync_interval_secs != new_options.witness_repl_nodes_sync_interval_secs)
	{
		orig_options->witness_repl_nodes_sync_interval_secs = new_options.witness_repl_nodes_sync_interval_secs;
		config_changed = true;
	}

	/*
	 * Handle changes to logging configuration
	 */
	if (strcmp(orig_options->logfacility, new_options.logfacility) != 0)
	{
		strcpy(orig_options->logfacility, new_options.logfacility);
		log_config_changed = true;
	}

	if (strcmp(orig_options->logfile, new_options.logfile) != 0)
	{
		strcpy(orig_options->logfile, new_options.logfile);
		log_config_changed = true;
	}

	if (strcmp(orig_options->loglevel, new_options.loglevel) != 0)
	{
		strcpy(orig_options->loglevel, new_options.loglevel);
		log_config_changed = true;
	}

	if (log_config_changed == true)
	{
		log_notice(_("restarting logging with changed parameters\n"));
		logger_shutdown();
		logger_init(orig_options, progname());
	}

	if (config_changed == true)
	{
		log_notice(_("configuration file reloaded with changed parameters\n"));
	}

	/*
	 * if logging configuration changed, don't say the configuration didn't
	 * change, as it clearly has.
	 */
	else if (log_config_changed == false)
	{
		log_info(_("configuration has not changed\n"));
	}

	return config_changed;
}
/*
 * reload_config()
 *
 * Re-read the configuration file `config_file` into a scratch structure,
 * validate it, and - only if every check passes - copy the new values into
 * *orig_options.
 *
 * The new configuration is rejected (and *orig_options left untouched) when:
 *   - the node is still -1 after parsing;
 *   - cluster_name, node or node_name differ from the current values;
 *   - failover is neither MANUAL_FAILOVER nor AUTOMATIC_FAILOVER;
 *   - master_response_timeout is not greater than zero;
 *   - reconnect_attempts or reconnect_intvl is negative;
 *   - a connection cannot be established with the new conninfo string.
 *
 * Returns true if the new configuration was accepted and loaded, false if
 * the current configuration was kept.
 */
bool
reload_config(char *config_file, t_configuration_options * orig_options)
{
	PGconn	   *conn;
	t_configuration_options new_options;

	/*
	 * Re-read the configuration file: repmgr.conf
	 */
	log_info(_("Reloading configuration file and updating repmgr tables\n"));
	parse_config(config_file, &new_options);
	if (new_options.node == -1)
	{
		log_warning(_("Cannot load new configuration, will keep current one.\n"));
		return false;
	}

	/* Identity of the node must not change across a reload */
	if (strcmp(new_options.cluster_name, orig_options->cluster_name) != 0)
	{
		log_warning(_("Cannot change cluster name, will keep current configuration.\n"));
		return false;
	}

	if (new_options.node != orig_options->node)
	{
		log_warning(_("Cannot change node number, will keep current configuration.\n"));
		return false;
	}

	if (strcmp(new_options.node_name, orig_options->node_name) != 0)
	{
		log_warning(_("Cannot change standby name, will keep current configuration.\n"));
		return false;
	}

	/* Range checks on the values that may change */
	if (new_options.failover != MANUAL_FAILOVER && new_options.failover != AUTOMATIC_FAILOVER)
	{
		log_warning(_("New value for failover is not valid. Should be MANUAL or AUTOMATIC.\n"));
		return false;
	}

	if (new_options.master_response_timeout <= 0)
	{
		log_warning(_("New value for master_response_timeout is not valid. Should be greater than zero.\n"));
		return false;
	}

	if (new_options.reconnect_attempts < 0)
	{
		log_warning(_("New value for reconnect_attempts is not valid. Should be greater or equal than zero.\n"));
		return false;
	}

	if (new_options.reconnect_intvl < 0)
	{
		log_warning(_("New value for reconnect_interval is not valid. Should be greater or equal than zero.\n"));
		return false;
	}

	/* Test conninfo string */
	conn = establish_db_connection(new_options.conninfo, false);
	if (!conn || (PQstatus(conn) != CONNECTION_OK))
	{
		log_warning(_("conninfo string is not valid, will keep current configuration.\n"));

		/* a failed connection attempt still owns a PGconn object */
		if (conn != NULL)
			PQfinish(conn);

		return false;
	}
	PQfinish(conn);

	/* Configuration seems ok, will load new values */
	strcpy(orig_options->cluster_name, new_options.cluster_name);
	orig_options->node = new_options.node;
	strcpy(orig_options->conninfo, new_options.conninfo);
	orig_options->failover = new_options.failover;
	orig_options->priority = new_options.priority;
	strcpy(orig_options->node_name, new_options.node_name);
	strcpy(orig_options->promote_command, new_options.promote_command);
	strcpy(orig_options->follow_command, new_options.follow_command);
	strcpy(orig_options->rsync_options, new_options.rsync_options);
	strcpy(orig_options->ssh_options, new_options.ssh_options);
	orig_options->master_response_timeout = new_options.master_response_timeout;
	orig_options->reconnect_attempts = new_options.reconnect_attempts;
	orig_options->reconnect_intvl = new_options.reconnect_intvl;

	/*
	 * XXX Open question: can loglevel and logfacility change with a simple
	 * SIGHUP? They are not reloaded here; doing so would also require
	 * shutting down and re-initialising the logger, which needs progname.
	 */

	return true;
}