/*
 * Custom key pg.queries.longest
 *
 * Returns the duration in seconds of the longest running current query
 *
 * Parameters:
 *   0:  connection string
 *   1:  connection database
 *   2:  filter by database oid name
 *   3:  filter by user OID or name
 *   4:  filter by hostname or IP address of the connected host
 *   5:  return only waiting backends
 *
 * Returns: d
 *
 * Support: Requires PostgreSQL v9.2 or above
 *   NOTE(review): the code below selects PGSQL_GET_LONGEST_QUERY when
 *   pg_version() reports a value below 90200, so older servers are handled
 *   by a separate query template - confirm whether the line above is stale.
 *
 * TODO: allow filtering in pg.queries.longest similar to pg.backends.count
 *   NOTE(review): a filter clause is already built through
 *   build_activity_clause(), exactly as in PG_BACKENDS_COUNT - confirm
 *   whether this TODO is still open.
 */
int PG_QUERIES_LONGEST(AGENT_REQUEST *request, AGENT_RESULT *result)
{
    int         ret = SYSINFO_RET_FAIL;                     // Request result code
    const char  *__function_name = "PG_QUERIES_LONGEST";    // Function name for log file

    char        query[MAX_QUERY_LEN];
    char        clause[MAX_CLAUSE_LEN];
    PGparams    params = NULL;  // per the original note: freed later in pg_exec

    zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __function_name);

    // build the filter clause; a zero return means the clause could not be
    // built, in which case the key fails with SYSINFO_RET_FAIL
    memset(clause, 0, sizeof(clause));
    if (0 == build_activity_clause(request, clause, &params, 1))
        goto out;

    // build the full sql query, using the template that matches the server version
    memset(query, 0, sizeof(query));
    if (pg_version(request) < 90200)
        zbx_snprintf(query, sizeof(query), PGSQL_GET_LONGEST_QUERY, clause);
    else
        zbx_snprintf(query, sizeof(query), PGSQL_GET_LONGEST_QUERY_92, clause);

    // get results
    ret = pg_get_dbl(request, result, query, params);

out:
    zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __function_name);
    return ret;
}
/*
 * Custom key pg.backends.count
 *
 * Returns statistics for connected backends (remote clients)
 *
 * Parameters:
 *   0:  connection string
 *   1:  connection database
 *   2:  filter by database oid name
 *   3:  filter by user OID or name
 *   4:  filter by hostname or IP address of the connected host
 *   5:  return only waiting backends
 *
 * Returns: u
 */
int PG_BACKENDS_COUNT(AGENT_REQUEST *request, AGENT_RESULT *result)
{
    int         ret = SYSINFO_RET_FAIL;                 // Request result code
    const char  *__function_name = "PG_BACKENDS_COUNT"; // Function name for log file

    char        query[MAX_QUERY_LEN];
    char        clause[MAX_CLAUSE_LEN];
    const char  *pid = "pid";   // name of the backend pid column; only ever points at literals
    PGparams    params = NULL;  // per the original note: freed later in pg_exec

    zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __function_name);

    // pid column is named 'procpid' in < v9.2
    if (pg_version(request) < 90200)
        pid = "procpid";

    // build the filter clause; a zero return means the clause could not be
    // built, in which case the key fails with SYSINFO_RET_FAIL
    memset(clause, 0, sizeof(clause));
    if (0 == build_activity_clause(request, clause, &params, 1))
        goto out;

    // build the full sql query
    memset(query, 0, sizeof(query));
    zbx_snprintf(query, sizeof(query), PGSQL_GET_BACKENDS, pid, clause);

    // get results
    ret = pg_get_int(request, result, query, params);

out:
    zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __function_name);
    return ret;
}
/*
 * Custom key pg.tablespace.discovery
 *
 * Low-level discovery of every tablespace known to a PostgreSQL instance.
 *
 * Parameters:
 *   0:  connection string
 *
 * Returns a discovery document of the form:
 *   {
 *        "data":[
 *            {
 *                "{#OID}":"12345",
 *                "{#TABLESPACE}":"MyTableSpace",
 *                "{#OWNER}":"postgres",
 *                "{#LOCATION}":"/var/lib/pgsql/9.4/data",
 *                "{#DESCRIPTION}":"Default tablespace"}]}
 */
int PG_TABLESPACE_DISCOVERY(AGENT_REQUEST *request, AGENT_RESULT *result)
{
    const char  *__function_name = "PG_TABLESPACE_DISCOVERY";   // name used in the debug log lines
    int         ret;                                            // result code handed back to the agent

    zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __function_name);

    // Servers older than v9.2 get the plain query; the _92 variant is used
    // from v9.2 onwards, where tablespace location was introduced.
    if (pg_version(request) < 90200)
        ret = pg_get_discovery(request, result, PGSQL_DISCOVER_TABLESPACES, NULL);
    else
        ret = pg_get_discovery(request, result, PGSQL_DISCOVER_TABLESPACES_92, NULL);

    zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __function_name);

    return ret;
}
int main(int argc, char **argv) { static struct option long_options[] = { {"config", required_argument, NULL, 'f'}, {"verbose", no_argument, NULL, 'v'}, {"no-history", no_argument, NULL, 'H'}, {NULL, 0, NULL, 0} }; int optindex; int c; char standby_version[MAXVERSIONSTR]; progname = get_progname(argv[0]); if (argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { help(progname); exit(SUCCESS); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { printf("%s (PostgreSQL) " PG_VERSION "\n", progname); exit(SUCCESS); } } while ((c = getopt_long(argc, argv, "f:vH", long_options, &optindex)) != -1) { switch (c) { case 'f': config_file = optarg; break; case 'v': verbose = true; break; case 'H': /* no-history */ only_one_entry_desired = true; break; default: usage(); exit(ERR_BAD_CONFIG); } } setup_cancel_handler(); /* * Read the configuration file: repmgr.conf */ parse_config(config_file, &local_options); if (local_options.node == -1) { log_err("Node information is missing. " "Check the configuration file, or provide one if you have not done so.\n"); exit(ERR_BAD_CONFIG); } logger_init(progname, local_options.loglevel, local_options.logfacility); if (verbose) logger_min_verbose(LOG_INFO); snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name); log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo); myLocalConn = establishDBConnection(local_options.conninfo, true); /* should be v9 or better */ log_info(_("%s Connected to database, checking its state\n"), progname); pg_version(myLocalConn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(myLocalConn); log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); exit(ERR_BAD_CONFIG); } /* * Set my server mode, establish a connection to primary * and start monitor */ myLocalMode = is_standby(myLocalConn) ? 
STANDBY_MODE : PRIMARY_MODE; if (myLocalMode == PRIMARY_MODE) { primary_options.node = local_options.node; strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); primaryConn = myLocalConn; } else { /* I need the id of the primary as well as a connection to it */ log_info(_("%s Connecting to primary for cluster '%s'\n"), progname, local_options.cluster_name); primaryConn = getMasterConnection(myLocalConn, local_options.node, local_options.cluster_name, &primary_options.node,NULL); if (primaryConn == NULL) { CloseConnections(); exit(ERR_BAD_CONFIG); } } checkClusterConfiguration(myLocalConn,primaryConn); checkNodeConfiguration(local_options.conninfo); if (myLocalMode == STANDBY_MODE) { log_info(_("%s Starting continuous standby node monitoring\n"), progname); MonitorCheck(); } else { log_info(_("%s This is a primary node, program not needed here; exiting'\n"), progname); } /* Prevent a double-free */ if (primaryConn == myLocalConn) myLocalConn = NULL; /* close the connection to the database and cleanup */ CloseConnections(); /* Shuts down logging system */ logger_shutdown(); return 0; }
static void do_standby_promote(void) { PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; char script[QUERY_STR_LEN]; char myClusterName[MAXLEN]; int myLocalId = -1; char conninfo[MAXLEN]; PGconn *old_master_conn; int old_master_id; int r; char data_dir[MAXLEN]; char recovery_file_path[MAXLEN]; char recovery_done_path[MAXLEN]; char standby_version[MAXVERSIONSTR]; /* * Read the configuration file: repmgr.conf */ parse_config(config_file, myClusterName, &myLocalId, conninfo); if (myLocalId == -1) { fprintf(stderr, "Node information is missing. " "Check the configuration file.\n"); exit(1); } /* We need to connect to check configuration */ conn = establishDBConnection(conninfo, true); /* we need v9 or better */ pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(conn); fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); return; } /* Check we are in a standby node */ if (!is_standby(conn)) { fprintf(stderr, "repmgr: The command should be executed in a standby node\n"); return; } /* we also need to check if there isn't any master already */ old_master_conn = getMasterConnection(conn, myLocalId, myClusterName, &old_master_id); if (old_master_conn != NULL) { PQfinish(old_master_conn); fprintf(stderr, "There is a master already in this cluster"); return; } if (verbose) printf(_("\n%s: Promoting standby...\n"), progname); /* Get the data directory full path and the last subdirectory */ sprintf(sqlquery, "SELECT setting " " FROM pg_settings WHERE name = 'data_directory'"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } strcpy(data_dir, PQgetvalue(res, 0, 0)); PQclear(res); PQfinish(conn); sprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); sprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE); rename(recovery_file_path, 
recovery_done_path); /* We assume the pg_ctl script is in the PATH */ sprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); if (r != 0) { fprintf(stderr, "Can't restart service\n"); return; } /* reconnect to check we got promoted */ /* * XXX i'm removing this because it gives an annoying message saying couldn't connect * but is just the server starting up * conn = establishDBConnection(conninfo, true); * if (is_standby(conn)) * fprintf(stderr, "\n%s: STANDBY PROMOTE failed, this is still a standby node.\n", progname); * else * fprintf(stderr, "\n%s: you should REINDEX any hash indexes you have.\n", progname); * PQfinish(conn); */ return; }
static void do_standby_clone(void) { PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; int r = 0; int i; bool pg_dir = false; char master_data_directory[MAXLEN]; char master_config_file[MAXLEN]; char master_hba_file[MAXLEN]; char master_ident_file[MAXLEN]; char master_control_file[MAXLEN]; char local_control_file[MAXLEN]; const char *first_wal_segment = NULL; const char *last_wal_segment = NULL; char master_version[MAXVERSIONSTR]; /* if dest_dir hasn't been provided, initialize to current directory */ if (dest_dir == NULL) { dest_dir = malloc(5); strcpy(dest_dir, "."); } /* Check this directory could be used as a PGDATA dir */ switch (check_dir(dest_dir)) { case 0: /* dest_dir not there, must create it */ if (verbose) printf(_("creating directory %s ... "), dest_dir); fflush(stdout); if (!create_directory(dest_dir)) { fprintf(stderr, _("%s: couldn't create directory %s ... "), progname, dest_dir); return; } break; case 1: /* Present but empty, fix permissions and use it */ if (verbose) printf(_("fixing permissions on existing directory %s ... 
"), dest_dir); fflush(stdout); if (!set_directory_permissions(dest_dir)) { fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), progname, dest_dir, strerror(errno)); return; } break; case 2: /* Present and not empty */ fprintf(stderr, _("%s: directory \"%s\" exists but is not empty\n"), progname, dest_dir); pg_dir = is_pg_dir(dest_dir); if (pg_dir && !force) { fprintf(stderr, _("\nThis looks like a PostgreSQL directroy.\n" "If you are sure you want to clone here, " "please check there is no PostgreSQL server " "running and use the --force option\n")); return; } else if (pg_dir && force) { /* Let it continue */ break; } else return; default: /* Trouble accessing directory */ fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), progname, dest_dir, strerror(errno)); } /* Connection parameters for master only */ keywords[0] = "host"; values[0] = host; keywords[1] = "port"; values[1] = masterport; /* We need to connect to check configuration and start a backup */ conn = PQconnectdbParams(keywords, values, true); if (!conn) { fprintf(stderr, _("%s: could not connect to master\n"), progname); return; } /* primary should be v9 or better */ pg_version(conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); return; } /* Check we are cloning a primary node */ if (is_standby(conn)) { PQfinish(conn); fprintf(stderr, "\nThe command should clone a primary node\n"); return; } /* And check if it is well configured */ if (!guc_setted(conn, "wal_level", "=", "hot_standby")) { PQfinish(conn); fprintf(stderr, _("%s needs parameter 'wal_level' to be set to 'hot_standby'\n"), progname); return; } if (!guc_setted(conn, "wal_keep_segments", ">=", wal_keep_segments)) { PQfinish(conn); fprintf(stderr, _("%s needs parameter 'wal_keep_segments' to be set to %s or greater\n"), wal_keep_segments, progname); return; } if (!guc_setted(conn, 
"archive_mode", "=", "on")) { PQfinish(conn); fprintf(stderr, _("%s needs parameter 'archive_mode' to be set to 'on'\n"), progname); return; } if (verbose) printf(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn)); /* Check if the tablespace locations exists and that we can write to them */ sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } for (i = 0; i < PQntuples(res); i++) { char *tblspc_dir = NULL; strcpy(tblspc_dir, PQgetvalue(res, i, 0)); /* Check this directory could be used as a PGDATA dir */ switch (check_dir(tblspc_dir)) { case 0: /* tblspc_dir not there, must create it */ if (verbose) printf(_("creating directory \"%s\"... "), tblspc_dir); fflush(stdout); if (!create_directory(tblspc_dir)) { fprintf(stderr, _("%s: couldn't create directory \"%s\"... "), progname, tblspc_dir); PQclear(res); PQfinish(conn); return; } break; case 1: /* Present but empty, fix permissions and use it */ if (verbose) printf(_("fixing permissions on existing directory \"%s\"... 
"), tblspc_dir); fflush(stdout); if (!set_directory_permissions(tblspc_dir)) { fprintf(stderr, _("%s: could not change permissions of directory \"%s\": %s\n"), progname, tblspc_dir, strerror(errno)); PQclear(res); PQfinish(conn); return; } break; case 2: /* Present and not empty */ if (!force) { fprintf(stderr, _("%s: directory \"%s\" exists but is not empty\n"), progname, tblspc_dir); PQclear(res); PQfinish(conn); return; } default: /* Trouble accessing directory */ fprintf(stderr, _("%s: could not access directory \"%s\": %s\n"), progname, tblspc_dir, strerror(errno)); PQclear(res); PQfinish(conn); return; } } fprintf(stderr, "Starting backup...\n"); /* Get the data directory full path and the configuration files location */ sprintf(sqlquery, "SELECT name, setting " " FROM pg_settings " " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about data directory and configuration files: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } for (i = 0; i < PQntuples(res); i++) { if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0) strcpy(master_data_directory, PQgetvalue(res, i, 1)); else if (strcmp(PQgetvalue(res, i, 0), "config_file") == 0) strcpy(master_config_file, PQgetvalue(res, i, 1)); else if (strcmp(PQgetvalue(res, i, 0), "hba_file") == 0) strcpy(master_hba_file, PQgetvalue(res, i, 1)); else if (strcmp(PQgetvalue(res, i, 0), "ident_file") == 0) strcpy(master_ident_file, PQgetvalue(res, i, 1)); else fprintf(stderr, _("uknown parameter: %s"), PQgetvalue(res, i, 0)); } PQclear(res); /* * inform the master we will start a backup and get the first XLog filename * so we can say to the user we need those files */ sprintf(sqlquery, "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", time(NULL)); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't 
start backup: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } first_wal_segment = PQgetvalue(res, 0, 0); PQclear(res); /* * 1) first move global/pg_control * * 2) then move data_directory ommiting the files we have already moved and pg_xlog * content * * 3) finally We need to backup configuration files (that could be on other directories, debian * like systems likes to do that), so look at config_file, hba_file and ident_file but we * can omit external_pid_file ;) * * On error we need to return but before that execute pg_stop_backup() */ /* need to create the global sub directory */ sprintf(master_control_file, "%s/global/pg_control", master_data_directory); sprintf(local_control_file, "%s/global", dest_dir); if (!create_directory(local_control_file)) { fprintf(stderr, _("%s: couldn't create directory %s ... "), progname, dest_dir); goto stop_backup; } r = copy_remote_files(host, remote_user, master_control_file, local_control_file, false); if (r != 0) goto stop_backup; r = copy_remote_files(host, remote_user, master_data_directory, dest_dir, true); if (r != 0) goto stop_backup; /* * Copy tablespace locations, i'm doing this separately because i couldn't find and appropiate * rsync option but besides we could someday make all these rsync happen concurrently */ sprintf(sqlquery, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); PQclear(res); goto stop_backup; } for (i = 0; i < PQntuples(res); i++) { r = copy_remote_files(host, remote_user, PQgetvalue(res, i, 0), PQgetvalue(res, i, 0), true); if (r != 0) goto stop_backup; } r = copy_remote_files(host, remote_user, master_config_file, dest_dir, false); if (r != 0) goto stop_backup; r = copy_remote_files(host, remote_user, master_hba_file, dest_dir, false); if (r != 0) goto stop_backup; r = 
copy_remote_files(host, remote_user, master_ident_file, dest_dir, false); if (r != 0) goto stop_backup; stop_backup: /* inform the master that we have finished the backup */ conn = PQconnectdbParams(keywords, values, true); if (!conn) { fprintf(stderr, _("%s: could not connect to master\n"), progname); return; } fprintf(stderr, "Finishing backup...\n"); sprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't stop backup: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } last_wal_segment = PQgetvalue(res, 0, 0); PQclear(res); PQfinish(conn); /* Now, if the rsync failed then exit */ if (r != 0) return; if (verbose) printf(_("%s requires primary to keep WAL files %s until at least %s\n"), progname, first_wal_segment, last_wal_segment); /* we need to create the pg_xlog sub directory too, i'm reusing a variable here */ sprintf(local_control_file, "%s/pg_xlog", dest_dir); if (!create_directory(local_control_file)) { fprintf(stderr, _("%s: couldn't create directory %s, you will need to do it manually...\n"), progname, dest_dir); } /* Finally, write the recovery.conf file */ create_recovery_file(dest_dir); /* We don't start the service because we still may want to move the directory */ return; }
static void do_standby_register(void) { PGconn *conn; PGconn *master_conn; int master_id; PGresult *res; char sqlquery[QUERY_STR_LEN]; char myClusterName[MAXLEN]; int myLocalId = -1; char conninfo[MAXLEN]; char master_version[MAXVERSIONSTR]; char standby_version[MAXVERSIONSTR]; /* * Read the configuration file: repmgr.conf */ parse_config(config_file, myClusterName, &myLocalId, conninfo); if (myLocalId == -1) { fprintf(stderr, "Node information is missing. " "Check the configuration file.\n"); exit(1); } conn = establishDBConnection(conninfo, true); /* should be v9 or better */ pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(conn); fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); return; } /* Check we are a standby */ if (!is_standby(conn)) { fprintf(stderr, "repmgr: This node should be a standby\n"); PQfinish(conn); return; } /* Check if there is a schema for this cluster */ sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about tablespaces: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } if (PQntuples(res) == 0) /* schema doesn't exists */ { fprintf(stderr, "Schema repmgr_%s doesn't exists.", myClusterName); PQclear(res); PQfinish(conn); return; } PQclear(res); /* check if there is a master in this cluster */ master_conn = getMasterConnection(conn, myLocalId, myClusterName, &master_id); if (!master_conn) return; /* master should be v9 or better */ pg_version(master_conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); PQfinish(master_conn); fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); return; } /* master and standby version should match */ if (strcmp(master_version, standby_version) != 0) { PQfinish(conn); PQfinish(master_conn); fprintf(stderr, _("%s needs versions 
of both master (%s) and standby (%s) to match.\n"), progname, master_version, standby_version); return; } /* Now register the standby */ if (force) { sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " " WHERE id = %d", myClusterName, myLocalId); if (!PQexec(master_conn, sqlquery)) { fprintf(stderr, "Cannot delete node details, %s\n", PQerrorMessage(master_conn)); PQfinish(master_conn); PQfinish(conn); return; } } sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " "VALUES (%d, '%s', '%s')", myClusterName, myLocalId, myClusterName, conninfo); if (!PQexec(master_conn, sqlquery)) { fprintf(stderr, "Cannot insert node details, %s\n", PQerrorMessage(master_conn)); PQfinish(master_conn); PQfinish(conn); return; } PQfinish(master_conn); PQfinish(conn); return; }
static void do_master_register(void) { PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; char myClusterName[MAXLEN]; int myLocalId = -1; char conninfo[MAXLEN]; bool schema_exists = false; char master_version[MAXVERSIONSTR]; /* * Read the configuration file: repmgr.conf */ parse_config(config_file, myClusterName, &myLocalId, conninfo); if (myLocalId == -1) { fprintf(stderr, "Node information is missing. " "Check the configuration file.\n"); exit(1); } conn = establishDBConnection(conninfo, true); /* master should be v9 or better */ pg_version(conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); return; } /* Check we are a master */ if (is_standby(conn)) { fprintf(stderr, "repmgr: This node should be a master\n"); PQfinish(conn); return; } /* Check if there is a schema for this cluster */ sprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = 'repmgr_%s'", myClusterName); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about schemas: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } if (PQntuples(res) > 0) /* schema exists */ { if (!force) /* and we are not forcing so error */ { fprintf(stderr, "Schema repmgr_%s already exists.", myClusterName); PQclear(res); PQfinish(conn); return; } schema_exists = true; } PQclear(res); if (!schema_exists) { /* ok, create the schema */ sprintf(sqlquery, "CREATE SCHEMA repmgr_%s", myClusterName); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot create the schema repmgr_%s: %s\n", myClusterName, PQerrorMessage(conn)); PQfinish(conn); return; } /* ... 
the tables */ sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_nodes ( " " id integer primary key, " " cluster text not null, " " conninfo text not null)", myClusterName); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot create the table repmgr_%s.repl_nodes: %s\n", myClusterName, PQerrorMessage(conn)); PQfinish(conn); return; } sprintf(sqlquery, "CREATE TABLE repmgr_%s.repl_monitor ( " " primary_node INTEGER NOT NULL, " " standby_node INTEGER NOT NULL, " " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " " last_wal_primary_location TEXT NOT NULL, " " last_wal_standby_location TEXT NOT NULL, " " replication_lag BIGINT NOT NULL, " " apply_lag BIGINT NOT NULL) ", myClusterName); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot create the table repmgr_%s.repl_monitor: %s\n", myClusterName, PQerrorMessage(conn)); PQfinish(conn); return; } /* and the view */ sprintf(sqlquery, "CREATE VIEW repmgr_%s.repl_status AS " " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " " ORDER BY last_monitor_time desc) " " FROM repmgr_%s.repl_monitor) " " SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, " " last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, " " pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag " " FROM monitor_info a " " WHERE row_number = 1", myClusterName, myClusterName); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot create the view repmgr_%s.repl_status: %s\n", myClusterName, PQerrorMessage(conn)); PQfinish(conn); return; } } else { PGconn *master_conn; int id; /* Ensure there isn't any other master already registered */ master_conn = getMasterConnection(conn, myLocalId, myClusterName, &id); if (master_conn != NULL) { PQfinish(master_conn); fprintf(stderr, "There is a master already in this cluster"); return; } } /* Now register the master */ if (force) { sprintf(sqlquery, "DELETE FROM repmgr_%s.repl_nodes " " WHERE 
id = %d", myClusterName, myLocalId); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot delete node details, %s\n", PQerrorMessage(conn)); PQfinish(conn); return; } } sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes " "VALUES (%d, '%s', '%s')", myClusterName, myLocalId, myClusterName, conninfo); if (!PQexec(conn, sqlquery)) { fprintf(stderr, "Cannot insert node details, %s\n", PQerrorMessage(conn)); PQfinish(conn); return; } PQfinish(conn); return; }
static void do_standby_follow(void) { PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; char script[QUERY_STR_LEN]; char myClusterName[MAXLEN]; int myLocalId = -1; char conninfo[MAXLEN]; PGconn *master_conn; int master_id; int r; char data_dir[MAXLEN]; char master_version[MAXVERSIONSTR]; char standby_version[MAXVERSIONSTR]; /* * Read the configuration file: repmgr.conf */ parse_config(config_file, myClusterName, &myLocalId, conninfo); if (myLocalId == -1) { fprintf(stderr, "Node information is missing. " "Check the configuration file.\n"); exit(1); } /* We need to connect to check configuration */ conn = establishDBConnection(conninfo, true); /* Check we are in a standby node */ if (!is_standby(conn)) { fprintf(stderr, "\n%s: The command should be executed in a standby node\n", progname); return; } /* should be v9 or better */ pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(conn); fprintf(stderr, _("\n%s needs standby to be PostgreSQL 9.0 or better\n"), progname); return; } /* we also need to check if there is any master in the cluster */ master_conn = getMasterConnection(conn, myLocalId, myClusterName, &master_id); if (master_conn == NULL) { PQfinish(conn); fprintf(stderr, "There isn't a master to follow in this cluster"); return; } /* Check we are going to point to a master */ if (is_standby(master_conn)) { PQfinish(conn); fprintf(stderr, "%s: The node to follow should be a master\n", progname); return; } /* should be v9 or better */ pg_version(master_conn, master_version); if (strcmp(master_version, "") == 0) { PQfinish(conn); PQfinish(master_conn); fprintf(stderr, _("%s needs master to be PostgreSQL 9.0 or better\n"), progname); return; } /* master and standby version should match */ if (strcmp(master_version, standby_version) != 0) { PQfinish(conn); PQfinish(master_conn); fprintf(stderr, _("%s needs versions of both master (%s) and standby (%s) to match.\n"), progname, master_version, standby_version); return; } /* 
* set the host and masterport variables with the master ones * before closing the connection because we will need them to * recreate the recovery.conf file */ host = malloc(20); masterport = malloc(10); strcpy(host, PQhost(master_conn)); strcpy(masterport, PQport(master_conn)); PQfinish(master_conn); if (verbose) printf(_("\n%s: Changing standby's master...\n"), progname); /* Get the data directory full path */ sprintf(sqlquery, "SELECT setting " " FROM pg_settings WHERE name = 'data_directory'"); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't get info about data directory: %s\n", PQerrorMessage(conn)); PQclear(res); PQfinish(conn); return; } strcpy(data_dir, PQgetvalue(res, 0, 0)); PQclear(res); PQfinish(conn); /* write the recovery.conf file */ if (!create_recovery_file(data_dir)) return; /* Finally, restart the service */ /* We assume the pg_ctl script is in the PATH */ sprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); if (r != 0) { fprintf(stderr, "Can't restart service\n"); return; } return; }
int main(int argc, char **argv) { static struct option long_options[] = { {"config-file", required_argument, NULL, 'f'}, {"verbose", no_argument, NULL, 'v'}, {"monitoring-history", no_argument, NULL, 'm'}, {"daemonize", no_argument, NULL, 'd'}, {"pid-file", required_argument, NULL, 'p'}, {NULL, 0, NULL, 0} }; int optindex; int c, ret; bool daemonize = false; FILE *fd; char standby_version[MAXVERSIONSTR], *ret_ver; progname = get_progname(argv[0]); if (argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { help(progname); exit(SUCCESS); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { printf("%s %s (PostgreSQL %s)\n", progname, REPMGR_VERSION, PG_VERSION); exit(SUCCESS); } } while ((c = getopt_long(argc, argv, "f:v:mdp:", long_options, &optindex)) != -1) { switch (c) { case 'f': config_file = optarg; break; case 'v': verbose = true; break; case 'm': monitoring_history = true; break; case 'd': daemonize = true; break; case 'p': pid_file = optarg; break; default: usage(); exit(ERR_BAD_CONFIG); } } if (daemonize) { do_daemonize(); } if (pid_file) { check_and_create_pid_file(pid_file); } #ifndef WIN32 setup_event_handlers(); #endif /* * Read the configuration file: repmgr.conf */ parse_config(config_file, &local_options); if (local_options.node == -1) { log_err(_("Node information is missing. 
" "Check the configuration file, or provide one if you have not done so.\n")); terminate(ERR_BAD_CONFIG); } fd = freopen("/dev/null", "r", stdin); if (fd == NULL) { fprintf(stderr, "error reopening stdin to '/dev/null': %s", strerror(errno)); } fd = freopen("/dev/null", "w", stdout); if (fd == NULL) { fprintf(stderr, "error reopening stdout to '/dev/null': %s", strerror(errno)); } logger_init(&local_options, progname, local_options.loglevel, local_options.logfacility); if (verbose) logger_min_verbose(LOG_INFO); if (log_type == REPMGR_SYSLOG) { fd = freopen("/dev/null", "w", stderr); if (fd == NULL) { fprintf(stderr, "error reopening stderr to '/dev/null': %s", strerror(errno)); } } xsnprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name); log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo); my_local_conn = establish_db_connection(local_options.conninfo, true); /* should be v9 or better */ log_info(_("%s Connected to database, checking its state\n"), progname); ret_ver = pg_version(my_local_conn, standby_version); if (ret_ver == NULL || strcmp(standby_version, "") == 0) { if (ret_ver != NULL) log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); terminate(ERR_BAD_CONFIG); } /* * MAIN LOOP This loops cicles once per failover and at startup * Requisites: - my_local_conn needs to be already setted with an active * connection - no master connection */ do { /* * Set my server mode, establish a connection to primary and start * monitor */ ret = is_witness(my_local_conn, repmgr_schema, local_options.cluster_name, local_options.node); if (ret == 1) my_local_mode = WITNESS_MODE; else if (ret == 0) { ret = is_standby(my_local_conn); if (ret == 1) my_local_mode = STANDBY_MODE; else if (ret == 0) /* is the master */ my_local_mode = PRIMARY_MODE; } /* * XXX we did this before changing is_standby() to return int; we * should not exit at this point, but for now we do until we have a * 
better strategy */ if (ret == -1) terminate(1); switch (my_local_mode) { case PRIMARY_MODE: primary_options.node = local_options.node; strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); primary_conn = my_local_conn; check_cluster_configuration(my_local_conn); check_node_configuration(); if (reload_config(config_file, &local_options)) { PQfinish(my_local_conn); my_local_conn = establish_db_connection(local_options.conninfo, true); primary_conn = my_local_conn; update_registration(); } log_info(_("%s Starting continuous primary connection check\n"), progname); /* * Check that primary is still alive, and standbies are * sending info */ /* * Every local_options.monitor_interval_secs seconds, do * master checks XXX Check that standbies are sending info */ do { if (check_connection(primary_conn, "master")) { /* * CheckActiveStandbiesConnections(); * CheckInactiveStandbies(); */ sleep(local_options.monitor_interval_secs); } else { /* * XXX May we do something more verbose ? */ terminate(1); } if (got_SIGHUP) { /* * if we can reload, then could need to change * my_local_conn */ if (reload_config(config_file, &local_options)) { PQfinish(my_local_conn); my_local_conn = establish_db_connection(local_options.conninfo, true); primary_conn = my_local_conn; if (*local_options.logfile) { FILE *fd; fd = freopen(local_options.logfile, "a", stderr); if (fd == NULL) { fprintf(stderr, "error reopening stderr to '%s': %s", local_options.logfile, strerror(errno)); } } update_registration(); } got_SIGHUP = false; } } while (!failover_done); break; case WITNESS_MODE: case STANDBY_MODE: /* I need the id of the primary as well as a connection to it */ log_info(_("%s Connecting to primary for cluster '%s'\n"), progname, local_options.cluster_name); primary_conn = get_master_connection(my_local_conn, repmgr_schema, local_options.cluster_name, &primary_options.node, NULL); if (primary_conn == NULL) { terminate(ERR_BAD_CONFIG); } check_cluster_configuration(my_local_conn); 
check_node_configuration(); if (reload_config(config_file, &local_options)) { PQfinish(my_local_conn); my_local_conn = establish_db_connection(local_options.conninfo, true); update_registration(); } /* * Every local_options.monitor_interval_secs seconds, do * checks */ if (my_local_mode == WITNESS_MODE) { log_info(_("%s Starting continuous witness node monitoring\n"), progname); } else if (my_local_mode == STANDBY_MODE) { log_info(_("%s Starting continuous standby node monitoring\n"), progname); } do { if (my_local_mode == WITNESS_MODE) witness_monitor(); else if (my_local_mode == STANDBY_MODE) standby_monitor(); sleep(local_options.monitor_interval_secs); if (got_SIGHUP) { /* * if we can reload, then could need to change * my_local_conn */ if (reload_config(config_file, &local_options)) { PQfinish(my_local_conn); my_local_conn = establish_db_connection(local_options.conninfo, true); update_registration(); } got_SIGHUP = false; } } while (!failover_done); break; default: log_err(_("%s: Unrecognized mode for node %d\n"), progname, local_options.node); } failover_done = false; } while (true); /* close the connection to the database and cleanup */ close_connections(); /* Shuts down logging system */ logger_shutdown(); return 0; }
int main(int argc, char **argv) { static struct option long_options[] = { {"config", required_argument, NULL, 'f'}, {"verbose", no_argument, NULL, 'v'}, {NULL, 0, NULL, 0} }; int optindex; int c; char standby_version[MAXVERSIONSTR]; progname = get_progname(argv[0]); if (argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { help(progname); exit(SUCCESS); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { printf("%s (PostgreSQL) " PG_VERSION "\n", progname); exit(SUCCESS); } } while ((c = getopt_long(argc, argv, "f:v", long_options, &optindex)) != -1) { switch (c) { case 'f': config_file = optarg; break; case 'v': verbose = true; break; default: usage(); exit(ERR_BAD_CONFIG); } } setup_event_handlers(); /* * Read the configuration file: repmgr.conf */ parse_config(config_file, &local_options); if (local_options.node == -1) { log_err(_("Node information is missing. " "Check the configuration file, or provide one if you have not done so.\n")); exit(ERR_BAD_CONFIG); } logger_init(progname, local_options.loglevel, local_options.logfacility); if (verbose) logger_min_verbose(LOG_INFO); snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name); log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo); myLocalConn = establishDBConnection(local_options.conninfo, true); /* should be v9 or better */ log_info(_("%s Connected to database, checking its state\n"), progname); pg_version(myLocalConn, standby_version); if (strcmp(standby_version, "") == 0) { log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); PQfinish(myLocalConn); exit(ERR_BAD_CONFIG); } /* * Set my server mode, establish a connection to primary * and start monitor */ if (is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node)) myLocalMode = WITNESS_MODE; else if (is_standby(myLocalConn)) myLocalMode = STANDBY_MODE; else /* is the master */ myLocalMode 
= PRIMARY_MODE; switch (myLocalMode) { case PRIMARY_MODE: primary_options.node = local_options.node; strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN); primaryConn = myLocalConn; checkClusterConfiguration(myLocalConn, primaryConn); checkNodeConfiguration(local_options.conninfo); if (reload_configuration(config_file, &local_options)) { PQfinish(myLocalConn); myLocalConn = establishDBConnection(local_options.conninfo, true); primaryConn = myLocalConn; update_registration(); } log_info(_("%s Starting continuous primary connection check\n"), progname); /* Check that primary is still alive, and standbies are sending info */ /* * Every SLEEP_MONITOR seconds, do master checks * XXX * Check that standbies are sending info */ for (;;) { if (CheckPrimaryConnection()) { /* CheckActiveStandbiesConnections(); CheckInactiveStandbies(); */ sleep(SLEEP_MONITOR); } else { /* XXX * May we do something more verbose ? */ exit (1); } if (got_SIGHUP) { /* if we can reload, then could need to change myLocalConn */ if (reload_configuration(config_file, &local_options)) { PQfinish(myLocalConn); myLocalConn = establishDBConnection(local_options.conninfo, true); primaryConn = myLocalConn; update_registration(); } got_SIGHUP = false; } } break; case WITNESS_MODE: case STANDBY_MODE: /* I need the id of the primary as well as a connection to it */ log_info(_("%s Connecting to primary for cluster '%s'\n"), progname, local_options.cluster_name); primaryConn = getMasterConnection(myLocalConn, repmgr_schema, local_options.node, local_options.cluster_name, &primary_options.node, NULL); if (primaryConn == NULL) { CloseConnections(); exit(ERR_BAD_CONFIG); } checkClusterConfiguration(myLocalConn, primaryConn); checkNodeConfiguration(local_options.conninfo); if (reload_configuration(config_file, &local_options)) { PQfinish(myLocalConn); myLocalConn = establishDBConnection(local_options.conninfo, true); update_registration(); } /* * Every SLEEP_MONITOR seconds, do checks */ if 
(myLocalMode == WITNESS_MODE) { log_info(_("%s Starting continuous witness node monitoring\n"), progname); } else if (myLocalMode == STANDBY_MODE) { log_info(_("%s Starting continuous standby node monitoring\n"), progname); } for (;;) { if (myLocalMode == WITNESS_MODE) WitnessMonitor(); else if (myLocalMode == STANDBY_MODE) StandbyMonitor(); sleep(SLEEP_MONITOR); if (got_SIGHUP) { /* if we can reload, then could need to change myLocalConn */ if (reload_configuration(config_file, &local_options)) { PQfinish(myLocalConn); myLocalConn = establishDBConnection(local_options.conninfo, true); update_registration(); } got_SIGHUP = false; } } break; default: log_err(_("%s: Unrecognized mode for node %d\n"), progname, local_options.node); } /* Prevent a double-free */ if (primaryConn == myLocalConn) myLocalConn = NULL; /* close the connection to the database and cleanup */ CloseConnections(); /* Shuts down logging system */ logger_shutdown(); return 0; }
int main(int argc, char **argv) { static struct option long_options[] = { {"config", required_argument, NULL, 'f'}, {"verbose", no_argument, NULL, 'v'}, {NULL, 0, NULL, 0} }; int optindex; int c; char conninfo[MAXLEN]; char standby_version[MAXVERSIONSTR]; progname = get_progname(argv[0]); if (argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { help(progname); exit(0); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { printf("%s (PostgreSQL) " PG_VERSION "\n", progname); exit(0); } } while ((c = getopt_long(argc, argv, "f:v", long_options, &optindex)) != -1) { switch (c) { case 'f': config_file = optarg; break; case 'v': verbose = true; break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } } setup_cancel_handler(); if (config_file == NULL) { config_file = malloc(5 + sizeof(CONFIG_FILE)); sprintf(config_file, "./%s", CONFIG_FILE); } /* * Read the configuration file: repmgr.conf */ parse_config(config_file, myClusterName, &myLocalId, conninfo); if (myLocalId == -1) { fprintf(stderr, "Node information is missing. " "Check the configuration file.\n"); exit(1); } myLocalConn = establishDBConnection(conninfo, true); /* should be v9 or better */ pg_version(myLocalConn, standby_version); if (strcmp(standby_version, "") == 0) { PQfinish(myLocalConn); fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname); exit(1); } /* * Set my server mode, establish a connection to primary * and start monitor */ myLocalMode = is_standby(myLocalConn) ? 
STANDBY_MODE : PRIMARY_MODE; if (myLocalMode == PRIMARY_MODE) { primaryId = myLocalId; strcpy(primaryConninfo, conninfo); primaryConn = myLocalConn; } else { /* I need the id of the primary as well as a connection to it */ primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId); if (primaryConn == NULL) exit(1); } checkClusterConfiguration(); checkNodeConfiguration(conninfo); if (myLocalMode == STANDBY_MODE) { MonitorCheck(); } /* close the connection to the database and cleanup */ CloseConnections(); return 0; }