/* ----------
 * remoteListen_forward_confirm
 *
 * Ask the remote database for the highest confirmed event sequence (and
 * newest confirmation timestamp) per (con_origin, con_received) pair,
 * skipping rows whose receiver is the local node, and pass every row on
 * to this node's remote worker.  The worker stores the confirmations in
 * the local database so that the cleanup process can tell when all nodes
 * have confirmed an event and the event (with its log data) may be
 * thrown away.
 *
 * node		the remote node this listener is attached to
 * conn		open connection to that node's database
 *
 * Returns 0 on success and -1 if the query on the remote database failed.
 * ----------
 */
static int
remoteListen_forward_confirm(SlonNode * node, SlonConn * conn)
{
    SlonDString confirm_query;
    PGresult   *confirms;
    int         row;
    int         row_count;

    dstring_init(&confirm_query);
    monitor_state("remote listener", node->no_id, conn->conn_pid,
                  "forwarding confirmations", 0, "n/a");

    /*
     * One row per (origin, receiver) pair carrying max(con_seqno) and
     * max(con_timestamp) from sl_confirm.
     */
    (void) slon_mkquery(&confirm_query,
                        "select con_origin, con_received, "
                        " max(con_seqno) as con_seqno, "
                        " max(con_timestamp) as con_timestamp "
                        "from %s.sl_confirm "
                        "where con_received <> %d "
                        "group by con_origin, con_received",
                        rtcfg_namespace, rtcfg_nodeid);
    confirms = PQexec(conn->dbconn, dstring_data(&confirm_query));

    if (PQresultStatus(confirms) == PGRES_TUPLES_OK)
    {
        /*
         * The forwarding itself is not done here; each row becomes a
         * special message for the remote worker of that node.
         */
        row_count = PQntuples(confirms);
        row = 0;
        while (row < row_count)
        {
            remoteWorker_confirm(node->no_id,
                                 PQgetvalue(confirms, row, 0),
                                 PQgetvalue(confirms, row, 1),
                                 PQgetvalue(confirms, row, 2),
                                 PQgetvalue(confirms, row, 3));
            row++;
        }

        PQclear(confirms);
        dstring_free(&confirm_query);
        monitor_state("remote listener", node->no_id, conn->conn_pid,
                      "thread main loop", 0, "n/a");
        return 0;
    }

    /* The select failed: report it and let the caller deal with it. */
    slon_log(SLON_ERROR,
             "remoteListenThread_%d: \"%s\" %s",
             node->no_id,
             dstring_data(&confirm_query), PQresultErrorMessage(confirms));
    dstring_free(&confirm_query);
    PQclear(confirms);
    return -1;
}
/* ----------
 * get_earliest_xid()
 *
 * Reads the earliest XID that is still active.
 *
 * The idea is that if, between cleanupThread iterations, this XID has
 * not changed, then an old transaction is still in progress,
 * PostgreSQL is holding onto the tuples, and there is no value in
 * doing VACUUMs of the various Slony-I tables.
 *
 * dbconn	open connection on which the snapshot query is run
 *
 * Returns the xmin of the current txid snapshot converted to unsigned
 * long.  If the query fails, the error is logged, slon_retry() is invoked
 * and (unsigned long) -1 is returned should slon_retry() come back.
 * ----------
 */
static unsigned long
get_earliest_xid(PGconn *dbconn)
{
    int64       xid;
    PGresult   *res;
    SlonDString query;

    dstring_init(&query);
    (void) slon_mkquery(&query, "select pg_catalog.txid_snapshot_xmin(pg_catalog.txid_current_snapshot());");
    res = PQexec(dbconn, dstring_data(&query));
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        slon_log(SLON_FATAL, "cleanupThread: could not txid_snapshot_xmin()!\n");
        PQclear(res);
        /* Release the query buffer on the failure path as well. */
        dstring_free(&query);
        slon_retry();
        return (unsigned long) -1;
    }

    xid = strtoll(PQgetvalue(res, 0, 0), NULL, 10);

    /*
     * xid is a 64-bit value; print it with a matching conversion instead
     * of "%d", which expects a plain int.
     */
    slon_log(SLON_DEBUG1, "cleanupThread: minxid: %lld\n", (long long) xid);

    PQclear(res);
    dstring_free(&query);
    return (unsigned long) xid;
}
/* ---------- * slon_localListenThread * * Listen for events on the local database connection. This means, * events generated by the local node only. * ---------- */ void * localListenThread_main(/* @unused@ */ void *dummy) { SlonConn *conn; SlonDString query1; PGconn *dbconn; PGresult *res; int ntuples; int tupno; PGnotify *notification; char restart_notify[256]; int restart_request; int poll_sleep = 0; int node_lock_obtained=0; slon_log(SLON_INFO, "localListenThread: thread starts\n"); /* * Connect to the local database */ if ((conn = slon_connectdb(rtcfg_conninfo, "local_listen")) == NULL) slon_retry(); dbconn = conn->dbconn; /* * Initialize local data */ dstring_init(&query1); sprintf(restart_notify, "_%s_Restart", rtcfg_cluster_name); /* * Listen for local events */ (void) slon_mkquery(&query1, "listen \"_%s_Restart\"; ", rtcfg_cluster_name); res = PQexec(dbconn, dstring_data(&query1)); if (PQresultStatus(res) != PGRES_COMMAND_OK) { slon_log(SLON_FATAL, "localListenThread: \"%s\" - %s\n", dstring_data(&query1), PQresultErrorMessage(res)); PQclear(res); dstring_free(&query1); pthread_mutex_lock(&slon_wait_listen_lock); slon_listen_started=0; pthread_cond_signal(&slon_wait_listen_cond); pthread_mutex_unlock(&slon_wait_listen_lock); slon_retry(); } PQclear(res); /* * Check that we are the only slon daemon connected. 
*/ #define NODELOCKERROR "ERROR: duplicate key violates unique constraint \"sl_nodelock-pkey\"" (void) slon_mkquery(&query1, "select %s.cleanupNodelock(); " "insert into %s.sl_nodelock values (" " %d, 0, \"pg_catalog\".pg_backend_pid()); ", rtcfg_namespace, rtcfg_namespace, rtcfg_nodeid); while(!node_lock_obtained) { res = PQexec(dbconn, dstring_data(&query1)); if (PQresultStatus(res) != PGRES_COMMAND_OK) { slon_log(SLON_FATAL, "localListenThread: \"%s\" - %s\n", dstring_data(&query1), PQresultErrorMessage(res)); if (strncmp(NODELOCKERROR, PQresultErrorMessage(res), strlen(NODELOCKERROR)) == 0) { slon_log(SLON_FATAL, "Do you already have a slon running against this node?\n"); slon_log(SLON_FATAL, "Or perhaps a residual idle backend connection from a dead slon?\n"); PQclear(res); if(worker_restarted) { sleep(5); continue; } else { dstring_free(&query1); pthread_mutex_lock(&slon_wait_listen_lock); slon_listen_started=0; pthread_cond_signal(&slon_wait_listen_cond); pthread_mutex_unlock(&slon_wait_listen_lock); slon_abort(); } } PQclear(res); dstring_free(&query1); slon_abort(); } PQclear(res); node_lock_obtained=1; } /* * Flag the main thread that the coast is clear and he can launch all * other threads. */ pthread_mutex_lock(&slon_wait_listen_lock); slon_listen_started=1; pthread_cond_signal(&slon_wait_listen_cond); pthread_mutex_unlock(&slon_wait_listen_lock); /* * Process all events, then wait for notification and repeat until * shutdown time has arrived. */ while (true) { /* * Start the transaction */ res = PQexec(dbconn, "start transaction; " "set transaction isolation level serializable;"); if (PQresultStatus(res) != PGRES_COMMAND_OK) { slon_log(SLON_FATAL, "localListenThread: cannot start transaction - %s\n", PQresultErrorMessage(res)); PQclear(res); dstring_free(&query1); slon_retry(); break; } PQclear(res); /* * Drain notifications. 
*/ (void) PQconsumeInput(dbconn); restart_request = false; while ((notification = PQnotifies(dbconn)) != NULL) { if (strcmp(restart_notify, notification->relname) == 0) restart_request = true; (void) PQfreemem(notification); } if (restart_request) { slon_log(SLON_INFO, "localListenThread: got restart notification\n"); #ifndef WIN32 slon_restart(); #else /* XXX */ /* Win32 defer to service manager to restart for now */ slon_restart(); #endif } /* * Query the database for new local events */ (void) slon_mkquery(&query1, "select ev_seqno, ev_timestamp, " " 'dummy', 'dummy', 'dummy', " " ev_type, " " ev_data1, ev_data2, ev_data3, ev_data4, " " ev_data5, ev_data6, ev_data7, ev_data8 " "from %s.sl_event " "where ev_origin = '%d' " " and ev_seqno > '%s' " "order by ev_seqno", rtcfg_namespace, rtcfg_nodeid, rtcfg_lastevent); res = PQexec(dbconn, dstring_data(&query1)); if (PQresultStatus(res) != PGRES_TUPLES_OK) { slon_log(SLON_FATAL, "localListenThread: \"%s\" - %s\n", dstring_data(&query1), PQresultErrorMessage(res)); PQclear(res); dstring_free(&query1); slon_retry(); break; } ntuples = PQntuples(res); for (tupno = 0; tupno < ntuples; tupno++) { char *ev_type; /* * Remember the event sequence number for confirmation */ strcpy(rtcfg_lastevent, PQgetvalue(res, tupno, 0)); /* * Get the event type and process configuration events */ ev_type = PQgetvalue(res, tupno, 5); slon_log(SLON_DEBUG2, "localListenThread: " "Received event %d,%s %s\n", rtcfg_nodeid, PQgetvalue(res, tupno, 0), ev_type); if (strcmp(ev_type, "SYNC") == 0) { /* * SYNC - nothing to do */ } else if (strcmp(ev_type, "STORE_NODE") == 0) { /* * STORE_NODE */ int no_id; char *no_comment; no_id = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); no_comment = PQgetvalue(res, tupno, 7); if (no_id != rtcfg_nodeid) rtcfg_storeNode(no_id, no_comment); rtcfg_reloadListen(dbconn); } else if (strcmp(ev_type, "ENABLE_NODE") == 0) { /* * ENABLE_NODE */ int no_id; no_id = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); if 
(no_id != rtcfg_nodeid) rtcfg_enableNode(no_id); rtcfg_reloadListen(dbconn); } else if (strcmp(ev_type, "DROP_NODE") == 0) { /* * DROP_NODE */ int no_id; char notify_query[256]; PGresult *notify_res; no_id = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); /* * Deactivate the node in the runtime configuration */ rtcfg_disableNode(no_id); /* * And cause the replication daemon to restart to get rid of * it. */ snprintf(notify_query, sizeof(notify_query), "notify \"_%s_Restart\";", rtcfg_cluster_name); notify_res = PQexec(dbconn, notify_query); if (PQresultStatus(notify_res) != PGRES_COMMAND_OK) { slon_log(SLON_FATAL, "localListenThread: \"%s\" %s\n", notify_query, PQresultErrorMessage(notify_res)); PQclear(notify_res); slon_restart(); } PQclear(notify_res); rtcfg_reloadListen(dbconn); } else if (strcmp(ev_type, "CLONE_NODE") == 0) { /* * CLONE_NODE */ int no_id; int no_provider; char *no_comment; no_id = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); no_provider = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); no_comment = PQgetvalue(res, tupno, 8); rtcfg_storeNode(no_id, no_comment); } else if (strcmp(ev_type, "STORE_PATH") == 0) { /* * STORE_PATH */ int pa_server; int pa_client; char *pa_conninfo; int pa_connretry; pa_server = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); pa_client = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); pa_conninfo = PQgetvalue(res, tupno, 8); pa_connretry = (int)strtol(PQgetvalue(res, tupno, 9), NULL, 10); if (pa_client == rtcfg_nodeid) rtcfg_storePath(pa_server, pa_conninfo, pa_connretry); rtcfg_reloadListen(dbconn); } else if (strcmp(ev_type, "DROP_PATH") == 0) { /* * DROP_PATH */ int pa_server; int pa_client; pa_server = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); pa_client = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); if (pa_client == rtcfg_nodeid) rtcfg_dropPath(pa_server); rtcfg_reloadListen(dbconn); } else if (strcmp(ev_type, "STORE_LISTEN") == 0) { /* * STORE_LISTEN */ int li_origin; int li_provider; int 
li_receiver; li_origin = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); li_provider = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); li_receiver = (int)strtol(PQgetvalue(res, tupno, 8), NULL, 10); if (li_receiver == rtcfg_nodeid) rtcfg_storeListen(li_origin, li_provider); } else if (strcmp(ev_type, "DROP_LISTEN") == 0) { /* * DROP_LISTEN */ int li_origin; int li_provider; int li_receiver; li_origin = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); li_provider = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); li_receiver = (int)strtol(PQgetvalue(res, tupno, 8), NULL, 10); if (li_receiver == rtcfg_nodeid) rtcfg_dropListen(li_origin, li_provider); } else if (strcmp(ev_type, "STORE_SET") == 0) { /* * STORE_SET */ int set_id; int set_origin; char *set_comment; set_id = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); set_origin = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); set_comment = PQgetvalue(res, tupno, 8); rtcfg_storeSet(set_id, set_origin, set_comment); } else if (strcmp(ev_type, "DROP_SET") == 0) { /* * DROP_SET */ int set_id; set_id = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); rtcfg_dropSet(set_id); } else if (strcmp(ev_type, "MERGE_SET") == 0) { /* * MERGE_SET */ int set_id; int add_id; set_id = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); add_id = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); rtcfg_dropSet(add_id); } else if (strcmp(ev_type, "SET_ADD_TABLE") == 0) { /* * SET_ADD_TABLE */ /* * Nothing to do ATM ... we don't support adding tables to * subscribed sets and table information is not maintained in * the runtime configuration. */ } else if (strcmp(ev_type, "SET_ADD_SEQUENCE") == 0) { /* * SET_ADD_SEQUENCE */ /* * Nothing to do ATM ... we don't support adding sequences to * subscribed sets and table information is not maintained in * the runtime configuration. */ } else if (strcmp(ev_type, "SET_DROP_TABLE") == 0) { /* * SET_DROP_TABLE */ /* * Nothing to do ATM ... 
table information is not maintained * in the runtime configuration. */ } else if (strcmp(ev_type, "SET_DROP_SEQUENCE") == 0) { /* * SET_DROP_SEQUENCE */ /* * Nothing to do ATM ... table information is not maintained * in the runtime configuration. */ } else if (strcmp(ev_type, "SET_MOVE_TABLE") == 0) { /* * SET_MOVE_TABLE */ /* * Nothing to do ATM ... table information is not maintained * in the runtime configuration. */ } else if (strcmp(ev_type, "SET_MOVE_SEQUENCE") == 0) { /* * SET_MOVE_SEQUENCE */ /* * Nothing to do ATM ... table information is not maintained * in the runtime configuration. */ } else if (strcmp(ev_type, "ADJUST_SEQ") == 0) { /* * ADJUST_SEQ */ } else if (strcmp(ev_type, "STORE_TRIGGER") == 0) { /* * STORE_TRIGGER */ /* * Nothing to do ATM */ } else if (strcmp(ev_type, "DROP_TRIGGER") == 0) { /* * DROP_TRIGGER */ /* * Nothing to do ATM */ } else if (strcmp(ev_type, "MOVE_SET") == 0) { /* * MOVE_SET */ int set_id; int old_origin; int new_origin; PGresult *res2; SlonDString query2; int sub_provider; set_id = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); old_origin = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); new_origin = (int)strtol(PQgetvalue(res, tupno, 8), NULL, 10); /* * We have been the old origin of the set, so according to the * rules we must have a provider now. 
*/ dstring_init(&query2); (void) slon_mkquery(&query2, "select sub_provider from %s.sl_subscribe " " where sub_receiver = %d and sub_set = %d", rtcfg_namespace, rtcfg_nodeid, set_id); res2 = PQexec(dbconn, dstring_data(&query2)); if (PQresultStatus(res2) != PGRES_TUPLES_OK) { slon_log(SLON_FATAL, "localListenThread: \"%s\" %s\n", dstring_data(&query2), PQresultErrorMessage(res2)); dstring_free(&query2); PQclear(res2); slon_retry(); } if (PQntuples(res2) != 1) { slon_log(SLON_FATAL, "localListenThread: MOVE_SET " "but no provider found for set %d\n", set_id); dstring_free(&query2); PQclear(res2); slon_retry(); } sub_provider = (int)strtol(PQgetvalue(res2, 0, 0), NULL, 10); PQclear(res2); dstring_free(&query2); rtcfg_moveSet(set_id, old_origin, new_origin, sub_provider); rtcfg_reloadListen(dbconn); } else if (strcmp(ev_type, "FAILOVER_SET") == 0) { /* * FAILOVER_SET */ /* * Nothing to do. The stored procedure will restart this * daemon anyway. */ } else if (strcmp(ev_type, "SUBSCRIBE_SET") == 0) { /* * SUBSCRIBE_SET */ int sub_set; int sub_provider; int sub_receiver; char *sub_forward; sub_set = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); sub_provider = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); sub_receiver = (int)strtol(PQgetvalue(res, tupno, 8), NULL, 10); sub_forward = PQgetvalue(res, tupno, 9); if (sub_receiver == rtcfg_nodeid) rtcfg_storeSubscribe(sub_set, sub_provider, sub_forward); rtcfg_reloadListen(dbconn); } else if (strcmp(ev_type, "ENABLE_SUBSCRIPTION") == 0) { /* * ENABLE_SUBSCRIPTION */ int sub_set; int sub_provider; int sub_receiver; char *sub_forward; sub_set = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); sub_provider = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); sub_receiver = (int)strtol(PQgetvalue(res, tupno, 8), NULL, 10); sub_forward = PQgetvalue(res, tupno, 9); if (sub_receiver == rtcfg_nodeid) rtcfg_enableSubscription(sub_set, sub_provider, sub_forward); rtcfg_reloadListen(dbconn); } else if (strcmp(ev_type, 
"UNSUBSCRIBE_SET") == 0) { /* * UNSUBSCRIBE_SET */ int sub_set; int sub_receiver; sub_set = (int)strtol(PQgetvalue(res, tupno, 6), NULL, 10); sub_receiver = (int)strtol(PQgetvalue(res, tupno, 7), NULL, 10); if (sub_receiver == rtcfg_nodeid) rtcfg_unsubscribeSet(sub_set); rtcfg_reloadListen(dbconn); } else if (strcmp(ev_type, "DDL_SCRIPT") == 0) { /* * DDL_SCRIPT */ /* * Nothing to do ATM */ } else if (strcmp(ev_type, "ACCEPT_SET") == 0) { /* * ACCEPT_SET */ /* * Nothing to do locally */ slon_log(SLON_DEBUG1, "localListenThread: ACCEPT_SET\n"); rtcfg_reloadListen(dbconn); } else { slon_log(SLON_FATAL, "localListenThread: event %s: Unknown event type: %s\n", rtcfg_lastevent, ev_type); slon_abort(); } } PQclear(res); /* * If there were events, commit the transaction. */ if (ntuples > 0) { poll_sleep = 0; /* drop polling time back to 0... */ res = PQexec(dbconn, "commit transaction"); if (PQresultStatus(res) != PGRES_COMMAND_OK) { slon_log(SLON_FATAL, "localListenThread: \"%s\" - %s\n", dstring_data(&query1), PQresultErrorMessage(res)); PQclear(res); dstring_free(&query1); slon_retry(); break; } PQclear(res); } else { /* * No database events received. Rollback instead. */ /* Increase the amount of time to sleep, to a max of sync_interval_timeout */ poll_sleep += sync_interval; if (poll_sleep > sync_interval_timeout) { poll_sleep = sync_interval_timeout; } res = PQexec(dbconn, "rollback transaction;"); if (PQresultStatus(res) != PGRES_COMMAND_OK) { slon_log(SLON_FATAL, "localListenThread: \"rollback transaction;\" - %s\n", PQresultErrorMessage(res)); PQclear(res); slon_retry(); break; } PQclear(res); } /* * Wait for notify or for timeout */ if (sched_wait_time(conn, SCHED_WAIT_SOCK_READ, poll_sleep) != SCHED_STATUS_OK) break; } /* * The scheduler asked us to shutdown. Free memory and close the DB * connection. 
*/ dstring_free(&query1); slon_disconnectdb(conn); #ifdef SLON_MEMDEBUG conn = NULL; #endif slon_log(SLON_INFO, "localListenThread: thread done\n"); pthread_exit(NULL); }
/* ---------- * slon_remoteListenThread * * Listen for events on a remote database connection. This means, events * generated by every other node we listen for on this one. * ---------- */ void * remoteListenThread_main(void *cdata) { SlonNode *node = (SlonNode *) cdata; SlonConn *conn = NULL; char *conn_conninfo = NULL; char conn_symname[64]; ScheduleStatus rc; int retVal; SlonDString query1; PGconn *dbconn = NULL; PGresult *res; struct listat *listat_head; struct listat *listat_tail; int64 last_config_seq = 0; int64 new_config_seq = 0; slon_log(SLON_INFO, "remoteListenThread_%d: thread starts\n", node->no_id); /* * Initialize local data */ listat_head = NULL; listat_tail = NULL; dstring_init(&query1); poll_sleep = 0; sprintf(conn_symname, "node_%d_listen", node->no_id); /* * Work until doomsday */ while (true) { if (last_config_seq != (new_config_seq = rtcfg_seq_get())) { /* * Lock the configuration and check if we are (still) supposed to * exist. */ rtcfg_lock(); /* * If we have a database connection to the remote node, check if * there was a change in the connection information. */ if (conn != NULL) { if (node->pa_conninfo == NULL || strcmp(conn_conninfo, node->pa_conninfo) != 0) { slon_log(SLON_CONFIG, "remoteListenThread_%d: " "disconnecting from '%s'\n", node->no_id, conn_conninfo); slon_disconnectdb(conn); free(conn_conninfo); conn = NULL; conn_conninfo = NULL; } } /* * Check our node's listen_status */ if (node->listen_status == SLON_TSTAT_NONE || node->listen_status == SLON_TSTAT_SHUTDOWN || !((bool) node->no_active)) { rtcfg_unlock(); break; } if (node->listen_status == SLON_TSTAT_RESTART) node->listen_status = SLON_TSTAT_RUNNING; /* * Adjust the listat list and see if there is anything to listen * for. If not, sleep for a while and check again, some node * reconfiguration must be going on here. 
*/ remoteListen_adjust_listat(node, &listat_head, &listat_tail); last_config_seq = new_config_seq; if (listat_head == NULL) { rtcfg_unlock(); slon_log(SLON_DEBUG2, "remoteListenThread_%d: nothing to listen for\n", node->no_id); rc = sched_msleep(node, 10000); if (rc != SCHED_STATUS_OK && rc != SCHED_STATUS_CANCEL) break; continue; } rtcfg_unlock(); } /* * Check if we have a database connection */ if (conn == NULL) { int pa_connretry; /* * Make sure we have connection info */ rtcfg_lock(); if (node->pa_conninfo == NULL) { slon_log(SLON_WARN, "remoteListenThread_%d: no conninfo - " "sleep 10 seconds\n", node->no_id); rtcfg_unlock(); rc = sched_msleep(node, 10000); if (rc != SCHED_STATUS_OK && rc != SCHED_STATUS_CANCEL) break; continue; } /* * Try to establish a database connection to the remote node's * database. */ conn_conninfo = strdup(node->pa_conninfo); pa_connretry = node->pa_connretry; rtcfg_unlock(); conn = slon_connectdb(conn_conninfo, conn_symname); if (conn == NULL) { free(conn_conninfo); conn_conninfo = NULL; slon_log(SLON_WARN, "remoteListenThread_%d: DB connection failed - " "sleep %d seconds\n", node->no_id, pa_connretry); rc = sched_msleep(node, pa_connretry * 1000); if (rc != SCHED_STATUS_OK && rc != SCHED_STATUS_CANCEL) break; continue; } dbconn = conn->dbconn; monitor_state("remote listener", node->no_id, conn->conn_pid, "thread main loop", 0, "n/a"); /* * Listen on the connection for events and confirmations and * register the node connection. 
*/ (void) slon_mkquery(&query1, "select %s.registerNodeConnection(%d); ", rtcfg_namespace, rtcfg_nodeid); res = PQexec(dbconn, dstring_data(&query1)); if (PQresultStatus(res) != PGRES_TUPLES_OK) { slon_log(SLON_ERROR, "remoteListenThread_%d: \"%s\" - %s", node->no_id, dstring_data(&query1), PQresultErrorMessage(res)); PQclear(res); slon_disconnectdb(conn); free(conn_conninfo); conn = NULL; conn_conninfo = NULL; rc = sched_msleep(node, pa_connretry * 1000); if (rc != SCHED_STATUS_OK && rc != SCHED_STATUS_CANCEL) break; continue; } PQclear(res); retVal = db_getLocalNodeId(dbconn); if (retVal != node->no_id) { slon_log(SLON_ERROR, "remoteListenThread_%d: db_getLocalNodeId() " "returned %d - wrong database?\n", node->no_id, retVal); slon_disconnectdb(conn); free(conn_conninfo); conn = NULL; conn_conninfo = NULL; rc = sched_msleep(node, pa_connretry * 1000); if (rc != SCHED_STATUS_OK && rc != SCHED_STATUS_CANCEL) break; continue; } if (db_checkSchemaVersion(dbconn) < 0) { slon_log(SLON_ERROR, "remoteListenThread_%d: db_checkSchemaVersion() " "failed\n", node->no_id); slon_disconnectdb(conn); free(conn_conninfo); conn = NULL; conn_conninfo = NULL; rc = sched_msleep(node, pa_connretry * 1000); if (rc != SCHED_STATUS_OK && rc != SCHED_STATUS_CANCEL) break; continue; } if (PQserverVersion(dbconn) >= 90100) { slon_mkquery(&query1, "SET SESSION CHARACTERISTICS AS TRANSACTION read only deferrable"); res = PQexec(dbconn, dstring_data(&query1)); if (PQresultStatus(res) != PGRES_COMMAND_OK) { slon_log(SLON_ERROR, "remoteListenThread_%d: \"%s\" - %s", node->no_id, dstring_data(&query1), PQresultErrorMessage(res)); PQclear(res); slon_disconnectdb(conn); free(conn_conninfo); conn = NULL; conn_conninfo = NULL; rc = sched_msleep(node, pa_connretry * 1000); if (rc != SCHED_STATUS_OK && rc != SCHED_STATUS_CANCEL) break; continue; } } if (PQserverVersion(dbconn) >= 90100) { slon_mkquery(&query1, "SET SESSION CHARACTERISTICS AS TRANSACTION read only isolation level serializable 
deferrable"); res = PQexec(dbconn, dstring_data(&query1)); if (PQresultStatus(res) != PGRES_COMMAND_OK) { slon_log(SLON_ERROR, "remoteListenThread_%d: \"%s\" - %s", node->no_id, dstring_data(&query1), PQresultErrorMessage(res)); PQclear(res); slon_disconnectdb(conn); free(conn_conninfo); conn = NULL; conn_conninfo = NULL; rc = sched_msleep(node, pa_connretry * 1000); if (rc != SCHED_STATUS_OK && rc != SCHED_STATUS_CANCEL) break; continue; } } slon_log(SLON_DEBUG1, "remoteListenThread_%d: connected to '%s'\n", node->no_id, conn_conninfo); } /* * Receive events from the provider node */ retVal = remoteListen_receive_events(node, conn, listat_head); if (retVal < 0) { slon_disconnectdb(conn); free(conn_conninfo); conn = NULL; conn_conninfo = NULL; rc = sched_msleep(node, 10000); if (rc != SCHED_STATUS_OK && rc != SCHED_STATUS_CANCEL) break; continue; } /* * If the remote node notified for new confirmations, read them and * queue them into the remote worker for storage in our local * database. */ retVal = remoteListen_forward_confirm(node, conn); if (retVal < 0) { slon_disconnectdb(conn); free(conn_conninfo); conn = NULL; conn_conninfo = NULL; rc = sched_msleep(node, 10000); if (rc != SCHED_STATUS_OK && rc != SCHED_STATUS_CANCEL) break; continue; } /* * Wait for notification. */ rc = sched_wait_time(conn, SCHED_WAIT_SOCK_READ, poll_sleep); if (rc == SCHED_STATUS_CANCEL) continue; if (rc != SCHED_STATUS_OK) break; } /* * Doomsday! */ if (conn != NULL) { slon_log(SLON_INFO, "remoteListenThread_%d: " "disconnecting from '%s'\n", node->no_id, conn_conninfo); slon_disconnectdb(conn); free(conn_conninfo); conn = NULL; conn_conninfo = NULL; } remoteListen_cleanup(&listat_head, &listat_tail); rtcfg_lock(); node->listen_status = SLON_TSTAT_DONE; rtcfg_unlock(); slon_log(SLON_DEBUG1, "remoteListenThread_%d: thread done\n", node->no_id); dstring_free(&query1); pthread_exit(NULL); }
/* ----------
 * remoteListen_receive_events
 *
 * Retrieve all new events that origin from nodes for which we listen on this
 * node as provider and add them to the node specific worker message queue.
 *
 * node		the remote node this listener is attached to
 * conn		open connection to that node's database
 * listat	list of origins to select events for
 *
 * Returns 0 on success.  Returns -1 if an origin in the list is unknown,
 * the query cannot be sent, the connection fails while waiting, the
 * remote_listen_timeout elapses, or the query does not return tuples.
 *
 * Side effects: adjusts the file-level poll_sleep and sel_max_events
 * counters according to how many events were drawn.
 * ----------
 */
static int
remoteListen_receive_events(SlonNode * node, SlonConn * conn,
                            struct listat * listat)
{
    SlonNode   *origin;
    SlonDString query;
    SlonDString q2;
    char       *where_or_or;
    char        seqno_buf[64];
    PGresult   *res;
    int         ntuples;
    int         tupno;
    time_t      timeout;
    time_t      now;
    int         have_lag_filter;
    int         event_limit;

    dstring_init(&query);

    /*
     * In the runtime configuration info for the node, we remember the last
     * event sequence that we actually have received. If the remote worker
     * thread has processed it yet or it isn't important, we have it in the
     * message queue at least and don't need to select it again.
     *
     * So the query we construct contains a qualification (ev_origin =
     * <remote_node> and ev_seqno > <last_seqno>) per remote node we're
     * listen for here.
     */
    monitor_state("remote listener", node->no_id, conn->conn_pid,
                  "receiving events", 0, "n/a");
    (void) slon_mkquery(&query,
                        "select ev_origin, ev_seqno, ev_timestamp, "
                        " ev_snapshot, "
                        " \"pg_catalog\".txid_snapshot_xmin(ev_snapshot), "
                        " \"pg_catalog\".txid_snapshot_xmax(ev_snapshot), "
                        " ev_type, "
                        " ev_data1, ev_data2, "
                        " ev_data3, ev_data4, "
                        " ev_data5, ev_data6, "
                        " ev_data7, ev_data8 "
                        "from %s.sl_event e",
                        rtcfg_namespace);

    rtcfg_lock();

    /*
     * With a lag interval configured, the per-origin qualifications are
     * wrapped in parentheses behind a timestamp condition.  The setting
     * is sampled once so opening and closing the parenthesis (and
     * releasing q2) always agree.
     */
    have_lag_filter = lag_interval ? 1 : 0;
    where_or_or = "where";
    if (have_lag_filter)
    {
        dstring_init(&q2);
        (void) slon_mkquery(&q2, "where ev_timestamp < now() - '%s'::interval and (", lag_interval);
        where_or_or = dstring_data(&q2);
    }
    while (listat)
    {
        if ((origin = rtcfg_findNode(listat->li_origin)) == NULL)
        {
            rtcfg_unlock();
            slon_log(SLON_ERROR,
                     "remoteListenThread_%d: unknown node %d\n",
                     node->no_id, listat->li_origin);
            if (have_lag_filter)
                dstring_free(&q2);
            dstring_free(&query);
            return -1;
        }
        sprintf(seqno_buf, INT64_FORMAT, origin->last_event);
        slon_appendquery(&query,
                         " %s (e.ev_origin = '%d' and e.ev_seqno > '%s')",
                         where_or_or, listat->li_origin, seqno_buf);

        where_or_or = "or";
        listat = listat->next;
    }
    if (have_lag_filter)
    {
        slon_appendquery(&query, ")");

        /*
         * The prefix has been copied into the query (or was never
         * needed); where_or_or is not used past this point, so the
         * buffer can be released instead of being leaked on every call.
         */
        dstring_free(&q2);
    }

    /*
     * Limit the result set size to: sync_group_maxsize * 2, if it's set
     * 100, if sync_group_maxsize isn't set
     */
    event_limit = (sync_group_maxsize > 0) ? sync_group_maxsize * 2 : 100;
    slon_appendquery(&query, " order by e.ev_origin, e.ev_seqno limit %d",
                     event_limit);

    rtcfg_unlock();

    /*
     * Send the query asynchronously and poll for the result so a hung
     * remote node cannot block this thread past remote_listen_timeout.
     */
    if (PQsendQuery(conn->dbconn, dstring_data(&query)) == 0)
    {
        slon_log(SLON_ERROR,
                 "remoteListenThread_%d: \"%s\" - %s",
                 node->no_id,
                 dstring_data(&query), PQerrorMessage(conn->dbconn));
        dstring_free(&query);
        return -1;
    }
    (void) time(&timeout);
    timeout += remote_listen_timeout;
    while (PQisBusy(conn->dbconn) != 0)
    {
        (void) time(&now);
        if (now >= timeout)
        {
            slon_log(SLON_ERROR,
                     "remoteListenThread_%d: timeout (%d s) for event selection\n",
                     node->no_id, remote_listen_timeout);
            dstring_free(&query);
            return -1;
        }
        if (PQconsumeInput(conn->dbconn) == 0)
        {
            slon_log(SLON_ERROR,
                     "remoteListenThread_%d: \"%s\" - %s",
                     node->no_id,
                     dstring_data(&query), PQerrorMessage(conn->dbconn));
            dstring_free(&query);
            return -1;
        }
        if (PQisBusy(conn->dbconn) != 0)
            sched_wait_time(conn, SCHED_WAIT_SOCK_READ, 10000);
    }

    res = PQgetResult(conn->dbconn);

    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        slon_log(SLON_ERROR,
                 "remoteListenThread_%d: \"%s\" - %s",
                 node->no_id,
                 dstring_data(&query), PQresultErrorMessage(res));
        PQclear(res);
        dstring_free(&query);
        return -1;
    }
    dstring_free(&query);

    /*
     * Add all events found to the remote worker message queue.
     */
    ntuples = PQntuples(res);

    /* If we drew in the maximum number of events */
    if (ntuples == event_limit)
        sel_max_events++;       /* Add to the count... */
    else
        sel_max_events = 0;     /* reset the count */

    for (tupno = 0; tupno < ntuples; tupno++)
    {
        int         ev_origin;
        int64       ev_seqno;

        ev_origin = (int) strtol(PQgetvalue(res, tupno, 0), NULL, 10);
        (void) slon_scanint64(PQgetvalue(res, tupno, 1), &ev_seqno);

        slon_log(SLON_DEBUG2, "remoteListenThread_%d: "
                 "queue event %d,%s %s\n",
                 node->no_id, ev_origin, PQgetvalue(res, tupno, 1),
                 PQgetvalue(res, tupno, 6));

        /* SQL NULLs in ev_data1..ev_data8 are passed on as NULL pointers. */
        remoteWorker_event(node->no_id,
                           ev_origin, ev_seqno,
                           PQgetvalue(res, tupno, 2),   /* ev_timestamp */
                           PQgetvalue(res, tupno, 3),   /* ev_snapshot */
                           PQgetvalue(res, tupno, 4),   /* mintxid */
                           PQgetvalue(res, tupno, 5),   /* maxtxid */
                           PQgetvalue(res, tupno, 6),   /* ev_type */
                           (PQgetisnull(res, tupno, 7)) ? NULL : PQgetvalue(res, tupno, 7),
                           (PQgetisnull(res, tupno, 8)) ? NULL : PQgetvalue(res, tupno, 8),
                           (PQgetisnull(res, tupno, 9)) ? NULL : PQgetvalue(res, tupno, 9),
                           (PQgetisnull(res, tupno, 10)) ? NULL : PQgetvalue(res, tupno, 10),
                           (PQgetisnull(res, tupno, 11)) ? NULL : PQgetvalue(res, tupno, 11),
                           (PQgetisnull(res, tupno, 12)) ? NULL : PQgetvalue(res, tupno, 12),
                           (PQgetisnull(res, tupno, 13)) ? NULL : PQgetvalue(res, tupno, 13),
                           (PQgetisnull(res, tupno, 14)) ? NULL : PQgetvalue(res, tupno, 14));
    }

    if (ntuples > 0)
    {
        if ((sel_max_events > 2) && (sync_group_maxsize > 100))
        {
            /*
             * Several full result sets in a row: pause before drawing
             * more events, longer with every consecutive full draw.
             */
            slon_log(SLON_INFO, "remoteListenThread_%d: drew maximum # of events for %d iterations\n",
                     node->no_id, sel_max_events);
            sched_msleep(node, 10000 + (1000 * sel_max_events));
        }
        else
        {
            poll_sleep = 0;
        }
    }
    else
    {
        /* Nothing received: back off, capped at sync_interval_timeout. */
        poll_sleep = poll_sleep * 2 + sync_interval;
        if (poll_sleep > sync_interval_timeout)
        {
            poll_sleep = sync_interval_timeout;
        }
    }
    PQclear(res);
    monitor_state("remote listener", node->no_id, conn->conn_pid,
                  "thread main loop", 0, "n/a");
    return 0;
}
/* ---------- * SlonMain * ---------- */ static void SlonMain(void) { PGresult *res; SlonDString query; int i, n; PGconn *startup_conn; slon_pid = getpid(); #ifndef WIN32 slon_worker_pid = slon_pid; #endif if (pthread_mutex_init(&slon_wait_listen_lock, NULL) < 0) { slon_log(SLON_FATAL, "main: pthread_mutex_init() failed - %s\n", strerror(errno)); slon_abort(); } if (pthread_cond_init(&slon_wait_listen_cond, NULL) < 0) { slon_log(SLON_FATAL, "main: pthread_cond_init() failed - %s\n", strerror(errno)); slon_abort(); } /* * Dump out current configuration - all elements of the various arrays... */ dump_configuration(); /* * Connect to the local database to read the initial configuration */ startup_conn = PQconnectdb(rtcfg_conninfo); if (startup_conn == NULL) { slon_log(SLON_FATAL, "main: PQconnectdb() failed - sleep 10s\n"); sleep(10); slon_retry(); exit(-1); } if (PQstatus(startup_conn) != CONNECTION_OK) { slon_log(SLON_FATAL, "main: Cannot connect to local database - %s - sleep 10s\n", PQerrorMessage(startup_conn)); PQfinish(startup_conn); sleep(10); slon_retry(); exit(-1); } /* * Get our local node ID */ rtcfg_nodeid = db_getLocalNodeId(startup_conn); if (rtcfg_nodeid < 0) { slon_log(SLON_FATAL, "main: Node is not initialized properly - sleep 10s\n"); sleep(10); slon_retry(); exit(-1); } if (db_checkSchemaVersion(startup_conn) < 0) { slon_log(SLON_FATAL, "main: Node has wrong Slony-I schema or module version loaded\n"); slon_abort(); } slon_log(SLON_CONFIG, "main: local node id = %d\n", rtcfg_nodeid); dstring_init(&query); slon_mkquery(&query, "select %s.slon_node_health_check();", rtcfg_namespace); res = PQexec(startup_conn, dstring_data(&query)); if (PQresultStatus(res) != PGRES_TUPLES_OK) { slon_log(SLON_FATAL, "could not call slon_node_health_check() - %", PQresultErrorMessage(res)); slon_abort(); } else { if (PQntuples(res) != 1) { slon_log(SLON_FATAL, "query '%s' returned %d rows (expected 1)\n", query, PQntuples(res)); slon_abort(); } else { if 
(*(PQgetvalue(res, 0, 0)) == 'f') { slon_log(SLON_FATAL, "slon_node_health_check() returned false - fatal health problem!\n%s\nREPAIR CONFIG may be helpful to rectify this problem\n", PQresultErrorMessage(res)); slon_abort(); } } } PQclear(res); dstring_free(&query); #ifndef WIN32 if (signal(SIGHUP, SIG_IGN) == SIG_ERR) { slon_log(SLON_FATAL, "main: SIGHUP signal handler setup failed -(%d) %s\n", errno, strerror(errno)); slon_abort(); } if (signal(SIGINT, SIG_IGN) == SIG_ERR) { slon_log(SLON_FATAL, "main: SIGINT signal handler setup failed -(%d) %s\n", errno, strerror(errno)); slon_abort(); } if (signal(SIGTERM, SIG_IGN) == SIG_ERR) { slon_log(SLON_FATAL, "main: SIGTERM signal handler setup failed -(%d) %s\n", errno, strerror(errno)); slon_abort(); } if (signal(SIGCHLD, SIG_IGN) == SIG_ERR) { slon_log(SLON_FATAL, "main: SIGCHLD signal handler setup failed -(%d) %s\n", errno, strerror(errno)); slon_abort(); } if (signal(SIGQUIT, SIG_IGN) == SIG_ERR) { slon_log(SLON_FATAL, "main: SIGQUIT signal handler setup failed -(%d) %s\n", errno, strerror(errno)); slon_abort(); } #endif slon_log(SLON_INFO, "main: main process started\n"); /* * Start the event scheduling system */ slon_log(SLON_CONFIG, "main: launching sched_start_mainloop\n"); if (sched_start_mainloop() < 0) slon_retry(); slon_log(SLON_CONFIG, "main: loading current cluster configuration\n"); /* * Begin a transaction */ res = PQexec(startup_conn, "start transaction; " "set transaction isolation level serializable;"); if (PQresultStatus(res) != PGRES_COMMAND_OK) { slon_log(SLON_FATAL, "Cannot start transaction - %s - sleep 10s\n", PQresultErrorMessage(res)); sleep(10); PQclear(res); slon_retry(); } PQclear(res); /* * Read configuration table sl_node */ dstring_init(&query); slon_mkquery(&query, "select no_id, no_active, no_comment, " " (select coalesce(max(con_seqno),0) from %s.sl_confirm " " where con_origin = no_id and con_received = %d) " " as last_event, " " (select ev_snapshot from %s.sl_event " " where 
ev_origin = no_id " " and ev_seqno = (select max(ev_seqno) " " from %s.sl_event " " where ev_origin = no_id " " and ev_type = 'SYNC')) as last_snapshot " "from %s.sl_node " "order by no_id; ", rtcfg_namespace, rtcfg_nodeid, rtcfg_namespace, rtcfg_namespace, rtcfg_namespace); res = PQexec(startup_conn, dstring_data(&query)); if (PQresultStatus(res) != PGRES_TUPLES_OK) { slon_log(SLON_FATAL, "main: Cannot get node list - %s\n", PQresultErrorMessage(res)); PQclear(res); dstring_free(&query); slon_retry(); } for (i = 0, n = PQntuples(res); i < n; i++) { int no_id = (int) strtol(PQgetvalue(res, i, 0), NULL, 10); int no_active = (*PQgetvalue(res, i, 1) == 't') ? 1 : 0; char *no_comment = PQgetvalue(res, i, 2); int64 last_event; if (no_id == rtcfg_nodeid) { /* * Complete our own local node entry */ rtcfg_nodeactive = no_active; rtcfg_nodecomment = strdup(no_comment); } else { /* * Add a remote node */ slon_scanint64(PQgetvalue(res, i, 3), &last_event); rtcfg_storeNode(no_id, no_comment); rtcfg_setNodeLastEvent(no_id, last_event); rtcfg_setNodeLastSnapshot(no_id, PQgetvalue(res, i, 4)); /* * If it is active, remember for activation just before we start * processing events. 
*/ if (no_active) rtcfg_needActivate(no_id); } } PQclear(res); /* * Read configuration table sl_path - the interesting pieces */ slon_mkquery(&query, "select pa_server, pa_conninfo, pa_connretry " "from %s.sl_path where pa_client = %d" " and pa_conninfo<>'<event pending>'", rtcfg_namespace, rtcfg_nodeid); res = PQexec(startup_conn, dstring_data(&query)); if (PQresultStatus(res) != PGRES_TUPLES_OK) { slon_log(SLON_FATAL, "main: Cannot get path config - %s\n", PQresultErrorMessage(res)); PQclear(res); dstring_free(&query); slon_retry(); } for (i = 0, n = PQntuples(res); i < n; i++) { int pa_server = (int) strtol(PQgetvalue(res, i, 0), NULL, 10); char *pa_conninfo = PQgetvalue(res, i, 1); int pa_connretry = (int) strtol(PQgetvalue(res, i, 2), NULL, 10); rtcfg_storePath(pa_server, pa_conninfo, pa_connretry); } PQclear(res); /* * Load the initial listen configuration */ rtcfg_reloadListen(startup_conn); /* * Read configuration table sl_set */ slon_mkquery(&query, "select set_id, set_origin, set_comment " "from %s.sl_set", rtcfg_namespace); res = PQexec(startup_conn, dstring_data(&query)); if (PQresultStatus(res) != PGRES_TUPLES_OK) { slon_log(SLON_FATAL, "main: Cannot get set config - %s\n", PQresultErrorMessage(res)); PQclear(res); dstring_free(&query); slon_retry(); } for (i = 0, n = PQntuples(res); i < n; i++) { int set_id = (int) strtol(PQgetvalue(res, i, 0), NULL, 10); int set_origin = (int) strtol(PQgetvalue(res, i, 1), NULL, 10); char *set_comment = PQgetvalue(res, i, 2); rtcfg_storeSet(set_id, set_origin, set_comment); } PQclear(res); /* * Read configuration table sl_subscribe - only subscriptions for local * node */ slon_mkquery(&query, "select sub_set, sub_provider, sub_forward, sub_active " "from %s.sl_subscribe " "where sub_receiver = %d", rtcfg_namespace, rtcfg_nodeid); res = PQexec(startup_conn, dstring_data(&query)); if (PQresultStatus(res) != PGRES_TUPLES_OK) { slon_log(SLON_FATAL, "main: Cannot get subscription config - %s\n", 
PQresultErrorMessage(res)); PQclear(res); dstring_free(&query); slon_retry(); } for (i = 0, n = PQntuples(res); i < n; i++) { int sub_set = (int) strtol(PQgetvalue(res, i, 0), NULL, 10); int sub_provider = (int) strtol(PQgetvalue(res, i, 1), NULL, 10); char *sub_forward = PQgetvalue(res, i, 2); char *sub_active = PQgetvalue(res, i, 3); rtcfg_storeSubscribe(sub_set, sub_provider, sub_forward); if (*sub_active == 't') rtcfg_enableSubscription(sub_set, sub_provider, sub_forward); } PQclear(res); /* * Remember the last known local event sequence */ slon_mkquery(&query, "select coalesce(max(ev_seqno), -1) from %s.sl_event " "where ev_origin = '%d'", rtcfg_namespace, rtcfg_nodeid); res = PQexec(startup_conn, dstring_data(&query)); if (PQresultStatus(res) != PGRES_TUPLES_OK) { slon_log(SLON_FATAL, "main: Cannot get last local eventid - %s\n", PQresultErrorMessage(res)); PQclear(res); dstring_free(&query); slon_retry(); } if (PQntuples(res) == 0) strcpy(rtcfg_lastevent, "-1"); else if (PQgetisnull(res, 0, 0)) strcpy(rtcfg_lastevent, "-1"); else strcpy(rtcfg_lastevent, PQgetvalue(res, 0, 0)); PQclear(res); dstring_free(&query); slon_log(SLON_CONFIG, "main: last local event sequence = %s\n", rtcfg_lastevent); /* * Rollback the transaction we used to get the config snapshot */ res = PQexec(startup_conn, "rollback transaction;"); if (PQresultStatus(res) != PGRES_COMMAND_OK) { slon_log(SLON_FATAL, "main: Cannot rollback transaction - %s\n", PQresultErrorMessage(res)); PQclear(res); slon_retry(); } PQclear(res); /* * Done with the startup, don't need the local connection any more. */ PQfinish(startup_conn); slon_log(SLON_CONFIG, "main: configuration complete - starting threads\n"); /* * Create the local event thread that monitors the local node for * administrative events to adjust the configuration at runtime. We wait * here until the local listen thread has checked that there is no other * slon daemon running. 
*/ pthread_mutex_lock(&slon_wait_listen_lock); if (pthread_create(&local_event_thread, NULL, localListenThread_main, NULL) < 0) { slon_log(SLON_FATAL, "main: cannot create localListenThread - %s\n", strerror(errno)); slon_retry(); } pthread_cond_wait(&slon_wait_listen_cond, &slon_wait_listen_lock); if (!slon_listen_started) { /** * The local listen thread did not start up properly. */ slon_log(SLON_FATAL, "main: localListenThread did not start\n"); slon_abort(); } pthread_mutex_unlock(&slon_wait_listen_lock); /* * Enable all nodes that are active */ rtcfg_doActivate(); /* * Create the local cleanup thread that will remove old events and log * data. */ if (pthread_create(&local_cleanup_thread, NULL, cleanupThread_main, NULL) < 0) { slon_log(SLON_FATAL, "main: cannot create cleanupThread - %s\n", strerror(errno)); slon_retry(); } /* * Create the local sync thread that will generate SYNC events if we had * local database updates. */ if (pthread_create(&local_sync_thread, NULL, syncThread_main, NULL) < 0) { slon_log(SLON_FATAL, "main: cannot create syncThread - %s\n", strerror(errno)); slon_retry(); } /* * Create the local monitor thread that will process monitoring requests */ if (monitor_threads) { if (pthread_create(&local_monitor_thread, NULL, monitorThread_main, NULL) < 0) { slon_log(SLON_FATAL, "main: cannot create monitorThread - %s\n", strerror(errno)); slon_retry(); } } /* * Wait until the scheduler has shut down all remote connections */ slon_log(SLON_INFO, "main: running scheduler mainloop\n"); if (sched_wait_mainloop() < 0) { slon_log(SLON_FATAL, "main: scheduler returned with error\n"); slon_retry(); } slon_log(SLON_INFO, "main: scheduler mainloop returned\n"); /* * Wait for all remote threads to finish */ main_thread = pthread_self(); slon_log(SLON_CONFIG, "main: wait for remote threads\n"); rtcfg_joinAllRemoteThreads(); /* * Wait for the local threads to finish */ if (pthread_join(local_event_thread, NULL) < 0) slon_log(SLON_ERROR, "main: cannot join 
localListenThread - %s\n", strerror(errno)); if (pthread_join(local_cleanup_thread, NULL) < 0) slon_log(SLON_ERROR, "main: cannot join cleanupThread - %s\n", strerror(errno)); if (pthread_join(local_sync_thread, NULL) < 0) slon_log(SLON_ERROR, "main: cannot join syncThread - %s\n", strerror(errno)); if (pthread_join(local_monitor_thread, NULL) < 0) slon_log(SLON_ERROR, "main: cannot join monitorThread - %s\n", strerror(errno)); slon_log(SLON_CONFIG, "main: done\n"); exit(0); }
/* ---------- * cleanupThread_main * * Periodically calls the stored procedure to remove old events and log data and * vacuums those tables. * ---------- */ void * cleanupThread_main( /* @unused@ */ void *dummy) { SlonConn *conn; SlonDString query_baseclean; SlonDString query2; SlonDString query_pertbl; PGconn *dbconn; PGresult *res; PGresult *res2; struct timeval tv_start; struct timeval tv_end; int t; int vac_count = 0; int vac_enable = SLON_VACUUM_FREQUENCY; char *vacuum_action; int ntuples; slon_log(SLON_CONFIG, "cleanupThread: thread starts\n"); /* * Want the vacuum time bias to be between 0 and 100 seconds, hence * between 0 and 100000 */ if (vac_bias == 0) { vac_bias = rand() % (SLON_CLEANUP_SLEEP * 166); } slon_log(SLON_CONFIG, "cleanupThread: bias = %d\n", vac_bias); /* * Connect to the local database */ if ((conn = slon_connectdb(rtcfg_conninfo, "local_cleanup")) == NULL) { #ifndef WIN32 (void) kill(getpid(), SIGTERM); pthread_exit(NULL); #else exit(0); #endif /* slon_retry(); */ } dbconn = conn->dbconn; monitor_state("local_cleanup", 0, conn->conn_pid, "thread main loop", 0, "n/a"); /* * Build the query string for calling the cleanupEvent() stored procedure */ dstring_init(&query_baseclean); slon_mkquery(&query_baseclean, "begin;" "lock table %s.sl_config_lock;" "select %s.cleanupEvent('%s'::interval);" "commit;", rtcfg_namespace, rtcfg_namespace, cleanup_interval ); dstring_init(&query2); /* * Loop until shutdown time arrived * * Note the introduction of vac_bias and an up-to-100s random "fuzz"; this * reduces the likelihood that having multiple slons hitting the same * cluster will run into conflicts due to trying to vacuum common tables * * such as pg_listener concurrently */ while (sched_wait_time(conn, SCHED_WAIT_SOCK_READ, SLON_CLEANUP_SLEEP * 1000 + vac_bias + (rand() % (SLON_CLEANUP_SLEEP * 166))) == SCHED_STATUS_OK) { /* * Call the stored procedure cleanupEvent() */ monitor_state("local_cleanup", 0, conn->conn_pid, "cleanupEvent", 0, "n/a"); 
gettimeofday(&tv_start, NULL); res = PQexec(dbconn, dstring_data(&query_baseclean)); if (PQresultStatus(res) != PGRES_COMMAND_OK) { slon_log(SLON_FATAL, "cleanupThread: \"%s\" - %s", dstring_data(&query_baseclean), PQresultErrorMessage(res)); PQclear(res); slon_retry(); break; } PQclear(res); gettimeofday(&tv_end, NULL); slon_log(SLON_INFO, "cleanupThread: %8.3f seconds for cleanupEvent()\n", TIMEVAL_DIFF(&tv_start, &tv_end)); /* * Detain the usual suspects (vacuum event and log data) */ if (vac_frequency != 0) { vac_enable = vac_frequency; } if (++vac_count >= vac_enable) { unsigned long latest_xid; vac_count = 0; latest_xid = get_earliest_xid(dbconn); vacuum_action = ""; if (earliest_xid == latest_xid) { slon_log(SLON_INFO, "cleanupThread: xid %d still active - analyze instead\n", earliest_xid); } else { if (vac_enable == vac_frequency) { vacuum_action = "vacuum "; } } earliest_xid = latest_xid; /* * Build the query string for vacuuming replication runtime data * and event tables */ gettimeofday(&tv_start, NULL); slon_mkquery(&query2, "select nspname, relname from %s.TablesToVacuum();", rtcfg_namespace); res = PQexec(dbconn, dstring_data(&query2)); /* * for each table... and we should set up the query to return not * only the table name, but also a boolean to support what's in * the SELECT below; that'll nicely simplify this process... 
*/ if (PQresultStatus(res) != PGRES_TUPLES_OK) /* query error */ { slon_log(SLON_ERROR, "cleanupThread: \"%s\" - %s", dstring_data(&query2), PQresultErrorMessage(res)); } ntuples = PQntuples(res); slon_log(SLON_DEBUG1, "cleanupThread: number of tables to clean: %d\n", ntuples); monitor_state("local_cleanup", 0, conn->conn_pid, "vacuumTables", 0, "n/a"); for (t = 0; t < ntuples; t++) { char *tab_nspname = PQgetvalue(res, t, 0); char *tab_relname = PQgetvalue(res, t, 1); ExecStatusType vrc; slon_log(SLON_DEBUG1, "cleanupThread: %s analyze \"%s\".%s;\n", vacuum_action, tab_nspname, tab_relname); dstring_init(&query_pertbl); slon_mkquery(&query_pertbl, "%s analyze \"%s\".%s;", vacuum_action, tab_nspname, tab_relname); res2 = PQexec(dbconn, dstring_data(&query_pertbl)); vrc = PQresultStatus(res2); if (vrc == PGRES_FATAL_ERROR) { slon_log(SLON_ERROR, "cleanupThread: \"%s\" - %s\n", dstring_data(&query_pertbl), PQresultErrorMessage(res2)); /* * slon_retry(); break; */ } else { if (vrc == PGRES_NONFATAL_ERROR) { slon_log(SLON_WARN, "cleanupThread: \"%s\" - %s\n", dstring_data(&query_pertbl), PQresultErrorMessage(res2)); } } PQclear(res2); dstring_reset(&query_pertbl); } gettimeofday(&tv_end, NULL); slon_log(SLON_INFO, "cleanupThread: %8.3f seconds for vacuuming\n", TIMEVAL_DIFF(&tv_start, &tv_end)); /* * Free Resources */ dstring_free(&query_pertbl); PQclear(res); monitor_state("local_cleanup", 0, conn->conn_pid, "thread main loop", 0, "n/a"); } } /* * Free Resources */ dstring_free(&query_baseclean); dstring_free(&query2); /* * Disconnect from the database */ slon_disconnectdb(conn); /* * Terminate this thread */ slon_log(SLON_DEBUG1, "cleanupThread: thread done\n"); pthread_exit(NULL); }