static void
checkClusterConfiguration(PGconn *conn, PGconn *primary)
{
    PGresult   *res;

    log_info(_("%s Checking cluster configuration with schema '%s'\n"),
             progname, repmgr_schema);

    sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_class "
                      " WHERE oid = '%s.repl_nodes'::regclass",
                      repmgr_schema);
    res = PQexec(conn, sqlquery);
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        log_err("PQexec failed: %s\n", PQerrorMessage(conn));
        PQclear(res);
        CloseConnections();
        exit(ERR_DB_QUERY);
    }

    /*
     * If there aren't any results then we have not configured a primary node
     * yet in repmgr, or the connection string is pointing to the wrong
     * database.
     *
     * XXX if we are the primary, should we try to create the tables needed?
     */
    if (PQntuples(res) == 0)
    {
        log_err("The replication cluster is not configured\n");
        PQclear(res);
        CloseConnections();
        exit(ERR_BAD_CONFIG);
    }

    PQclear(res);
}
static void
checkNodeConfiguration(char *conninfo)
{
    PGresult   *res;

    /*
     * Check if we have my node information in repl_nodes
     */
    log_info(_("%s Checking node %d in cluster '%s'\n"),
             progname, local_options.node, local_options.cluster_name);

    sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes "
                      " WHERE id = %d AND cluster = '%s' ",
                      repmgr_schema, local_options.node,
                      local_options.cluster_name);
    res = PQexec(myLocalConn, sqlquery);
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        log_err(_("PQexec failed: %s\n"), PQerrorMessage(myLocalConn));
        PQclear(res);
        CloseConnections();
        exit(ERR_BAD_CONFIG);
    }

    /*
     * If there aren't any results then we have not configured this node yet
     * in repmgr; if that is the case we will insert the node into the
     * cluster, except if it is a witness
     */
    if (PQntuples(res) == 0)
    {
        PQclear(res);

        if (myLocalMode == WITNESS_MODE)
        {
            log_err(_("The witness is not configured\n"));
            CloseConnections();
            exit(ERR_BAD_CONFIG);
        }

        /* Adding the node */
        log_info(_("%s Adding node %d to cluster '%s'\n"),
                 progname, local_options.node, local_options.cluster_name);
        sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes "
                          "VALUES (%d, '%s', '%s', 'f')",
                          repmgr_schema, local_options.node,
                          local_options.cluster_name, local_options.conninfo);

        /*
         * PQexec() returns a PGresult even on failure, so check the result
         * status rather than the returned pointer, and free the result.
         */
        res = PQexec(primaryConn, sqlquery);
        if (PQresultStatus(res) != PGRES_COMMAND_OK)
        {
            log_err(_("Cannot insert node details, %s\n"),
                    PQerrorMessage(primaryConn));
            PQclear(res);
            CloseConnections();
            exit(ERR_BAD_CONFIG);
        }
        PQclear(res);
        return;
    }

    PQclear(res);
}
void TorcNetwork::SetAllowed(bool Allow)
{
    if (!Allow)
        CloseConnections();

    gLocalContext->NotifyEvent(Allow ? Torc::NetworkEnabled : Torc::NetworkDisabled);

    setNetworkAccessible(Allow ? Accessible : NotAccessible);

    LOG(VB_GENERAL, LOG_INFO, QString("Network access %1").arg(Allow ? "allowed" : "not allowed"));
}
ConnectionProvider::~ConnectionProvider()
{
    CloseConnections();

    for (ConnectionVector::iterator i = connections_.begin(); i != connections_.end(); ++i)
    {
        Connection* connection = *i;
        SAFE_DELETE(connection);
    }
    connections_.clear();
}
/*
 * CompleteShardPlacementTransactions commits or aborts pending shard placement
 * transactions when the local transaction commits or aborts.
 */
void
CompleteShardPlacementTransactions(XactEvent event, void *arg)
{
    if (shardPlacementConnectionList == NIL)
    {
        /* nothing to do */
        return;
    }
    else if (event == XACT_EVENT_PRE_COMMIT)
    {
        /*
         * Any failure here will cause local changes to be rolled back,
         * and remote changes to either roll back (1PC) or, in case of
         * connection or node failure, leave a prepared transaction
         * (2PC).
         */
        if (MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC)
        {
            PrepareRemoteTransactions(shardPlacementConnectionList);
        }

        return;
    }
    else if (event == XACT_EVENT_COMMIT)
    {
        /*
         * A failure here will cause some remote changes to either
         * roll back (1PC) or, in case of connection or node failure,
         * leave a prepared transaction (2PC). However, the local
         * changes have already been committed.
         */
        CommitRemoteTransactions(shardPlacementConnectionList, false);
    }
    else if (event == XACT_EVENT_ABORT)
    {
        /*
         * A failure here will cause some remote changes to either
         * roll back (1PC) or, in case of connection or node failure,
         * leave a prepared transaction (2PC). The local changes have
         * already been rolled back.
         */
        AbortRemoteTransactions(shardPlacementConnectionList);
    }
    else
    {
        return;
    }

    CloseConnections(shardPlacementConnectionList);
    shardPlacementConnectionList = NIL;
}
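/*
 * Hedged sketch, not part of the original source: a callback with the
 * (XactEvent event, void *arg) signature above is normally attached to the
 * local transaction with PostgreSQL's RegisterXactCallback(), typically from
 * the extension's _PG_init(). The helper name below is hypothetical; only
 * RegisterXactCallback() itself is a known PostgreSQL API.
 */
#include "access/xact.h"

static void
RegisterShardPlacementXactCallback(void)
{
    /* have the transaction machinery invoke the handler on commit/abort */
    RegisterXactCallback(CompleteShardPlacementTransactions, NULL);
}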
bool ConnectionProvider::HandleNetworkStateEvent(event_id_t event_id, Foundation::EventDataInterface* data)
{
    if (event_id == ProtocolUtilities::Events::EVENT_SERVER_CONNECTED)
    {
        //! @todo agent_id to credentials
        //! TODO: CHECK THIS
        // Communication::Credentials credentials(OPENSIM_IM_PROTOCOL, "", "", "", 0);
        // Communication::ConnectionInterface* conn = OpenConnection(credentials);
    }

    if (event_id == ProtocolUtilities::Events::EVENT_SERVER_DISCONNECTED ||
        event_id == ProtocolUtilities::Events::EVENT_CONNECTION_FAILED)
    {
        CloseConnections();
    }

    return false;
}
TorcNetwork::~TorcNetwork()
{
    // release any outstanding requests
    CloseConnections();

    // remove settings
    if (m_networkAllowedInbound)
    {
        m_networkAllowedInbound->Remove();
        m_networkAllowedInbound->DownRef();
    }

    if (m_networkAllowedOutbound)
    {
        m_networkAllowedOutbound->Remove();
        m_networkAllowedOutbound->DownRef();
    }

    if (m_networkAllowed)
    {
        m_networkAllowed->Remove();
        m_networkAllowed->DownRef();
    }

    if (m_networkGroup)
    {
        m_networkGroup->Remove();
        m_networkGroup->DownRef();
    }

    m_networkAllowedInbound  = NULL;
    m_networkAllowedOutbound = NULL;
    m_networkAllowed         = NULL;
    m_networkGroup           = NULL;

    // delete the configuration manager
    if (m_manager)
        m_manager->deleteLater();
    m_manager = NULL;

    LOG(VB_GENERAL, LOG_INFO, "Closing network access manager");
}
static void
update_registration(void)
{
    PGresult   *res;

    sqlquery_snprintf(sqlquery, "UPDATE %s.repl_nodes "
                      "   SET conninfo = '%s', "
                      "       priority = %d "
                      " WHERE id = %d",
                      repmgr_schema, local_options.conninfo,
                      local_options.priority, local_options.node);

    res = PQexec(primaryConn, sqlquery);
    if (PQresultStatus(res) != PGRES_COMMAND_OK)
    {
        log_err(_("Cannot update registration: %s\n"), PQerrorMessage(primaryConn));
        CloseConnections();
        exit(ERR_DB_CON);
    }
    PQclear(res);
}
bool ServerNetwork::acceptNewClient()
{
    // if client waiting, accept the connection and save the socket
    SOCKET ClientSocket = accept(ListenSocket, NULL, NULL);

    if (ClientSocket != INVALID_SOCKET)
    {
        // disable nagle on the client's socket
        // char value = 1;
        // setsockopt( ClientSocket, IPPROTO_TCP, TCP_NODELAY, &value, sizeof( value ) );

        // insert new client into session id table
        sessions.insert(std::pair<unsigned int, SOCKET>(ClientNr, ClientSocket));
        ClientNr++;

        return true;
    }

    CloseConnections();
    return false;
}
/*
 * Insert monitor info: basically the time, the xlog position received and
 * replayed on the standby, and the current xlog location on the primary.
 * Also do the math to see how far behind, in bytes, we are from being up
 * to date.
 */
static void
MonitorExecute(void)
{
    PGresult   *res;
    char        monitor_standby_timestamp[MAXLEN];
    char        last_wal_primary_location[MAXLEN];
    char        last_wal_standby_received[MAXLEN];
    char        last_wal_standby_applied[MAXLEN];

    unsigned long long int lsn_primary;
    unsigned long long int lsn_standby_received;
    unsigned long long int lsn_standby_applied;

    int         connection_retries;

    /*
     * Check if the master is still available; if after 5 minutes of retries
     * we cannot reconnect, try to get a new master.
     */
    for (connection_retries = 0; connection_retries < 15; connection_retries++)
    {
        if (PQstatus(primaryConn) != CONNECTION_OK)
        {
            log_warning(_("Connection to master has been lost, trying to recover...\n"));
            /* wait 20 seconds between retries */
            sleep(20);
            PQreset(primaryConn);
        }
        else
        {
            if (connection_retries > 0)
            {
                log_notice(_("Connection to master has been restored, continue monitoring.\n"));
            }
            break;
        }
    }

    if (PQstatus(primaryConn) != CONNECTION_OK)
    {
        log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n"));
        for (connection_retries = 0; connection_retries < 6; connection_retries++)
        {
            primaryConn = getMasterConnection(myLocalConn, local_options.node,
                                              local_options.cluster_name,
                                              &primary_options.node, NULL);
            if (PQstatus(primaryConn) == CONNECTION_OK)
            {
                /* Connected, we can continue the process so break the loop */
                log_err(_("Connected to node %d, continue monitoring.\n"),
                        primary_options.node);
                break;
            }
            else
            {
                log_err(_("We haven't found a new master, waiting before retry...\n"));
                /* wait 5 minutes between retries; after 6 failures (30 minutes) we stop trying */
                sleep(300);
            }
        }
    }

    if (PQstatus(primaryConn) != CONNECTION_OK)
    {
        log_err(_("We couldn't reconnect for long enough, exiting...\n"));
        exit(ERR_DB_CON);
    }

    /* Check if we are still a standby, we could have been promoted */
    if (!is_standby(myLocalConn))
    {
        log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
        CloseConnections();
        exit(ERR_PROMOTED);
    }

    /*
     * First check if there is a command being executed, and if that is the
     * case, cancel the query so we can insert the current record.
     */
    if (PQisBusy(primaryConn) == 1)
        CancelQuery();

    /* Get local xlog info */
    sqlquery_snprintf(sqlquery,
                      "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
                      "pg_last_xlog_replay_location()");

    res = PQexec(myLocalConn, sqlquery);
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        log_err("PQexec failed: %s\n", PQerrorMessage(myLocalConn));
        PQclear(res);
        /* if there is any error just let it be and retry in next loop */
        return;
    }

    strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
    strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
    strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
    PQclear(res);

    /* Get primary xlog info */
    sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location() ");

    res = PQexec(primaryConn, sqlquery);
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        log_err("PQexec failed: %s\n", PQerrorMessage(primaryConn));
        PQclear(res);
        return;
    }

    strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN);
    PQclear(res);

    /* Calculate the lag */
    lsn_primary = walLocationToBytes(last_wal_primary_location);
    lsn_standby_received = walLocationToBytes(last_wal_standby_received);
    lsn_standby_applied = walLocationToBytes(last_wal_standby_applied);
    if (only_one_entry && only_one_entry_desired)
    {
        /*
         * Update the single monitor row for this primary / standby pair.
         * NOTE: the repl_monitor column names in the SET list are assumed
         * from the INSERT and DELETE statements used below.
         */
        sqlquery_snprintf(sqlquery,
                          "UPDATE %s.repl_monitor "
                          "   SET last_monitor_time = '%s'::timestamp with time zone, "
                          "       last_wal_primary_location = '%s', "
                          "       last_wal_standby_location = '%s', "
                          "       replication_lag = %lld, "
                          "       apply_lag = %lld "
                          " WHERE primary_node = %d AND standby_node = %d",
                          repmgr_schema, monitor_standby_timestamp,
                          last_wal_primary_location, last_wal_standby_received,
                          (lsn_primary - lsn_standby_received),
                          (lsn_standby_received - lsn_standby_applied),
                          primary_options.node, local_options.node);

        res = PQexec(primaryConn, sqlquery);
        if (PQresultStatus(res) != PGRES_COMMAND_OK)
        {
            log_err("PQexec failed: %s\n", PQerrorMessage(primaryConn));
            PQclear(res);
            CloseConnections();
            exit(ERR_DB_QUERY);
        }

        /* if the row was not there, fall back to inserting on the next round */
        if (atoi(PQcmdTuples(res)) != 1)
        {
            only_one_entry = false;
        }
        PQclear(res);
    }
    else
    {
        /*
         * Build and send insert
         */
        sqlquery_snprintf(sqlquery,
                          "INSERT INTO %s.repl_monitor "
                          "VALUES(%d, %d, '%s'::timestamp with time zone, "
                          " '%s', '%s', "
                          " %lld, %lld)",
                          repmgr_schema, primary_options.node, local_options.node,
                          monitor_standby_timestamp, last_wal_primary_location,
                          last_wal_standby_received,
                          (lsn_primary - lsn_standby_received),
                          (lsn_standby_received - lsn_standby_applied));

        res = PQexec(primaryConn, sqlquery);
        if (PQresultStatus(res) != PGRES_COMMAND_OK)
        {
            log_err("PQexec failed: %s\n", PQerrorMessage(primaryConn));
            PQclear(res);
            CloseConnections();
            exit(ERR_DB_QUERY);
        }
        PQclear(res);

        if (only_one_entry_desired)
        {
            /*
             * Delete any older entries for this pair on the primary, so only
             * the latest row is kept.
             */
            sqlquery_snprintf(sqlquery,
                              "DELETE FROM %s.repl_monitor "
                              "WHERE primary_node=%d AND standby_node=%d "
                              "  AND last_monitor_time < '%s'::timestamp with time zone",
                              repmgr_schema, primary_options.node,
                              local_options.node, monitor_standby_timestamp);

            res = PQexec(primaryConn, sqlquery);
            if (PQresultStatus(res) != PGRES_COMMAND_OK)
            {
                log_err("PQexec failed: %s\n", PQerrorMessage(primaryConn));
                PQclear(res);
                CloseConnections();
                exit(ERR_DB_QUERY);
            }
            PQclear(res);

            only_one_entry = true;
        }
    }
}
int
main(int argc, char **argv)
{
    static struct option long_options[] = {
        {"config", required_argument, NULL, 'f'},
        {"verbose", no_argument, NULL, 'v'},
        {"no-history", no_argument, NULL, 'H'},
        {NULL, 0, NULL, 0}
    };

    int         optindex;
    int         c;
    char        standby_version[MAXVERSIONSTR];

    progname = get_progname(argv[0]);

    if (argc > 1)
    {
        if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
        {
            help(progname);
            exit(SUCCESS);
        }
        if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
        {
            printf("%s (PostgreSQL) " PG_VERSION "\n", progname);
            exit(SUCCESS);
        }
    }

    while ((c = getopt_long(argc, argv, "f:vH", long_options, &optindex)) != -1)
    {
        switch (c)
        {
            case 'f':
                config_file = optarg;
                break;
            case 'v':
                verbose = true;
                break;
            case 'H':
                /* no-history */
                only_one_entry_desired = true;
                break;
            default:
                usage();
                exit(ERR_BAD_CONFIG);
        }
    }

    setup_cancel_handler();

    /*
     * Read the configuration file: repmgr.conf
     */
    parse_config(config_file, &local_options);
    if (local_options.node == -1)
    {
        log_err("Node information is missing. "
                "Check the configuration file, or provide one if you have not done so.\n");
        exit(ERR_BAD_CONFIG);
    }

    logger_init(progname, local_options.loglevel, local_options.logfacility);
    if (verbose)
        logger_min_verbose(LOG_INFO);

    snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX,
             local_options.cluster_name);

    log_info(_("%s Connecting to database '%s'\n"), progname,
             local_options.conninfo);
    myLocalConn = establishDBConnection(local_options.conninfo, true);

    /* should be v9 or better */
    log_info(_("%s Connected to database, checking its state\n"), progname);
    pg_version(myLocalConn, standby_version);
    if (strcmp(standby_version, "") == 0)
    {
        PQfinish(myLocalConn);
        log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
        exit(ERR_BAD_CONFIG);
    }

    /*
     * Set my server mode, establish a connection to primary
     * and start monitor
     */
    myLocalMode = is_standby(myLocalConn) ? STANDBY_MODE : PRIMARY_MODE;

    if (myLocalMode == PRIMARY_MODE)
    {
        primary_options.node = local_options.node;
        strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN);
        primaryConn = myLocalConn;
    }
    else
    {
        /* I need the id of the primary as well as a connection to it */
        log_info(_("%s Connecting to primary for cluster '%s'\n"),
                 progname, local_options.cluster_name);
        primaryConn = getMasterConnection(myLocalConn, local_options.node,
                                          local_options.cluster_name,
                                          &primary_options.node, NULL);
        if (primaryConn == NULL)
        {
            CloseConnections();
            exit(ERR_BAD_CONFIG);
        }
    }

    checkClusterConfiguration(myLocalConn, primaryConn);
    checkNodeConfiguration(local_options.conninfo);

    if (myLocalMode == STANDBY_MODE)
    {
        log_info(_("%s Starting continuous standby node monitoring\n"), progname);
        MonitorCheck();
    }
    else
    {
        log_info(_("%s This is a primary node, program not needed here; exiting\n"), progname);
    }

    /* Prevent a double-free */
    if (primaryConn == myLocalConn)
        myLocalConn = NULL;

    /* close the connection to the database and cleanup */
    CloseConnections();

    /* Shuts down logging system */
    logger_shutdown();

    return 0;
}
/*
** Called when an operation fails. Cleans up any bad connections and, if we've
** run out, frees up a connection so that others may connect.
*/
static void CheckConnections(void)
{
    if (CloseDisconnections() == 0)
    {
        CloseConnections(0, 1, 1);
    }
}
int
main(int argc, char **argv)
{
    static struct option long_options[] = {
        {"config", required_argument, NULL, 'f'},
        {"verbose", no_argument, NULL, 'v'},
        {NULL, 0, NULL, 0}
    };

    int         optindex;
    int         c;

    char        conninfo[MAXLEN];
    char        standby_version[MAXVERSIONSTR];

    progname = get_progname(argv[0]);

    if (argc > 1)
    {
        if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
        {
            help(progname);
            exit(0);
        }
        if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
        {
            printf("%s (PostgreSQL) " PG_VERSION "\n", progname);
            exit(0);
        }
    }

    while ((c = getopt_long(argc, argv, "f:v", long_options, &optindex)) != -1)
    {
        switch (c)
        {
            case 'f':
                config_file = optarg;
                break;
            case 'v':
                verbose = true;
                break;
            default:
                fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
                exit(1);
        }
    }

    setup_cancel_handler();

    if (config_file == NULL)
    {
        config_file = malloc(5 + sizeof(CONFIG_FILE));
        sprintf(config_file, "./%s", CONFIG_FILE);
    }

    /*
     * Read the configuration file: repmgr.conf
     */
    parse_config(config_file, myClusterName, &myLocalId, conninfo);
    if (myLocalId == -1)
    {
        fprintf(stderr, "Node information is missing. "
                "Check the configuration file.\n");
        exit(1);
    }

    myLocalConn = establishDBConnection(conninfo, true);

    /* should be v9 or better */
    pg_version(myLocalConn, standby_version);
    if (strcmp(standby_version, "") == 0)
    {
        PQfinish(myLocalConn);
        fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
        exit(1);
    }

    /*
     * Set my server mode, establish a connection to primary
     * and start monitor
     */
    myLocalMode = is_standby(myLocalConn) ? STANDBY_MODE : PRIMARY_MODE;

    if (myLocalMode == PRIMARY_MODE)
    {
        primaryId = myLocalId;
        strcpy(primaryConninfo, conninfo);
        primaryConn = myLocalConn;
    }
    else
    {
        /* I need the id of the primary as well as a connection to it */
        primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId);
        if (primaryConn == NULL)
            exit(1);
    }

    checkClusterConfiguration();
    checkNodeConfiguration(conninfo);

    if (myLocalMode == STANDBY_MODE)
    {
        MonitorCheck();
    }

    /* close the connection to the database and cleanup */
    CloseConnections();

    return 0;
}
/*
 * CitusCopyFrom implements the COPY table_name FROM ... for hash-partitioned
 * and range-partitioned tables.
 */
void
CitusCopyFrom(CopyStmt *copyStatement, char *completionTag)
{
    Oid         tableId = RangeVarGetRelid(copyStatement->relation, NoLock, false);
    char       *relationName = get_rel_name(tableId);
    Relation    distributedRelation = NULL;
    char        partitionMethod = '\0';
    Var        *partitionColumn = NULL;
    TupleDesc   tupleDescriptor = NULL;
    uint32      columnCount = 0;
    Datum      *columnValues = NULL;
    bool       *columnNulls = NULL;
    TypeCacheEntry *typeEntry = NULL;
    FmgrInfo   *hashFunction = NULL;
    FmgrInfo   *compareFunction = NULL;

    int         shardCount = 0;
    List       *shardIntervalList = NULL;
    ShardInterval **shardIntervalCache = NULL;
    bool        useBinarySearch = false;

    HTAB       *shardConnectionHash = NULL;
    ShardConnections *shardConnections = NULL;
    List       *connectionList = NIL;

    EState     *executorState = NULL;
    MemoryContext executorTupleContext = NULL;
    ExprContext *executorExpressionContext = NULL;

    CopyState   copyState = NULL;
    CopyOutState copyOutState = NULL;
    FmgrInfo   *columnOutputFunctions = NULL;
    uint64      processedRowCount = 0;

    /* disallow COPY to/from file or program except for superusers */
    if (copyStatement->filename != NULL && !superuser())
    {
        if (copyStatement->is_program)
        {
            ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                            errmsg("must be superuser to COPY to or from an external program"),
                            errhint("Anyone can COPY to stdout or from stdin. "
                                    "psql's \\copy command also works for anyone.")));
        }
        else
        {
            ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                            errmsg("must be superuser to COPY to or from a file"),
                            errhint("Anyone can COPY to stdout or from stdin. "
                                    "psql's \\copy command also works for anyone.")));
        }
    }

    partitionColumn = PartitionColumn(tableId, 0);
    partitionMethod = PartitionMethod(tableId);
    if (partitionMethod != DISTRIBUTE_BY_RANGE && partitionMethod != DISTRIBUTE_BY_HASH)
    {
        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                        errmsg("COPY is only supported for hash- and "
                               "range-partitioned tables")));
    }

    /* resolve hash function for partition column */
    typeEntry = lookup_type_cache(partitionColumn->vartype, TYPECACHE_HASH_PROC_FINFO);
    hashFunction = &(typeEntry->hash_proc_finfo);

    /* resolve compare function for shard intervals */
    compareFunction = ShardIntervalCompareFunction(partitionColumn, partitionMethod);

    /* allocate column values and nulls arrays */
    distributedRelation = heap_open(tableId, RowExclusiveLock);
    tupleDescriptor = RelationGetDescr(distributedRelation);
    columnCount = tupleDescriptor->natts;
    columnValues = palloc0(columnCount * sizeof(Datum));
    columnNulls = palloc0(columnCount * sizeof(bool));

    /* load the list of shards and verify that we have shards to copy into */
    shardIntervalList = LoadShardIntervalList(tableId);
    if (shardIntervalList == NIL)
    {
        if (partitionMethod == DISTRIBUTE_BY_HASH)
        {
            ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                            errmsg("could not find any shards into which to copy"),
                            errdetail("No shards exist for distributed table \"%s\".",
                                      relationName),
                            errhint("Run master_create_worker_shards to create shards "
                                    "and try again.")));
        }
        else
        {
            ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                            errmsg("could not find any shards into which to copy"),
                            errdetail("No shards exist for distributed table \"%s\".",
                                      relationName)));
        }
    }

    /* prevent concurrent placement changes and non-commutative DML statements */
    LockAllShards(shardIntervalList);

    /* initialize the shard interval cache */
    shardCount = list_length(shardIntervalList);
    shardIntervalCache = SortedShardIntervalArray(shardIntervalList);

    /* determine whether to use binary search */
    if (partitionMethod != DISTRIBUTE_BY_HASH ||
        !IsUniformHashDistribution(shardIntervalCache, shardCount))
    {
        useBinarySearch = true;
    }

    /* initialize copy state to read from COPY data source */
    copyState = BeginCopyFrom(distributedRelation, copyStatement->filename,
                              copyStatement->is_program, copyStatement->attlist,
                              copyStatement->options);

    executorState = CreateExecutorState();
    executorTupleContext = GetPerTupleMemoryContext(executorState);
    executorExpressionContext = GetPerTupleExprContext(executorState);

    copyOutState = (CopyOutState) palloc0(sizeof(CopyOutStateData));
    copyOutState->binary = true;
    copyOutState->fe_msgbuf = makeStringInfo();
    copyOutState->rowcontext = executorTupleContext;

    columnOutputFunctions = ColumnOutputFunctions(tupleDescriptor, copyOutState->binary);

    /*
     * Create a mapping of shard id to a connection for each of its placements.
     * The hash should be initialized before the PG_TRY, since it is used in
     * PG_CATCH. Otherwise, it may be undefined in the PG_CATCH (see sigsetjmp
     * documentation).
     */
    shardConnectionHash = CreateShardConnectionHash();

    /* we use a PG_TRY block to roll back on errors (e.g. in NextCopyFrom) */
    PG_TRY();
    {
        ErrorContextCallback errorCallback;

        /* set up callback to identify error line number */
        errorCallback.callback = CopyFromErrorCallback;
        errorCallback.arg = (void *) copyState;
        errorCallback.previous = error_context_stack;
        error_context_stack = &errorCallback;

        /* ensure transactions have unique names on worker nodes */
        InitializeDistributedTransaction();

        while (true)
        {
            bool        nextRowFound = false;
            Datum       partitionColumnValue = 0;
            ShardInterval *shardInterval = NULL;
            int64       shardId = 0;
            bool        shardConnectionsFound = false;
            MemoryContext oldContext = NULL;

            ResetPerTupleExprContext(executorState);

            oldContext = MemoryContextSwitchTo(executorTupleContext);

            /* parse a row from the input */
            nextRowFound = NextCopyFrom(copyState, executorExpressionContext,
                                        columnValues, columnNulls, NULL);
            if (!nextRowFound)
            {
                MemoryContextSwitchTo(oldContext);
                break;
            }

            CHECK_FOR_INTERRUPTS();

            /* find the partition column value */
            if (columnNulls[partitionColumn->varattno - 1])
            {
                ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                                errmsg("cannot copy row with NULL value "
                                       "in partition column")));
            }

            partitionColumnValue = columnValues[partitionColumn->varattno - 1];

            /* find the shard interval and id for the partition column value */
            shardInterval = FindShardInterval(partitionColumnValue, shardIntervalCache,
                                              shardCount, partitionMethod,
                                              compareFunction, hashFunction,
                                              useBinarySearch);
            if (shardInterval == NULL)
            {
                ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                                errmsg("could not find shard for partition column "
                                       "value")));
            }

            shardId = shardInterval->shardId;

            MemoryContextSwitchTo(oldContext);

            /* get existing connections to the shard placements, if any */
            shardConnections = GetShardConnections(shardConnectionHash, shardId,
                                                   &shardConnectionsFound);
            if (!shardConnectionsFound)
            {
                /* open connections and initiate COPY on shard placements */
                OpenCopyTransactions(copyStatement, shardConnections);

                /* send binary headers to shard placements */
                resetStringInfo(copyOutState->fe_msgbuf);
                AppendCopyBinaryHeaders(copyOutState);
                SendCopyDataToAll(copyOutState->fe_msgbuf,
                                  shardConnections->connectionList);
            }

            /* replicate row to shard placements */
            resetStringInfo(copyOutState->fe_msgbuf);
            AppendCopyRowData(columnValues, columnNulls, tupleDescriptor,
                              copyOutState, columnOutputFunctions);
            SendCopyDataToAll(copyOutState->fe_msgbuf,
                              shardConnections->connectionList);

            processedRowCount += 1;
        }

        connectionList = ConnectionList(shardConnectionHash);

        /* send binary footers to all shard placements */
        resetStringInfo(copyOutState->fe_msgbuf);
        AppendCopyBinaryFooters(copyOutState);
        SendCopyDataToAll(copyOutState->fe_msgbuf, connectionList);

        /* all lines have been copied, stop showing line number in errors */
        error_context_stack = errorCallback.previous;

        /* close the COPY input on all shard placements */
        EndRemoteCopy(connectionList, true);

        if (CopyTransactionManager == TRANSACTION_MANAGER_2PC)
        {
            PrepareRemoteTransactions(connectionList);
        }

        EndCopyFrom(copyState);
        heap_close(distributedRelation, NoLock);

        /* check for cancellation one last time before committing */
        CHECK_FOR_INTERRUPTS();
    }
    PG_CATCH();
    {
        List       *abortConnectionList = NIL;

        /* roll back all transactions */
        abortConnectionList = ConnectionList(shardConnectionHash);
        EndRemoteCopy(abortConnectionList, false);
        AbortRemoteTransactions(abortConnectionList);
        CloseConnections(abortConnectionList);

        PG_RE_THROW();
    }
    PG_END_TRY();

    /*
     * Ready to commit the transaction; this code is below the PG_TRY block
     * because we do not want any of the transactions rolled back if a failure
     * occurs. Instead, they should be rolled forward.
     */
    CommitRemoteTransactions(connectionList);
    CloseConnections(connectionList);

    if (completionTag != NULL)
    {
        snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
                 "COPY " UINT64_FORMAT, processedRowCount);
    }
}
ServerNetwork::ServerNetwork(char *IP, char *Port)
{
    // create WSADATA object
    WSADATA wsaData;

    MaxSocketBufferSize = 0;

    // our sockets for the server
    ListenSocket = INVALID_SOCKET;
    // ClientSocket = INVALID_SOCKET;

    // address info for the server to listen to
    struct addrinfo *result = NULL;
    struct addrinfo hints;

    // Initialize Winsock
    iResult = WSAStartup(MAKEWORD(2,2), &wsaData);
    if (iResult != 0)
    {
        printf("WSAStartup failed with error: %d\n", iResult);
        exit(1);
    }

    // set address information
    ZeroMemory(&hints, sizeof(hints));
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_protocol = IPPROTO_TCP;    // TCP connection!!!
    hints.ai_flags = AI_PASSIVE;

    // Resolve the server address and port
    iResult = getaddrinfo(NULL, Port, &hints, &result);
    if (iResult != 0)
    {
        printf("getaddrinfo failed with error: %d\n", iResult);
        CloseConnections();
        return;
    }

    // Create a SOCKET for connecting to server
    ListenSocket = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
    if (ListenSocket == INVALID_SOCKET)
    {
        printf("socket failed with error: %ld\n", WSAGetLastError());
        freeaddrinfo(result);
        CloseConnections();
        return;
    }

    MaxSocketBufferSize = 1 * 65535;
    // WSAGetLastError() returns an int, so print it with %d
    if (setsockopt(ListenSocket, SOL_SOCKET, SO_SNDBUF,
                   (char*)&MaxSocketBufferSize, sizeof(MaxSocketBufferSize)) == SOCKET_ERROR)
        printf("Error setting socket opts: %d\n", WSAGetLastError());

    int optlen = sizeof(MaxSocketBufferSize);
    iResult = getsockopt(ListenSocket, SOL_SOCKET, SO_SNDBUF,
                         (char*)&MaxSocketBufferSize, &optlen);

    // timeval tv;
    // tv.tv_sec = 1;
    // setsockopt(ListenSocket, SOL_SOCKET, SO_RCVTIMEO,(const char *)&tv,sizeof(tv));
    // tv.tv_sec = 10;
    // setsockopt(ListenSocket, SOL_SOCKET, SO_SNDTIMEO,(const char *)&tv,sizeof(tv));

    // Set the mode of the socket to be nonblocking
    /*
    u_long iMode = 1;
    iResult = ioctlsocket(ListenSocket, FIONBIO, &iMode);
    if (iResult == SOCKET_ERROR)
    {
        printf("ioctlsocket failed with error: %d\n", WSAGetLastError());
        closesocket(ListenSocket);
        WSACleanup();
        exit(1);
    }
    /**/

    // Setup the TCP listening socket
    iResult = bind(ListenSocket, result->ai_addr, (int)result->ai_addrlen);
    if (iResult == SOCKET_ERROR)
    {
        printf("bind failed with error: %d\n", WSAGetLastError());
        freeaddrinfo(result);
        CloseConnections();
        return;
    }

    // no longer need address information
    freeaddrinfo(result);

    // start listening for new clients attempting to connect
    iResult = listen(ListenSocket, SOMAXCONN);
    if (iResult == SOCKET_ERROR)
    {
        printf("listen failed with error: %d\n", WSAGetLastError());
        CloseConnections();
    }
}
ServerNetwork::~ServerNetwork(void)
{
    CloseConnections();
}
static void
handle_sigint(SIGNAL_ARGS)
{
    CloseConnections();
}
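/*
 * Hedged sketch, an assumption rather than code from the source: a
 * SIGNAL_ARGS handler such as handle_sigint is normally installed with
 * pqsignal(), e.g. from the setup_cancel_handler() call made in main(),
 * so that an interrupt closes the libpq connections before the process
 * exits. The body below is illustrative only.
 */
static void
setup_cancel_handler(void)
{
    pqsignal(SIGINT, handle_sigint);
}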
int
main(int argc, char **argv)
{
    static struct option long_options[] = {
        {"config", required_argument, NULL, 'f'},
        {"verbose", no_argument, NULL, 'v'},
        {NULL, 0, NULL, 0}
    };

    int         optindex;
    int         c;
    char        standby_version[MAXVERSIONSTR];

    progname = get_progname(argv[0]);

    if (argc > 1)
    {
        if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
        {
            help(progname);
            exit(SUCCESS);
        }
        if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
        {
            printf("%s (PostgreSQL) " PG_VERSION "\n", progname);
            exit(SUCCESS);
        }
    }

    while ((c = getopt_long(argc, argv, "f:v", long_options, &optindex)) != -1)
    {
        switch (c)
        {
            case 'f':
                config_file = optarg;
                break;
            case 'v':
                verbose = true;
                break;
            default:
                usage();
                exit(ERR_BAD_CONFIG);
        }
    }

    setup_event_handlers();

    /*
     * Read the configuration file: repmgr.conf
     */
    parse_config(config_file, &local_options);
    if (local_options.node == -1)
    {
        log_err(_("Node information is missing. "
                  "Check the configuration file, or provide one if you have not done so.\n"));
        exit(ERR_BAD_CONFIG);
    }

    logger_init(progname, local_options.loglevel, local_options.logfacility);
    if (verbose)
        logger_min_verbose(LOG_INFO);

    snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX,
             local_options.cluster_name);

    log_info(_("%s Connecting to database '%s'\n"), progname,
             local_options.conninfo);
    myLocalConn = establishDBConnection(local_options.conninfo, true);

    /* should be v9 or better */
    log_info(_("%s Connected to database, checking its state\n"), progname);
    pg_version(myLocalConn, standby_version);
    if (strcmp(standby_version, "") == 0)
    {
        log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
        PQfinish(myLocalConn);
        exit(ERR_BAD_CONFIG);
    }

    /*
     * Set my server mode, establish a connection to primary
     * and start monitor
     */
    if (is_witness(myLocalConn, repmgr_schema, local_options.cluster_name,
                   local_options.node))
        myLocalMode = WITNESS_MODE;
    else if (is_standby(myLocalConn))
        myLocalMode = STANDBY_MODE;
    else    /* is the master */
        myLocalMode = PRIMARY_MODE;

    switch (myLocalMode)
    {
        case PRIMARY_MODE:
            primary_options.node = local_options.node;
            strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN);
            primaryConn = myLocalConn;

            checkClusterConfiguration(myLocalConn, primaryConn);
            checkNodeConfiguration(local_options.conninfo);

            if (reload_configuration(config_file, &local_options))
            {
                PQfinish(myLocalConn);
                myLocalConn = establishDBConnection(local_options.conninfo, true);
                primaryConn = myLocalConn;
                update_registration();
            }

            log_info(_("%s Starting continuous primary connection check\n"), progname);

            /* Check that primary is still alive, and standbys are sending info */

            /*
             * Every SLEEP_MONITOR seconds, do master checks
             * XXX
             * Check that standbys are sending info
             */
            for (;;)
            {
                if (CheckPrimaryConnection())
                {
                    /*
                     * CheckActiveStandbiesConnections();
                     * CheckInactiveStandbies();
                     */
                    sleep(SLEEP_MONITOR);
                }
                else
                {
                    /*
                     * XXX
                     * May we do something more verbose?
                     */
                    exit(1);
                }

                if (got_SIGHUP)
                {
                    /* if we can reload, then could need to change myLocalConn */
                    if (reload_configuration(config_file, &local_options))
                    {
                        PQfinish(myLocalConn);
                        myLocalConn = establishDBConnection(local_options.conninfo, true);
                        primaryConn = myLocalConn;
                        update_registration();
                    }
                    got_SIGHUP = false;
                }
            }
            break;
        case WITNESS_MODE:
        case STANDBY_MODE:
            /* I need the id of the primary as well as a connection to it */
            log_info(_("%s Connecting to primary for cluster '%s'\n"),
                     progname, local_options.cluster_name);
            primaryConn = getMasterConnection(myLocalConn, repmgr_schema,
                                              local_options.node,
                                              local_options.cluster_name,
                                              &primary_options.node, NULL);
            if (primaryConn == NULL)
            {
                CloseConnections();
                exit(ERR_BAD_CONFIG);
            }

            checkClusterConfiguration(myLocalConn, primaryConn);
            checkNodeConfiguration(local_options.conninfo);

            if (reload_configuration(config_file, &local_options))
            {
                PQfinish(myLocalConn);
                myLocalConn = establishDBConnection(local_options.conninfo, true);
                update_registration();
            }

            /*
             * Every SLEEP_MONITOR seconds, do checks
             */
            if (myLocalMode == WITNESS_MODE)
            {
                log_info(_("%s Starting continuous witness node monitoring\n"), progname);
            }
            else if (myLocalMode == STANDBY_MODE)
            {
                log_info(_("%s Starting continuous standby node monitoring\n"), progname);
            }

            for (;;)
            {
                if (myLocalMode == WITNESS_MODE)
                    WitnessMonitor();
                else if (myLocalMode == STANDBY_MODE)
                    StandbyMonitor();
                sleep(SLEEP_MONITOR);

                if (got_SIGHUP)
                {
                    /* if we can reload, then could need to change myLocalConn */
                    if (reload_configuration(config_file, &local_options))
                    {
                        PQfinish(myLocalConn);
                        myLocalConn = establishDBConnection(local_options.conninfo, true);
                        update_registration();
                    }
                    got_SIGHUP = false;
                }
            }
            break;
        default:
            log_err(_("%s: Unrecognized mode for node %d\n"), progname,
                    local_options.node);
    }

    /* Prevent a double-free */
    if (primaryConn == myLocalConn)
        myLocalConn = NULL;

    /* close the connection to the database and cleanup */
    CloseConnections();

    /* Shuts down logging system */
    logger_shutdown();

    return 0;
}
/*
 * Insert monitor info: basically the time, the xlog position received and
 * replayed on the standby, and the current xlog location on the primary.
 * Also do the math to see how far behind, in bytes, we are from being up
 * to date.
 */
static void
StandbyMonitor(void)
{
    PGresult   *res;
    char        monitor_standby_timestamp[MAXLEN];
    char        last_wal_primary_location[MAXLEN];
    char        last_wal_standby_received[MAXLEN];
    char        last_wal_standby_applied[MAXLEN];

    unsigned long long int lsn_primary;
    unsigned long long int lsn_standby_received;
    unsigned long long int lsn_standby_applied;

    int         connection_retries;

    /*
     * Check if the master is still available; if after 5 minutes of retries
     * we cannot reconnect, try to get a new master.
     */
    CheckPrimaryConnection();    /* this takes up to NUM_RETRY * SLEEP_RETRY seconds */

    if (PQstatus(primaryConn) != CONNECTION_OK)
    {
        if (local_options.failover == MANUAL_FAILOVER)
        {
            log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted.\n"));
            for (connection_retries = 0; connection_retries < 6; connection_retries++)
            {
                primaryConn = getMasterConnection(myLocalConn, repmgr_schema,
                                                  local_options.node,
                                                  local_options.cluster_name,
                                                  &primary_options.node, NULL);
                if (PQstatus(primaryConn) == CONNECTION_OK)
                {
                    /* Connected, we can continue the process so break the loop */
                    log_err(_("Connected to node %d, continue monitoring.\n"),
                            primary_options.node);
                    break;
                }
                else
                {
                    log_err(_("We haven't found a new master, waiting before retry...\n"));
                    /* wait 5 minutes between retries; after 6 failures (30 minutes) we stop trying */
                    sleep(300);
                }
            }

            if (PQstatus(primaryConn) != CONNECTION_OK)
            {
                log_err(_("We couldn't reconnect for long enough, exiting...\n"));
                exit(ERR_DB_CON);
            }
        }
        else if (local_options.failover == AUTOMATIC_FAILOVER)
        {
            /*
             * When we return from this function we will have a new primary
             * and a new primaryConn
             */
            do_failover();
        }
    }

    /* Check if we are still a standby, we could have been promoted */
    if (!is_standby(myLocalConn))
    {
        log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
        CloseConnections();
        exit(ERR_PROMOTED);
    }

    /*
     * First check if there is a command being executed, and if that is the
     * case, cancel the query so we can insert the current record.
     */
    if (PQisBusy(primaryConn) == 1)
        CancelQuery();

    /* Get local xlog info */
    sqlquery_snprintf(sqlquery,
                      "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
                      "pg_last_xlog_replay_location()");

    res = PQexec(myLocalConn, sqlquery);
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        log_err(_("PQexec failed: %s\n"), PQerrorMessage(myLocalConn));
        PQclear(res);
        /* if there is any error just let it be and retry in next loop */
        return;
    }

    strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
    strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
    strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
    PQclear(res);

    /* Get primary xlog info */
    sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location() ");

    res = PQexec(primaryConn, sqlquery);
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        log_err(_("PQexec failed: %s\n"), PQerrorMessage(primaryConn));
        PQclear(res);
        return;
    }

    strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN);
    PQclear(res);

    /* Calculate the lag */
    lsn_primary = walLocationToBytes(last_wal_primary_location);
    lsn_standby_received = walLocationToBytes(last_wal_standby_received);
    lsn_standby_applied = walLocationToBytes(last_wal_standby_applied);

    /*
     * Build the SQL to execute on primary
     */
    sqlquery_snprintf(sqlquery,
                      "INSERT INTO %s.repl_monitor "
                      "VALUES(%d, %d, '%s'::timestamp with time zone, "
                      " '%s', '%s', "
                      " %lld, %lld)",
                      repmgr_schema, primary_options.node, local_options.node,
                      monitor_standby_timestamp, last_wal_primary_location,
                      last_wal_standby_received,
                      (lsn_primary - lsn_standby_received),
                      (lsn_standby_received - lsn_standby_applied));

    /*
     * Execute the query asynchronously, but don't check for a result. We
     * will check the result next time we pause for a monitor step.
     */
    if (PQsendQuery(primaryConn, sqlquery) == 0)
        log_warning(_("Query could not be sent to primary. %s\n"),
                    PQerrorMessage(primaryConn));
}
/*
 * Insert monitor info: basically the time, the xlog position received and
 * replayed on the standby, and the current xlog location on the primary.
 * Also do the math to see how far behind, in bytes, we are from being up
 * to date.
 */
static void
MonitorExecute(void)
{
    PGresult   *res;
    char        monitor_standby_timestamp[MAXLEN];
    char        last_wal_primary_location[MAXLEN];
    char        last_wal_standby_received[MAXLEN];
    char        last_wal_standby_applied[MAXLEN];

    unsigned long long int lsn_primary;
    unsigned long long int lsn_standby_received;
    unsigned long long int lsn_standby_applied;

    int         connection_retries;

    /*
     * Check if the master is still available; if after 5 minutes of retries
     * we cannot reconnect, try to get a new master.
     */
    for (connection_retries = 0; connection_retries < 15; connection_retries++)
    {
        if (PQstatus(primaryConn) != CONNECTION_OK)
        {
            fprintf(stderr, "\n%s: Connection to master has been lost, trying to recover...\n",
                    progname);
            /* wait 20 seconds between retries */
            sleep(20);
            PQreset(primaryConn);
        }
        else
        {
            fprintf(stderr, "\n%s: Connection to master has been restored, continue monitoring.\n",
                    progname);
            break;
        }
    }

    if (PQstatus(primaryConn) != CONNECTION_OK)
    {
        fprintf(stderr, "\n%s: We couldn't reconnect to master, checking if ", progname);
        fprintf(stderr, "%s: another node has been promoted.\n", progname);
        for (connection_retries = 0; connection_retries < 6; connection_retries++)
        {
            primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName,
                                              &primaryId);
            if (PQstatus(primaryConn) == CONNECTION_OK)
            {
                /* Connected, we can continue the process so break the loop */
                fprintf(stderr, "\n%s: Connected to node %d, continue monitoring.\n",
                        progname, primaryId);
                break;
            }
            else
            {
                fprintf(stderr, "\n%s: We haven't found a new master, waiting before retry...\n",
                        progname);
                /* wait 5 minutes between retries; after 6 failures (30 minutes) we stop trying */
                sleep(300);
            }
        }
    }

    if (PQstatus(primaryConn) != CONNECTION_OK)
    {
        fprintf(stderr, "\n%s: We couldn't reconnect for long enough, exiting...\n",
                progname);
        exit(1);
    }

    /* Check if we are still a standby, we could have been promoted */
    if (!is_standby(myLocalConn))
    {
        fprintf(stderr, "\n%s: seems like we have been promoted, so exit from monitoring...\n",
                progname);
        CloseConnections();
        exit(1);
    }

    /*
     * First check if there is a command being executed, and if that is the
     * case, cancel the query so we can insert the current record.
     */
    if (PQisBusy(primaryConn) == 1)
        CancelQuery();

    /* Get local xlog info */
    sprintf(sqlquery,
            "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
            "pg_last_xlog_replay_location()");

    res = PQexec(myLocalConn, sqlquery);
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn));
        PQclear(res);
        /* if there is any error just let it be and retry in next loop */
        return;
    }

    strcpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0));
    strcpy(last_wal_standby_received, PQgetvalue(res, 0, 1));
    strcpy(last_wal_standby_applied, PQgetvalue(res, 0, 2));
    PQclear(res);

    /* Get primary xlog info */
    sprintf(sqlquery, "SELECT pg_current_xlog_location() ");

    res = PQexec(primaryConn, sqlquery);
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(primaryConn));
        PQclear(res);
        return;
    }

    strcpy(last_wal_primary_location, PQgetvalue(res, 0, 0));
    PQclear(res);

    /* Calculate the lag */
    lsn_primary = walLocationToBytes(last_wal_primary_location);
    lsn_standby_received = walLocationToBytes(last_wal_standby_received);
    lsn_standby_applied = walLocationToBytes(last_wal_standby_applied);

    /*
     * Build the SQL to execute on primary
     */
    sprintf(sqlquery,
            "INSERT INTO repmgr_%s.repl_monitor "
            "VALUES(%d, %d, '%s'::timestamp with time zone, "
            " '%s', '%s', "
            " %lld, %lld)",
            myClusterName, primaryId, myLocalId,
            monitor_standby_timestamp, last_wal_primary_location,
            last_wal_standby_received,
            (lsn_primary - lsn_standby_received),
            (lsn_standby_received - lsn_standby_applied));

    /*
     * Execute the query asynchronously, but don't check for a result. We
     * will check the result next time we pause for a monitor step.
     */
    if (PQsendQuery(primaryConn, sqlquery) == 0)
        fprintf(stderr, "Query could not be sent to primary. %s\n",
                PQerrorMessage(primaryConn));
}