/* * count_remote_temp_table_rows just returns the integer count of rows in the * table created by initialize_remote_temp_table. If no such table exists, this * function emits a warning and returns -1. */ Datum count_remote_temp_table_rows(PG_FUNCTION_ARGS) { char *nodeName = PG_GETARG_CSTRING(0); int32 nodePort = PG_GETARG_INT32(1); Datum count = Int32GetDatum(-1); PGresult *result = NULL; PGconn *connection = GetOrEstablishConnection(nodeName, nodePort); if (connection == NULL) { PG_RETURN_DATUM(count); } result = PQexec(connection, COUNT_TEMP_TABLE); if (PQresultStatus(result) != PGRES_TUPLES_OK) { WarnRemoteError(connection, result); } else { char *countText = PQgetvalue(result, 0, 0); count = ExtractIntegerDatum(countText); } PQclear(result); PG_RETURN_DATUM(count); }
/*
 * PrepareRemoteTransactions prepares the transaction on every connection in
 * connectionList for commit by sending PREPARE TRANSACTION to each of them.
 * NOTE(review): nothing in this function checks whether the 2PC commit
 * protocol is enabled; presumably the caller does - confirm.
 *
 * connectionList holds TransactionConnection pointers. Each entry that is
 * prepared successfully has its transactionState set to
 * TRANSACTION_STATE_PREPARED. On the first failure the entry is marked
 * TRANSACTION_STATE_CLOSED, the remote error is reported as a warning and an
 * ERROR is raised, so later entries in the list are not processed.
 */
void
PrepareRemoteTransactions(List *connectionList)
{
    ListCell *connectionCell = NULL;

    foreach(connectionCell, connectionList)
    {
        TransactionConnection *transactionConnection =
            (TransactionConnection *) lfirst(connectionCell);
        PGconn *connection = transactionConnection->connection;
        int64 connectionId = transactionConnection->connectionId;

        PGresult *result = NULL;
        StringInfo command = makeStringInfo();

        /* the prepared transaction's name is derived from the connection id */
        StringInfo transactionName = BuildTransactionName(connectionId);

        appendStringInfo(command, "PREPARE TRANSACTION '%s'", transactionName->data);

        result = PQexec(connection, command->data);
        if (PQresultStatus(result) != PGRES_COMMAND_OK)
        {
            /* a failure to prepare is an implicit rollback */
            transactionConnection->transactionState = TRANSACTION_STATE_CLOSED;

            /* report and release the result before ERROR leaves this function */
            WarnRemoteError(connection, result);
            PQclear(result);

            ereport(ERROR, (errcode(ERRCODE_IO_ERROR),
                            errmsg("failed to prepare transaction")));
        }

        PQclear(result);

        transactionConnection->transactionState = TRANSACTION_STATE_PREPARED;
    }
/*
 * MultiClientBatchResult fetches the next result of a query "batch", that is,
 * a string of several statements separated by semicolons that was sent over
 * the connection. Callers are expected to keep calling this function until it
 * returns CLIENT_BATCH_QUERY_DONE, even after a failure. If one query of the
 * batch fails, the remaining ones are not executed.
 *
 * Outputs:
 *   queryResult - the result object for CLIENT_BATCH_QUERY_CONTINUE, NULL
 *                 otherwise; must be released with ClientClearResult
 *   rowCount, columnCount - set for results that carry tuples, -1 otherwise
 *
 * Returns CLIENT_BATCH_QUERY_CONTINUE when a result was handed over,
 * CLIENT_BATCH_QUERY_DONE when no results are left, and
 * CLIENT_BATCH_QUERY_FAILED when the connection is bad or the result reports
 * an error (a warning is emitted in both failure cases).
 */
BatchQueryStatus
MultiClientBatchResult(int32 connectionId, void **queryResult, int *rowCount,
                       int *columnCount)
{
    PGconn *connection = NULL;
    PGresult *batchResult = NULL;
    BatchQueryStatus batchStatus = CLIENT_INVALID_BATCH_QUERY;

    Assert(connectionId != INVALID_CONNECTION_ID);
    connection = ClientConnectionArray[connectionId];
    Assert(connection != NULL);

    /* outputs keep these defaults unless a result is handed to the caller */
    *queryResult = NULL;
    *rowCount = -1;
    *columnCount = -1;

    if (PQstatus(connection) == CONNECTION_BAD)
    {
        ereport(WARNING, (errmsg("could not maintain connection to worker node")));
        return CLIENT_BATCH_QUERY_FAILED;
    }

    /* a NULL result means every query in the batch has been consumed */
    batchResult = PQgetResult(connection);
    if (batchResult == NULL)
    {
        return CLIENT_BATCH_QUERY_DONE;
    }

    switch (PQresultStatus(batchResult))
    {
        case PGRES_TUPLES_OK:
        {
            *queryResult = batchResult;
            *rowCount = PQntuples(batchResult);
            *columnCount = PQnfields(batchResult);
            batchStatus = CLIENT_BATCH_QUERY_CONTINUE;
            break;
        }

        case PGRES_COMMAND_OK:
        {
            /* no tuples, so the row and column counts stay at -1 */
            *queryResult = batchResult;
            batchStatus = CLIENT_BATCH_QUERY_CONTINUE;
            break;
        }

        default:
        {
            WarnRemoteError(connection, batchResult);
            PQclear(batchResult);
            batchStatus = CLIENT_BATCH_QUERY_FAILED;
            break;
        }
    }

    return batchStatus;
}
/*
 * MultiClientConnectPoll advances an asynchronous connection attempt and
 * returns its current status.
 *
 * The last polling status recorded for connectionId decides what happens:
 *   - OK:      the connection is ready (CLIENT_CONNECTION_READY)
 *   - READING: if the connection is ready to be read, poll again and report
 *              CLIENT_CONNECTION_BUSY; otherwise CLIENT_CONNECTION_BUSY_READ
 *   - WRITING: same as above for writing, with CLIENT_CONNECTION_BUSY_WRITE
 *   - FAILED:  emit a warning and report CLIENT_CONNECTION_BAD
 * Any other polling status yields CLIENT_INVALID_CONNECT.
 */
ConnectStatus
MultiClientConnectPoll(int32 connectionId)
{
    PGconn *connection = NULL;
    ConnectStatus status = CLIENT_INVALID_CONNECT;

    Assert(connectionId != INVALID_CONNECTION_ID);
    connection = ClientConnectionArray[connectionId];
    Assert(connection != NULL);

    switch (ClientPollingStatusArray[connectionId])
    {
        case PGRES_POLLING_OK:
        {
            status = CLIENT_CONNECTION_READY;
            break;
        }

        case PGRES_POLLING_READING:
        {
            if (ClientConnectionReady(connection, PGRES_POLLING_READING))
            {
                /* remember the new polling status for the next call */
                ClientPollingStatusArray[connectionId] = PQconnectPoll(connection);
                status = CLIENT_CONNECTION_BUSY;
            }
            else
            {
                status = CLIENT_CONNECTION_BUSY_READ;
            }
            break;
        }

        case PGRES_POLLING_WRITING:
        {
            if (ClientConnectionReady(connection, PGRES_POLLING_WRITING))
            {
                /* remember the new polling status for the next call */
                ClientPollingStatusArray[connectionId] = PQconnectPoll(connection);
                status = CLIENT_CONNECTION_BUSY;
            }
            else
            {
                status = CLIENT_CONNECTION_BUSY_WRITE;
            }
            break;
        }

        case PGRES_POLLING_FAILED:
        {
            WarnRemoteError(connection, NULL);
            status = CLIENT_CONNECTION_BAD;
            break;
        }

        default:
        {
            /* unexpected polling status; report the invalid default */
            break;
        }
    }

    return status;
}
/*
 * MultiClientConnectStart asynchronously tries to establish a connection to
 * nodeDatabase on nodeName:nodePort as the current user. If the attempt could
 * be started, it returns the connection id. Otherwise, it reports a warning
 * and returns INVALID_CONNECTION_ID.
 *
 * An ERROR is raised when a modification command has already run in the
 * current transaction, since new connections may not be opened after that
 * point.
 *
 * A connection string that does not fit into the local buffer is treated as a
 * failure instead of being silently truncated, because a truncated string
 * could name a different host, database or user than the one requested.
 */
int32
MultiClientConnectStart(const char *nodeName, uint32 nodePort, const char *nodeDatabase)
{
    PGconn *connection = NULL;
    char connInfoString[STRING_BUFFER_SIZE];
    int connInfoLength = 0;
    ConnStatusType connStatusType = CONNECTION_BAD;
    char *userName = CurrentUserName();

    int32 connectionId = AllocateConnectionId();
    if (connectionId == INVALID_CONNECTION_ID)
    {
        ereport(WARNING, (errmsg("could not allocate connection in connection pool")));
        return connectionId;
    }

    if (XactModificationLevel > XACT_MODIFICATION_NONE)
    {
        ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
                        errmsg("cannot open new connections after the first modification "
                               "command within a transaction")));
    }

    /* transcribe connection parameters to string */
    connInfoLength = snprintf(connInfoString, STRING_BUFFER_SIZE, CONN_INFO_TEMPLATE,
                              nodeName, nodePort, nodeDatabase, userName,
                              CLIENT_CONNECT_TIMEOUT);

    /* refuse to connect with parameters that were cut off */
    if (connInfoLength < 0 || (size_t) connInfoLength >= sizeof(connInfoString))
    {
        ereport(WARNING, (errmsg("connection parameters for worker node are too "
                                 "long")));
        return INVALID_CONNECTION_ID;
    }

    /* prepare asynchronous request for worker node connection */
    connection = PQconnectStart(connInfoString);
    connStatusType = PQstatus(connection);

    /*
     * If prepared, we save the connection, and set its initial polling status
     * to PGRES_POLLING_WRITING as specified in "Database Connection Control
     * Functions" section of the PostgreSQL documentation.
     */
    if (connStatusType != CONNECTION_BAD)
    {
        ClientConnectionArray[connectionId] = connection;
        ClientPollingStatusArray[connectionId] = PGRES_POLLING_WRITING;
    }
    else
    {
        WarnRemoteError(connection, NULL);

        PQfinish(connection);
        connectionId = INVALID_CONNECTION_ID;
    }

    return connectionId;
}
/*
 * ConnectToNode opens a connection to a remote PostgreSQL server. The
 * connection is configured with a fallback application name of 'citus', the
 * local database encoding as client encoding, the name of the current
 * database, and the given user. All parameters are required to be non NULL.
 *
 * Up to MAX_CONNECT_ATTEMPTS connection attempts are made. The first
 * connection that comes up healthy is returned. If every attempt fails, a
 * warning describing the last failure is emitted and NULL is returned.
 */
PGconn *
ConnectToNode(char *nodeName, int32 nodePort, char *nodeUser)
{
    const char *clientEncoding = GetDatabaseEncodingName();
    const char *dbname = get_database_name(MyDatabaseId);
    int attempt = 0;

    const char *keywordArray[] = {
        "host", "port", "fallback_application_name",
        "client_encoding", "connect_timeout", "dbname", "user", NULL
    };

    /* large enough for any int32 in decimal: sign, ten digits and the NUL */
    char nodePortString[12];

    /* the port entry points at nodePortString, which is filled in just below */
    const char *valueArray[] = {
        nodeName, nodePortString, "citus", clientEncoding,
        CLIENT_CONNECT_TIMEOUT_SECONDS, dbname, nodeUser, NULL
    };

    sprintf(nodePortString, "%d", nodePort);

    Assert(sizeof(keywordArray) == sizeof(valueArray));

    for (attempt = 1; attempt <= MAX_CONNECT_ATTEMPTS; attempt++)
    {
        PGconn *candidate = PQconnectdbParams(keywordArray, valueArray, false);
        if (PQstatus(candidate) == CONNECTION_OK)
        {
            return candidate;
        }

        /* warn if still erroring on final attempt */
        if (attempt == MAX_CONNECT_ATTEMPTS)
        {
            WarnRemoteError(candidate, NULL);
        }

        PQfinish(candidate);
    }

    return NULL;
}
/*
 * MultiClientQueryResult gets the result of an asynchronous query.
 *
 * On success the function stores the result object in queryResult, its number
 * of rows in rowCount and its number of columns in columnCount, drains any
 * further results pending on the connection, and returns true.
 *
 * It returns false, after emitting a warning, when the connection is bad or
 * when the fetched result does not carry tuples. In the latter case the
 * result is released here and the output parameters are left untouched.
 */
bool
MultiClientQueryResult(int32 connectionId, void **queryResult, int *rowCount,
                       int *columnCount)
{
    PGconn *connection = NULL;
    PGresult *fetched = NULL;

    Assert(connectionId != INVALID_CONNECTION_ID);
    connection = ClientConnectionArray[connectionId];
    Assert(connection != NULL);

    if (PQstatus(connection) == CONNECTION_BAD)
    {
        ereport(WARNING, (errmsg("could not maintain connection to worker node")));
        return false;
    }

    fetched = PQgetResult(connection);
    if (PQresultStatus(fetched) != PGRES_TUPLES_OK)
    {
        WarnRemoteError(connection, fetched);
        PQclear(fetched);

        return false;
    }

    /* hand the result and its dimensions over to the caller */
    *queryResult = fetched;
    *rowCount = PQntuples(fetched);
    *columnCount = PQnfields(fetched);

    /* clear extra result objects */
    ClearRemainingResults(connection);

    return true;
}
/* * initialize_remote_temp_table connects to a specified host on a specified * port and creates a temporary table with 100 rows. Because the table is * temporary, it will be visible if a connection is reused but not if a new * connection is opened to the node. */ Datum initialize_remote_temp_table(PG_FUNCTION_ARGS) { char *nodeName = PG_GETARG_CSTRING(0); int32 nodePort = PG_GETARG_INT32(1); PGresult *result = NULL; PGconn *connection = GetOrEstablishConnection(nodeName, nodePort); if (connection == NULL) { PG_RETURN_BOOL(false); } result = PQexec(connection, POPULATE_TEMP_TABLE); if (PQresultStatus(result) != PGRES_COMMAND_OK) { WarnRemoteError(connection, result); } PQclear(result); PG_RETURN_BOOL(true); }
/*
 * MultiClientConnect synchronously tries to establish a connection. If it
 * succeeds, it returns the connection id. Otherwise, it reports a warning and
 * returns INVALID_CONNECTION_ID.
 *
 * nodeDatabase and userName can be NULL, in which case values from the
 * current session are used.
 *
 * An ERROR is raised when a modification command has already run in the
 * current transaction, since new connections may not be opened after that
 * point.
 *
 * A connection string that does not fit into the local buffer is treated as a
 * failure instead of being silently truncated, because a truncated string
 * could name a different host, database or user than the one requested.
 */
int32
MultiClientConnect(const char *nodeName, uint32 nodePort, const char *nodeDatabase,
                   const char *userName)
{
    PGconn *connection = NULL;
    char connInfoString[STRING_BUFFER_SIZE];
    int connInfoLength = 0;
    ConnStatusType connStatusType = CONNECTION_OK;
    int32 connectionId = AllocateConnectionId();
    char *effectiveDatabaseName = NULL;
    char *effectiveUserName = NULL;

    if (XactModificationLevel > XACT_MODIFICATION_NONE)
    {
        ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
                        errmsg("cannot open new connections after the first modification "
                               "command within a transaction")));
    }

    if (connectionId == INVALID_CONNECTION_ID)
    {
        ereport(WARNING, (errmsg("could not allocate connection in connection pool")));
        return connectionId;
    }

    if (nodeDatabase == NULL)
    {
        effectiveDatabaseName = get_database_name(MyDatabaseId);
    }
    else
    {
        effectiveDatabaseName = pstrdup(nodeDatabase);
    }

    if (userName == NULL)
    {
        effectiveUserName = CurrentUserName();
    }
    else
    {
        effectiveUserName = pstrdup(userName);
    }

    /*
     * FIXME: This code still forgoes any escaping and misses setting a number
     * of parameters. We shouldn't even build a connection string this way,
     * there's PQconnectdbParams()!
     */

    /* transcribe connection parameters to string */
    connInfoLength = snprintf(connInfoString, STRING_BUFFER_SIZE, CONN_INFO_TEMPLATE,
                              nodeName, nodePort, effectiveDatabaseName,
                              effectiveUserName, CLIENT_CONNECT_TIMEOUT);

    /* the names were copied into connInfoString and are no longer needed */
    pfree(effectiveDatabaseName);
    pfree(effectiveUserName);

    /* refuse to connect with parameters that were cut off */
    if (connInfoLength < 0 || (size_t) connInfoLength >= sizeof(connInfoString))
    {
        ereport(WARNING, (errmsg("connection parameters for worker node are too "
                                 "long")));
        return INVALID_CONNECTION_ID;
    }

    /* establish synchronous connection to worker node */
    connection = PQconnectdb(connInfoString);
    connStatusType = PQstatus(connection);

    if (connStatusType == CONNECTION_OK)
    {
        ClientConnectionArray[connectionId] = connection;
    }
    else
    {
        WarnRemoteError(connection, NULL);

        PQfinish(connection);
        connectionId = INVALID_CONNECTION_ID;
    }

    return connectionId;
}
/*
 * MultiClientCopyData receives the copy data currently available on the
 * connection and appends it to the file behind fileDescriptor.
 *
 * Returns CLIENT_COPY_MORE when more data is expected but none can be read
 * without blocking, CLIENT_COPY_DONE when the copy finished and the final
 * command result is OK, and CLIENT_COPY_FAILED when input could not be read
 * from the worker, when libpq reported a copy error, or when the final result
 * is not OK (a warning is emitted for each failure).
 *
 * A short or failed write to the file is reported at FATAL level.
 */
CopyStatus
MultiClientCopyData(int32 connectionId, int32 fileDescriptor)
{
    PGconn *connection = NULL;
    char *chunk = NULL;
    int chunkLength = 0;
    const int asynchronous = 1;
    CopyStatus copyStatus = CLIENT_INVALID_COPY;

    Assert(connectionId != INVALID_CONNECTION_ID);
    connection = ClientConnectionArray[connectionId];
    Assert(connection != NULL);

    /*
     * Consume input to handle the case where previous copy operation might have
     * received zero bytes.
     */
    if (PQconsumeInput(connection) == 0)
    {
        ereport(WARNING, (errmsg("could not read data from worker node")));
        return CLIENT_COPY_FAILED;
    }

    /* keep receiving copy data messages until none is available right now */
    for (;;)
    {
        int written = -1;

        chunkLength = PQgetCopyData(connection, &chunk, asynchronous);
        if (chunkLength <= 0)
        {
            break;
        }

        /* received copy data; append these data to file */
        errno = 0;
        written = write(fileDescriptor, chunk, chunkLength);
        if (written != chunkLength)
        {
            /* if write didn't set errno, assume problem is no disk space */
            if (errno == 0)
            {
                errno = ENOSPC;
            }
            ereport(FATAL, (errcode_for_file_access(),
                            errmsg("could not append to copied file: %m")));
        }

        PQfreemem(chunk);
    }

    /* the last length returned by copy data tells us why the loop ended */
    switch (chunkLength)
    {
        case 0:
        {
            /* we cannot read more data without blocking */
            copyStatus = CLIENT_COPY_MORE;
            break;
        }

        case -1:
        {
            /* received copy done message */
            PGresult *finalResult = PQgetResult(connection);

            if (PQresultStatus(finalResult) == PGRES_COMMAND_OK)
            {
                copyStatus = CLIENT_COPY_DONE;
            }
            else
            {
                copyStatus = CLIENT_COPY_FAILED;

                WarnRemoteError(connection, finalResult);
            }

            PQclear(finalResult);
            break;
        }

        case -2:
        {
            /* received an error */
            copyStatus = CLIENT_COPY_FAILED;

            WarnRemoteError(connection, NULL);
            break;
        }

        default:
        {
            break;
        }
    }

    /* if copy out completed, make sure we drain all results from libpq */
    if (chunkLength < 0)
    {
        ClearRemainingResults(connection);
    }

    return copyStatus;
}
/*
 * MultiClientQueryStatus reads the next result on the connection and returns
 * the status of the query it belongs to.
 *
 * Returns CLIENT_QUERY_DONE for a finished command or select,
 * CLIENT_QUERY_COPY when the worker has started a copy-out, and
 * CLIENT_QUERY_FAILED, after emitting a warning, when the connection is bad
 * or the result has any other status.
 *
 * Unless the connection has entered the copy protocol (in either direction),
 * all further pending results are drained before returning.
 */
QueryStatus
MultiClientQueryStatus(int32 connectionId)
{
    PGconn *connection = NULL;
    PGresult *result = NULL;
    int tupleCount PG_USED_FOR_ASSERTS_ONLY = 0;
    bool drainResults = true;
    QueryStatus queryStatus = CLIENT_INVALID_QUERY;

    Assert(connectionId != INVALID_CONNECTION_ID);
    connection = ClientConnectionArray[connectionId];
    Assert(connection != NULL);

    if (PQstatus(connection) == CONNECTION_BAD)
    {
        ereport(WARNING, (errmsg("could not maintain connection to worker node")));
        return CLIENT_QUERY_FAILED;
    }

    /*
     * We now read the result object and check its status. If the result object
     * isn't ready yet (the caller didn't wait for the connection to be ready),
     * we will block on this call.
     */
    result = PQgetResult(connection);

    switch (PQresultStatus(result))
    {
        case PGRES_COMMAND_OK:
        {
            queryStatus = CLIENT_QUERY_DONE;
            break;
        }

        case PGRES_TUPLES_OK:
        {
            queryStatus = CLIENT_QUERY_DONE;

            /*
             * We use the client executor to only issue a select query that returns
             * a void value. We therefore should not have more than one value here.
             */
            tupleCount = PQntuples(result);
            Assert(tupleCount <= 1);
            break;
        }

        case PGRES_COPY_OUT:
        {
            queryStatus = CLIENT_QUERY_COPY;
            drainResults = false;
            break;
        }

        case PGRES_COPY_IN:
        {
            /* an unexpected copy-in is a failure, but still the copy protocol */
            queryStatus = CLIENT_QUERY_FAILED;
            drainResults = false;

            WarnRemoteError(connection, result);
            break;
        }

        default:
        {
            queryStatus = CLIENT_QUERY_FAILED;

            WarnRemoteError(connection, result);
            break;
        }
    }

    /* clear the result object */
    PQclear(result);

    /*
     * When using the async query mechanism, we need to keep reading results
     * until we get null. The exception to this rule is the copy protocol.
     */
    if (drainResults)
    {
        ClearRemainingResults(connection);
    }

    return queryStatus;
}