/* * count_remote_temp_table_rows just returns the integer count of rows in the * table created by initialize_remote_temp_table. If no such table exists, this * function emits a warning and returns -1. */ Datum count_remote_temp_table_rows(PG_FUNCTION_ARGS) { char *nodeName = PG_GETARG_CSTRING(0); int32 nodePort = PG_GETARG_INT32(1); Datum count = Int32GetDatum(-1); PGresult *result = NULL; PGconn *connection = GetOrEstablishConnection(nodeName, nodePort); if (connection == NULL) { PG_RETURN_DATUM(count); } result = PQexec(connection, COUNT_TEMP_TABLE); if (PQresultStatus(result) != PGRES_TUPLES_OK) { WarnRemoteError(connection, result); } else { char *countText = PQgetvalue(result, 0, 0); count = ExtractIntegerDatum(countText); } PQclear(result); PG_RETURN_DATUM(count); }
/* * set_connection_status_bad does not remove the given connection from the connection hash. * It simply shuts down the underlying socket. On success, it returns true. */ Datum set_connection_status_bad(PG_FUNCTION_ARGS) { char *nodeName = PG_GETARG_CSTRING(0); int32 nodePort = PG_GETARG_INT32(1); int socket = -1; int shutdownStatus = 0; int pqStatus PG_USED_FOR_ASSERTS_ONLY = 0; PGconn *connection = GetOrEstablishConnection(nodeName, nodePort); if (connection == NULL) { PG_RETURN_BOOL(false); } /* Prevent further reads/writes... */ socket = PQsocket(connection); shutdownStatus = shutdown(socket, SHUT_RDWR); if (shutdownStatus != 0) { ereport(ERROR, (errcode_for_socket_access(), errmsg("shutdown failed"))); } /* ... and make libpq notice by reading data. */ pqStatus = PQconsumeInput(connection); Assert(pqStatus == 0); /* expect failure */ PG_RETURN_BOOL(true); }
/* * get_and_purge_connection first gets a connection using the provided hostname * and port before immediately passing that connection to PurgeConnection. * Simply a wrapper around PurgeConnection that uses hostname/port rather than * PGconn. */ Datum get_and_purge_connection(PG_FUNCTION_ARGS) { char *nodeName = PG_GETARG_CSTRING(0); int32 nodePort = PG_GETARG_INT32(1); PGconn *connection = GetOrEstablishConnection(nodeName, nodePort); if (connection == NULL) { PG_RETURN_BOOL(false); } PurgeConnection(connection); PG_RETURN_BOOL(true); }
/* * initialize_remote_temp_table connects to a specified host on a specified * port and creates a temporary table with 100 rows. Because the table is * temporary, it will be visible if a connection is reused but not if a new * connection is opened to the node. */ Datum initialize_remote_temp_table(PG_FUNCTION_ARGS) { char *nodeName = PG_GETARG_CSTRING(0); int32 nodePort = PG_GETARG_INT32(1); PGresult *result = NULL; PGconn *connection = GetOrEstablishConnection(nodeName, nodePort); if (connection == NULL) { PG_RETURN_BOOL(false); } result = PQexec(connection, POPULATE_TEMP_TABLE); if (PQresultStatus(result) != PGRES_COMMAND_OK) { WarnRemoteError(connection, result); } PQclear(result); PG_RETURN_BOOL(true); }
/* * ExecuteTaskAndStoreResults executes the task on the remote node, retrieves * the results and stores them, if SELECT or RETURNING is used, in a tuple * store. * * If the task fails on one of the placements, the function retries it on * other placements (SELECT), reraises the remote error (constraint violation * in DML), marks the affected placement as invalid (DML on some placement * failed), or errors out (DML failed on all placements). */ static bool ExecuteTaskAndStoreResults(QueryDesc *queryDesc, Task *task, bool isModificationQuery, bool expectResults) { TupleDesc tupleDescriptor = queryDesc->tupDesc; EState *executorState = queryDesc->estate; MaterialState *routerState = (MaterialState *) queryDesc->planstate; bool resultsOK = false; List *taskPlacementList = task->taskPlacementList; ListCell *taskPlacementCell = NULL; List *failedPlacementList = NIL; ListCell *failedPlacementCell = NULL; int64 affectedTupleCount = -1; bool gotResults = false; /* * Try to run the query to completion on one placement. If the query fails * attempt the query on the next placement. */ foreach(taskPlacementCell, taskPlacementList) { ShardPlacement *taskPlacement = (ShardPlacement *) lfirst(taskPlacementCell); char *nodeName = taskPlacement->nodeName; int32 nodePort = taskPlacement->nodePort; bool queryOK = false; int64 currentAffectedTupleCount = 0; PGconn *connection = GetOrEstablishConnection(nodeName, nodePort); if (connection == NULL) { failedPlacementList = lappend(failedPlacementList, taskPlacement); continue; } queryOK = SendQueryInSingleRowMode(connection, task->queryString); if (!queryOK) { PurgeConnection(connection); failedPlacementList = lappend(failedPlacementList, taskPlacement); continue; } /* * If caller is interested, store query results the first time * through. The output of the query's execution on other shards is * discarded if we run there (because it's a modification query). */ if (!gotResults && expectResults) { queryOK = StoreQueryResult(routerState, connection, tupleDescriptor, ¤tAffectedTupleCount); } else { queryOK = ConsumeQueryResult(connection, ¤tAffectedTupleCount); } if (queryOK) { if ((affectedTupleCount == -1) || (affectedTupleCount == currentAffectedTupleCount)) { affectedTupleCount = currentAffectedTupleCount; } else { ereport(WARNING, (errmsg("modified "INT64_FORMAT " tuples, but expected " "to modify "INT64_FORMAT, currentAffectedTupleCount, affectedTupleCount), errdetail("modified placement on %s:%d", nodeName, nodePort))); } #if (PG_VERSION_NUM < 90600) /* before 9.6, PostgreSQL used a uint32 for this field, so check */ Assert(currentAffectedTupleCount <= 0xFFFFFFFF); #endif resultsOK = true; gotResults = true; /* * Modifications have to be executed on all placements, but for * read queries we can stop here. */ if (!isModificationQuery) { break; } } else { PurgeConnection(connection); failedPlacementList = lappend(failedPlacementList, taskPlacement); continue; } }