/*
 * DropShards drops all given shards in a relation. The id, name and schema
 * for the relation are explicitly provided, since this function may be
 * called when the table is already dropped.
 *
 * relationId is only used to sanity-check (via Assert) that every shard
 * interval in deletableShardIntervalList belongs to the relation; schemaName
 * and relationName are used to build the qualified shard name that is dropped
 * on each placement.
 *
 * We mark shard placements that we couldn't drop as to be deleted later, but
 * we do delete the shard metadata.
 */
static int
DropShards(Oid relationId, char *schemaName, char *relationName,
		   List *deletableShardIntervalList)
{
	ListCell *shardIntervalCell = NULL;
	int droppedShardCount = 0;

	BeginOrContinueCoordinatedTransaction();

	/* At this point we intentionally decided to not use 2PC for reference tables */
	if (MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC)
	{
		CoordinatedTransactionUse2PC();
	}

	foreach(shardIntervalCell, deletableShardIntervalList)
	{
		List *shardPlacementList = NIL;
		ListCell *shardPlacementCell = NULL;
		ShardInterval *shardInterval = (ShardInterval *) lfirst(shardIntervalCell);
		uint64 shardId = shardInterval->shardId;
		char *quotedShardName = NULL;

		/*
		 * Work on a private copy of the relation name: its address is handed
		 * to AppendShardIdToName below, and the caller's string is reused for
		 * every shard in the list.
		 */
		char *shardRelationName = pstrdup(relationName);

		Assert(shardInterval->relationId == relationId);

		/* Build the schema-qualified, quoted shard relation name. */
		AppendShardIdToName(&shardRelationName, shardId);
		quotedShardName = quote_qualified_identifier(schemaName, shardRelationName);

		/* Issue the drop on every placement of this shard. */
		shardPlacementList = ShardPlacementList(shardId);
		foreach(shardPlacementCell, shardPlacementList)
		{
			ShardPlacement *shardPlacement =
				(ShardPlacement *) lfirst(shardPlacementCell);
			char *workerName = shardPlacement->nodeName;
			uint32 workerPort = shardPlacement->nodePort;
			StringInfo workerDropQuery = makeStringInfo();
			MultiConnection *connection = NULL;
			uint32 connectionFlags = FOR_DDL;

			/*
			 * Pick the drop command template matching the shard's storage
			 * type. NOTE(review): there is no else branch, so for any other
			 * storage type workerDropQuery stays empty and is still sent to
			 * the worker below -- confirm that no other types can occur.
			 */
			char storageType = shardInterval->storageType;
			if (storageType == SHARD_STORAGE_TABLE)
			{
				appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND,
								 quotedShardName);
			}
			else if (storageType == SHARD_STORAGE_COLUMNAR ||
					 storageType == SHARD_STORAGE_FOREIGN)
			{
				appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND,
								 quotedShardName);
			}

			connection = GetPlacementConnection(connectionFlags, shardPlacement,
												NULL);

			/*
			 * NOTE(review): the connection status is only checked after this
			 * call; presumably RemoteTransactionBeginIfNecessary tolerates a
			 * connection that failed to establish -- confirm.
			 */
			RemoteTransactionBeginIfNecessary(connection);

			if (PQstatus(connection->pgConn) != CONNECTION_OK)
			{
				uint64 placementId = shardPlacement->placementId;

				/*
				 * The worker is unreachable: warn, flag the placement as
				 * FILE_TO_DELETE so it can be cleaned up later, and move on
				 * to the next placement instead of failing the whole drop.
				 */
				ereport(WARNING, (errmsg("could not connect to shard \"%s\" on node "
										 "\"%s:%u\"", shardRelationName, workerName,
										 workerPort),
								  errdetail("Marking this shard placement for "
											"deletion")));

				UpdateShardPlacementState(placementId, FILE_TO_DELETE);

				continue;
			}

			/*
			 * The connection is usable: mark its remote transaction critical
			 * before sending the drop command (presumably so that a failure
			 * from here on aborts the coordinated transaction -- confirm).
			 */
			MarkRemoteTransactionCritical(connection);

			ExecuteCriticalRemoteCommand(connection, workerDropQuery->data);

			/* The drop was issued; delete the placement's row by placement id. */
			DeleteShardPlacementRow(shardPlacement->placementId);
		}
/*
 * OpenTransactionsToAllShardPlacements opens connections to all placements
 * using the provided shard identifier list and returns it as a shard ID ->
 * ShardConnections hash. connectionFlags can be used to specify whether
 * the command is FOR_DML or FOR_DDL.
 *
 * Raises an ERROR when a shard has no finalized placements or when a
 * placement refers to a worker node that cannot be found.
 */
HTAB *
OpenTransactionsToAllShardPlacements(List *shardIntervalList, int connectionFlags)
{
	HTAB *shardConnectionHash = NULL;
	ListCell *shardIntervalCell = NULL;
	List *newConnectionList = NIL;

	/* the hash is created with CurrentMemoryContext passed in */
	shardConnectionHash = CreateShardConnectionHash(CurrentMemoryContext);

	/* open connections to shards which don't have connections yet */
	foreach(shardIntervalCell, shardIntervalList)
	{
		ShardInterval *shardInterval = (ShardInterval *) lfirst(shardIntervalCell);
		uint64 shardId = shardInterval->shardId;
		ShardConnections *shardConnections = NULL;
		bool shardConnectionsFound = false;
		List *shardPlacementList = NIL;
		ListCell *placementCell = NULL;

		/*
		 * Look up the hash entry for this shard. If an entry was already
		 * present, the shard appeared earlier in the list and its
		 * connections have been opened, so skip it.
		 */
		shardConnections = GetShardHashConnections(shardConnectionHash, shardId,
												   &shardConnectionsFound);
		if (shardConnectionsFound)
		{
			continue;
		}

		shardPlacementList = FinalizedShardPlacementList(shardId);
		if (shardPlacementList == NIL)
		{
			/* going to have to have some placements to do any work */
			ereport(ERROR, (errmsg("could not find any shard placements for the shard "
								   UINT64_FORMAT, shardId)));
		}

		foreach(placementCell, shardPlacementList)
		{
			ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(placementCell);
			MultiConnection *connection = NULL;

			/* refuse to connect to a placement whose worker node is unknown */
			WorkerNode *workerNode = FindWorkerNode(shardPlacement->nodeName,
													shardPlacement->nodePort);
			if (workerNode == NULL)
			{
				ereport(ERROR, (errmsg("could not find worker node %s:%d",
									   shardPlacement->nodeName,
									   shardPlacement->nodePort)));
			}

			connection = StartPlacementConnection(connectionFlags, shardPlacement,
												  NULL);

			ClaimConnectionExclusively(connection);

			/* record the connection under this shard's hash entry ... */
			shardConnections->connectionList = lappend(shardConnections->connectionList,
													   connection);

			/* ... and in the list of connections started by this call */
			newConnectionList = lappend(newConnectionList, connection);

			/*
			 * Every individual failure should cause the entire distributed
			 * transaction to fail.
			 */
			MarkRemoteTransactionCritical(connection);
		}
/*
 * OpenTransactionsForAllTasks opens a connection for each task,
 * taking into account which shards are read and modified by the task
 * to select the appropriate connection, or error out if no appropriate
 * connection can be found. The set of connections is returned as an
 * anchor shard ID -> ShardConnections hash.
 *
 * connectionFlags is always combined with CONNECTION_PER_PLACEMENT before
 * any connection is started. Raises an ERROR when a task's anchor shard has
 * no finalized placements or when a placement refers to a worker node that
 * cannot be found.
 */
HTAB *
OpenTransactionsForAllTasks(List *taskList, int connectionFlags)
{
	HTAB *shardConnectionHash = NULL;
	ListCell *taskCell = NULL;
	List *newConnectionList = NIL;

	shardConnectionHash = CreateShardConnectionHash(CurrentMemoryContext);

	connectionFlags |= CONNECTION_PER_PLACEMENT;

	/* open connections to shards which don't have connections yet */
	foreach(taskCell, taskList)
	{
		Task *task = (Task *) lfirst(taskCell);
		ShardPlacementAccessType accessType = PLACEMENT_ACCESS_SELECT;
		uint64 shardId = task->anchorShardId;
		ShardConnections *shardConnections = NULL;
		bool shardConnectionsFound = false;
		List *shardPlacementList = NIL;
		ListCell *placementCell = NULL;

		/*
		 * Look up the hash entry for the task's anchor shard. If an entry was
		 * already present, an earlier task with the same anchor shard has
		 * opened the connections, so skip this task.
		 */
		shardConnections = GetShardHashConnections(shardConnectionHash, shardId,
												   &shardConnectionsFound);
		if (shardConnectionsFound)
		{
			continue;
		}

		shardPlacementList = FinalizedShardPlacementList(shardId);
		if (shardPlacementList == NIL)
		{
			/* going to have to have some placements to do any work */
			ereport(ERROR, (errmsg("could not find any shard placements for the shard "
								   UINT64_FORMAT, shardId)));
		}

		if (task->taskType == MODIFY_TASK)
		{
			accessType = PLACEMENT_ACCESS_DML;
		}
		else
		{
			/*
			 * Can only open connections for DDL and DML commands, so anything
			 * that is not a MODIFY_TASK must be a DDL or VACUUM/ANALYZE task.
			 * Compare taskType against both constants explicitly: a bare
			 * VACUUM_ANALYZE_TASK operand would make the assertion hold
			 * whenever that constant is non-zero, regardless of the task type.
			 */
			Assert(task->taskType == DDL_TASK ||
				   task->taskType == VACUUM_ANALYZE_TASK);

			accessType = PLACEMENT_ACCESS_DDL;
		}

		foreach(placementCell, shardPlacementList)
		{
			ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(placementCell);
			ShardPlacementAccess placementModification;
			List *placementAccessList = NIL;
			MultiConnection *connection = NULL;

			/* refuse to connect to a placement whose worker node is unknown */
			WorkerNode *workerNode = FindWorkerNode(shardPlacement->nodeName,
													shardPlacement->nodePort);
			if (workerNode == NULL)
			{
				ereport(ERROR, (errmsg("could not find worker node %s:%d",
									   shardPlacement->nodeName,
									   shardPlacement->nodePort)));
			}

			/*
			 * Add placement access for modification. The access struct lives
			 * on the stack of this loop iteration; the list that points to it
			 * is only passed to StartPlacementListConnection below, within
			 * the same iteration.
			 */
			placementModification.placement = shardPlacement;
			placementModification.accessType = accessType;

			placementAccessList = lappend(placementAccessList, &placementModification);

			if (accessType == PLACEMENT_ACCESS_DDL)
			{
				List *placementDDLList = BuildPlacementDDLList(shardPlacement->groupId,
															   task->relationShardList);

				/*
				 * All relations appearing inter-shard DDL commands should be marked
				 * with DDL access.
				 */
				placementAccessList = list_concat(placementAccessList, placementDDLList);
			}
			else
			{
				List *placementSelectList =
					BuildPlacementSelectList(shardPlacement->groupId,
											 task->relationShardList);

				/* add additional placement accesses for subselects (e.g. INSERT .. SELECT) */
				placementAccessList = list_concat(placementAccessList,
												  placementSelectList);
			}

			/*
			 * Find a connection that sees preceding writes and cannot self-deadlock,
			 * or error out if no such connection exists.
			 */
			connection = StartPlacementListConnection(connectionFlags,
													  placementAccessList, NULL);

			ClaimConnectionExclusively(connection);

			/* record the connection under the anchor shard's hash entry ... */
			shardConnections->connectionList = lappend(shardConnections->connectionList,
													   connection);

			/* ... and in the list of connections started by this call */
			newConnectionList = lappend(newConnectionList, connection);

			/*
			 * Every individual failure should cause entire distributed
			 * transaction to fail.
			 */
			MarkRemoteTransactionCritical(connection);
		}