/* * ShardLength finds shard placements for the given shardId, extracts the length * of a finalized shard, and returns the shard's length. This function errors * out if we cannot find any finalized shard placements for the given shardId. */ uint64 ShardLength(uint64 shardId) { uint64 shardLength = 0; List *shardPlacementList = FinalizedShardPlacementList(shardId); if (shardPlacementList == NIL) { ereport(ERROR, (errmsg("could not find length of shard " UINT64_FORMAT, shardId), errdetail("Could not find any shard placements for the shard."))); } else { ShardPlacement *shardPlacement = (ShardPlacement *) linitial(shardPlacementList); shardLength = shardPlacement->shardLength; } return shardLength; }
/* * master_append_table_to_shard appends the given table's contents to the given * shard, and updates shard metadata on the master node. If the function fails * to append table data to all shard placements, it doesn't update any metadata * and errors out. Else if the function fails to append table data to some of * the shard placements, it marks those placements as invalid. These invalid * placements will get cleaned up during shard rebalancing. */ Datum master_append_table_to_shard(PG_FUNCTION_ARGS) { uint64 shardId = PG_GETARG_INT64(0); text *sourceTableNameText = PG_GETARG_TEXT_P(1); text *sourceNodeNameText = PG_GETARG_TEXT_P(2); uint32 sourceNodePort = PG_GETARG_UINT32(3); char *sourceTableName = text_to_cstring(sourceTableNameText); char *sourceNodeName = text_to_cstring(sourceNodeNameText); char *shardName = NULL; List *shardPlacementList = NIL; List *succeededPlacementList = NIL; List *failedPlacementList = NIL; ListCell *shardPlacementCell = NULL; ListCell *failedPlacementCell = NULL; uint64 newShardSize = 0; uint64 shardMaxSizeInBytes = 0; float4 shardFillLevel = 0.0; char partitionMethod = 0; ShardInterval *shardInterval = LoadShardInterval(shardId); Oid relationId = shardInterval->relationId; bool cstoreTable = CStoreTable(relationId); char storageType = shardInterval->storageType; EnsureTablePermissions(relationId, ACL_INSERT); if (storageType != SHARD_STORAGE_TABLE && !cstoreTable) { ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId), errdetail("The underlying shard is not a regular table"))); } partitionMethod = PartitionMethod(relationId); if (partitionMethod == DISTRIBUTE_BY_HASH) { ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId), errdetail("We currently don't support appending to shards " "in hash-partitioned tables"))); } /* * We lock on the shardId, but do not unlock. When the function returns, and * the transaction for this function commits, this lock will automatically * be released. This ensures appends to a shard happen in a serial manner. */ LockShardResource(shardId, AccessExclusiveLock); /* if shard doesn't have an alias, extend regular table name */ shardName = LoadShardAlias(relationId, shardId); if (shardName == NULL) { shardName = get_rel_name(relationId); AppendShardIdToName(&shardName, shardId); } shardPlacementList = FinalizedShardPlacementList(shardId); if (shardPlacementList == NIL) { ereport(ERROR, (errmsg("could not find any shard placements for shardId " UINT64_FORMAT, shardId), errhint("Try running master_create_empty_shard() first"))); } /* issue command to append table to each shard placement */ foreach(shardPlacementCell, shardPlacementList) { ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(shardPlacementCell); char *workerName = shardPlacement->nodeName; uint32 workerPort = shardPlacement->nodePort; List *queryResultList = NIL; StringInfo workerAppendQuery = makeStringInfo(); appendStringInfo(workerAppendQuery, WORKER_APPEND_TABLE_TO_SHARD, quote_literal_cstr(shardName), quote_literal_cstr(sourceTableName), quote_literal_cstr(sourceNodeName), sourceNodePort); /* inserting data should be performed by the current user */ queryResultList = ExecuteRemoteQuery(workerName, workerPort, NULL, workerAppendQuery); if (queryResultList != NIL) { succeededPlacementList = lappend(succeededPlacementList, shardPlacement); } else { failedPlacementList = lappend(failedPlacementList, shardPlacement); } }
/* * OpenTransactionsToAllShardPlacements opens connections to all placements * using the provided shard identifier list and returns it as a shard ID -> * ShardConnections hash. connectionFlags can be used to specify whether * the command is FOR_DML or FOR_DDL. */ HTAB * OpenTransactionsToAllShardPlacements(List *shardIntervalList, int connectionFlags) { HTAB *shardConnectionHash = NULL; ListCell *shardIntervalCell = NULL; List *newConnectionList = NIL; shardConnectionHash = CreateShardConnectionHash(CurrentMemoryContext); /* open connections to shards which don't have connections yet */ foreach(shardIntervalCell, shardIntervalList) { ShardInterval *shardInterval = (ShardInterval *) lfirst(shardIntervalCell); uint64 shardId = shardInterval->shardId; ShardConnections *shardConnections = NULL; bool shardConnectionsFound = false; List *shardPlacementList = NIL; ListCell *placementCell = NULL; shardConnections = GetShardHashConnections(shardConnectionHash, shardId, &shardConnectionsFound); if (shardConnectionsFound) { continue; } shardPlacementList = FinalizedShardPlacementList(shardId); if (shardPlacementList == NIL) { /* going to have to have some placements to do any work */ ereport(ERROR, (errmsg("could not find any shard placements for the shard " UINT64_FORMAT, shardId))); } foreach(placementCell, shardPlacementList) { ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(placementCell); MultiConnection *connection = NULL; WorkerNode *workerNode = FindWorkerNode(shardPlacement->nodeName, shardPlacement->nodePort); if (workerNode == NULL) { ereport(ERROR, (errmsg("could not find worker node %s:%d", shardPlacement->nodeName, shardPlacement->nodePort))); } connection = StartPlacementConnection(connectionFlags, shardPlacement, NULL); ClaimConnectionExclusively(connection); shardConnections->connectionList = lappend(shardConnections->connectionList, connection); newConnectionList = lappend(newConnectionList, connection); /* * Every individual failure should cause entire distributed * transaction to fail. */ MarkRemoteTransactionCritical(connection); }
/* * OpenTransactionsForAllTasks opens a connection for each task, * taking into account which shards are read and modified by the task * to select the appopriate connection, or error out if no appropriate * connection can be found. The set of connections is returned as an * anchor shard ID -> ShardConnections hash. */ HTAB * OpenTransactionsForAllTasks(List *taskList, int connectionFlags) { HTAB *shardConnectionHash = NULL; ListCell *taskCell = NULL; List *newConnectionList = NIL; shardConnectionHash = CreateShardConnectionHash(CurrentMemoryContext); connectionFlags |= CONNECTION_PER_PLACEMENT; /* open connections to shards which don't have connections yet */ foreach(taskCell, taskList) { Task *task = (Task *) lfirst(taskCell); ShardPlacementAccessType accessType = PLACEMENT_ACCESS_SELECT; uint64 shardId = task->anchorShardId; ShardConnections *shardConnections = NULL; bool shardConnectionsFound = false; List *shardPlacementList = NIL; ListCell *placementCell = NULL; shardConnections = GetShardHashConnections(shardConnectionHash, shardId, &shardConnectionsFound); if (shardConnectionsFound) { continue; } shardPlacementList = FinalizedShardPlacementList(shardId); if (shardPlacementList == NIL) { /* going to have to have some placements to do any work */ ereport(ERROR, (errmsg("could not find any shard placements for the shard " UINT64_FORMAT, shardId))); } if (task->taskType == MODIFY_TASK) { accessType = PLACEMENT_ACCESS_DML; } else { /* can only open connections for DDL and DML commands */ Assert(task->taskType == DDL_TASK || VACUUM_ANALYZE_TASK); accessType = PLACEMENT_ACCESS_DDL; } foreach(placementCell, shardPlacementList) { ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(placementCell); ShardPlacementAccess placementModification; List *placementAccessList = NIL; MultiConnection *connection = NULL; WorkerNode *workerNode = FindWorkerNode(shardPlacement->nodeName, shardPlacement->nodePort); if (workerNode == NULL) { ereport(ERROR, (errmsg("could not find worker node %s:%d", shardPlacement->nodeName, shardPlacement->nodePort))); } /* add placement access for modification */ placementModification.placement = shardPlacement; placementModification.accessType = accessType; placementAccessList = lappend(placementAccessList, &placementModification); if (accessType == PLACEMENT_ACCESS_DDL) { List *placementDDLList = BuildPlacementDDLList(shardPlacement->groupId, task->relationShardList); /* * All relations appearing inter-shard DDL commands should be marked * with DDL access. */ placementAccessList = list_concat(placementAccessList, placementDDLList); } else { List *placementSelectList = BuildPlacementSelectList(shardPlacement->groupId, task->relationShardList); /* add additional placement accesses for subselects (e.g. INSERT .. SELECT) */ placementAccessList = list_concat(placementAccessList, placementSelectList); } /* * Find a connection that sees preceding writes and cannot self-deadlock, * or error out if no such connection exists. */ connection = StartPlacementListConnection(connectionFlags, placementAccessList, NULL); ClaimConnectionExclusively(connection); shardConnections->connectionList = lappend(shardConnections->connectionList, connection); newConnectionList = lappend(newConnectionList, connection); /* * Every individual failure should cause entire distributed * transaction to fail. */ MarkRemoteTransactionCritical(connection); }