/*
 * master_append_table_to_shard appends the given table's contents to the given
 * shard, and updates shard metadata on the master node. If the append fails on
 * every shard placement, the function doesn't update any metadata and errors
 * out. If it fails on only some of the placements, it marks those placements
 * as invalid; these invalid placements are cleaned up later during shard
 * rebalancing.
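 *
 * A usage sketch (illustrative, with hypothetical identifiers):
 *
 *   SELECT master_append_table_to_shard(102009, 'events_staging',
 *                                       'source-host', 5432);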
 */
Datum
master_append_table_to_shard(PG_FUNCTION_ARGS)
{
	uint64 shardId = PG_GETARG_INT64(0);
	text *sourceTableNameText = PG_GETARG_TEXT_P(1);
	text *sourceNodeNameText = PG_GETARG_TEXT_P(2);
	uint32 sourceNodePort = PG_GETARG_UINT32(3);
	char *sourceTableName = text_to_cstring(sourceTableNameText);
	char *sourceNodeName = text_to_cstring(sourceNodeNameText);

	char *shardName = NULL;
	List *shardPlacementList = NIL;
	List *succeededPlacementList = NIL;
	List *failedPlacementList = NIL;
	ListCell *shardPlacementCell = NULL;
	ListCell *failedPlacementCell = NULL;
	uint64 newShardSize = 0;
	uint64 shardMaxSizeInBytes = 0;
	float4 shardFillLevel = 0.0;
	char partitionMethod = 0;

	ShardInterval *shardInterval = LoadShardInterval(shardId);
	Oid relationId = shardInterval->relationId;
	bool cstoreTable = CStoreTable(relationId);

	char storageType = shardInterval->storageType;

	EnsureTablePermissions(relationId, ACL_INSERT);

	if (storageType != SHARD_STORAGE_TABLE && !cstoreTable)
	{
		ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId),
						errdetail("The underlying shard is not a regular table")));
	}

	partitionMethod = PartitionMethod(relationId);
	if (partitionMethod == DISTRIBUTE_BY_HASH)
	{
		ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId),
						errdetail("We currently don't support appending to shards "
								  "in hash-partitioned tables")));
	}

	/*
	 * We lock on the shardId, but do not unlock. When the function returns, and
	 * the transaction for this function commits, this lock will automatically
	 * be released. This ensures appends to a shard happen in a serial manner.
	 */
	LockShardResource(shardId, AccessExclusiveLock);
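	/*
	 * For example, a second backend running this function concurrently for
	 * the same shardId blocks on the lock above until this transaction
	 * completes, so the two appends run one after the other.
	 */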

	/* if shard doesn't have an alias, extend regular table name */
	shardName = LoadShardAlias(relationId, shardId);
	if (shardName == NULL)
	{
		shardName = get_rel_name(relationId);
		AppendShardIdToName(&shardName, shardId);
	}
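	/*
	 * For example (hypothetical names), a relation "github_events" holding
	 * shardId 102009 and having no alias resolves to the shard name
	 * "github_events_102009".
	 */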

	shardPlacementList = FinalizedShardPlacementList(shardId);
	if (shardPlacementList == NIL)
	{
		ereport(ERROR, (errmsg("could not find any shard placements for shardId "
							   UINT64_FORMAT, shardId),
						errhint("Try running master_create_empty_shard() first")));
	}

	/* issue command to append table to each shard placement */
	foreach(shardPlacementCell, shardPlacementList)
	{
		ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(shardPlacementCell);
		char *workerName = shardPlacement->nodeName;
		uint32 workerPort = shardPlacement->nodePort;
		List *queryResultList = NIL;

		StringInfo workerAppendQuery = makeStringInfo();
		appendStringInfo(workerAppendQuery, WORKER_APPEND_TABLE_TO_SHARD,
						 quote_literal_cstr(shardName),
						 quote_literal_cstr(sourceTableName),
						 quote_literal_cstr(sourceNodeName), sourceNodePort);
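		/*
		 * Assuming WORKER_APPEND_TABLE_TO_SHARD wraps the worker-side
		 * worker_append_table_to_shard() UDF, the constructed query looks
		 * roughly like this (hypothetical values):
		 *
		 *   SELECT worker_append_table_to_shard('events_102009', 'events_staging',
		 *                                       'source-host', 5432)
		 */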

		/* inserting data should be performed by the current user */
		queryResultList = ExecuteRemoteQuery(workerName, workerPort, NULL,
											 workerAppendQuery);
		if (queryResultList != NIL)
		{
			succeededPlacementList = lappend(succeededPlacementList, shardPlacement);
		}
		else
		{
			failedPlacementList = lappend(failedPlacementList, shardPlacement);
		}
	}
/*
 * DropShards drops all given shards in a relation. The id, name and schema
 * for the relation are explicitly provided, since this function may be
 * called when the table is already dropped.
 *
 * We mark shard placements that we couldn't drop as to be deleted later, but
 * we do delete the shard metadata.
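 *
 * This helper is typically reached through shard deletion UDFs such as
 * master_apply_delete_command(), which compute the deletable shard interval
 * list before calling in here.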
 */
static int
DropShards(Oid relationId, char *schemaName, char *relationName,
		   List *deletableShardIntervalList)
{
	ListCell *shardIntervalCell = NULL;
	int droppedShardCount = 0;

	BeginOrContinueCoordinatedTransaction();

	/*
	 * Note that we intentionally do not force 2PC for reference tables here;
	 * 2PC is only used when the multi-shard commit protocol requests it.
	 */
	if (MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC)
	{
		CoordinatedTransactionUse2PC();
	}
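	/*
	 * MultiShardCommitProtocol is presumably driven by a commit-protocol GUC;
	 * for example, SET citus.multi_shard_commit_protocol TO '2pc'; would make
	 * this branch opt the coordinated transaction into two-phase commit.
	 */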

	foreach(shardIntervalCell, deletableShardIntervalList)
	{
		List *shardPlacementList = NIL;
		ListCell *shardPlacementCell = NULL;
		ShardInterval *shardInterval = (ShardInterval *) lfirst(shardIntervalCell);
		uint64 shardId = shardInterval->shardId;
		char *quotedShardName = NULL;
		char *shardRelationName = pstrdup(relationName);

		Assert(shardInterval->relationId == relationId);

		/* build shard relation name */
		AppendShardIdToName(&shardRelationName, shardId);
		quotedShardName = quote_qualified_identifier(schemaName, shardRelationName);
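		/*
		 * For example (hypothetical names), schemaName "public", relationName
		 * "events", and shardId 102010 produce the qualified shard name
		 * "public.events_102010"; quote_qualified_identifier only adds quotes
		 * when the identifiers require them.
		 */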

		shardPlacementList = ShardPlacementList(shardId);
		foreach(shardPlacementCell, shardPlacementList)
		{
			ShardPlacement *shardPlacement =
				(ShardPlacement *) lfirst(shardPlacementCell);
			char *workerName = shardPlacement->nodeName;
			uint32 workerPort = shardPlacement->nodePort;
			StringInfo workerDropQuery = makeStringInfo();
			MultiConnection *connection = NULL;
			uint32 connectionFlags = FOR_DDL;

			char storageType = shardInterval->storageType;
			if (storageType == SHARD_STORAGE_TABLE)
			{
				appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND,
								 quotedShardName);
			}
			else if (storageType == SHARD_STORAGE_COLUMNAR ||
					 storageType == SHARD_STORAGE_FOREIGN)
			{
				appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND,
								 quotedShardName);
			}
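			/*
			 * The drop commands presumably expand to statements along the lines
			 * of DROP TABLE IF EXISTS <shard> for regular shards and
			 * DROP FOREIGN TABLE IF EXISTS <shard> for columnar/foreign shards.
			 */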

			connection = GetPlacementConnection(connectionFlags, shardPlacement, NULL);

			RemoteTransactionBeginIfNecessary(connection);

			if (PQstatus(connection->pgConn) != CONNECTION_OK)
			{
				uint64 placementId = shardPlacement->placementId;

				ereport(WARNING, (errmsg("could not connect to shard \"%s\" on node "
										 "\"%s:%u\"", shardRelationName, workerName,
										 workerPort),
								  errdetail("Marking this shard placement for "
											"deletion")));

				UpdateShardPlacementState(placementId, FILE_TO_DELETE);

				continue;
			}

			MarkRemoteTransactionCritical(connection);

			ExecuteCriticalRemoteCommand(connection, workerDropQuery->data);

			DeleteShardPlacementRow(shardPlacement->placementId);
		}