示例#1
0
/*
 * master_copy_shard_placement is the user-facing UDF that repairs an inactive
 * (target) shard placement from a finalized (source) placement of the same
 * shard. It takes the shard id followed by the node name and port of the
 * source and of the target. The shard's table is recreated on the target
 * node and the shard data is then copied over from the source. While this
 * runs, an exclusive lock on the shard pauses all modifications to it. Once
 * the copy succeeds the target placement is recorded as finalized again. If
 * any step fails, an error is thrown and the node is left in an unhealthy
 * state.
 */
Datum
master_copy_shard_placement(PG_FUNCTION_ARGS)
{
    /* UDF arguments: shard id, then (name, port) of the source and target */
    int64 shardId = PG_GETARG_INT64(0);
    text *sourceNodeName = PG_GETARG_TEXT_P(1);
    int32 sourceNodePort = PG_GETARG_INT32(2);
    text *targetNodeName = PG_GETARG_TEXT_P(3);
    int32 targetNodePort = PG_GETARG_INT32(4);

    ShardInterval *shardInterval = LoadShardInterval(shardId);
    Oid relationId = shardInterval->relationId;
    List *placementList = NIL;
    ShardPlacement *healthyPlacement = NULL;
    ShardPlacement *inactivePlacement = NULL;
    List *recreateCommandList = NIL;

    /*
     * The exclusive shard lock serves two purposes: it stops every
     * modification (INSERT, UPDATE, or DELETE) of the shard, and it keeps
     * concurrent repair operations from working on the same shard.
     */
    LockShard(shardId, ExclusiveLock);

    placementList = LoadShardPlacementList(shardId);

    /* data may only be copied out of a finalized placement */
    healthyPlacement = SearchShardPlacementInList(placementList, sourceNodeName,
                                                  sourceNodePort);
    if (healthyPlacement->shardState != STATE_FINALIZED)
    {
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                        errmsg("source placement must be in finalized state")));
    }

    /* only a placement currently marked inactive is eligible for repair */
    inactivePlacement = SearchShardPlacementInList(placementList, targetNodeName,
                                                   targetNodePort);
    if (inactivePlacement->shardState != STATE_INACTIVE)
    {
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                        errmsg("target placement must be in inactive state")));
    }

    /* fetch the DDL that recreates the shard's table and run it on the target */
    recreateCommandList = RecreateTableDDLCommandList(relationId, shardId);

    if (!ExecuteRemoteCommandList(inactivePlacement->nodeName,
                                  inactivePlacement->nodePort,
                                  recreateCommandList))
    {
        ereport(ERROR, (errmsg("could not recreate shard table"),
                        errhint("Consult recent messages in the server logs for details.")));
    }

    /*
     * Interrupts are held off from the start of the data copy until the
     * placement row has been rewritten below.
     */
    HOLD_INTERRUPTS();

    if (!CopyDataFromFinalizedPlacement(relationId, shardId,
                                        healthyPlacement, inactivePlacement))
    {
        ereport(ERROR, (errmsg("could not copy shard data"),
                        errhint("Consult recent messages in the server logs for details.")));
    }

    /*
     * The repair succeeded: replace the placement's row with an otherwise
     * identical one whose state is finalized.
     */
    DeleteShardPlacementRow(inactivePlacement->id);
    InsertShardPlacementRow(inactivePlacement->id, inactivePlacement->shardId,
                            STATE_FINALIZED, inactivePlacement->nodeName,
                            inactivePlacement->nodePort);

    RESUME_INTERRUPTS();

    PG_RETURN_VOID();
}
示例#2
0
/*
 * DropShards drops all given shards in a relation. The id, name and schema
 * for the relation are explicitly provided, since this function may be
 * called when the table is already dropped.
 *
 * We mark shard placements that we couldn't drop as to be deleted later, but
 * we do delete the shard metadata.
 */
static int
DropShards(Oid relationId, char *schemaName, char *relationName,
		   List *deletableShardIntervalList)
{
	ListCell *shardIntervalCell = NULL;
	int droppedShardCount = 0;

	BeginOrContinueCoordinatedTransaction();

	/* At this point we intentionally decided to not use 2PC for reference tables */
	if (MultiShardCommitProtocol == COMMIT_PROTOCOL_2PC)
	{
		CoordinatedTransactionUse2PC();
	}

	foreach(shardIntervalCell, deletableShardIntervalList)
	{
		List *shardPlacementList = NIL;
		ListCell *shardPlacementCell = NULL;
		ShardInterval *shardInterval = (ShardInterval *) lfirst(shardIntervalCell);
		uint64 shardId = shardInterval->shardId;
		char *quotedShardName = NULL;
		char *shardRelationName = pstrdup(relationName);

		Assert(shardInterval->relationId == relationId);

		/* Build shard relation name. */
		AppendShardIdToName(&shardRelationName, shardId);
		quotedShardName = quote_qualified_identifier(schemaName, shardRelationName);

		shardPlacementList = ShardPlacementList(shardId);
		foreach(shardPlacementCell, shardPlacementList)
		{
			ShardPlacement *shardPlacement =
				(ShardPlacement *) lfirst(shardPlacementCell);
			char *workerName = shardPlacement->nodeName;
			uint32 workerPort = shardPlacement->nodePort;
			StringInfo workerDropQuery = makeStringInfo();
			MultiConnection *connection = NULL;
			uint32 connectionFlags = FOR_DDL;

			char storageType = shardInterval->storageType;
			if (storageType == SHARD_STORAGE_TABLE)
			{
				appendStringInfo(workerDropQuery, DROP_REGULAR_TABLE_COMMAND,
								 quotedShardName);
			}
			else if (storageType == SHARD_STORAGE_COLUMNAR ||
					 storageType == SHARD_STORAGE_FOREIGN)
			{
				appendStringInfo(workerDropQuery, DROP_FOREIGN_TABLE_COMMAND,
								 quotedShardName);
			}

			connection = GetPlacementConnection(connectionFlags, shardPlacement, NULL);

			RemoteTransactionBeginIfNecessary(connection);

			if (PQstatus(connection->pgConn) != CONNECTION_OK)
			{
				uint64 placementId = shardPlacement->placementId;

				ereport(WARNING, (errmsg("could not connect to shard \"%s\" on node "
										 "\"%s:%u\"", shardRelationName, workerName,
										 workerPort),
								  errdetail("Marking this shard placement for "
											"deletion")));

				UpdateShardPlacementState(placementId, FILE_TO_DELETE);

				continue;
			}

			MarkRemoteTransactionCritical(connection);

			ExecuteCriticalRemoteCommand(connection, workerDropQuery->data);

			DeleteShardPlacementRow(shardPlacement->placementId);
		}