Пример #1
0
/*
 * shards_colocated is a SQL-callable function that takes two shard ids, loads
 * the shard interval of each, and returns true when ShardsColocated() reports
 * the two intervals as co-located, false otherwise.
 *
 * NOTE(review): both shard ids are read as 32-bit values here; confirm that
 * this matches the argument types in the function's SQL declaration.
 */
Datum
shards_colocated(PG_FUNCTION_ARGS)
{
	uint32 firstShardId = PG_GETARG_UINT32(0);
	uint32 secondShardId = PG_GETARG_UINT32(1);

	/* load the intervals in argument order: first shard, then second shard */
	ShardInterval *firstInterval = LoadShardInterval(firstShardId);
	ShardInterval *secondInterval = LoadShardInterval(secondShardId);

	PG_RETURN_BOOL(ShardsColocated(firstInterval, secondInterval));
}
Пример #2
0
/*
 * RebuildQueryStrings deparses the job query for each task to
 * include execution-time changes such as function evaluation.
 *
 * originalQuery is the query to deparse; the relation of its first range
 * table entry is handed to the deparser for every task. taskList holds the
 * Task entries whose queryString is overwritten with the newly deparsed text.
 *
 * NOTE(review): the closing brace of this function is not visible in this
 * excerpt.
 */
void
RebuildQueryStrings(Query *originalQuery, List *taskList)
{
	ListCell *taskCell = NULL;

	/* relation OID of the first range table entry, used for every task below */
	Oid relationId = ((RangeTblEntry *) linitial(originalQuery->rtable))->relid;

	foreach(taskCell, taskList)
	{
		Task *task = (Task *) lfirst(taskCell);
		StringInfo newQueryString = makeStringInfo();

		/* deparse the shared original unless a per-task copy is made below */
		Query *query = originalQuery;

		if (task->insertSelectQuery)
		{
			/* for INSERT..SELECT, adjust shard names in SELECT part */
			RangeTblEntry *copiedInsertRte = NULL;
			RangeTblEntry *copiedSubqueryRte = NULL;
			Query *copiedSubquery = NULL;
			List *relationShardList = task->relationShardList;
			ShardInterval *shardInterval = LoadShardInterval(task->anchorShardId);

			/* all changes below are applied to a copy, not to originalQuery */
			query = copyObject(originalQuery);

			copiedInsertRte = ExtractInsertRangeTableEntry(query);
			copiedSubqueryRte = ExtractSelectRangeTableEntry(query);
			copiedSubquery = copiedSubqueryRte->subquery;

			/*
			 * NOTE(review): presumably limits the SELECT to the anchor shard's
			 * interval and lines up the INSERT/SELECT target lists; confirm
			 * against the helpers' definitions.
			 */
			AddShardIntervalRestrictionToSelect(copiedSubquery, shardInterval);
			ReorderInsertSelectTargetLists(query, copiedInsertRte, copiedSubqueryRte);

			/* setting an alias simplifies deparsing of RETURNING */
			if (copiedInsertRte->alias == NULL)
			{
				Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
				copiedInsertRte->alias = alias;
			}

			UpdateRelationToShardNames((Node *) copiedSubquery, relationShardList);
		}

		/* write the deparsed query for this task's anchor shard into the buffer */
		deparse_shard_query(query, relationId, task->anchorShardId,
							newQueryString);

		/* log the old and the new query string before replacing the old one */
		ereport(DEBUG4, (errmsg("query before rebuilding: %s",
								task->queryString)));
		ereport(DEBUG4, (errmsg("query after rebuilding:  %s",
								newQueryString->data)));

		/* the task now points at the buffer's data; the old string is not freed here */
		task->queryString = newQueryString->data;
	}
Пример #3
0
/*
 * master_append_table_to_shard appends the given table's contents to the given
 * shard, and updates shard metadata on the master node. If the function fails
 * to append table data to all shard placements, it doesn't update any metadata
 * and errors out. Else if the function fails to append table data to some of
 * the shard placements, it marks those placements as invalid. These invalid
 * placements will get cleaned up during shard rebalancing.
 *
 * SQL arguments, in order: the shard id, the source table name (text), the
 * source node name (text) and the source node port.
 *
 * NOTE(review): only the first part of this function is visible in this
 * excerpt; the code after the placement loop (including the metadata update
 * described above) is not shown.
 */
Datum
master_append_table_to_shard(PG_FUNCTION_ARGS)
{
	uint64 shardId = PG_GETARG_INT64(0);
	text *sourceTableNameText = PG_GETARG_TEXT_P(1);
	text *sourceNodeNameText = PG_GETARG_TEXT_P(2);
	uint32 sourceNodePort = PG_GETARG_UINT32(3);

	/* C-string versions of the text arguments, used when building the query */
	char *sourceTableName = text_to_cstring(sourceTableNameText);
	char *sourceNodeName = text_to_cstring(sourceNodeNameText);

	char *shardName = NULL;
	List *shardPlacementList = NIL;
	List *succeededPlacementList = NIL;
	List *failedPlacementList = NIL;
	ListCell *shardPlacementCell = NULL;
	ListCell *failedPlacementCell = NULL;
	uint64 newShardSize = 0;
	uint64 shardMaxSizeInBytes = 0;
	float4 shardFillLevel = 0.0;
	char partitionMethod = 0;

	/* the shard's interval provides the owning relation and the storage type */
	ShardInterval *shardInterval = LoadShardInterval(shardId);
	Oid relationId = shardInterval->relationId;
	bool cstoreTable = CStoreTable(relationId);

	char storageType = shardInterval->storageType;

	/* check INSERT permission on the relation before doing anything else */
	EnsureTablePermissions(relationId, ACL_INSERT);

	/* only table-storage shards and cstore tables can be appended to */
	if (storageType != SHARD_STORAGE_TABLE && !cstoreTable)
	{
		ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId),
						errdetail("The underlying shard is not a regular table")));
	}

	/* appending is rejected for hash-partitioned tables */
	partitionMethod = PartitionMethod(relationId);
	if (partitionMethod == DISTRIBUTE_BY_HASH)
	{
		ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId),
						errdetail("We currently don't support appending to shards "
								  "in hash-partitioned tables")));
	}

	/*
	 * We lock on the shardId, but do not unlock. When the function returns, and
	 * the transaction for this function commits, this lock will automatically
	 * be released. This ensures appends to a shard happen in a serial manner.
	 */
	LockShardResource(shardId, AccessExclusiveLock);

	/* if shard doesn't have an alias, extend regular table name */
	shardName = LoadShardAlias(relationId, shardId);
	if (shardName == NULL)
	{
		shardName = get_rel_name(relationId);
		AppendShardIdToName(&shardName, shardId);
	}

	/* an empty placement list is an error: there is nowhere to append to */
	shardPlacementList = FinalizedShardPlacementList(shardId);
	if (shardPlacementList == NIL)
	{
		ereport(ERROR, (errmsg("could not find any shard placements for shardId "
							   UINT64_FORMAT, shardId),
						errhint("Try running master_create_empty_shard() first")));
	}

	/* issue command to append table to each shard placement */
	foreach(shardPlacementCell, shardPlacementList)
	{
		ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(shardPlacementCell);
		char *workerName = shardPlacement->nodeName;
		uint32 workerPort = shardPlacement->nodePort;
		List *queryResultList = NIL;

		/* the shard name, source table and source node are passed as quoted literals */
		StringInfo workerAppendQuery = makeStringInfo();
		appendStringInfo(workerAppendQuery, WORKER_APPEND_TABLE_TO_SHARD,
						 quote_literal_cstr(shardName),
						 quote_literal_cstr(sourceTableName),
						 quote_literal_cstr(sourceNodeName), sourceNodePort);

		/* inserting data should be performed by the current user */
		queryResultList = ExecuteRemoteQuery(workerName, workerPort, NULL,
											 workerAppendQuery);

		/* a NIL result counts as a failed placement, anything else as success */
		if (queryResultList != NIL)
		{
			succeededPlacementList = lappend(succeededPlacementList, shardPlacement);
		}
		else
		{
			failedPlacementList = lappend(failedPlacementList, shardPlacement);
		}
	}
Пример #4
0
/*
 * master_copy_shard_placement is the user-facing UDF that repairs an inactive
 * (target) shard placement from a healthy (source) one. The shard's table
 * structure is first recreated on the target node and then all data is copied
 * over. Modifications to the shard are blocked while this runs. When the copy
 * succeeds, the target placement is recorded as finalized again; a failure at
 * any step raises an error and leaves the target placement unhealthy.
 *
 * SQL arguments, in order: shard id, source node name, source node port,
 * target node name, target node port.
 */
Datum
master_copy_shard_placement(PG_FUNCTION_ARGS)
{
    int64 shardId = PG_GETARG_INT64(0);
    text *sourceNodeName = PG_GETARG_TEXT_P(1);
    int32 sourceNodePort = PG_GETARG_INT32(2);
    text *targetNodeName = PG_GETARG_TEXT_P(3);
    int32 targetNodePort = PG_GETARG_INT32(4);
    ShardInterval *shardInterval = LoadShardInterval(shardId);
    Oid relationId = shardInterval->relationId;

    List *placementList = NIL;
    ShardPlacement *healthyPlacement = NULL;
    ShardPlacement *brokenPlacement = NULL;
    List *recreateCommandList = NIL;

    /*
     * The exclusive shard lock serves two purposes: it blocks INSERT, UPDATE
     * and DELETE on the shard, and it keeps concurrent repairs of the same
     * shard from running.
     */
    LockShard(shardId, ExclusiveLock);

    placementList = LoadShardPlacementList(shardId);

    /* the source of the copy has to be a finalized placement */
    healthyPlacement = SearchShardPlacementInList(placementList, sourceNodeName,
                                                  sourceNodePort);
    if (healthyPlacement->shardState != STATE_FINALIZED)
    {
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                        errmsg("source placement must be in finalized state")));
    }

    /* only an inactive placement may be overwritten */
    brokenPlacement = SearchShardPlacementInList(placementList, targetNodeName,
                                                 targetNodePort);
    if (brokenPlacement->shardState != STATE_INACTIVE)
    {
        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                        errmsg("target placement must be in inactive state")));
    }

    /* fetch the table's DDL commands and replay them on the target node */
    recreateCommandList = RecreateTableDDLCommandList(relationId, shardId);

    if (!ExecuteRemoteCommandList(brokenPlacement->nodeName,
                                  brokenPlacement->nodePort,
                                  recreateCommandList))
    {
        ereport(ERROR, (errmsg("could not recreate shard table"),
                        errhint("Consult recent messages in the server logs for "
                                "details.")));
    }

    /*
     * Interrupts stay held from the start of the data copy until the
     * placement row has been rewritten.
     * NOTE(review): presumably so a cancel cannot separate the copy from the
     * metadata update; confirm the intent.
     */
    HOLD_INTERRUPTS();

    if (!CopyDataFromFinalizedPlacement(relationId, shardId,
                                        healthyPlacement, brokenPlacement))
    {
        ereport(ERROR, (errmsg("could not copy shard data"),
                        errhint("Consult recent messages in the server logs for "
                                "details.")));
    }

    /* repair is done: replace the placement row with a finalized one */
    DeleteShardPlacementRow(brokenPlacement->id);
    InsertShardPlacementRow(brokenPlacement->id, brokenPlacement->shardId,
                            STATE_FINALIZED, brokenPlacement->nodeName,
                            brokenPlacement->nodePort);

    RESUME_INTERRUPTS();

    PG_RETURN_VOID();
}