/*
 * shards_colocated is a SQL-callable function that tells whether two shards
 * are co-located.
 *
 * Arguments:
 *   0 - identifier of the first shard (read as a 32-bit unsigned value)
 *   1 - identifier of the second shard (read as a 32-bit unsigned value)
 *
 * Each identifier is resolved to its ShardInterval through LoadShardInterval,
 * and the pair is handed to ShardsColocated; that result is returned as a
 * boolean Datum.
 *
 * NOTE(review): other functions in this source read shard identifiers as
 * 64-bit values (PG_GETARG_INT64). Confirm that the SQL declaration of this
 * function really uses a 32-bit argument type; if it is declared with bigint
 * arguments the identifiers are truncated here.
 */
Datum
shards_colocated(PG_FUNCTION_ARGS)
{
	uint32 firstShardId = PG_GETARG_UINT32(0);
	uint32 secondShardId = PG_GETARG_UINT32(1);
	ShardInterval *firstInterval = LoadShardInterval(firstShardId);
	ShardInterval *secondInterval = LoadShardInterval(secondShardId);

	PG_RETURN_BOOL(ShardsColocated(firstInterval, secondInterval));
}
/*
 * RebuildQueryStrings deparses the job query for each task to
 * include execution-time changes such as function evaluation.
 *
 * originalQuery - query that is deparsed once per task. The relation OID
 *                 passed to the deparser is taken from its first range
 *                 table entry.
 * taskList      - list of Task pointers; each task's queryString is
 *                 overwritten with the freshly deparsed text.
 *
 * For tasks flagged as insertSelectQuery the original query is copied first
 * and the copy is adjusted (shard interval restriction on the SELECT, target
 * list reordering, alias on the INSERT range table entry, relation-to-shard
 * name substitution) before deparsing, so originalQuery itself is not
 * modified on that path.
 *
 * NOTE(review): the closing brace of this function is not present in this
 * view of the file; the text below ends with the brace that closes the
 * foreach loop.
 */
void
RebuildQueryStrings(Query *originalQuery, List *taskList)
{
	ListCell *taskCell = NULL;

	/* relation OID of the first range table entry of the original query */
	Oid relationId = ((RangeTblEntry *) linitial(originalQuery->rtable))->relid;

	foreach(taskCell, taskList)
	{
		Task *task = (Task *) lfirst(taskCell);
		StringInfo newQueryString = makeStringInfo();

		/*
		 * By default the shared original query is deparsed as-is; the
		 * INSERT..SELECT branch below swaps in a private copy.
		 */
		Query *query = originalQuery;

		if (task->insertSelectQuery)
		{
			/* for INSERT..SELECT, adjust shard names in SELECT part */
			RangeTblEntry *copiedInsertRte = NULL;
			RangeTblEntry *copiedSubqueryRte = NULL;
			Query *copiedSubquery = NULL;
			List *relationShardList = task->relationShardList;
			ShardInterval *shardInterval = LoadShardInterval(task->anchorShardId);

			/* work on a copy so the per-task changes do not leak into other tasks */
			query = copyObject(originalQuery);

			copiedInsertRte = ExtractInsertRangeTableEntry(query);
			copiedSubqueryRte = ExtractSelectRangeTableEntry(query);
			copiedSubquery = copiedSubqueryRte->subquery;

			AddShardIntervalRestrictionToSelect(copiedSubquery, shardInterval);
			ReorderInsertSelectTargetLists(query, copiedInsertRte, copiedSubqueryRte);

			/* setting an alias simplifies deparsing of RETURNING */
			if (copiedInsertRte->alias == NULL)
			{
				Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
				copiedInsertRte->alias = alias;
			}

			UpdateRelationToShardNames((Node *) copiedSubquery, relationShardList);
		}

		/* deparse for the task's anchor shard into newQueryString */
		deparse_shard_query(query, relationId, task->anchorShardId,
							newQueryString);

		ereport(DEBUG4, (errmsg("query before rebuilding: %s", task->queryString)));
		ereport(DEBUG4, (errmsg("query after rebuilding: %s", newQueryString->data)));

		/* the task now points at the StringInfo's buffer */
		task->queryString = newQueryString->data;
	}
/*
 * master_append_table_to_shard appends the given table's contents to the given
 * shard, and updates shard metadata on the master node. If the function fails
 * to append table data to all shard placements, it doesn't update any metadata
 * and errors out. Else if the function fails to append table data to some of
 * the shard placements, it marks those placements as invalid. These invalid
 * placements will get cleaned up during shard rebalancing.
 *
 * Arguments:
 *   0 - shard identifier (64-bit)
 *   1 - name of the source table (text)
 *   2 - name of the node holding the source table (text)
 *   3 - port of the node holding the source table
 *
 * Errors are raised when the shard's storage type is not a regular table and
 * the relation is not a cstore table, when the relation is hash-partitioned,
 * and when the shard has no finalized placements.
 *
 * NOTE(review): only the part of this function up to the end of the
 * per-placement loop is present in this view; the handling of failed
 * placements, the metadata update and the return statement described above
 * are not visible here. Several locals declared below (failedPlacementCell,
 * newShardSize, shardMaxSizeInBytes, shardFillLevel) are not used in the
 * visible part and are presumably used by the missing remainder.
 */
Datum
master_append_table_to_shard(PG_FUNCTION_ARGS)
{
	uint64 shardId = PG_GETARG_INT64(0);
	text *sourceTableNameText = PG_GETARG_TEXT_P(1);
	text *sourceNodeNameText = PG_GETARG_TEXT_P(2);
	uint32 sourceNodePort = PG_GETARG_UINT32(3);
	char *sourceTableName = text_to_cstring(sourceTableNameText);
	char *sourceNodeName = text_to_cstring(sourceNodeNameText);

	char *shardName = NULL;
	List *shardPlacementList = NIL;
	List *succeededPlacementList = NIL;
	List *failedPlacementList = NIL;
	ListCell *shardPlacementCell = NULL;
	ListCell *failedPlacementCell = NULL;
	uint64 newShardSize = 0;
	uint64 shardMaxSizeInBytes = 0;
	float4 shardFillLevel = 0.0;
	char partitionMethod = 0;

	/* the shard's metadata supplies the owning relation and the storage type */
	ShardInterval *shardInterval = LoadShardInterval(shardId);
	Oid relationId = shardInterval->relationId;
	bool cstoreTable = CStoreTable(relationId);

	char storageType = shardInterval->storageType;

	/*
	 * Check INSERT permission on the relation before doing anything else;
	 * presumably this raises an error when the permission is missing --
	 * confirm against EnsureTablePermissions.
	 */
	EnsureTablePermissions(relationId, ACL_INSERT);

	/* only regular-table shards and cstore tables can be appended to */
	if (storageType != SHARD_STORAGE_TABLE && !cstoreTable)
	{
		ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId),
						errdetail("The underlying shard is not a regular table")));
	}

	/* appending is rejected for hash-partitioned relations */
	partitionMethod = PartitionMethod(relationId);
	if (partitionMethod == DISTRIBUTE_BY_HASH)
	{
		ereport(ERROR, (errmsg("cannot append to shardId " UINT64_FORMAT, shardId),
						errdetail("We currently don't support appending to shards "
								  "in hash-partitioned tables")));
	}

	/*
	 * We lock on the shardId, but do not unlock. When the function returns, and
	 * the transaction for this function commits, this lock will automatically
	 * be released. This ensures appends to a shard happen in a serial manner.
	 */
	LockShardResource(shardId, AccessExclusiveLock);

	/* if shard doesn't have an alias, extend regular table name */
	shardName = LoadShardAlias(relationId, shardId);
	if (shardName == NULL)
	{
		shardName = get_rel_name(relationId);
		AppendShardIdToName(&shardName, shardId);
	}

	/* an empty placement list means there is nowhere to append to */
	shardPlacementList = FinalizedShardPlacementList(shardId);
	if (shardPlacementList == NIL)
	{
		ereport(ERROR, (errmsg("could not find any shard placements for shardId "
							   UINT64_FORMAT, shardId),
						errhint("Try running master_create_empty_shard() first")));
	}

	/* issue command to append table to each shard placement */
	foreach(shardPlacementCell, shardPlacementList)
	{
		ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(shardPlacementCell);
		char *workerName = shardPlacement->nodeName;
		uint32 workerPort = shardPlacement->nodePort;
		List *queryResultList = NIL;

		/* names are passed through quote_literal_cstr before being formatted into the command */
		StringInfo workerAppendQuery = makeStringInfo();
		appendStringInfo(workerAppendQuery, WORKER_APPEND_TABLE_TO_SHARD,
						 quote_literal_cstr(shardName),
						 quote_literal_cstr(sourceTableName),
						 quote_literal_cstr(sourceNodeName), sourceNodePort);

		/*
		 * inserting data should be performed by the current user
		 * (NULL is passed as the third argument; presumably that selects the
		 * current user -- confirm against ExecuteRemoteQuery)
		 */
		queryResultList = ExecuteRemoteQuery(workerName, workerPort, NULL,
											 workerAppendQuery);

		/* a NIL result is treated as a failed append on this placement */
		if (queryResultList != NIL)
		{
			succeededPlacementList = lappend(succeededPlacementList, shardPlacement);
		}
		else
		{
			failedPlacementList = lappend(failedPlacementList, shardPlacement);
		}
	}
/* * master_copy_shard_placement implements a user-facing UDF to copy data from * a healthy (source) node to an inactive (target) node. To accomplish this it * entirely recreates the table structure before copying all data. During this * time all modifications are paused to the shard. After successful repair, the * inactive placement is marked healthy and modifications may continue. If the * repair fails at any point, this function throws an error, leaving the node * in an unhealthy state. */ Datum master_copy_shard_placement(PG_FUNCTION_ARGS) { int64 shardId = PG_GETARG_INT64(0); text *sourceNodeName = PG_GETARG_TEXT_P(1); int32 sourceNodePort = PG_GETARG_INT32(2); text *targetNodeName = PG_GETARG_TEXT_P(3); int32 targetNodePort = PG_GETARG_INT32(4); ShardInterval *shardInterval = LoadShardInterval(shardId); Oid distributedTableId = shardInterval->relationId; List *shardPlacementList = NIL; ShardPlacement *sourcePlacement = NULL; ShardPlacement *targetPlacement = NULL; List *ddlCommandList = NIL; bool recreated = false; bool dataCopied = false; /* * By taking an exclusive lock on the shard, we both stop all modifications * (INSERT, UPDATE, or DELETE) and prevent concurrent repair operations from * being able to operate on this shard. 
*/ LockShard(shardId, ExclusiveLock); shardPlacementList = LoadShardPlacementList(shardId); sourcePlacement = SearchShardPlacementInList(shardPlacementList, sourceNodeName, sourceNodePort); if (sourcePlacement->shardState != STATE_FINALIZED) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("source placement must be in finalized state"))); } targetPlacement = SearchShardPlacementInList(shardPlacementList, targetNodeName, targetNodePort); if (targetPlacement->shardState != STATE_INACTIVE) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("target placement must be in inactive state"))); } /* retrieve the DDL commands for the table and run them */ ddlCommandList = RecreateTableDDLCommandList(distributedTableId, shardId); recreated = ExecuteRemoteCommandList(targetPlacement->nodeName, targetPlacement->nodePort, ddlCommandList); if (!recreated) { ereport(ERROR, (errmsg("could not recreate shard table"), errhint("Consult recent messages in the server logs for " "details."))); } HOLD_INTERRUPTS(); dataCopied = CopyDataFromFinalizedPlacement(distributedTableId, shardId, sourcePlacement, targetPlacement); if (!dataCopied) { ereport(ERROR, (errmsg("could not copy shard data"), errhint("Consult recent messages in the server logs for " "details."))); } /* the placement is repaired, so return to finalized state */ DeleteShardPlacementRow(targetPlacement->id); InsertShardPlacementRow(targetPlacement->id, targetPlacement->shardId, STATE_FINALIZED, targetPlacement->nodeName, targetPlacement->nodePort); RESUME_INTERRUPTS(); PG_RETURN_VOID(); }