/*
 * create_distributed_table gets a table name, distribution column,
 * distribution method and colocate_with option, then it creates a
 * distributed table.
 */
Datum
create_distributed_table(PG_FUNCTION_ARGS)
{
	Oid relationId = InvalidOid;
	text *distributionColumnText = NULL;
	Oid distributionMethodOid = InvalidOid;
	text *colocateWithTableNameText = NULL;

	Relation relation = NULL;
	char *distributionColumnName = NULL;
	Var *distributionColumn = NULL;
	char distributionMethod = 0;
	char *colocateWithTableName = NULL;
	bool viaDeprecatedAPI = false;

	CheckCitusVersion(ERROR);
	EnsureCoordinator();

	relationId = PG_GETARG_OID(0);
	distributionColumnText = PG_GETARG_TEXT_P(1);
	distributionMethodOid = PG_GETARG_OID(2);
	colocateWithTableNameText = PG_GETARG_TEXT_P(3);

	/*
	 * Lock target relation with an exclusive lock - there's no way to make
	 * sense of this table until we've committed, and we don't want multiple
	 * backends manipulating this relation.
	 */
	relation = try_relation_open(relationId, ExclusiveLock);
	if (relation == NULL)
	{
		ereport(ERROR, (errmsg("could not create distributed table: "
							   "relation does not exist")));
	}

	/*
	 * We should do this check here since the code below relies on this
	 * relation having a supported relation kind. More extensive checks
	 * will be performed in CreateDistributedTable.
	 */
	EnsureRelationKindSupported(relationId);

	distributionColumnName = text_to_cstring(distributionColumnText);
	distributionColumn = BuildDistributionKeyFromColumnName(relation,
															distributionColumnName);
	distributionMethod = LookupDistributionMethod(distributionMethodOid);

	colocateWithTableName = text_to_cstring(colocateWithTableNameText);

	CreateDistributedTable(relationId, distributionColumn, distributionMethod,
						   colocateWithTableName, viaDeprecatedAPI);

	relation_close(relation, NoLock);

	PG_RETURN_VOID();
}
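
/*
 * Illustrative SQL usage of the UDF above, assuming a regular local table
 * named "github_events" with a "repo_id" column already exists (the table
 * and column names here are hypothetical):
 *
 *   SELECT create_distributed_table('github_events', 'repo_id', 'hash');
 *
 * The SQL-level function supplies defaults for the distribution method and
 * colocate_with, so the arguments map onto the PG_GETARG_* calls above.
 */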
/*
 * create_reference_table creates a distributed table with the given relationId.
 * The created table has one shard and its replication factor is set to the
 * active worker count. In fact, that is the definition of a reference table
 * in Citus.
 */
Datum
create_reference_table(PG_FUNCTION_ARGS)
{
	Oid relationId = PG_GETARG_OID(0);

	Relation relation = NULL;
	char *colocateWithTableName = NULL;
	List *workerNodeList = NIL;
	int workerCount = 0;
	Var *distributionColumn = NULL;
	bool viaDeprecatedAPI = false;

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	/*
	 * Ensure the schema exists on each worker node. We cannot run this
	 * function transactionally, since we may create shards over separate
	 * sessions and shard creation depends on the schema being present and
	 * visible from all sessions.
	 */
	EnsureSchemaExistsOnAllNodes(relationId);

	/*
	 * Lock target relation with an exclusive lock - there's no way to make
	 * sense of this table until we've committed, and we don't want multiple
	 * backends manipulating this relation.
	 */
	relation = relation_open(relationId, ExclusiveLock);

	/*
	 * We should do this check here since the code below relies on this
	 * relation having a supported relation kind. More extensive checks
	 * will be performed in CreateDistributedTable.
	 */
	EnsureRelationKindSupported(relationId);

	workerNodeList = ActivePrimaryNodeList();
	workerCount = list_length(workerNodeList);

	/* if there are no workers, error out */
	if (workerCount == 0)
	{
		char *relationName = get_rel_name(relationId);

		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("cannot create reference table \"%s\"",
							   relationName),
						errdetail("There are no active worker nodes.")));
	}

	CreateDistributedTable(relationId, distributionColumn, DISTRIBUTE_BY_NONE,
						   colocateWithTableName, viaDeprecatedAPI);

	relation_close(relation, NoLock);

	PG_RETURN_VOID();
}
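
/*
 * Illustrative SQL usage, assuming a small lookup table named "nation_codes"
 * already exists (the table name is hypothetical):
 *
 *   SELECT create_reference_table('nation_codes');
 *
 * This takes the same code path as create_distributed_table, but with
 * DISTRIBUTE_BY_NONE and no distribution column.
 */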
/*
 * master_get_new_placementid is a user-facing wrapper function around
 * GetNextPlacementId() which allocates and returns a unique placement id
 * for the placement to be created.
 *
 * NB: This can be called by any user; for now we have decided that that's
 * ok. We might want to restrict this to members of a specific role at some
 * later point.
 */
Datum
master_get_new_placementid(PG_FUNCTION_ARGS)
{
	uint64 placementId = 0;
	Datum placementIdDatum = 0;

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	placementId = GetNextPlacementId();
	placementIdDatum = Int64GetDatum(placementId);

	PG_RETURN_DATUM(placementIdDatum);
}
/*
 * master_get_new_shardid is a user-facing wrapper function around
 * GetNextShardId() which allocates and returns a unique shardId for the
 * shard to be created.
 *
 * NB: This can be called by any user; for now we have decided that that's
 * ok. We might want to restrict this to members of a specific role at some
 * later point.
 */
Datum
master_get_new_shardid(PG_FUNCTION_ARGS)
{
	uint64 shardId = 0;
	Datum shardIdDatum = 0;

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	shardId = GetNextShardId();
	shardIdDatum = Int64GetDatum(shardId);

	PG_RETURN_DATUM(shardIdDatum);
}
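
/*
 * Illustrative SQL usage of the two id-allocation wrappers above; each call
 * simply returns the next unique id from GetNextPlacementId() or
 * GetNextShardId():
 *
 *   SELECT master_get_new_placementid();
 *   SELECT master_get_new_shardid();
 */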
/*
 * master_create_worker_shards is a user-facing function to create worker
 * shards for the given relation in round-robin order.
 */
Datum
master_create_worker_shards(PG_FUNCTION_ARGS)
{
	text *tableNameText = PG_GETARG_TEXT_P(0);
	int32 shardCount = PG_GETARG_INT32(1);
	int32 replicationFactor = PG_GETARG_INT32(2);

	Oid distributedTableId = ResolveRelationId(tableNameText);

	/* do not add any data */
	bool useExclusiveConnections = false;

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	CreateShardsWithRoundRobinPolicy(distributedTableId, shardCount, replicationFactor,
									 useExclusiveConnections);

	PG_RETURN_VOID();
}
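
/*
 * Illustrative SQL usage, assuming "github_events" was already registered as
 * a distributed table (the table name, shard count, and replication factor
 * are hypothetical):
 *
 *   SELECT master_create_worker_shards('github_events', 16, 2);
 */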
/*
 * master_drop_all_shards attempts to drop all shards for a given relation.
 * Unlike master_apply_delete_command, this function can be called even
 * if the table has already been dropped.
 */
Datum
master_drop_all_shards(PG_FUNCTION_ARGS)
{
	Oid relationId = PG_GETARG_OID(0);
	text *schemaNameText = PG_GETARG_TEXT_P(1);
	text *relationNameText = PG_GETARG_TEXT_P(2);

	List *shardIntervalList = NIL;
	int droppedShardCount = 0;

	char *schemaName = text_to_cstring(schemaNameText);
	char *relationName = text_to_cstring(relationNameText);

	CheckCitusVersion(ERROR);

	/*
	 * The SQL_DROP trigger calls this function even for tables that are
	 * not distributed. In that case, silently ignore and return -1.
	 */
	if (!IsDistributedTable(relationId) || !EnableDDLPropagation)
	{
		PG_RETURN_INT32(-1);
	}

	EnsureCoordinator();
	CheckTableSchemaNameForDrop(relationId, &schemaName, &relationName);

	/*
	 * master_drop_all_shards is typically called from the DROP TABLE trigger,
	 * but could be called by a user directly. Make sure we hold an
	 * AccessExclusiveLock to prevent any other commands from running on this
	 * table concurrently.
	 */
	LockRelationOid(relationId, AccessExclusiveLock);

	shardIntervalList = LoadShardIntervalList(relationId);
	droppedShardCount = DropShards(relationId, schemaName, relationName,
								   shardIntervalList);

	PG_RETURN_INT32(droppedShardCount);
}
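
/*
 * Illustrative direct SQL invocation (normally this runs via the SQL_DROP
 * event trigger); the schema and table names are hypothetical:
 *
 *   SELECT master_drop_all_shards('github_events'::regclass,
 *                                 'public', 'github_events');
 */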
/*
 * master_apply_delete_command takes in a delete command, finds shards that
 * match the criteria defined in the delete command, drops the found shards from
 * the worker nodes, and updates the corresponding metadata on the master node.
 * This function drops a shard if and only if all rows in the shard satisfy
 * the conditions in the delete command. Note that this function only accepts
 * conditions on the partition key, and if no condition is provided then all
 * shards are deleted.
 *
 * Shard placements that we could not drop are marked to be deleted later. If
 * a shard satisfies the given conditions, we delete it from the shard metadata
 * table even if its placements have not been deleted yet.
 */
Datum
master_apply_delete_command(PG_FUNCTION_ARGS)
{
	text *queryText = PG_GETARG_TEXT_P(0);
	char *queryString = text_to_cstring(queryText);
	char *relationName = NULL;
	char *schemaName = NULL;
	Oid relationId = InvalidOid;
	List *shardIntervalList = NIL;
	List *deletableShardIntervalList = NIL;
	List *queryTreeList = NIL;
	Query *deleteQuery = NULL;
	Node *whereClause = NULL;
	Node *deleteCriteria = NULL;
	Node *queryTreeNode = NULL;
	DeleteStmt *deleteStatement = NULL;
	int droppedShardCount = 0;
	LOCKMODE lockMode = 0;
	char partitionMethod = 0;
	bool failOK = false;

#if (PG_VERSION_NUM >= 100000)
	RawStmt *rawStmt = (RawStmt *) ParseTreeRawStmt(queryString);
	queryTreeNode = rawStmt->stmt;
#else
	queryTreeNode = ParseTreeNode(queryString);
#endif

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	if (!IsA(queryTreeNode, DeleteStmt))
	{
		ereport(ERROR, (errmsg("query \"%s\" is not a delete statement",
							   queryString)));
	}

	deleteStatement = (DeleteStmt *) queryTreeNode;

	schemaName = deleteStatement->relation->schemaname;
	relationName = deleteStatement->relation->relname;

	/*
	 * We take an exclusive lock while dropping shards to prevent concurrent
	 * writes. We don't want to block SELECTs, which means queries might fail
	 * if they access a shard that has just been dropped.
	 */
	lockMode = ExclusiveLock;

	relationId = RangeVarGetRelid(deleteStatement->relation, lockMode, failOK);

	/* schema-prefix if it is not specified already */
	if (schemaName == NULL)
	{
		Oid schemaId = get_rel_namespace(relationId);
		schemaName = get_namespace_name(schemaId);
	}

	CheckDistributedTable(relationId);
	EnsureTablePermissions(relationId, ACL_DELETE);

#if (PG_VERSION_NUM >= 100000)
	queryTreeList = pg_analyze_and_rewrite(rawStmt, queryString, NULL, 0, NULL);
#else
	queryTreeList = pg_analyze_and_rewrite(queryTreeNode, queryString, NULL, 0);
#endif

	deleteQuery = (Query *) linitial(queryTreeList);
	CheckTableCount(deleteQuery);

	/* get where clause and flatten it */
	whereClause = (Node *) deleteQuery->jointree->quals;
	deleteCriteria = eval_const_expressions(NULL, whereClause);

	partitionMethod = PartitionMethod(relationId);
	if (partitionMethod == DISTRIBUTE_BY_HASH)
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("cannot delete from hash distributed table with this "
							   "command"),
						errdetail("Delete statements on hash-partitioned tables "
								  "are not supported with master_apply_delete_command."),
						errhint("Use master_modify_multiple_shards command instead.")));
	}
	else if (partitionMethod == DISTRIBUTE_BY_NONE)
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("cannot delete from distributed table"),
						errdetail("Delete statements on reference tables "
								  "are not supported.")));
	}

	CheckDeleteCriteria(deleteCriteria);
	CheckPartitionColumn(relationId, deleteCriteria);

	shardIntervalList = LoadShardIntervalList(relationId);

	/* drop all shards if where clause is not present */
	if (deleteCriteria == NULL)
	{
		deletableShardIntervalList = shardIntervalList;
		ereport(DEBUG2, (errmsg("dropping all shards for \"%s\"", relationName)));
	}
	else
	{
		deletableShardIntervalList = ShardsMatchingDeleteCriteria(relationId,
																  shardIntervalList,
																  deleteCriteria);
	}

	droppedShardCount = DropShards(relationId, schemaName, relationName,
								   deletableShardIntervalList);

	PG_RETURN_INT32(droppedShardCount);
}
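
/*
 * Illustrative SQL usage, assuming an append-distributed table named
 * "github_events" with a "created_at" partition column (names and the cutoff
 * value are hypothetical):
 *
 *   SELECT master_apply_delete_command(
 *       'DELETE FROM github_events WHERE created_at < ''2016-01-01''');
 *
 * Only shards in which every row satisfies the condition on the partition
 * key are dropped, as described in the function comment above.
 */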