/*
 * create_distributed_table takes a table name, distribution column,
 * distribution method, and colocate_with option, then creates a
 * distributed table.
 */
Datum
create_distributed_table(PG_FUNCTION_ARGS)
{
	Oid relationId = InvalidOid;
	text *distributionColumnText = NULL;
	Oid distributionMethodOid = InvalidOid;
	text *colocateWithTableNameText = NULL;

	Relation relation = NULL;
	char *distributionColumnName = NULL;
	Var *distributionColumn = NULL;
	char distributionMethod = 0;

	char *colocateWithTableName = NULL;

	bool viaDeprecatedAPI = false;

	CheckCitusVersion(ERROR);
	EnsureCoordinator();

	relationId = PG_GETARG_OID(0);
	distributionColumnText = PG_GETARG_TEXT_P(1);
	distributionMethodOid = PG_GETARG_OID(2);
	colocateWithTableNameText = PG_GETARG_TEXT_P(3);

	/*
	 * Lock target relation with an exclusive lock - there's no way to make
	 * sense of this table until we've committed, and we don't want multiple
	 * backends manipulating this relation.
	 */
	relation = try_relation_open(relationId, ExclusiveLock);

	if (relation == NULL)
	{
		ereport(ERROR, (errmsg("could not create distributed table: "
							   "relation does not exist")));
	}

	/*
	 * We should do this check here since the code in the following lines relies
	 * on this relation having a supported relation kind. More extensive checks
	 * will be performed in CreateDistributedTable.
	 */
	EnsureRelationKindSupported(relationId);

	distributionColumnName = text_to_cstring(distributionColumnText);
	distributionColumn = BuildDistributionKeyFromColumnName(relation,
															distributionColumnName);
	distributionMethod = LookupDistributionMethod(distributionMethodOid);

	colocateWithTableName = text_to_cstring(colocateWithTableNameText);

	CreateDistributedTable(relationId, distributionColumn, distributionMethod,
						   colocateWithTableName, viaDeprecatedAPI);

	relation_close(relation, NoLock);

	PG_RETURN_VOID();
}
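
A minimal usage sketch of the SQL-facing function; the events table and its event_id column are illustrative:

    -- distribute the events table by hash of its event_id column
    SELECT create_distributed_table('events', 'event_id', 'hash');
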
/*
 * create_reference_table creates a reference table with the given relationId.
 * The created table has one shard, and its replication factor is set to the
 * active worker count; that is, in fact, the definition of a reference table in Citus.
 */
Datum
create_reference_table(PG_FUNCTION_ARGS)
{
	Oid relationId = PG_GETARG_OID(0);

	Relation relation = NULL;
	char *colocateWithTableName = NULL;
	List *workerNodeList = NIL;
	int workerCount = 0;
	Var *distributionColumn = NULL;

	bool viaDeprecatedAPI = false;

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	/*
	 * Ensure the schema exists on each worker node. We cannot run this function
	 * transactionally, since we may create shards over separate sessions and
	 * shard creation depends on the schema being present and visible from all
	 * sessions.
	 */
	EnsureSchemaExistsOnAllNodes(relationId);

	/*
	 * Lock target relation with an exclusive lock - there's no way to make
	 * sense of this table until we've committed, and we don't want multiple
	 * backends manipulating this relation.
	 */
	relation = relation_open(relationId, ExclusiveLock);

	/*
	 * We should do this check here since the code in the following lines relies
	 * on this relation having a supported relation kind. More extensive checks
	 * will be performed in CreateDistributedTable.
	 */
	EnsureRelationKindSupported(relationId);

	workerNodeList = ActivePrimaryNodeList();
	workerCount = list_length(workerNodeList);

	/* if there are no workers, error out */
	if (workerCount == 0)
	{
		char *relationName = get_rel_name(relationId);

		ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						errmsg("cannot create reference table \"%s\"", relationName),
						errdetail("There are no active worker nodes.")));
	}

	CreateDistributedTable(relationId, distributionColumn, DISTRIBUTE_BY_NONE,
						   colocateWithTableName, viaDeprecatedAPI);

	relation_close(relation, NoLock);

	PG_RETURN_VOID();
}
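
Usage sketch, assuming an illustrative countries table:

    -- replicate countries to every worker as a single-shard reference table
    SELECT create_reference_table('countries');
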
/*
 * master_drop_sequences attempts to drop a list of sequences on worker nodes.
 * The "IF EXISTS" clause is used to permit dropping sequences even if they may not
 * exist. If the commands fail on the workers, the operation is rolled back.
 * If ddl propagation (citus.enable_ddl_propagation) is set to off, then the function
 * returns without doing anything.
 */
Datum
master_drop_sequences(PG_FUNCTION_ARGS)
{
	ArrayType *sequenceNamesArray = PG_GETARG_ARRAYTYPE_P(0);
	ArrayIterator sequenceIterator = NULL;
	Datum sequenceText = 0;
	bool isNull = false;
	StringInfo dropSeqCommand = makeStringInfo();
	bool coordinator = IsCoordinator();

	CheckCitusVersion(ERROR);

	/* do nothing if DDL propagation is switched off or this is not the coordinator */
	if (!EnableDDLPropagation || !coordinator)
	{
		PG_RETURN_VOID();
	}

	/* iterate over sequence names to build a single command to DROP them all */
	sequenceIterator = array_create_iterator(sequenceNamesArray, 0, NULL);
	while (array_iterate(sequenceIterator, &sequenceText, &isNull))
	{
		if (isNull)
		{
			ereport(ERROR, (errmsg("unexpected NULL sequence name"),
							errcode(ERRCODE_INVALID_PARAMETER_VALUE)));
		}

		/* append command portion if we haven't added any sequence names yet */
		if (dropSeqCommand->len == 0)
		{
			appendStringInfoString(dropSeqCommand, "DROP SEQUENCE IF EXISTS");
		}
		else
		{
			/* otherwise, add a comma to separate subsequent sequence names */
			appendStringInfoChar(dropSeqCommand, ',');
		}

		appendStringInfo(dropSeqCommand, " %s", TextDatumGetCString(sequenceText));
	}

	if (dropSeqCommand->len != 0)
	{
		appendStringInfoString(dropSeqCommand, " CASCADE");

		SendCommandToWorkers(WORKERS_WITH_METADATA, DISABLE_DDL_PROPAGATION);
		SendCommandToWorkers(WORKERS_WITH_METADATA, dropSeqCommand->data);
	}

	PG_RETURN_VOID();
}
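
This function is invoked by Citus itself during metadata propagation rather than by end users; a direct call would take the form below, with an illustrative sequence name:

    SELECT master_drop_sequences(ARRAY['public.events_id_seq']);
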
/*
 * get_current_transaction_id returns a tuple with (databaseId, processId,
 * initiatorNodeIdentifier, transactionNumber, timestamp) that exists in the
 * shared memory associated with this backend. Note that if the backend
 * is not in a transaction, the function returns uninitialized data where
 * transactionNumber equals 0.
 */
Datum
get_current_transaction_id(PG_FUNCTION_ARGS)
{
	TupleDesc tupleDescriptor = NULL;
	HeapTuple heapTuple = NULL;

	Datum values[5];
	bool isNulls[5];

	DistributedTransactionId *distributedTransactionId = NULL;

	CheckCitusVersion(ERROR);

	/* build a tuple descriptor for our result type */
	if (get_call_result_type(fcinfo, NULL, &tupleDescriptor) != TYPEFUNC_COMPOSITE)
	{
		elog(ERROR, "return type must be a row type");
	}

	/* MyBackendData should always be available; this check is just out of paranoia */
	if (!MyBackendData)
	{
		ereport(ERROR, (errmsg("backend is not ready for distributed transactions")));
	}

	distributedTransactionId = GetCurrentDistributedTransactionId();

	memset(values, 0, sizeof(values));
	memset(isNulls, false, sizeof(isNulls));

	/* first two fields do not change for this backend, so get directly */
	values[0] = ObjectIdGetDatum(MyDatabaseId);
	values[1] = Int32GetDatum(MyProcPid);

	values[2] = Int32GetDatum(distributedTransactionId->initiatorNodeIdentifier);
	values[3] = UInt64GetDatum(distributedTransactionId->transactionNumber);

	/* for a better output, show NULL instead of an uninitialized timestamp */
	if (distributedTransactionId->initiatorNodeIdentifier != 0)
	{
		values[4] = TimestampTzGetDatum(distributedTransactionId->timestamp);
	}
	else
	{
		isNulls[4] = true;
	}

	heapTuple = heap_form_tuple(tupleDescriptor, values, isNulls);

	PG_RETURN_DATUM(HeapTupleGetDatum(heapTuple));
}
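
Since the function returns a composite type, it is typically queried as a row source:

    SELECT * FROM get_current_transaction_id();
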
/*
 * master_get_new_placementid is a user facing wrapper function around
 * GetNextPlacementId() which allocates and returns a unique placement id for the
 * placement to be created.
 *
 * NB: This can be called by any user; for now we have decided that that's
 * ok. We might want to restrict this to users that are part of a specific
 * role or such at some later point.
 */
Datum
master_get_new_placementid(PG_FUNCTION_ARGS)
{
	uint64 placementId = 0;
	Datum placementIdDatum = 0;

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	placementId = GetNextPlacementId();
	placementIdDatum = Int64GetDatum(placementId);

	PG_RETURN_DATUM(placementIdDatum);
}
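
Usage sketch:

    SELECT master_get_new_placementid();
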
/*
 * master_get_new_shardid is a user facing wrapper function around GetNextShardId()
 * which allocates and returns a unique shardId for the shard to be created.
 *
 * NB: This can be called by any user; for now we have decided that that's
 * ok. We might want to restrict this to users that are part of a specific
 * role or such at some later point.
 */
Datum
master_get_new_shardid(PG_FUNCTION_ARGS)
{
	uint64 shardId = 0;
	Datum shardIdDatum = 0;

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	shardId = GetNextShardId();
	shardIdDatum = Int64GetDatum(shardId);

	PG_RETURN_DATUM(shardIdDatum);
}
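
Called the same way as master_get_new_placementid:

    SELECT master_get_new_shardid();
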
/*
 * citus_total_relation_size accepts a distributed table name and returns the
 * total relation size of the table and its indexes.
 */
Datum
citus_total_relation_size(PG_FUNCTION_ARGS)
{
	Oid relationId = PG_GETARG_OID(0);
	uint64 totalRelationSize = 0;
	char *tableSizeFunction = PG_TOTAL_RELATION_SIZE_FUNCTION;

	CheckCitusVersion(ERROR);

	if (CStoreTable(relationId))
	{
		tableSizeFunction = CSTORE_TABLE_SIZE_FUNCTION;
	}

	totalRelationSize = DistributedTableSize(relationId, tableSizeFunction);

	PG_RETURN_INT64(totalRelationSize);
}
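
Usage sketch, wrapping the byte count in pg_size_pretty for readability (the events table is illustrative):

    SELECT pg_size_pretty(citus_total_relation_size('events'));
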
/*
 * master_create_worker_shards is a user facing function to create worker shards
 * for the given relation in round robin order.
 */
Datum
master_create_worker_shards(PG_FUNCTION_ARGS)
{
	text *tableNameText = PG_GETARG_TEXT_P(0);
	int32 shardCount = PG_GETARG_INT32(1);
	int32 replicationFactor = PG_GETARG_INT32(2);

	Oid distributedTableId = ResolveRelationId(tableNameText);

	/* shards are created empty, so exclusive connections are not needed */
	bool useExclusiveConnections = false;

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	CreateShardsWithRoundRobinPolicy(distributedTableId, shardCount, replicationFactor,
									 useExclusiveConnections);

	PG_RETURN_VOID();
}
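
Usage sketch for this older API, assuming a table already registered via master_create_distributed_table; 16 shards and replication factor 2 are arbitrary example values:

    SELECT master_create_worker_shards('events', 16, 2);
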
/*
 * master_drop_all_shards attempts to drop all shards for a given relation.
 * Unlike master_apply_delete_command, this function can be called even
 * if the table has already been dropped.
 */
Datum
master_drop_all_shards(PG_FUNCTION_ARGS)
{
	Oid relationId = PG_GETARG_OID(0);
	text *schemaNameText = PG_GETARG_TEXT_P(1);
	text *relationNameText = PG_GETARG_TEXT_P(2);

	List *shardIntervalList = NIL;
	int droppedShardCount = 0;

	char *schemaName = text_to_cstring(schemaNameText);
	char *relationName = text_to_cstring(relationNameText);

	CheckCitusVersion(ERROR);

	/*
	 * The SQL_DROP trigger calls this function even for tables that are
	 * not distributed. In that case, silently ignore and return -1.
	 */
	if (!IsDistributedTable(relationId) || !EnableDDLPropagation)
	{
		PG_RETURN_INT32(-1);
	}

	EnsureCoordinator();
	CheckTableSchemaNameForDrop(relationId, &schemaName, &relationName);

	/*
	 * master_drop_all_shards is typically called from the DROP TABLE trigger,
	 * but could be called by a user directly. Make sure we have an
	 * AccessExclusiveLock to prevent any other commands from running on this table
	 * concurrently.
	 */
	LockRelationOid(relationId, AccessExclusiveLock);

	shardIntervalList = LoadShardIntervalList(relationId);
	droppedShardCount = DropShards(relationId, schemaName, relationName,
								   shardIntervalList);

	PG_RETURN_INT32(droppedShardCount);
}
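
Although normally reached via the DROP TABLE trigger, the function can be called directly; the names below are illustrative:

    SELECT master_drop_all_shards('public.events'::regclass, 'public', 'events');
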
/*
 * assign_distributed_transaction_id updates the shared memory allocated for this backend
 * and sets initiatorNodeIdentifier, transactionNumber, timestamp fields with the given
 * inputs. Also, the function sets the database id and process id via the information that
 * Postgres provides.
 *
 * This function is only intended for internal use for managing distributed transactions.
 * Users should not use this function for any purpose.
 */
Datum
assign_distributed_transaction_id(PG_FUNCTION_ARGS)
{
	CheckCitusVersion(ERROR);

	/* MyBackendData should always be available; this check is just out of paranoia */
	if (!MyBackendData)
	{
		ereport(ERROR, (errmsg("backend is not ready for distributed transactions")));
	}

	/*
	 * Note that we don't need to lock shared memory (i.e., LockBackendSharedMemory())
	 * here since this function is executed after AssignDistributedTransactionId() is
	 * issued on the initiator node, which already takes the required lock to enforce
	 * consistency.
	 */

	SpinLockAcquire(&MyBackendData->mutex);

	/* if an id is already assigned, release the lock and error */
	if (MyBackendData->transactionId.transactionNumber != 0)
	{
		SpinLockRelease(&MyBackendData->mutex);

		ereport(ERROR, (errmsg("the backend has already been assigned a "
							   "transaction id")));
	}

	MyBackendData->databaseId = MyDatabaseId;

	MyBackendData->transactionId.initiatorNodeIdentifier = PG_GETARG_INT32(0);
	MyBackendData->transactionId.transactionNumber = PG_GETARG_INT64(1);
	MyBackendData->transactionId.timestamp = PG_GETARG_TIMESTAMPTZ(2);
	MyBackendData->transactionId.transactionOriginator = false;

	SpinLockRelease(&MyBackendData->mutex);

	PG_RETURN_VOID();
}
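
As the header comment notes, this is for internal use only; the call the initiator node issues to a worker backend would take the form below, with arbitrary example values:

    SELECT assign_distributed_transaction_id(1, 42, now());
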
/*
 * master_apply_delete_command takes in a delete command, finds shards that
 * match the criteria defined in the delete command, drops the found shards from
 * the worker nodes, and updates the corresponding metadata on the master node.
 * This function drops a shard if and only if all rows in the shard satisfy
 * the conditions in the delete command. Note that this function only accepts
 * conditions on the partition key, and if no condition is provided, all
 * shards are deleted.
 *
 * We mark shard placements that we could not drop as to be deleted later. If a
 * shard satisfies the given conditions, we delete it from the shard metadata
 * table even though related shard placements are not deleted.
 */
Datum
master_apply_delete_command(PG_FUNCTION_ARGS)
{
	text *queryText = PG_GETARG_TEXT_P(0);
	char *queryString = text_to_cstring(queryText);
	char *relationName = NULL;
	char *schemaName = NULL;
	Oid relationId = InvalidOid;
	List *shardIntervalList = NIL;
	List *deletableShardIntervalList = NIL;
	List *queryTreeList = NIL;
	Query *deleteQuery = NULL;
	Node *whereClause = NULL;
	Node *deleteCriteria = NULL;
	Node *queryTreeNode = NULL;
	DeleteStmt *deleteStatement = NULL;
	int droppedShardCount = 0;
	LOCKMODE lockMode = 0;
	char partitionMethod = 0;
	bool failOK = false;
#if (PG_VERSION_NUM >= 100000)
	RawStmt *rawStmt = (RawStmt *) ParseTreeRawStmt(queryString);
	queryTreeNode = rawStmt->stmt;
#else
	queryTreeNode = ParseTreeNode(queryString);
#endif

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	if (!IsA(queryTreeNode, DeleteStmt))
	{
		ereport(ERROR, (errmsg("query \"%s\" is not a delete statement",
							   queryString)));
	}

	deleteStatement = (DeleteStmt *) queryTreeNode;

	schemaName = deleteStatement->relation->schemaname;
	relationName = deleteStatement->relation->relname;

	/*
	 * We take an exclusive lock while dropping shards to prevent concurrent
	 * writes. We don't want to block SELECTs, which means queries might fail
	 * if they access a shard that has just been dropped.
	 */
	lockMode = ExclusiveLock;

	relationId = RangeVarGetRelid(deleteStatement->relation, lockMode, failOK);

	/* use the relation's schema when no schema was specified explicitly */
	if (schemaName == NULL)
	{
		Oid schemaId = get_rel_namespace(relationId);
		schemaName = get_namespace_name(schemaId);
	}

	CheckDistributedTable(relationId);
	EnsureTablePermissions(relationId, ACL_DELETE);

#if (PG_VERSION_NUM >= 100000)
	queryTreeList = pg_analyze_and_rewrite(rawStmt, queryString, NULL, 0, NULL);
#else
	queryTreeList = pg_analyze_and_rewrite(queryTreeNode, queryString, NULL, 0);
#endif
	deleteQuery = (Query *) linitial(queryTreeList);
	CheckTableCount(deleteQuery);

	/* get where clause and flatten it */
	whereClause = (Node *) deleteQuery->jointree->quals;
	deleteCriteria = eval_const_expressions(NULL, whereClause);

	partitionMethod = PartitionMethod(relationId);
	if (partitionMethod == DISTRIBUTE_BY_HASH)
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("cannot delete from hash distributed table with this "
							   "command"),
						errdetail("Delete statements on hash-partitioned tables "
								  "are not supported with master_apply_delete_command."),
						errhint("Use master_modify_multiple_shards command instead.")));
	}
	else if (partitionMethod == DISTRIBUTE_BY_NONE)
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("cannot delete from distributed table"),
						errdetail("Delete statements on reference tables "
								  "are not supported.")));
	}


	CheckDeleteCriteria(deleteCriteria);
	CheckPartitionColumn(relationId, deleteCriteria);

	shardIntervalList = LoadShardIntervalList(relationId);

	/* drop all shards if where clause is not present */
	if (deleteCriteria == NULL)
	{
		deletableShardIntervalList = shardIntervalList;
		ereport(DEBUG2, (errmsg("dropping all shards for \"%s\"", relationName)));
	}
	else
	{
		deletableShardIntervalList = ShardsMatchingDeleteCriteria(relationId,
																  shardIntervalList,
																  deleteCriteria);
	}

	droppedShardCount = DropShards(relationId, schemaName, relationName,
								   deletableShardIntervalList);

	PG_RETURN_INT32(droppedShardCount);
}
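
Usage sketch for an append-distributed table; the table name and the partition-key condition on created_at are illustrative:

    SELECT master_apply_delete_command(
        $$DELETE FROM events WHERE created_at < '2017-01-01'$$);
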
/*
 * get_all_active_transactions returns all the available information about all
 * the active backends.
 */
Datum
get_all_active_transactions(PG_FUNCTION_ARGS)
{
	ReturnSetInfo *returnSetInfo = (ReturnSetInfo *) fcinfo->resultinfo;
	TupleDesc tupleDescriptor = NULL;
	Tuplestorestate *tupleStore = NULL;
	MemoryContext perQueryContext = NULL;
	MemoryContext oldContext = NULL;

	int backendIndex = 0;

	Datum values[5];
	bool isNulls[5];

	CheckCitusVersion(ERROR);

	/* check to see if caller supports us returning a tuplestore */
	if (returnSetInfo == NULL || !IsA(returnSetInfo, ReturnSetInfo))
	{
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("set-valued function called in context " \
						"that cannot accept a set")));
	}

	if (!(returnSetInfo->allowedModes & SFRM_Materialize))
	{
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("materialize mode required, but it is not " \
						"allowed in this context")));
	}

	/* build a tuple descriptor for our result type */
	if (get_call_result_type(fcinfo, NULL, &tupleDescriptor) != TYPEFUNC_COMPOSITE)
	{
		elog(ERROR, "return type must be a row type");
	}

	perQueryContext = returnSetInfo->econtext->ecxt_per_query_memory;

	oldContext = MemoryContextSwitchTo(perQueryContext);

	tupleStore = tuplestore_begin_heap(true, false, work_mem);
	returnSetInfo->returnMode = SFRM_Materialize;
	returnSetInfo->setResult = tupleStore;
	returnSetInfo->setDesc = tupleDescriptor;

	MemoryContextSwitchTo(oldContext);

	/*
	 * We don't want to initialize memory while the spinlock is held, so we
	 * prefer to do it here. This initialization is done only for the first
	 * row.
	 */
	memset(values, 0, sizeof(values));
	memset(isNulls, false, sizeof(isNulls));

	/* we're reading all distributed transactions, prevent new backends */
	LockBackendSharedMemory(LW_SHARED);

	for (backendIndex = 0; backendIndex < MaxBackends; ++backendIndex)
	{
		BackendData *currentBackend =
			&backendManagementShmemData->backends[backendIndex];

		SpinLockAcquire(&currentBackend->mutex);

		/* we're only interested in active backends */
		if (currentBackend->transactionId.transactionNumber == 0)
		{
			SpinLockRelease(&currentBackend->mutex);
			continue;
		}

		values[0] = ObjectIdGetDatum(currentBackend->databaseId);
		values[1] = Int32GetDatum(ProcGlobal->allProcs[backendIndex].pid);
		values[2] = Int32GetDatum(currentBackend->transactionId.initiatorNodeIdentifier);
		values[3] = UInt64GetDatum(currentBackend->transactionId.transactionNumber);
		values[4] = TimestampTzGetDatum(currentBackend->transactionId.timestamp);

		SpinLockRelease(&currentBackend->mutex);

		tuplestore_putvalues(tupleStore, tupleDescriptor, values, isNulls);

		/*
		 * We don't want to initialize memory while the spinlock is held, so we
		 * prefer to do it here. This initialization is done for the rows
		 * starting from the second one.
		 */
		memset(values, 0, sizeof(values));
		memset(isNulls, false, sizeof(isNulls));
	}

	UnlockBackendSharedMemory();

	/* clean up and return the tuplestore */
	tuplestore_donestoring(tupleStore);

	PG_RETURN_VOID();
}
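
The function materializes its result set, so it is queried like a table:

    SELECT * FROM get_all_active_transactions();
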
/*
 * master_get_table_metadata takes in a relation name and returns
 * partition-related metadata for the relation. This metadata is grouped and
 * returned in a tuple, and is used by the caller when creating new shards. The
 * function errors out if the given relation does not exist or is not partitioned.
 */
Datum
master_get_table_metadata(PG_FUNCTION_ARGS)
{
	text *relationName = PG_GETARG_TEXT_P(0);
	Oid relationId = ResolveRelationId(relationName);

	DistTableCacheEntry *partitionEntry = NULL;
	char *partitionKeyString = NULL;
	TypeFuncClass resultTypeClass = 0;
	Datum partitionKeyExpr = 0;
	Datum partitionKey = 0;
	Datum metadataDatum = 0;
	HeapTuple metadataTuple = NULL;
	TupleDesc metadataDescriptor = NULL;
	uint64 shardMaxSizeInBytes = 0;
	char shardStorageType = 0;
	Datum values[TABLE_METADATA_FIELDS];
	bool isNulls[TABLE_METADATA_FIELDS];

	CheckCitusVersion(ERROR);

	/* find partition tuple for partitioned relation */
	partitionEntry = DistributedTableCacheEntry(relationId);

	/* create tuple descriptor for return value */
	resultTypeClass = get_call_result_type(fcinfo, NULL, &metadataDescriptor);
	if (resultTypeClass != TYPEFUNC_COMPOSITE)
	{
		ereport(ERROR, (errmsg("return type must be a row type")));
	}

	/* form heap tuple for table metadata */
	memset(values, 0, sizeof(values));
	memset(isNulls, false, sizeof(isNulls));

	partitionKeyString = partitionEntry->partitionKeyString;

	/* reference tables do not have a partition key */
	if (partitionKeyString == NULL)
	{
		partitionKey = PointerGetDatum(NULL);
		isNulls[3] = true;
	}
	else
	{
		/* get decompiled expression tree for partition key */
		partitionKeyExpr =
			PointerGetDatum(cstring_to_text(partitionEntry->partitionKeyString));
		partitionKey = DirectFunctionCall2(pg_get_expr, partitionKeyExpr,
										   ObjectIdGetDatum(relationId));
	}

	shardMaxSizeInBytes = (int64) ShardMaxSize * 1024L;

	/* get storage type */
	shardStorageType = ShardStorageType(relationId);

	values[0] = ObjectIdGetDatum(relationId);
	values[1] = shardStorageType;
	values[2] = partitionEntry->partitionMethod;
	values[3] = partitionKey;
	values[4] = Int32GetDatum(ShardReplicationFactor);
	values[5] = Int64GetDatum(shardMaxSizeInBytes);
	values[6] = Int32GetDatum(ShardPlacementPolicy);

	metadataTuple = heap_form_tuple(metadataDescriptor, values, isNulls);
	metadataDatum = HeapTupleGetDatum(metadataTuple);

	PG_RETURN_DATUM(metadataDatum);
}
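
Usage sketch (illustrative table name):

    SELECT * FROM master_get_table_metadata('events');
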
/*
 * master_get_active_worker_nodes returns a set of active worker host names and
 * port numbers in deterministic order. Currently we assume that all worker
 * nodes in pg_dist_node are active.
 */
Datum
master_get_active_worker_nodes(PG_FUNCTION_ARGS)
{
	FuncCallContext *functionContext = NULL;
	uint32 workerNodeIndex = 0;
	uint32 workerNodeCount = 0;

	CheckCitusVersion(ERROR);

	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldContext = NULL;
		List *workerNodeList = NIL;
		uint32 workerNodeCount = 0;
		TupleDesc tupleDescriptor = NULL;
		bool hasOid = false;

		/* create a function context for cross-call persistence */
		functionContext = SRF_FIRSTCALL_INIT();

		/* switch to memory context appropriate for multiple function calls */
		oldContext = MemoryContextSwitchTo(functionContext->multi_call_memory_ctx);

		workerNodeList = ActiveReadableNodeList();
		workerNodeCount = (uint32) list_length(workerNodeList);

		functionContext->user_fctx = workerNodeList;
		functionContext->max_calls = workerNodeCount;

		/*
		 * This tuple descriptor must match the output parameters declared for
		 * the function in pg_proc.
		 */
		tupleDescriptor = CreateTemplateTupleDesc(WORKER_NODE_FIELDS, hasOid);
		TupleDescInitEntry(tupleDescriptor, (AttrNumber) 1, "node_name",
						   TEXTOID, -1, 0);
		TupleDescInitEntry(tupleDescriptor, (AttrNumber) 2, "node_port",
						   INT8OID, -1, 0);

		functionContext->tuple_desc = BlessTupleDesc(tupleDescriptor);

		MemoryContextSwitchTo(oldContext);
	}

	functionContext = SRF_PERCALL_SETUP();
	workerNodeIndex = functionContext->call_cntr;
	workerNodeCount = functionContext->max_calls;

	if (workerNodeIndex < workerNodeCount)
	{
		List *workerNodeList = functionContext->user_fctx;
		WorkerNode *workerNode = list_nth(workerNodeList, workerNodeIndex);

		Datum workerNodeDatum = WorkerNodeGetDatum(workerNode,
												   functionContext->tuple_desc);

		SRF_RETURN_NEXT(functionContext, workerNodeDatum);
	}
	else
	{
		SRF_RETURN_DONE(functionContext);
	}
}
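
Usage sketch:

    SELECT * FROM master_get_active_worker_nodes();
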
/*
 * master_get_table_ddl_events takes in a relation name, and returns the set of
 * DDL commands needed to reconstruct the relation. The returned DDL commands
 * are similar in flavor to the schema definitions that pg_dump returns. The
 * function errors out if the given relation does not exist.
 */
Datum
master_get_table_ddl_events(PG_FUNCTION_ARGS)
{
	FuncCallContext *functionContext = NULL;
	ListCell *tableDDLEventCell = NULL;

	CheckCitusVersion(ERROR);

	/*
	 * On the very first call to this function, we first use the given relation
	 * name to get to the relation. We then recreate the list of DDL statements
	 * issued for this relation, and save the first statement's position in the
	 * function context.
	 */
	if (SRF_IS_FIRSTCALL())
	{
		text *relationName = PG_GETARG_TEXT_P(0);
		Oid relationId = ResolveRelationId(relationName);
		bool includeSequenceDefaults = true;

		MemoryContext oldContext = NULL;
		List *tableDDLEventList = NIL;

		/* create a function context for cross-call persistence */
		functionContext = SRF_FIRSTCALL_INIT();

		/* switch to memory context appropriate for multiple function calls */
		oldContext = MemoryContextSwitchTo(functionContext->multi_call_memory_ctx);

		/* allocate DDL statements, and then save position in DDL statements */
		tableDDLEventList = GetTableDDLEvents(relationId, includeSequenceDefaults);
		tableDDLEventCell = list_head(tableDDLEventList);

		functionContext->user_fctx = tableDDLEventCell;

		MemoryContextSwitchTo(oldContext);
	}

	/*
	 * On every call to this function, we get the current position in the
	 * statement list. We then iterate to the next position in the list and
	 * return the current statement, if we have not yet reached the end of
	 * list.
	 */
	functionContext = SRF_PERCALL_SETUP();

	tableDDLEventCell = (ListCell *) functionContext->user_fctx;
	if (tableDDLEventCell != NULL)
	{
		char *ddlStatement = (char *) lfirst(tableDDLEventCell);
		text *ddlStatementText = cstring_to_text(ddlStatement);

		functionContext->user_fctx = lnext(tableDDLEventCell);

		SRF_RETURN_NEXT(functionContext, PointerGetDatum(ddlStatementText));
	}
	else
	{
		SRF_RETURN_DONE(functionContext);
	}
}
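
Usage sketch; each returned row is one DDL statement for the illustrative events table:

    SELECT master_get_table_ddl_events('events');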