Пример #1
0
/*
 * lock_shard_resources takes a lock mode (as an integer) and an array of
 * shard IDs, and acquires a shard resource lock in that mode for every
 * shard ID in the array. It allows shard resources to be locked remotely
 * in order to serialise non-commutative writes on shards.
 *
 * The shard IDs are locked in the order in which they appear in the array.
 * This function does not sort them, so callers must pass the shard IDs in
 * a consistent order to avoid deadlocks.
 *
 * An error is raised when the array has zero dimensions. The superuser
 * check is performed only after that validation.
 */
Datum
lock_shard_resources(PG_FUNCTION_ARGS)
{
	LOCKMODE requestedMode = IntToLockMode(PG_GETARG_INT32(0));
	ArrayType *shardIdArray = PG_GETARG_ARRAYTYPE_P(1);
	Datum *shardIdDatums = NULL;
	int shardCount = 0;
	int position = 0;

	/* an array without dimensions carries nothing to lock */
	if (ARR_NDIM(shardIdArray) == 0)
	{
		ereport(ERROR, (errmsg("no locks specified")));
	}

	/* these locks can block writes, so do not let random users take them */
	EnsureSuperUser();

	shardCount = ArrayObjectCount(shardIdArray);
	shardIdDatums = DeconstructArrayObject(shardIdArray);

	/* walk the array front to back, locking in the caller-supplied order */
	while (position < shardCount)
	{
		int64 shardId = DatumGetInt64(shardIdDatums[position]);

		LockShardResource(shardId, requestedMode);
		position++;
	}

	PG_RETURN_VOID();
}
Пример #2
0
/*
 * ArrayObjectToCStringList builds a list of cstrings from a single
 * dimensional array object. Each array element is converted with
 * TextDatumGetCString and appended to the list in array order.
 */
static List *
ArrayObjectToCStringList(ArrayType *arrayObject)
{
	Datum *elementArray = DeconstructArrayObject(arrayObject);
	int32 elementCount = ArrayObjectCount(arrayObject);
	List *resultList = NIL;
	int32 position = 0;

	while (position < elementCount)
	{
		char *elementString = TextDatumGetCString(elementArray[position]);

		resultList = lappend(resultList, elementString);
		position++;
	}

	/* the resulting list is expected to hold at least one entry */
	Assert(resultList != NIL);

	return resultList;
}
Пример #3
0
/*
 * FetchTableCommon implements the logic shared by the table fetching
 * functions and wraps around the actual data fetching callback. The shared
 * logic makes sure that only one process tries to fetch a given table at
 * any time, and that the fetch is retried on the next node when fetching
 * from one node fails.
 *
 * tableNameText is the name of the table to fetch, and remoteTableSize is
 * the size of the table on the remote placement. nodeNameObject and
 * nodePortObject are parallel arrays of candidate node names and ports, and
 * must have the same number of elements. FetchTableFunction is called with
 * (node name, node port, table name) and returns whether the fetch succeeded.
 *
 * The function errors out if the two arrays differ in size, or if the table
 * could not be fetched from any of the given nodes.
 */
static void
FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
				 ArrayType *nodeNameObject, ArrayType *nodePortObject,
				 bool (*FetchTableFunction)(const char *, uint32, const char *))
{
	char *shardTableName = text_to_cstring(tableNameText);

	Datum *nameDatums = DeconstructArrayObject(nodeNameObject);
	Datum *portDatums = DeconstructArrayObject(nodePortObject);
	int32 nameCount = ArrayObjectCount(nodeNameObject);
	int32 portCount = ArrayObjectCount(nodePortObject);

	uint64 shardId = INVALID_SHARD_ID;
	List *qualifiedNameList = NIL;
	RangeVar *rangeVar = NULL;
	Oid localRelationId = InvalidOid;
	bool fetchSucceeded = false;
	uint32 candidateIndex = 0;

	/* every node name needs a matching port number */
	if (nameCount != portCount)
	{
		ereport(ERROR, (errmsg("node name array size: %d and node port array size: %d"
							   " do not match", nameCount, portCount)));
	}

	/*
	 * Take a lock on the shardId and never release it explicitly. The lock
	 * goes away automatically once this function returns and its transaction
	 * commits. That way, concurrent caching commands see the newly created
	 * table when they acquire the lock (in read committed mode).
	 */
	shardId = ExtractShardId(shardTableName);
	LockShardResource(shardId, AccessExclusiveLock);

	qualifiedNameList = textToQualifiedNameList(tableNameText);
	rangeVar = makeRangeVarFromNameList(qualifiedNameList);
	localRelationId = RangeVarGetRelid(rangeVar, NoLock, true);

	/* a valid relation id means the table was already fetched earlier */
	if (localRelationId != InvalidOid)
	{
		ObjectAddress staleTable = { InvalidOid, InvalidOid, 0 };
		uint64 localTableSize = 0;

		/* without cache expiry, an existing local copy is always good enough */
		if (!ExpireCachedShards)
		{
			return;
		}

		/*
		 * Compare the on-disk size of the cached shard with the size of the
		 * placement to decide whether the cached copy is up to date.
		 *
		 * Note 1: updates or deletes on the original shard lead to
		 * inconsistent sizes between different databases. In that case the
		 * data would be fetched every time, or worse, the placement would
		 * deadlock when it tries to fetch from itself while holding the lock.
		 * Therefore, this option is disabled by default.
		 *
		 * Note 2: when data is appended to a shard, the size on disk only
		 * grows when a new page is added (the next 8kB block).
		 */
		localTableSize = LocalTableSize(localRelationId);

		if (remoteTableSize <= localTableSize)
		{
			/* the cached table is up to date, nothing to fetch */
			return;
		}

		/* the cached table is out of date, drop it before fetching again */
		staleTable.classId = RelationRelationId;
		staleTable.objectId = localRelationId;
		staleTable.objectSubId = 0;

		performDeletion(&staleTable, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
	}

	/* try the nodes in order and stop at the first successful fetch */
	for (candidateIndex = 0; candidateIndex < nameCount; candidateIndex++)
	{
		char *nodeName = TextDatumGetCString(nameDatums[candidateIndex]);
		uint32 nodePort = DatumGetUInt32(portDatums[candidateIndex]);

		fetchSucceeded = (*FetchTableFunction)(nodeName, nodePort, shardTableName);
		if (fetchSucceeded)
		{
			break;
		}
	}

	/* every node was tried and none of them could provide the table */
	if (!fetchSucceeded)
	{
		ereport(ERROR, (errmsg("could not fetch relation: \"%s\"", shardTableName)));
	}
}