Code Example #1
File: cdbrelsize.c    Project: 50wu/gpdb
/*
 * Get the max size of the relation across segments
 */
int64
cdbRelMaxSegSize(Relation rel)
{
	int64 size = 0;
	int i;
	CdbPgResults cdb_pgresults = {NULL, 0};
	StringInfoData buffer;

	char *schemaName;
	char *relName;

	/*
	 * Let's ask the QEs for the size of the relation
	 */
	initStringInfo(&buffer);

	schemaName = get_namespace_name(RelationGetNamespace(rel));
	if (schemaName == NULL)
		elog(ERROR, "cache lookup failed for namespace %d",
			 RelationGetNamespace(rel));
	relName = RelationGetRelationName(rel);

	/*
	 * Safer to pass names than oids, just in case they get out of sync between QD and QE,
	 * which might happen with a toast table or index, I think (but maybe not)
	 */
	appendStringInfo(&buffer, "select pg_relation_size('%s.%s')",
					 quote_identifier(schemaName), quote_identifier(relName));

	CdbDispatchCommand(buffer.data, DF_WITH_SNAPSHOT, &cdb_pgresults);

	for (i = 0; i < cdb_pgresults.numResults; i++)
	{
		struct pg_result * pgresult = cdb_pgresults.pg_results[i];
		if (PQresultStatus(pgresult) != PGRES_TUPLES_OK)
		{
			cdbdisp_clearCdbPgResults(&cdb_pgresults);
			elog(ERROR,"cdbRelMaxSegSize: resultStatus not tuples_Ok: %s %s",
				 PQresStatus(PQresultStatus(pgresult)),PQresultErrorMessage(pgresult));
		}
		else
		{
			Assert(PQntuples(pgresult) == 1);
			int64 tempsize = 0;
			(void) scanint8(PQgetvalue(pgresult, 0, 0), false, &tempsize);
			if (tempsize > size)
				size = tempsize;
		}
	}

	pfree(buffer.data);

	cdbdisp_clearCdbPgResults(&cdb_pgresults);

	return size;
}
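
The example above shows the dispatch-and-aggregate pattern that recurs throughout this page: build the SQL on the QD, call CdbDispatchCommand(), validate each per-segment result, parse the single value, and clear the results on both the normal and the error path. Below is a minimal sketch of that skeleton, reusing only calls that appear in the example; the helper name dispatch_int8_max and the max-of-segments aggregation are illustrative, not part of gpdb.

/*
 * Minimal sketch: dispatch a one-value query and keep the maximum across
 * segments.  Assumes a QD backend context and the dispatcher headers used by
 * the examples on this page (cdbdisp_query.h, cdbdispatchresult.h).
 */
static int64
dispatch_int8_max(const char *sql)
{
	CdbPgResults cdb_pgresults = {NULL, 0};
	int64		maxval = 0;
	int			i;

	CdbDispatchCommand(sql, DF_WITH_SNAPSHOT, &cdb_pgresults);

	for (i = 0; i < cdb_pgresults.numResults; i++)
	{
		struct pg_result *pgresult = cdb_pgresults.pg_results[i];
		int64		val = 0;

		if (PQresultStatus(pgresult) != PGRES_TUPLES_OK)
		{
			/* Always release the dispatch results before erroring out. */
			cdbdisp_clearCdbPgResults(&cdb_pgresults);
			elog(ERROR, "unexpected result status: %s",
				 PQresStatus(PQresultStatus(pgresult)));
		}

		/* Each segment is expected to return exactly one int8 value. */
		(void) scanint8(PQgetvalue(pgresult, 0, 0), false, &val);
		if (val > maxval)
			maxval = val;
	}

	cdbdisp_clearCdbPgResults(&cdb_pgresults);

	return maxval;
}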
Code Example #2
File: resgroup_helper.c    Project: adam8157/gpdb
static void
dumpResGroupInfo(StringInfo str)
{
	if (Gp_role == GP_ROLE_DISPATCH)
	{
		int               i;
		StringInfoData    str_qd;
		StringInfoData    buffer;
		CdbPgResults      cdb_pgresults = {NULL, 0};
		struct pg_result *pg_result;

		initStringInfo(&str_qd);
		initStringInfo(&buffer);
		appendStringInfo(&buffer,
						 "select * from pg_resgroup_get_status_kv('dump');");
		
		CdbDispatchCommand(buffer.data, 0, &cdb_pgresults);
		
		if (cdb_pgresults.numResults == 0)
			elog(ERROR, "dumpResGroupInfo didn't get back any results from the segDBs");
		
		LWLockAcquire(ResGroupLock, LW_EXCLUSIVE);
		ResGroupDumpInfo(&str_qd);
		LWLockRelease(ResGroupLock);

		/* Append the QD and all QE results together to form str */
		appendStringInfo(str, "{\"info\":[%s,", str_qd.data);
		for (i = 0; i < cdb_pgresults.numResults; i++)
		{
			pg_result = cdb_pgresults.pg_results[i];
			if (PQresultStatus(pg_result) != PGRES_TUPLES_OK)
			{
				cdbdisp_clearCdbPgResults(&cdb_pgresults);
				elog(ERROR, "pg_resgroup_get_status_kv(): resultStatus not tuples_Ok");
			}
			Assert(PQntuples(pg_result) == 1);
			appendStringInfo(str, "%s", PQgetvalue(pg_result, 0, 2));
			if (i < cdb_pgresults.numResults - 1)
				appendStringInfo(str, ",");
		}
		appendStringInfo(str, "]}");
		cdbdisp_clearCdbPgResults(&cdb_pgresults);
	}
	else
	{
		LWLockAcquire(ResGroupLock, LW_EXCLUSIVE);
		ResGroupDumpInfo(str);
		LWLockRelease(ResGroupLock);
	}
}
Code Example #3
File: dbsize.c    Project: pf-qiu/gpdb
/*
 * Helper function to dispatch a size-returning command.
 *
 * Dispatches the given SQL query to segments, and sums up the results.
 * The query is expected to return one int8 value.
 */
int64
get_size_from_segDBs(const char *cmd)
{
	int64		result;
	CdbPgResults cdb_pgresults = {NULL, 0};
	int			i;

	Assert(Gp_role == GP_ROLE_DISPATCH);

	CdbDispatchCommand(cmd, DF_WITH_SNAPSHOT, &cdb_pgresults);

	result = 0;
	for (i = 0; i < cdb_pgresults.numResults; i++)
	{
		Datum		value;
		struct pg_result *pgresult = cdb_pgresults.pg_results[i];

		if (PQresultStatus(pgresult) != PGRES_TUPLES_OK)
		{
			cdbdisp_clearCdbPgResults(&cdb_pgresults);
			ereport(ERROR,
					(errmsg("unexpected result from segment: %d",
							PQresultStatus(pgresult))));
		}
		if (PQntuples(pgresult) != 1 || PQnfields(pgresult) != 1)
		{
			cdbdisp_clearCdbPgResults(&cdb_pgresults);
			ereport(ERROR,
					(errmsg("unexpected shape of result from segment (%d rows, %d cols)",
							PQntuples(pgresult), PQnfields(pgresult))));
		}
		if (PQgetisnull(pgresult, 0, 0))
			value = 0;
		else
			value = DirectFunctionCall1(int8in,
										CStringGetDatum(PQgetvalue(pgresult, 0, 0)));
		result += value;
	}

	cdbdisp_clearCdbPgResults(&cdb_pgresults);

	return result;
}
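
A hedged usage sketch for get_size_from_segDBs(): callers are expected to pass a query that returns exactly one int8 per segment, and the function sums the values. The caller below is an assumption for illustration (the function name and the pg_relation_size() query text are not taken from dbsize.c's real call sites); it only shows the intended calling convention on the QD.

/*
 * Illustrative caller (name and query text are assumptions): total a
 * relation's size across segments.  Must run on the QD.
 */
static int64
example_total_rel_size(Oid relid)
{
	StringInfoData sql;
	int64		size;

	Assert(Gp_role == GP_ROLE_DISPATCH);

	initStringInfo(&sql);
	appendStringInfo(&sql,
					 "select pg_catalog.pg_relation_size(%u)", relid);

	/* Sums the single int8 value returned by every segment. */
	size = get_size_from_segDBs(sql.data);

	pfree(sql.data);

	return size;
}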
Code Example #4
File: cdbsreh.c    Project: LJoNe/gpdb
/*
 * Delete the error log of the specified relation.  This returns true from the
 * master iff all segments and the master find the relation.
 */
Datum
gp_truncate_error_log(PG_FUNCTION_ARGS)
{
	text	   *relname;
	char	   *relname_str;
	RangeVar	   *relrv;
	Oid				relid;
	bool		allResults = true;

	relname = PG_GETARG_TEXT_P(0);

	relname_str = text_to_cstring(relname);
	if (strcmp(relname_str, "*.*") == 0)
	{
		/*
		 * Only a superuser is allowed to delete log files across databases.
		 */
		if (!superuser())
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					(errmsg("must be superuser to delete all error log files"))));

		ErrorLogDelete(InvalidOid, InvalidOid);
	}
	else if (strcmp(relname_str, "*") == 0)
	{
		/*
		 * Database owner can delete error log files.
		 */
		if (!pg_database_ownercheck(MyDatabaseId, GetUserId()))
			aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_DATABASE,
						   get_database_name(MyDatabaseId));

		ErrorLogDelete(MyDatabaseId, InvalidOid);
	}
	else
	{
		AclResult	aclresult;

		relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
		relid = RangeVarGetRelid(relrv, true);

		/* Return false if the relation does not exist. */
		if (!OidIsValid(relid))
			PG_RETURN_BOOL(false);

		/*
		 * Allow only the table owner to truncate error log.
		 */
		aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_TRUNCATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_CLASS, relrv->relname);

		/* We don't care if this fails or not. */
		ErrorLogDelete(MyDatabaseId, relid);
	}

	/*
	 * Dispatch the work to segments.
	 */
	if (Gp_role == GP_ROLE_DISPATCH)
	{
		int			i = 0;
		StringInfoData	sql;
		CdbPgResults cdb_pgresults = {NULL, 0};

		initStringInfo(&sql);


		appendStringInfo(&sql,
						 "SELECT pg_catalog.gp_truncate_error_log(%s)",
						 quote_literal_internal(text_to_cstring(relname)));

		CdbDispatchCommand(sql.data, DF_WITH_SNAPSHOT, &cdb_pgresults);

		for (i = 0; i < cdb_pgresults.numResults; i++)
		{
			Datum		value;
			bool		isnull;
			struct pg_result *pgresult = cdb_pgresults.pg_results[i];

			if (PQresultStatus(pgresult) != PGRES_TUPLES_OK)
			{
				cdbdisp_clearCdbPgResults(&cdb_pgresults);
				ereport(ERROR,
						(errmsg("unexpected result from segment: %d",
								PQresultStatus(pgresult))));
			}
			value = ResultToDatum(pgresult, 0, 0, boolin, &isnull);
			allResults &= (!isnull && DatumGetBool(value));
		}

		cdbdisp_clearCdbPgResults(&cdb_pgresults);
		pfree(sql.data);
	}

	/* Return true iff all segments return true. */
	PG_RETURN_BOOL(allResults);
}
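
ResultToDatum() is used here (and in the next example) but its definition is not shown on this page. A plausible reconstruction, offered as an assumption rather than the actual cdbsreh.c code, would wrap PQgetisnull() and invoke the supplied type input function in the standard way (cstring value, InvalidOid typioparam, -1 typmod):

/*
 * Hypothetical sketch of a ResultToDatum-style helper; the real gpdb
 * implementation may differ.  Converts one text-format cell of a PGresult
 * into a Datum using the given type input function (e.g. boolin, int4in).
 */
static Datum
result_to_datum_sketch(struct pg_result *result, int row, int col,
					   PGFunction inputfn, bool *isnull)
{
	if (PQgetisnull(result, row, col))
	{
		*isnull = true;
		return (Datum) 0;
	}

	*isnull = false;
	return DirectFunctionCall3(inputfn,
							   CStringGetDatum(PQgetvalue(result, row, col)),
							   ObjectIdGetDatum(InvalidOid),
							   Int32GetDatum(-1));
}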
Code Example #5
File: cdbsreh.c    Project: LJoNe/gpdb
/*
 * gp_read_error_log
 *
 * Returns set of error log tuples.
 */
Datum
gp_read_error_log(PG_FUNCTION_ARGS)
{
	FuncCallContext	   *funcctx;
	ReadErrorLogContext *context;
	HeapTuple			tuple;
	Datum				result;

	/*
	 * First call setup
	 */
	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext	oldcontext;
		FILE	   *fp;
		text	   *relname;

		funcctx = SRF_FIRSTCALL_INIT();

		relname = PG_GETARG_TEXT_P(0);
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		context = palloc0(sizeof(ReadErrorLogContext));
		funcctx->user_fctx = (void *) context;

		funcctx->tuple_desc = BlessTupleDesc(GetErrorTupleDesc());

		/*
		 * Though this function is usually executed on a segment, we dispatch
		 * the execution if it happens to run on the QD, and combine the
		 * results into one set.
		 */
		if (Gp_role == GP_ROLE_DISPATCH)
		{
			struct CdbPgResults cdb_pgresults = {NULL, 0};
			StringInfoData sql;

			int		i;

			initStringInfo(&sql);
			/*
			 * construct SQL
			 */
			appendStringInfo(&sql,
					"SELECT * FROM pg_catalog.gp_read_error_log(%s) ",
							 quote_literal_internal(text_to_cstring(relname)));

			CdbDispatchCommand(sql.data, DF_WITH_SNAPSHOT, &cdb_pgresults);

			for (i = 0; i < cdb_pgresults.numResults; i++)
			{
				if (PQresultStatus(cdb_pgresults.pg_results[i]) != PGRES_TUPLES_OK)
				{
					cdbdisp_clearCdbPgResults(&cdb_pgresults);
					elog(ERROR, "unexpected result from segment: %d",
								PQresultStatus(cdb_pgresults.pg_results[i]));
				}
				context->numTuples += PQntuples(cdb_pgresults.pg_results[i]);
			}

			pfree(sql.data);

			context->segResults = cdb_pgresults.pg_results;
			context->numSegResults = cdb_pgresults.numResults;
		}
		else
		{
			/*
			 * In QE, read the error log.
			 */
			RangeVar	   *relrv;
			Oid				relid;

			relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
			relid = RangeVarGetRelid(relrv, true);

			/*
			 * If the relation has gone, silently return no tuples.
			 */
			if (OidIsValid(relid))
			{
				AclResult aclresult;

				/*
				 * Requires SELECT priv to read error log.
				 */
				aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
				if (aclresult != ACLCHECK_OK)
					aclcheck_error(aclresult, ACL_KIND_CLASS, relrv->relname);

				ErrorLogFileName(context->filename, MyDatabaseId, relid);
				fp = AllocateFile(context->filename, "r");
				context->fp = fp;
			}
		}

		MemoryContextSwitchTo(oldcontext);

		if (Gp_role != GP_ROLE_DISPATCH && !context->fp)
		{
			pfree(context);
			SRF_RETURN_DONE(funcctx);
		}
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (ReadErrorLogContext *) funcctx->user_fctx;

	/*
	 * Read the error log, typically on segments.  We don't check Gp_role,
	 * however, in case the master also wants to read the file.
	 */
	if (context->fp)
	{
		pg_crc32	crc, written_crc;
		tuple = ErrorLogRead(context->fp, &written_crc);

		/*
		 * CRC check.
		 */
		if (HeapTupleIsValid(tuple))
		{
			INIT_CRC32C(crc);
			COMP_CRC32C(crc, tuple->t_data, tuple->t_len);
			FIN_CRC32C(crc);

			if (!EQ_CRC32C(crc, written_crc))
			{
				elog(LOG, "incorrect checksum in error log %s",
						  context->filename);
				tuple = NULL;
			}
		}

		/*
		 * If we found a valid tuple, return it.  Otherwise, fall through
		 * to the DONE routine.
		 */
		if (HeapTupleIsValid(tuple))
		{
			/*
			 * We need to set the typmod so the executor understands
			 * the type we just blessed.
			 */
			HeapTupleHeaderSetTypMod(tuple->t_data,
									 funcctx->tuple_desc->tdtypmod);

			result = HeapTupleGetDatum(tuple);
			SRF_RETURN_NEXT(funcctx, result);
		}
	}

	/*
	 * If we got results from dispatch, return all the tuples.
	 */
	while (context->currentResult < context->numSegResults)
	{
		Datum		values[NUM_ERRORTABLE_ATTR];
		bool		isnull[NUM_ERRORTABLE_ATTR];
		PGresult   *segres = context->segResults[context->currentResult];
		int			row = context->currentRow;

		if (row >= PQntuples(segres))
		{
			context->currentRow = 0;
			context->currentResult++;
			continue;
		}
		context->currentRow++;

		MemSet(isnull, false, sizeof(isnull));

		values[0] = ResultToDatum(segres, row, 0, timestamptz_in, &isnull[0]);
		values[1] = ResultToDatum(segres, row, 1, textin, &isnull[1]);
		values[2] = ResultToDatum(segres, row, 2, textin, &isnull[2]);
		values[3] = ResultToDatum(segres, row, 3, int4in, &isnull[3]);
		values[4] = ResultToDatum(segres, row, 4, int4in, &isnull[4]);
		values[5] = ResultToDatum(segres, row, 5, textin, &isnull[5]);
		values[6] = ResultToDatum(segres, row, 6, textin, &isnull[6]);
		values[7] = ResultToDatum(segres, row, 7, byteain, &isnull[7]);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, isnull);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}

	if (context->segResults != NULL)
	{
		int		i;

		for (i = 0; i < context->numSegResults; i++)
			PQclear(context->segResults[i]);

		/* XXX: better to copy to palloc'ed area */
		free(context->segResults);
	}

	/*
	 * Close the file, if we have opened it.
	 */
	if (context->fp != NULL)
	{
		FreeFile(context->fp);
		context->fp = NULL;
	}

	SRF_RETURN_DONE(funcctx);
}
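
gp_read_error_log() above and pg_lock_status() in Code Example #8 both follow PostgreSQL's standard set-returning-function (SRF) protocol: allocate per-call state in multi_call_memory_ctx on the first call, then emit one tuple per call via SRF_RETURN_NEXT() until SRF_RETURN_DONE(). A minimal, self-contained illustration of that scaffolding follows; the function itself (returning the integers 1..n) is purely illustrative and not part of gpdb, and the usual PG_FUNCTION_INFO_V1 declaration and pg_proc entry are omitted.

/*
 * Minimal SRF scaffolding sketch (assumes fmgr.h and funcapi.h, which
 * provide the SRF_* macros).  Returns the integers 1..n for an int4 arg n.
 */
Datum
example_generate_ints(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	int		   *counter;

	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldcontext;

		funcctx = SRF_FIRSTCALL_INIT();

		/* Per-call state must live in the multi-call memory context. */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
		counter = (int *) palloc0(sizeof(int));
		funcctx->user_fctx = counter;
		funcctx->max_calls = PG_GETARG_INT32(0);
		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	counter = (int *) funcctx->user_fctx;

	if (*counter < (int) funcctx->max_calls)
	{
		(*counter)++;
		SRF_RETURN_NEXT(funcctx, Int32GetDatum(*counter));
	}

	SRF_RETURN_DONE(funcctx);
}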
Code Example #6
File: resgroup_helper.c    Project: adam8157/gpdb
/*
 * Get resource usage.
 *
 * On the QD this function dispatches the request to all QEs, collecting both
 * the QEs' and the QD's resource usage.
 *
 * On a QE this function only collects its own resource usage.
 *
 * Memory & CPU usage are returned in JSON format.
 */
static void
getResUsage(ResGroupStatCtx *ctx, Oid inGroupId)
{
	int64 *usages;
	TimestampTz *timestamps;
	int i, j;

	usages = palloc(sizeof(*usages) * ctx->nGroups);
	timestamps = palloc(sizeof(*timestamps) * ctx->nGroups);

	for (j = 0; j < ctx->nGroups; j++)
	{
		ResGroupStat *row = &ctx->groups[j];
		Oid groupId = DatumGetObjectId(row->groupId);

		usages[j] = ResGroupOps_GetCpuUsage(groupId);
		timestamps[j] = GetCurrentTimestamp();
	}

	if (Gp_role == GP_ROLE_DISPATCH)
	{
		CdbPgResults cdb_pgresults = {NULL, 0};
		StringInfoData buffer;

		initStringInfo(&buffer);
		appendStringInfo(&buffer,
						 "SELECT groupid, cpu_usage, memory_usage "
						 "FROM pg_resgroup_get_status(%d)",
						 inGroupId);

		CdbDispatchCommand(buffer.data, DF_WITH_SNAPSHOT, &cdb_pgresults);

		if (cdb_pgresults.numResults == 0)
			elog(ERROR, "pg_resgroup_get_status() didn't get back any resource statistic from the segDBs");

		for (i = 0; i < cdb_pgresults.numResults; i++)
		{
			struct pg_result *pg_result = cdb_pgresults.pg_results[i];

			/*
			 * Any error here should have propagated into errbuf, so we shouldn't
			 * ever see anything other than tuples_ok here.  But, check to be
			 * sure.
			 */
			if (PQresultStatus(pg_result) != PGRES_TUPLES_OK)
			{
				cdbdisp_clearCdbPgResults(&cdb_pgresults);
				elog(ERROR, "pg_resgroup_get_status(): resultStatus not tuples_Ok");
			}

			Assert(PQntuples(pg_result) == ctx->nGroups);
			for (j = 0; j < ctx->nGroups; j++)
			{
				const char *result;
				ResGroupStat *row = &ctx->groups[j];
				Oid groupId = pg_atoi(PQgetvalue(pg_result, j, 0),
									  sizeof(Oid), 0);

				Assert(groupId == row->groupId);

				if (row->memUsage->len == 0)
				{
					Datum d = ResGroupGetStat(groupId, RES_GROUP_STAT_MEM_USAGE);

					row->groupId = groupId;
					appendStringInfo(row->memUsage, "{\"%d\":%s",
									 GpIdentity.segindex, DatumGetCString(d));

					appendStringInfo(row->cpuUsage, "{");
					calcCpuUsage(row->cpuUsage, usages[j], timestamps[j],
								 ResGroupOps_GetCpuUsage(groupId),
								 GetCurrentTimestamp());
				}

				result = PQgetvalue(pg_result, j, 1);
				appendStringInfo(row->cpuUsage, ", %s", result);

				result = PQgetvalue(pg_result, j, 2);
				appendStringInfo(row->memUsage, ", %s", result);

				if (i == cdb_pgresults.numResults - 1)
				{
					appendStringInfoChar(row->cpuUsage, '}');
					appendStringInfoChar(row->memUsage, '}');
				}
			}
		}

		cdbdisp_clearCdbPgResults(&cdb_pgresults);
	}
	else
	{
		pg_usleep(300000);

		for (j = 0; j < ctx->nGroups; j++)
		{
			ResGroupStat *row = &ctx->groups[j];
			Oid groupId = DatumGetObjectId(row->groupId);
			Datum d = ResGroupGetStat(groupId, RES_GROUP_STAT_MEM_USAGE);

			appendStringInfo(row->memUsage, "\"%d\":%s",
							 GpIdentity.segindex, DatumGetCString(d));

			calcCpuUsage(row->cpuUsage, usages[j], timestamps[j],
						 ResGroupOps_GetCpuUsage(groupId),
						 GetCurrentTimestamp());
		}
	}
}
Code Example #7
File: backoff.c    Project: adam8157/gpdb
/**
 * An interface to re-weigh an existing session on the master and all backends.
 * Input:
 *	session id - which session is the statement running in?
 *	command count - the command count of the statement.
 *	weight - int, the new priority for this statement.
 * Output:
 *	number of backends whose weights were changed by this call.
 */
Datum
gp_adjust_priority_int(PG_FUNCTION_ARGS)
{
	int32		session_id = PG_GETARG_INT32(0);
	int32		command_count = PG_GETARG_INT32(1);
	int32		wt = PG_GETARG_INT32(2);
	int			numfound = 0;
	StatementId sid;

	if (!gp_enable_resqueue_priority)
		elog(ERROR, "Query prioritization is disabled.");

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("only superuser can re-prioritize a query after it has begun execution"))));

	if (Gp_role == GP_ROLE_UTILITY)
		elog(ERROR, "Query prioritization does not work in utility mode.");

	if (wt <= 0)
		elog(ERROR, "Weight of statement must be greater than 0.");

	init(&sid, session_id, command_count);

	if (Gp_role == GP_ROLE_DISPATCH)
	{
		int			i = 0;
		CdbPgResults cdb_pgresults = {NULL, 0};
		char		cmd[255];

		/*
		 * Make sure the session exists before dispatching
		 */
		for (i = 0; i < backoffSingleton->numEntries; i++)
		{
			BackoffBackendSharedEntry *se = getBackoffEntryRW(i);

			if (equalStatementId(&se->statementId, &sid))
			{
				if (gp_debug_resqueue_priority)
				{
					elog(LOG, "changing weight of (%d:%d) from %d to %d", se->statementId.sessionId, se->statementId.commandCount, se->weight, wt);
				}

				se->weight = wt;
				numfound++;
			}
		}

		if (numfound == 0)
			elog(ERROR, "Did not find any backend entries for session %d, command count %d.", session_id, command_count);

		/*
		 * Ok, it exists, dispatch the command to the segDBs.
		 */
		sprintf(cmd, "select gp_adjust_priority(%d,%d,%d)", session_id, command_count, wt);

		CdbDispatchCommand(cmd, DF_WITH_SNAPSHOT, &cdb_pgresults);

		for (i = 0; i < cdb_pgresults.numResults; i++)
		{
			struct pg_result *pgresult = cdb_pgresults.pg_results[i];

			if (PQresultStatus(pgresult) != PGRES_TUPLES_OK)
			{
				cdbdisp_clearCdbPgResults(&cdb_pgresults);
				elog(ERROR, "gp_adjust_priority: resultStatus not tuples_Ok");
			}
			else
			{

				int			j;

				for (j = 0; j < PQntuples(pgresult); j++)
				{
					int			retvalue = 0;

					retvalue = atoi(PQgetvalue(pgresult, j, 0));
					numfound += retvalue;
				}
			}
		}

		cdbdisp_clearCdbPgResults(&cdb_pgresults);

	}
	else	/* Gp_role == EXECUTE */
	{
		/*
		 * Find number of backends working on behalf of this session and
		 * distribute the weight evenly.
		 */
		int			i = 0;

		Assert(Gp_role == GP_ROLE_EXECUTE);
		for (i = 0; i < backoffSingleton->numEntries; i++)
		{
			BackoffBackendSharedEntry *se = getBackoffEntryRW(i);

			if (equalStatementId(&se->statementId, &sid))
			{
				if (gp_debug_resqueue_priority)
				{
					elog(LOG, "changing weight of (%d:%d) from %d to %d", se->statementId.sessionId, se->statementId.commandCount, se->weight, wt);
				}
				se->weight = wt;
				numfound++;
			}
		}

		if (gp_debug_resqueue_priority && numfound == 0)
		{
			elog(LOG, "did not find any matching backends on segments");
		}
	}

	PG_RETURN_INT32(numfound);
}
Code Example #8
File: lockfuncs.c    Project: adam8157/gpdb
/*
 * pg_lock_status - produce a view with one row per held or awaited lock mode
 */
Datum
pg_lock_status(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	PG_Lock_Status *mystatus;
	LockData   *lockData;
	PredicateLockData *predLockData;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;

		/* create a function context for cross-call persistence */
		funcctx = SRF_FIRSTCALL_INIT();

		/*
		 * switch to memory context appropriate for multiple function calls
		 */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* build tupdesc for result tuples */
		/* this had better match pg_locks view in system_views.sql */
		tupdesc = CreateTemplateTupleDesc(NUM_LOCK_STATUS_COLUMNS, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype",
						   TEXTOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple",
						   INT2OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 6, "virtualxid",
						   TEXTOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 7, "transactionid",
						   XIDOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 8, "classid",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objid",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 10, "objsubid",
						   INT2OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 11, "virtualtransaction",
						   TEXTOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 12, "pid",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 13, "mode",
						   TEXTOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 14, "granted",
						   BOOLOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath",
						   BOOLOID, -1, 0);
		/*
		 * These next columns are specific to GPDB
		 */
		TupleDescInitEntry(tupdesc, (AttrNumber) 16, "mppSessionId",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 17, "mppIsWriter",
						   BOOLOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 18, "gp_segment_id",
						   INT4OID, -1, 0);

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/*
		 * Collect all the locking information that we will format and send
		 * out as a result set.
		 */
		mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status));
		funcctx->user_fctx = (void *) mystatus;

		mystatus->lockData = GetLockStatusData();
		mystatus->currIdx = 0;
		mystatus->predLockData = GetPredicateLockStatusData();
		mystatus->predLockIdx = 0;

		mystatus->numSegLocks = 0;
		mystatus->numsegresults = 0;
		mystatus->segresults = NULL;

		/*
		 * Seeing the locks just from the masterDB isn't enough to know what is locked,
		 * or if there is a deadlock.  That's because the segDBs also take locks.
		 * Some locks show up only on the master, some only on the segDBs, and some on both.
		 *
		 * So, let's collect the lock information from all the segDBs.  Sure, this means
		 * there are a lot more rows coming back from pg_locks than before, since most locks
		 * on the segDBs happen across all the segDBs at the same time.  But not always,
		 * so let's play it safe and get them all.
		 */

		if (Gp_role == GP_ROLE_DISPATCH)
		{
			CdbPgResults cdb_pgresults = {NULL, 0};
			StringInfoData buffer;
			int i;
			initStringInfo(&buffer);

			/*
			 * Why dispatch something here, rather than do a UNION ALL in pg_locks view, and
			 * a join to gp_dist_random('gp_id')?  There are several important reasons.
			 *
			 * The union all method is much slower, and requires taking locks on gp_id.
			 * More importantly, applications such as pgAdmin do queries of this view that
			 * involve correlated subqueries joining to other catalog tables,
			 * which works if we do it this way, but fails
			 * if the view includes the union all.  That completely breaks the server status
			 * display in pgAdmin.
			 *
			 * Why dispatch this way, rather than via SPI?  There are several advantages.
			 * First, it's easy to get "writer gang is busy" errors if we use SPI.
			 *
			 * Second, this should be much faster, as it doesn't require setting up
			 * the interconnect, and doesn't need to touch any actual data tables to be
			 * able to get the gp_segment_id.
			 *
			 * The downside is we get n result sets, where n == number of segDBs.
			 *
			 * It would be better yet if we sent a plan tree rather than a text string,
			 * so the segDBs don't need to parse it.  That would also avoid taking any relation locks
			 * on the segDB to get this info (normally we need an accessShareLock on pg_locks on the segDB
			 * to make sure it doesn't go away during parsing).  But the only safe way I know to do this
			 * is to hand-build the plan tree, and I'm too lazy to do it right now. It's just a matter of
			 * building a function scan node, and filling it in with our result set info (from the tupledesc).
			 *
			 * One thing to note:  it's OK to join pg_locks with any catalog table or master-only table,
			 * but joining to a distributed table will result in "writer gang busy: possible attempt to
			 * execute volatile function in unsupported context" errors, because
			 * the scan of the distributed table might already be running on the writer gang
			 * when we want to dispatch this.
			 *
			 * This could be fixed by allocating a reader gang and dispatching to that, but the cost
			 * of setting up a new gang is high, and I've never seen anyone need to join this to a
			 * distributed table.
			 *
			 * GPDB_84_MERGE_FIXME: Should we rewrite this in a different way now that we have
			 * ON SEGMENT/ ON MASTER attributes on functions?
			 */
			CdbDispatchCommand("SELECT * FROM pg_catalog.pg_lock_status()", DF_WITH_SNAPSHOT, &cdb_pgresults);

			if (cdb_pgresults.numResults == 0)
				elog(ERROR, "pg_locks didn't get back any data from the segDBs");

			for (i = 0; i < cdb_pgresults.numResults; i++)
			{
				/*
				 * Any error here should have propagated into errbuf, so we shouldn't
				 * ever see anything other than tuples_ok here.  But, check to be
				 * sure.
				 */
				if (PQresultStatus(cdb_pgresults.pg_results[i]) != PGRES_TUPLES_OK)
				{
					cdbdisp_clearCdbPgResults(&cdb_pgresults);
					elog(ERROR,"pg_locks: resultStatus not tuples_Ok");
				}

				/*
				 * numSegLocks needs to be the total size we are returning to
				 * the application. At the start of this loop, it has the count
				 * for the masterDB locks.  Add each of the segDB lock counts.
				 */
				mystatus->numSegLocks += PQntuples(cdb_pgresults.pg_results[i]);

				/*
				 * This query better match the tupledesc we just made above.
				 */
				if (PQnfields(cdb_pgresults.pg_results[i]) != tupdesc->natts)
					elog(ERROR, "unexpected number of columns returned from pg_lock_status() on segment (%d, expected %d)",
						 PQnfields(cdb_pgresults.pg_results[i]), tupdesc->natts);
			}

			mystatus->numsegresults = cdb_pgresults.numResults;
			/*
			 * CdbDispatchCommand copies the result sets into our memory, which
			 * will still exist on subsequent calls.
			 */
			mystatus->segresults = cdb_pgresults.pg_results;
		}

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	mystatus = (PG_Lock_Status *) funcctx->user_fctx;
	lockData = mystatus->lockData;

	/*
	 * This loop returns all the local lock data from the segment we are running on.
	 */

	while (mystatus->currIdx < lockData->nelements)
	{
		bool		granted;
		LOCKMODE	mode = 0;
		const char *locktypename;
		char		tnbuf[32];
		Datum		values[NUM_LOCK_STATUS_COLUMNS];
		bool		nulls[NUM_LOCK_STATUS_COLUMNS];
		HeapTuple	tuple;
		Datum		result;
		LockInstanceData *instance;

		instance = &(lockData->locks[mystatus->currIdx]);

		/*
		 * Look to see if there are any held lock modes in this PROCLOCK. If
		 * so, report, and destructively modify lockData so we don't report
		 * again.
		 */
		granted = false;
		if (instance->holdMask)
		{
			for (mode = 0; mode < MAX_LOCKMODES; mode++)
			{
				if (instance->holdMask & LOCKBIT_ON(mode))
				{
					granted = true;
					instance->holdMask &= LOCKBIT_OFF(mode);
					break;
				}
			}
		}

		/*
		 * If no (more) held modes to report, see if PROC is waiting for a
		 * lock on this lock.
		 */
		if (!granted)
		{
			if (instance->waitLockMode != NoLock)
			{
				/* Yes, so report it with proper mode */
				mode = instance->waitLockMode;

				/*
				 * We are now done with this PROCLOCK, so advance pointer to
				 * continue with next one on next call.
				 */
				mystatus->currIdx++;
			}
			else
			{
				/*
				 * Okay, we've displayed all the locks associated with this
				 * PROCLOCK, proceed to the next one.
				 */
				mystatus->currIdx++;
				continue;
			}
		}

		/*
		 * Form tuple with appropriate data.
		 */
		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));

		if (instance->locktag.locktag_type <= LOCKTAG_LAST_TYPE)
			locktypename = LockTagTypeNames[instance->locktag.locktag_type];
		else
		{
			snprintf(tnbuf, sizeof(tnbuf), "unknown %d",
					 (int) instance->locktag.locktag_type);
			locktypename = tnbuf;
		}
		values[0] = CStringGetTextDatum(locktypename);

		switch ((LockTagType) instance->locktag.locktag_type)
		{
			case LOCKTAG_RELATION:
			case LOCKTAG_RELATION_EXTEND:
				values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
				values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
				nulls[3] = true;
				nulls[4] = true;
				nulls[5] = true;
				nulls[6] = true;
				nulls[7] = true;
				nulls[8] = true;
				nulls[9] = true;
				break;
			case LOCKTAG_PAGE:
				values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
				values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
				values[3] = UInt32GetDatum(instance->locktag.locktag_field3);
				nulls[4] = true;
				nulls[5] = true;
				nulls[6] = true;
				nulls[7] = true;
				nulls[8] = true;
				nulls[9] = true;
				break;
			case LOCKTAG_TUPLE:
				values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
				values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
				values[3] = UInt32GetDatum(instance->locktag.locktag_field3);
				values[4] = UInt16GetDatum(instance->locktag.locktag_field4);
				nulls[5] = true;
				nulls[6] = true;
				nulls[7] = true;
				nulls[8] = true;
				nulls[9] = true;
				break;
			case LOCKTAG_TRANSACTION:
				values[6] =
					TransactionIdGetDatum(instance->locktag.locktag_field1);
				nulls[1] = true;
				nulls[2] = true;
				nulls[3] = true;
				nulls[4] = true;
				nulls[5] = true;
				nulls[7] = true;
				nulls[8] = true;
				nulls[9] = true;
				break;
			case LOCKTAG_VIRTUALTRANSACTION:
				values[5] = VXIDGetDatum(instance->locktag.locktag_field1,
										 instance->locktag.locktag_field2);
				nulls[1] = true;
				nulls[2] = true;
				nulls[3] = true;
				nulls[4] = true;
				nulls[6] = true;
				nulls[7] = true;
				nulls[8] = true;
				nulls[9] = true;
				break;
			case LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE:
				values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
				values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
				values[7] = ObjectIdGetDatum(instance->locktag.locktag_field3);
				nulls[3] = true;
				nulls[4] = true;
				nulls[5] = true;
				nulls[6] = true;
				nulls[8] = true;
				nulls[9] = true;
				break;
			case LOCKTAG_RESOURCE_QUEUE:
#if 0
				values[1] = ObjectIdGetDatum(proc->databaseId);
#endif
				nulls[1] = true;
				values[8] = ObjectIdGetDatum(instance->locktag.locktag_field1);
				nulls[2] = true;
				nulls[3] = true;
				nulls[4] = true;
				nulls[5] = true;
				nulls[6] = true;
				nulls[7] = true;
				nulls[9] = true;
				break;
			case LOCKTAG_OBJECT:
			case LOCKTAG_USERLOCK:
			case LOCKTAG_ADVISORY:
			default:			/* treat unknown locktags like OBJECT */
				values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
				values[7] = ObjectIdGetDatum(instance->locktag.locktag_field2);
				values[8] = ObjectIdGetDatum(instance->locktag.locktag_field3);
				values[9] = Int16GetDatum(instance->locktag.locktag_field4);
				nulls[2] = true;
				nulls[3] = true;
				nulls[4] = true;
				nulls[5] = true;
				nulls[6] = true;
				break;
		}

		values[10] = VXIDGetDatum(instance->backend, instance->lxid);
		if (instance->pid != 0)
			values[11] = Int32GetDatum(instance->pid);
		else
			nulls[11] = true;
		values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode));
		values[13] = BoolGetDatum(granted);
		values[14] = BoolGetDatum(instance->fastpath);
		
		values[15] = Int32GetDatum(instance->mppSessionId);

		values[16] = BoolGetDatum(instance->mppIsWriter);

		values[17] = Int32GetDatum(GpIdentity.segindex);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);
		SRF_RETURN_NEXT(funcctx, result);
	}

	/*
	 * This loop only executes on the masterDB and only in dispatch mode, because that
	 * is the only time we dispatched to the segDBs.
	 */

	while (mystatus->currIdx >= lockData->nelements && mystatus->currIdx < lockData->nelements + mystatus->numSegLocks)
	{
		HeapTuple	tuple;
		Datum		result;
		Datum		values[NUM_LOCK_STATUS_COLUMNS];
		bool		nulls[NUM_LOCK_STATUS_COLUMNS];
		int i;
		int whichresultset = 0;
		int whichelement = mystatus->currIdx - lockData->nelements;
		int whichrow = whichelement;

		Assert(Gp_role == GP_ROLE_DISPATCH);

		/*
		 * Because we have one result set per segDB (rather than one big result set with everything),
		 * we need to figure out which result set we are on, and which row within that result set
		 * we are returning.
		 *
		 * So, we walk through all the result sets and all the rows in each one, in order.
		 */

		while(whichrow >= PQntuples(mystatus->segresults[whichresultset]))
		{
			whichrow -= PQntuples(mystatus->segresults[whichresultset]);
			whichresultset++;
			if (whichresultset >= mystatus->numsegresults)
				break;
		}

		/*
		 * If this condition is true, we have already sent everything back,
		 * and we just want to do the SRF_RETURN_DONE
		 */
		if (whichresultset >= mystatus->numsegresults)
			break;

		mystatus->currIdx++;

		/*
		 * Form tuple with appropriate data we got from the segDBs
		 */
		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));

		/*
		 * For each column, extract the value (which comes back in text form)
		 * and convert it to the appropriate datatype to match our tupledesc,
		 * putting the result in values.
		 * The columns match the tupledesc built in the first call:
		 *
		 *   (locktype text, database oid, relation oid, page int4, tuple int2,
		 *    virtualxid text, transactionid xid, classid oid, objid oid,
		 *    objsubid int2, virtualtransaction text, pid int4, mode text,
		 *    granted boolean, fastpath boolean, mppSessionId int4,
		 *    mppIsWriter boolean, gp_segment_id int4)
		 */

		values[0] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 0));
		values[1] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 1)));
		values[2] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 2)));
		values[3] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 3)));
		values[4] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 4)));

		values[5] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 5));
		values[6] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 6)));
		values[7] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 7)));
		values[8] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 8)));
		values[9] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 9)));

		values[10] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 10));
		values[11] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 11)));
		values[12] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 12));
		values[13] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,13),"t",1)==0);
		values[14] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,14),"t",1)==0);
		values[15] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,15)));
		values[16] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,16),"t",1)==0);
		values[17] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,17)));

		/*
		 * Copy the null info over.  It should all match properly.
		 */
		for (i = 0; i < NUM_LOCK_STATUS_COLUMNS; i++)
		{
			nulls[i] = PQgetisnull(mystatus->segresults[whichresultset], whichrow, i);
		}

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);
		SRF_RETURN_NEXT(funcctx, result);
	}

	/*
	 * Have returned all regular locks. Now start on the SIREAD predicate
	 * locks.
	 */
	predLockData = mystatus->predLockData;
	if (mystatus->predLockIdx < predLockData->nelements)
	{
		PredicateLockTargetType lockType;

		PREDICATELOCKTARGETTAG *predTag = &(predLockData->locktags[mystatus->predLockIdx]);
		SERIALIZABLEXACT *xact = &(predLockData->xacts[mystatus->predLockIdx]);
		Datum		values[NUM_LOCK_STATUS_COLUMNS];
		bool		nulls[NUM_LOCK_STATUS_COLUMNS];
		HeapTuple	tuple;
		Datum		result;

		mystatus->predLockIdx++;

		/*
		 * Form tuple with appropriate data.
		 */
		MemSet(values, 0, sizeof(values));
		MemSet(nulls, false, sizeof(nulls));

		/* lock type */
		lockType = GET_PREDICATELOCKTARGETTAG_TYPE(*predTag);

		values[0] = CStringGetTextDatum(PredicateLockTagTypeNames[lockType]);

		/* lock target */
		values[1] = GET_PREDICATELOCKTARGETTAG_DB(*predTag);
		values[2] = GET_PREDICATELOCKTARGETTAG_RELATION(*predTag);
		if (lockType == PREDLOCKTAG_TUPLE)
			values[4] = GET_PREDICATELOCKTARGETTAG_OFFSET(*predTag);
		else
			nulls[4] = true;
		if ((lockType == PREDLOCKTAG_TUPLE) ||
			(lockType == PREDLOCKTAG_PAGE))
			values[3] = GET_PREDICATELOCKTARGETTAG_PAGE(*predTag);
		else
			nulls[3] = true;

		/* these fields are targets for other types of locks */
		nulls[5] = true;		/* virtualxid */
		nulls[6] = true;		/* transactionid */
		nulls[7] = true;		/* classid */
		nulls[8] = true;		/* objid */
		nulls[9] = true;		/* objsubid */

		/* lock holder */
		values[10] = VXIDGetDatum(xact->vxid.backendId,
								  xact->vxid.localTransactionId);
		if (xact->pid != 0)
			values[11] = Int32GetDatum(xact->pid);
		else
			nulls[11] = true;

		/*
		 * Lock mode. Currently all predicate locks are SIReadLocks, which are
		 * always held (never waiting) and have no fast path
		 */
		values[12] = CStringGetTextDatum("SIReadLock");
		values[13] = BoolGetDatum(true);
		values[14] = BoolGetDatum(false);

		/*
		 * GPDB_91_MERGE_FIXME: what to set these GPDB-specific fields to?
		 * These commented-out values are copy-pasted from the code above
		 * for normal locks.
		 */
		//values[14] = Int32GetDatum(proc->mppSessionId);
		//values[15] = BoolGetDatum(proc->mppIsWriter);
		//values[16] = Int32GetDatum(Gp_segment);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);
		SRF_RETURN_NEXT(funcctx, result);
	}

	/*
	 * If we dispatched to the segDBs, free up the memory holding the result sets.
	 * Otherwise we might leak this memory each time we get called (does it
	 * automatically get freed by the pool being deleted?  Probably, but this is safer).
	 */
	if (mystatus->segresults != NULL)
	{
		int i;
		for (i = 0; i < mystatus->numsegresults; i++)
			PQclear(mystatus->segresults[i]);

		free(mystatus->segresults);
	}

	SRF_RETURN_DONE(funcctx);
}
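
The second loop in pg_lock_status() has to map one running index onto a (result set, row) pair, because the QD keeps a separate PGresult per segment rather than one combined result. The arithmetic is easy to get wrong, so here is a standalone sketch of just that index walk, with no backend dependencies; the function and parameter names are illustrative.

/*
 * Standalone sketch of the index walk used above: given per-result-set row
 * counts, translate a flat row index into (which result set, which row).
 * Returns true if the index falls inside the combined results.
 */
#include <stdbool.h>

static bool
locate_row(const int *ntuples_per_set, int nsets, int flat_index,
		   int *which_set, int *which_row)
{
	int			set = 0;
	int			row = flat_index;

	while (set < nsets && row >= ntuples_per_set[set])
	{
		row -= ntuples_per_set[set];
		set++;
	}

	if (set >= nsets)
		return false;			/* index is past the last row */

	*which_set = set;
	*which_row = row;
	return true;
}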