/*
 * worker_fetch_query_results_file fetches a query results file from the remote
 * node. The function assumes an upstream compute task depends on this query
 * results file, and therefore directly fetches the file into the upstream
 * task's directory.
 */
Datum
worker_fetch_query_results_file(PG_FUNCTION_ARGS)
{
	uint64 jobId = PG_GETARG_INT64(0);
	uint32 queryTaskId = PG_GETARG_UINT32(1);
	uint32 upstreamTaskId = PG_GETARG_UINT32(2);
	text *nodeNameText = PG_GETARG_TEXT_P(3);
	uint32 nodePort = PG_GETARG_UINT32(4);
	char *nodeName = NULL;

	/* remote filename is <jobId>/<queryTaskId> */
	StringInfo remoteDirectoryName = JobDirectoryName(jobId);
	StringInfo remoteFilename = TaskFilename(remoteDirectoryName, queryTaskId);

	/* local filename is <jobId>/<upstreamTaskId>/<queryTaskId> */
	StringInfo taskDirectoryName = TaskDirectoryName(jobId, upstreamTaskId);
	StringInfo taskFilename = TaskFilename(taskDirectoryName, queryTaskId);

	/*
	 * If this is the first call to fetch a file for the upstream task, the
	 * task directory does not exist yet. We then lock and create the directory.
	 */
	bool taskDirectoryExists = DirectoryExists(taskDirectoryName);
	if (!taskDirectoryExists)
	{
		InitTaskDirectory(jobId, upstreamTaskId);
	}

	nodeName = text_to_cstring(nodeNameText);
	FetchRegularFile(nodeName, nodePort, remoteFilename, taskFilename);

	PG_RETURN_VOID();
}
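
/*
 * Illustrative sketch (not part of the original source): JobDirectoryName()
 * and TaskFilename() above are assumed to build the <jobId>/<taskId> style
 * paths described in the comments using PostgreSQL's StringInfo API (from
 * lib/stringinfo.h). The hypothetical helper below shows that idea; the
 * "base/pgsql_job_cache", "job_", and "task_" prefixes are assumptions, not
 * the actual naming scheme.
 */
static StringInfo
ExampleTaskFilename(uint64 jobId, uint32 taskId)
{
	StringInfo taskFilename = makeStringInfo();

	/* build base/pgsql_job_cache/job_<jobId>/task_<taskId> */
	appendStringInfo(taskFilename,
					 "base/pgsql_job_cache/job_" UINT64_FORMAT "/task_%u",
					 jobId, taskId);

	return taskFilename;
}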
/*
 * task_tracker_cleanup_job finds all tasks for the given job, and cleans up
 * files, connections, and shared hash entries associated with these tasks.
 */
Datum
task_tracker_cleanup_job(PG_FUNCTION_ARGS)
{
	uint64 jobId = PG_GETARG_INT64(0);

	HASH_SEQ_STATUS status;
	WorkerTask *currentTask = NULL;
	StringInfo jobDirectoryName = NULL;
	StringInfo jobSchemaName = NULL;

	/*
	 * We first clean up any open connections, and remove tasks belonging to
	 * this job from the shared hash.
	 */
	LWLockAcquire(&WorkerTasksSharedState->taskHashLock, LW_EXCLUSIVE);

	hash_seq_init(&status, WorkerTasksSharedState->taskHash);

	currentTask = (WorkerTask *) hash_seq_search(&status);
	while (currentTask != NULL)
	{
		if (currentTask->jobId == jobId)
		{
			CleanupTask(currentTask);
		}

		currentTask = (WorkerTask *) hash_seq_search(&status);
	}

	LWLockRelease(&WorkerTasksSharedState->taskHashLock);

	/*
	 * We then delete the job directory and schema, if they exist. This cleans
	 * up all intermediate files and tables allocated for the job. Note that the
	 * schema drop call can block if another process is creating the schema or
	 * writing to a table within the schema.
	 */
	jobDirectoryName = JobDirectoryName(jobId);
	RemoveDirectory(jobDirectoryName);

	LockJobResource(jobId, AccessExclusiveLock);
	jobSchemaName = JobSchemaName(jobId);
	RemoveJobSchema(jobSchemaName);
	UnlockJobResource(jobId, AccessExclusiveLock);

	PG_RETURN_VOID();
}
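
/*
 * Illustrative sketch (not part of the original source): the scan above relies
 * on the dynahash guarantee that the entry most recently returned by
 * hash_seq_search() may be deleted while the scan is in progress, which is
 * what a cleanup routine that also removes hash entries would depend on. The
 * ExampleEntry type and its taskId key below are hypothetical; the hash APIs
 * come from utils/hsearch.h.
 */
typedef struct ExampleEntry
{
	uint64 taskId;		/* hash key */
	uint64 jobId;
} ExampleEntry;

static void
ExampleRemoveJobEntries(HTAB *entryHash, uint64 jobId)
{
	HASH_SEQ_STATUS status;
	ExampleEntry *entry = NULL;

	hash_seq_init(&status, entryHash);

	entry = (ExampleEntry *) hash_seq_search(&status);
	while (entry != NULL)
	{
		if (entry->jobId == jobId)
		{
			/* deleting the current entry is safe during a sequential scan */
			hash_search(entryHash, &entry->taskId, HASH_REMOVE, NULL);
		}

		entry = (ExampleEntry *) hash_seq_search(&status);
	}
}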
/*
 * multi_ExecutorStart is a hook called at the beginning of the execution of
 * any query plan.
 *
 * If a distributed relation is the target of the query, perform some validity
 * checks. If the statement is legal, start the distributed execution. After
 * that, the to-be-executed query is replaced with the portion that executes
 * solely on the master.
 */
void
multi_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	PlannedStmt *planStatement = queryDesc->plannedstmt;

	if (HasCitusToplevelNode(planStatement))
	{
		MultiPlan *multiPlan = GetMultiPlan(planStatement);
		MultiExecutorType executorType = MULTI_EXECUTOR_INVALID_FIRST;
		Job *workerJob = multiPlan->workerJob;

		ExecCheckRTPerms(planStatement->rtable, true);

		executorType = JobExecutorType(multiPlan);
		if (executorType == MULTI_EXECUTOR_ROUTER)
		{
			Task *task = NULL;
			List *taskList = workerJob->taskList;
			TupleDesc tupleDescriptor = ExecCleanTypeFromTL(
				planStatement->planTree->targetlist, false);
			List *dependendJobList PG_USED_FOR_ASSERTS_ONLY = workerJob->dependedJobList;

			/* router executor can only execute distributed plans with a single task */
			Assert(list_length(taskList) == 1);
			Assert(dependendJobList == NIL);

			task = (Task *) linitial(taskList);

			/* we need to set tupleDesc in executorStart */
			queryDesc->tupDesc = tupleDescriptor;

			/* drop into the router executor */
			RouterExecutorStart(queryDesc, eflags, task);
		}
		else
		{
			PlannedStmt *masterSelectPlan = MasterNodeSelectPlan(multiPlan);
			CreateStmt *masterCreateStmt = MasterNodeCreateStatement(multiPlan);
			List *masterCopyStmtList = MasterNodeCopyStatementList(multiPlan);
			RangeTblEntry *masterRangeTableEntry = NULL;
			StringInfo jobDirectoryName = NULL;

			/*
			 * We create a directory on the master node to keep task execution results.
			 * We also register this directory for automatic cleanup on portal delete.
			 */
			jobDirectoryName = JobDirectoryName(workerJob->jobId);
			CreateDirectory(jobDirectoryName);

			ResourceOwnerEnlargeJobDirectories(CurrentResourceOwner);
			ResourceOwnerRememberJobDirectory(CurrentResourceOwner, workerJob->jobId);

			/* pick distributed executor to use */
			if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
			{
				/* skip distributed query execution for EXPLAIN commands */
			}
			else if (executorType == MULTI_EXECUTOR_REAL_TIME)
			{
				MultiRealTimeExecute(workerJob);
			}
			else if (executorType == MULTI_EXECUTOR_TASK_TRACKER)
			{
				MultiTaskTrackerExecute(workerJob);
			}

			/* then create the result relation */
			ProcessUtility((Node *) masterCreateStmt,
						   "(temp table creation)",
						   PROCESS_UTILITY_QUERY,
						   NULL,
						   None_Receiver,
						   NULL);

			/* make the temporary table visible */
			CommandCounterIncrement();

			if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
			{
				CopyQueryResults(masterCopyStmtList);
			}

			/*
			 * Update the QueryDesc's snapshot so it sees the table. That's not
			 * particularly pretty, but we don't have much of a choice.  One might
			 * think we could unregister the snapshot, push a new active one,
			 * update it, register it, and be happy. That only works if it's only
			 * registered once though...
			 */
			queryDesc->snapshot->curcid = GetCurrentCommandId(false);

			/*
			 * Set the OID of the RTE used in the master select statement to point
			 * to the now created (and filled) temporary table. The target
			 * relation's oid is only known now.
			 */
			masterRangeTableEntry =
				(RangeTblEntry *) linitial(masterSelectPlan->rtable);
			masterRangeTableEntry->relid =
				RelnameGetRelid(masterRangeTableEntry->eref->aliasname);

			/*
			 * Replace to-be-run query with the master select query. As the
			 * planned statement is now replaced we can't call GetMultiPlan() in
			 * the later hooks, so we set a flag marking this as a distributed
			 * statement running on the master. That e.g. allows us to drop the
			 * temp table later.
			 *
			 * We copy the original statement's queryId to allow
			 * pg_stat_statements and similar extensions to associate the
			 * statement with the toplevel statement.
			 */
			masterSelectPlan->queryId = queryDesc->plannedstmt->queryId;
			queryDesc->plannedstmt = masterSelectPlan;

			eflags |= EXEC_FLAG_CITUS_MASTER_SELECT;
		}
	}

	/* if execution was not already completed by the router executor, drop into the standard executor */
	if (queryDesc->estate == NULL ||
		!(queryDesc->estate->es_top_eflags & EXEC_FLAG_CITUS_ROUTER_EXECUTOR))
	{
		standard_ExecutorStart(queryDesc, eflags);
	}
}
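
/*
 * Illustrative sketch (not part of the original source): an ExecutorStart hook
 * like multi_ExecutorStart is typically installed from the extension's
 * _PG_init() function by assigning it to the ExecutorStart_hook variable
 * declared in executor/executor.h. Because multi_ExecutorStart itself falls
 * through to standard_ExecutorStart(), no previous-hook chaining is shown
 * here; this setup is an assumption about the surrounding extension code, not
 * the actual Citus source.
 */
void
_PG_init(void)
{
	/* register multi_ExecutorStart as the executor start hook */
	ExecutorStart_hook = multi_ExecutorStart;
}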