/*
 * worker_fetch_query_results_file fetches a query results file from the remote
 * node. The function assumes an upstream compute task depends on this query
 * results file, and therefore directly fetches the file into the upstream
 * task's directory.
 */
Datum
worker_fetch_query_results_file(PG_FUNCTION_ARGS)
{
	uint64 jobId = PG_GETARG_INT64(0);
	uint32 queryTaskId = PG_GETARG_UINT32(1);
	uint32 upstreamTaskId = PG_GETARG_UINT32(2);
	text *nodeNameText = PG_GETARG_TEXT_P(3);
	uint32 nodePort = PG_GETARG_UINT32(4);
	char *nodeName = NULL;

	/* remote filename is <jobId>/<queryTaskId> */
	StringInfo remoteDirectoryName = JobDirectoryName(jobId);
	StringInfo remoteFilename = TaskFilename(remoteDirectoryName, queryTaskId);

	/* local filename is <jobId>/<upstreamTaskId>/<queryTaskId> */
	StringInfo taskDirectoryName = TaskDirectoryName(jobId, upstreamTaskId);
	StringInfo taskFilename = TaskFilename(taskDirectoryName, queryTaskId);

	/*
	 * If we are the first function to fetch a file for the upstream task, the
	 * task directory does not exist. We then lock and create the directory.
	 */
	bool taskDirectoryExists = DirectoryExists(taskDirectoryName);
	if (!taskDirectoryExists)
	{
		InitTaskDirectory(jobId, upstreamTaskId);
	}

	nodeName = text_to_cstring(nodeNameText);
	FetchRegularFile(nodeName, nodePort, remoteFilename, taskFilename);

	PG_RETURN_VOID();
}
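/*
 * The sketch below is a minimal, self-contained illustration of the file
 * layout described in the comments above: the remote node keeps query results
 * under <jobId>/<queryTaskId>, while the local copy lands under
 * <jobId>/<upstreamTaskId>/<queryTaskId>. It is not the real JobDirectoryName/
 * TaskFilename/TaskDirectoryName helpers; the "job_cache" base directory and
 * the BuildTaskPaths name are assumptions made purely for illustration.
 */
#include <stdio.h>
#include <stdint.h>

static void
BuildTaskPaths(uint64_t jobId, uint32_t queryTaskId, uint32_t upstreamTaskId,
			   char *remotePath, char *localPath, size_t pathLength)
{
	/* remote filename: <jobId>/<queryTaskId> */
	snprintf(remotePath, pathLength, "job_cache/%llu/%u",
			 (unsigned long long) jobId, queryTaskId);

	/* local filename: <jobId>/<upstreamTaskId>/<queryTaskId> */
	snprintf(localPath, pathLength, "job_cache/%llu/%u/%u",
			 (unsigned long long) jobId, upstreamTaskId, queryTaskId);
}

int
main(void)
{
	char remotePath[256];
	char localPath[256];

	BuildTaskPaths(1250, 4, 7, remotePath, localPath, sizeof(remotePath));
	printf("remote: %s\nlocal:  %s\n", remotePath, localPath);
	return 0;
}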
/*
 * task_tracker_cleanup_job finds all tasks for the given job, and cleans up
 * files, connections, and shared hash entries associated with these tasks.
 */
Datum
task_tracker_cleanup_job(PG_FUNCTION_ARGS)
{
	uint64 jobId = PG_GETARG_INT64(0);
	HASH_SEQ_STATUS status;
	WorkerTask *currentTask = NULL;
	StringInfo jobDirectoryName = NULL;
	StringInfo jobSchemaName = NULL;

	/*
	 * We first clean up any open connections, and remove tasks belonging to
	 * this job from the shared hash.
	 */
	LWLockAcquire(&WorkerTasksSharedState->taskHashLock, LW_EXCLUSIVE);

	hash_seq_init(&status, WorkerTasksSharedState->taskHash);

	currentTask = (WorkerTask *) hash_seq_search(&status);
	while (currentTask != NULL)
	{
		if (currentTask->jobId == jobId)
		{
			CleanupTask(currentTask);
		}

		currentTask = (WorkerTask *) hash_seq_search(&status);
	}

	LWLockRelease(&WorkerTasksSharedState->taskHashLock);

	/*
	 * We then delete the job directory and schema, if they exist. This cleans
	 * up all intermediate files and tables allocated for the job. Note that
	 * the schema drop call can block if another process is creating the
	 * schema or writing to a table within the schema.
	 */
	jobDirectoryName = JobDirectoryName(jobId);
	RemoveDirectory(jobDirectoryName);

	LockJobResource(jobId, AccessExclusiveLock);
	jobSchemaName = JobSchemaName(jobId);
	RemoveJobSchema(jobSchemaName);
	UnlockJobResource(jobId, AccessExclusiveLock);

	PG_RETURN_VOID();
}
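/*
 * Minimal sketch of the two-phase cleanup pattern used above: first release
 * per-task state for every entry that matches the job, then drop the
 * job-wide resources (directory and schema). A plain array and a boolean
 * flag stand in for the shared dynahash and CleanupTask(); the SketchTask
 * type and CleanupTasksForJob name are illustrative assumptions, not Citus
 * code, and no locking is modeled here.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct SketchTask
{
	uint64_t jobId;
	uint32_t taskId;
	bool cleanedUp;
} SketchTask;

static void
CleanupTasksForJob(SketchTask *tasks, size_t taskCount, uint64_t jobId)
{
	/* first pass: release per-task state for the given job */
	for (size_t taskIndex = 0; taskIndex < taskCount; taskIndex++)
	{
		if (tasks[taskIndex].jobId == jobId)
		{
			tasks[taskIndex].cleanedUp = true;	/* stands in for CleanupTask() */
		}
	}

	/* the real function then removes the job directory and drops the schema */
}

int
main(void)
{
	SketchTask tasks[] = { { 10, 1, false }, { 10, 2, false }, { 11, 1, false } };

	CleanupTasksForJob(tasks, 3, 10);

	for (size_t taskIndex = 0; taskIndex < 3; taskIndex++)
	{
		printf("job %llu task %u cleaned: %d\n",
			   (unsigned long long) tasks[taskIndex].jobId,
			   tasks[taskIndex].taskId, tasks[taskIndex].cleanedUp);
	}
	return 0;
}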
/*
 * multi_ExecutorStart is a hook called at the beginning of any execution of
 * any query plan.
 *
 * If a distributed relation is the target of the query, perform some validity
 * checks. If the statement is legal, start the distributed execution. After
 * that, the to-be-executed query is replaced with the portion executing
 * solely on the master.
 */
void
multi_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	PlannedStmt *planStatement = queryDesc->plannedstmt;

	if (HasCitusToplevelNode(planStatement))
	{
		MultiPlan *multiPlan = GetMultiPlan(planStatement);
		MultiExecutorType executorType = MULTI_EXECUTOR_INVALID_FIRST;
		Job *workerJob = multiPlan->workerJob;

		ExecCheckRTPerms(planStatement->rtable, true);

		executorType = JobExecutorType(multiPlan);
		if (executorType == MULTI_EXECUTOR_ROUTER)
		{
			Task *task = NULL;
			List *taskList = workerJob->taskList;
			TupleDesc tupleDescriptor = ExecCleanTypeFromTL(
				planStatement->planTree->targetlist, false);
			List *dependendJobList PG_USED_FOR_ASSERTS_ONLY =
				workerJob->dependedJobList;

			/* router executor can only execute distributed plans with a single task */
			Assert(list_length(taskList) == 1);
			Assert(dependendJobList == NIL);

			task = (Task *) linitial(taskList);

			/* we need to set tupleDesc in executorStart */
			queryDesc->tupDesc = tupleDescriptor;

			/* drop into the router executor */
			RouterExecutorStart(queryDesc, eflags, task);
		}
		else
		{
			PlannedStmt *masterSelectPlan = MasterNodeSelectPlan(multiPlan);
			CreateStmt *masterCreateStmt = MasterNodeCreateStatement(multiPlan);
			List *masterCopyStmtList = MasterNodeCopyStatementList(multiPlan);
			RangeTblEntry *masterRangeTableEntry = NULL;
			StringInfo jobDirectoryName = NULL;

			/*
			 * We create a directory on the master node to keep task execution
			 * results. We also register this directory for automatic cleanup
			 * on portal delete.
			 */
			jobDirectoryName = JobDirectoryName(workerJob->jobId);
			CreateDirectory(jobDirectoryName);

			ResourceOwnerEnlargeJobDirectories(CurrentResourceOwner);
			ResourceOwnerRememberJobDirectory(CurrentResourceOwner,
											  workerJob->jobId);

			/* pick distributed executor to use */
			if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
			{
				/* skip distributed query execution for EXPLAIN commands */
			}
			else if (executorType == MULTI_EXECUTOR_REAL_TIME)
			{
				MultiRealTimeExecute(workerJob);
			}
			else if (executorType == MULTI_EXECUTOR_TASK_TRACKER)
			{
				MultiTaskTrackerExecute(workerJob);
			}

			/* then create the result relation */
			ProcessUtility((Node *) masterCreateStmt, "(temp table creation)",
						   PROCESS_UTILITY_QUERY, NULL, None_Receiver, NULL);

			/* make the temporary table visible */
			CommandCounterIncrement();

			if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
			{
				CopyQueryResults(masterCopyStmtList);
			}

			/*
			 * Update the QueryDesc's snapshot so it sees the table. That's
			 * not particularly pretty, but we don't have much of a choice.
			 * One might think we could unregister the snapshot, push a new
			 * active one, update it, register it, and be happy. That only
			 * works if it's only registered once though...
			 */
			queryDesc->snapshot->curcid = GetCurrentCommandId(false);

			/*
			 * Set the OID of the RTE used in the master select statement to
			 * point to the now created (and filled) temporary table. The
			 * target relation's oid is only known now.
			 */
			masterRangeTableEntry =
				(RangeTblEntry *) linitial(masterSelectPlan->rtable);
			masterRangeTableEntry->relid =
				RelnameGetRelid(masterRangeTableEntry->eref->aliasname);

			/*
			 * Replace the to-be-run query with the master select query. As
			 * the planned statement is now replaced we can't call
			 * GetMultiPlan() in the later hooks, so we set a flag marking
			 * this as a distributed statement running on the master. That
			 * e.g. allows us to drop the temp table later.
			 *
			 * We copy the original statement's queryId, to allow
			 * pg_stat_statements and similar extensions to associate the
			 * statement with the toplevel statement.
			 */
			masterSelectPlan->queryId = queryDesc->plannedstmt->queryId;

			queryDesc->plannedstmt = masterSelectPlan;

			eflags |= EXEC_FLAG_CITUS_MASTER_SELECT;
		}
	}

	/* if the execution is not done for router executor, drop into standard executor */
	if (queryDesc->estate == NULL ||
		!(queryDesc->estate->es_top_eflags & EXEC_FLAG_CITUS_ROUTER_EXECUTOR))
	{
		standard_ExecutorStart(queryDesc, eflags);
	}
}
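/*
 * Conceptual outline (not the real hook) of the dispatch performed in
 * multi_ExecutorStart: router plans run their single task directly and skip
 * the standard executor, while real-time and task-tracker plans run the
 * distributed job, collect results into a master-side temp table, and then
 * fall through to the standard executor with the master select plan. The
 * SketchExecutorType enum and DispatchDistributedPlan name below are
 * illustrative assumptions made for this sketch only.
 */
#include <stdbool.h>
#include <stdio.h>

typedef enum SketchExecutorType
{
	SKETCH_EXECUTOR_ROUTER,
	SKETCH_EXECUTOR_REAL_TIME,
	SKETCH_EXECUTOR_TASK_TRACKER
} SketchExecutorType;

static bool
DispatchDistributedPlan(SketchExecutorType executorType, bool explainOnly)
{
	if (executorType == SKETCH_EXECUTOR_ROUTER)
	{
		printf("run the single task through the router executor\n");
		return false;	/* standard executor is skipped for router plans */
	}

	if (!explainOnly)
	{
		if (executorType == SKETCH_EXECUTOR_REAL_TIME)
		{
			printf("run distributed job with the real-time executor\n");
		}
		else
		{
			printf("run distributed job with the task tracker executor\n");
		}
		printf("copy worker results into a master temp table\n");
	}

	printf("replace the plan with the master select over the temp table\n");
	return true;		/* caller falls through to standard_ExecutorStart() */
}

int
main(void)
{
	bool runStandardExecutor =
		DispatchDistributedPlan(SKETCH_EXECUTOR_REAL_TIME, false);

	printf("standard executor: %s\n", runStandardExecutor ? "yes" : "no");
	return 0;
}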