bool QueryManager::fetchNormalWorkOrders(const dag_node_index index) { bool generated_new_workorders = false; if (!query_exec_state_->hasDoneGenerationWorkOrders(index)) { // Do not fetch any work units until all blocking dependencies are met. // The releational operator is not aware of blocking dependencies for // uncorrelated scalar queries. if (!checkAllBlockingDependenciesMet(index)) { return false; } const size_t num_pending_workorders_before = workorders_container_->getNumNormalWorkOrders(index); const bool done_generation = query_dag_->getNodePayloadMutable(index)->getAllWorkOrders(workorders_container_.get(), query_context_.get(), storage_manager_, foreman_client_id_, bus_); if (done_generation) { query_exec_state_->setDoneGenerationWorkOrders(index); } // TODO(shoban): It would be a good check to see if operator is making // useful progress, i.e., the operator either generates work orders to // execute or still has pending work orders executing. However, this will not // work if Foreman polls operators without feeding data. This check can be // enabled, if Foreman is refactored to call getAllWorkOrders() only when // pending work orders are completed or new input blocks feed. generated_new_workorders = (num_pending_workorders_before < workorders_container_->getNumNormalWorkOrders(index)); } return generated_new_workorders; }
void QueryManagerBase::markOperatorFinished(const dag_node_index index) { query_exec_state_->setExecutionFinished(index); for (const dag_node_index dependent_op_index : blocking_dependents_[index]) { blocking_dependencies_[dependent_op_index].erase(index); } for (const dag_node_index dependent_op_index : output_consumers_[index]) { non_blocking_dependencies_[dependent_op_index].erase(index); } RelationalOperator *op = query_dag_->getNodePayloadMutable(index); op->updateCatalogOnCompletion(); const relation_id output_rel = op->getOutputRelationID(); for (const pair<dag_node_index, bool> &dependent_link : query_dag_->getDependents(index)) { const dag_node_index dependent_op_index = dependent_link.first; if (output_rel >= 0) { // Signal dependent operator that current operator is done feeding input blocks. query_dag_->getNodePayloadMutable(dependent_op_index)->doneFeedingInputBlocks(output_rel); } if (checkAllBlockingDependenciesMet(dependent_op_index)) { // Process the dependent operator (of the operator whose WorkOrder // was just executed) for which all the dependencies have been met. if (!fetchNormalWorkOrders(dependent_op_index) && non_blocking_dependencies_[dependent_op_index].empty() && checkNormalExecutionOver(dependent_op_index) && (!checkRebuildRequired(dependent_op_index) || initiateRebuild(dependent_op_index))) { markOperatorFinished(dependent_op_index); } } } }
// Flags the operator at DAG node 'index' as finished, bumps the count of
// finished operators, and notifies each of its dependents.
//
// A dependent is told that this operator is done feeding input blocks only if
// this operator has an output relation (ID >= 0). A dependent whose blocking
// dependencies are all met is additionally informed of that fact.
void Foreman::markOperatorFinished(dag_node_index index) {
  execution_finished_[index] = true;
  ++num_operators_finished_;

  const relation_id output_rel =
      query_dag_->getNodePayload(index).getOutputRelationID();
  const bool has_output_relation = (output_rel >= 0);

  for (const auto &link : query_dag_->getDependents(index)) {
    const dag_node_index consumer_index = link.first;
    RelationalOperator *consumer_op =
        query_dag_->getNodePayloadMutable(consumer_index);

    if (has_output_relation) {
      // No further input blocks will arrive from the finished operator.
      consumer_op->doneFeedingInputBlocks(output_rel);
    }

    if (checkAllBlockingDependenciesMet(consumer_index)) {
      consumer_op->informAllBlockingDependenciesMet();
    }
  }
}
void QueryManager::processRebuildWorkOrderCompleteMessage(const dag_node_index op_index) { query_exec_state_->decrementNumRebuildWorkOrders(op_index); if (checkRebuildOver(op_index)) { markOperatorFinished(op_index); for (const pair<dag_node_index, bool> &dependent_link : query_dag_->getDependents(op_index)) { const dag_node_index dependent_op_index = dependent_link.first; if (checkAllBlockingDependenciesMet(dependent_op_index)) { processOperator(dependent_op_index, true); } } } }
void QueryManagerBase::processDataPipelineMessage(const dag_node_index op_index, const block_id block, const relation_id rel_id, const partition_id part_id) { for (const dag_node_index consumer_index : output_consumers_[op_index]) { // Feed the streamed block to the consumer. Note that 'output_consumers_' // only contain those dependents of operator with index = op_index which are // eligible to receive streamed input. query_dag_->getNodePayloadMutable(consumer_index)->feedInputBlock(block, rel_id, part_id); // Because of the streamed input just fed, check if there are any new // WorkOrders available and if so, fetch them. if (checkAllBlockingDependenciesMet(consumer_index)) { fetchNormalWorkOrders(consumer_index); } } }
void QueryManager::markOperatorFinished(const dag_node_index index) { query_exec_state_->setExecutionFinished(index); RelationalOperator *op = query_dag_->getNodePayloadMutable(index); op->updateCatalogOnCompletion(); const relation_id output_rel = op->getOutputRelationID(); for (const pair<dag_node_index, bool> &dependent_link : query_dag_->getDependents(index)) { const dag_node_index dependent_op_index = dependent_link.first; RelationalOperator *dependent_op = query_dag_->getNodePayloadMutable(dependent_op_index); // Signal dependent operator that current operator is done feeding input blocks. if (output_rel >= 0) { dependent_op->doneFeedingInputBlocks(output_rel); } if (checkAllBlockingDependenciesMet(dependent_op_index)) { dependent_op->informAllBlockingDependenciesMet(); } } }
// Advances the operator 'op' located at DAG node 'index'.
//
// Steps, in order:
//   1. Try to fetch normal WorkOrders. If any were fetched, return and wait
//      for them to execute.
//   2. If normal execution is not over, return.
//   3. Otherwise mark the operator as finished when no rebuild is required,
//      when a freshly initiated rebuild completes right away, or when a
//      previously initiated rebuild is over. If a rebuild was just initiated
//      and produced WorkOrders, return.
//   4. If 'recursively_check_dependents' is set, process every dependent whose
//      blocking dependencies are all met.
//
// NOTE: step 4 is also reached when a rebuild had been initiated earlier and
// is not over yet, i.e. without the operator having been marked as finished.
void Foreman::processOperator(RelationalOperator *op,
                              dag_node_index index,
                              bool recursively_check_dependents) {
  if (fetchNormalWorkOrders(op, index)) {
    // WorkOrders were generated; skip the execution-finished checks.
    return;
  }

  if (!checkNormalExecutionOver(index)) {
    return;
  }

  if (!checkRebuildRequired(index)) {
    // Normal execution is over and no rebuild is needed.
    markOperatorFinished(index);
  } else if (checkRebuildInitiated(index)) {
    if (checkRebuildOver(index)) {
      // The rebuild started earlier has completed.
      markOperatorFinished(index);
    }
  } else {
    // The rebuild has not started yet, so start it now.
    if (!initiateRebuild(index)) {
      // Rebuild WorkOrders have been generated; wait for them.
      return;
    }
    // The rebuild was initiated and completed right away.
    markOperatorFinished(index);
  }

  if (!recursively_check_dependents) {
    return;
  }

  for (const auto &link : query_dag_->getDependents(index)) {
    const dag_node_index next_index = link.first;
    if (checkAllBlockingDependenciesMet(next_index)) {
      processOperator(query_dag_->getNodePayloadMutable(next_index),
                      next_index,
                      true);
    }
  }
}
void Foreman::initialize() { if (cpu_id_ >= 0) { // We can pin the foreman thread to a CPU if specified. ThreadUtil::BindToCPU(cpu_id_); } DEBUG_ASSERT(query_dag_ != nullptr); initializeState(); // Collect all the workorders from all the relational operators in the DAG. const dag_node_index dag_size = query_dag_->size(); for (dag_node_index index = 0; index < dag_size; ++index) { if (checkAllBlockingDependenciesMet(index)) { query_dag_->getNodePayloadMutable(index)->informAllBlockingDependenciesMet(); processOperator(index, false); } } // Dispatch the WorkOrders generated so far. dispatchWorkerMessages(0, 0); }
void QueryManager::processWorkOrderCompleteMessage( const dag_node_index op_index) { query_exec_state_->decrementNumQueuedWorkOrders(op_index); // Check if new work orders are available and fetch them if so. fetchNormalWorkOrders(op_index); if (checkRebuildRequired(op_index)) { if (checkNormalExecutionOver(op_index)) { if (!checkRebuildInitiated(op_index)) { if (initiateRebuild(op_index)) { // Rebuild initiated and completed right away. markOperatorFinished(op_index); } else { // Rebuild under progress. } } else if (checkRebuildOver(op_index)) { // Rebuild was under progress and now it is over. markOperatorFinished(op_index); } } else { // Normal execution under progress for this operator. } } else if (checkOperatorExecutionOver(op_index)) { // Rebuild not required for this operator and its normal execution is // complete. markOperatorFinished(op_index); } for (const pair<dag_node_index, bool> &dependent_link : query_dag_->getDependents(op_index)) { const dag_node_index dependent_op_index = dependent_link.first; if (checkAllBlockingDependenciesMet(dependent_op_index)) { // Process the dependent operator (of the operator whose WorkOrder // was just executed) for which all the dependencies have been met. processOperator(dependent_op_index, true); } } }
// TODO(harshad) - There is duplication in terms of functionality provided by
// TMB and ForemanMessage class with respect to determining the message types.
// Try to use TMB message types for inferring the message types consistently.
//
// Processes one message received by the Foreman and updates the execution
// state of the query accordingly.
//
// Handled message types:
//   - kWorkOrderCompletion: a normal WorkOrder of an operator has completed.
//   - kRebuildCompletion: a rebuild WorkOrder of an operator has completed.
//   - kDataPipeline: a block is available for streaming to the consumers of
//     an operator.
//   - kWorkOrdersAvailable: the operator should be asked for new WorkOrders.
// Any other type ends in FATAL_ERROR.
//
// After the message has been handled, pending WorkerMessages are dispatched.
//
// Returns true if the number of finished operators equals the size of the
// query plan DAG, false otherwise.
bool Foreman::processMessage(const ForemanMessage &message) {
  const dag_node_index dag_size = query_dag_->size();
  // Get the relational operator that caused this message to be sent.
  dag_node_index response_op_index = message.getRelationalOpIndex();
  // The completion cases below assert that this ID is non-negative; the
  // dispatch at the end falls back to worker 0 when it is negative.
  const int worker_id = message.getWorkerID();

  switch (message.getType()) {
    case ForemanMessage::kWorkOrderCompletion: {
      // Completion of a regular WorkOrder.
      DEBUG_ASSERT(worker_id >= 0);
      --queued_workorders_per_op_[response_op_index];
      // As the given worker finished executing a WorkOrder, decrement its
      // number of queued WorkOrders.
      workers_->decrementNumQueuedWorkOrders(worker_id);

      // Check if new work orders are available and fetch them if so.
      fetchNormalWorkOrders(
          query_dag_->getNodePayloadMutable(response_op_index),
          response_op_index);

      // Decide whether the operator can now be marked as finished.
      if (checkRebuildRequired(response_op_index)) {
        if (checkNormalExecutionOver(response_op_index)) {
          if (!checkRebuildInitiated(response_op_index)) {
            if (initiateRebuild(response_op_index)) {
              // Rebuild initiated and completed right away.
              markOperatorFinished(response_op_index);
            } else {
              // Rebuild under progress.
            }
          } else if (checkRebuildOver(response_op_index)) {
            // Rebuild was under progress and now it is over.
            markOperatorFinished(response_op_index);
          }
        } else {
          // Normal execution under progress for this operator.
        }
      } else if (checkOperatorExecutionOver(response_op_index)) {
        // Rebuild not required for this operator and its normal execution is
        // complete.
        markOperatorFinished(response_op_index);
      }

      // The dependents are visited whether or not the operator was marked as
      // finished above.
      for (pair<dag_node_index, bool> dependent_link :
           query_dag_->getDependents(response_op_index)) {
        RelationalOperator *dependent_op =
            query_dag_->getNodePayloadMutable(dependent_link.first);
        if (checkAllBlockingDependenciesMet(dependent_link.first)) {
          // Process the dependent operator (of the operator whose WorkOrder
          // was just executed) for which all the dependencies have been met.
          processOperator(dependent_op, dependent_link.first, true);
        }
      }
    }
    break;
    case ForemanMessage::kRebuildCompletion: {
      DEBUG_ASSERT(worker_id >= 0);
      // Completion of a rebuild WorkOrder.
      --rebuild_status_[response_op_index].second;
      workers_->decrementNumQueuedWorkOrders(worker_id);

      if (checkRebuildOver(response_op_index)) {
        // The rebuild is over, so the operator is finished. Give its
        // dependents with all blocking dependencies met a chance to progress.
        markOperatorFinished(response_op_index);
        for (pair<dag_node_index, bool> dependent_link :
             query_dag_->getDependents(response_op_index)) {
          RelationalOperator *dependent_op =
              query_dag_->getNodePayloadMutable(dependent_link.first);
          if (checkAllBlockingDependenciesMet(dependent_link.first)) {
            processOperator(dependent_op, dependent_link.first, true);
          }
        }
      }
    }
    break;
    case ForemanMessage::kDataPipeline: {
      // Data streaming message. Possible senders of this message include
      // InsertDestination and some operators which modify existing blocks.
      for (dag_node_index consumer_index :
           output_consumers_[response_op_index]) {
        RelationalOperator *consumer_op =
            query_dag_->getNodePayloadMutable(consumer_index);
        // Feed the streamed block to the consumer. Note that
        // output_consumers_ only contain those dependents of operator with
        // index = response_op_index which are eligible to receive streamed
        // input.
        consumer_op->feedInputBlock(message.getOutputBlockID(),
                                    message.getRelationID());
        // Because of the streamed input just fed, check if there are any new
        // WorkOrders available and if so, fetch them.
        fetchNormalWorkOrders(consumer_op, consumer_index);
      }  // end for (feeding input to dependents)
    }
    break;
    case ForemanMessage::kWorkOrdersAvailable: {
      // Check if new work orders are available.
      fetchNormalWorkOrders(
          query_dag_->getNodePayloadMutable(response_op_index),
          response_op_index);
      break;
    }
    default:
      FATAL_ERROR("Unknown ForemanMessage type");
  }

  // Dispatch the WorkerMessages to the workers. We prefer to start the search
  // for the schedulable WorkOrders beginning from response_op_index. The first
  // candidate worker to receive the next WorkOrder is the one that sent the
  // response message to Foreman.
  dispatchWorkerMessages(((worker_id >= 0) ? worker_id : 0), response_op_index);

  // The query is done once every operator in the DAG has finished.
  return num_operators_finished_ == dag_size;
}
// Builds the execution-time bookkeeping for one query.
//
// The constructor:
//   - takes the query plan DAG from 'query_handle',
//   - creates the execution state, the WorkOrders container and the query
//     context,
//   - marks every operator that has an InsertDestination as requiring a
//     rebuild,
//   - classifies every DAG link as either a streaming link (recorded in
//     'output_consumers_' of the producer) or a pipeline-breaking link
//     (recorded in 'blocking_dependencies_' of the consumer),
//   - makes a first, non-recursive pass over every operator whose blocking
//     dependencies are already met.
//
// 'query_handle', 'catalog_database', 'storage_manager' and 'bus' are passed
// through DCHECK_NOTNULL.
QueryManager::QueryManager(const tmb::client_id foreman_client_id,
                           const std::size_t num_numa_nodes,
                           QueryHandle *query_handle,
                           CatalogDatabaseLite *catalog_database,
                           StorageManager *storage_manager,
                           tmb::MessageBus *bus)
    : foreman_client_id_(foreman_client_id),
      query_id_(DCHECK_NOTNULL(query_handle)->query_id()),
      catalog_database_(DCHECK_NOTNULL(catalog_database)),
      storage_manager_(DCHECK_NOTNULL(storage_manager)),
      bus_(DCHECK_NOTNULL(bus)) {
  DCHECK(query_handle->getQueryPlanMutable() != nullptr);
  query_dag_ = query_handle->getQueryPlanMutable()->getQueryPlanDAGMutable();
  DCHECK(query_dag_ != nullptr);

  const dag_node_index dag_size = query_dag_->size();

  // Per-operator bookkeeping, sized to the number of operators in the DAG.
  output_consumers_.resize(dag_size);
  blocking_dependencies_.resize(dag_size);

  query_exec_state_.reset(new QueryExecutionState(dag_size));
  workorders_container_.reset(
      new WorkOrdersContainer(dag_size, num_numa_nodes));

  query_context_.reset(new QueryContext(query_handle->getQueryContextProto(),
                                        *catalog_database_,
                                        storage_manager_,
                                        foreman_client_id_,
                                        bus_));

  for (dag_node_index producer = 0; producer < dag_size; ++producer) {
    const QueryContext::insert_destination_id destination_id =
        query_dag_->getNodePayload(producer).getInsertDestinationID();
    if (destination_id != QueryContext::kInvalidInsertDestinationId) {
      // An InsertDestination is present, which always makes a rebuild
      // necessary.
      query_exec_state_->setRebuildRequired(producer);
      query_exec_state_->setRebuildStatus(producer, 0, false);
    }

    for (const auto &link : query_dag_->getDependents(producer)) {
      const dag_node_index consumer = link.first;
      if (query_dag_->getLinkMetadata(producer, consumer)) {
        // Pipeline-breaking link: blocks cannot be streamed between the two
        // operators, so the consumer has to wait for the producer.
        blocking_dependencies_[consumer].push_back(producer);
      } else {
        // Not a pipeline-breaker: blocks can be streamed from the producer to
        // the consumer.
        output_consumers_[producer].push_back(consumer);
      }
    }
  }

  // Collect the WorkOrders from all the relational operators in the DAG.
  for (dag_node_index op_index = 0; op_index < dag_size; ++op_index) {
    if (!checkAllBlockingDependenciesMet(op_index)) {
      continue;
    }
    query_dag_->getNodePayloadMutable(op_index)->informAllBlockingDependenciesMet();
    processOperator(op_index, false);
  }
}