void LogicalFilter::Print(int level) const { cout << setw(level * kTabSize) << " " << "Filter:" << endl; GetPlanContext(); cout << setw(level * kTabSize) << " " << "[Partition info: " << plan_context_->plan_partitioner_.get_partition_key().attrName << " table_id= " << plan_context_->plan_partitioner_.get_partition_key().table_id_ << " column_id= " << plan_context_->plan_partitioner_.get_partition_key().index << " ]" << endl; #ifdef NEWCONDI for (int i = 0; i < condi_.size(); ++i) { printf(" %s\n", condi_[i]->alias.c_str()); } #else ++level; for (int i = 0; i < condition_.size(); ++i) { cout << setw(level * kTabSize) << " " << condition_[i]->alias_ << endl; } --level; #endif child_->Print(level); }
void LogicalSort::PrintOrderByAttr(int level) const { cout << setw(level * kTabSize) << " " << "OrderByAttr:" << endl; #ifndef NEWCONDI for (int i = 0; i < order_by_attr_.size(); i++) { printf("%s %s\n", (const char *)order_by_attr_[i]->table_name_, order_by_attr_[i]->direction_ == 0 ? "ASC" : "DESC"); } #else GetPlanContext(); cout << setw(level * kTabSize) << " " << "[Partition info: " << plan_context_->plan_partitioner_.get_partition_key().attrName << " table_id= " << plan_context_->plan_partitioner_.get_partition_key().table_id_ << " column_id= " << plan_context_->plan_partitioner_.get_partition_key().index << " ]" << endl; level++; for (int i = 0; i < order_by_attrs_.size(); ++i) { cout << setw(level * kTabSize) << " " << order_by_attrs_[i].first->alias_ << " " << ((order_by_attrs_[i].second == 0) ? "ASC" : "DESC") << endl; } #endif }
/**
 * Wrap an already built child iterator in a PhysicalLimit operator.
 *
 * @param blocksize      block size handed to the physical operator.
 * @param child_iterator physical plan that feeds the limit operator.
 * @return a newly allocated PhysicalLimit configured with this node's
 *         returned_tuples_ and start_position_.
 */
PhysicalOperatorBase* LogicalLimit::GetPhysicalPlan(
    const unsigned& blocksize, PhysicalOperatorBase* child_iterator) {
  PlanContext limit_context = GetPlanContext();
  PhysicalLimit::State limit_state(GetSchema(limit_context.attribute_list_),
                                   child_iterator, returned_tuples_, blocksize,
                                   start_position_);
  return new PhysicalLimit(limit_state);
}
/**
 * Build the physical plan for this filter: the child's physical plan with a
 * PhysicalFilter on top of it.
 *
 * @param blocksize block size used by the child plan and by the filter.
 * @return a newly allocated PhysicalFilter.
 */
PhysicalOperatorBase* LogicalFilter::GetPhysicalPlan(
    const unsigned& blocksize) {
  // Obtain this node's context first, then build the child plan, in the same
  // order as before.
  PlanContext filter_context = GetPlanContext();
  PhysicalOperatorBase* child_plan = child_->GetPhysicalPlan(blocksize);

  // Describe the filter operator.
  PhysicalFilter::State filter_state;
  filter_state.child_ = child_plan;
  filter_state.block_size_ = blocksize;
  filter_state.qual_ = condi_;
  filter_state.column_id_ = column_id_;
  filter_state.schema_ = GetSchema(filter_context.attribute_list_);

  return new PhysicalFilter(filter_state);
}
/**
 * Print the root node and the partition key of its plan context, then print
 * the child subtree at the same level.
 *
 * @param level depth of this node; the left padding is level * kTabSize.
 */
void LogicalQueryPlanRoot::Print(int level) const {
  const int node_pad = level * kTabSize;
  cout << setw(node_pad) << " "
       << "Root" << endl;

  // The return value is discarded; plan_context_ is dereferenced right below,
  // so this call presumably makes sure it is populated.
  GetPlanContext();
  cout << setw(node_pad) << " "
       << "[Partition info: "
       << plan_context_->plan_partitioner_.get_partition_key().attrName;
  cout << " table_id= "
       << plan_context_->plan_partitioner_.get_partition_key().table_id_;
  cout << " column_id= "
       << plan_context_->plan_partitioner_.get_partition_key().index << " ]"
       << endl;

  child_->Print(level);
}
/**
 * Build the physical plan for this sort node. The resulting chain is
 *   child plan -> Expander -> ExchangeMerger -> PhysicalSort
 * where the exchange sends all data to the node with id 0.
 *
 * @param blocksize block size used by every operator created here.
 * @return a newly allocated PhysicalSort on top of the chain.
 */
PhysicalOperatorBase *LogicalSort::GetPhysicalPlan(const unsigned &blocksize) {
  // The returned context itself is not needed; child_plan_context_ is read
  // below, so this call presumably makes sure it is populated.
  GetPlanContext();

  // Expander directly above the child plan.
  Expander::State lower_expander_state;
  lower_expander_state.block_count_in_buffer_ = EXPANDER_BUFFER_SIZE;
  lower_expander_state.block_size_ = blocksize;
  lower_expander_state.init_thread_count_ =
      Config::initial_degree_of_parallelism;
  lower_expander_state.child_ = child_->GetPhysicalPlan(blocksize);
  lower_expander_state.schema_ =
      GetSchema(child_plan_context_.attribute_list_);
  PhysicalOperatorBase *lower_expander = new Expander(lower_expander_state);

  // Exchange that gathers the data from the nodes holding the child's
  // partitions.
  ExchangeMerger::State merger_state;
  merger_state.block_size_ = blocksize;
  merger_state.child_ = lower_expander;
  merger_state.exchange_id_ =
      IDsGenerator::getInstance()->generateUniqueExchangeID();
  merger_state.schema_ = GetSchema(child_plan_context_.attribute_list_);
  // lower side of the exchange
  vector<NodeID> sender_nodes =
      GetInvolvedNodeID(child_plan_context_.plan_partitioner_);
  merger_state.lower_id_list_ = sender_nodes;
  merger_state.partition_schema_ = partition_schema::set_hash_partition(0);
  // upper side of the exchange
  // TODO(admin): compute the upper_ip_list to do reduce side sort
  vector<NodeID> receiver_nodes;
  receiver_nodes.push_back(0);
  merger_state.upper_id_list_ = receiver_nodes;
  PhysicalOperatorBase *merger = new ExchangeMerger(merger_state);

  // Sort operator on the receiving side.
  PhysicalSort::State sort_state;
  sort_state.block_size_ = blocksize;
  sort_state.child_ = merger;
  // Actually we just need the column number in the end.
  for (unsigned idx = 0; idx < order_by_attr_.size(); ++idx) {
    sort_state.order_by_key_.push_back(
        GetOrderByKey(order_by_attr_[idx]->table_name_));
    sort_state.direction_.push_back(order_by_attr_[idx]->direction_);
  }
  sort_state.input_ = GetSchema(child_plan_context_.attribute_list_);

  return new PhysicalSort(sort_state);
}
void LogicalAggregation::Print(int level) const { cout << setw(level * kTabSize) << " " << "Aggregation: "; ++level; switch (aggregation_style_) { case kLocalAgg: { cout << "kLocalAgg" << endl; break; } case kReparGlobalAgg: { cout << "kReparGlobalAgg" << endl; break; } case kLocalAggReparGlobalAgg: { cout << "kLocalAggReparGlobalAgg!" << endl; break; } default: { cout << "aggregation style is not given!" << endl; } } GetPlanContext(); cout << setw(level * kTabSize) << " " << "[Partition info: " << plan_context_->plan_partitioner_.get_partition_key().attrName << " table_id= " << plan_context_->plan_partitioner_.get_partition_key().table_id_ << " column_id= " << plan_context_->plan_partitioner_.get_partition_key().index << " ]" << endl; cout << setw((level - 1) * kTabSize) << " " << "## group by attributes:" << endl; for (int i = 0; i < group_by_attrs_.size(); ++i) { cout << " " << group_by_attrs_[i]->alias_ << endl; } cout << setw((level - 1) * kTabSize) << " " << "## aggregation attributes:" << endl; for (int i = 0; i < aggregation_attrs_.size(); ++i) { cout << setw(level * kTabSize) << " " << aggregation_attrs_[i]->alias_ << endl; } --level; child_->Print(level); }
void LogicalSubquery::Print(int level) const { cout << setw(level * kTabSize) << " " << "Subquery: " << subquery_alias_ << endl; GetPlanContext(); cout << setw(level * kTabSize) << " " << "[Partition info: " << plan_context_->plan_partitioner_.get_partition_key().attrName << " table_id= " << plan_context_->plan_partitioner_.get_partition_key().table_id_ << " column_id= " << plan_context_->plan_partitioner_.get_partition_key().index << " ]" << endl; ++level; for (int i = 0; i < subquery_attrs_.size(); ++i) { cout << setw(level * kTabSize) << " " << subquery_attrs_[i].attrName << endl; } --level; cout << "--------------------------" << endl; child_->Print(level + 1); }
void LogicalLimit::Print(int level) const { if (!CanBeOmitted()) { cout << setw(level * kTabSize) << " " << "LIMIT:" << endl; GetPlanContext(); cout << setw(level * kTabSize) << " " << "[Partition info: " << plan_context_->plan_partitioner_.get_partition_key().attrName << " table_id= " << plan_context_->plan_partitioner_.get_partition_key().table_id_ << " column_id= " << plan_context_->plan_partitioner_.get_partition_key().index << " ]" << endl; ++level; cout << setw(level * kTabSize) << " " << "offset: " << start_position_ << " tuples: " << returned_tuples_ << endl; --level; } child_->Print(level); }
/**
 * Build the complete physical plan for the query.
 *
 * The plan context and the child's physical plan are obtained first. If the
 * data is not already a single partition located on collecter_node, an
 * Expander and an ExchangeMerger are added to bring it there. An Expander is
 * then placed on top, followed by the physical limit when the child of this
 * root is a LogicalLimit. Finally one of three top operators is chosen
 * according to style_ (ResultPrinter, PerformanceMonitor or ResultCollector).
 *
 * @param block_size block size used by every operator created here.
 * @return the top operator of the physical plan, or NULL when style_ matches
 *         none of the known styles (this also trips an assert in debug
 *         builds).
 */
PhysicalOperatorBase* LogicalQueryPlanRoot::GetPhysicalPlan(
    const unsigned& block_size) {
  PlanContext child_plan_context = GetPlanContext();

  // When the child is a LIMIT, build the plan of the LIMIT's own child here;
  // the physical limit is attached later, above the top expander.
  // NOTE(review): a static_cast would be the usual choice for this downcast
  // if LogicalLimit derives from the declared type of child_ — confirm.
  LogicalLimit* limit = NULL;
  PhysicalOperatorBase* child_iterator = NULL;
  if (child_->get_operator_type() == OperatorType::kLogicalLimit) {
    limit = reinterpret_cast<LogicalLimit*>(child_);
    child_iterator = limit->child_->GetPhysicalPlan(block_size);
  } else {
    child_iterator = child_->GetPhysicalPlan(block_size);
  }

  // The instance itself is not used in this function. The call is kept in
  // case obtaining the singleton here has side effects — TODO confirm and
  // drop if it has none.
  NodeTracker::GetInstance();

  bool is_exchange_need = false;
  /**
   * If the number of partitions in the child PlanContext is 1 and the
   * location is right in the collector, then exchange is not necessary.
   */
  if (!(1 == child_plan_context.plan_partitioner_.GetNumberOfPartitions() &&
        child_plan_context.plan_partitioner_.get_partition_list()[0]
                .get_location() == collecter_node)) {
    is_exchange_need = true;

    // add BlockStreamExpander iterator into physical plan
    Expander::State expander_state_lower;
    expander_state_lower.block_count_in_buffer_ = 10;
    expander_state_lower.block_size_ = block_size;
    expander_state_lower.init_thread_count_ =
        Config::initial_degree_of_parallelism;
    expander_state_lower.child_ = child_iterator;
    expander_state_lower.schema_ =
        GetSchema(child_plan_context.attribute_list_);
    PhysicalOperatorBase* expander_lower = new Expander(expander_state_lower);

    // add ExchangeEpoll iterator into physical plan
    ExchangeMerger::State state;
    state.block_size_ = block_size;
    state.child_ = expander_lower;
    state.exchange_id_ =
        IDsGenerator::getInstance()->generateUniqueExchangeID();
    state.schema_ = GetSchema(child_plan_context.attribute_list_);
    state.upper_id_list_.push_back(collecter_node);
    state.partition_schema_ = partition_schema::set_hash_partition(0);
    std::vector<NodeID> lower_id_list =
        GetInvolvedNodeID(child_plan_context.plan_partitioner_);
    state.lower_id_list_ = lower_id_list;
    child_iterator = new ExchangeMerger(state);
  }

  Expander::State expander_state;
  expander_state.block_count_in_buffer_ = 10;
  expander_state.block_size_ = block_size;
  if (is_exchange_need) {
    // if data exchange is used, only one expanded thread is enough.
    expander_state.init_thread_count_ = 1;
  } else {
    expander_state.init_thread_count_ = Config::initial_degree_of_parallelism;
  }
  expander_state.child_ = child_iterator;
  expander_state.schema_ = GetSchema(child_plan_context.attribute_list_);
  PhysicalOperatorBase* expander = new Expander(expander_state);

  // limit is non-NULL exactly when the child of the root is a LogicalLimit.
  if (limit != NULL) {
    expander = limit->GetPhysicalPlan(block_size, expander);
  }

  // Initialised so that an unexpected style_ can never hand an indeterminate
  // pointer back to the caller.
  PhysicalOperatorBase* ret = NULL;
  switch (style_) {
    case kPrint: {
      ResultPrinter::State print_state(
          GetSchema(child_plan_context.attribute_list_), expander, block_size,
          GetAttributeName(child_plan_context));
      ret = new ResultPrinter(print_state);
      break;
    }
    case kPerformance: {
      PerformanceMonitor::State performance_state(
          GetSchema(child_plan_context.attribute_list_), expander, block_size);
      ret = new PerformanceMonitor(performance_state);
      break;
    }
    case kResultCollector: {
      std::vector<std::string> column_header;
      GetColumnHeader(column_header, child_plan_context.attribute_list_);
      physical_operator::ResultCollector::State result_state(
          GetSchema(child_plan_context.attribute_list_), expander, block_size,
          column_header);
      ret = new physical_operator::ResultCollector(result_state);
      break;
    }
    default: {
      // Unknown style_: fail loudly in debug builds; ret stays NULL otherwise.
      assert(false);
      break;
    }
  }
  return ret;
}
bool LogicalFilter::GetOptimalPhysicalPlan( Requirement requirement, PhysicalPlanDescriptor& physical_plan_descriptor, const unsigned& block_size) { PhysicalPlanDescriptor physical_plan; std::vector<PhysicalPlanDescriptor> candidate_physical_plans; /* no requirement to the child*/ if (child_->GetOptimalPhysicalPlan(Requirement(), physical_plan)) { NetworkTransfer transfer = requirement.requireNetworkTransfer(physical_plan.plan_context_); if (NONE == transfer) { PhysicalFilter::State state; state.block_size_ = block_size; state.child_ = physical_plan.plan; state.qual_ = condi_; state.column_id_ = column_id_; PlanContext plan_context = GetPlanContext(); state.schema_ = GetSchema(plan_context.attribute_list_); PhysicalOperatorBase* filter = new PhysicalFilter(state); physical_plan.plan = filter; candidate_physical_plans.push_back(physical_plan); } else if ((OneToOne == transfer) || (Shuffle == transfer)) { /** * The input plan context should be transfered in the network to meet the * requirement. * TODO(wangli): Implement OneToOne Exchange * */ PhysicalFilter::State state_f; state_f.block_size_ = block_size; state_f.child_ = physical_plan.plan; state_f.qual_ = condi_; state_f.column_id_ = column_id_; PlanContext plan_context = GetPlanContext(); state_f.schema_ = GetSchema(plan_context.attribute_list_); PhysicalOperatorBase* filter = new PhysicalFilter(state_f); physical_plan.plan = filter; physical_plan.cost += physical_plan.plan_context_.GetAggregatedDatasize(); ExchangeMerger::State state; state.block_size_ = block_size; state.child_ = physical_plan.plan; // child_iterator; state.exchange_id_ = IDsGenerator::getInstance()->generateUniqueExchangeID(); state.schema_ = GetSchema(physical_plan.plan_context_.attribute_list_); std::vector<NodeID> upper_id_list; if (requirement.hasRequiredLocations()) { upper_id_list = requirement.getRequiredLocations(); } else { if (requirement.hasRequiredPartitionFunction()) { // Partition function contains the number of partitions. 
PartitionFunction* partitoin_function = requirement.getPartitionFunction(); upper_id_list = std::vector<NodeID>( NodeTracker::GetInstance()->GetNodeIDList().begin(), NodeTracker::GetInstance()->GetNodeIDList().begin() + partitoin_function->getNumberOfPartitions() - 1); } else { // TODO(wangli): decide the degree of parallelism upper_id_list = NodeTracker::GetInstance()->GetNodeIDList(); } } state.upper_id_list_ = upper_id_list; assert(requirement.hasReuiredPartitionKey()); state.partition_schema_ = partition_schema::set_hash_partition(this->GetIdInAttributeList( physical_plan.plan_context_.attribute_list_, requirement.getPartitionKey())); assert(state.partition_schema_.partition_key_index >= 0); std::vector<NodeID> lower_id_list = GetInvolvedNodeID(physical_plan.plan_context_.plan_partitioner_); state.lower_id_list_ = lower_id_list; PhysicalOperatorBase* exchange = new ExchangeMerger(state); physical_plan.plan = exchange; } candidate_physical_plans.push_back(physical_plan); } if (child_->GetOptimalPhysicalPlan(requirement, physical_plan)) { PhysicalFilter::State state; state.block_size_ = block_size; state.child_ = physical_plan.plan; state.column_id_ = column_id_; PlanContext plan_context = GetPlanContext(); state.schema_ = GetSchema(plan_context.attribute_list_); PhysicalOperatorBase* filter = new PhysicalFilter(state); physical_plan.plan = filter; candidate_physical_plans.push_back(physical_plan); } physical_plan_descriptor = GetBestPhysicalPlanDescriptor(candidate_physical_plans); if (requirement.passLimits(physical_plan_descriptor.cost)) return true; else return false; }
/**
 * Build the physical plan for this aggregation.
 *
 * - kLocalAgg: a single PhysicalAggregation on top of the child plan.
 * - kLocalAggReparGlobalAgg: local PhysicalAggregation -> Expander ->
 *   ExchangeMerger -> global PhysicalAggregation.
 * - kReparGlobalAgg: not handled here; asserts in debug builds.
 *
 * Note: if group_by_attribute_list_ is empty, the partition key is
 * ATTRIBUTE_NULL
 *
 * @param block_size block size used by every operator created here.
 * @return the top operator of the aggregation plan, or NULL when the
 *         aggregation style is not handled.
 */
PhysicalOperatorBase* LogicalAggregation::GetPhysicalPlan(
    const unsigned& block_size) {
  if (NULL == plan_context_) {
    GetPlanContext();
  }
  // Initialised so that an unhandled aggregation style can never hand an
  // indeterminate pointer back to the caller.
  PhysicalOperatorBase* ret = NULL;
  const PlanContext child_plan_context = child_->GetPlanContext();

  PhysicalAggregation::State local_agg_state;
  local_agg_state.group_by_attrs_ = group_by_attrs_;
  local_agg_state.aggregation_attrs_ = aggregation_attrs_;
  local_agg_state.block_size_ = block_size;
  local_agg_state.num_of_buckets_ =
      EstimateGroupByCardinality(child_plan_context);
  local_agg_state.bucket_size_ = 64;
  local_agg_state.input_schema_ = GetSchema(child_plan_context.attribute_list_);
  local_agg_state.output_schema_ = GetSchema(plan_context_->attribute_list_);
  local_agg_state.child_ = child_->GetPhysicalPlan(block_size);
  local_agg_state.avg_index_ = avg_id_in_agg_;
  local_agg_state.count_column_id_ = count_column_id_;
  local_agg_state.hash_schema_ =
      local_agg_state.output_schema_->duplicateSchema();

  switch (aggregation_style_) {
    case kLocalAgg: {
      local_agg_state.agg_node_type_ =
          PhysicalAggregation::State::kNotHybridAgg;
      ret = new PhysicalAggregation(local_agg_state);
      break;
    }
    case kLocalAggReparGlobalAgg: {
      // Local aggregation.
      local_agg_state.agg_node_type_ =
          PhysicalAggregation::State::kHybridAggLocal;
      PhysicalAggregation* local_aggregation =
          new PhysicalAggregation(local_agg_state);

      // Expander above the local aggregation.
      Expander::State expander_state;
      expander_state.block_count_in_buffer_ = EXPANDER_BUFFER_SIZE;
      expander_state.block_size_ = block_size;
      expander_state.init_thread_count_ = Config::initial_degree_of_parallelism;
      expander_state.child_ = local_aggregation;
      expander_state.schema_ = local_agg_state.hash_schema_->duplicateSchema();
      PhysicalOperatorBase* expander_lower = new Expander(expander_state);

      // Exchange from the child's nodes to this operator's nodes.
      ExchangeMerger::State exchange_state;
      exchange_state.block_size_ = block_size;
      exchange_state.child_ = expander_lower;
      exchange_state.exchange_id_ =
          IDsGenerator::getInstance()->generateUniqueExchangeID();
      exchange_state.lower_id_list_ =
          GetInvolvedNodeID(child_->GetPlanContext().plan_partitioner_);
      exchange_state.upper_id_list_ =
          GetInvolvedNodeID(plan_context_->plan_partitioner_);
      exchange_state.partition_schema_ =
          partition_schema::set_hash_partition(0);
      exchange_state.schema_ = local_agg_state.hash_schema_->duplicateSchema();
      PhysicalOperatorBase* exchange = new ExchangeMerger(exchange_state);

      // Global aggregation; its input has the layout of this node's output.
      PhysicalAggregation::State global_agg_state;
      global_agg_state.agg_node_type_ =
          PhysicalAggregation::State::kHybridAggGlobal;
      global_agg_state.input_schema_ =
          GetSchema(plan_context_->attribute_list_);
      global_agg_state.output_schema_ =
          GetSchema(plan_context_->attribute_list_);
      global_agg_state.hash_schema_ =
          global_agg_state.output_schema_->duplicateSchema();
      // change each aggregation expression and group by expression to one
      // attribute
      SetGroupbyAndAggAttrsForGlobalAgg(global_agg_state.group_by_attrs_,
                                        global_agg_state.aggregation_attrs_,
                                        global_agg_state.input_schema_);
      global_agg_state.block_size_ = block_size;
      global_agg_state.bucket_size_ = 64;
      global_agg_state.child_ = exchange;
      global_agg_state.num_of_buckets_ = local_agg_state.num_of_buckets_;
      global_agg_state.avg_index_ = avg_id_in_agg_;
      global_agg_state.count_column_id_ = count_column_id_;
      ret = new PhysicalAggregation(global_agg_state);
      break;
    }
    case kReparGlobalAgg: {
      // No plan is built for this style; when asserts are compiled out the
      // function returns NULL.
      assert(false);
      break;
    }
    default: {
      // Unknown aggregation style: same treatment as an unsupported one.
      assert(false);
      break;
    }
  }
  return ret;
}