/**
 * Compute, cache and return the plan context produced by the sort operator.
 *
 * The first call builds the context while holding lock_ and stores a copy in
 * plan_context_; later calls return a copy of that cached value.
 *
 * The resulting context takes its attribute list, communication cost and
 * partition function from the child, uses a default-constructed Attribute as
 * partition key, and holds exactly one partition entry built from offset 0,
 * cardinality 0 and node id 0. As part of the computation every ORDER BY
 * expression is initialised against the child's schema.
 *
 * @return a copy of the (possibly cached) plan context.
 */
PlanContext LogicalSort::GetPlanContext() {
  lock_->acquire();
  if (plan_context_ != NULL) {
    // Already computed by an earlier call: hand back the cached value.
    lock_->release();
    return *plan_context_;
  }

  // Everything below is derived from the child's plan context.
  PlanContext child_context = child_->GetPlanContext();

  PlanContext sorted_context;
  sorted_context.attribute_list_ = child_context.attribute_list_;
  sorted_context.commu_cost_ = child_context.commu_cost_;
  sorted_context.plan_partitioner_.set_partition_func(
      child_context.plan_partitioner_.get_partition_func());
  sorted_context.plan_partitioner_.set_partition_key(Attribute());

  // The sort output is described by one partition entry only.
  NodeID node_id = 0;
  unsigned long row_count = 0;
  PartitionOffset partition_offset = 0;
  vector<PlanPartitionInfo> single_partition(
      1, PlanPartitionInfo(partition_offset, row_count, node_id));
  sorted_context.plan_partitioner_.set_partition_list(single_partition);

  SetColumnId(child_context);

  // Bind each ORDER BY expression to the child's schema and column ids.
  LogicInitCnxt init_context;
  init_context.schema0_ = GetSchema(child_context.attribute_list_);
  GetColumnToId(child_context.attribute_list_, init_context.column_id0_);
  for (size_t idx = 0; idx < order_by_attrs_.size(); ++idx) {
    // The expected return type is whatever the expression itself reports.
    init_context.return_type_ = order_by_attrs_[idx].first->actual_type_;
    order_by_attrs_[idx].first->InitExprAtLogicalPlan(init_context);
  }

  // Cache the result so later calls can skip the computation.
  plan_context_ = new PlanContext();
  *plan_context_ = sorted_context;
  lock_->release();
  return sorted_context;
}
/**
 * Compute, cache and return the plan context after the filter is applied.
 *
 * The first call does the work while holding lock_ and stores the result in
 * plan_context_; later calls return a copy of the cached value.
 *
 * Starting from the child's context, when that context is hash partitioned
 * each partition is either marked as filtered (when CanBeHashPruned() says so)
 * or has its cardinality scaled by PredictSelectivity(). The filter
 * conditions are then initialised against the child's schema.
 *
 * The current implementation assumes that the boolean operator between the
 * individual comparators is "AND".
 *
 * @return a copy of the cached plan context.
 */
PlanContext LogicalFilter::GetPlanContext() {
  lock_->acquire();
  if (plan_context_ != NULL) {
    // Computed by an earlier call.
    lock_->release();
    return *plan_context_;
  }

  PlanContext filtered_context = child_->GetPlanContext();
  if (filtered_context.IsHashPartitioned()) {
    for (unsigned part = 0;
         part < filtered_context.plan_partitioner_.GetNumberOfPartitions();
         ++part) {
      if (CanBeHashPruned(part, filtered_context.plan_partitioner_)) {
        // Nothing in this partition can pass the filter.
        filtered_context.plan_partitioner_.GetPartition(part)->set_filtered();
        continue;
      }
      // Otherwise estimate how many tuples survive the filter.
      // TODO(wangli): A precise prediction is needed, based on statistics of
      // the input data, which may be maintained in the catalog module.
      const unsigned rows_before =
          filtered_context.plan_partitioner_.GetPartition(part)
              ->get_cardinality();
      const unsigned rows_after = rows_before * PredictSelectivity();
      filtered_context.plan_partitioner_.GetPartition(part)
          ->set_cardinality(rows_after);
    }
  }

  // NOTE(review): the NEWCONDI branch below refers to column_to_id and
  // input_schema, whose declarations are only present as the commented-out
  // lines here -- presumably that branch is stale; confirm before enabling it.
  // std::map<std::string, int> column_to_id;
  // GetColumnToId(filtered_context.attribute_list_, column_to_id);
  // Schema* input_schema = GetSchema(filtered_context.attribute_list_);
#ifdef NEWCONDI
  for (int i = 0; i < condi_.size(); ++i) {
    // Initialize expression of logical execution plan.
    InitExprAtLogicalPlan(condi_[i], t_boolean, column_to_id, input_schema);
  }
#else
  // Every condition is initialised as a boolean expression over the child's
  // schema and column ids.
  LogicInitCnxt init_context;
  GetColumnToId(filtered_context.attribute_list_, init_context.column_id0_);
  init_context.schema0_ = filtered_context.GetSchema();
  for (int cond = 0; cond < condition_.size(); ++cond) {
    init_context.return_type_ = t_boolean;
    condition_[cond]->InitExprAtLogicalPlan(init_context);
  }
#endif

  // Cache the result; the attribute list is additionally copied explicitly
  // after the whole-object assignment.
  plan_context_ = new PlanContext();
  *plan_context_ = filtered_context;
  plan_context_->attribute_list_.assign(
      filtered_context.attribute_list_.begin(),
      filtered_context.attribute_list_.end());
  lock_->release();
  return *plan_context_;
}
/**
 * Compute, cache and return the plan context produced by the aggregation.
 *
 * The first call does the work while holding lock_ and stores a copy of the
 * result in plan_context_; later calls return a copy of the cached value.
 *
 * Steps:
 *  1. ChangeAggAttrsForAVG() is applied, then every group-by and aggregation
 *     expression is initialised against the child's schema.
 *  2. aggregation_style_ is chosen: kLocalAgg when
 *     CanOmitHashRepartition(child) holds, kLocalAggReparGlobalAgg otherwise.
 *  3. The output context is built for the chosen style:
 *     - kLocalAgg keeps the child's partitioner (with the key's table id set
 *       to INTERMEIDATE_TABLEID) and gives each partition an equal share of
 *       the estimated group-by cardinality.
 *     - otherwise the communication cost grows by the child's aggregated data
 *       size and the output is described by one partition entry holding the
 *       whole estimated cardinality.
 *
 * Side effect: in the repartition case group_by_attrs_ may be reordered so
 * that a plain column comes first.
 *
 * @return a copy of the (possibly cached) plan context.
 */
PlanContext LogicalAggregation::GetPlanContext() {
  lock_->acquire();
  if (NULL != plan_context_) {
    // Computed by an earlier call.
    lock_->release();
    return *plan_context_;
  }
  PlanContext ret;
  const PlanContext child_context = child_->GetPlanContext();

  ChangeAggAttrsForAVG();

  // Initialise the group-by and aggregation expressions against the child's
  // schema and column ids.
  // NOTE(review): ownership of the Schema returned by GetSchema() is not
  // visible here -- confirm that it is released somewhere.
  Schema* input_schema = GetSchema(child_context.attribute_list_);
  map<string, int> column_to_id;
  GetColumnToId(child_context.attribute_list_, column_to_id);
  for (size_t i = 0; i < group_by_attrs_.size(); ++i) {
    group_by_attrs_[i]->InitExprAtLogicalPlan(group_by_attrs_[i]->actual_type_,
                                              column_to_id, input_schema);
  }
  for (size_t i = 0; i < aggregation_attrs_.size(); ++i) {
    aggregation_attrs_[i]->InitExprAtLogicalPlan(
        aggregation_attrs_[i]->actual_type_, column_to_id, input_schema);
  }

  if (CanOmitHashRepartition(child_context)) {
    aggregation_style_ = kLocalAgg;
    LOG(INFO) << "Aggregation style: kLocalAgg" << std::endl;
  } else {
    // kLocalAggReparGlobalAgg is considered preferable to kReparAndGlobalAgg,
    // so it is the default whenever a repartition is required.
    aggregation_style_ = kLocalAggReparGlobalAgg;
    LOG(INFO) << "Aggregation style: kLocalAggReparGlobalAgg" << std::endl;
  }

  switch (aggregation_style_) {
    case kLocalAgg: {
      ret.attribute_list_ = GetAttrsAfterAgg();
      ret.commu_cost_ = child_context.commu_cost_;
      ret.plan_partitioner_ = child_context.plan_partitioner_;
      // Same partition key as the child, but re-tagged with the
      // intermediate table id.
      Attribute partition_key =
          child_context.plan_partitioner_.get_partition_key();
      partition_key.table_id_ = INTERMEIDATE_TABLEID;
      ret.plan_partitioner_.set_partition_key(partition_key);
      // Spread the estimated number of groups evenly over the partitions.
      for (unsigned i = 0; i < ret.plan_partitioner_.GetNumberOfPartitions();
           i++) {
        ret.plan_partitioner_.GetPartition(i)
            ->set_cardinality(EstimateGroupByCardinality(child_context) /
                              ret.plan_partitioner_.GetNumberOfPartitions());
      }
      break;
    }
    default: {
      /**
       * Repartition aggregation is currently simplified.
       */
      // TODO(FZH): ideally, the partition properties (especially the number
      // of partitions and the partition style) after repartition aggregation
      // should be decided by the partition property enforcement.
      ret.attribute_list_ = GetAttrsAfterAgg();
      ret.commu_cost_ =
          child_context.commu_cost_ + child_context.GetAggregatedDatasize();
      ret.plan_partitioner_.set_partition_func(
          child_context.plan_partitioner_.get_partition_func());
      // Set the partition key.
      if (group_by_attrs_.empty()) {
        ret.plan_partitioner_.set_partition_key(Attribute());
      } else {
        // If a plain column appears among the group-by attributes, move the
        // first such column to the front so that the output is partitioned by
        // a column rather than by an expression.
        size_t id = 0;
        for (size_t i = 0; i < group_by_attrs_.size(); ++i) {
          if (group_by_attrs_[i]->expr_node_type_ == t_qcolcumns) {
            id = i;
            break;
          }
        }
        std::swap(group_by_attrs_[0], group_by_attrs_[id]);
        ret.plan_partitioner_.set_partition_key(
            group_by_attrs_[0]->ExprNodeToAttr(0));
      }
      // The output is described by one partition entry holding the whole
      // estimated cardinality.
      NodeID location = 0;
      int64_t data_cardinality = EstimateGroupByCardinality(child_context);
      PartitionOffset offset = 0;
      PlanPartitionInfo par(offset, data_cardinality, location);
      std::vector<PlanPartitionInfo> partition_list;
      partition_list.push_back(par);
      ret.plan_partitioner_.set_partition_list(partition_list);
      break;
    }
  }

  // Cache the result so later calls can skip the computation.
  plan_context_ = new PlanContext();
  *plan_context_ = ret;
  lock_->release();
  return ret;
}