Beispiel #1
0
/**
 * Returns the plan context of the sort operator, computing it on first use.
 *
 * The computed context is stored in plan_context_; later calls return a copy
 * of that stored value. The computation runs between lock_->acquire() and
 * lock_->release().
 *
 * The produced context takes its attribute list, communication cost and
 * partition function from the child, uses a default-constructed Attribute as
 * partition key, and holds exactly one partition (offset 0, cardinality 0,
 * location 0). As a side effect, every order-by expression is initialized
 * against the schema and column-to-id mapping of the child's attributes.
 *
 * @return the plan context of this operator, by value.
 */
PlanContext LogicalSort::GetPlanContext() {
  lock_->acquire();
  if (plan_context_ != NULL) {
    // Already computed by an earlier call: hand back the stored context.
    lock_->release();
    return *plan_context_;
  }

  // Everything below is derived from the child's plan context.
  PlanContext child_ctx = child_->GetPlanContext();

  PlanContext sorted_ctx;
  sorted_ctx.attribute_list_ = child_ctx.attribute_list_;
  sorted_ctx.commu_cost_ = child_ctx.commu_cost_;
  sorted_ctx.plan_partitioner_.set_partition_func(
      child_ctx.plan_partitioner_.get_partition_func());
  sorted_ctx.plan_partitioner_.set_partition_key(Attribute());

  // The output is described by one single partition.
  NodeID node = 0;
  unsigned long row_count = 0;
  PartitionOffset first_offset = 0;
  PlanPartitionInfo sole_partition(first_offset, row_count, node);
  vector<PlanPartitionInfo> partitions(1, sole_partition);
  sorted_ctx.plan_partitioner_.set_partition_list(partitions);

  // Prepare the order-by expressions using the child's schema and columns.
  SetColumnId(child_ctx);
  LogicInitCnxt init_ctx;
  init_ctx.schema0_ = GetSchema(child_ctx.attribute_list_);
  GetColumnToId(child_ctx.attribute_list_, init_ctx.column_id0_);
  const int order_by_count = order_by_attrs_.size();
  for (int k = 0; k < order_by_count; ++k) {
    // Each expression is initialized with its own actual type as return type.
    init_ctx.return_type_ = order_by_attrs_[k].first->actual_type_;
    order_by_attrs_[k].first->InitExprAtLogicalPlan(init_ctx);
  }

  // Remember the result for subsequent calls.
  plan_context_ = new PlanContext();
  *plan_context_ = sorted_ctx;
  lock_->release();
  return sorted_ctx;
}
Beispiel #2
0
/**
 * Returns the plan context of the filter operator, computing it on first use.
 *
 * The computed context is stored in plan_context_; later calls return a copy
 * of that stored value. The computation runs between lock_->acquire() and
 * lock_->release().
 *
 * Starting from the child's plan context, and only when that context is hash
 * partitioned, each partition is either marked as filtered (when
 * CanBeHashPruned() says so) or has its cardinality scaled by
 * PredictSelectivity(). As a side effect the filter conditions are
 * initialized as boolean expressions against the child's schema and
 * column-to-id mapping.
 *
 * @return the plan context of this operator, by value.
 */
PlanContext LogicalFilter::GetPlanContext() {
  /** In the current implementation, we assume that the boolean operator
   * between each AttributeComparator is "AND".
   */
  lock_->acquire();
  if (NULL != plan_context_) {
    lock_->release();
    return *plan_context_;
  }
  PlanContext plan_context = child_->GetPlanContext();
  if (plan_context.IsHashPartitioned()) {
    for (unsigned i = 0;
         i < plan_context.plan_partitioner_.GetNumberOfPartitions(); ++i) {
      if (CanBeHashPruned(i, plan_context.plan_partitioner_)) {
        // Is filtered.
        plan_context.plan_partitioner_.GetPartition(i)->set_filtered();
      } else {
        /**
         * Not pruned: scale the cardinality by PredictSelectivity().
         * Should predict the volume of data that passes the filter.
         * TODO(wangli): A precise prediction is needed based on the statistic
         *               of the input data, which may be maintained in the
         *               catalog module.
         */
        // NOTE(review): the cardinality is held in `unsigned` here; confirm
        // that this matches the return type of get_cardinality().
        const unsigned before_filter_cardinality =
            plan_context.plan_partitioner_.GetPartition(i)->get_cardinality();
        const unsigned after_filter_cardinality =
            before_filter_cardinality * PredictSelectivity();
        plan_context.plan_partitioner_.GetPartition(i)
            ->set_cardinality(after_filter_cardinality);
      }
    }
  }
#ifdef NEWCONDI
  // This path needs its own column map and input schema. Their declarations
  // used to be commented out, which left the identifiers below undeclared.
  // NOTE(review): input_schema is not released here — confirm who owns it.
  std::map<std::string, int> column_to_id;
  GetColumnToId(plan_context.attribute_list_, column_to_id);
  Schema* input_schema = GetSchema(plan_context.attribute_list_);
  for (int i = 0; i < condi_.size(); ++i) {
    // Initialize expression of logical execution plan.
    InitExprAtLogicalPlan(condi_[i], t_boolean, column_to_id, input_schema);
  }
#else
  LogicInitCnxt licnxt;
  GetColumnToId(plan_context.attribute_list_, licnxt.column_id0_);
  licnxt.schema0_ = plan_context.GetSchema();
  for (int i = 0; i < condition_.size(); ++i) {
    // Every filter condition must evaluate to a boolean.
    licnxt.return_type_ = t_boolean;
    condition_[i]->InitExprAtLogicalPlan(licnxt);
  }
#endif
  // Remember the result for subsequent calls.
  plan_context_ = new PlanContext();
  *plan_context_ = plan_context;
  // NOTE(review): presumably redundant after the assignment above — confirm
  // that PlanContext's assignment already copies attribute_list_.
  plan_context_->attribute_list_.assign(plan_context.attribute_list_.begin(),
                                        plan_context.attribute_list_.end());
  lock_->release();
  return *plan_context_;
}
/**
 * Returns the plan context of the aggregation operator, computing it on first
 * use.
 *
 * The computed context is stored in plan_context_; later calls return a copy
 * of that stored value. The computation runs between lock_->acquire() and
 * lock_->release().
 *
 * Side effects: calls ChangeAggAttrsForAVG(), initializes every group-by and
 * aggregation expression against the child's schema and column-to-id mapping,
 * sets aggregation_style_, and (in the repartition case) may move one
 * group-by attribute to the front of group_by_attrs_.
 *
 * - kLocalAgg (chosen when CanOmitHashRepartition() holds): the child's
 *   partitioner is kept, the partition key's table id is replaced by
 *   INTERMEIDATE_TABLEID, and each partition gets the estimated group-by
 *   cardinality divided by the number of partitions.
 * - otherwise (kLocalAggReparGlobalAgg): the communication cost grows by the
 *   child's aggregated data size and the output is described by one single
 *   partition (offset 0, location 0) carrying the estimated group-by
 *   cardinality.
 *
 * @return the plan context of this operator, by value.
 */
PlanContext LogicalAggregation::GetPlanContext() {
  lock_->acquire();
  if (NULL != plan_context_) {
    lock_->release();
    return *plan_context_;
  }
  PlanContext ret;
  const PlanContext child_context = child_->GetPlanContext();

  ChangeAggAttrsForAVG();
  // initialize expression of group_by_attrs and aggregation_attrs
  // NOTE(review): input_schema is not released here — confirm who owns it.
  Schema* input_schema = GetSchema(child_context.attribute_list_);
  map<string, int> column_to_id;
  GetColumnToId(child_context.attribute_list_, column_to_id);
  for (int i = 0; i < group_by_attrs_.size(); ++i) {
    group_by_attrs_[i]->InitExprAtLogicalPlan(group_by_attrs_[i]->actual_type_,
                                              column_to_id, input_schema);
  }
  for (int i = 0; i < aggregation_attrs_.size(); ++i) {
    aggregation_attrs_[i]->InitExprAtLogicalPlan(
        aggregation_attrs_[i]->actual_type_, column_to_id, input_schema);
  }

  if (CanOmitHashRepartition(child_context)) {
    aggregation_style_ = kLocalAgg;
    LOG(INFO) << "Aggregation style: kLocalAgg" << std::endl;
  } else {  // as for the kLocalAggReparGlobalAgg style is optimal
            // to kReparAndGlobalAgg so it's set to be default.
    aggregation_style_ = kLocalAggReparGlobalAgg;
    LOG(INFO) << "Aggregation style: kLocalAggReparGlobalAgg" << std::endl;
  }
  switch (aggregation_style_) {
    case kLocalAgg: {
      ret.attribute_list_ = GetAttrsAfterAgg();
      ret.commu_cost_ = child_context.commu_cost_;
      ret.plan_partitioner_ = child_context.plan_partitioner_;
      // Same partition key as the child, but re-tagged with the intermediate
      // table id.
      Attribute partition_key =
          child_context.plan_partitioner_.get_partition_key();
      partition_key.table_id_ = INTERMEIDATE_TABLEID;
      ret.plan_partitioner_.set_partition_key(partition_key);
      // The estimated number of groups is spread evenly over the partitions;
      // the previous per-partition cardinality is not consulted.
      for (unsigned i = 0; i < ret.plan_partitioner_.GetNumberOfPartitions();
           i++) {
        ret.plan_partitioner_.GetPartition(i)
            ->set_cardinality(EstimateGroupByCardinality(child_context) /
                              ret.plan_partitioner_.GetNumberOfPartitions());
      }
      break;
    }
    default: {
      /**
       * repartition aggregation is currently simplified.
       */

      // TODO(FZH): ideally, the partition properties (especially the number
      // of partitions and partition style) after repartition aggregation should
      // be decided by the partition property enforcement.
      ret.attribute_list_ = GetAttrsAfterAgg();
      ret.commu_cost_ =
          child_context.commu_cost_ + child_context.GetAggregatedDatasize();
      ret.plan_partitioner_.set_partition_func(
          child_context.plan_partitioner_.get_partition_func());
      // set partition key
      if (group_by_attrs_.empty()) {
        ret.plan_partitioner_.set_partition_key(Attribute());
      } else {
        int id = 0;
        // if there is a column among the group-by attributes, move it to the
        // front in order to partition by one column rather than one expression
        for (int i = 0; i < group_by_attrs_.size(); ++i) {
          if (group_by_attrs_[i]->expr_node_type_ == t_qcolcumns) {
            id = i;
            break;
          }
        }
        // When no column is found id stays 0 and the swap is a no-op.
        std::swap(group_by_attrs_[0], group_by_attrs_[id]);
        ret.plan_partitioner_.set_partition_key(
            group_by_attrs_[0]->ExprNodeToAttr(0));
      }

      // The repartitioned result is described by one single partition.
      NodeID location = 0;
      int64_t data_cardinality = EstimateGroupByCardinality(child_context);
      PartitionOffset offset = 0;
      PlanPartitionInfo par(offset, data_cardinality, location);
      std::vector<PlanPartitionInfo> partition_list;
      partition_list.push_back(par);
      ret.plan_partitioner_.set_partition_list(partition_list);
      break;
    }
  }
  // Remember the result for subsequent calls.
  plan_context_ = new PlanContext();
  *plan_context_ = ret;
  lock_->release();
  return ret;
}