// Tries to place the given filter join as deep as possible below
// 'probe_child'.
//
// For a fixed set of operator types, the first child of 'probe_child' is
// examined: when that child already outputs every probe attribute of
// 'filter_join', the method recurses into it. If the recursion yields a
// different child, a copy of 'probe_child' with that new first child is
// returned.
//
// Otherwise the filter join stays at this level: 'filter_join' itself is
// returned when 'probe_child' is already its left input, else a new FilterJoin
// is created on top of 'probe_child' with 'build_child' as the other input.
physical::PhysicalPtr InjectJoinFilters::pushDownFiltersInternal(
    const physical::PhysicalPtr &probe_child,
    const physical::PhysicalPtr &build_child,
    const physical::FilterJoinPtr &filter_join) const {
  // Decide whether this operator type is one we attempt to descend through.
  bool try_descend = false;
  switch (probe_child->getPhysicalType()) {
    case P::PhysicalType::kAggregate:
    case P::PhysicalType::kHashJoin:
    case P::PhysicalType::kSample:
    case P::PhysicalType::kSelection:
    case P::PhysicalType::kSort:
    case P::PhysicalType::kWindowAggregate:
      try_descend = true;
      break;
    default:
      break;
  }

  if (try_descend) {
    DCHECK_GE(probe_child->getNumChildren(), 1u);
    const P::PhysicalPtr first_input = probe_child->children()[0];
    const bool probe_attributes_available =
        E::SubsetOfExpressions(filter_join->probe_attributes(),
                               first_input->getOutputAttributes());
    if (probe_attributes_available) {
      const P::PhysicalPtr rewritten_input =
          pushDownFiltersInternal(first_input, build_child, filter_join);
      if (rewritten_input != first_input) {
        // Only the first child is swapped; the remaining children are kept.
        std::vector<P::PhysicalPtr> updated_inputs = probe_child->children();
        updated_inputs[0] = rewritten_input;
        return probe_child->copyWithNewChildren(updated_inputs);
      }
    }
  }

  // Nothing changed below this node: keep the original filter join if it is
  // already sitting directly on top of 'probe_child'.
  if (probe_child == filter_join->left()) {
    return filter_join;
  }

  // TODO(jianqiao): may need to update probe_attributes.
  return P::FilterJoin::Create(probe_child,
                               build_child,
                               filter_join->probe_attributes(),
                               filter_join->build_attributes(),
                               E::ToNamedExpressions(probe_child->getOutputAttributes()),
                               filter_join->build_side_filter_predicate(),
                               filter_join->is_anti_join(),
                               filter_join->hasRepartition(),
                               filter_join->cloneOutputPartitionSchemeHeader());
}
// Example #2
// 0
// Rewrites an Aggregate node so that it groups by fewer attributes.
//
// - For a TableReference node, every attribute of the table is recorded in
//   'source_' (attribute id -> (table, attribute)) and the node is returned
//   unchanged.
// - For an Aggregate node with more than one grouping expression, the grouping
//   attributes are bucketed by the table they were recorded from. For each
//   bucket larger than FLAGS_reduce_group_by_attributes_threshold whose best
//   candidate attribute (per AttributeInfo::IsBetterThan) is unique, all other
//   attributes of that bucket are dropped from the group-by list, and the
//   table is joined back on top of the new Aggregate through that attribute.
// - Any other node, or an Aggregate for which nothing can be dropped, is
//   returned as is.
P::PhysicalPtr ReduceGroupByAttributes::applyToNode(const P::PhysicalPtr &input) {
  P::TableReferencePtr table_reference;
  if (P::SomeTableReference::MatchesWithConditionalCast(input, &table_reference)) {
    // Remember which table each attribute comes from.
    for (const auto &table_attr : table_reference->attribute_list()) {
      source_.emplace(table_attr->id(), std::make_pair(table_reference, table_attr));
    }
    return input;
  }

  P::AggregatePtr aggregate;
  if (!P::SomeAggregate::MatchesWithConditionalCast(input, &aggregate)) {
    return input;
  }
  if (aggregate->grouping_expressions().size() <= 1u) {
    return input;
  }

  // Bucket the group-by attributes by the table they were recorded from.
  // Grouping expressions with no recorded source are left out of the buckets.
  std::map<P::TableReferencePtr, std::vector<E::AttributeReferencePtr>> attributes_by_table;
  for (const auto &grouping_expr : aggregate->grouping_expressions()) {
    const auto found = source_.find(grouping_expr->id());
    if (found == source_.end()) {
      continue;
    }
    const auto &origin = found->second;
    attributes_by_table[origin.first].emplace_back(origin.second);
  }

  std::unordered_set<E::ExprId> dropped_attr_ids;
  std::vector<std::pair<P::TableReferencePtr, E::AttributeReferencePtr>> tables_to_hoist;

  // Decide, bucket by bucket, whether the table should be pulled above the
  // aggregation.
  for (const auto &bucket : attributes_by_table) {
    const P::TableReferencePtr source_table = bucket.first;
    const std::vector<E::AttributeReferencePtr> &bucket_attrs = bucket.second;

    // TODO(jianqiao): find a cost-based metric instead of hard-coding the
    // threshold number of group-by attributes.
    if (bucket_attrs.size() <= FLAGS_reduce_group_by_attributes_threshold) {
      continue;
    }

    std::vector<AttributeInfo> candidates;
    for (const auto &candidate_attr : bucket_attrs) {
      candidates.emplace_back(candidate_attr,
                              cost_model_->impliesUniqueAttributes(source_table, {candidate_attr}),
                              !candidate_attr->getValueType().isVariableLength(),
                              candidate_attr->getValueType().maximumByteLength());
    }

    // Pointers are collected only after 'candidates' is fully populated, so
    // that they are not invalidated by vector growth.
    std::vector<const AttributeInfo *> ranked_candidates;
    for (const auto &candidate : candidates) {
      ranked_candidates.emplace_back(&candidate);
    }
    std::sort(ranked_candidates.begin(),
              ranked_candidates.end(),
              AttributeInfo::IsBetterThan);

    const AttributeInfo &winner = *ranked_candidates.front();
    if (!winner.is_unique) {
      // No key attribute available; this table is not pulled up.
      continue;
    }

    const E::AttributeReferencePtr key_attribute = winner.attribute;
    tables_to_hoist.emplace_back(source_table, key_attribute);

    // Everything from this table except the key leaves the group-by list.
    for (const auto &bucket_attr : bucket_attrs) {
      if (bucket_attr->id() == key_attribute->id()) {
        continue;
      }
      dropped_attr_ids.emplace(bucket_attr->id());
    }
  }

  if (dropped_attr_ids.empty()) {
    return input;
  }

  // Rebuild the Aggregate with the reduced group-by list, then stack one
  // HashJoin per hoisted table on top of it.
  std::vector<E::NamedExpressionPtr> kept_grouping_expressions;
  for (const auto &grouping_expr : aggregate->grouping_expressions()) {
    if (dropped_attr_ids.count(grouping_expr->id()) == 0) {
      kept_grouping_expressions.emplace_back(grouping_expr);
    }
  }

  const P::AggregatePtr reduced_aggregate =
      P::Aggregate::Create(aggregate->input(),
                           kept_grouping_expressions,
                           aggregate->aggregate_expressions(),
                           aggregate->filter_predicate());

  P::PhysicalPtr result = reduced_aggregate;
  // The projection list grows across iterations: each join outputs everything
  // produced so far plus the non-key attributes of the table it adds.
  std::vector<E::NamedExpressionPtr> projection =
      E::ToNamedExpressions(result->getOutputAttributes());
  for (const auto &hoisted : tables_to_hoist) {
    const P::TableReferencePtr &hoisted_table = hoisted.first;
    const E::AttributeReferencePtr &probe_attribute = hoisted.second;

    // The key attribute gets a fresh expression id on the build side; all
    // other attributes are reused as is and added to the projection.
    E::AttributeReferencePtr build_attribute;
    std::vector<E::AttributeReferencePtr> build_side_attributes;
    for (const auto &table_attr : hoisted_table->attribute_list()) {
      if (table_attr->id() != probe_attribute->id()) {
        build_side_attributes.emplace_back(table_attr);
        projection.emplace_back(table_attr);
        continue;
      }
      build_attribute =
          E::AttributeReference::Create(optimizer_context_->nextExprId(),
                                        table_attr->attribute_name(),
                                        table_attr->attribute_alias(),
                                        table_attr->relation_name(),
                                        table_attr->getValueType(),
                                        E::AttributeReferenceScope::kLocal);
      build_side_attributes.emplace_back(build_attribute);
    }

    DCHECK(build_attribute != nullptr);
    const P::TableReferencePtr build_side_table =
        P::TableReference::Create(hoisted_table->relation(),
                                  hoisted_table->relation()->getName(),
                                  build_side_attributes);
    result = P::HashJoin::Create(result,
                                 build_side_table,
                                 {probe_attribute},
                                 {build_attribute},
                                 nullptr,
                                 projection,
                                 P::HashJoin::JoinType::kInnerJoin);
  }

  return result;
}
// Maps a logical node to a physical node of the corresponding kind.
//
// Each handled logical type is cast to its concrete class and the matching
// physical node is built from its fields; logical inputs are converted
// through 'physical_mapper_'.
//
// @param logical_input The logical node to convert.
// @param physical_output Receives the created physical node when the logical
//        type is handled; left untouched otherwise.
// @return true if the logical type is handled by this method, false if not.
bool OneToOne::generatePlan(const L::LogicalPtr &logical_input,
                            P::PhysicalPtr *physical_output) {
  switch (logical_input->getLogicalType()) {
    case L::LogicalType::kTopLevelPlan: {
      const L::TopLevelPlanPtr top_level_plan = std::static_pointer_cast<const L::TopLevelPlan>(logical_input);
      const P::PhysicalPtr main_physical_plan =
          physical_mapper_->createOrGetPhysicalFromLogical(top_level_plan->plan());
      // Shared subplans are converted in the same order as they are listed.
      std::vector<P::PhysicalPtr> shared_physical_subplans;
      for (const L::LogicalPtr &shared_logical_subplan : top_level_plan->shared_subplans()) {
        shared_physical_subplans.emplace_back(
            physical_mapper_->createOrGetPhysicalFromLogical(shared_logical_subplan));
      }
      *physical_output = P::TopLevelPlan::Create(main_physical_plan,
                                                 shared_physical_subplans);
      return true;
    }
    case L::LogicalType::kSharedSubplanReference: {
      const L::SharedSubplanReferencePtr shared_subplan_reference =
          std::static_pointer_cast<const L::SharedSubplanReference>(logical_input);
      *physical_output = P::SharedSubplanReference::Create(shared_subplan_reference->subplan_id(),
                                                           shared_subplan_reference->referenced_attributes(),
                                                           shared_subplan_reference->output_attributes());
      return true;
    }
    case L::LogicalType::kTableReference: {
      const L::TableReferencePtr table_reference =
          std::static_pointer_cast<const L::TableReference>(logical_input);
      *physical_output = P::TableReference::Create(table_reference->catalog_relation(),
                                                   table_reference->relation_alias(),
                                                   table_reference->attribute_list());
      return true;
    }
    case L::LogicalType::kCopyFrom: {
      const L::CopyFromPtr copy_from =
          std::static_pointer_cast<const L::CopyFrom>(logical_input);
      *physical_output = P::CopyFrom::Create(
          copy_from->catalog_relation(), copy_from->file_name(),
          copy_from->column_delimiter(), copy_from->escape_strings());
      return true;
    }
    case L::LogicalType::kCreateIndex: {
      const L::CreateIndexPtr create_index =
          std::static_pointer_cast<const L::CreateIndex>(logical_input);
      *physical_output = P::CreateIndex::Create(physical_mapper_->createOrGetPhysicalFromLogical(
                                                                    create_index->input()),
                                                create_index->index_name(),
                                                create_index->index_attributes(),
                                                create_index->index_description());
      return true;
    }
    case L::LogicalType::kCreateTable: {
      const L::CreateTablePtr create_table =
          std::static_pointer_cast<const L::CreateTable>(logical_input);
      *physical_output = P::CreateTable::Create(create_table->relation_name(),
                                                create_table->attributes(),
                                                create_table->block_properties());
      return true;
    }
    case L::LogicalType::kDeleteTuples: {
      const L::DeleteTuplesPtr delete_tuples =
          std::static_pointer_cast<const L::DeleteTuples>(logical_input);
      *physical_output = P::DeleteTuples::Create(
          physical_mapper_->createOrGetPhysicalFromLogical(delete_tuples->input()),
          delete_tuples->predicate());
      return true;
    }
    case L::LogicalType::kDropTable: {
      const L::DropTablePtr drop_table =
          std::static_pointer_cast<const L::DropTable>(logical_input);
      *physical_output = P::DropTable::Create(drop_table->catalog_relation());
      return true;
    }
    case L::LogicalType::kInsertSelection: {
      const L::InsertSelectionPtr insert_selection =
          std::static_pointer_cast<const L::InsertSelection>(logical_input);
      *physical_output = P::InsertSelection::Create(
          physical_mapper_->createOrGetPhysicalFromLogical(insert_selection->destination()),
          physical_mapper_->createOrGetPhysicalFromLogical(insert_selection->selection()));
      return true;
    }
    case L::LogicalType::kInsertTuple: {
      const L::InsertTuplePtr insert_tuple =
          std::static_pointer_cast<const L::InsertTuple>(logical_input);
      *physical_output = P::InsertTuple::Create(
          physical_mapper_->createOrGetPhysicalFromLogical(insert_tuple->input()),
          insert_tuple->column_values());
      return true;
    }
    case L::LogicalType::kSample: {
      const L::SamplePtr sample =
          std::static_pointer_cast<const L::Sample>(logical_input);
      *physical_output = P::Sample::Create(
          physical_mapper_->createOrGetPhysicalFromLogical(sample->input()),
          sample->is_block_sample(),
          sample->percentage());
      return true;
    }
    case L::LogicalType::kSort: {
      const L::Sort *sort =
          static_cast<const L::Sort*>(logical_input.get());

      const P::PhysicalPtr physical_input =
          physical_mapper_->createOrGetPhysicalFromLogical(sort->input());

      // Find non-sort attributes: every output attribute of the physical
      // input that is not one of the sort attributes, in input order.
      const std::vector<E::AttributeReferencePtr> input_attributes =
          physical_input->getOutputAttributes();
      E::UnorderedNamedExpressionSet sort_attributes_set(sort->sort_attributes().begin(),
                                                         sort->sort_attributes().end());
      std::vector<E::AttributeReferencePtr> non_sort_attributes;
      for (const E::AttributeReferencePtr &input_attribute : input_attributes) {
        if (sort_attributes_set.find(input_attribute) == sort_attributes_set.end()) {
          non_sort_attributes.emplace_back(input_attribute);
        }
      }

      // Reuse the physical input obtained above instead of asking the mapper
      // a second time, so the Sort is built on exactly the node whose output
      // attributes were used to derive 'non_sort_attributes'.
      *physical_output = P::Sort::Create(
          physical_input,
          sort->sort_attributes(),
          non_sort_attributes,
          sort->sort_ascending(),
          sort->nulls_first_flags(),
          sort->limit());
      return true;
    }
    case L::LogicalType::kTableGenerator: {
      const L::TableGeneratorPtr table_generator =
          std::static_pointer_cast<const L::TableGenerator>(logical_input);
      *physical_output = P::TableGenerator::Create(
          table_generator->generator_function_handle(),
          table_generator->table_alias(),
          table_generator->attribute_list());
      return true;
    }
    case L::LogicalType::kUpdateTable: {
      const L::UpdateTablePtr update_table =
          std::static_pointer_cast<const L::UpdateTable>(logical_input);
      *physical_output = P::UpdateTable::Create(
          physical_mapper_->createOrGetPhysicalFromLogical(update_table->input()),
          update_table->assignees(),
          update_table->assignment_expressions(),
          update_table->predicate());
      return true;
    }
    default:
      // Not a logical type this method converts.
      return false;
  }
}