/**
 * Tries to sink the given filter join underneath probe_child.
 *
 * For a fixed set of operator types the function descends into the operator's
 * first child, provided that child still outputs every probe attribute of the
 * filter join. When the recursion cannot go any deeper, a FilterJoin is
 * placed directly on top of the node that was reached.
 *
 * @param probe_child The probe-side node currently being examined.
 * @param build_child The build-side node, passed through unchanged.
 * @param filter_join The filter join being pushed down.
 * @return A copy of probe_child whose first child was rewritten, or a new
 *         FilterJoin over probe_child, or filter_join itself when probe_child
 *         is already filter_join->left().
 */
physical::PhysicalPtr InjectJoinFilters::pushDownFiltersInternal(
    const physical::PhysicalPtr &probe_child,
    const physical::PhysicalPtr &build_child,
    const physical::FilterJoinPtr &filter_join) const {
  switch (probe_child->getPhysicalType()) {
    // All of these operator types share the same handling.
    case P::PhysicalType::kAggregate:
    case P::PhysicalType::kHashJoin:
    case P::PhysicalType::kSample:
    case P::PhysicalType::kSelection:
    case P::PhysicalType::kSort:
    case P::PhysicalType::kWindowAggregate: {
      DCHECK_GE(probe_child->getNumChildren(), 1u);
      const P::PhysicalPtr first_input = probe_child->children()[0];

      // Descending is only possible while the first input still exposes all
      // of the probe attributes.
      if (!E::SubsetOfExpressions(filter_join->probe_attributes(),
                                  first_input->getOutputAttributes())) {
        break;
      }

      const P::PhysicalPtr rewritten_input =
          pushDownFiltersInternal(first_input, build_child, filter_join);
      if (rewritten_input == first_input) {
        // Nothing changed below; handle this node with the common code.
        break;
      }

      std::vector<P::PhysicalPtr> replaced_inputs = probe_child->children();
      replaced_inputs[0] = rewritten_input;
      return probe_child->copyWithNewChildren(replaced_inputs);
    }
    default:
      break;
  }

  // The filter join is already sitting on this node: keep it as is.
  if (probe_child == filter_join->left()) {
    return filter_join;
  }

  // TODO(jianqiao): may need to update probe_attributes.
  return P::FilterJoin::Create(
      probe_child,
      build_child,
      filter_join->probe_attributes(),
      filter_join->build_attributes(),
      E::ToNamedExpressions(probe_child->getOutputAttributes()),
      filter_join->build_side_filter_predicate(),
      filter_join->is_anti_join(),
      filter_join->hasRepartition(),
      filter_join->cloneOutputPartitionSchemeHeader());
}
/**
 * Applies the group-by reduction to a single plan node.
 *
 * - For a TableReference node, records in source_ which table each of its
 *   attributes comes from and returns the node unchanged.
 * - For an Aggregate node with more than one grouping expression, groups the
 *   grouping attributes by source table. For each table whose attribute count
 *   exceeds FLAGS_reduce_group_by_attributes_threshold and whose best-ranked
 *   attribute is unique, only that attribute is kept in the GROUP BY and the
 *   table is joined back on top of the reduced Aggregate via a HashJoin.
 * - Every other node is returned unchanged.
 *
 * @param input The node to transform.
 * @return The rewritten plan, or input itself when nothing was changed.
 */
P::PhysicalPtr ReduceGroupByAttributes::applyToNode(const P::PhysicalPtr &input) {
  P::TableReferencePtr table_reference;
  if (P::SomeTableReference::MatchesWithConditionalCast(input, &table_reference)) {
    // Remember, for each attribute id, its table and attribute reference.
    for (const auto &attr : table_reference->attribute_list()) {
      source_.emplace(attr->id(), std::make_pair(table_reference, attr));
    }
    return input;
  }

  P::AggregatePtr aggregate;
  if (!P::SomeAggregate::MatchesWithConditionalCast(input, &aggregate)) {
    return input;
  }
  if (aggregate->grouping_expressions().size() <= 1u) {
    return input;
  }

  // Bucket the group-by attributes by the table they originate from.
  // Grouping expressions with no recorded source are skipped.
  std::map<P::TableReferencePtr, std::vector<E::AttributeReferencePtr>> attrs_by_table;
  for (const auto &grouping_expr : aggregate->grouping_expressions()) {
    const auto found = source_.find(grouping_expr->id());
    if (found == source_.end()) {
      continue;
    }
    attrs_by_table[found->second.first].emplace_back(found->second.second);
  }

  std::unordered_set<E::ExprId> erased_ids;
  std::vector<std::pair<P::TableReferencePtr, E::AttributeReferencePtr>> tables_to_hoist;

  // Decide, table by table, whether the table gets pulled above the
  // aggregation.
  for (const auto &entry : attrs_by_table) {
    const P::TableReferencePtr table = entry.first;
    const std::vector<E::AttributeReferencePtr> &group_attrs = entry.second;

    // TODO(jianqiao): find a cost-based metric instead of hard-coding the
    // threshold number of group-by attributes.
    if (group_attrs.size() <= FLAGS_reduce_group_by_attributes_threshold) {
      continue;
    }

    std::vector<AttributeInfo> candidates;
    for (const auto &attr : group_attrs) {
      candidates.emplace_back(attr,
                              cost_model_->impliesUniqueAttributes(table, {attr}),
                              !attr->getValueType().isVariableLength(),
                              attr->getValueType().maximumByteLength());
    }

    // Rank pointers to the candidates; the best one ends up at the front.
    std::vector<const AttributeInfo *> ranking;
    for (const auto &candidate : candidates) {
      ranking.emplace_back(&candidate);
    }
    std::sort(ranking.begin(), ranking.end(), AttributeInfo::IsBetterThan);

    const AttributeInfo *winner = ranking.front();
    if (!winner->is_unique) {
      // No key attribute is available, so this table is not pulled up.
      continue;
    }

    const E::AttributeReferencePtr key_attribute = winner->attribute;
    tables_to_hoist.emplace_back(table, key_attribute);

    // Every attribute of this table other than the key leaves the GROUP BY.
    for (const auto &attr : group_attrs) {
      if (attr->id() == key_attribute->id()) {
        continue;
      }
      erased_ids.emplace(attr->id());
    }
  }

  if (erased_ids.empty()) {
    return input;
  }

  // Reconstruct the Aggregate node with the reduced group-by attributes, then
  // stack HashJoin nodes on top of it.
  std::vector<E::NamedExpressionPtr> kept_grouping_expressions;
  for (const auto &grouping_expr : aggregate->grouping_expressions()) {
    if (erased_ids.count(grouping_expr->id()) == 0) {
      kept_grouping_expressions.emplace_back(grouping_expr);
    }
  }

  const P::AggregatePtr reduced_aggregate =
      P::Aggregate::Create(aggregate->input(),
                           kept_grouping_expressions,
                           aggregate->aggregate_expressions(),
                           aggregate->filter_predicate());

  P::PhysicalPtr output = reduced_aggregate;
  std::vector<E::NamedExpressionPtr> project_expressions =
      E::ToNamedExpressions(output->getOutputAttributes());

  for (const auto &hoisted : tables_to_hoist) {
    const P::TableReferencePtr &source_table = hoisted.first;
    const E::AttributeReferencePtr &probe_attribute = hoisted.second;

    // Build a copy of the table's attribute list in which the key attribute
    // is replaced by a fresh reference with a new expression id. All other
    // attributes are reused and appended to the projection.
    E::AttributeReferencePtr build_attribute;
    std::vector<E::AttributeReferencePtr> build_side_attributes;
    for (const auto &attr : source_table->attribute_list()) {
      if (attr->id() != probe_attribute->id()) {
        build_side_attributes.emplace_back(attr);
        project_expressions.emplace_back(attr);
        continue;
      }
      build_attribute =
          E::AttributeReference::Create(optimizer_context_->nextExprId(),
                                        attr->attribute_name(),
                                        attr->attribute_alias(),
                                        attr->relation_name(),
                                        attr->getValueType(),
                                        E::AttributeReferenceScope::kLocal);
      build_side_attributes.emplace_back(build_attribute);
    }
    DCHECK(build_attribute != nullptr);

    const P::TableReferencePtr build_side_table =
        P::TableReference::Create(source_table->relation(),
                                  source_table->relation()->getName(),
                                  build_side_attributes);

    // Join the aggregate (probe side) with the table (build side) on the key.
    output = P::HashJoin::Create(output,
                                 build_side_table,
                                 {probe_attribute},
                                 {build_attribute},
                                 nullptr,
                                 project_expressions,
                                 P::HashJoin::JoinType::kInnerJoin);
  }

  return output;
}
bool OneToOne::generatePlan(const L::LogicalPtr &logical_input, P::PhysicalPtr *physical_output) { switch (logical_input->getLogicalType()) { case L::LogicalType::kTopLevelPlan: { const L::TopLevelPlanPtr top_level_plan = std::static_pointer_cast<const L::TopLevelPlan>(logical_input); const P::PhysicalPtr main_physical_plan = physical_mapper_->createOrGetPhysicalFromLogical(top_level_plan->plan()); std::vector<P::PhysicalPtr> shared_physical_subplans; for (const L::LogicalPtr &shared_logical_subplan : top_level_plan->shared_subplans()) { shared_physical_subplans.emplace_back( physical_mapper_->createOrGetPhysicalFromLogical(shared_logical_subplan)); } *physical_output = P::TopLevelPlan::Create(main_physical_plan, shared_physical_subplans); return true; } case L::LogicalType::kSharedSubplanReference: { const L::SharedSubplanReferencePtr shared_subplan_reference = std::static_pointer_cast<const L::SharedSubplanReference>(logical_input); *physical_output = P::SharedSubplanReference::Create(shared_subplan_reference->subplan_id(), shared_subplan_reference->referenced_attributes(), shared_subplan_reference->output_attributes()); return true; } case L::LogicalType::kTableReference: { const L::TableReferencePtr table_reference = std::static_pointer_cast<const L::TableReference>(logical_input); *physical_output = P::TableReference::Create(table_reference->catalog_relation(), table_reference->relation_alias(), table_reference->attribute_list()); return true; } case L::LogicalType::kCopyFrom: { const L::CopyFromPtr copy_from = std::static_pointer_cast<const L::CopyFrom>(logical_input); *physical_output = P::CopyFrom::Create( copy_from->catalog_relation(), copy_from->file_name(), copy_from->column_delimiter(), copy_from->escape_strings()); return true; } case L::LogicalType::kCreateIndex: { const L::CreateIndexPtr create_index = std::static_pointer_cast<const L::CreateIndex>(logical_input); *physical_output = 
P::CreateIndex::Create(physical_mapper_->createOrGetPhysicalFromLogical( create_index->input()), create_index->index_name(), create_index->index_attributes(), create_index->index_description()); return true; } case L::LogicalType::kCreateTable: { const L::CreateTablePtr create_table = std::static_pointer_cast<const L::CreateTable>(logical_input); *physical_output = P::CreateTable::Create(create_table->relation_name(), create_table->attributes(), create_table->block_properties()); return true; } case L::LogicalType::kDeleteTuples: { const L::DeleteTuplesPtr delete_tuples = std::static_pointer_cast<const L::DeleteTuples>(logical_input); *physical_output = P::DeleteTuples::Create( physical_mapper_->createOrGetPhysicalFromLogical(delete_tuples->input()), delete_tuples->predicate()); return true; } case L::LogicalType::kDropTable: { const L::DropTablePtr drop_table = std::static_pointer_cast<const L::DropTable>(logical_input); *physical_output = P::DropTable::Create(drop_table->catalog_relation()); return true; } case L::LogicalType::kInsertSelection: { const L::InsertSelectionPtr insert_selection = std::static_pointer_cast<const L::InsertSelection>(logical_input); *physical_output = P::InsertSelection::Create( physical_mapper_->createOrGetPhysicalFromLogical(insert_selection->destination()), physical_mapper_->createOrGetPhysicalFromLogical(insert_selection->selection())); return true; } case L::LogicalType::kInsertTuple: { const L::InsertTuplePtr insert_tuple = std::static_pointer_cast<const L::InsertTuple>(logical_input); *physical_output = P::InsertTuple::Create( physical_mapper_->createOrGetPhysicalFromLogical(insert_tuple->input()), insert_tuple->column_values()); return true; } case L::LogicalType::kSample: { const L::SamplePtr sample = std::static_pointer_cast<const L::Sample>(logical_input); *physical_output = P::Sample::Create( physical_mapper_->createOrGetPhysicalFromLogical(sample->input()), sample->is_block_sample(), sample->percentage()); return true; } 
case L::LogicalType::kSort: { const L::Sort *sort = static_cast<const L::Sort*>(logical_input.get()); const P::PhysicalPtr physical_input = physical_mapper_->createOrGetPhysicalFromLogical(sort->input()); // Find non-sort attributes. const std::vector<E::AttributeReferencePtr> input_attributes = physical_input->getOutputAttributes(); E::UnorderedNamedExpressionSet sort_attributes_set(sort->sort_attributes().begin(), sort->sort_attributes().end()); std::vector<E::AttributeReferencePtr> non_sort_attributes; for (const E::AttributeReferencePtr &input_attribute : input_attributes) { if (sort_attributes_set.find(input_attribute) == sort_attributes_set.end()) { non_sort_attributes.emplace_back(input_attribute); } } *physical_output = P::Sort::Create( physical_mapper_->createOrGetPhysicalFromLogical(sort->input()), sort->sort_attributes(), non_sort_attributes, sort->sort_ascending(), sort->nulls_first_flags(), sort->limit()); return true; } case L::LogicalType::kTableGenerator: { const L::TableGeneratorPtr table_generator = std::static_pointer_cast<const L::TableGenerator>(logical_input); *physical_output = P::TableGenerator::Create( table_generator->generator_function_handle(), table_generator->table_alias(), table_generator->attribute_list()); return true; } case L::LogicalType::kUpdateTable: { const L::UpdateTablePtr update_table = std::static_pointer_cast<const L::UpdateTable>(logical_input); *physical_output = P::UpdateTable::Create( physical_mapper_->createOrGetPhysicalFromLogical(update_table->input()), update_table->assignees(), update_table->assignment_expressions(), update_table->predicate()); return true; } default: return false; } }