std::size_t SimpleCostModel::estimateCardinality( const P::PhysicalPtr &physical_plan) { switch (physical_plan->getPhysicalType()) { case P::PhysicalType::kTopLevelPlan: return estimateCardinalityForTopLevelPlan( std::static_pointer_cast<const P::TopLevelPlan>(physical_plan)); case P::PhysicalType::kTableReference: return estimateCardinalityForTableReference( std::static_pointer_cast<const P::TableReference>(physical_plan)); case P::PhysicalType::kSelection: return estimateCardinalityForSelection( std::static_pointer_cast<const P::Selection>(physical_plan)); case P::PhysicalType::kTableGenerator: return estimateCardinalityForTableGenerator( std::static_pointer_cast<const P::TableGenerator>(physical_plan)); case P::PhysicalType::kHashJoin: return estimateCardinalityForHashJoin( std::static_pointer_cast<const P::HashJoin>(physical_plan)); case P::PhysicalType::kNestedLoopsJoin: return estimateCardinalityForNestedLoopsJoin( std::static_pointer_cast<const P::NestedLoopsJoin>(physical_plan)); case P::PhysicalType::kAggregate: return estimateCardinalityForAggregate( std::static_pointer_cast<const P::Aggregate>(physical_plan)); case P::PhysicalType::kSharedSubplanReference: { const P::SharedSubplanReferencePtr shared_subplan_reference = std::static_pointer_cast<const P::SharedSubplanReference>(physical_plan); return estimateCardinality( shared_subplans_[shared_subplan_reference->subplan_id()]); } default: LOG(FATAL) << "Unsupported physical plan:" << physical_plan->toString(); } }
P::PhysicalPtr ReduceGroupByAttributes::apply(const P::PhysicalPtr &input) { DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan); cost_model_.reset(new cost::StarSchemaSimpleCostModel( std::static_pointer_cast<const P::TopLevelPlan>(input)->shared_subplans())); P::PhysicalPtr output = applyInternal(input); if (output != input) { output = PruneColumns().apply(output); } return output; }
P::PhysicalPtr InjectJoinFilters::apply(const P::PhysicalPtr &input) { DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan); const P::TopLevelPlanPtr top_level_plan = std::static_pointer_cast<const P::TopLevelPlan>(input); cost_model_.reset( new cost::StarSchemaSimpleCostModel( top_level_plan->shared_subplans())); lip_filter_configuration_.reset(new P::LIPFilterConfiguration()); // Step 1. Transform applicable HashJoin nodes to FilterJoin nodes. P::PhysicalPtr output = transformHashJoinToFilters(input); if (output == input) { return input; } // Step 2. If the top level plan is a filter join, wrap it with a Selection // to stabilize output columns. output = wrapSelection(output); // Step 3. Push down FilterJoin nodes to be evaluated early. output = pushDownFilters(output); // Step 4. Add Selection nodes for attaching the LIPFilters, if necessary. output = addFilterAnchors(output, false); // Step 5. Because of the pushdown of FilterJoin nodes, there are optimization // opportunities for projecting columns early. output = PruneColumns().apply(output); // Step 6. For each FilterJoin node, attach its corresponding LIPFilter to // proper nodes. concretizeAsLIPFilters(output, nullptr); if (!lip_filter_configuration_->getBuildInfoMap().empty() || !lip_filter_configuration_->getProbeInfoMap().empty()) { output = std::static_pointer_cast<const P::TopLevelPlan>(output) ->copyWithLIPFilterConfiguration( P::LIPFilterConfigurationPtr(lip_filter_configuration_.release())); } return output; }
void InjectJoinFilters::concretizeAsLIPFilters( const P::PhysicalPtr &input, const P::PhysicalPtr &anchor_node) const { switch (input->getPhysicalType()) { case P::PhysicalType::kAggregate: { const P::AggregatePtr &aggregate = std::static_pointer_cast<const P::Aggregate>(input); concretizeAsLIPFilters(aggregate->input(), aggregate); break; } case P::PhysicalType::kSelection: { const P::SelectionPtr &selection = std::static_pointer_cast<const P::Selection>(input); concretizeAsLIPFilters(selection->input(), selection); break; } // Currently we disable the attachment of filters to HashJoin nodes. See the // comments in InjectJoinFilters::addFilterAnchors(). /* case P::PhysicalType::kHashJoin: { const P::HashJoinPtr &hash_join = std::static_pointer_cast<const P::HashJoin>(input); concretizeAsLIPFilters(hash_join->left(), hash_join); concretizeAsLIPFilters(hash_join->right(), nullptr); break; } */ case P::PhysicalType::kFilterJoin: { const P::FilterJoinPtr &filter_join = std::static_pointer_cast<const P::FilterJoin>(input); DCHECK_EQ(1u, filter_join->build_attributes().size()); const E::AttributeReferencePtr &build_attr = filter_join->build_attributes().front(); std::int64_t min_cpp_value; std::int64_t max_cpp_value; const bool has_exact_min_max_stats = findExactMinMaxValuesForAttributeHelper(filter_join, build_attr, &min_cpp_value, &max_cpp_value); DCHECK(has_exact_min_max_stats); DCHECK_GE(max_cpp_value, min_cpp_value); DCHECK_LE(max_cpp_value - min_cpp_value, kMaxFilterSize); CHECK(anchor_node != nullptr); lip_filter_configuration_->addBuildInfo( P::BitVectorExactFilterBuildInfo::Create(build_attr, min_cpp_value, max_cpp_value, filter_join->is_anti_join()), filter_join); lip_filter_configuration_->addProbeInfo( P::LIPFilterProbeInfo::Create(filter_join->probe_attributes().front(), build_attr, filter_join), anchor_node); concretizeAsLIPFilters(filter_join->left(), anchor_node); concretizeAsLIPFilters(filter_join->right(), filter_join); break; } default: { for (const P::PhysicalPtr &child : input->children()) { concretizeAsLIPFilters(child, nullptr); } } } }
std::string PlanVisualizer::visualize(const P::PhysicalPtr &input) { DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan); const P::TopLevelPlanPtr top_level_plan = std::static_pointer_cast<const P::TopLevelPlan>(input); cost_model_.reset( new C::StarSchemaSimpleCostModel( top_level_plan->shared_subplans())); lip_filter_conf_ = top_level_plan->lip_filter_configuration(); color_map_["TableReference"] = "skyblue"; color_map_["Selection"] = "#90EE90"; color_map_["FilterJoin"] = "pink"; color_map_["FilterJoin(Anti)"] = "pink"; color_map_["HashJoin"] = "red"; color_map_["HashLeftOuterJoin"] = "orange"; color_map_["HashLeftSemiJoin"] = "orange"; color_map_["HashLeftAntiJoin"] = "orange"; visit(input); // Format output graph std::ostringstream graph_oss; graph_oss << "digraph g {\n"; graph_oss << " rankdir=BT\n"; graph_oss << " node [penwidth=2]\n"; graph_oss << " edge [fontsize=16 fontcolor=gray penwidth=2]\n\n"; // Format nodes for (const NodeInfo &node_info : nodes_) { graph_oss << " " << node_info.id << " ["; if (!node_info.labels.empty()) { graph_oss << "label=\"" << EscapeSpecialChars(JoinToString(node_info.labels, " ")) << "\""; } if (!node_info.color.empty()) { graph_oss << " style=filled fillcolor=\"" << node_info.color << "\""; } graph_oss << "]\n"; } graph_oss << "\n"; // Format edges for (const EdgeInfo &edge_info : edges_) { graph_oss << " " << edge_info.src_node_id << " -> " << edge_info.dst_node_id << " ["; if (edge_info.dashed) { graph_oss << "style=dashed "; } if (!edge_info.labels.empty()) { graph_oss << "label=\"" << EscapeSpecialChars(JoinToString(edge_info.labels, " ")) << "\""; } graph_oss << "]\n"; } graph_oss << "}\n"; return graph_oss.str(); }
void PlanVisualizer::visit(const P::PhysicalPtr &input) { int node_id = ++id_counter_; node_id_map_.emplace(input, node_id); std::set<E::ExprId> referenced_ids; for (const auto &attr : input->getReferencedAttributes()) { referenced_ids.emplace(attr->id()); } for (const auto &child : input->children()) { visit(child); int child_id = node_id_map_[child]; edges_.emplace_back(EdgeInfo()); EdgeInfo &edge_info = edges_.back(); edge_info.src_node_id = child_id; edge_info.dst_node_id = node_id; edge_info.dashed = false; if ((input->getPhysicalType() == P::PhysicalType::kHashJoin || input->getPhysicalType() == P::PhysicalType::kFilterJoin) && child == input->children()[1]) { edge_info.dashed = true; } for (const auto &attr : child->getOutputAttributes()) { if (referenced_ids.find(attr->id()) != referenced_ids.end()) { edge_info.labels.emplace_back(attr->attribute_alias()); } } } nodes_.emplace_back(NodeInfo()); NodeInfo &node_info = nodes_.back(); node_info.id = node_id; if (color_map_.find(input->getName()) != color_map_.end()) { node_info.color = color_map_[input->getName()]; } switch (input->getPhysicalType()) { case P::PhysicalType::kTableReference: { const P::TableReferencePtr table_reference = std::static_pointer_cast<const P::TableReference>(input); node_info.labels.emplace_back(table_reference->relation()->getName()); break; } case P::PhysicalType::kHashJoin: { const P::HashJoinPtr hash_join = std::static_pointer_cast<const P::HashJoin>(input); node_info.labels.emplace_back(input->getName()); const auto &left_attributes = hash_join->left_join_attributes(); const auto &right_attributes = hash_join->right_join_attributes(); for (std::size_t i = 0; i < left_attributes.size(); ++i) { node_info.labels.emplace_back( left_attributes[i]->attribute_alias() + " = " + right_attributes[i]->attribute_alias()); } break; } case P::PhysicalType::kFilterJoin: { const P::FilterJoinPtr filter_join = std::static_pointer_cast<const P::FilterJoin>(input); node_info.labels.emplace_back(input->getName()); const auto &probe_attributes = filter_join->probe_attributes(); const auto &build_attributes = filter_join->build_attributes(); for (std::size_t i = 0; i < probe_attributes.size(); ++i) { node_info.labels.emplace_back( probe_attributes[i]->attribute_alias() + " = " + build_attributes[i]->attribute_alias()); } break; } default: { node_info.labels.emplace_back(input->getName()); break; } } const P::PartitionSchemeHeader *partition_scheme_header = input->getOutputPartitionSchemeHeader(); if (partition_scheme_header) { node_info.labels.emplace_back(partition_scheme_header->toString()); } if (lip_filter_conf_ != nullptr) { const auto &build_filters = lip_filter_conf_->getBuildInfoMap(); const auto build_it = build_filters.find(input); if (build_it != build_filters.end()) { for (const auto &build_info : build_it->second) { node_info.labels.emplace_back( std::string("[LIP build] ") + build_info->build_attribute()->attribute_alias()); } } const auto &probe_filters = lip_filter_conf_->getProbeInfoMap(); const auto probe_it = probe_filters.find(input); if (probe_it != probe_filters.end()) { for (const auto &probe_info : probe_it->second) { node_info.labels.emplace_back( std::string("[LIP probe] ") + probe_info->probe_attribute()->attribute_alias()); } } } try { const std::size_t estimated_cardinality = cost_model_->estimateCardinality(input); const double estimated_selectivity = cost_model_->estimateSelectivity(input); node_info.labels.emplace_back( "est. # = " + std::to_string(estimated_cardinality)); node_info.labels.emplace_back( "est. Selectivity = " + std::to_string(estimated_selectivity)); } catch (const std::exception &e) { // NOTE(jianqiao): CostModel::estimateCardinality() may throw UnsupportedPhysicalPlan // exception for some type of physical nodes such as CreateTable. // In this case, we omit the node's cardinality/selectivity information. } }