std::size_t SimpleCostModel::estimateCardinality(
    const P::PhysicalPtr &physical_plan) {
  switch (physical_plan->getPhysicalType()) {
    case P::PhysicalType::kTopLevelPlan:
      return estimateCardinalityForTopLevelPlan(
          std::static_pointer_cast<const P::TopLevelPlan>(physical_plan));
    case P::PhysicalType::kTableReference:
      return estimateCardinalityForTableReference(
          std::static_pointer_cast<const P::TableReference>(physical_plan));
    case P::PhysicalType::kSelection:
      return estimateCardinalityForSelection(
          std::static_pointer_cast<const P::Selection>(physical_plan));
    case P::PhysicalType::kTableGenerator:
      return estimateCardinalityForTableGenerator(
          std::static_pointer_cast<const P::TableGenerator>(physical_plan));
    case P::PhysicalType::kHashJoin:
      return estimateCardinalityForHashJoin(
          std::static_pointer_cast<const P::HashJoin>(physical_plan));
    case P::PhysicalType::kNestedLoopsJoin:
      return estimateCardinalityForNestedLoopsJoin(
          std::static_pointer_cast<const P::NestedLoopsJoin>(physical_plan));
    case P::PhysicalType::kAggregate:
      return estimateCardinalityForAggregate(
          std::static_pointer_cast<const P::Aggregate>(physical_plan));
    case P::PhysicalType::kSharedSubplanReference: {
      const P::SharedSubplanReferencePtr shared_subplan_reference =
          std::static_pointer_cast<const P::SharedSubplanReference>(physical_plan);
      return estimateCardinality(
          shared_subplans_[shared_subplan_reference->subplan_id()]);
    }
    default:
      LOG(FATAL) << "Unsupported physical plan:" << physical_plan->toString();
  }
}
Example #2
0
P::PhysicalPtr ReduceGroupByAttributes::apply(const P::PhysicalPtr &input) {
  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
  cost_model_.reset(new cost::StarSchemaSimpleCostModel(
      std::static_pointer_cast<const P::TopLevelPlan>(input)->shared_subplans()));

  P::PhysicalPtr output = applyInternal(input);
  if (output != input) {
    output = PruneColumns().apply(output);
  }
  return output;
}
P::PhysicalPtr InjectJoinFilters::apply(const P::PhysicalPtr &input) {
  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);

  const P::TopLevelPlanPtr top_level_plan =
     std::static_pointer_cast<const P::TopLevelPlan>(input);
  cost_model_.reset(
      new cost::StarSchemaSimpleCostModel(
          top_level_plan->shared_subplans()));
  lip_filter_configuration_.reset(new P::LIPFilterConfiguration());

  // Step 1. Transform applicable HashJoin nodes to FilterJoin nodes.
  P::PhysicalPtr output = transformHashJoinToFilters(input);

  if (output == input) {
    return input;
  }

  // Step 2. If the top level plan is a filter join, wrap it with a Selection
  // to stabilize output columns.
  output = wrapSelection(output);

  // Step 3. Push down FilterJoin nodes to be evaluated early.
  output = pushDownFilters(output);

  // Step 4. Add Selection nodes for attaching the LIPFilters, if necessary.
  output = addFilterAnchors(output, false);

  // Step 5. Because of the pushdown of FilterJoin nodes, there are optimization
  // opportunities for projecting columns early.
  output = PruneColumns().apply(output);

  // Step 6. For each FilterJoin node, attach its corresponding LIPFilter to
  // proper nodes.
  concretizeAsLIPFilters(output, nullptr);

  if (!lip_filter_configuration_->getBuildInfoMap().empty() ||
      !lip_filter_configuration_->getProbeInfoMap().empty()) {
    output = std::static_pointer_cast<const P::TopLevelPlan>(output)
        ->copyWithLIPFilterConfiguration(
              P::LIPFilterConfigurationPtr(lip_filter_configuration_.release()));
  }

  return output;
}
void InjectJoinFilters::concretizeAsLIPFilters(
    const P::PhysicalPtr &input,
    const P::PhysicalPtr &anchor_node) const {
  switch (input->getPhysicalType()) {
    case P::PhysicalType::kAggregate: {
      const P::AggregatePtr &aggregate =
          std::static_pointer_cast<const P::Aggregate>(input);
      concretizeAsLIPFilters(aggregate->input(), aggregate);
      break;
    }
    case P::PhysicalType::kSelection: {
      const P::SelectionPtr &selection =
          std::static_pointer_cast<const P::Selection>(input);
      concretizeAsLIPFilters(selection->input(), selection);
      break;
    }
    // Currently we disable the attachment of filters to HashJoin nodes. See the
    // comments in InjectJoinFilters::addFilterAnchors().
    /*
    case P::PhysicalType::kHashJoin: {
      const P::HashJoinPtr &hash_join =
          std::static_pointer_cast<const P::HashJoin>(input);
      concretizeAsLIPFilters(hash_join->left(), hash_join);
      concretizeAsLIPFilters(hash_join->right(), nullptr);
      break;
    }
    */
    case P::PhysicalType::kFilterJoin: {
      const P::FilterJoinPtr &filter_join =
          std::static_pointer_cast<const P::FilterJoin>(input);
      DCHECK_EQ(1u, filter_join->build_attributes().size());
      const E::AttributeReferencePtr &build_attr =
          filter_join->build_attributes().front();

      std::int64_t min_cpp_value;
      std::int64_t max_cpp_value;
      const bool has_exact_min_max_stats =
          findExactMinMaxValuesForAttributeHelper(filter_join,
                                                  build_attr,
                                                  &min_cpp_value,
                                                  &max_cpp_value);
      DCHECK(has_exact_min_max_stats);
      DCHECK_GE(max_cpp_value, min_cpp_value);
      DCHECK_LE(max_cpp_value - min_cpp_value, kMaxFilterSize);
      CHECK(anchor_node != nullptr);

      lip_filter_configuration_->addBuildInfo(
          P::BitVectorExactFilterBuildInfo::Create(build_attr,
                                                   min_cpp_value,
                                                   max_cpp_value,
                                                   filter_join->is_anti_join()),
          filter_join);
      lip_filter_configuration_->addProbeInfo(
          P::LIPFilterProbeInfo::Create(filter_join->probe_attributes().front(),
                                        build_attr,
                                        filter_join),
          anchor_node);

      concretizeAsLIPFilters(filter_join->left(), anchor_node);
      concretizeAsLIPFilters(filter_join->right(), filter_join);
      break;
    }
    default: {
      for (const P::PhysicalPtr &child : input->children()) {
        concretizeAsLIPFilters(child, nullptr);
      }
    }
  }
}
std::string PlanVisualizer::visualize(const P::PhysicalPtr &input) {
  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
  const P::TopLevelPlanPtr top_level_plan =
      std::static_pointer_cast<const P::TopLevelPlan>(input);
  cost_model_.reset(
      new C::StarSchemaSimpleCostModel(
          top_level_plan->shared_subplans()));
  lip_filter_conf_ = top_level_plan->lip_filter_configuration();

  color_map_["TableReference"] = "skyblue";
  color_map_["Selection"] = "#90EE90";
  color_map_["FilterJoin"] = "pink";
  color_map_["FilterJoin(Anti)"] = "pink";
  color_map_["HashJoin"] = "red";
  color_map_["HashLeftOuterJoin"] = "orange";
  color_map_["HashLeftSemiJoin"] = "orange";
  color_map_["HashLeftAntiJoin"] = "orange";

  visit(input);

  // Format output graph
  std::ostringstream graph_oss;
  graph_oss << "digraph g {\n";
  graph_oss << "  rankdir=BT\n";
  graph_oss << "  node [penwidth=2]\n";
  graph_oss << "  edge [fontsize=16 fontcolor=gray penwidth=2]\n\n";

  // Format nodes
  for (const NodeInfo &node_info : nodes_) {
    graph_oss << "  " << node_info.id << " [";
    if (!node_info.labels.empty()) {
      graph_oss << "label=\""
                << EscapeSpecialChars(JoinToString(node_info.labels, "&#10;"))
                << "\"";
    }
    if (!node_info.color.empty()) {
      graph_oss << " style=filled fillcolor=\"" << node_info.color << "\"";
    }
    graph_oss << "]\n";
  }
  graph_oss << "\n";

  // Format edges
  for (const EdgeInfo &edge_info : edges_) {
    graph_oss << "  " << edge_info.src_node_id << " -> "
              << edge_info.dst_node_id << " [";
    if (edge_info.dashed) {
      graph_oss << "style=dashed ";
    }
    if (!edge_info.labels.empty()) {
      graph_oss << "label=\""
                << EscapeSpecialChars(JoinToString(edge_info.labels, "&#10;"))
                << "\"";
    }
    graph_oss << "]\n";
  }

  graph_oss << "}\n";

  return graph_oss.str();
}
void PlanVisualizer::visit(const P::PhysicalPtr &input) {
  int node_id = ++id_counter_;
  node_id_map_.emplace(input, node_id);

  std::set<E::ExprId> referenced_ids;
  for (const auto &attr : input->getReferencedAttributes()) {
    referenced_ids.emplace(attr->id());
  }
  for (const auto &child : input->children()) {
    visit(child);

    int child_id = node_id_map_[child];

    edges_.emplace_back(EdgeInfo());
    EdgeInfo &edge_info = edges_.back();
    edge_info.src_node_id = child_id;
    edge_info.dst_node_id = node_id;
    edge_info.dashed = false;

    if ((input->getPhysicalType() == P::PhysicalType::kHashJoin ||
         input->getPhysicalType() == P::PhysicalType::kFilterJoin) &&
        child == input->children()[1]) {
      edge_info.dashed = true;
    }

    for (const auto &attr : child->getOutputAttributes()) {
      if (referenced_ids.find(attr->id()) != referenced_ids.end()) {
        edge_info.labels.emplace_back(attr->attribute_alias());
      }
    }
  }

  nodes_.emplace_back(NodeInfo());
  NodeInfo &node_info = nodes_.back();
  node_info.id = node_id;
  if (color_map_.find(input->getName()) != color_map_.end()) {
    node_info.color = color_map_[input->getName()];
  }

  switch (input->getPhysicalType()) {
    case P::PhysicalType::kTableReference: {
      const P::TableReferencePtr table_reference =
        std::static_pointer_cast<const P::TableReference>(input);
      node_info.labels.emplace_back(table_reference->relation()->getName());
      break;
    }
    case P::PhysicalType::kHashJoin: {
      const P::HashJoinPtr hash_join =
        std::static_pointer_cast<const P::HashJoin>(input);
      node_info.labels.emplace_back(input->getName());

      const auto &left_attributes = hash_join->left_join_attributes();
      const auto &right_attributes = hash_join->right_join_attributes();
      for (std::size_t i = 0; i < left_attributes.size(); ++i) {
        node_info.labels.emplace_back(
            left_attributes[i]->attribute_alias() + " = " + right_attributes[i]->attribute_alias());
      }
      break;
    }
    case P::PhysicalType::kFilterJoin: {
      const P::FilterJoinPtr filter_join =
          std::static_pointer_cast<const P::FilterJoin>(input);
      node_info.labels.emplace_back(input->getName());

      const auto &probe_attributes = filter_join->probe_attributes();
      const auto &build_attributes = filter_join->build_attributes();
      for (std::size_t i = 0; i < probe_attributes.size(); ++i) {
        node_info.labels.emplace_back(
            probe_attributes[i]->attribute_alias() + " = " +
                build_attributes[i]->attribute_alias());
      }
      break;
    }
    default: {
      node_info.labels.emplace_back(input->getName());
      break;
    }
  }

  const P::PartitionSchemeHeader *partition_scheme_header = input->getOutputPartitionSchemeHeader();
  if (partition_scheme_header) {
    node_info.labels.emplace_back(partition_scheme_header->toString());
  }

  if (lip_filter_conf_ != nullptr) {
    const auto &build_filters = lip_filter_conf_->getBuildInfoMap();
    const auto build_it = build_filters.find(input);
    if (build_it != build_filters.end()) {
      for (const auto &build_info : build_it->second) {
        node_info.labels.emplace_back(
            std::string("[LIP build] ") + build_info->build_attribute()->attribute_alias());
      }
    }
    const auto &probe_filters = lip_filter_conf_->getProbeInfoMap();
    const auto probe_it = probe_filters.find(input);
    if (probe_it != probe_filters.end()) {
      for (const auto &probe_info : probe_it->second) {
        node_info.labels.emplace_back(
            std::string("[LIP probe] ") + probe_info->probe_attribute()->attribute_alias());
      }
    }
  }

  try {
    const std::size_t estimated_cardinality = cost_model_->estimateCardinality(input);
    const double estimated_selectivity = cost_model_->estimateSelectivity(input);

    node_info.labels.emplace_back(
        "est. # = " + std::to_string(estimated_cardinality));
    node_info.labels.emplace_back(
        "est. Selectivity = " + std::to_string(estimated_selectivity));
  } catch (const std::exception &e) {
    // NOTE(jianqiao): CostModel::estimateCardinality() may throw UnsupportedPhysicalPlan
    // exception for some type of physical nodes such as CreateTable.
    // In this case, we omit the node's cardinality/selectivity information.
  }
}