// Resolves an attribute reference by name.
//
// When parse_rel_node is null, every relation in relations_ is searched and a
// match in more than one of them is reported as an ambiguity error. Otherwise
// only the relation registered under the lower-cased relation name is
// consulted. If nothing matches locally, the lookup is delegated to
// parent_resolver_ (when there is one) and the result is re-created with
// AttributeReferenceScope::kOuter. A SQL error is raised at parse_attr_node
// when the attribute cannot be resolved at all.
E::AttributeReferencePtr NameResolver::lookup(
    const ParseString *parse_attr_node,
    const ParseString *parse_rel_node) const {
  E::AttributeReferencePtr resolved;

  if (parse_rel_node != nullptr) {
    // Qualified reference: only the named relation is a candidate.
    const auto rel_it =
        rel_name_to_rel_info_map_.find(ToLower(parse_rel_node->value()));
    if (rel_it != rel_name_to_rel_info_map_.end()) {
      resolved = rel_it->second->findAttributeByName(parse_attr_node);
    }
  } else {
    // Unqualified reference: scan every visible relation.
    for (const std::unique_ptr<RelationInfo> &relation : relations_) {
      const E::AttributeReferencePtr candidate =
          relation->findAttributeByName(parse_attr_node);
      if (candidate == nullptr) {
        continue;
      }
      // A second hit means the name exists in more than one relation.
      if (resolved != nullptr) {
        THROW_SQL_ERROR_AT(parse_attr_node) << "Ambiguous attribute "
                                            << parse_attr_node->value();
      }
      resolved = candidate;
    }
  }

  // Nothing in the local scope: fall back to the enclosing scopes.
  if (resolved == nullptr && parent_resolver_ != nullptr) {
    const E::AttributeReferencePtr from_parent =
        parent_resolver_->lookup(parse_attr_node, parse_rel_node);
    if (from_parent != nullptr) {
      // Same attribute, but marked as coming from an outer scope.
      resolved = E::AttributeReference::Create(from_parent->id(),
                                               from_parent->attribute_name(),
                                               from_parent->attribute_alias(),
                                               from_parent->relation_name(),
                                               from_parent->getValueType(),
                                               E::AttributeReferenceScope::kOuter);
    }
  }

  if (resolved == nullptr) {
    THROW_SQL_ERROR_AT(parse_attr_node) << "Unrecognized attribute "
                                        << parse_attr_node->value();
  }

  return resolved;
}
// Example no. 2
// 0
// Tries to shrink the group-by list of an Aggregate node.
//
// TableReference nodes are returned unchanged after their attributes have been
// recorded in source_. For an Aggregate with more than one grouping
// expression, the grouping attributes are bucketed by the table recorded for
// them in source_. When a bucket holds more than
// FLAGS_reduce_group_by_attributes_threshold attributes and its best candidate
// (per AttributeInfo::IsBetterThan) is reported unique by the cost model, only
// that attribute stays in the group-by list and the table is inner-joined back
// on top of the new Aggregate to re-expose the table's other attributes.
// Any other input is returned unchanged.
P::PhysicalPtr ReduceGroupByAttributes::applyToNode(const P::PhysicalPtr &input) {
  P::TableReferencePtr table_reference;
  if (P::SomeTableReference::MatchesWithConditionalCast(input, &table_reference)) {
    // Remember, for every attribute id, which table it comes from.
    for (const auto &table_attr : table_reference->attribute_list()) {
      source_.emplace(table_attr->id(),
                      std::make_pair(table_reference, table_attr));
    }
    return input;
  }

  P::AggregatePtr aggregate;
  if (!P::SomeAggregate::MatchesWithConditionalCast(input, &aggregate)) {
    return input;
  }
  if (aggregate->grouping_expressions().size() <= 1u) {
    // Zero or one grouping expression: nothing to reduce.
    return input;
  }

  // Bucket the group-by attributes by the table they originate from.
  std::map<P::TableReferencePtr, std::vector<E::AttributeReferencePtr>> attrs_by_table;
  for (const auto &grouping_expr : aggregate->grouping_expressions()) {
    const auto origin_it = source_.find(grouping_expr->id());
    if (origin_it == source_.end()) {
      continue;
    }
    attrs_by_table[origin_it->second.first].emplace_back(origin_it->second.second);
  }

  std::unordered_set<E::ExprId> dropped_attr_ids;
  std::vector<std::pair<P::TableReferencePtr, E::AttributeReferencePtr>> tables_to_join;

  // Decide, table by table, whether pulling the table above the aggregation
  // is worthwhile.
  for (const auto &entry : attrs_by_table) {
    const P::TableReferencePtr &table = entry.first;
    const std::vector<E::AttributeReferencePtr> &group_attrs = entry.second;

    // TODO(jianqiao): find a cost-based metric instead of hard-coding the
    // threshold number of group-by attributes.
    if (group_attrs.size() <= FLAGS_reduce_group_by_attributes_threshold) {
      continue;
    }

    std::vector<AttributeInfo> candidates;
    for (const auto &group_attr : group_attrs) {
      candidates.emplace_back(group_attr,
                              cost_model_->impliesUniqueAttributes(table, {group_attr}),
                              !group_attr->getValueType().isVariableLength(),
                              group_attr->getValueType().maximumByteLength());
    }

    // Rank pointers rather than the records themselves; the pointers are only
    // taken once candidates is completely filled.
    std::vector<const AttributeInfo *> ranked;
    for (const auto &candidate : candidates) {
      ranked.emplace_back(&candidate);
    }
    std::sort(ranked.begin(), ranked.end(), AttributeInfo::IsBetterThan);

    const AttributeInfo &winner = *ranked.front();
    if (!winner.is_unique) {
      // Even the best candidate is not a key, so this table stays put.
      continue;
    }

    const E::AttributeReferencePtr retained_key = winner.attribute;
    tables_to_join.emplace_back(table, retained_key);

    // Every other attribute of this table leaves the group-by list.
    for (const auto &group_attr : group_attrs) {
      if (group_attr->id() == retained_key->id()) {
        continue;
      }
      dropped_attr_ids.emplace(group_attr->id());
    }
  }

  if (dropped_attr_ids.empty()) {
    return input;
  }

  // Rebuild the Aggregate with the surviving group-by expressions.
  std::vector<E::NamedExpressionPtr> kept_grouping_expressions;
  for (const auto &grouping_expr : aggregate->grouping_expressions()) {
    if (dropped_attr_ids.count(grouping_expr->id()) == 0) {
      kept_grouping_expressions.emplace_back(grouping_expr);
    }
  }

  const P::AggregatePtr reduced_aggregate =
      P::Aggregate::Create(aggregate->input(),
                           kept_grouping_expressions,
                           aggregate->aggregate_expressions(),
                           aggregate->filter_predicate());

  // Stack one HashJoin per hoisted table on top of the reduced Aggregate.
  // The projection list keeps growing across iterations.
  P::PhysicalPtr result = reduced_aggregate;
  std::vector<E::NamedExpressionPtr> projection =
      E::ToNamedExpressions(result->getOutputAttributes());
  for (const auto &hoisted : tables_to_join) {
    const P::TableReferencePtr &original_table = hoisted.first;
    const E::AttributeReferencePtr &probe_key = hoisted.second;

    E::AttributeReferencePtr build_key;
    std::vector<E::AttributeReferencePtr> build_side_attributes;
    for (const auto &table_attr : original_table->attribute_list()) {
      if (table_attr->id() != probe_key->id()) {
        build_side_attributes.emplace_back(table_attr);
        projection.emplace_back(table_attr);
        continue;
      }
      // The join key gets a fresh expression id on the build side.
      build_key =
          E::AttributeReference::Create(optimizer_context_->nextExprId(),
                                        table_attr->attribute_name(),
                                        table_attr->attribute_alias(),
                                        table_attr->relation_name(),
                                        table_attr->getValueType(),
                                        E::AttributeReferenceScope::kLocal);
      build_side_attributes.emplace_back(build_key);
    }

    DCHECK(build_key != nullptr);
    const P::TableReferencePtr build_side_table =
        P::TableReference::Create(original_table->relation(),
                                  original_table->relation()->getName(),
                                  build_side_attributes);
    result = P::HashJoin::Create(result,
                                 build_side_table,
                                 {probe_key},
                                 {build_key},
                                 nullptr,
                                 projection,
                                 P::HashJoin::JoinType::kInnerJoin);
  }

  return result;
}