// Resolves an attribute reference by name.
//
// parse_attr_node names the attribute; parse_rel_node optionally names the
// relation it is qualified with (nullptr means the reference is unqualified).
// The relations registered in this resolver are consulted first; if nothing
// matches and a parent resolver exists, the lookup is delegated to it and the
// result is re-created with scope kOuter.
//
// Raises an error through THROW_SQL_ERROR_AT when an unqualified name matches
// attributes in more than one local relation ("Ambiguous attribute") or when
// no attribute is found at all ("Unrecognized attribute").
E::AttributeReferencePtr NameResolver::lookup(
    const ParseString *parse_attr_node,
    const ParseString *parse_rel_node) const {
  E::AttributeReferencePtr resolved;

  if (parse_rel_node != nullptr) {
    // Qualified reference: only the named relation is consulted. The relation
    // name is lower-cased before it is used as the map key.
    const auto rel_it =
        rel_name_to_rel_info_map_.find(ToLower(parse_rel_node->value()));
    if (rel_it != rel_name_to_rel_info_map_.end()) {
      resolved = rel_it->second->findAttributeByName(parse_attr_node);
    }
  } else {
    // Unqualified reference: scan every relation in this scope. All relations
    // are visited even after a hit so that a second hit can be reported as an
    // ambiguity.
    for (const std::unique_ptr<RelationInfo> &relation : relations_) {
      const E::AttributeReferencePtr candidate =
          relation->findAttributeByName(parse_attr_node);
      if (candidate == nullptr) {
        continue;
      }
      if (resolved != nullptr) {
        THROW_SQL_ERROR_AT(parse_attr_node)
            << "Ambiguous attribute " << parse_attr_node->value();
      }
      resolved = candidate;
    }
  }

  // Nothing matched locally: fall back to the enclosing scope, if there is one.
  if (resolved == nullptr && parent_resolver_ != nullptr) {
    const E::AttributeReferencePtr from_parent =
        parent_resolver_->lookup(parse_attr_node, parse_rel_node);
    if (from_parent != nullptr) {
      // Same id/name/alias/relation/type as the parent's attribute, but
      // tagged as coming from an outer scope.
      resolved = E::AttributeReference::Create(
          from_parent->id(),
          from_parent->attribute_name(),
          from_parent->attribute_alias(),
          from_parent->relation_name(),
          from_parent->getValueType(),
          E::AttributeReferenceScope::kOuter);
    }
  }

  if (resolved == nullptr) {
    THROW_SQL_ERROR_AT(parse_attr_node)
        << "Unrecognized attribute " << parse_attr_node->value();
  }
  return resolved;
}
// Rewrites a single physical plan node.
//
// - For a TableReference node: records, for each of its attributes, the
//   mapping attribute id -> (table reference, attribute) in source_ and
//   returns the node unchanged.
// - For an Aggregate node with more than one grouping expression: grouping
//   attributes are bucketed by the table reference they were recorded under.
//   For every bucket whose size exceeds
//   FLAGS_reduce_group_by_attributes_threshold and whose best-ranked
//   attribute (per AttributeInfo::IsBetterThan) is reported unique by the
//   cost model, all other attributes of that bucket are removed from the
//   group-by list and the table is joined back on top of the new Aggregate
//   with an inner HashJoin keyed on that attribute.
// - Any other node, or an Aggregate for which nothing can be removed, is
//   returned unchanged.
P::PhysicalPtr ReduceGroupByAttributes::applyToNode(const P::PhysicalPtr &input) {
  P::TableReferencePtr table_reference;
  if (P::SomeTableReference::MatchesWithConditionalCast(input, &table_reference)) {
    // Remember which table reference each attribute id originates from.
    for (const auto &table_attr : table_reference->attribute_list()) {
      source_.emplace(table_attr->id(),
                      std::make_pair(table_reference, table_attr));
    }
    return input;
  }

  P::AggregatePtr aggregate;
  if (!P::SomeAggregate::MatchesWithConditionalCast(input, &aggregate)) {
    return input;
  }
  if (aggregate->grouping_expressions().size() <= 1u) {
    // With at most one grouping expression there is nothing to reduce.
    return input;
  }

  // Bucket the group-by attributes by the table reference recorded for them.
  // Grouping expressions with no entry in source_ are left out of every bucket.
  std::map<P::TableReferencePtr, std::vector<E::AttributeReferencePtr>> attrs_by_table;
  for (const auto &grouping_expr : aggregate->grouping_expressions()) {
    const auto origin = source_.find(grouping_expr->id());
    if (origin == source_.end()) {
      continue;
    }
    attrs_by_table[origin->second.first].emplace_back(origin->second.second);
  }

  std::unordered_set<E::ExprId> dropped_ids;
  std::vector<std::pair<P::TableReferencePtr, E::AttributeReferencePtr>> tables_to_join;

  // Decide, bucket by bucket, whether the table is pulled above the aggregation.
  for (const auto &entry : attrs_by_table) {
    const P::TableReferencePtr table = entry.first;
    const std::vector<E::AttributeReferencePtr> &group = entry.second;

    // TODO(jianqiao): find a cost-based metric instead of hard-coding the
    // threshold number of group-by attributes.
    if (group.size() <= FLAGS_reduce_group_by_attributes_threshold) {
      continue;
    }

    // Describe every attribute of the bucket so the candidates can be ranked.
    std::vector<AttributeInfo> candidates;
    for (const auto &attr : group) {
      const auto &value_type = attr->getValueType();
      candidates.emplace_back(attr,
                              cost_model_->impliesUniqueAttributes(table, {attr}),
                              !value_type.isVariableLength(),
                              value_type.maximumByteLength());
    }

    // Rank pointers rather than the AttributeInfo objects themselves.
    std::vector<const AttributeInfo *> ranked;
    for (const auto &candidate : candidates) {
      ranked.emplace_back(&candidate);
    }
    std::sort(ranked.begin(), ranked.end(), AttributeInfo::IsBetterThan);

    const AttributeInfo &winner = *ranked.front();
    if (!winner.is_unique) {
      // The best-ranked attribute is not a key, so this table stays below
      // the aggregation.
      continue;
    }

    const E::AttributeReferencePtr key_attribute = winner.attribute;
    tables_to_join.emplace_back(table, key_attribute);

    // Everything in the bucket except the key is removed from the group-by.
    for (const auto &attr : group) {
      if (attr->id() == key_attribute->id()) {
        continue;
      }
      dropped_ids.emplace(attr->id());
    }
  }

  if (dropped_ids.empty()) {
    return input;
  }

  // Rebuild the Aggregate with the surviving group-by expressions, then stack
  // one HashJoin per hoisted table on top of it.
  std::vector<E::NamedExpressionPtr> kept_grouping_expressions;
  for (const auto &grouping_expr : aggregate->grouping_expressions()) {
    if (dropped_ids.count(grouping_expr->id()) == 0) {
      kept_grouping_expressions.emplace_back(grouping_expr);
    }
  }

  P::PhysicalPtr output =
      P::Aggregate::Create(aggregate->input(),
                           kept_grouping_expressions,
                           aggregate->aggregate_expressions(),
                           aggregate->filter_predicate());

  // The projection list keeps growing across joins: it starts from the new
  // Aggregate's output and gains the non-key attributes of each joined table.
  std::vector<E::NamedExpressionPtr> project_expressions =
      E::ToNamedExpressions(output->getOutputAttributes());

  for (const auto &hoisted : tables_to_join) {
    const P::TableReferencePtr &source_table = hoisted.first;
    const E::AttributeReferencePtr &probe_attribute = hoisted.second;

    E::AttributeReferencePtr build_attribute;
    std::vector<E::AttributeReferencePtr> build_side_attributes;
    for (const auto &attr : source_table->attribute_list()) {
      if (attr->id() != probe_attribute->id()) {
        build_side_attributes.emplace_back(attr);
        project_expressions.emplace_back(attr);
        continue;
      }
      // The key attribute gets a fresh expression id on the build side; the
      // probe side keeps using the original attribute.
      build_attribute =
          E::AttributeReference::Create(optimizer_context_->nextExprId(),
                                        attr->attribute_name(),
                                        attr->attribute_alias(),
                                        attr->relation_name(),
                                        attr->getValueType(),
                                        E::AttributeReferenceScope::kLocal);
      build_side_attributes.emplace_back(build_attribute);
    }
    DCHECK(build_attribute != nullptr);

    const P::TableReferencePtr build_side_table =
        P::TableReference::Create(source_table->relation(),
                                  source_table->relation()->getName(),
                                  build_side_attributes);
    output = P::HashJoin::Create(output,
                                 build_side_table,
                                 {probe_attribute},
                                 {build_attribute},
                                 nullptr,
                                 project_expressions,
                                 P::HashJoin::JoinType::kInnerJoin);
  }

  return output;
}