Example No. 1
Block StorageMerge::getQueryHeader(
    const Names & column_names, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage)
{
    switch (processed_stage)
    {
        case QueryProcessingStage::FetchColumns:
        {
            Block header = getSampleBlockForColumns(column_names);
            if (query_info.prewhere_info)
            {
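                /// Illustrative (hypothetical query): for `SELECT x FROM t PREWHERE cond`,
                /// the prewhere actions compute the 'cond' column on the sample block;
                /// if the query itself does not select that column, 'remove_prewhere_column'
                /// is set and it is erased from the header below.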
                query_info.prewhere_info->prewhere_actions->execute(header);
                header = materializeBlock(header);
                if (query_info.prewhere_info->remove_prewhere_column)
                    header.erase(query_info.prewhere_info->prewhere_column_name);
            }
            return header;
        }
        case QueryProcessingStage::WithMergeableState:
        case QueryProcessingStage::Complete:
            return materializeBlock(InterpreterSelectQuery(
                query_info.query, context, std::make_shared<OneBlockInputStream>(getSampleBlockForColumns(column_names)),
                processed_stage, true).getSampleBlock());
    }
    throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR);
}
InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
    const ASTPtr & query_ptr_,
    const Context & context_,
    const Names & required_result_column_names,
    QueryProcessingStage::Enum to_stage_,
    size_t subquery_depth_,
    bool only_analyze,
    bool modify_inplace)
    : query_ptr(query_ptr_),
    context(context_),
    to_stage(to_stage_),
    subquery_depth(subquery_depth_)
{
    const ASTSelectWithUnionQuery & ast = typeid_cast<const ASTSelectWithUnionQuery &>(*query_ptr);

    size_t num_selects = ast.list_of_selects->children.size();

    if (!num_selects)
        throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);

    /// Initialize interpreters for each SELECT query.
    /// Note that we pass 'required_result_column_names' to the first SELECT.
    /// For the rest, we pass the names found at the corresponding positions of 'required_result_column_names'
    ///  in the result of the first SELECT, because the names could be different.
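    /// Illustrative example (hypothetical query):
    ///     SELECT a AS x, b AS y FROM t1  UNION ALL  SELECT c, d FROM t2
    /// If only 'y' is required from the result, the first SELECT is asked for 'y',
    /// and the second SELECT must be asked for 'd', which is found by taking the
    /// position of 'y' (1) in the full result header of the first SELECT.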

    nested_interpreters.reserve(num_selects);

    std::vector<Names> required_result_column_names_for_other_selects(num_selects);
    if (!required_result_column_names.empty() && num_selects > 1)
    {
        /// Result header if there is no filtering by 'required_result_column_names'.
        /// We use it to determine the positions of 'required_result_column_names' in the SELECT clause.

        Block full_result_header = InterpreterSelectQuery(
            ast.list_of_selects->children.at(0), context, Names(), to_stage, subquery_depth, true).getSampleBlock();

        std::vector<size_t> positions_of_required_result_columns(required_result_column_names.size());
        for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num)
            positions_of_required_result_columns[required_result_num] = full_result_header.getPositionByName(required_result_column_names[required_result_num]);

        for (size_t query_num = 1; query_num < num_selects; ++query_num)
        {
            Block full_result_header_for_current_select = InterpreterSelectQuery(
                ast.list_of_selects->children.at(query_num), context, Names(), to_stage, subquery_depth, true).getSampleBlock();

            if (full_result_header_for_current_select.columns() != full_result_header.columns())
                throw Exception("Different number of columns in UNION ALL elements:\n"
                    + full_result_header.dumpNames()
                    + "\nand\n"
                    + full_result_header_for_current_select.dumpNames() + "\n",
                    ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH);

            required_result_column_names_for_other_selects[query_num].reserve(required_result_column_names.size());
            for (const auto & pos : positions_of_required_result_columns)
                required_result_column_names_for_other_selects[query_num].push_back(full_result_header_for_current_select.getByPosition(pos).name);
        }
    }

    for (size_t query_num = 0; query_num < num_selects; ++query_num)
    {
        const Names & current_required_result_column_names
            = query_num == 0 ? required_result_column_names : required_result_column_names_for_other_selects[query_num];

        nested_interpreters.emplace_back(std::make_unique<InterpreterSelectQuery>(
            ast.list_of_selects->children.at(query_num),
            context,
            current_required_result_column_names,
            to_stage,
            subquery_depth,
            only_analyze,
            modify_inplace));
    }

    /// Determine structure of the result.

    if (num_selects == 1)
    {
        result_header = nested_interpreters.front()->getSampleBlock();
    }
    else
    {
        Blocks headers(num_selects);
        for (size_t query_num = 0; query_num < num_selects; ++query_num)
            headers[query_num] = nested_interpreters[query_num]->getSampleBlock();

        result_header = headers.front();
        size_t num_columns = result_header.columns();

        for (size_t query_num = 1; query_num < num_selects; ++query_num)
            if (headers[query_num].columns() != num_columns)
                throw Exception("Different number of columns in UNION ALL elements:\n"
                    + result_header.dumpNames()
                    + "\nand\n"
                    + headers[query_num].dumpNames() + "\n",
                    ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH);

        for (size_t column_num = 0; column_num < num_columns; ++column_num)
        {
            ColumnWithTypeAndName & result_elem = result_header.getByPosition(column_num);

            /// Determine common type.

            DataTypes types(num_selects);
            for (size_t query_num = 0; query_num < num_selects; ++query_num)
                types[query_num] = headers[query_num].getByPosition(column_num).type;

            result_elem.type = getLeastSupertype(types);
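            /// Illustrative (hypothetical types): if one SELECT yields this column as UInt8
            /// and another as Int64, getLeastSupertype picks Int64 as the common result type.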

            /// If the constness differs between the SELECTs, or the constants have different values, the result must be non-constant.
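            /// Illustrative (hypothetical query): for `SELECT 1 AS x UNION ALL SELECT 2 AS x`,
            /// both columns are constants but with different values, so the result column
            /// is replaced below with an ordinary (non-constant) column of the common type.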

            if (result_elem.column->isColumnConst())
            {
                bool need_materialize = false;
                for (size_t query_num = 1; query_num < num_selects; ++query_num)
                {
                    const ColumnWithTypeAndName & source_elem = headers[query_num].getByPosition(column_num);

                    if (!source_elem.column->isColumnConst()
                        || (static_cast<const ColumnConst &>(*result_elem.column).getField()
                            != static_cast<const ColumnConst &>(*source_elem.column).getField()))
                    {
                        need_materialize = true;
                        break;
                    }
                }

                if (need_materialize)
                    result_elem.column = result_elem.type->createColumn();
            }

            /// Note that the result column names are taken from the first SELECT.
        }
    }
}
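
A hedged usage sketch of the interpreter constructed above (the parsing helper and default values for the trailing constructor arguments are assumptions, not taken from the source):

/// Hypothetical usage sketch: run a UNION ALL query through InterpreterSelectWithUnionQuery.
/// 'parseUnionQuery' is a placeholder for however the caller obtains the AST.
void runUnionExample(const Context & context)
{
    ASTPtr ast = parseUnionQuery("SELECT toUInt8(1) AS x UNION ALL SELECT toInt64(-1) AS x");
    InterpreterSelectWithUnionQuery interpreter(ast, context, Names{});

    /// The unified 'result_header' built above: a single non-constant Int64 column 'x',
    /// because Int64 is the least common supertype and the two constants differ.
    Block header = interpreter.getSampleBlock();

    /// The 'in' stream produces blocks with exactly that header.
    BlockInputStreamPtr in = interpreter.execute().in;
}
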
Example No. 3
BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage,
                                                    const size_t max_block_size, const Block & header, const StoragePtr & storage,
                                                    const TableStructureReadLockPtr & struct_lock, Names & real_column_names,
                                                    Context & modified_context, size_t streams_num, bool has_table_virtual_column,
                                                    bool concat_streams)
{
    SelectQueryInfo modified_query_info = query_info;
    modified_query_info.query = query_info.query->clone();

    VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", storage ? storage->getTableName() : "");
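    /// Illustrative (hypothetical query): for `SELECT * FROM merge_table WHERE _table = 'hits'`,
    /// this binds the '_table' virtual column to the current table's name in the per-table query,
    /// so the condition can be evaluated even though the underlying table has no such column.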

    if (!storage)
        return BlockInputStreams{
            InterpreterSelectQuery(modified_query_info.query, modified_context, std::make_shared<OneBlockInputStream>(header),
                                   processed_stage, true).execute().in};

    BlockInputStreams source_streams;

    if (processed_stage <= storage->getQueryProcessingStage(modified_context))
    {
        /// If there are only virtual columns in the query, we must request at least one other column.
        if (real_column_names.empty())
            real_column_names.push_back(ExpressionActions::getSmallestColumn(storage->getColumns().getAllPhysical()));

        source_streams = storage->read(real_column_names, modified_query_info, modified_context, processed_stage, max_block_size,
                                       UInt32(streams_num));
    }
    else if (processed_stage > storage->getQueryProcessingStage(modified_context))
    {
        typeid_cast<ASTSelectQuery *>(modified_query_info.query.get())->replaceDatabaseAndTable(source_database, storage->getTableName());

        /// Maximum permissible parallelism is streams_num
        modified_context.getSettingsRef().max_threads = UInt64(streams_num);
        modified_context.getSettingsRef().max_streams_to_max_threads_ratio = 1;

        InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, Names{}, processed_stage};
        BlockInputStreamPtr interpreter_stream = interpreter.execute().in;

        /** Materialization is needed, since constants from distributed storage arrive already materialized.
          * If we do not materialize here, different threads could produce columns of different kinds (Const and non-Const)
          * for the same position, and this is not allowed, since all code assumes that every block in a stream has the same structure.
          */
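        /// Illustrative (hypothetical setup): if `SELECT 1 AS x FROM merge_table` hits one local and one
        /// remote table, the remote stream returns 'x' as an ordinary column while a local stream could
        /// return it as Const; materializing makes every stream produce ordinary columns.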
        source_streams.emplace_back(std::make_shared<MaterializingBlockInputStream>(interpreter_stream));
    }

    if (!source_streams.empty())
    {
        if (concat_streams)
        {
            BlockInputStreamPtr stream =
                source_streams.size() > 1 ? std::make_shared<ConcatBlockInputStream>(source_streams) : source_streams[0];

            source_streams.resize(1);
            source_streams[0] = stream;
        }

        for (BlockInputStreamPtr & source_stream : source_streams)
        {
            if (has_table_virtual_column)
                source_stream = std::make_shared<AddingConstColumnBlockInputStream<String>>(
                    source_stream, std::make_shared<DataTypeString>(), storage->getTableName(), "_table");

            /// The underlying tables could have different but convertible types, such as numeric types of different width.
            /// We must return streams whose structure equals the structure of the Merge table (see the sketch after this function).
            convertingSourceStream(header, modified_context, modified_query_info.query, source_stream, processed_stage);

            source_stream->addTableLock(struct_lock);
        }
    }

    return source_streams;
}
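
The conversion step referenced above (convertingSourceStream) is not shown in this excerpt. A minimal sketch of the idea, assuming a ConvertingBlockInputStream with name-based column matching is available; the real helper and its checks may differ:

/// Hedged sketch only: wrap a per-table stream so that its blocks are cast to the
/// structure of the Merge table's header. 'convertingSourceStreamSketch' is a
/// hypothetical name; the real convertingSourceStream may perform extra validation.
void convertingSourceStreamSketch(const Block & header, const Context & context, BlockInputStreamPtr & source_stream)
{
    Block source_header = source_stream->getHeader();

    /// Only wrap the stream if its structure differs from the Merge table's header.
    if (!blocksHaveEqualStructure(source_header, header))
        source_stream = std::make_shared<ConvertingBlockInputStream>(
            context, source_stream, header, ConvertingBlockInputStream::MatchColumnsMode::Name);
}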