Block StorageMerge::getQueryHeader( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage) { switch (processed_stage) { case QueryProcessingStage::FetchColumns: { Block header = getSampleBlockForColumns(column_names); if (query_info.prewhere_info) { query_info.prewhere_info->prewhere_actions->execute(header); header = materializeBlock(header); if (query_info.prewhere_info->remove_prewhere_column) header.erase(query_info.prewhere_info->prewhere_column_name); } return header; } case QueryProcessingStage::WithMergeableState: case QueryProcessingStage::Complete: return materializeBlock(InterpreterSelectQuery( query_info.query, context, std::make_shared<OneBlockInputStream>(getSampleBlockForColumns(column_names)), processed_stage, true).getSampleBlock()); } throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR); }
InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( const ASTPtr & query_ptr_, const Context & context_, const Names & required_result_column_names, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, bool only_analyze, bool modify_inplace) : query_ptr(query_ptr_), context(context_), to_stage(to_stage_), subquery_depth(subquery_depth_) { const ASTSelectWithUnionQuery & ast = typeid_cast<const ASTSelectWithUnionQuery &>(*query_ptr); size_t num_selects = ast.list_of_selects->children.size(); if (!num_selects) throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); /// Initialize interpreters for each SELECT query. /// Note that we pass 'required_result_column_names' to first SELECT. /// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT, /// because names could be different. nested_interpreters.reserve(num_selects); std::vector<Names> required_result_column_names_for_other_selects(num_selects); if (!required_result_column_names.empty() && num_selects > 1) { /// Result header if there are no filtering by 'required_result_column_names'. /// We use it to determine positions of 'required_result_column_names' in SELECT clause. Block full_result_header = InterpreterSelectQuery( ast.list_of_selects->children.at(0), context, Names(), to_stage, subquery_depth, true).getSampleBlock(); std::vector<size_t> positions_of_required_result_columns(required_result_column_names.size()); for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num) positions_of_required_result_columns[required_result_num] = full_result_header.getPositionByName(required_result_column_names[required_result_num]); for (size_t query_num = 1; query_num < num_selects; ++query_num) { Block full_result_header_for_current_select = InterpreterSelectQuery( ast.list_of_selects->children.at(query_num), context, Names(), to_stage, subquery_depth, true).getSampleBlock(); if (full_result_header_for_current_select.columns() != full_result_header.columns()) throw Exception("Different number of columns in UNION ALL elements:\n" + full_result_header.dumpNames() + "\nand\n" + full_result_header_for_current_select.dumpNames() + "\n", ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); required_result_column_names_for_other_selects[query_num].reserve(required_result_column_names.size()); for (const auto & pos : positions_of_required_result_columns) required_result_column_names_for_other_selects[query_num].push_back(full_result_header_for_current_select.getByPosition(pos).name); } } for (size_t query_num = 0; query_num < num_selects; ++query_num) { const Names & current_required_result_column_names = query_num == 0 ? required_result_column_names : required_result_column_names_for_other_selects[query_num]; nested_interpreters.emplace_back(std::make_unique<InterpreterSelectQuery>( ast.list_of_selects->children.at(query_num), context, current_required_result_column_names, to_stage, subquery_depth, only_analyze, modify_inplace)); } /// Determine structure of the result. if (num_selects == 1) { result_header = nested_interpreters.front()->getSampleBlock(); } else { Blocks headers(num_selects); for (size_t query_num = 0; query_num < num_selects; ++query_num) headers[query_num] = nested_interpreters[query_num]->getSampleBlock(); result_header = headers.front(); size_t num_columns = result_header.columns(); for (size_t query_num = 1; query_num < num_selects; ++query_num) if (headers[query_num].columns() != num_columns) throw Exception("Different number of columns in UNION ALL elements:\n" + result_header.dumpNames() + "\nand\n" + headers[query_num].dumpNames() + "\n", ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); for (size_t column_num = 0; column_num < num_columns; ++column_num) { ColumnWithTypeAndName & result_elem = result_header.getByPosition(column_num); /// Determine common type. DataTypes types(num_selects); for (size_t query_num = 0; query_num < num_selects; ++query_num) types[query_num] = headers[query_num].getByPosition(column_num).type; result_elem.type = getLeastSupertype(types); /// If there are different constness or different values of constants, the result must be non-constant. if (result_elem.column->isColumnConst()) { bool need_materialize = false; for (size_t query_num = 1; query_num < num_selects; ++query_num) { const ColumnWithTypeAndName & source_elem = headers[query_num].getByPosition(column_num); if (!source_elem.column->isColumnConst() || (static_cast<const ColumnConst &>(*result_elem.column).getField() != static_cast<const ColumnConst &>(*source_elem.column).getField())) { need_materialize = true; break; } } if (need_materialize) result_elem.column = result_elem.type->createColumn(); } /// BTW, result column names are from first SELECT. } } }
BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage, const size_t max_block_size, const Block & header, const StoragePtr & storage, const TableStructureReadLockPtr & struct_lock, Names & real_column_names, Context & modified_context, size_t streams_num, bool has_table_virtual_column, bool concat_streams) { SelectQueryInfo modified_query_info = query_info; modified_query_info.query = query_info.query->clone(); VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", storage ? storage->getTableName() : ""); if (!storage) return BlockInputStreams{ InterpreterSelectQuery(modified_query_info.query, modified_context, std::make_shared<OneBlockInputStream>(header), processed_stage, true).execute().in}; BlockInputStreams source_streams; if (processed_stage <= storage->getQueryProcessingStage(modified_context)) { /// If there are only virtual columns in query, you must request at least one other column. if (real_column_names.size() ==0) real_column_names.push_back(ExpressionActions::getSmallestColumn(storage->getColumns().getAllPhysical())); source_streams = storage->read(real_column_names, modified_query_info, modified_context, processed_stage, max_block_size, UInt32(streams_num)); } else if (processed_stage > storage->getQueryProcessingStage(modified_context)) { typeid_cast<ASTSelectQuery *>(modified_query_info.query.get())->replaceDatabaseAndTable(source_database, storage->getTableName()); /// Maximum permissible parallelism is streams_num modified_context.getSettingsRef().max_threads = UInt64(streams_num); modified_context.getSettingsRef().max_streams_to_max_threads_ratio = 1; InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, Names{}, processed_stage}; BlockInputStreamPtr interpreter_stream = interpreter.execute().in; /** Materialization is needed, since from distributed storage the constants come materialized. * If you do not do this, different types (Const and non-Const) columns will be produced in different threads, * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same. */ source_streams.emplace_back(std::make_shared<MaterializingBlockInputStream>(interpreter_stream)); } if (!source_streams.empty()) { if (concat_streams) { BlockInputStreamPtr stream = source_streams.size() > 1 ? std::make_shared<ConcatBlockInputStream>(source_streams) : source_streams[0]; source_streams.resize(1); source_streams[0] = stream; } for (BlockInputStreamPtr & source_stream : source_streams) { if (has_table_virtual_column) source_stream = std::make_shared<AddingConstColumnBlockInputStream<String>>( source_stream, std::make_shared<DataTypeString>(), storage->getTableName(), "_table"); /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. convertingSourceStream(header, modified_context, modified_query_info.query, source_stream, processed_stage); source_stream->addTableLock(struct_lock); } } return source_streams; }