// Builds a paving restricted to the variables listed in `names`.
//
// The result gets the same type as this paving and a variable box holding only
// the requested variables (in the order given). For every type index, each box
// of this paving is reduced to the requested variables; the first box is always
// pushed, and each later one is pushed unless box_subset(existing, candidate)
// holds for some box already stored in the result under that type. Afterwards
// encluster() is applied to the result boxes of every type.
//
// Throws Growl if a requested name is not a variable of this paving.
Paving Paving::projection(const Names &names) {
    Paving projected;
    projected.set_type(type_);

    NamedBox kept_vars;  // variable box of the projected paving (varbox_ is this paving's)
    Tuple kept_idx;      // kept_idx[k] = position in varbox_ of the k-th requested name
    kept_idx.resize(names.size());

    // Resolve every requested name, failing on the first unknown one.
    for (nat n = 0; n < names.size(); ++n) {
        nat idx = varbox_.var(names[n]);
        if (!(idx < varbox_.size())) {
            std::ostringstream os;
            os << "Kodiak (projection): name \"" << names[n] << "\" doesn't exist in the paving.";
            throw Growl(os.str());
        }
        kept_vars.push(names[n], varbox_.val(idx));
        kept_idx[n] = idx;
    }
    projected.set_varbox(kept_vars);

    Box reduced;  // scratch box reused for every projected box
    reduced.resize(names.size());

    if (boxes_.size() == 0)
        return projected;

    for (nat t = 0; t < boxes_.size(); ++t) {  // one pass per box type
        if (boxes_[t].size() == 0)
            continue;

        // The first box of a type is pushed unconditionally.
        for (nat k = 0; k < names.size(); ++k)
            reduced[k] = boxes_[t][0][kept_idx[k]];
        projected.push_box(t, reduced);

        // Later boxes are pushed only when no stored box satisfies box_subset.
        for (nat b = 1; b < boxes_[t].size(); ++b) {
            for (nat k = 0; k < names.size(); ++k)
                reduced[k] = boxes_[t][b][kept_idx[k]];

            bool covered = false;
            for (nat p = 0; !covered && p < projected.boxes(t).size(); ++p) {
                if (box_subset(projected.boxes(t)[p], reduced))
                    covered = true;
            }

            if (!covered)
                projected.push_box(t, reduced);
        }
    }

    for (nat t = 0; t < boxes_.size(); ++t)  // one pass per box type
        encluster(projected.boxes(t));

    projected.set_type(type_);
    return projected;
}
void Path::set( size_t begin, size_t end, const Names &names ) { if( begin > m_names.size() ) { throw IECore::Exception( "Index out of range" ); } if( end > m_names.size() ) { throw IECore::Exception( "Index out of range" ); } Names::difference_type sizeDifference = names.size() - (end - begin); if( sizeDifference == 0 ) { if( equal( m_names.begin() + begin, m_names.begin() + end, names.begin() ) ) { return; } } else if( sizeDifference > 0 ) { m_names.resize( m_names.size() + sizeDifference ); std::copy_backward( m_names.begin() + end, m_names.begin() + end + sizeDifference, m_names.end() ); } else { std::copy( m_names.begin() + end, m_names.end(), m_names.begin() + end + sizeDifference ); m_names.resize( m_names.size() + sizeDifference ); } std::copy( names.begin(), names.end(), m_names.begin() + begin ); emitPathChanged(); }
void Paving::save(const std::string filename, const Names &titles, const Names &names) const { if (empty()) return; Tuple vs; for (nat v = 0; v < names.size(); ++v) { nat n = varbox_.var(names[v]); if (n < nvars()) vs.push_back(n); } std::ostringstream os; os << filename; for (nat i = 0; i < vs.size(); ++i) os << "_" << varbox_.name(vs[i]); os << ".dat"; if (vs.empty()) { vs.resize(varbox_.size()); for (nat v = 0; v < varbox_.size(); ++v) vs[v] = v; } std::ofstream f; f.open(os.str().c_str(), std::ofstream::out); f << "## File: " << os.str() << std::endl; f << "## Type: " << type_ << std::endl; f << "## Vars:" << std::endl; nat width = 2 * Kodiak::precision(); for (nat i = 0; i < vs.size(); ++i) f << std::setw(width) << varbox_.name(vs[i]); f << std::endl; for (nat i = 0; i < vs.size(); ++i) f << std::setw(width) << varbox_.box()[vs[i]].inf(); f << std::endl; for (nat i = 0; i < vs.size(); ++i) f << std::setw(width) << varbox_.box()[vs[i]].sup(); f << std::endl; f << std::endl; for (nat i = 0; i < titles.size(); ++i) { f << "## " << titles[i] << ": " << size(i) << " boxes " << std::endl; if (i < boxes_.size() && boxes_[i].size() > 0) save_boxes(f, boxes_[i], vs, width); else f << std::endl; } f.close(); std::cout << "Kodiak (save): Boxes were saved in file " << os.str() << std::endl; }
/** Extends `columns` with the columns needed to evaluate default expressions of
  * requested columns that have no files in `part`.
  *
  * If none of the (original or injected) columns has files in the part, the column
  * reported by the part as having the minimum compressed size is appended as well.
  *
  * Returns the set of names that were appended to `columns`.
  */
NameSet MergeTreeReadPool::injectRequiredColumns(const MergeTreeData::DataPartPtr & part, Names & columns) const
{
    NameSet already_requested{std::begin(columns), std::end(columns)};
    NameSet injected;

    bool no_column_has_files = true;

    /// `columns` may grow inside the loop, so the size is re-read on every iteration
    /// and freshly injected columns are inspected too.
    for (size_t pos = 0; pos < columns.size(); ++pos)
    {
        const auto & column_name = columns[pos];

        /// A column that has files does not need to be evaluated.
        if (part->hasColumnFiles(column_name))
        {
            no_column_has_files = false;
            continue;
        }

        const auto default_it = data.column_defaults.find(column_name);

        /// The column has no explicit default expression.
        if (default_it == std::end(data.column_defaults))
            continue;

        /// Collect the identifiers required to evaluate the default expression.
        IdentifierNameSet identifiers;
        default_it->second.expression->collectIdentifierNames(identifiers);

        for (const auto & identifier : identifiers)
        {
            if (!data.hasColumn(identifier))
                continue;

            /// Inserting into the set doubles as the "added only once" check.
            if (already_requested.emplace(identifier).second)
            {
                columns.emplace_back(identifier);
                injected.emplace(identifier);
            }
        }
    }

    /** Add a column of the minimum size.
      * Used when no column is needed or the files are missing, but at least the number of rows must be known.
      * Appends to `columns`.
      */
    if (no_column_has_files)
    {
        const auto minimum_size_column_name = part->getColumnNameWithMinumumCompressedSize();
        columns.push_back(minimum_size_column_name);
        /// Report the added column to the caller as well.
        injected.insert(columns.back());
    }

    return injected;
}
NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, size_t max_block_size_) : parent(parent_), max_block_size(max_block_size_) { /** left_sample_block contains keys and "left" columns. * result_sample_block - keys, "left" columns, and "right" columns. */ size_t num_keys = key_names_left.size(); size_t num_columns_left = left_sample_block.columns() - num_keys; size_t num_columns_right = parent.sample_block_with_columns_to_add.columns(); result_sample_block = materializeBlock(left_sample_block); /// Add columns from the right-side table to the block. for (size_t i = 0; i < num_columns_right; ++i) { const ColumnWithTypeAndName & src_column = parent.sample_block_with_columns_to_add.getByPosition(i); result_sample_block.insert(src_column.cloneEmpty()); } column_indices_left.reserve(num_columns_left); column_indices_keys_and_right.reserve(num_keys + num_columns_right); std::vector<bool> is_key_column_in_left_block(num_keys + num_columns_left, false); for (const std::string & key : key_names_left) { size_t key_pos = left_sample_block.getPositionByName(key); is_key_column_in_left_block[key_pos] = true; /// Here we establish the mapping between key columns of the left- and right-side tables. /// key_pos index is inserted in the position corresponding to key column in parent.blocks /// (saved blocks of the right-side table) and points to the same key column /// in the left_sample_block and thus in the result_sample_block. column_indices_keys_and_right.push_back(key_pos); } for (size_t i = 0; i < num_keys + num_columns_left; ++i) { if (!is_key_column_in_left_block[i]) column_indices_left.push_back(i); } for (size_t i = 0; i < num_columns_right; ++i) column_indices_keys_and_right.push_back(num_keys + num_columns_left + i); /// If use_nulls, convert left columns to Nullable. 
if (parent.use_nulls) { for (size_t i = 0; i < num_columns_left; ++i) { convertColumnToNullable(result_sample_block.getByPosition(column_indices_left[i])); } } columns_left.resize(num_columns_left); columns_keys_and_right.resize(num_keys + num_columns_right); }
/// Applies the single-name expand() overload to every element of `source_names`,
/// passing `level` through, and returns the results in the same order.
Names Macros::expand(const Names & source_names, size_t level) const
{
    Names expanded;
    expanded.reserve(source_names.size());

    for (auto it = source_names.begin(); it != source_names.end(); ++it)
        expanded.push_back(expand(*it, level));

    return expanded;
}
void Join::checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right) const { size_t keys_size = key_names_left.size(); for (size_t i = 0; i < keys_size; ++i) { /// Compare up to Nullability. DataTypePtr left_type = removeNullable(recursiveRemoveLowCardinality(block_left.getByName(key_names_left[i]).type)); DataTypePtr right_type = removeNullable(recursiveRemoveLowCardinality(block_right.getByName(key_names_right[i]).type)); if (!left_type->equals(*right_type)) throw Exception("Type mismatch of columns to JOIN by: " + key_names_left[i] + " " + left_type->getName() + " at left, " + key_names_right[i] + " " + right_type->getName() + " at right", ErrorCodes::TYPE_MISMATCH); } }
/** Creates one nested InterpreterSelectQuery per SELECT of the union and computes
  * the header of the combined result.
  *
  * `required_result_column_names` is passed as is to the first SELECT. For the other
  * SELECTs it is translated by position, because the column names may differ between
  * the SELECTs.
  *
  * Throws LOGICAL_ERROR when the union has no SELECTs and
  * UNION_ALL_RESULT_STRUCTURES_MISMATCH when the SELECTs yield different numbers of columns.
  */
InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
    const ASTPtr & query_ptr_,
    const Context & context_,
    const Names & required_result_column_names,
    QueryProcessingStage::Enum to_stage_,
    size_t subquery_depth_,
    bool only_analyze,
    bool modify_inplace)
    : query_ptr(query_ptr_),
    context(context_),
    to_stage(to_stage_),
    subquery_depth(subquery_depth_)
{
    const ASTSelectWithUnionQuery & ast = typeid_cast<const ASTSelectWithUnionQuery &>(*query_ptr);

    const size_t num_selects = ast.list_of_selects->children.size();
    if (num_selects == 0)
        throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);

    nested_interpreters.reserve(num_selects);

    /// Required names for every SELECT except the first; element 0 stays empty and unused.
    std::vector<Names> required_names_by_select(num_selects);

    if (num_selects > 1 && !required_result_column_names.empty())
    {
        /// Header of the first SELECT without any filtering by 'required_result_column_names'.
        /// It tells us at which positions of the SELECT clause the required columns are.
        Block first_full_header = InterpreterSelectQuery(
            ast.list_of_selects->children.at(0), context, Names(), to_stage, subquery_depth, true).getSampleBlock();

        std::vector<size_t> required_positions;
        required_positions.reserve(required_result_column_names.size());
        for (const auto & required_name : required_result_column_names)
            required_positions.push_back(first_full_header.getPositionByName(required_name));

        for (size_t select_num = 1; select_num < num_selects; ++select_num)
        {
            Block current_full_header = InterpreterSelectQuery(
                ast.list_of_selects->children.at(select_num), context, Names(), to_stage, subquery_depth, true).getSampleBlock();

            if (current_full_header.columns() != first_full_header.columns())
                throw Exception("Different number of columns in UNION ALL elements:\n"
                    + first_full_header.dumpNames()
                    + "\nand\n"
                    + current_full_header.dumpNames() + "\n",
                    ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH);

            Names & names_for_select = required_names_by_select[select_num];
            names_for_select.reserve(required_result_column_names.size());
            for (size_t n = 0; n < required_positions.size(); ++n)
                names_for_select.push_back(current_full_header.getByPosition(required_positions[n]).name);
        }
    }

    /// Initialize an interpreter for each SELECT query.
    for (size_t select_num = 0; select_num < num_selects; ++select_num)
    {
        const Names & names_for_select = select_num == 0
            ? required_result_column_names
            : required_names_by_select[select_num];

        nested_interpreters.emplace_back(std::make_unique<InterpreterSelectQuery>(
            ast.list_of_selects->children.at(select_num),
            context,
            names_for_select,
            to_stage,
            subquery_depth,
            only_analyze,
            modify_inplace));
    }

    /// Determine structure of the result.

    if (num_selects == 1)
    {
        result_header = nested_interpreters.front()->getSampleBlock();
        return;
    }

    Blocks headers(num_selects);
    for (size_t select_num = 0; select_num < num_selects; ++select_num)
        headers[select_num] = nested_interpreters[select_num]->getSampleBlock();

    /// Result column names come from the first SELECT.
    result_header = headers.front();
    const size_t num_columns = result_header.columns();

    for (size_t select_num = 1; select_num < num_selects; ++select_num)
    {
        if (headers[select_num].columns() != num_columns)
            throw Exception("Different number of columns in UNION ALL elements:\n"
                + result_header.dumpNames()
                + "\nand\n"
                + headers[select_num].dumpNames() + "\n",
                ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH);
    }

    for (size_t column_num = 0; column_num < num_columns; ++column_num)
    {
        ColumnWithTypeAndName & result_elem = result_header.getByPosition(column_num);

        /// The result type is the least supertype of this column over all SELECTs.
        DataTypes column_types(num_selects);
        for (size_t select_num = 0; select_num < num_selects; ++select_num)
            column_types[select_num] = headers[select_num].getByPosition(column_num).type;

        result_elem.type = getLeastSupertype(column_types);

        if (!result_elem.column->isColumnConst())
            continue;

        /// The result may stay constant only if every SELECT yields a constant with the same value;
        /// otherwise it is replaced by an ordinary column of the result type.
        for (size_t select_num = 1; select_num < num_selects; ++select_num)
        {
            const ColumnWithTypeAndName & source_elem = headers[select_num].getByPosition(column_num);

            const bool differs = !source_elem.column->isColumnConst()
                || (static_cast<const ColumnConst &>(*result_elem.column).getField()
                    != static_cast<const ColumnConst &>(*source_elem.column).getField());

            if (differs)
            {
                result_elem.column = result_elem.type->createColumn();
                break;
            }
        }
    }
}
int main(int argc, char ** argv) { using namespace DB; try { if (argc < 2) { std::cerr << "at least 1 argument expected" << std::endl; return 1; } Context context; NamesAndTypesList columns; for (int i = 2; i + 1 < argc; i += 2) { NameAndTypePair col; col.name = argv[i]; col.type = DataTypeFactory::instance().get(argv[i + 1]); columns.push_back(col); } ASTPtr root; ParserPtr parsers[] = {std::make_unique<ParserSelectQuery>(), std::make_unique<ParserExpressionList>(false)}; for (size_t i = 0; i < sizeof(parsers)/sizeof(parsers[0]); ++i) { IParser & parser = *parsers[i]; const char * pos = argv[1]; const char * end = argv[1] + strlen(argv[1]); const char * max_parsed_pos = pos; Expected expected = ""; if (parser.parse(pos, end, root, max_parsed_pos, expected)) break; else root = nullptr; } if (!root) { std::cerr << "invalid expression (should be select query or expression list)" << std::endl; return 2; } formatAST(*root, std::cout); std::cout << std::endl; ExpressionAnalyzer analyzer(root, context, {}, columns); Names required = analyzer.getRequiredColumns(); std::cout << "required columns:\n"; for (size_t i = 0; i < required.size(); ++i) { std::cout << required[i] << "\n"; } std::cout << "\n"; std::cout << "only consts:\n\n" << analyzer.getConstActions()->dumpActions() << "\n"; if (analyzer.hasAggregation()) { Names key_names; AggregateDescriptions aggregates; analyzer.getAggregateInfo(key_names, aggregates); std::cout << "keys:\n"; for (size_t i = 0; i < key_names.size(); ++i) std::cout << key_names[i] << "\n"; std::cout << "\n"; std::cout << "aggregates:\n"; for (size_t i = 0; i < aggregates.size(); ++i) { AggregateDescription desc = aggregates[i]; std::cout << desc.column_name << " = " << desc.function->getName() << " ( "; for (size_t j = 0; j < desc.argument_names.size(); ++j) std::cout << desc.argument_names[j] << " "; std::cout << ")\n"; } std::cout << "\n"; ExpressionActionsChain before; if (analyzer.appendWhere(before, false)) before.addStep(); 
analyzer.appendAggregateFunctionsArguments(before, false); analyzer.appendGroupBy(before, false); before.finalize(); ExpressionActionsChain after; if (analyzer.appendHaving(after, false)) after.addStep(); analyzer.appendSelect(after, false); analyzer.appendOrderBy(after, false); after.addStep(); analyzer.appendProjectResult(after, false); after.finalize(); std::cout << "before aggregation:\n\n"; for (size_t i = 0; i < before.steps.size(); ++i) { std::cout << before.steps[i].actions->dumpActions(); std::cout << std::endl; } std::cout << "\nafter aggregation:\n\n"; for (size_t i = 0; i < after.steps.size(); ++i) { std::cout << after.steps[i].actions->dumpActions(); std::cout << std::endl; } } else { if (typeid_cast<ASTSelectQuery *>(&*root)) { ExpressionActionsChain chain; if (analyzer.appendWhere(chain, false)) chain.addStep(); analyzer.appendSelect(chain, false); analyzer.appendOrderBy(chain, false); chain.addStep(); analyzer.appendProjectResult(chain, false); chain.finalize(); for (size_t i = 0; i < chain.steps.size(); ++i) { std::cout << chain.steps[i].actions->dumpActions(); std::cout << std::endl; } } else { std::cout << "unprojected actions:\n\n" << analyzer.getActions(false)->dumpActions() << "\n"; std::cout << "projected actions:\n\n" << analyzer.getActions(true)->dumpActions() << "\n"; } } } catch (Exception & e) { std::cerr << "Exception " << e.what() << ": " << e.displayText() << "\n" << e.getStackTrace().toString(); return 3; } return 0; }
/** Creates the input streams that read from one underlying table of the Merge table.
  *
  * The query is cloned and "_table" is rewritten in it to the table name (or to an empty string
  * when `storage` is null). With a null `storage` the result is a single stream produced by
  * interpreting the rewritten query over a one-block stream made from `header`.
  *
  * Otherwise the table is either read directly, or (when a later processing stage is requested than
  * the table reports) queried through a nested InterpreterSelectQuery. The resulting streams are
  * optionally concatenated into one, optionally extended with the "_table" constant column,
  * passed to convertingSourceStream() and given the table structure lock.
  *
  * Side effects visible to the caller: may append a column to `real_column_names` and may change
  * settings in `modified_context`.
  */
BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage,
                                                    const size_t max_block_size, const Block & header, const StoragePtr & storage,
                                                    const TableStructureReadLockPtr & struct_lock, Names & real_column_names,
                                                    Context & modified_context, size_t streams_num, bool has_table_virtual_column,
                                                    bool concat_streams)
{
    SelectQueryInfo modified_query_info = query_info;
    modified_query_info.query = query_info.query->clone();

    VirtualColumnUtils::rewriteEntityInAst(modified_query_info.query, "_table", storage ? storage->getTableName() : "");

    if (!storage)
        return BlockInputStreams{
            InterpreterSelectQuery(modified_query_info.query, modified_context, std::make_shared<OneBlockInputStream>(header),
                                   processed_stage, true).execute().in};

    BlockInputStreams streams;

    if (processed_stage <= storage->getQueryProcessingStage(modified_context))
    {
        /// If the query uses virtual columns only, at least one other column must still be requested.
        if (real_column_names.empty())
            real_column_names.push_back(ExpressionActions::getSmallestColumn(storage->getColumns().getAllPhysical()));

        streams = storage->read(real_column_names, modified_query_info, modified_context, processed_stage, max_block_size,
                                UInt32(streams_num));
    }
    else if (processed_stage > storage->getQueryProcessingStage(modified_context))
    {
        typeid_cast<ASTSelectQuery *>(modified_query_info.query.get())->replaceDatabaseAndTable(source_database, storage->getTableName());

        /// Maximum permissible parallelism is streams_num
        modified_context.getSettingsRef().max_threads = UInt64(streams_num);
        modified_context.getSettingsRef().max_streams_to_max_threads_ratio = 1;

        InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, Names{}, processed_stage};
        BlockInputStreamPtr interpreter_stream = interpreter.execute().in;

        /** Materialization is needed, since from distributed storage the constants come materialized.
          * Without it, columns of different kinds (Const and non-Const) would be produced in different threads,
          * which is not allowed: all code assumes that every block of a stream has the same types.
          */
        streams.emplace_back(std::make_shared<MaterializingBlockInputStream>(interpreter_stream));
    }

    if (streams.empty())
        return streams;

    if (concat_streams)
    {
        BlockInputStreamPtr single_stream =
            streams.size() > 1 ? std::make_shared<ConcatBlockInputStream>(streams) : streams[0];

        streams.resize(1);
        streams[0] = single_stream;
    }

    for (size_t n = 0; n < streams.size(); ++n)
    {
        BlockInputStreamPtr & stream = streams[n];

        if (has_table_virtual_column)
            stream = std::make_shared<AddingConstColumnBlockInputStream<String>>(
                stream, std::make_shared<DataTypeString>(), storage->getTableName(), "_table");

        /// Underlying tables may have different but convertible types, like numeric types of different width.
        /// The returned streams must have the same structure as the Merge table.
        convertingSourceStream(header, modified_context, modified_query_info.query, stream, processed_stage);

        stream->addTableLock(struct_lock);
    }

    return streams;
}
/** Reads from all selected underlying tables and returns the combined list of streams,
  * narrowed to at most `num_streams` by narrowBlockInputStreams().
  *
  * The virtual column "_table" is removed from the list of columns requested from the tables;
  * its presence is passed to createSourceStreams() as a flag instead.
  * When no table is selected, the result of createSourceStreams() with an empty storage is returned.
  */
BlockInputStreams StorageMerge::read(
    const Names & column_names,
    const SelectQueryInfo & query_info,
    const Context & context,
    QueryProcessingStage::Enum processed_stage,
    const size_t max_block_size,
    const unsigned num_streams)
{
    BlockInputStreams res;

    /// Split the requested columns into the "_table" virtual column and the real ones.
    bool has_table_virtual_column = false;
    Names real_column_names;
    real_column_names.reserve(column_names.size());

    for (const auto & requested_name : column_names)
    {
        if (requested_name != "_table")
            real_column_names.push_back(requested_name);
        else
            has_table_virtual_column = true;
    }

    /** Just in case, turn off optimization "transfer to PREWHERE",
      * since there is no certainty that it works when one of table is MergeTree and other is not.
      */
    Context modified_context = context;
    modified_context.getSettingsRef().optimize_move_to_prewhere = false;

    /// Structure of the result, which depends on the processing stage reached in the source tables.
    Block header = getQueryHeader(column_names, query_info, context, processed_stage);

    /** The list of selected tables is built first in order to know its size.
      * The size is needed to pass the recommended number of threads to each table.
      */
    StorageListWithLocks selected_tables = getSelectedTables(query_info.query, has_table_virtual_column, true);

    if (selected_tables.empty())
        return createSourceStreams(
            query_info, processed_stage, max_block_size, header, {}, {}, real_column_names, modified_context, 0, has_table_virtual_column);

    const size_t tables_count = selected_tables.size();
    size_t remaining_streams = num_streams;

    for (const auto & table_with_lock : selected_tables)
    {
        size_t current_need_streams = num_streams / tables_count;
        if (tables_count >= num_streams)
            current_need_streams = 1;

        size_t current_streams = std::min(current_need_streams, remaining_streams);
        remaining_streams -= current_streams;
        current_streams = std::max(size_t(1), current_streams);

        StoragePtr storage = table_with_lock.first;
        TableStructureReadLockPtr struct_lock = table_with_lock.second;

        BlockInputStreams source_streams;

        /// NOTE(review): current_streams was just clamped to at least 1, so the lazy branch below
        /// cannot be taken as the code stands; it is kept unchanged.
        if (current_streams)
        {
            source_streams = createSourceStreams(
                query_info, processed_stage, max_block_size, header, storage,
                struct_lock, real_column_names, modified_context, current_streams, has_table_virtual_column);
        }
        else
        {
            source_streams.emplace_back(std::make_shared<LazyBlockInputStream>(
                header, [=]() mutable -> BlockInputStreamPtr
                {
                    BlockInputStreams streams = createSourceStreams(query_info, processed_stage, max_block_size,
                                                                    header, storage, struct_lock, real_column_names,
                                                                    modified_context, current_streams, has_table_virtual_column, true);

                    if (streams.empty())
                        return std::make_shared<NullBlockInputStream>(header);

                    if (streams.size() != 1)
                        throw Exception("LogicalError: the lazy stream size must to be one or empty.", ErrorCodes::LOGICAL_ERROR);

                    return streams[0];
                }));
        }

        res.insert(res.end(), source_streams.begin(), source_streams.end());
    }

    if (res.empty())
        return res;

    res = narrowBlockInputStreams(res, num_streams);
    return res;
}
/** Joins `block` (the left side) in place with the data stored in `maps_`.
  *
  * block                     - left block; columns are materialized / filtered / replicated in place and new columns are appended.
  * key_names_left            - names of the key columns in `block`.
  * needed_key_names_right    - right key names that must be present in the result; a key missing from the block
  *                             is added as a copy of the corresponding left key column under the right name.
  * block_with_columns_to_add - only consulted via has(): a right column is added only if it is listed here.
  *                             The columns themselves are taken from the member sample_block_with_columns_to_add.
  * maps_                     - hash maps for the current key variant, selected by the member `type`.
  *
  * Throws Exception(UNKNOWN_SET_DATA_VARIANT) if `type` matches none of the variants.
  *
  * NOTE(review): KIND and STRICTNESS used in the macro below are not declared in the visible signature -
  * presumably template parameters declared on a line that is not part of this view; confirm.
  */
void Join::joinBlockImpl(
    Block & block,
    const Names & key_names_left,
    const NameSet & needed_key_names_right,
    const Block & block_with_columns_to_add,
    const Maps & maps_) const
{
    size_t keys_size = key_names_left.size();
    ColumnRawPtrs key_columns(keys_size);

    /// Rare case, when keys are constant. To avoid code bloat, simply materialize them.
    /// materialized_columns owns the converted columns; key_columns only holds raw pointers into it.
    Columns materialized_columns;
    materialized_columns.reserve(keys_size);

    /// Memoize key columns to work with.
    for (size_t i = 0; i < keys_size; ++i)
    {
        materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst()));
        key_columns[i] = materialized_columns.back().get();
    }

    /// Keys with NULL value in any column won't join to anything.
    ColumnPtr null_map_holder;
    ConstNullMapPtr null_map{};
    extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);

    /// Number of columns of the left block before anything is added; the loops below that
    /// filter / replicate "all the columns except the new ones" run over exactly this many positions.
    size_t existing_columns = block.columns();

    /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized.
      * Because if they are constants, then in the "not joined" rows, they may have different values
      *  - default values, which can differ from the values of these constants.
      */
    if (getFullness(kind))
    {
        for (size_t i = 0; i < existing_columns; ++i)
        {
            block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst();

            /// If use_nulls, convert left columns (except keys) to Nullable.
            if (use_nulls)
            {
                /// Linear search: the column is converted only if its name is not among the left keys.
                if (std::end(key_names_left) == std::find(key_names_left.begin(), key_names_left.end(), block.getByPosition(i).name))
                    convertColumnToNullable(block.getByPosition(i));
            }
        }
    }

    /** For LEFT/INNER JOIN, the saved blocks do not contain keys.
      * For FULL/RIGHT JOIN, the saved blocks contain keys;
      *  but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped.
      */
    size_t num_columns_to_skip = 0;
    if (getFullness(kind))
        num_columns_to_skip = keys_size;

    /// Add new columns to the block.
    /// added_columns, added_type_name and right_indexes are parallel: element n of each describes the same added column.
    size_t num_columns_to_add = sample_block_with_columns_to_add.columns();
    MutableColumns added_columns;
    added_columns.reserve(num_columns_to_add);

    std::vector<std::pair<decltype(ColumnWithTypeAndName::type), decltype(ColumnWithTypeAndName::name)>> added_type_name;
    added_type_name.reserve(num_columns_to_add);

    std::vector<size_t> right_indexes;
    right_indexes.reserve(num_columns_to_add);

    for (size_t i = 0; i < num_columns_to_add; ++i)
    {
        const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i);

        /// Don't insert column if it's in left block or not explicitly required.
        if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name))
        {
            added_columns.push_back(src_column.column->cloneEmpty());
            added_columns.back()->reserve(src_column.column->size());
            added_type_name.emplace_back(src_column.type, src_column.name);
            /// Index of this column in the saved blocks, past the skipped key columns.
            right_indexes.push_back(num_columns_to_skip + i);
        }
    }

    size_t rows = block.rows();

    std::unique_ptr<IColumn::Filter> filter;

    /// Left columns are filtered afterwards only for ANY INNER and ANY RIGHT joins.
    bool filter_left_keys = (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any;
    /// The filter is allocated unconditionally: besides the filtering above it is also read
    /// when the mapping for the right key columns is built further below.
    filter = std::make_unique<IColumn::Filter>(rows);

    /// Used with ALL ... JOIN
    IColumn::Offset current_offset = 0;
    std::unique_ptr<IColumn::Offsets> offsets_to_replicate;

    if (strictness == ASTTableJoin::Strictness::All)
        offsets_to_replicate = std::make_unique<IColumn::Offsets>(rows);

    /// Dispatch on the key variant: one case per variant is generated by APPLY_FOR_JOIN_VARIANTS.
    switch (type)
    {
    #define M(TYPE) \
        case Join::Type::TYPE: \
            joinBlockImplType<KIND, STRICTNESS, typename KeyGetterForType<Join::Type::TYPE>::Type>(\
                *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, \
                filter, current_offset, offsets_to_replicate, right_indexes); \
            break;
        APPLY_FOR_JOIN_VARIANTS(M)
    #undef M

        default:
            throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
    }

    /// Move the filled columns into the block under the type and name recorded earlier.
    const auto added_columns_size = added_columns.size();
    for (size_t i = 0; i < added_columns_size; ++i)
        block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), added_type_name[i].first, added_type_name[i].second));

    /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones.
    if (filter_left_keys)
        for (size_t i = 0; i < existing_columns; ++i)
            block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1);

    /// Built lazily on the first right key column that needs it and then reused for the others.
    ColumnUInt64::Ptr mapping;

    /// Add join key columns from right block if they has different name.
    /// NOTE(review): key_names_left is indexed with an index bounded by key_names_right.size();
    /// this assumes both lists have the same length - confirm.
    for (size_t i = 0; i < key_names_right.size(); ++i)
    {
        auto & right_name = key_names_right[i];
        auto & left_name = key_names_left[i];

        if (needed_key_names_right.count(right_name) && !block.has(right_name))
        {
            const auto & col = block.getByName(left_name);
            auto column = col.column;
            if (!filter_left_keys)
            {
                if (!mapping)
                {
                    /// Row j maps to itself when (*filter)[j] is non-zero and to index `size` otherwise.
                    auto mut_mapping = ColumnUInt64::create(column->size());
                    auto & data = mut_mapping->getData();
                    size_t size = column->size();
                    for (size_t j = 0; j < size; ++j)
                        data[j] = (*filter)[j] ? j : size;

                    mapping = std::move(mut_mapping);
                }

                /// A default value is inserted into the column before indexing; presumably it lands at
                /// position `size`, which is where the mapping sends the rows with a zero filter - confirm.
                auto mut_column = (*std::move(column)).mutate();
                mut_column->insertDefault();
                column = mut_column->index(*mapping, 0);
            }
            block.insert({column, col.type, right_name});
        }
    }

    /// If ALL ... JOIN - we replicate all the columns except the new ones.
    if (offsets_to_replicate)
    {
        for (size_t i = 0; i < existing_columns; ++i)
            block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate);
    }
}