/// Collects the names of all databases known to `context` into a one-column block
/// (column "database", type String), filters that block with the conditions of `query`
/// and returns the column left in the block afterwards.
static ColumnPtr getFilteredDatabases(const ASTPtr & query, const Context & context)
{
    MutableColumnPtr database_names = ColumnString::create();

    for (const auto & name_and_database : context.getDatabases())
        database_names->insert(name_and_database.first);

    Block databases_block;
    databases_block.insert(
        ColumnWithTypeAndName(std::move(database_names), std::make_shared<DataTypeString>(), "database"));

    VirtualColumnUtils::filterBlockWithQuery(query, databases_block, context);

    return databases_block.getByPosition(0).column;
}
/// Reads the next block (up to `block_size` rows per column) from the table files.
/// Returns an empty block when reading is finished or the table directory has no files.
Block TinyLogBlockInputStream::readImpl()
{
    Block res;

    /** Marks the stream as finished and closes the files (before destroying the object).
      * When many sources are created, but simultaneously reading only a few of them,
      * buffers don't waste memory.
      */
    auto mark_finished = [this]
    {
        finished = true;
        streams.clear();
    };

    if (finished || (!streams.empty() && streams.begin()->second->compressed.eof()))
    {
        mark_finished();
        return res;
    }

    /// If there are no files in the folder, it means that the table is empty.
    if (Poco::DirectoryIterator(storage.full_path()) == Poco::DirectoryIterator())
        return res;

    for (const auto & column_desc : columns)
    {
        MutableColumnPtr data = column_desc.type->createColumn();

        try
        {
            readData(column_desc.name, *column_desc.type, *data, block_size);
        }
        catch (Exception & e)
        {
            e.addMessage("while reading column " + column_desc.name + " at " + storage.full_path());
            throw;
        }

        /// Columns for which nothing was read are not added to the result.
        if (data->size())
            res.insert(ColumnWithTypeAndName(std::move(data), column_desc.type, column_desc.name));
    }

    if (!res || streams.begin()->second->compressed.eof())
        mark_finished();

    return Nested::flatten(res);
}
/** Returns the tables of `source_database` whose names match `table_name_regexp`,
  * excluding this storage itself.
  *
  * query              - the query being executed; may be null.
  * has_virtual_column - if true, the list is additionally narrowed down to the tables
  *                      whose name passes the query conditions on the `_table` virtual column.
  * get_lock           - if true, each selected table is returned together with a structure lock
  *                      (lockStructure(false)); otherwise the lock pointer is empty.
  *
  * Throws ILLEGAL_PREWHERE if the query has a PREWHERE expression and a matching table
  * does not support PREWHERE.
  */
StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const ASTPtr & query, bool has_virtual_column, bool get_lock) const
{
    StorageListWithLocks selected_tables;
    DatabasePtr database = global_context.getDatabase(source_database);
    DatabaseIteratorPtr iterator = database->getIterator(global_context);

    /// Whether the query has PREWHERE does not depend on the table, so it is determined once.
    /// The result of the cast is checked before use: a query that is not an ASTSelectQuery
    /// is treated as having no PREWHERE instead of being dereferenced through a null pointer.
    const ASTSelectQuery * select_query = query ? typeid_cast<ASTSelectQuery *>(query.get()) : nullptr;
    const bool has_prewhere = select_query && select_query->prewhere_expression;

    auto virtual_column = ColumnString::create();

    while (iterator->isValid())
    {
        if (table_name_regexp.match(iterator->name()))
        {
            StoragePtr storage = iterator->table();

            if (has_prewhere && !storage->supportsPrewhere())
                throw Exception("Storage " + storage->getName() + " doesn't support PREWHERE.", ErrorCodes::ILLEGAL_PREWHERE);

            /// The Merge table must not select itself.
            if (storage.get() != this)
            {
                virtual_column->insert(storage->getTableName());
                selected_tables.emplace_back(storage, get_lock ? storage->lockStructure(false) : TableStructureReadLockPtr{});
            }
        }

        iterator->next();
    }

    if (has_virtual_column)
    {
        Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(virtual_column), std::make_shared<DataTypeString>(), "_table")};
        VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, global_context);
        auto values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_table");

        /// Remove unused tables from the list
        selected_tables.remove_if([&] (const auto & elem) { return values.find(elem.first->getTableName()) == values.end(); });
    }

    return selected_tables;
}
void StorageCatBoostPool::createSampleBlockAndColumns() { ColumnsDescription columns; NamesAndTypesList cat_columns; NamesAndTypesList num_columns; sample_block.clear(); for (auto & desc : columns_description) { DataTypePtr type; if (desc.column_type == DatasetColumnType::Categ || desc.column_type == DatasetColumnType::Auxiliary || desc.column_type == DatasetColumnType::DocId) type = std::make_shared<DataTypeString>(); else type = std::make_shared<DataTypeFloat64>(); if (desc.column_type == DatasetColumnType::Categ) cat_columns.emplace_back(desc.column_name, type); else if (desc.column_type == DatasetColumnType::Num) num_columns.emplace_back(desc.column_name, type); else columns.materialized.emplace_back(desc.column_name, type); if (!desc.alias.empty()) { auto alias = std::make_shared<ASTIdentifier>(desc.column_name); columns.defaults[desc.alias] = {ColumnDefaultKind::Alias, alias}; columns.aliases.emplace_back(desc.alias, type); } sample_block.insert(ColumnWithTypeAndName(type, desc.column_name)); } columns.ordinary.insert(columns.ordinary.end(), num_columns.begin(), num_columns.end()); columns.ordinary.insert(columns.ordinary.end(), cat_columns.begin(), cat_columns.end()); setColumns(columns); }
/// Asks the function object stored in `function_info` whether it is injective for the given arguments.
/// The arguments are passed as a block: constant arguments carry a one-row constant column,
/// non-constant arguments carry only their type and name (null column).
/// Returns false when `function_info` has no function object.
static bool isInjectiveFunction(
    const ASTFunction * ast_function,
    const TypeAndConstantInference::ExpressionInfo & function_info,
    const TypeAndConstantInference::Info & all_info)
{
    if (!function_info.function)
        return false;

    Block block_with_constants;

    for (const auto & argument : ast_function->arguments->children)
    {
        const String argument_name = argument->getColumnName();
        const TypeAndConstantInference::ExpressionInfo & argument_info = all_info.at(argument_name);

        block_with_constants.insert(ColumnWithTypeAndName(
            argument_info.is_constant_expression
                ? argument_info.data_type->createConstColumn(1, argument_info.value)
                : nullptr,
            argument_info.data_type,
            argument_name));
    }

    return function_info.function->isInjective(block_with_constants);
}
/** Builds the contents of the columns system table as a single block.
  *
  * The result has one row per column of every table that passes the query filter, with the fields
  * `database`, `table`, `name`, `type`, `default_type`, `default_expression` and `bytes`.
  *
  * Steps:
  *  1. build a block of database names and filter it with the query;
  *  2. for every remaining database enumerate its tables, remember their storages
  *     and extend the block with a `table` column;
  *  3. filter the block again (now by both `database` and `table`);
  *  4. for every remaining table take a structure lock and list its columns.
  *
  * Returns an empty list of streams if nothing is left after either filtering step.
  * Sets `processed_stage` to FetchColumns.
  * The parameters `settings`, `max_block_size` and `threads` are not referenced in the body.
  */
BlockInputStreams StorageSystemColumns::read(
    const Names & column_names,
    ASTPtr query,
    const Context & context,
    const Settings & settings,
    QueryProcessingStage::Enum & processed_stage,
    const size_t max_block_size,
    const unsigned threads)
{
    check(column_names);
    processed_stage = QueryProcessingStage::FetchColumns;

    Block block;

    /// Storages of all enumerated tables, keyed by (database name, table name).
    std::map<std::pair<std::string, std::string>, StoragePtr> storages;

    {
        Databases databases = context.getDatabases();

        /// Add the `database` column.
        ColumnPtr database_column = std::make_shared<ColumnString>();
        for (const auto & database : databases)
            database_column->insert(database.first);
        block.insert(ColumnWithTypeAndName(database_column, std::make_shared<DataTypeString>(), "database"));

        /// Filter the block that so far has only the `database` column.
        VirtualColumnUtils::filterBlockWithQuery(query, block, context);

        if (!block.rows())
            return BlockInputStreams();

        database_column = block.getByName("database").column;
        size_t rows = database_column->size();

        /// Add the `table` column.
        ColumnPtr table_column = std::make_shared<ColumnString>();

        /// offsets[i] is the cumulative number of tables in databases 0..i;
        /// it is used below to replicate each database row once per table.
        IColumn::Offsets_t offsets(rows);
        for (size_t i = 0; i < rows; ++i)
        {
            const std::string database_name = (*database_column)[i].get<std::string>();
            const DatabasePtr database = databases.at(database_name);
            offsets[i] = i ? offsets[i - 1] : 0;

            for (auto iterator = database->getIterator(); iterator->isValid(); iterator->next())
            {
                const String & table_name = iterator->name();
                storages.emplace(std::piecewise_construct,
                    std::forward_as_tuple(database_name, table_name),
                    std::forward_as_tuple(iterator->table()));
                table_column->insert(table_name);
                offsets[i] += 1;
            }
        }

        /// Replicate the columns already in the block so that they line up with `table_column`.
        for (size_t i = 0; i < block.columns(); ++i)
        {
            ColumnPtr & column = block.getByPosition(i).column;
            column = column->replicate(offsets);
        }

        block.insert(ColumnWithTypeAndName(table_column, std::make_shared<DataTypeString>(), "table"));
    }

    /// Filter the block with the `database` and `table` columns.
    VirtualColumnUtils::filterBlockWithQuery(query, block, context);

    if (!block.rows())
        return BlockInputStreams();

    ColumnPtr filtered_database_column = block.getByName("database").column;
    ColumnPtr filtered_table_column = block.getByName("table").column;

    /// Build the result.
    ColumnPtr database_column = std::make_shared<ColumnString>();
    ColumnPtr table_column = std::make_shared<ColumnString>();
    ColumnPtr name_column = std::make_shared<ColumnString>();
    ColumnPtr type_column = std::make_shared<ColumnString>();
    ColumnPtr default_type_column = std::make_shared<ColumnString>();
    ColumnPtr default_expression_column = std::make_shared<ColumnString>();
    ColumnPtr bytes_column = std::make_shared<ColumnUInt64>();

    size_t rows = filtered_database_column->size();
    for (size_t i = 0; i < rows; ++i)
    {
        const std::string database_name = (*filtered_database_column)[i].get<std::string>();
        const std::string table_name = (*filtered_table_column)[i].get<std::string>();

        NamesAndTypesList columns;
        ColumnDefaults column_defaults;
        std::unordered_map<String, size_t> column_sizes;

        /// The structure lock is held only inside this scope, while the table metadata is copied out.
        {
            StoragePtr storage = storages.at(std::make_pair(database_name, table_name));
            IStorage::TableStructureReadLockPtr table_lock;

            try
            {
                table_lock = storage->lockStructure(false);
            }
            catch (const Exception & e)
            {
                /** There are cases when IStorage::drop was called,
                  *  but we still own the object.
                  * Then the table will throw an exception at an attempt to lock it.
                  * Just skip the table.
                  */
                if (e.code() == ErrorCodes::TABLE_IS_DROPPED)
                    continue;
                else
                    throw;
            }

            /// Alias columns are listed after the columns returned by getColumnsList().
            columns = storage->getColumnsList();
            columns.insert(std::end(columns), std::begin(storage->alias_columns), std::end(storage->alias_columns));
            column_defaults = storage->column_defaults;

            /** Column size data for tables of the MergeTree family.
              * NOTE: In the future, an interface could be added that allows getting column sizes from IStorage.
              */
            if (auto storage_concrete = dynamic_cast<StorageMergeTree *>(storage.get()))
            {
                column_sizes = storage_concrete->getData().getColumnSizes();
            }
            else if (auto storage_concrete = dynamic_cast<StorageReplicatedMergeTree *>(storage.get()))
            {
                column_sizes = storage_concrete->getData().getColumnSizes();

                /// Sizes from the unreplicated data, if there is any, are added on top.
                auto unreplicated_data = storage_concrete->getUnreplicatedData();
                if (unreplicated_data)
                {
                    auto unreplicated_column_sizes = unreplicated_data->getColumnSizes();
                    for (const auto & name_size : unreplicated_column_sizes)
                        column_sizes[name_size.first] += name_size.second;
                }
            }
        }

        for (const auto & column : columns)
        {
            database_column->insert(database_name);
            table_column->insert(table_name);
            name_column->insert(column.name);
            type_column->insert(column.type->getName());

            /// Columns without an entry in `column_defaults` get default values in both default_* fields.
            {
                const auto it = column_defaults.find(column.name);
                if (it == std::end(column_defaults))
                {
                    default_type_column->insertDefault();
                    default_expression_column->insertDefault();
                }
                else
                {
                    default_type_column->insert(toString(it->second.type));
                    default_expression_column->insert(queryToString(it->second.expression));
                }
            }

            /// Columns with no known size (including all columns of non-MergeTree tables) get the default value.
            {
                const auto it = column_sizes.find(column.name);
                if (it == std::end(column_sizes))
                    bytes_column->insertDefault();
                else
                    bytes_column->insert(it->second);
            }
        }
    }

    /// Replace the intermediate (database, table) block with the final result columns.
    block.clear();

    block.insert(ColumnWithTypeAndName(database_column, std::make_shared<DataTypeString>(), "database"));
    block.insert(ColumnWithTypeAndName(table_column, std::make_shared<DataTypeString>(), "table"));
    block.insert(ColumnWithTypeAndName(name_column, std::make_shared<DataTypeString>(), "name"));
    block.insert(ColumnWithTypeAndName(type_column, std::make_shared<DataTypeString>(), "type"));
    block.insert(ColumnWithTypeAndName(default_type_column, std::make_shared<DataTypeString>(), "default_type"));
    block.insert(ColumnWithTypeAndName(default_expression_column, std::make_shared<DataTypeString>(), "default_expression"));
    block.insert(ColumnWithTypeAndName(bytes_column, std::make_shared<DataTypeUInt64>(), "bytes"));

    return BlockInputStreams{ 1, std::make_shared<OneBlockInputStream>(block) };
}
/** Builds a block that contains the columns of `required_columns`:
  *  - columns present in `block` are taken as they are;
  *  - columns that have an entry in `column_defaults` are skipped here and computed at the end
  *    by evaluateMissingDefaults;
  *  - all other missing columns are filled with default values of their type, `block.rows()` rows long.
  *    For a missing column that belongs to a nested structure already present in `block`,
  *    an array column sharing the offsets of that structure is created instead of a column of empty arrays.
  */
Block addMissingDefaults(const Block & block, const NamesAndTypesList & required_columns, const ColumnDefaults & column_defaults, const Context & context)
{
    /// Offset columns of all arrays in the input block, keyed by the nested table name.
    std::map<String, ColumnPtr> offset_columns;

    const size_t num_input_columns = block.columns();
    for (size_t pos = 0; pos < num_input_columns; ++pos)
    {
        const auto & elem = block.getByPosition(pos);

        const ColumnArray * array = typeid_cast<const ColumnArray *>(&*elem.column);
        if (!array)
            continue;

        auto & known_offsets = offset_columns[Nested::extractTableName(elem.name)];

        /// If for some reason there are different offset columns for one nested structure, the nonempty one is taken.
        if (!known_offsets || known_offsets->empty())
            known_offsets = array->getOffsetsPtr();
    }

    const size_t rows = block.rows();
    Block res;

    /// Take the given columns from the input block and create the missed columns that have no default expression
    /// (default and materialized columns are computed later).
    for (const auto & required : required_columns)
    {
        if (block.has(required.name))
        {
            res.insert(block.getByName(required.name));
            continue;
        }

        if (column_defaults.count(required.name) != 0)
            continue;

        const auto offsets_it = offset_columns.find(Nested::extractTableName(required.name));

        if (offsets_it == offset_columns.end())
        {
            /** It is necessary to turn a constant column into a full column, since in part of blocks (from other parts),
              *  it can be full (or the interpreter may decide that it is constant everywhere).
              */
            auto filled_column = required.type->createColumnConstWithDefaultValue(rows)->convertToFullColumnIfConst();
            res.insert(ColumnWithTypeAndName(std::move(filled_column), required.type, required.name));
            continue;
        }

        /// Element of a known nested structure: arrays of the correct lengths filled with default values.
        ColumnPtr offsets_column = offsets_it->second;
        DataTypePtr nested_type = typeid_cast<const DataTypeArray &>(*required.type).getNestedType();

        /// The total number of nested elements is the last offset.
        UInt64 nested_rows = 0;
        if (rows)
            nested_rows = get<UInt64>((*offsets_column)[rows - 1]);

        ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(nested_rows)->convertToFullColumnIfConst();
        auto array_column = ColumnArray::create(nested_column, offsets_column);
        res.insert(ColumnWithTypeAndName(std::move(array_column), required.type, required.name));
    }

    /// Computes explicitly specified values (in column_defaults) for default and materialized columns.
    evaluateMissingDefaults(res, required_columns, column_defaults, context);
    return res;
}
/** Joins `block` (the "left" side) in place against the hash maps `maps` built from the "right" side.
  *
  * The function:
  *  - materializes constant key columns and extracts the NULL map of the keys;
  *  - for FULL/RIGHT joins materializes all left columns (and makes non-key ones Nullable if use_nulls);
  *  - appends the columns of `sample_block_with_columns_to_add` that the block does not already have;
  *  - filters the pre-existing columns (ANY INNER / ANY RIGHT) or replicates them (ALL);
  *  - appends right key columns whose names differ from the left key names, when they are needed.
  *
  * NOTE(review): KIND and STRICTNESS used in the dispatch macro are not declared in the visible text -
  *  presumably they are template parameters declared on a line preceding this definition; confirm.
  */
void Join::joinBlockImpl(Block & block, const Maps & maps) const
{
    size_t keys_size = key_names_left.size();
    ColumnRawPtrs key_columns(keys_size);

    /// Rare case, when keys are constant. To avoid code bloat, simply materialize them.
    /// This container keeps the materialized columns alive while raw pointers to them are in key_columns.
    Columns materialized_columns;

    /// Memoize key columns to work with.
    for (size_t i = 0; i < keys_size; ++i)
    {
        key_columns[i] = block.getByName(key_names_left[i]).column.get();

        if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst())
        {
            materialized_columns.emplace_back(converted);
            key_columns[i] = materialized_columns.back().get();
        }
    }

    /// Keys with NULL value in any column won't join to anything.
    ColumnPtr null_map_holder;
    ConstNullMapPtr null_map{};
    extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);

    /// Number of columns the block had on entry; only these are filtered / replicated below.
    size_t existing_columns = block.columns();

    /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized.
      * Because if they are constants, then in the "not joined" rows, they may have different values
      *  - default values, which can differ from the values of these constants.
      */
    if (getFullness(kind))
    {
        for (size_t i = 0; i < existing_columns; ++i)
        {
            auto & col = block.getByPosition(i).column;

            if (ColumnPtr converted = col->convertToFullColumnIfConst())
                col = converted;

            /// If use_nulls, convert left columns (except keys) to Nullable.
            if (use_nulls)
            {
                if (std::end(key_names_left) == std::find(key_names_left.begin(), key_names_left.end(), block.getByPosition(i).name))
                    convertColumnToNullable(block.getByPosition(i));
            }
        }
    }

    /** For LEFT/INNER JOIN, the saved blocks do not contain keys.
      * For FULL/RIGHT JOIN, the saved blocks contain keys;
      *  but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped.
      */
    size_t num_columns_to_skip = 0;
    if (getFullness(kind))
        num_columns_to_skip = keys_size;

    /// Add new columns to the block.
    size_t num_columns_to_add = sample_block_with_columns_to_add.columns();
    MutableColumns added_columns;
    added_columns.reserve(num_columns_to_add);

    /// Type and name for each entry of added_columns (same order).
    std::vector<std::pair<decltype(ColumnWithTypeAndName::type), decltype(ColumnWithTypeAndName::name)>> added_type_name;
    added_type_name.reserve(num_columns_to_add);

    /// For each entry of added_columns, the position of the source column in the saved right blocks.
    std::vector<size_t> right_indexes;
    right_indexes.reserve(num_columns_to_add);

    for (size_t i = 0; i < num_columns_to_add; ++i)
    {
        const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i);

        /// Don't insert column if it's in left block.
        if (!block.has(src_column.name))
        {
            added_columns.push_back(src_column.column->cloneEmpty());
            added_columns.back()->reserve(src_column.column->size());
            added_type_name.emplace_back(src_column.type, src_column.name);
            right_indexes.push_back(num_columns_to_skip + i);
        }
    }

    size_t rows = block.rows();

    /// Used with ANY INNER JOIN.
    /// The filter is allocated unconditionally: it is also read below when building `mapping`.
    std::unique_ptr<IColumn::Filter> filter;

    bool filter_left_keys = (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any;
    filter = std::make_unique<IColumn::Filter>(rows);

    /// Used with ALL ... JOIN
    IColumn::Offset current_offset = 0;
    std::unique_ptr<IColumn::Offsets> offsets_to_replicate;

    if (strictness == ASTTableJoin::Strictness::All)
        offsets_to_replicate = std::make_unique<IColumn::Offsets>(rows);

    /// Dispatch to the implementation specialized for the key layout stored in `type`.
    switch (type)
    {
    #define M(TYPE) \
        case Join::Type::TYPE: \
            joinBlockImplType<KIND, STRICTNESS, typename KeyGetterForType<Join::Type::TYPE>::Type>(\
                *maps.TYPE, rows, key_columns, key_sizes, added_columns, null_map, \
                filter, current_offset, offsets_to_replicate, right_indexes); \
            break;
        APPLY_FOR_JOIN_VARIANTS(M)
    #undef M

        default:
            throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
    }

    const auto added_columns_size = added_columns.size();
    for (size_t i = 0; i < added_columns_size; ++i)
        block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), added_type_name[i].first, added_type_name[i].second));

    /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones.
    if (filter_left_keys)
        for (size_t i = 0; i < existing_columns; ++i)
            block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1);

    /// Built lazily on first use and shared between all right key columns added below.
    ColumnUInt64::Ptr mapping;

    /// Add join key columns from right block if they have a different name.
    for (size_t i = 0; i < key_names_right.size(); ++i)
    {
        auto & right_name = key_names_right[i];
        auto & left_name = key_names_left[i];

        if (needed_key_names_right.count(right_name) && !block.has(right_name))
        {
            const auto & col = block.getByName(left_name);
            auto column = col.column;
            if (!filter_left_keys)
            {
                if (!mapping)
                {
                    /// Rows with a nonzero filter value map to themselves; all other rows map to index `size`,
                    /// which is the position of the default value appended to the column below.
                    auto mut_mapping = ColumnUInt64::create(column->size());
                    auto & data = mut_mapping->getData();
                    size_t size = column->size();
                    for (size_t j = 0; j < size; ++j)
                        data[j] = (*filter)[j] ? j : size;

                    mapping = std::move(mut_mapping);
                }

                /// Append one default value, then pick rows through `mapping`
                /// (presumably index() gathers rows by the given indexes - confirm against IColumn).
                auto mut_column = (*std::move(column)).mutate();
                mut_column->insertDefault();
                column = mut_column->index(*mapping, 0);
            }
            block.insert({column, col.type, right_name});
        }
    }

    /// If ALL ... JOIN - we replicate all the columns except the new ones.
    if (offsets_to_replicate)
    {
        for (size_t i = 0; i < existing_columns; ++i)
            block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate);
    }
}
/** Reads up to `max_rows_to_read` rows of every column in `columns` into `res`, starting at `from_mark`.
  *
  * Columns already present in `res` are appended to; missing ones are created first.
  * Columns that are still empty after reading are erased from `res`.
  *
  * Returns the largest number of rows that was added to any single column.
  *
  * On any exception other than an Exception with code MEMORY_LIMIT_EXCEEDED the data part is reported
  *  as broken; the exception is then rethrown (with extra context in the message for Exception).
  */
size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res)
{
    size_t read_rows = 0;
    try
    {
        /// Pointers to offset columns that are common to the nested data structure columns.
        /// If append is true, then the value will be equal to nullptr and will be used only to
        /// check that the offsets column has been already read.
        OffsetColumns offset_columns;

        for (const NameAndTypePair & it : columns)
        {
            /// The column is already present in the block so we will append the values to the end.
            bool append = res.has(it.name);
            if (!append)
                res.insert(ColumnWithTypeAndName(it.type->createColumn(), it.type, it.name));

            /// To keep offsets shared. TODO Very dangerous. Get rid of this.
            MutableColumnPtr column = res.getByName(it.name).column->assumeMutable();

            bool read_offsets = true;

            /// For nested data structures collect pointers to offset columns.
            if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(it.type.get()))
            {
                String name = Nested::extractTableName(it.name);

                /// `second` of the result tells whether this nested table name is seen for the first time in this call.
                auto it_inserted = offset_columns.emplace(name, nullptr);

                /// offsets have already been read on the previous iteration and we don't need to read it again
                if (!it_inserted.second)
                    read_offsets = false;

                /// need to create new offsets
                if (it_inserted.second && !append)
                    it_inserted.first->second = ColumnArray::ColumnOffsets::create();

                /// share offsets in all elements of nested structure
                if (!append)
                    column = ColumnArray::create(type_arr->getNestedType()->createColumn(),
                                                 it_inserted.first->second);
            }

            try
            {
                size_t column_size_before_reading = column->size();

                readData(it.name, *it.type, *column, from_mark, continue_reading, max_rows_to_read, read_offsets);

                /// For elements of Nested, column_size_before_reading may be greater than column size
                ///  if offsets are not empty and were already read, but elements are empty.
                if (column->size())
                    read_rows = std::max(read_rows, column->size() - column_size_before_reading);
            }
            catch (Exception & e)
            {
                /// Better diagnostics.
                e.addMessage("(while reading column " + it.name + ")");
                throw;
            }

            /// Put the (possibly replaced) column back into the block, or drop it if nothing was read.
            if (column->size())
                res.getByName(it.name).column = std::move(column);
            else
                res.erase(it.name);
        }

        /// NOTE: positions for all streams must be kept in sync. In particular, even if for some streams there are no rows to be read,
        /// you must ensure that no seeks are skipped and at this point they all point to to_mark.
    }
    catch (Exception & e)
    {
        /// Running out of memory says nothing about the part itself, so it is not reported as broken.
        if (e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
            storage.reportBrokenPart(data_part->name);

        /// Better diagnostics.
        e.addMessage("(while reading from part " + path + " from mark " + toString(from_mark) + " with max_rows_to_read = " + toString(max_rows_to_read) + ")");
        throw;
    }
    catch (...)
    {
        storage.reportBrokenPart(data_part->name);

        throw;
    }

    return read_rows;
}