/** Builds the stream pipeline for an INSERT query.
  *
  * The output side is a chain of block output streams wrapped around PushingToViewsBlockOutputStream.
  * For a plain INSERT the head of that chain is returned in `out`.
  * For INSERT ... SELECT the SELECT result is adapted to the table's sample block and
  * copied into the chain by the stream returned in `in`.
  */
BlockIO InterpreterInsertQuery::execute()
{
    ASTInsertQuery & query = typeid_cast<ASTInsertQuery &>(*query_ptr);
    checkAccess(query);

    StoragePtr table = getTable();

    /// Held for the duration of this function; the pushing stream below takes its own lock.
    auto table_lock = table->lockStructure(true, __PRETTY_FUNCTION__);

    NamesAndTypesList required_columns = table->getColumnsList();

    auto & settings = context.getSettingsRef();

    /// Assemble the chain of output streams, innermost (closest to the storage) first.
    BlockOutputStreamPtr sink = std::make_shared<PushingToViewsBlockOutputStream>(
        query.database, query.table, table, context, query_ptr, query.no_destination);

    sink = std::make_shared<MaterializingBlockOutputStream>(sink);

    sink = std::make_shared<AddingDefaultBlockOutputStream>(
        sink,
        required_columns,
        table->column_defaults,
        context,
        static_cast<bool>(settings.strict_insert_defaults));

    if (!allow_materialized)
        sink = std::make_shared<ProhibitColumnsBlockOutputStream>(sink, table->materialized_columns);

    sink = std::make_shared<SquashingBlockOutputStream>(
        sink,
        settings.min_insert_block_size_rows,
        settings.min_insert_block_size_bytes);

    /// The counting wrapper needs the process list element before it is type-erased.
    auto counting_sink = std::make_shared<CountingBlockOutputStream>(sink);
    counting_sink->setProcessListElement(context.getProcessListElement());
    sink = std::move(counting_sink);

    BlockIO io;
    io.out_sample = getSampleBlock();

    /// Plain INSERT: the caller writes into the chain directly.
    if (!query.select)
    {
        io.out = sink;
        return io;
    }

    /// INSERT ... SELECT: convert the SELECT result to the table's structure and copy it into the chain.
    InterpreterSelectQuery interpreter_select{query.select, context};
    io.in_sample = interpreter_select.getSampleBlock();

    io.in = interpreter_select.execute().in;
    io.in = std::make_shared<NullableAdapterBlockInputStream>(io.in, io.in_sample, io.out_sample);
    io.in = std::make_shared<CastTypeBlockInputStream>(context, io.in, io.out_sample);
    io.in = std::make_shared<NullAndDoCopyBlockInputStream>(io.in, sink);

    return io;
}
/** Prepares an output stream that writes to `storage` and to every table that depends on it.
  *
  * - Takes a structure lock on `storage` and keeps it in this stream.
  * - For each dependency of (database, table) creates a nested PushingToViewsBlockOutputStream
  *   and remembers it together with the view's inner query.
  * - Unless `no_destination` is set, opens the write stream of `storage` itself.
  */
PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
    String database,
    String table,
    StoragePtr storage,
    const Context & context_,
    const ASTPtr & query_ptr_,
    bool no_destination)
    : context(context_), query_ptr(query_ptr_)
{
    /** TODO This is a very important line. At any insertion into the table one of streams should own lock.
      * Although now any insertion into the table is done via PushingToViewsBlockOutputStream,
      * but it's clear that here is not the best place for this functionality.
      */
    addTableLock(storage->lockStructure(true, __PRETTY_FUNCTION__));

    if (!table.empty())
    {
        Dependencies dependent_views = context.getDependencies(database, table);

        /// Insertions into materialized views use a dedicated copy of the context.
        if (!dependent_views.empty())
        {
            views_context = std::make_unique<Context>(context);
            /// Do not deduplicate insertions into MV if the main insertion is Ok
            views_context->getSettingsRef().insert_deduplicate = false;
        }

        for (const auto & view_id : dependent_views)
        {
            const auto & view_database = view_id.first;
            const auto & view_table = view_id.second;

            auto view_storage = context.getTable(view_database, view_table);

            /// Throws std::bad_cast if the dependent table is not a StorageMaterializedView.
            auto & materialized_view = dynamic_cast<const StorageMaterializedView &>(*view_storage);
            auto inner_query = materialized_view.getInnerQuery();

            auto view_out = std::make_shared<PushingToViewsBlockOutputStream>(
                view_database, view_table, view_storage, *views_context, ASTPtr());

            views.emplace_back(ViewInfo{std::move(inner_query), view_database, view_table, std::move(view_out)});
        }
    }

    /// Do not push to destination table if the flag is set
    if (no_destination)
        return;

    output = storage->write(query_ptr, context.getSettingsRef());
    replicated_output = dynamic_cast<ReplicatedMergeTreeBlockOutputStream *>(output.get());
}
/** Collects the tables of `source_database` whose names match `table_name_regexp`, excluding this table itself.
  *
  * @param query               The query being executed; used for the PREWHERE check and, when
  *                            `has_virtual_column` is set, to filter tables by the `_table` virtual column.
  * @param has_virtual_column  Whether to filter the list by the `_table` virtual column.
  * @param get_lock            Whether to take a structure read lock on every selected table.
  * @return List of (storage, lock) pairs; the lock is empty when `get_lock` is false.
  * @throws Exception(ILLEGAL_PREWHERE) if the query has PREWHERE and a matching table does not support it.
  */
StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const ASTPtr & query, bool has_virtual_column, bool get_lock) const
{
    StorageListWithLocks result;

    DatabasePtr database = global_context.getDatabase(source_database);
    DatabaseIteratorPtr iterator = database->getIterator(global_context);

    /// Names of the selected tables, in the same order as `result`.
    auto table_names = ColumnString::create();

    for (; iterator->isValid(); iterator->next())
    {
        if (!table_name_regexp.match(iterator->name()))
            continue;

        StoragePtr storage = iterator->table();

        /// The PREWHERE check applies to every matching table, including this one.
        if (query && typeid_cast<ASTSelectQuery *>(query.get())->prewhere_expression && !storage->supportsPrewhere())
            throw Exception("Storage " + storage->getName() + " doesn't support PREWHERE.", ErrorCodes::ILLEGAL_PREWHERE);

        /// Never select ourselves.
        if (storage.get() == this)
            continue;

        table_names->insert(storage->getTableName());
        result.emplace_back(storage, get_lock ? storage->lockStructure(false) : TableStructureReadLockPtr{});
    }

    if (!has_virtual_column)
        return result;

    /// Evaluate the query's conditions against a one-column block of table names.
    Block table_names_block = Block{ColumnWithTypeAndName(std::move(table_names), std::make_shared<DataTypeString>(), "_table")};
    VirtualColumnUtils::filterBlockWithQuery(query, table_names_block, global_context);
    auto remaining_names = VirtualColumnUtils::extractSingleValueFromBlock<String>(table_names_block, "_table");

    /// Remove unused tables from the list
    result.remove_if([&] (const auto & storage_with_lock)
    {
        return remaining_names.find(storage_with_lock.first->getTableName()) == remaining_names.end();
    });

    return result;
}
/** Builds the stream pipeline for an INSERT query.
  *
  * For a plain INSERT the head of the output stream chain is returned in `out`.
  * For INSERT ... SELECT the returned `in` stream copies the SELECT result into that chain.
  */
BlockIO InterpreterInsertQuery::execute()
{
    ASTInsertQuery & query = typeid_cast<ASTInsertQuery &>(*query_ptr);
    StoragePtr table = getTable();

    auto table_lock = table->lockStructure(true);

    NamesAndTypesListPtr required_columns = std::make_shared<NamesAndTypesList>(table->getColumnsList());

    /// Assemble the chain of output streams the data will be written to, innermost first.
    BlockOutputStreamPtr sink = std::make_shared<PushingToViewsBlockOutputStream>(
        query.database, query.table, context, query_ptr);

    sink = std::make_shared<MaterializingBlockOutputStream>(sink);

    sink = std::make_shared<AddingDefaultBlockOutputStream>(
        sink,
        required_columns,
        table->column_defaults,
        context,
        static_cast<bool>(context.getSettingsRef().strict_insert_defaults));

    sink = std::make_shared<ProhibitColumnsBlockOutputStream>(sink, table->materialized_columns);

    BlockIO res;
    res.out_sample = getSampleBlock();

    /// What kind of query is it: INSERT or INSERT SELECT?
    if (query.select)
    {
        InterpreterSelectQuery interpreter_select{query.select, context};
        BlockInputStreamPtr select_result{interpreter_select.execute().in};

        res.in = std::make_shared<NullAndDoCopyBlockInputStream>(select_result, sink);
        res.in_sample = interpreter_select.getSampleBlock();
    }
    else
    {
        res.out = sink;
    }

    return res;
}
/** Produces one row per column of every table that passes the query's filter on `database` and `table`.
  *
  * The result is a single block with the columns
  * database, table, name, type, default_type, default_expression and bytes.
  * `bytes` is filled only for StorageMergeTree and StorageReplicatedMergeTree tables; other tables get the default value.
  * Tables that report TABLE_IS_DROPPED when locked are skipped.
  *
  * `settings`, `max_block_size` and `threads` are not used by this implementation.
  *
  * @return A single stream with the block, or no streams if filtering leaves no rows.
  */
BlockInputStreams StorageSystemColumns::read(
    const Names & column_names,
    ASTPtr query,
    const Context & context,
    const Settings & settings,
    QueryProcessingStage::Enum & processed_stage,
    const size_t max_block_size,
    const unsigned threads)
{
    check(column_names);
    processed_stage = QueryProcessingStage::FetchColumns;

    Block block;

    /// (database name, table name) -> storage, for every table of the databases that passed the first filter.
    std::map<std::pair<std::string, std::string>, StoragePtr> storages;

    {
        Databases databases = context.getDatabases();

        /// Add the `database` column.
        ColumnPtr databases_column = std::make_shared<ColumnString>();
        for (const auto & name_and_database : databases)
            databases_column->insert(name_and_database.first);

        block.insert(ColumnWithTypeAndName(databases_column, std::make_shared<DataTypeString>(), "database"));

        /// Filter the block that has only the `database` column.
        VirtualColumnUtils::filterBlockWithQuery(query, block, context);

        if (!block.rows())
            return BlockInputStreams();

        databases_column = block.getByName("database").column;
        const size_t database_count = databases_column->size();

        /// Add the `table` column.
        ColumnPtr tables_column = std::make_shared<ColumnString>();

        /// offsets[i] is the total number of tables in databases 0..i; used to replicate each database row once per table.
        IColumn::Offsets_t offsets(database_count);
        size_t tables_so_far = 0;

        for (size_t i = 0; i < database_count; ++i)
        {
            const std::string database_name = (*databases_column)[i].get<std::string>();
            const DatabasePtr database = databases.at(database_name);

            for (auto iterator = database->getIterator(); iterator->isValid(); iterator->next())
            {
                const String & table_name = iterator->name();

                storages.emplace(
                    std::piecewise_construct,
                    std::forward_as_tuple(database_name, table_name),
                    std::forward_as_tuple(iterator->table()));

                tables_column->insert(table_name);
                ++tables_so_far;
            }

            offsets[i] = tables_so_far;
        }

        for (size_t pos = 0; pos < block.columns(); ++pos)
        {
            ColumnPtr & existing_column = block.getByPosition(pos).column;
            existing_column = existing_column->replicate(offsets);
        }

        block.insert(ColumnWithTypeAndName(tables_column, std::make_shared<DataTypeString>(), "table"));
    }

    /// Filter the block that has the `database` and `table` columns.
    VirtualColumnUtils::filterBlockWithQuery(query, block, context);

    if (!block.rows())
        return BlockInputStreams();

    ColumnPtr filtered_database_column = block.getByName("database").column;
    ColumnPtr filtered_table_column = block.getByName("table").column;

    /// Compose the result.
    ColumnPtr res_database = std::make_shared<ColumnString>();
    ColumnPtr res_table = std::make_shared<ColumnString>();
    ColumnPtr res_name = std::make_shared<ColumnString>();
    ColumnPtr res_type = std::make_shared<ColumnString>();
    ColumnPtr res_default_type = std::make_shared<ColumnString>();
    ColumnPtr res_default_expression = std::make_shared<ColumnString>();
    ColumnPtr res_bytes = std::make_shared<ColumnUInt64>();

    const size_t table_count = filtered_database_column->size();
    for (size_t i = 0; i < table_count; ++i)
    {
        const std::string database_name = (*filtered_database_column)[i].get<std::string>();
        const std::string table_name = (*filtered_table_column)[i].get<std::string>();

        NamesAndTypesList columns;
        ColumnDefaults column_defaults;
        std::unordered_map<String, size_t> column_sizes;

        /// Everything needed from the storage is copied out inside this scope, while the structure lock is held.
        {
            StoragePtr storage = storages.at(std::make_pair(database_name, table_name));
            IStorage::TableStructureReadLockPtr table_lock;

            try
            {
                table_lock = storage->lockStructure(false);
            }
            catch (const Exception & e)
            {
                /** There are case when IStorage::drop was called,
                  *  but we still own the object.
                  * Then table will throw exception at attempt to lock it.
                  * Just skip the table.
                  */
                if (e.code() != ErrorCodes::TABLE_IS_DROPPED)
                    throw;

                continue;
            }

            columns = storage->getColumnsList();
            columns.insert(std::end(columns), std::begin(storage->alias_columns), std::end(storage->alias_columns));
            column_defaults = storage->column_defaults;

            /** Column size data for tables of the MergeTree family.
              * NOTE: In the future an interface could be added that allows getting column sizes from IStorage.
              */
            if (auto merge_tree = dynamic_cast<StorageMergeTree *>(storage.get()))
            {
                column_sizes = merge_tree->getData().getColumnSizes();
            }
            else if (auto replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(storage.get()))
            {
                column_sizes = replicated_merge_tree->getData().getColumnSizes();

                /// Add the sizes of the unreplicated part of the data, if there is one.
                if (auto unreplicated_data = replicated_merge_tree->getUnreplicatedData())
                {
                    auto unreplicated_column_sizes = unreplicated_data->getColumnSizes();
                    for (const auto & name_size : unreplicated_column_sizes)
                        column_sizes[name_size.first] += name_size.second;
                }
            }
        }

        for (const auto & column : columns)
        {
            res_database->insert(database_name);
            res_table->insert(table_name);
            res_name->insert(column.name);
            res_type->insert(column.type->getName());

            const auto default_it = column_defaults.find(column.name);
            if (default_it != std::end(column_defaults))
            {
                res_default_type->insert(toString(default_it->second.type));
                res_default_expression->insert(queryToString(default_it->second.expression));
            }
            else
            {
                res_default_type->insertDefault();
                res_default_expression->insertDefault();
            }

            const auto size_it = column_sizes.find(column.name);
            if (size_it != std::end(column_sizes))
                res_bytes->insert(size_it->second);
            else
                res_bytes->insertDefault();
        }
    }

    /// Replace the intermediate (database, table) block with the final result.
    block.clear();

    const auto add_string_column = [&block](const ColumnPtr & column, const char * name)
    {
        block.insert(ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), name));
    };

    add_string_column(res_database, "database");
    add_string_column(res_table, "table");
    add_string_column(res_name, "name");
    add_string_column(res_type, "type");
    add_string_column(res_default_type, "default_type");
    add_string_column(res_default_expression, "default_expression");
    block.insert(ColumnWithTypeAndName(res_bytes, std::make_shared<DataTypeUInt64>(), "bytes"));

    return BlockInputStreams{ 1, std::make_shared<OneBlockInputStream>(block) };
}
/** Executes a CREATE / ATTACH query for a table, view or temporary table.
  *
  * Steps:
  *  - if the query has a SELECT and it is needed, build its interpreter and sample block;
  *  - resolve the `AS table` source (if any) and lock its structure;
  *  - determine the columns and the engine;
  *  - create the storage and register it (in the session for temporary tables, in the database otherwise);
  *  - for CREATE ... SELECT (and for materialized views only with POPULATE) return a stream that fills the table.
  *
  * @return An empty BlockIO when there is nothing to insert (including the IF NOT EXISTS no-op case);
  *         otherwise a BlockIO whose `in` stream copies the SELECT result into the new table.
  * @throws Exception(TABLE_ALREADY_EXISTS) if the table exists and IF NOT EXISTS was not specified.
  */
BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
{
    String path = context.getPath();
    String current_database = context.getCurrentDatabase();

    String database_name = create.database.empty() ? current_database : create.database;
    String database_name_escaped = escapeForFileName(database_name);
    String table_name = create.table;

    String data_path = path + "data/" + database_name_escaped + "/";

    std::unique_ptr<InterpreterSelectQuery> interpreter_select;
    Block as_select_sample;

    /// For tables of view type, a sample block may be needed to obtain the columns.
    /// NOTE: for ATTACH the interpreter is built only when a view's columns must be derived; otherwise it stays null.
    if (create.select && (!create.attach || (!create.columns && (create.is_view || create.is_materialized_view))))
    {
        interpreter_select = std::make_unique<InterpreterSelectQuery>(create.select, context);
        as_select_sample = interpreter_select->getSampleBlock();
    }

    String as_database_name = create.as_database.empty() ? current_database : create.as_database;
    String as_table_name = create.as_table;

    StoragePtr as_storage;
    IStorage::TableStructureReadLockPtr as_storage_lock;
    if (!as_table_name.empty())
    {
        as_storage = context.getTable(as_database_name, as_table_name);
        as_storage_lock = as_storage->lockStructure(false);
    }

    /// Set and retrieve the list of columns.
    ColumnsInfo columns = setColumns(create, as_select_sample, as_storage);

    /// Select the table engine.
    String storage_name = setEngine(create, as_storage);

    StoragePtr res;

    {
        std::unique_ptr<DDLGuard> guard;

        if (!create.is_temporary)
        {
            context.assertDatabaseExists(database_name);

            /** If the table already exists and the query specifies IF NOT EXISTS,
              *  concurrent CREATE queries (which do nothing) are allowed.
              * Otherwise concurrent queries creating a table that does not exist yet
              *  may throw an exception even if IF NOT EXISTS is specified.
              */
            guard = context.getDDLGuardIfTableDoesntExist(database_name, table_name,
                "Table " + database_name + "." + table_name + " is creating or attaching right now");

            if (!guard)
            {
                if (create.if_not_exists)
                    return {};
                else
                    throw Exception("Table " + database_name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS);
            }
        }

        res = StorageFactory::instance().get(
            storage_name, data_path, table_name, database_name, context,
            context.getGlobalContext(), query_ptr, columns.columns,
            columns.materialized_columns, columns.alias_columns, columns.column_defaults, create.attach);

        if (create.is_temporary)
            context.getSessionContext().addExternalTable(table_name, res);
        else
            context.getDatabase(database_name)->createTable(table_name, res, query_ptr, storage_name);
    }

    /** If the query is CREATE ... SELECT, insert the data into the table.
      * A non-null interpreter_select implies create.select. It is null for ATTACH queries
      *  whose SELECT was not needed above; in that case there is nothing to execute,
      *  and dereferencing the null pointer would be undefined behaviour - so skip the insertion.
      */
    if (interpreter_select && storage_name != "View" && (storage_name != "MaterializedView" || create.is_populate))
    {
        auto table_lock = res->lockStructure(true);

        /// Also see InterpreterInsertQuery.
        BlockOutputStreamPtr out =
            std::make_shared<ProhibitColumnsBlockOutputStream>(
                std::make_shared<AddingDefaultBlockOutputStream>(
                    std::make_shared<MaterializingBlockOutputStream>(
                        std::make_shared<PushingToViewsBlockOutputStream>(
                            create.database, create.table,
                            create.is_temporary ? context.getSessionContext() : context,
                            query_ptr)),
                    /// @note shouldn't these two contexts be session contexts in case of temporary table?
                    columns.columns, columns.column_defaults, context,
                    static_cast<bool>(context.getSettingsRef().strict_insert_defaults)),
                columns.materialized_columns);

        BlockIO io;
        io.in_sample = as_select_sample;
        io.in = std::make_shared<NullAndDoCopyBlockInputStream>(interpreter_select->execute().in, out);

        return io;
    }

    return {};
}