bool NamesAndTypesList::isSubsetOf(const NamesAndTypesList & rhs) const { NamesAndTypes vector(rhs.begin(), rhs.end()); vector.insert(vector.end(), begin(), end()); std::sort(vector.begin(), vector.end()); return std::unique(vector.begin(), vector.end()) == vector.begin() + rhs.size(); }
size_t NamesAndTypesList::sizeOfDifference(const NamesAndTypesList & rhs) const { NamesAndTypes vector(rhs.begin(), rhs.end()); vector.insert(vector.end(), begin(), end()); std::sort(vector.begin(), vector.end()); return (std::unique(vector.begin(), vector.end()) - vector.begin()) * 2 - size() - rhs.size(); }
/** Builds a list from its text form held in `s`.
  *
  * The whole string must be consumed by readText(): assertEOF() is called on the
  * buffer afterwards.
  */
NamesAndTypesList NamesAndTypesList::parse(const String & s)
{
    NamesAndTypesList parsed;

    ReadBufferFromString buffer(s);
    parsed.readText(buffer);
    assertEOF(buffer);

    return parsed;
}
/** Implements the table function `odbc(connection_string, table_name)`.
  *
  * Both arguments are evaluated to literals. An ODBC session is then opened and the
  * query `SELECT * FROM <table_name> WHERE 1 = 0` is prepared and executed, only to
  * obtain the description of the result columns. A StorageODBC is created from those
  * columns, started and returned.
  *
  * Throws DB::Exception if the arguments are missing or their number is not 2,
  * and Poco ODBC exceptions if any of the ODBC calls reports an error.
  */
StoragePtr TableFunctionODBC::executeImpl(const ASTPtr & ast_function, const Context & context) const
{
    const ASTFunction & args_func = typeid_cast<const ASTFunction &>(*ast_function);

    if (!args_func.arguments)
        throw Exception("Table function 'odbc' must have arguments.", ErrorCodes::LOGICAL_ERROR);

    ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.arguments).children;

    if (args.size() != 2)
        throw Exception("Table function 'odbc' requires exactly 2 arguments: ODBC connection string and table name.",
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    for (int i = 0; i < 2; ++i)
        args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context);

    std::string connection_string = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
    std::string table_name = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();

    Poco::Data::ODBC::SessionImpl session(connection_string, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC);
    SQLHDBC hdbc = session.dbc().handle();

    SQLHSTMT hstmt = nullptr;

    /// SQLAllocStmt allocates a statement handle (not a connection handle) on the connection.
    if (Poco::Data::ODBC::Utility::isError(SQLAllocStmt(hdbc, &hstmt)))
        throw Poco::Data::ODBC::ODBCException("Could not allocate statement handle.");

    /// Free the statement handle on every exit path, including exceptions.
    SCOPE_EXIT(SQLFreeStmt(hstmt, SQL_DROP));

    /// TODO Why not do SQLColumns instead?
    /// The condition `1 = 0` is always false: only the result set metadata is needed.
    std::string query = "SELECT * FROM " + table_name + " WHERE 1 = 0";
    if (Poco::Data::ODBC::Utility::isError(Poco::Data::ODBC::SQLPrepare(hstmt, reinterpret_cast<SQLCHAR *>(&query[0]), query.size())))
        throw Poco::Data::ODBC::DescriptorException(session.dbc());

    if (Poco::Data::ODBC::Utility::isError(SQLExecute(hstmt)))
        throw Poco::Data::ODBC::StatementException(hstmt);

    SQLSMALLINT cols = 0;
    if (Poco::Data::ODBC::Utility::isError(SQLNumResultCols(hstmt, &cols)))
        throw Poco::Data::ODBC::StatementException(hstmt);

    /// TODO cols not checked

    NamesAndTypesList columns;
    for (SQLSMALLINT ncol = 1; ncol <= cols; ++ncol)
    {
        SQLSMALLINT type = 0;
        /// TODO Why 301?
        SQLCHAR column_name[301];

        /// The result must be checked: on failure `column_name` is left uninitialized,
        /// and reading it as a C string below would be undefined behaviour.
        if (Poco::Data::ODBC::Utility::isError(
                Poco::Data::ODBC::SQLDescribeCol(hstmt, ncol, column_name, sizeof(column_name), nullptr, &type, nullptr, nullptr, nullptr)))
            throw Poco::Data::ODBC::StatementException(hstmt);

        columns.emplace_back(reinterpret_cast<char *>(column_name), getDataType(type));
    }

    auto result = StorageODBC::create(table_name, connection_string, "", table_name, ColumnsDescription{columns});
    result->startup();
    return result;
}
/** Returns the elements of this list whose name is contained in `names`,
  * in the same order as they appear in this list.
  */
NamesAndTypesList NamesAndTypesList::filter(const NameSet & names) const
{
    NamesAndTypesList selected;

    for (const NameAndTypePair & candidate : *this)
    {
        /// Skip columns that were not requested.
        if (!names.count(candidate.name))
            continue;

        selected.push_back(candidate);
    }

    return selected;
}
static std::string listOfColumns(const NamesAndTypesList & available_columns) { std::stringstream s; for (NamesAndTypesList::const_iterator it = available_columns.begin(); it != available_columns.end(); ++it) { if (it != available_columns.begin()) s << ", "; s << it->name; } return s.str(); }
void NamesAndTypesList::getDifference(const NamesAndTypesList & rhs, NamesAndTypesList & deleted, NamesAndTypesList & added) const { NamesAndTypes lhs_vector(begin(), end()); std::sort(lhs_vector.begin(), lhs_vector.end()); NamesAndTypes rhs_vector(rhs.begin(), rhs.end()); std::sort(rhs_vector.begin(), rhs_vector.end()); std::set_difference(lhs_vector.begin(), lhs_vector.end(), rhs_vector.begin(), rhs_vector.end(), std::back_inserter(deleted)); std::set_difference(rhs_vector.begin(), rhs_vector.end(), lhs_vector.begin(), lhs_vector.end(), std::back_inserter(added)); }
/** Returns the list of columns (name and type) of table `database`.`table` as seen
  * through `cluster`.
  *
  * If the shard chosen by cluster.getAnyShardInfo() is local, the columns are taken
  * directly from the table found in `context`. Otherwise a `DESC TABLE` query is sent
  * to that shard and the `name` and `type` columns of its result are converted to
  * data types with DataTypeFactory.
  */
NamesAndTypesList getStructureOfRemoteTable(
    const Cluster & cluster,
    const std::string & database,
    const std::string & table,
    const Context & context)
{
    /// Query that describes the table.
    String query = "DESC TABLE " + backQuoteIfNeed(database) + "." + backQuoteIfNeed(table);

    Settings settings = context.getSettings();

    /// Send the query to the first shard that comes to hand.
    const auto & shard_info = cluster.getAnyShardInfo();

    if (shard_info.isLocal())
        return context.getTable(database, table)->getColumnsList();

    ConnectionPoolPtr pool = shard_info.pool;

    BlockInputStreamPtr input = std::make_shared<RemoteBlockInputStream>(
        pool.get(), query, &settings, nullptr,
        Tables(), QueryProcessingStage::Complete, context);
    input->readPrefix();

    const DataTypeFactory & data_type_factory = DataTypeFactory::instance();

    NamesAndTypesList res;
    while (Block current = input->read())
    {
        ColumnPtr names = current.getByName("name").column;
        ColumnPtr types = current.getByName("type").column;

        for (size_t row = 0, rows = names->size(); row < rows; ++row)
        {
            /// Copies are intentional: operator[] yields a temporary, so a reference
            /// obtained from it must not outlive the statement.
            String column_name = (*names)[row].get<const String &>();
            String data_type_name = (*types)[row].get<const String &>();

            res.emplace_back(column_name, data_type_factory.get(data_type_name));
        }
    }

    return res;
}
/** For every name in `names` (in the given order) returns the name together with the
  * type that this list has for it.
  *
  * Throws Exception with code THERE_IS_NO_COLUMN if a name is not present in this list.
  */
NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const
{
    /// NOTE It's better to make a map in `IStorage` than to create it here every time again.
    GOOGLE_NAMESPACE::dense_hash_map<StringRef, const DataTypePtr *, StringRefHash> type_by_name;
    type_by_name.set_empty_key(StringRef());

    /// The map stores pointers to the types held by the elements of this list.
    for (const NameAndTypePair & column : *this)
        type_by_name[column.name] = &column.type;

    NamesAndTypesList res;

    for (const String & name : names)
    {
        auto found = type_by_name.find(name);

        if (found == type_by_name.end())
            throw Exception("No column " + name, ErrorCodes::THERE_IS_NO_COLUMN);

        res.emplace_back(name, *found->second);
    }

    return res;
}
/** Builds an ASTExpressionList of column declarations.
  *
  * The declarations cover `columns`, followed by `materialized_columns` and then
  * `alias_columns`. For each column the type name is parsed back into an AST, and if
  * `column_defaults` has an entry for the column, its default specifier and a clone of
  * the default expression are attached to the declaration.
  */
ASTPtr InterpreterCreateQuery::formatColumns(NamesAndTypesList columns,
    const NamesAndTypesList & materialized_columns,
    const NamesAndTypesList & alias_columns,
    const ColumnDefaults & column_defaults)
{
    /// `columns` is taken by value, so appending to it does not affect the caller.
    columns.insert(std::end(columns), std::begin(materialized_columns), std::end(materialized_columns));
    columns.insert(std::end(columns), std::begin(alias_columns), std::end(alias_columns));

    auto declarations = std::make_shared<ASTExpressionList>();

    for (const auto & column : columns)
    {
        const auto declaration = std::make_shared<ASTColumnDeclaration>();
        declaration->name = column.name;

        /// The type name is kept in a shared string: the parsed AST stores it as its query_string.
        StringPtr type_text = std::make_shared<String>(column.type->getName());
        auto type_begin = type_text->data();
        const auto type_end = type_begin + type_text->size();

        ParserIdentifierWithOptionalParameters type_parser;
        declaration->type = parseQuery(type_parser, type_begin, type_end, "data type");
        declaration->type->query_string = type_text;

        const auto default_it = column_defaults.find(column.name);
        if (default_it != std::end(column_defaults))
        {
            const auto & column_default = default_it->second;
            declaration->default_specifier = toString(column_default.type);
            declaration->default_expression = column_default.expression->clone();
        }

        declarations->children.push_back(ASTPtr{declaration});
    }

    return declarations;
}
/** Produces a single block describing the columns of the tables known to `context`.
  *
  * The result has the columns `database`, `table`, `name`, `type`, `default_type`,
  * `default_expression` and `bytes`, with one row per column of every table that
  * passes the filter derived from `query`. An empty BlockInputStreams is returned
  * if filtering leaves no rows.
  *
  * `processed_stage` is set to QueryProcessingStage::FetchColumns.
  * `settings`, `max_block_size` and `threads` are not used by this function.
  */
BlockInputStreams StorageSystemColumns::read(
    const Names & column_names,
    ASTPtr query,
    const Context & context,
    const Settings & settings,
    QueryProcessingStage::Enum & processed_stage,
    const size_t max_block_size,
    const unsigned threads)
{
    check(column_names);
    processed_stage = QueryProcessingStage::FetchColumns;

    Block block;

    /// Storages collected in the first pass, keyed by (database name, table name).
    std::map<std::pair<std::string, std::string>, StoragePtr> storages;

    {
        Databases databases = context.getDatabases();

        /// Add the `database` column.
        ColumnPtr database_column = std::make_shared<ColumnString>();
        for (const auto & database : databases)
            database_column->insert(database.first);
        block.insert(ColumnWithTypeAndName(database_column, std::make_shared<DataTypeString>(), "database"));

        /// Filter the block with the `database` column.
        VirtualColumnUtils::filterBlockWithQuery(query, block, context);

        if (!block.rows())
            return BlockInputStreams();

        database_column = block.getByName("database").column;
        size_t rows = database_column->size();

        /// Add the `table` column.
        ColumnPtr table_column = std::make_shared<ColumnString>();
        /// offsets[i] is the cumulative number of tables in databases 0..i;
        /// it is passed to replicate() below so that each database row is repeated once per table.
        IColumn::Offsets_t offsets(rows);
        for (size_t i = 0; i < rows; ++i)
        {
            const std::string database_name = (*database_column)[i].get<std::string>();
            const DatabasePtr database = databases.at(database_name);
            offsets[i] = i ? offsets[i - 1] : 0;

            for (auto iterator = database->getIterator(); iterator->isValid(); iterator->next())
            {
                const String & table_name = iterator->name();
                storages.emplace(std::piecewise_construct,
                    std::forward_as_tuple(database_name, table_name),
                    std::forward_as_tuple(iterator->table()));
                table_column->insert(table_name);
                offsets[i] += 1;
            }
        }

        /// Replicate the existing columns so that they line up row by row with `table_column`.
        for (size_t i = 0; i < block.columns(); ++i)
        {
            ColumnPtr & column = block.getByPosition(i).column;
            column = column->replicate(offsets);
        }

        block.insert(ColumnWithTypeAndName(table_column, std::make_shared<DataTypeString>(), "table"));
    }

    /// Filter the block with the `database` and `table` columns.
    VirtualColumnUtils::filterBlockWithQuery(query, block, context);

    if (!block.rows())
        return BlockInputStreams();

    ColumnPtr filtered_database_column = block.getByName("database").column;
    ColumnPtr filtered_table_column = block.getByName("table").column;

    /// Compose the result.
    ColumnPtr database_column = std::make_shared<ColumnString>();
    ColumnPtr table_column = std::make_shared<ColumnString>();
    ColumnPtr name_column = std::make_shared<ColumnString>();
    ColumnPtr type_column = std::make_shared<ColumnString>();
    ColumnPtr default_type_column = std::make_shared<ColumnString>();
    ColumnPtr default_expression_column = std::make_shared<ColumnString>();
    ColumnPtr bytes_column = std::make_shared<ColumnUInt64>();

    size_t rows = filtered_database_column->size();
    for (size_t i = 0; i < rows; ++i)
    {
        const std::string database_name = (*filtered_database_column)[i].get<std::string>();
        const std::string table_name = (*filtered_table_column)[i].get<std::string>();

        NamesAndTypesList columns;
        ColumnDefaults column_defaults;
        std::unordered_map<String, size_t> column_sizes;

        /// Everything needed from the storage is copied out inside this scope;
        /// `storage` and `table_lock` are destroyed at its end.
        {
            StoragePtr storage = storages.at(std::make_pair(database_name, table_name));
            IStorage::TableStructureReadLockPtr table_lock;

            try
            {
                table_lock = storage->lockStructure(false);
            }
            catch (const Exception & e)
            {
                /** There are cases when IStorage::drop was called,
                  *  but we still own the object.
                  * Then the table will throw an exception on an attempt to lock it.
                  * Just skip the table.
                  */
                if (e.code() == ErrorCodes::TABLE_IS_DROPPED)
                    continue;
                else
                    throw;
            }

            /// Ordinary columns followed by alias columns.
            columns = storage->getColumnsList();
            columns.insert(std::end(columns), std::begin(storage->alias_columns), std::end(storage->alias_columns));
            column_defaults = storage->column_defaults;

            /** Column size data for tables of the MergeTree family.
              * NOTE: In the future an interface could be made that allows getting column sizes from IStorage.
              */
            if (auto storage_concrete = dynamic_cast<StorageMergeTree *>(storage.get()))
            {
                column_sizes = storage_concrete->getData().getColumnSizes();
            }
            else if (auto storage_concrete = dynamic_cast<StorageReplicatedMergeTree *>(storage.get()))
            {
                column_sizes = storage_concrete->getData().getColumnSizes();

                /// Sizes of the unreplicated data, if there is any, are added to the same totals.
                auto unreplicated_data = storage_concrete->getUnreplicatedData();
                if (unreplicated_data)
                {
                    auto unreplicated_column_sizes = unreplicated_data->getColumnSizes();
                    for (const auto & name_size : unreplicated_column_sizes)
                        column_sizes[name_size.first] += name_size.second;
                }
            }
        }

        /// One result row per column of the table.
        for (const auto & column : columns)
        {
            database_column->insert(database_name);
            table_column->insert(table_name);
            name_column->insert(column.name);
            type_column->insert(column.type->getName());

            /// Default kind and expression; default (empty) values if the column has no default.
            {
                const auto it = column_defaults.find(column.name);
                if (it == std::end(column_defaults))
                {
                    default_type_column->insertDefault();
                    default_expression_column->insertDefault();
                }
                else
                {
                    default_type_column->insert(toString(it->second.type));
                    default_expression_column->insert(queryToString(it->second.expression));
                }
            }

            /// Column size; the default value if no size is known for this column.
            {
                const auto it = column_sizes.find(column.name);
                if (it == std::end(column_sizes))
                    bytes_column->insertDefault();
                else
                    bytes_column->insert(it->second);
            }
        }
    }

    /// Replace the filtering block with the final result columns.
    block.clear();

    block.insert(ColumnWithTypeAndName(database_column, std::make_shared<DataTypeString>(), "database"));
    block.insert(ColumnWithTypeAndName(table_column, std::make_shared<DataTypeString>(), "table"));
    block.insert(ColumnWithTypeAndName(name_column, std::make_shared<DataTypeString>(), "name"));
    block.insert(ColumnWithTypeAndName(type_column, std::make_shared<DataTypeString>(), "type"));
    block.insert(ColumnWithTypeAndName(default_type_column, std::make_shared<DataTypeString>(), "default_type"));
    block.insert(ColumnWithTypeAndName(default_expression_column, std::make_shared<DataTypeString>(), "default_expression"));
    block.insert(ColumnWithTypeAndName(bytes_column, std::make_shared<DataTypeUInt64>(), "bytes"));

    return BlockInputStreams{ 1, std::make_shared<OneBlockInputStream>(block) };
}
/** Implements the table function
  * `mysql('host:port', database, table, 'user', 'password'[, replace_query, 'on_duplicate_clause'])`.
  *
  * All arguments are evaluated to literals. The table structure is determined by
  * querying INFORMATION_SCHEMA.COLUMNS on the MySQL server. A StorageMySQL is then
  * created from that structure, started and returned.
  *
  * Throws Exception if the arguments are missing, their number is not in [5, 7],
  * or both `replace_query` and a non-empty `on_duplicate_clause` are given.
  */
StoragePtr TableFunctionMySQL::executeImpl(const ASTPtr & ast_function, const Context & context) const
{
    const ASTFunction & args_func = typeid_cast<const ASTFunction &>(*ast_function);

    if (!args_func.arguments)
        throw Exception("Table function 'mysql' must have arguments.", ErrorCodes::LOGICAL_ERROR);

    ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.arguments).children;

    if (args.size() < 5 || args.size() > 7)
        throw Exception("Table function 'mysql' requires 5-7 parameters: MySQL('host:port', database, table, 'user', 'password'[, replace_query, 'on_duplicate_clause']).",
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    for (size_t i = 0; i < args.size(); ++i)
        args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context);

    /// Extracts the string value of the literal argument at position `index`.
    auto string_argument = [&args](size_t index) -> std::string
    {
        return static_cast<const ASTLiteral &>(*args[index]).value.safeGet<String>();
    };

    std::string host_port = string_argument(0);
    std::string database_name = string_argument(1);
    std::string table_name = string_argument(2);
    std::string user_name = string_argument(3);
    std::string password = string_argument(4);

    bool replace_query = false;
    std::string on_duplicate_clause;

    if (args.size() >= 6)
        replace_query = static_cast<const ASTLiteral &>(*args[5]).value.safeGet<UInt64>() > 0;
    if (args.size() == 7)
        on_duplicate_clause = string_argument(6);

    if (replace_query && !on_duplicate_clause.empty())
        throw Exception(
            "Only one of 'replace_query' and 'on_duplicate_clause' can be specified, or none of them",
            ErrorCodes::BAD_ARGUMENTS);

    /// 3306 is the default MySQL port number
    auto parsed_host_port = parseAddress(host_port, 3306);

    mysqlxx::Pool pool(database_name, parsed_host_port.first, user_name, password, parsed_host_port.second);

    /// Determine table definition by running a query to INFORMATION_SCHEMA.
    Block sample_block
    {
        { std::make_shared<DataTypeString>(), "name" },
        { std::make_shared<DataTypeString>(), "type" },
        { std::make_shared<DataTypeUInt8>(), "is_nullable" },
        { std::make_shared<DataTypeUInt8>(), "is_unsigned" },
        { std::make_shared<DataTypeUInt64>(), "length" },
    };

    WriteBufferFromOwnString query;
    query << "SELECT"
            " COLUMN_NAME AS name,"
            " DATA_TYPE AS type,"
            " IS_NULLABLE = 'YES' AS is_nullable,"
            " COLUMN_TYPE LIKE '%unsigned' AS is_unsigned,"
            " CHARACTER_MAXIMUM_LENGTH AS length"
        " FROM INFORMATION_SCHEMA.COLUMNS"
        " WHERE TABLE_SCHEMA = " << quote << database_name
        << " AND TABLE_NAME = " << quote << table_name
        << " ORDER BY ORDINAL_POSITION";

    MySQLBlockInputStream result(pool.Get(), query.str(), sample_block, DEFAULT_BLOCK_SIZE);

    NamesAndTypesList columns;
    while (Block block = result.read())
    {
        /// Positions follow the order of columns in `sample_block`.
        const auto & name_col = *block.getByPosition(0).column;
        const auto & type_col = *block.getByPosition(1).column;
        const auto & nullable_col = *block.getByPosition(2).column;
        const auto & unsigned_col = *block.getByPosition(3).column;
        const auto & length_col = *block.getByPosition(4).column;

        const size_t rows = block.rows();
        for (size_t row = 0; row < rows; ++row)
        {
            /// A column is made Nullable only if MySQL reports it as nullable
            /// and the `external_table_functions_use_nulls` setting is enabled.
            const bool is_nullable = nullable_col[row].safeGet<UInt64>()
                && context.getSettings().external_table_functions_use_nulls;
            const auto is_unsigned = unsigned_col[row].safeGet<UInt64>();
            const auto length = length_col[row].safeGet<UInt64>();

            columns.emplace_back(
                name_col[row].safeGet<String>(),
                getDataType(type_col[row].safeGet<String>(), is_nullable, is_unsigned, length));
        }
    }

    auto res = StorageMySQL::create(
        table_name,
        std::move(pool),
        database_name,
        table_name,
        replace_query,
        on_duplicate_clause,
        ColumnsDescription{columns},
        context);

    res->startup();
    return res;
}
int main(int argc, char ** argv) { using namespace DB; try { if (argc < 2) { std::cerr << "at least 1 argument expected" << std::endl; return 1; } Context context; NamesAndTypesList columns; for (int i = 2; i + 1 < argc; i += 2) { NameAndTypePair col; col.name = argv[i]; col.type = DataTypeFactory::instance().get(argv[i + 1]); columns.push_back(col); } ASTPtr root; ParserPtr parsers[] = {std::make_unique<ParserSelectQuery>(), std::make_unique<ParserExpressionList>(false)}; for (size_t i = 0; i < sizeof(parsers)/sizeof(parsers[0]); ++i) { IParser & parser = *parsers[i]; const char * pos = argv[1]; const char * end = argv[1] + strlen(argv[1]); const char * max_parsed_pos = pos; Expected expected = ""; if (parser.parse(pos, end, root, max_parsed_pos, expected)) break; else root = nullptr; } if (!root) { std::cerr << "invalid expression (should be select query or expression list)" << std::endl; return 2; } formatAST(*root, std::cout); std::cout << std::endl; ExpressionAnalyzer analyzer(root, context, {}, columns); Names required = analyzer.getRequiredColumns(); std::cout << "required columns:\n"; for (size_t i = 0; i < required.size(); ++i) { std::cout << required[i] << "\n"; } std::cout << "\n"; std::cout << "only consts:\n\n" << analyzer.getConstActions()->dumpActions() << "\n"; if (analyzer.hasAggregation()) { Names key_names; AggregateDescriptions aggregates; analyzer.getAggregateInfo(key_names, aggregates); std::cout << "keys:\n"; for (size_t i = 0; i < key_names.size(); ++i) std::cout << key_names[i] << "\n"; std::cout << "\n"; std::cout << "aggregates:\n"; for (size_t i = 0; i < aggregates.size(); ++i) { AggregateDescription desc = aggregates[i]; std::cout << desc.column_name << " = " << desc.function->getName() << " ( "; for (size_t j = 0; j < desc.argument_names.size(); ++j) std::cout << desc.argument_names[j] << " "; std::cout << ")\n"; } std::cout << "\n"; ExpressionActionsChain before; if (analyzer.appendWhere(before, false)) before.addStep(); 
analyzer.appendAggregateFunctionsArguments(before, false); analyzer.appendGroupBy(before, false); before.finalize(); ExpressionActionsChain after; if (analyzer.appendHaving(after, false)) after.addStep(); analyzer.appendSelect(after, false); analyzer.appendOrderBy(after, false); after.addStep(); analyzer.appendProjectResult(after, false); after.finalize(); std::cout << "before aggregation:\n\n"; for (size_t i = 0; i < before.steps.size(); ++i) { std::cout << before.steps[i].actions->dumpActions(); std::cout << std::endl; } std::cout << "\nafter aggregation:\n\n"; for (size_t i = 0; i < after.steps.size(); ++i) { std::cout << after.steps[i].actions->dumpActions(); std::cout << std::endl; } } else { if (typeid_cast<ASTSelectQuery *>(&*root)) { ExpressionActionsChain chain; if (analyzer.appendWhere(chain, false)) chain.addStep(); analyzer.appendSelect(chain, false); analyzer.appendOrderBy(chain, false); chain.addStep(); analyzer.appendProjectResult(chain, false); chain.finalize(); for (size_t i = 0; i < chain.steps.size(); ++i) { std::cout << chain.steps[i].actions->dumpActions(); std::cout << std::endl; } } else { std::cout << "unprojected actions:\n\n" << analyzer.getActions(false)->dumpActions() << "\n"; std::cout << "projected actions:\n\n" << analyzer.getActions(true)->dumpActions() << "\n"; } } } catch (Exception & e) { std::cerr << "Exception " << e.what() << ": " << e.displayText() << "\n" << e.getStackTrace().toString(); return 3; } return 0; }