/// Dispatches a node to the overload that handles its concrete AST type.
/// The two checks are intentionally independent (not an if / else-if chain): `ast` is handed to
/// the subquery overload as well, so the function check looks at whatever `ast` points to afterwards.
void ExecuteScalarSubqueriesMatcher::visit(ASTPtr & ast, Data & data)
{
    auto * subquery = typeid_cast<ASTSubquery *>(ast.get());
    if (subquery)
        visit(*subquery, ast, data);

    auto * function = typeid_cast<ASTFunction *>(ast.get());
    if (function)
        visit(*function, ast, data);
}
/// Returns the parameter names found on the left hand side of a lambda expression.
/// Two forms are accepted in the AST:
///  - a single identifier:      x -> x + 1
///  - identifiers in a tuple:   (x, y) -> x + 1
/// Every other shape (aliased node, function other than 'tuple', empty tuple, tuple element that is
/// not a plain identifier, compound or aliased identifier) throws an Exception with code BAD_LAMBDA.
AnalyzeLambdas::LambdaParameters AnalyzeLambdas::extractLambdaParameters(ASTPtr & ast)
{
#define LAMBDA_ERROR_MESSAGE " There are two valid forms of lambda expressions: x -> ... and (x, y...) -> ..."

    if (!ast->tryGetAlias().empty())
        throw Exception("Lambda parameters cannot have aliases."
            LAMBDA_ERROR_MESSAGE, ErrorCodes::BAD_LAMBDA);

    /// Form 1: a bare identifier is the only parameter.
    if (const ASTIdentifier * single_parameter = typeid_cast<const ASTIdentifier *>(ast.get()))
        return { single_parameter->name };

    /// Form 2: anything else must be the function 'tuple' with at least one argument.
    const ASTFunction * tuple_function = typeid_cast<const ASTFunction *>(ast.get());
    if (!tuple_function)
        throw Exception("Unexpected left hand side of '->' or first argument of 'lambda'."
            LAMBDA_ERROR_MESSAGE, ErrorCodes::BAD_LAMBDA);

    if (tuple_function->name != "tuple")
        throw Exception("Left hand side of '->' or first argument of 'lambda' is a function, but this function is not tuple."
            LAMBDA_ERROR_MESSAGE " Found function '" + tuple_function->name + "' instead.", ErrorCodes::BAD_LAMBDA);

    if (!tuple_function->arguments || tuple_function->arguments->children.empty())
        throw Exception("Left hand side of '->' or first argument of 'lambda' is empty tuple."
            LAMBDA_ERROR_MESSAGE, ErrorCodes::BAD_LAMBDA);

    const auto & tuple_elements = tuple_function->arguments->children;

    LambdaParameters parameters;
    parameters.reserve(tuple_elements.size());

    for (const ASTPtr & element : tuple_elements)
    {
        const ASTIdentifier * element_identifier = typeid_cast<const ASTIdentifier *>(element.get());

        if (!element_identifier)
            throw Exception("Left hand side of '->' or first argument of 'lambda' contains something that is not just identifier."
                LAMBDA_ERROR_MESSAGE, ErrorCodes::BAD_LAMBDA);

        if (!element_identifier->children.empty())
            throw Exception("Left hand side of '->' or first argument of 'lambda' contains compound identifier."
                LAMBDA_ERROR_MESSAGE, ErrorCodes::BAD_LAMBDA);

        if (!element_identifier->alias.empty())
            throw Exception("Lambda parameters cannot have aliases."
                LAMBDA_ERROR_MESSAGE, ErrorCodes::BAD_LAMBDA);

        parameters.emplace_back(element_identifier->name);
    }

    return parameters;

#undef LAMBDA_ERROR_MESSAGE
}
/// Produces the value of one set element converted to `type`.
/// A literal is converted directly; a function node is first evaluated as a constant expression.
/// Any other node kind throws INCORRECT_ELEMENT_OF_SET.
static Field extractValueFromNode(ASTPtr & node, const IDataType & type, const Context & context)
{
    if (ASTLiteral * literal = typeid_cast<ASTLiteral *>(node.get()))
        return convertFieldToType(literal->value, type);

    if (!typeid_cast<ASTFunction *>(node.get()))
        throw Exception("Incorrect element of set. Must be literal or constant expression.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);

    return convertFieldToType(evaluateConstantExpression(node, context), type);
}
/// Extract all subfunctions of the main conjunction, but depending only on the specified columns static void extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector<ASTPtr> & result) { const ASTFunction * function = typeid_cast<const ASTFunction *>(expression.get()); if (function && function->name == "and") { for (size_t i = 0; i < function->arguments->children.size(); ++i) extractFunctions(function->arguments->children[i], columns, result); } else if (isValidFunction(expression, columns)) { result.push_back(expression->clone()); } }
// Parses "#<function length>'<char>'" starting at *fptr and returns a RepeatedCharFL node.
// *fptr must point at '#'. Throws DSLException when the function length is not followed
// by a char literal.
static ASTPtr parseRepeatedCharFL(const char** fptr, const LengthFunc** lengthFuncsPtr) {
    assert(**fptr == '#');

    // Remember where this node starts in the format; it is handed to the node's constructor.
    const char* start = *fptr;
    FunctionLength length = parseFunctionLength(fptr, lengthFuncsPtr);

    if (**fptr != '\'') {
        throw DSLException(*fptr, "Expected char literal after function length.");
    }

    char repeated = parseCharLiteral(fptr);
    ASTPtr node;
    node.reset(new RepeatedCharFL(start, length, repeated));
    return node;
}
bool MergeTreeMinMaxIndex::mayBenefitFromIndexForIn(const ASTPtr & node) const { const String column_name = node->getColumnName(); for (const auto & name : columns) if (column_name == name) return true; if (const auto * func = typeid_cast<const ASTFunction *>(node.get())) if (func->arguments->children.size() == 1) return mayBenefitFromIndexForIn(func->arguments->children.front()); return false; }
/// Parses an element with `elem_parser` and then an optional alias that is stored into the element.
/// Returns false if the element cannot be parsed, or if an alias was parsed but the element is not
/// an ASTWithAlias (in which case `expected` is set to "alias cannot be here").
bool ParserWithOptionalAliasImpl<ParserAlias>::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    ParserWhiteSpaceOrComments ws;

    if (!elem_parser->parse(pos, end, node, max_parsed_pos, expected))
        return false;

    /** A small hack.
      *
      * In the SELECT section we allow aliases to be parsed without the AS keyword.
      * Such aliases cannot coincide with query keywords.
      * The expression itself, however, may be an identifier that coincides with a keyword.
      * For example, a column may be named where. The query may then read
      *  SELECT where AS x FROM table or even SELECT where x FROM table.
      * It may even read SELECT where AS from FROM table, but not SELECT where from FROM table.
      * See the implementation of ParserAlias for details.
      *
      * This causes a small problem: an inconvenient error message when the SELECT section ends
      *  with an extra comma, although that mistake is very common. Example: SELECT x, y, z, FROM tbl
      * If nothing is done, this is parsed as selecting a column named FROM with the alias tbl.
      * To avoid that, we do not allow an alias without the AS keyword for an identifier named FROM.
      *
      * Note: this also filters out the case when the identifier is quoted.
      * Example: SELECT x, y, z, `FROM` tbl. That case could be allowed, though.
      *
      * In the future it would be simpler to forbid unquoted identifiers that coincide with keywords.
      */
    const ASTIdentifier * id = allow_alias_without_as_keyword
        ? typeid_cast<const ASTIdentifier *>(node.get())
        : nullptr;
    const bool is_identifier_named_from = id && 0 == strcasecmp(id->name.data(), "FROM");
    const bool allow_alias_without_as_keyword_now = allow_alias_without_as_keyword && !is_identifier_named_from;

    ws.ignore(pos, end);

    ASTPtr alias_node;
    if (!ParserAlias(allow_alias_without_as_keyword_now).parse(pos, end, alias_node, max_parsed_pos, expected))
        return true;    /// No alias - the element alone is a successful parse.

    String alias_name = typeid_cast<ASTIdentifier &>(*alias_node).name;

    ASTWithAlias * ast_with_alias = dynamic_cast<ASTWithAlias *>(node.get());
    if (!ast_with_alias)
    {
        expected = "alias cannot be here";
        return false;
    }

    ast_with_alias->alias = alias_name;
    return true;
}
/// Formats a copy of the given CREATE query as an ATTACH statement followed by a newline.
/// The passed query is not modified: all changes are made on a clone.
String getTableDefinitionFromCreateQuery(const ASTPtr & query)
{
    ASTPtr query_clone = query->clone();
    ASTCreateQuery & create = typeid_cast<ASTCreateQuery &>(*query_clone);

    /// Strip everything ATTACH does not need.
    create.attach = true;
    create.if_not_exists = false;
    create.is_populate = false;
    create.database.clear();
    create.as_database.clear();
    create.as_table.clear();

    /// Views must keep their SELECT query; every other kind of table must not.
    const bool is_any_view = create.is_view || create.is_materialized_view;
    if (!is_any_view)
        create.select = nullptr;

    create.format = nullptr;
    create.out_file = nullptr;

    std::ostringstream statement_stream;
    formatAST(create, statement_stream, false);
    statement_stream << '\n';
    return statement_stream.str();
}
/// Parses "[AS] identifier". The AS keyword may be omitted only when
/// `allow_alias_without_as_keyword` is set, and then the identifier must not be one of
/// `restricted_keywords` (compared case-insensitively).
bool ParserAliasImpl<ParserIdentifier>::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    ParserWhiteSpaceOrComments ws;
    ParserString s_as("AS", true, true);
    ParserIdentifier id_p;

    const bool has_as_word = s_as.parse(pos, end, node, max_parsed_pos, expected);
    if (!has_as_word && !allow_alias_without_as_keyword)
        return false;

    ws.ignore(pos, end);

    if (!id_p.parse(pos, end, node, max_parsed_pos, expected))
        return false;

    /// With an explicit AS any identifier is accepted.
    if (has_as_word)
        return true;

    /** Without AS the alias must not coincide with a keyword - so that
      *  in the query "SELECT x FROM t" the word FROM is not taken for an alias,
      *  while in the query "SELECT x FRO FROM t" the word FRO is taken for an alias.
      */
    const String & alias_name = static_cast<const ASTIdentifier &>(*node).name;

    for (const char ** keyword = restricted_keywords; *keyword != nullptr; ++keyword)
    {
        if (0 == strcasecmp(alias_name.data(), *keyword))
            return false;
    }

    return true;
}
// Parses one piece of specified-length content starting at *fptr and returns its AST node.
// The first character selects the form:
//   '        -> string literal
//   #        -> repeated char with a function length
//   (other)  -> literal length followed by either a char literal (repeated char)
//               or a [ ... ] block with optional ^ / v fillers after the closing bracket
// Throws DSLException on anything unexpected after the literal length or inside a block.
static ASTPtr parseSpecifiedLengthContent(const char** fptr, const char*** wordSourcesPtr, const LengthFunc** lengthFuncsPtr) {
    // std::isdigit has undefined behavior for negative values other than EOF, which a plain
    // char can hold for non-ASCII bytes, so the character is converted to unsigned char first.
    const auto beginsSpecifiedLength = [](char c) {
        return c == '\'' || c == '#' || std::isdigit(static_cast<unsigned char>(c)) != 0;
    };

    assert(beginsSpecifiedLength(**fptr));

    ASTPtr slc;
    if (**fptr == '\'') {
        slc = parseStringLiteral(fptr);
    } else if (**fptr == '#') {
        slc = parseRepeatedCharFL(fptr, lengthFuncsPtr);
    } else {
        const char* f_at = *fptr;
        LiteralLength length = parseLiteralLength(fptr);
        if (**fptr == '\'') {
            slc.reset(new RepeatedCharLL(f_at, length, parseCharLiteral(fptr)));
        } else if (**fptr == '[') {
            Block* block = new Block(f_at, length);
            slc.reset(block);  // slc owns the block from here on; `block` is only a typed view of it

            ++*fptr;
            parseWhitespaces(fptr);  // [ is a token

            while (**fptr != ']') {
                if (beginsSpecifiedLength(**fptr)) {
                    block->addChild(parseSpecifiedLengthContent(fptr, wordSourcesPtr, lengthFuncsPtr));
                } else if (**fptr == '{') {
                    block->addWords(parseWords(fptr, wordSourcesPtr));
                } else {
                    // Also reached at the end of the format string, so the loop cannot run past it.
                    throw DSLException(*fptr, "Expected ', digit, or # to begin specified-length content, "
                                              "or { to begin greedy-length content.");
                }
            }

            ++*fptr;
            parseWhitespaces(fptr);  // ] is a token

            // Fillers may come in either order: ^ then optional v, or v then optional ^.
            if (**fptr == '^') {
                parseTopOrBottomFiller(fptr, &block->topFillers, true);
                if (**fptr == 'v') {
                    parseTopOrBottomFiller(fptr, &block->bottomFillers, false);
                }
            } else if (**fptr == 'v') {
                parseTopOrBottomFiller(fptr, &block->bottomFillers, false);
                if (**fptr == '^') {
                    parseTopOrBottomFiller(fptr, &block->topFillers, true);
                }
            }
        } else {
            throw DSLException(*fptr, "Expected ' or [ after length specifier.");
        }
    }
    return slc;
}
static void processImpl(const ASTPtr & ast, CollectAliases::Aliases & aliases, CollectAliases::Kind kind, size_t keep_kind_for_depth) { String alias = ast->tryGetAlias(); if (!alias.empty()) { auto it_inserted = aliases.emplace(alias, CollectAliases::AliasInfo(ast, kind)); if (!it_inserted.second && ast->getTreeHash() != it_inserted.first->second.node->getTreeHash()) { std::stringstream message; message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":\n"; formatAST(*it_inserted.first->second.node, message, 0, false, true); message << "\nand\n"; formatAST(*ast, message, 0, false, true); message << "\n"; throw Exception(message.str(), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); } } for (auto & child : ast->children) { if (typeid_cast<const ASTSelectQuery *>(child.get())) { /// Don't go into subqueries. } else if (typeid_cast<const ASTTableExpression *>(child.get())) { processImpl(child, aliases, CollectAliases::Kind::Table, 1); } else if (typeid_cast<const ASTArrayJoin *>(child.get())) { /// ASTArrayJoin -> ASTExpressionList -> element of expression AS alias processImpl(child, aliases, CollectAliases::Kind::ArrayJoin, 3); } else if (keep_kind_for_depth > 0) { processImpl(child, aliases, kind, keep_kind_for_depth - 1); } else { processImpl(child, aliases, CollectAliases::Kind::Expression, 0); } } }
// Parses a whole format string starting at *fptr and returns a super-root Block that holds every
// root content as a child. The super-root's literal length is the sum of the roots' fixed lengths.
// Throws DSLException when a root does not start with ' or a digit, or is not fixed-length.
static ASTPtr parseFormat(const char** fptr, const char*** wordSourcesPtr, const LengthFunc** lengthFuncsPtr) {
    parseWhitespaces(fptr);

    // Will insert all root content as children into a super-root Block.
    Block* rootsParentBlock = new Block(*fptr, LiteralLength(0, false));
    ASTPtr rootsParent(rootsParentBlock);

    while (**fptr != '\0') {
        // std::isdigit has undefined behavior for negative values other than EOF, which a plain
        // char can hold for non-ASCII bytes, so the character is converted to unsigned char first.
        const bool beginsRoot = **fptr == '\'' || std::isdigit(static_cast<unsigned char>(**fptr)) != 0;
        if (!beginsRoot) {
            throw DSLException(*fptr, "Expected ' or digit.");
        }

        ASTPtr root = parseSpecifiedLengthContent(fptr, wordSourcesPtr, lengthFuncsPtr);
        int rootLength = root->getFixedLength();
        if (rootLength == UNKNOWN_COL) {
            throw DSLException(root->f_at, "Root content must be fixed-length.");
        }
        rootsParentBlock->addChild(std::move(root));
        rootsParentBlock->length.value += rootLength;
    }

    return rootsParent;
}
/// Tells the traversal whether `child` of `node` should be visited.
/// Returns false for children of subqueries, functions and table expressions, and for
/// table-expression or SELECT children of a SELECT query; true otherwise.
bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child)
{
    /// Processed
    const bool node_is_processed = typeid_cast<ASTSubquery *>(node.get()) || typeid_cast<ASTFunction *>(node.get());
    if (node_is_processed)
        return false;

    /// Don't descend into subqueries in FROM section
    if (typeid_cast<ASTTableExpression *>(node.get()))
        return false;

    if (!typeid_cast<ASTSelectQuery *>(node.get()))
        return true;

    /// Do not go to FROM, JOIN, UNION.
    const bool child_is_data_source = typeid_cast<ASTTableExpression *>(child.get()) || typeid_cast<ASTSelectQuery *>(child.get());
    return !child_is_data_source;
}
// Expands `format` with `args`, parses the result into an AST, and fills `ccs` with the
// flattened ConsistentContent entries derived from that AST.
// Returns the root of the AST. On a DSLException the expanded format, a caret under the failing
// position and the error text are written to stderr and an empty ASTPtr is returned;
// `ccs` may then be partially filled.
static ASTPtr generateCCs(std::vector<ConsistentContent>* ccs, const char* format, const char*** wordSourcesPtr, const LengthFunc** lengthFuncsPtr, va_list args) {
    ccs->clear();

    // NOTE(review): this is the project's std::string-output vsprintf overload, not the C one.
    std::string evaluatedFormat;
    vsprintf(&evaluatedFormat, format, args);

    const char* f_begin = evaluatedFormat.c_str();
    const char* f_at = f_begin;
    ASTPtr root;
    try {
        root = parseFormat(&f_at, wordSourcesPtr, lengthFuncsPtr);

        root->convertLLSharesToLength();
        root->computeStartEndCols(0, root->getFixedLength());

        std::vector<FillerPtr> topFillersStack, bottomFillersStack;
        root->flatten(root, root, ccs, true, &topFillersStack, &bottomFillersStack);

        for (ConsistentContent& cc : *ccs) {
            cc.generateCCLines();
        }

        root->computeNumContentLines();
        root->computeNumTotalLines(true);
        root->computeBlockVerticalFillersShares();

        for (ConsistentContent& cc : *ccs) {
            cc.generateLinesChars(root->numTotalLines);
        }
    } catch (DSLException& e) {
        // The offset is a pointer difference (std::ptrdiff_t), so it is printed with %td;
        // printing it with %d mismatches the argument type where ptrdiff_t is wider than int.
        const auto errorOffset = e.f_at - f_begin;

        fprintf(stderr, "%s\n", f_begin);
        for (auto i = errorOffset; i > 0; --i) {
            fputc(' ', stderr);
        }
        fprintf(stderr, "^\n");
        fprintf(stderr, "Error at %td: %s\n", errorOffset, e.what());
        return ASTPtr();
    }

    return root;
}
/// Runs processImpl() over every direct child of `ast`, accumulating into `columns`.
/// For a SELECT query the FORMAT and SETTINGS clauses are skipped:
///  they contain identifiers that are not columns.
void AnalyzeColumns::process(ASTPtr & ast, const CollectAliases & aliases, const CollectTables & tables)
{
    const ASTSelectQuery * select = typeid_cast<const ASTSelectQuery *>(ast.get());

    const auto is_skipped_clause = [select](const ASTPtr & child) -> bool
    {
        return select && (child.get() == select->format.get() || child.get() == select->settings.get());
    };

    for (auto & child : ast->children)
    {
        if (!is_skipped_clause(child))
            processImpl(child, columns, aliases, tables);
    }
}
/// Filters the rows of `block` by those parts of the query's WHERE / PREWHERE conditions
/// that isValidFunction() accepts for the columns present in the block.
/// Returns true if the block was filtered. Returns false - leaving the rows untouched - when the
/// query has neither condition, when no usable sub-expression is found, or when the filter keeps every row.
bool filterBlockWithQuery(ASTPtr query, Block & block, const Context & context)
{
    /// Work on a private copy of the query.
    query = query->clone();
    const ASTSelectQuery & select = typeid_cast<ASTSelectQuery & >(*query);

    const bool has_any_condition = select.where_expression || select.prewhere_expression;
    if (!has_any_condition)
        return false;

    NameSet available_columns;
    for (const auto & name_and_type : block.getColumnsList())
        available_columns.insert(name_and_type.name);

    /// Build an expression that evaluates the parts of WHERE and PREWHERE depending only on the available columns.
    std::vector<ASTPtr> conditions;
    if (select.where_expression)
        extractFunctions(select.where_expression, available_columns, conditions);
    if (select.prewhere_expression)
        extractFunctions(select.prewhere_expression, available_columns, conditions);

    ASTPtr expression_ast = buildWhereExpression(conditions);
    if (!expression_ast)
        return false;

    /// Analyze and evaluate the expression.
    ExpressionAnalyzer analyzer(expression_ast, context, {}, block.getColumnsList());
    analyzer.getActions(false)->execute(block);

    /// Filter the block.
    /// `filter_column` is an owning copy of the pointer: it keeps the filter data alive while the
    /// block's own columns are being replaced below.
    ColumnPtr filter_column = block.getByName(expression_ast->getColumnName()).column;
    if (auto full_column = filter_column->convertToFullColumnIfConst())
        filter_column = full_column;

    const IColumn::Filter & filter = dynamic_cast<ColumnUInt8 &>(*filter_column).getData();

    /// The sum equals the size when every row passes - there is nothing to filter out.
    const auto filter_sum = std::accumulate(filter.begin(), filter.end(), 0ul);
    if (filter_sum == filter.size())
        return false;

    for (size_t position = 0; position < block.columns(); ++position)
    {
        ColumnPtr & column = block.safeGetByPosition(position).column;
        column = column->filter(filter, -1);
    }

    return true;
}
/// Creates and starts up a numbers storage from the arguments of the table function:
///  numbers(length) or numbers(offset, length).
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if the node is not a function or has another argument count.
StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, const Context & context) const
{
    const ASTFunction * function = typeid_cast<ASTFunction *>(ast_function.get());
    if (!function)
        throw Exception("Table function 'numbers' requires 'limit' or 'offset, limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    /// A copy of the argument list, as in the original code: evaluateArgument receives elements of
    /// this copy rather than of the function's own children.
    auto arguments = function->arguments->children;
    const bool has_offset = arguments.size() == 2;

    if (arguments.size() != 1 && !has_offset)
        throw Exception("Table function 'numbers' requires 'length' or 'offset, length'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    UInt64 offset = 0;
    UInt64 length = 0;
    if (has_offset)
    {
        offset = evaluateArgument(context, arguments[0]);
        length = evaluateArgument(context, arguments[1]);
    }
    else
    {
        length = evaluateArgument(context, arguments[0]);
    }

    auto res = StorageSystemNumbers::create(getName(), false, length, offset);
    res->startup();
    return res;
}
/// Constructs a Distributed storage named `name_` over table `remote_database_`.`remote_table_` of `cluster_`.
///  - sharding_key_expr / sharding_key_column_name are built from `sharding_key_` when it is set;
///    otherwise they are nullptr / an empty string.
///  - write_enabled is true only when `data_path_` is non-empty and either the cluster has fewer than
///    two shards in total (local + remote) or a sharding key is given.
///  - path is `data_path_` + escaped table name + '/', or empty when `data_path_` is empty.
/// The constructor body calls createDirectoryMonitors().
/// NOTE(review): several initializers read the members `context`, `columns`, `cluster` and `name`
///  rather than the constructor arguments, so they presumably rely on those members being declared
///  (and therefore initialized) earlier in the class - confirm against the class declaration.
StorageDistributed::StorageDistributed( const std::string & name_, NamesAndTypesListPtr columns_, const String & remote_database_, const String & remote_table_, const Cluster & cluster_, Context & context_, const ASTPtr & sharding_key_, const String & data_path_) : name(name_), columns(columns_), remote_database(remote_database_), remote_table(remote_table_), context(context_), cluster(cluster_), sharding_key_expr(sharding_key_ ? ExpressionAnalyzer(sharding_key_, context, nullptr, *columns).getActions(false) : nullptr), sharding_key_column_name(sharding_key_ ? sharding_key_->getColumnName() : String{}), write_enabled(!data_path_.empty() && (((cluster.getLocalShardCount() + cluster.getRemoteShardCount()) < 2) || sharding_key_)), path(data_path_.empty() ? "" : (data_path_ + escapeForFileName(name) + '/')) { createDirectoryMonitors(); }
/// Lists the tables of `source_database` whose names match `table_name_regexp`, excluding this
/// storage itself, each optionally paired with a structure lock (`get_lock`).
/// When `has_virtual_column` is set, the list is additionally narrowed to the tables whose names
/// survive filtering of a "_table" column by the query.
/// Throws ILLEGAL_PREWHERE if the query has PREWHERE and a matching table does not support it.
StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const ASTPtr & query, bool has_virtual_column, bool get_lock) const
{
    StorageListWithLocks selected_tables;
    DatabasePtr database = global_context.getDatabase(source_database);
    DatabaseIteratorPtr iterator = database->getIterator(global_context);

    /// Names of the selected tables, used as the "_table" column for filtering below.
    auto virtual_column = ColumnString::create();

    for (; iterator->isValid(); iterator->next())
    {
        if (!table_name_regexp.match(iterator->name()))
            continue;

        StoragePtr storage = iterator->table();

        if (query && typeid_cast<ASTSelectQuery *>(query.get())->prewhere_expression && !storage->supportsPrewhere())
            throw Exception("Storage " + storage->getName() + " doesn't support PREWHERE.", ErrorCodes::ILLEGAL_PREWHERE);

        /// The Merge table itself is never among its own sources.
        if (storage.get() == this)
            continue;

        virtual_column->insert(storage->getTableName());
        selected_tables.emplace_back(storage, get_lock ? storage->lockStructure(false) : TableStructureReadLockPtr{});
    }

    if (!has_virtual_column)
        return selected_tables;

    Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(virtual_column), std::make_shared<DataTypeString>(), "_table")};
    VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, global_context);
    auto values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_table");

    /// Remove unused tables from the list
    selected_tables.remove_if([&] (const auto & elem)
    {
        return values.find(elem.first->getTableName()) == values.end();
    });

    return selected_tables;
}
void StorageMerge::convertingSourceStream(const Block & header, const Context & context, ASTPtr & query, BlockInputStreamPtr & source_stream, QueryProcessingStage::Enum processed_stage) { Block before_block_header = source_stream->getHeader(); source_stream = std::make_shared<ConvertingBlockInputStream>(context, source_stream, header, ConvertingBlockInputStream::MatchColumnsMode::Name); ASTPtr where_expression = typeid_cast<ASTSelectQuery *>(query.get())->where_expression; if (!where_expression) return; for (size_t column_index : ext::range(0, header.columns())) { ColumnWithTypeAndName header_column = header.getByPosition(column_index); ColumnWithTypeAndName before_column = before_block_header.getByName(header_column.name); /// If the processed_stage greater than FetchColumns and the block structure between streams is different. /// the where expression maybe invalid because of convertingBlockInputStream. /// So we need to throw exception. if (!header_column.type->equals(*before_column.type.get()) && processed_stage > QueryProcessingStage::FetchColumns) { NamesAndTypesList source_columns = getSampleBlock().getNamesAndTypesList(); NameAndTypePair virtual_column = getColumn("_table"); source_columns.insert(source_columns.end(), virtual_column); auto syntax_result = SyntaxAnalyzer(context).analyze(where_expression, source_columns); ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, context}.getActions(false, false); Names required_columns = actions->getRequiredColumns(); for (const auto & required_column : required_columns) { if (required_column == header_column.name) throw Exception("Block structure mismatch in Merge Storage: different types:\n" + before_block_header.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); } } } }
/// Formats a copy of the given CREATE query as an ATTACH statement followed by a newline.
/// The passed query is not modified: all changes are made on a clone.
String getTableDefinitionFromCreateQuery(const ASTPtr & query)
{
    ASTPtr query_clone = query->clone();
    ASTCreateQuery & create = typeid_cast<ASTCreateQuery &>(*query_clone);

    /// Remove from the query everything that is not needed for ATTACH.
    create.attach = true;
    create.if_not_exists = false;
    create.is_populate = false;
    create.database.clear();
    create.as_database.clear();
    create.as_table.clear();

    /// For the view engines the SELECT query itself must be kept; for all others it must be dropped.
    const String engine = typeid_cast<ASTFunction &>(*create.storage).name;
    const bool is_view_engine = engine == "View" || engine == "MaterializedView";
    if (!is_view_engine)
        create.select = nullptr;

    std::ostringstream statement_stream;
    formatAST(create, statement_stream, 0, false);
    statement_stream << '\n';
    return statement_stream.str();
}
/// Optimizes the GROUP BY, LIMIT BY and ORDER BY lists of a SELECT query in place.
/// If processing leaves the ORDER BY list empty, the list is removed from the query entirely
/// (both from `children` and from `order_expression_list`).
/// Throws UNEXPECTED_AST_STRUCTURE if `ast` is not a SELECT query or has no select expression list.
void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference & expression_info)
{
    ASTSelectQuery * select = typeid_cast<ASTSelectQuery *>(ast.get());
    if (!select)
        /// The message names this class; it previously named AnalyzeResultOfQuery (a copy-paste leftover).
        throw Exception("OptimizeGroupOrderLimitBy::process was called for not a SELECT query", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
    if (!select->select_expression_list)
        throw Exception("SELECT query doesn't have select_expression_list", ErrorCodes::UNEXPECTED_AST_STRUCTURE);

    processGroupByLikeList(select->group_expression_list, expression_info);
    processGroupByLikeList(select->limit_by_expression_list, expression_info);

    if (select->order_expression_list)
    {
        processOrderByList(select->order_expression_list, expression_info);

        /// ORDER BY could be completely eliminated
        if (select->order_expression_list->children.empty())
        {
            /// Erase from `children` first: the comparison needs the pointer that reset() clears.
            select->children.erase(std::remove(
                select->children.begin(), select->children.end(), select->order_expression_list), select->children.end());
            select->order_expression_list.reset();
        }
    }
}
int main(int argc, char ** argv) { using namespace DB; try { NamesAndTypesList names_and_types_list { {"WatchID", std::make_shared<DataTypeUInt64>()}, {"JavaEnable", std::make_shared<DataTypeUInt8>()}, {"Title", std::make_shared<DataTypeString>()}, {"EventTime", std::make_shared<DataTypeDateTime>()}, {"CounterID", std::make_shared<DataTypeUInt32>()}, {"ClientIP", std::make_shared<DataTypeUInt32>()}, {"RegionID", std::make_shared<DataTypeUInt32>()}, {"UniqID", std::make_shared<DataTypeUInt64>()}, {"CounterClass", std::make_shared<DataTypeUInt8>()}, {"OS", std::make_shared<DataTypeUInt8>()}, {"UserAgent", std::make_shared<DataTypeUInt8>()}, {"URL", std::make_shared<DataTypeString>()}, {"Referer", std::make_shared<DataTypeString>()}, {"ResolutionWidth", std::make_shared<DataTypeUInt16>()}, {"ResolutionHeight", std::make_shared<DataTypeUInt16>()}, {"ResolutionDepth", std::make_shared<DataTypeUInt8>()}, {"FlashMajor", std::make_shared<DataTypeUInt8>()}, {"FlashMinor", std::make_shared<DataTypeUInt8>()}, {"FlashMinor2", std::make_shared<DataTypeString>()}, {"NetMajor", std::make_shared<DataTypeUInt8>()}, {"NetMinor", std::make_shared<DataTypeUInt8>()}, {"UserAgentMajor", std::make_shared<DataTypeUInt16>()}, {"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)}, {"CookieEnable", std::make_shared<DataTypeUInt8>()}, {"JavascriptEnable", std::make_shared<DataTypeUInt8>()}, {"IsMobile", std::make_shared<DataTypeUInt8>()}, {"MobilePhone", std::make_shared<DataTypeUInt8>()}, {"MobilePhoneModel", std::make_shared<DataTypeString>()}, {"Params", std::make_shared<DataTypeString>()}, {"IPNetworkID", std::make_shared<DataTypeUInt32>()}, {"TraficSourceID", std::make_shared<DataTypeInt8>()}, {"SearchEngineID", std::make_shared<DataTypeUInt16>()}, {"SearchPhrase", std::make_shared<DataTypeString>()}, {"AdvEngineID", std::make_shared<DataTypeUInt8>()}, {"IsArtifical", std::make_shared<DataTypeUInt8>()}, {"WindowClientWidth", std::make_shared<DataTypeUInt16>()}, 
{"WindowClientHeight", std::make_shared<DataTypeUInt16>()}, {"ClientTimeZone", std::make_shared<DataTypeInt16>()}, {"ClientEventTime", std::make_shared<DataTypeDateTime>()}, {"SilverlightVersion1", std::make_shared<DataTypeUInt8>()}, {"SilverlightVersion2", std::make_shared<DataTypeUInt8>()}, {"SilverlightVersion3", std::make_shared<DataTypeUInt32>()}, {"SilverlightVersion4", std::make_shared<DataTypeUInt16>()}, {"PageCharset", std::make_shared<DataTypeString>()}, {"CodeVersion", std::make_shared<DataTypeUInt32>()}, {"IsLink", std::make_shared<DataTypeUInt8>()}, {"IsDownload", std::make_shared<DataTypeUInt8>()}, {"IsNotBounce", std::make_shared<DataTypeUInt8>()}, {"FUniqID", std::make_shared<DataTypeUInt64>()}, {"OriginalURL", std::make_shared<DataTypeString>()}, {"HID", std::make_shared<DataTypeUInt32>()}, {"IsOldCounter", std::make_shared<DataTypeUInt8>()}, {"IsEvent", std::make_shared<DataTypeUInt8>()}, {"IsParameter", std::make_shared<DataTypeUInt8>()}, {"DontCountHits", std::make_shared<DataTypeUInt8>()}, {"WithHash", std::make_shared<DataTypeUInt8>()}, }; Context context; std::string input = "SELECT UniqID, URL, CounterID, IsLink WHERE URL = 'http://mail.yandex.ru/neo2/#inbox'"; ParserSelectQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), ""); formatAST(*ast, std::cerr); std::cerr << std::endl; std::cerr << ast->getTreeID() << std::endl; /// create an object of an existing hit log table StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list)); /// read from it, apply the expression, filter, and write in tsv form to the console ExpressionAnalyzer analyzer(ast, context, nullptr, names_and_types_list); ExpressionActionsChain chain; analyzer.appendSelect(chain, false); analyzer.appendWhere(chain, false); chain.finalize(); ExpressionActionsPtr expression = chain.getLastActions(); Names column_names { "UniqID", "URL", "CounterID", "IsLink", }; QueryProcessingStage::Enum 
stage; BlockInputStreamPtr in = table->read(column_names, 0, context, stage)[0]; in = std::make_shared<FilterBlockInputStream>(in, expression, 4); //in = std::make_shared<LimitBlockInputStream>(in, 10, 0); WriteBufferFromOStream ob(std::cout); RowOutputStreamPtr out_ = std::make_shared<TabSeparatedRowOutputStream>(ob, expression->getSampleBlock()); BlockOutputStreamFromRowOutputStream out(out_); copyData(*in, out); } catch (const Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; return 1; } return 0; }
int main(int argc, char ** argv) try { using namespace DB; size_t n = argc == 2 ? parse<UInt64>(argv[1]) : 10ULL; std::string input = "SELECT number, number % 3 == 1"; ParserSelectQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), ""); formatAST(*ast, std::cerr); std::cerr << std::endl; std::cerr << ast->getTreeID() << std::endl; Context context = Context::createGlobal(); ExpressionAnalyzer analyzer(ast, context, {}, {NameAndTypePair("number", std::make_shared<DataTypeUInt64>())}); ExpressionActionsChain chain; analyzer.appendSelect(chain, false); analyzer.appendProjectResult(chain); chain.finalize(); ExpressionActionsPtr expression = chain.getLastActions(); StoragePtr table = StorageSystemNumbers::create("numbers", false); Names column_names; column_names.push_back("number"); QueryProcessingStage::Enum stage; BlockInputStreamPtr in = table->read(column_names, {}, context, stage, 8192, 1)[0]; in = std::make_shared<FilterBlockInputStream>(in, expression, 1); in = std::make_shared<LimitBlockInputStream>(in, 10, std::max(static_cast<Int64>(0), static_cast<Int64>(n) - 10)); WriteBufferFromOStream ob(std::cout); RowOutputStreamPtr out_ = std::make_shared<TabSeparatedRowOutputStream>(ob, expression->getSampleBlock()); BlockOutputStreamFromRowOutputStream out(out_); { Stopwatch stopwatch; stopwatch.start(); copyData(*in, out); stopwatch.stop(); std::cout << std::fixed << std::setprecision(2) << "Elapsed " << stopwatch.elapsedSeconds() << " sec." << ", " << n / stopwatch.elapsedSeconds() << " rows/sec." << std::endl; } return 0; } catch (const DB::Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; throw; }
void executeQuery( ReadBuffer & istr, WriteBuffer & ostr, bool allow_into_outfile, Context & context, std::function<void(const String &)> set_content_type) { PODArray<char> parse_buf; const char * begin; const char * end; /// If 'istr' is empty now, fetch next data into buffer. if (istr.buffer().size() == 0) istr.next(); size_t max_query_size = context.getSettingsRef().max_query_size; if (istr.buffer().end() - istr.position() >= static_cast<ssize_t>(max_query_size)) { /// If remaining buffer space in 'istr' is enough to parse query up to 'max_query_size' bytes, then parse inplace. begin = istr.position(); end = istr.buffer().end(); istr.position() += end - begin; } else { /// If not - copy enough data into 'parse_buf'. parse_buf.resize(max_query_size); parse_buf.resize(istr.read(&parse_buf[0], max_query_size)); begin = &parse_buf[0]; end = begin + parse_buf.size(); } ASTPtr ast; BlockIO streams; std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete); try { if (streams.out) { InputStreamFromASTInsertQuery in(ast, istr, streams, context); copyData(in, *streams.out); } if (streams.in) { const ASTQueryWithOutput * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()); WriteBuffer * out_buf = &ostr; std::experimental::optional<WriteBufferFromFile> out_file_buf; if (ast_query_with_output && ast_query_with_output->out_file) { if (!allow_into_outfile) throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED); const auto & out_file = typeid_cast<const ASTLiteral &>(*ast_query_with_output->out_file).value.safeGet<std::string>(); out_file_buf.emplace(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT); out_buf = &out_file_buf.value(); } String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr) ? 
typeid_cast<const ASTIdentifier &>(*ast_query_with_output->format).name : context.getDefaultFormat(); BlockOutputStreamPtr out = context.getOutputFormat(format_name, *out_buf, streams.in_sample); if (auto stream = dynamic_cast<IProfilingBlockInputStream *>(streams.in.get())) { /// Save previous progress callback if any. TODO Do it more conveniently. auto previous_progress_callback = context.getProgressCallback(); /// NOTE Progress callback takes shared ownership of 'out'. stream->setProgressCallback([out, previous_progress_callback] (const Progress & progress) { if (previous_progress_callback) previous_progress_callback(progress); out->onProgress(progress); }); } if (set_content_type) set_content_type(out->getContentType()); copyData(*streams.in, *out); } } catch (...) { streams.onException(); throw; } streams.onFinish(); }
static std::tuple<ASTPtr, BlockIO> executeQueryImpl( IParser::Pos begin, IParser::Pos end, Context & context, bool internal, QueryProcessingStage::Enum stage) { ProfileEvents::increment(ProfileEvents::Query); time_t current_time = time(0); const Settings & settings = context.getSettingsRef(); ParserQuery parser; ASTPtr ast; size_t query_size; size_t max_query_size = settings.max_query_size; try { ast = parseQuery(parser, begin, end, ""); /// Copy query into string. It will be written to log and presented in processlist. If an INSERT query, string will not include data to insertion. query_size = ast->range.second - ast->range.first; if (max_query_size && query_size > max_query_size) throw Exception("Query is too large (" + toString(query_size) + ")." " max_query_size = " + toString(max_query_size), ErrorCodes::QUERY_IS_TOO_LARGE); } catch (...) { /// Anyway log query. if (!internal) { String query = String(begin, begin + std::min(end - begin, static_cast<ptrdiff_t>(max_query_size))); logQuery(query.substr(0, settings.log_queries_cut_to_length), context); onExceptionBeforeStart(query, context, current_time); } throw; } String query(begin, query_size); BlockIO res; try { if (!internal) logQuery(query.substr(0, settings.log_queries_cut_to_length), context); /// Check the limits. checkLimits(*ast, settings.limits); QuotaForIntervals & quota = context.getQuota(); quota.addQuery(); /// NOTE Seems that when new time interval has come, first query is not accounted in number of queries. quota.checkExceeded(current_time); /// Put query to process list. But don't put SHOW PROCESSLIST query itself. 
ProcessList::EntryPtr process_list_entry; if (!internal && nullptr == typeid_cast<const ASTShowProcesslistQuery *>(&*ast)) { process_list_entry = context.getProcessList().insert( query, ast.get(), context.getClientInfo(), settings); context.setProcessListElement(&process_list_entry->get()); } auto interpreter = InterpreterFactory::get(ast, context, stage); res = interpreter->execute(); /// Delayed initialization of query streams (required for KILL QUERY purposes) if (process_list_entry) (*process_list_entry)->setQueryStreams(res); /// Hold element of process list till end of query execution. res.process_list_entry = process_list_entry; if (res.in) { if (auto stream = dynamic_cast<IProfilingBlockInputStream *>(res.in.get())) { stream->setProgressCallback(context.getProgressCallback()); stream->setProcessListElement(context.getProcessListElement()); } } if (res.out) { if (auto stream = dynamic_cast<CountingBlockOutputStream *>(res.out.get())) { stream->setProcessListElement(context.getProcessListElement()); } } /// Everything related to query log. { QueryLogElement elem; elem.type = QueryLogElement::QUERY_START; elem.event_time = current_time; elem.query_start_time = current_time; elem.query = query.substr(0, settings.log_queries_cut_to_length); elem.client_info = context.getClientInfo(); bool log_queries = settings.log_queries && !internal; /// Log into system table start of query execution, if need. if (log_queries) context.getQueryLog().add(elem); /// Also make possible for caller to log successful query finish and exception during execution. 
res.finish_callback = [elem, &context, log_queries] (IBlockInputStream * stream_in, IBlockOutputStream * stream_out) mutable { ProcessListElement * process_list_elem = context.getProcessListElement(); if (!process_list_elem) return; double elapsed_seconds = process_list_elem->watch.elapsedSeconds(); elem.type = QueryLogElement::QUERY_FINISH; elem.event_time = time(0); elem.query_duration_ms = elapsed_seconds * 1000; elem.read_rows = process_list_elem->progress_in.rows; elem.read_bytes = process_list_elem->progress_in.bytes; elem.written_rows = process_list_elem->progress_out.rows; elem.written_bytes = process_list_elem->progress_out.bytes; auto memory_usage = process_list_elem->memory_tracker.getPeak(); elem.memory_usage = memory_usage > 0 ? memory_usage : 0; if (stream_in) { if (auto profiling_stream = dynamic_cast<const IProfilingBlockInputStream *>(stream_in)) { const BlockStreamProfileInfo & info = profiling_stream->getProfileInfo(); /// NOTE: INSERT SELECT query contains zero metrics elem.result_rows = info.rows; elem.result_bytes = info.bytes; } } else if (stream_out) /// will be used only for ordinary INSERT queries { if (auto counting_stream = dynamic_cast<const CountingBlockOutputStream *>(stream_out)) { /// NOTE: Redundancy. The same values could be extracted from process_list_elem->progress_out. 
elem.result_rows = counting_stream->getProgress().rows; elem.result_bytes = counting_stream->getProgress().bytes; } } if (elem.read_rows != 0) { LOG_INFO(&Logger::get("executeQuery"), std::fixed << std::setprecision(3) << "Read " << elem.read_rows << " rows, " << formatReadableSizeWithBinarySuffix(elem.read_bytes) << " in " << elapsed_seconds << " sec., " << static_cast<size_t>(elem.read_rows / elapsed_seconds) << " rows/sec., " << formatReadableSizeWithBinarySuffix(elem.read_bytes / elapsed_seconds) << "/sec."); } if (log_queries) context.getQueryLog().add(elem); }; res.exception_callback = [elem, &context, log_queries] () mutable { context.getQuota().addError(); elem.type = QueryLogElement::EXCEPTION_WHILE_PROCESSING; elem.event_time = time(0); elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); elem.exception = getCurrentExceptionMessage(false); ProcessListElement * process_list_elem = context.getProcessListElement(); if (process_list_elem) { double elapsed_seconds = process_list_elem->watch.elapsedSeconds(); elem.query_duration_ms = elapsed_seconds * 1000; elem.read_rows = process_list_elem->progress_in.rows; elem.read_bytes = process_list_elem->progress_in.bytes; auto memory_usage = process_list_elem->memory_tracker.getPeak(); elem.memory_usage = memory_usage > 0 ? memory_usage : 0; } setExceptionStackTrace(elem); logException(context, elem); if (log_queries) context.getQueryLog().add(elem); }; if (!internal && res.in) { std::stringstream log_str; log_str << "Query pipeline:\n"; res.in->dumpTree(log_str); LOG_DEBUG(&Logger::get("executeQuery"), log_str.str()); } } } catch (...) { if (!internal) onExceptionBeforeStart(query, context, current_time); throw; } return std::make_tuple(ast, res); }
/// Creates the interpreter. It stores a clone of the passed query AST (not the passed pointer itself),
/// a reference to the context, and the logger named "InterpreterSystemQuery".
InterpreterSystemQuery::InterpreterSystemQuery(const ASTPtr & query_ptr_, Context & context_)
    : query_ptr(query_ptr_->clone())
    , context(context_)
    , log(&Poco::Logger::get("InterpreterSystemQuery"))
{
}
void executeQuery( ReadBuffer & istr, WriteBuffer & ostr, Context & context, BlockInputStreamPtr & query_plan, std::function<void(const String &)> set_content_type) { PODArray<char> parse_buf; const char * begin; const char * end; /// If 'istr' is empty now, fetch next data into buffer. if (istr.buffer().size() == 0) istr.next(); size_t max_query_size = context.getSettingsRef().max_query_size; if (istr.buffer().end() - istr.position() >= static_cast<ssize_t>(max_query_size)) { /// If remaining buffer space in 'istr' is enough to parse query up to 'max_query_size' bytes, then parse inplace. begin = istr.position(); end = istr.buffer().end(); istr.position() += end - begin; } else { /// If not - copy enough data into 'parse_buf'. parse_buf.resize(max_query_size); parse_buf.resize(istr.read(&parse_buf[0], max_query_size)); begin = &parse_buf[0]; end = begin + parse_buf.size(); } ASTPtr ast; BlockIO streams; std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete); try { if (streams.out) { const ASTInsertQuery * ast_insert_query = dynamic_cast<const ASTInsertQuery *>(ast.get()); if (!ast_insert_query) throw Exception("Logical error: query requires data to insert, but it is not INSERT query", ErrorCodes::LOGICAL_ERROR); String format = ast_insert_query->format; if (format.empty()) format = "Values"; /// Data could be in parsed (ast_insert_query.data) and in not parsed yet (istr) part of query. ConcatReadBuffer::ReadBuffers buffers; ReadBuffer buf1(const_cast<char *>(ast_insert_query->data), ast_insert_query->data ? ast_insert_query->end - ast_insert_query->data : 0, 0); if (ast_insert_query->data) buffers.push_back(&buf1); buffers.push_back(&istr); /** NOTE Must not read from 'istr' before read all between 'ast_insert_query.data' and 'ast_insert_query.end'. * - because 'query.data' could refer to memory piece, used as buffer for 'istr'. 
*/ ConcatReadBuffer data_istr(buffers); BlockInputStreamPtr in{ context.getInputFormat( format, data_istr, streams.out_sample, context.getSettings().max_insert_block_size)}; copyData(*in, *streams.out); } if (streams.in) { const ASTQueryWithOutput * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()); String format_name = ast_query_with_output && (ast_query_with_output->getFormat() != nullptr) ? typeid_cast<const ASTIdentifier &>(*ast_query_with_output->getFormat()).name : context.getDefaultFormat(); BlockOutputStreamPtr out = context.getOutputFormat(format_name, ostr, streams.in_sample); if (IProfilingBlockInputStream * stream = dynamic_cast<IProfilingBlockInputStream *>(streams.in.get())) { /// NOTE Progress callback takes shared ownership of 'out'. stream->setProgressCallback([out] (const Progress & progress) { out->onProgress(progress); }); } if (set_content_type) set_content_type(out->getContentType()); copyData(*streams.in, *out); } } catch (...) { streams.onException(); throw; } streams.onFinish(); }
void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { Context subquery_context = data.context; Settings subquery_settings = data.context.getSettings(); subquery_settings.max_result_rows = 1; subquery_settings.extremes = 0; subquery_context.setSettings(subquery_settings); ASTPtr subquery_select = subquery.children.at(0); BlockIO res = InterpreterSelectWithUnionQuery( subquery_select, subquery_context, {}, QueryProcessingStage::Complete, data.subquery_depth + 1).execute(); Block block; try { block = res.in->read(); if (!block) { /// Interpret subquery with empty result as Null literal auto ast_new = std::make_unique<ASTLiteral>(Null()); ast_new->setAlias(ast->tryGetAlias()); ast = std::move(ast_new); return; } if (block.rows() != 1 || res.in->read()) throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); } catch (const Exception & e) { if (e.code() == ErrorCodes::TOO_MANY_ROWS) throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); else throw; } size_t columns = block.columns(); if (columns == 1) { auto lit = std::make_unique<ASTLiteral>((*block.safeGetByPosition(0).column)[0]); lit->alias = subquery.alias; lit->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name; ast = addTypeConversion(std::move(lit), block.safeGetByPosition(0).type->getName()); } else { auto tuple = std::make_shared<ASTFunction>(); tuple->alias = subquery.alias; ast = tuple; tuple->name = "tuple"; auto exp_list = std::make_shared<ASTExpressionList>(); tuple->arguments = exp_list; tuple->children.push_back(tuple->arguments); exp_list->children.resize(columns); for (size_t i = 0; i < columns; ++i) { exp_list->children[i] = addTypeConversion( std::make_unique<ASTLiteral>((*block.safeGetByPosition(i).column)[0]), block.safeGetByPosition(i).type->getName()); } } }