int main(int argc, char ** argv) try { using namespace DB; size_t n = argc == 2 ? parse<UInt64>(argv[1]) : 10ULL; std::string input = "SELECT number, number / 3, number * number"; ParserSelectQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), ""); Context context; ExpressionAnalyzer analyzer(ast, context, {}, {NameAndTypePair("number", std::make_shared<DataTypeUInt64>())}); ExpressionActionsChain chain; analyzer.appendSelect(chain, false); analyzer.appendProjectResult(chain, false); chain.finalize(); ExpressionActionsPtr expression = chain.getLastActions(); StoragePtr table = StorageSystemNumbers::create("Numbers"); Names column_names; column_names.push_back("number"); QueryProcessingStage::Enum stage; BlockInputStreamPtr in; in = table->read(column_names, 0, context, Settings(), stage)[0]; in = std::make_shared<ExpressionBlockInputStream>(in, expression); in = std::make_shared<LimitBlockInputStream>(in, 10, std::max(static_cast<Int64>(0), static_cast<Int64>(n) - 10)); WriteBufferFromOStream out1(std::cout); RowOutputStreamPtr out2 = std::make_shared<TabSeparatedRowOutputStream>(out1, expression->getSampleBlock()); BlockOutputStreamFromRowOutputStream out(out2); { Stopwatch stopwatch; stopwatch.start(); copyData(*in, out); stopwatch.stop(); std::cout << std::fixed << std::setprecision(2) << "Elapsed " << stopwatch.elapsedSeconds() << " sec." << ", " << n / stopwatch.elapsedSeconds() << " rows/sec." << std::endl; } return 0; } catch (const DB::Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; throw; }
/** Filters the rows of `block` using the parts of the query's WHERE and PREWHERE
  * that are selected by extractFunctions() for the columns present in the block.
  *
  * Returns false (leaving the rows of the block in place) when:
  *  - the query has neither WHERE nor PREWHERE;
  *  - buildWhereExpression() produces no expression;
  *  - the sum of the filter bytes equals the number of rows.
  * Otherwise every column of the block is filtered and true is returned.
  *
  * Note that the expression is executed on the block before filtering.
  */
bool filterBlockWithQuery(ASTPtr query, Block & block, const Context & context)
{
    /// Work on a private copy of the query tree.
    query = query->clone();
    const ASTSelectQuery & select = typeid_cast<ASTSelectQuery & >(*query);

    const bool has_where = static_cast<bool>(select.where_expression);
    const bool has_prewhere = static_cast<bool>(select.prewhere_expression);
    if (!has_where && !has_prewhere)
        return false;

    NameSet block_column_names;
    for (const auto & name_and_type : block.getColumnsList())
        block_column_names.insert(name_and_type.name);

    /// Compose an expression that evaluates the expressions in WHERE and PREWHERE depending only on the existing columns.
    std::vector<ASTPtr> conditions;
    if (has_where)
        extractFunctions(select.where_expression, block_column_names, conditions);
    if (has_prewhere)
        extractFunctions(select.prewhere_expression, block_column_names, conditions);

    ASTPtr condition_ast = buildWhereExpression(conditions);
    if (!condition_ast)
        return false;

    /// Analyze and evaluate the expression.
    ExpressionAnalyzer analyzer(condition_ast, context, {}, block.getColumnsList());
    ExpressionActionsPtr filter_actions = analyzer.getActions(false);
    filter_actions->execute(block);

    /// Filter the block.
    String result_name = condition_ast->getColumnName();
    ColumnPtr result_column = block.getByName(result_name).column;
    if (auto full_column = result_column->convertToFullColumnIfConst())
        result_column = full_column;

    const IColumn::Filter & filter = dynamic_cast<ColumnUInt8 &>(*result_column).getData();

    const auto filter_sum = std::accumulate(filter.begin(), filter.end(), 0ul);
    if (filter_sum == filter.size())
        return false;

    const size_t columns_count = block.columns();
    for (size_t position = 0; position < columns_count; ++position)
    {
        auto & entry = block.safeGetByPosition(position);
        entry.column = entry.column->filter(filter, -1);
    }

    return true;
}
bool filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context) { const ASTSelectQuery & select = typeid_cast<const ASTSelectQuery & >(*query); if (!select.where_expression && !select.prewhere_expression) return false; NameSet columns; for (const auto & it : block.getColumnsList()) columns.insert(it.name); /// We will create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. std::vector<ASTPtr> functions; if (select.where_expression) extractFunctions(select.where_expression, columns, functions); if (select.prewhere_expression) extractFunctions(select.prewhere_expression, columns, functions); ASTPtr expression_ast = buildWhereExpression(functions); if (!expression_ast) return false; /// Let's parse and calculate the expression. ExpressionAnalyzer analyzer(expression_ast, context, {}, block.getColumnsList()); ExpressionActionsPtr actions = analyzer.getActions(false); actions->execute(block); /// Filter the block. String filter_column_name = expression_ast->getColumnName(); ColumnPtr filter_column = block.getByName(filter_column_name).column; if (auto converted = filter_column->convertToFullColumnIfConst()) filter_column = converted; const IColumn::Filter & filter = dynamic_cast<ColumnUInt8 &>(*filter_column).getData(); if (countBytesInFilter(filter) == 0) return false; for (size_t i = 0; i < block.columns(); ++i) { ColumnPtr & column = block.safeGetByPosition(i).column; column = column->filter(filter, -1); } return true; }
void StorageMerge::convertingSourceStream(const Block & header, const Context & context, ASTPtr & query, BlockInputStreamPtr & source_stream, QueryProcessingStage::Enum processed_stage) { Block before_block_header = source_stream->getHeader(); source_stream = std::make_shared<ConvertingBlockInputStream>(context, source_stream, header, ConvertingBlockInputStream::MatchColumnsMode::Name); ASTPtr where_expression = typeid_cast<ASTSelectQuery *>(query.get())->where_expression; if (!where_expression) return; for (size_t column_index : ext::range(0, header.columns())) { ColumnWithTypeAndName header_column = header.getByPosition(column_index); ColumnWithTypeAndName before_column = before_block_header.getByName(header_column.name); /// If the processed_stage greater than FetchColumns and the block structure between streams is different. /// the where expression maybe invalid because of convertingBlockInputStream. /// So we need to throw exception. if (!header_column.type->equals(*before_column.type.get()) && processed_stage > QueryProcessingStage::FetchColumns) { NamesAndTypesList source_columns = getSampleBlock().getNamesAndTypesList(); NameAndTypePair virtual_column = getColumn("_table"); source_columns.insert(source_columns.end(), virtual_column); auto syntax_result = SyntaxAnalyzer(context).analyze(where_expression, source_columns); ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, context}.getActions(false, false); Names required_columns = actions->getRequiredColumns(); for (const auto & required_column : required_columns) { if (required_column == header_column.name) throw Exception("Block structure mismatch in Merge Storage: different types:\n" + before_block_header.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); } } } }
std::vector<std::size_t> MergeTreeReadPool::fillPerPartInfo( RangesInDataParts & parts, const ExpressionActionsPtr & prewhere_actions, const String & prewhere_column_name, const bool check_columns) { std::vector<std::size_t> per_part_sum_marks; for (const auto i : ext::range(0, parts.size())) { auto & part = parts[i]; /// Read marks for every data part. size_t sum_marks = 0; /// Ranges are in right-to-left order, due to 'reverse' in MergeTreeDataSelectExecutor. for (const auto & range : part.ranges) sum_marks += range.end - range.begin; per_part_sum_marks.push_back(sum_marks); per_part_columns_lock.push_back(std::make_unique<Poco::ScopedReadRWLock>( part.data_part->columns_lock)); /// inject column names required for DEFAULT evaluation in current part auto required_column_names = column_names; const auto injected_columns = injectRequiredColumns(part.data_part, required_column_names); auto should_reoder = !injected_columns.empty(); Names required_pre_column_names; if (prewhere_actions) { /// collect columns required for PREWHERE evaluation required_pre_column_names = prewhere_actions->getRequiredColumns(); /// there must be at least one column required for PREWHERE if (required_pre_column_names.empty()) required_pre_column_names.push_back(required_column_names[0]); /// PREWHERE columns may require some additional columns for DEFAULT evaluation const auto injected_pre_columns = injectRequiredColumns(part.data_part, required_pre_column_names); if (!injected_pre_columns.empty()) should_reoder = true; /// will be used to distinguish between PREWHERE and WHERE columns when applying filter const NameSet pre_name_set{ std::begin(required_pre_column_names), std::end(required_pre_column_names) }; /** If expression in PREWHERE is not table column, then no need to return column with it to caller * (because storage is expected only to read table columns). 
*/ per_part_remove_prewhere_column.push_back(0 == pre_name_set.count(prewhere_column_name)); Names post_column_names; for (const auto & name : required_column_names) if (!pre_name_set.count(name)) post_column_names.push_back(name); required_column_names = post_column_names; } else per_part_remove_prewhere_column.push_back(false); per_part_column_name_set.emplace_back(std::begin(required_column_names), std::end(required_column_names)); if (check_columns) { /** Under part->columns_lock check that all requested columns in part are of same type that in table. * This could be violated during ALTER MODIFY. */ if (!required_pre_column_names.empty()) data.check(part.data_part->columns, required_pre_column_names); if (!required_column_names.empty()) data.check(part.data_part->columns, required_column_names); per_part_pre_columns.push_back(data.getColumnsList().addTypes(required_pre_column_names)); per_part_columns.push_back(data.getColumnsList().addTypes(required_column_names)); } else { per_part_pre_columns.push_back(part.data_part->columns.addTypes(required_pre_column_names)); per_part_columns.push_back(part.data_part->columns.addTypes(required_column_names)); } per_part_should_reorder.push_back(should_reoder); this->parts.push_back({ part.data_part, part.part_index_in_query }); } return per_part_sum_marks; }
int main(int argc, char ** argv) { using namespace DB; try { NamesAndTypesList names_and_types_list { {"WatchID", std::make_shared<DataTypeUInt64>()}, {"JavaEnable", std::make_shared<DataTypeUInt8>()}, {"Title", std::make_shared<DataTypeString>()}, {"EventTime", std::make_shared<DataTypeDateTime>()}, {"CounterID", std::make_shared<DataTypeUInt32>()}, {"ClientIP", std::make_shared<DataTypeUInt32>()}, {"RegionID", std::make_shared<DataTypeUInt32>()}, {"UniqID", std::make_shared<DataTypeUInt64>()}, {"CounterClass", std::make_shared<DataTypeUInt8>()}, {"OS", std::make_shared<DataTypeUInt8>()}, {"UserAgent", std::make_shared<DataTypeUInt8>()}, {"URL", std::make_shared<DataTypeString>()}, {"Referer", std::make_shared<DataTypeString>()}, {"ResolutionWidth", std::make_shared<DataTypeUInt16>()}, {"ResolutionHeight", std::make_shared<DataTypeUInt16>()}, {"ResolutionDepth", std::make_shared<DataTypeUInt8>()}, {"FlashMajor", std::make_shared<DataTypeUInt8>()}, {"FlashMinor", std::make_shared<DataTypeUInt8>()}, {"FlashMinor2", std::make_shared<DataTypeString>()}, {"NetMajor", std::make_shared<DataTypeUInt8>()}, {"NetMinor", std::make_shared<DataTypeUInt8>()}, {"UserAgentMajor", std::make_shared<DataTypeUInt16>()}, {"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)}, {"CookieEnable", std::make_shared<DataTypeUInt8>()}, {"JavascriptEnable", std::make_shared<DataTypeUInt8>()}, {"IsMobile", std::make_shared<DataTypeUInt8>()}, {"MobilePhone", std::make_shared<DataTypeUInt8>()}, {"MobilePhoneModel", std::make_shared<DataTypeString>()}, {"Params", std::make_shared<DataTypeString>()}, {"IPNetworkID", std::make_shared<DataTypeUInt32>()}, {"TraficSourceID", std::make_shared<DataTypeInt8>()}, {"SearchEngineID", std::make_shared<DataTypeUInt16>()}, {"SearchPhrase", std::make_shared<DataTypeString>()}, {"AdvEngineID", std::make_shared<DataTypeUInt8>()}, {"IsArtifical", std::make_shared<DataTypeUInt8>()}, {"WindowClientWidth", std::make_shared<DataTypeUInt16>()}, 
{"WindowClientHeight", std::make_shared<DataTypeUInt16>()}, {"ClientTimeZone", std::make_shared<DataTypeInt16>()}, {"ClientEventTime", std::make_shared<DataTypeDateTime>()}, {"SilverlightVersion1", std::make_shared<DataTypeUInt8>()}, {"SilverlightVersion2", std::make_shared<DataTypeUInt8>()}, {"SilverlightVersion3", std::make_shared<DataTypeUInt32>()}, {"SilverlightVersion4", std::make_shared<DataTypeUInt16>()}, {"PageCharset", std::make_shared<DataTypeString>()}, {"CodeVersion", std::make_shared<DataTypeUInt32>()}, {"IsLink", std::make_shared<DataTypeUInt8>()}, {"IsDownload", std::make_shared<DataTypeUInt8>()}, {"IsNotBounce", std::make_shared<DataTypeUInt8>()}, {"FUniqID", std::make_shared<DataTypeUInt64>()}, {"OriginalURL", std::make_shared<DataTypeString>()}, {"HID", std::make_shared<DataTypeUInt32>()}, {"IsOldCounter", std::make_shared<DataTypeUInt8>()}, {"IsEvent", std::make_shared<DataTypeUInt8>()}, {"IsParameter", std::make_shared<DataTypeUInt8>()}, {"DontCountHits", std::make_shared<DataTypeUInt8>()}, {"WithHash", std::make_shared<DataTypeUInt8>()}, }; Context context; std::string input = "SELECT UniqID, URL, CounterID, IsLink WHERE URL = 'http://mail.yandex.ru/neo2/#inbox'"; ParserSelectQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), ""); formatAST(*ast, std::cerr); std::cerr << std::endl; std::cerr << ast->getTreeID() << std::endl; /// create an object of an existing hit log table StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list)); /// read from it, apply the expression, filter, and write in tsv form to the console ExpressionAnalyzer analyzer(ast, context, nullptr, names_and_types_list); ExpressionActionsChain chain; analyzer.appendSelect(chain, false); analyzer.appendWhere(chain, false); chain.finalize(); ExpressionActionsPtr expression = chain.getLastActions(); Names column_names { "UniqID", "URL", "CounterID", "IsLink", }; QueryProcessingStage::Enum 
stage; BlockInputStreamPtr in = table->read(column_names, 0, context, stage)[0]; in = std::make_shared<FilterBlockInputStream>(in, expression, 4); //in = std::make_shared<LimitBlockInputStream>(in, 10, 0); WriteBufferFromOStream ob(std::cout); RowOutputStreamPtr out_ = std::make_shared<TabSeparatedRowOutputStream>(ob, expression->getSampleBlock()); BlockOutputStreamFromRowOutputStream out(out_); copyData(*in, out); } catch (const Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; return 1; } return 0; }
int main(int argc, char ** argv) { using namespace DB; try { std::string input = "SELECT x, s1, s2, " "/*" "2 + x * 2, x * 2, x % 3 == 1, " "s1 == 'abc', s1 == s2, s1 != 'abc', s1 != s2, " "s1 < 'abc', s1 < s2, s1 > 'abc', s1 > s2, " "s1 <= 'abc', s1 <= s2, s1 >= 'abc', s1 >= s2, " "*/" "s1 < s2 AND x % 3 < x % 5"; ParserSelectQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); formatAST(*ast, std::cerr); std::cerr << std::endl; Context context = Context::createGlobal(); NamesAndTypesList columns { {"x", std::make_shared<DataTypeInt16>()}, {"s1", std::make_shared<DataTypeString>()}, {"s2", std::make_shared<DataTypeString>()} }; auto syntax_result = SyntaxAnalyzer(context, {}).analyze(ast, columns); ExpressionAnalyzer analyzer(ast, syntax_result, context); ExpressionActionsChain chain(context); analyzer.appendSelect(chain, false); analyzer.appendProjectResult(chain); chain.finalize(); ExpressionActionsPtr expression = chain.getLastActions(); size_t n = argc == 2 ? 
atoi(argv[1]) : 10; Block block; { ColumnWithTypeAndName column; column.name = "x"; column.type = std::make_shared<DataTypeInt16>(); auto col = ColumnInt16::create(); auto & vec_x = col->getData(); vec_x.resize(n); for (size_t i = 0; i < n; ++i) vec_x[i] = i % 9; column.column = std::move(col); block.insert(column); } const char * strings[] = {"abc", "def", "abcd", "defg", "ac"}; { ColumnWithTypeAndName column; column.name = "s1"; column.type = std::make_shared<DataTypeString>(); auto col = ColumnString::create(); for (size_t i = 0; i < n; ++i) col->insert(std::string(strings[i % 5])); column.column = std::move(col); block.insert(column); } { ColumnWithTypeAndName column; column.name = "s2"; column.type = std::make_shared<DataTypeString>(); auto col = ColumnString::create(); for (size_t i = 0; i < n; ++i) col->insert(std::string(strings[i % 3])); column.column = std::move(col); block.insert(column); } { Stopwatch stopwatch; stopwatch.start(); expression->execute(block); stopwatch.stop(); std::cout << std::fixed << std::setprecision(2) << "Elapsed " << stopwatch.elapsedSeconds() << " sec." << ", " << n / stopwatch.elapsedSeconds() << " rows/sec." << std::endl; } auto is = std::make_shared<OneBlockInputStream>(block); LimitBlockInputStream lis(is, 20, std::max(0, static_cast<int>(n) - 20)); WriteBufferFromOStream out_buf(std::cout); BlockOutputStreamPtr out = FormatFactory::instance().getOutput("TabSeparated", out_buf, block, context); copyData(lis, *out); } catch (const Exception & e) { std::cerr << e.displayText() << std::endl; } return 0; }
int main(int argc, char ** argv) try { using namespace DB; std::string input = "SELECT number, number % 10000000 == 1"; ParserSelectQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), ""); formatAST(*ast, std::cerr); std::cerr << std::endl; Context context; ExpressionAnalyzer analyzer(ast, context, {}, {NameAndTypePair("number", std::make_shared<DataTypeUInt64>())}); ExpressionActionsChain chain; analyzer.appendSelect(chain, false); analyzer.appendProjectResult(chain, false); chain.finalize(); ExpressionActionsPtr expression = chain.getLastActions(); StoragePtr table = StorageSystemNumbers::create("Numbers"); Names column_names; column_names.push_back("number"); QueryProcessingStage::Enum stage; BlockInputStreamPtr in = table->read(column_names, 0, context, stage)[0]; ForkBlockInputStreams fork(in); BlockInputStreamPtr in1 = fork.createInput(); BlockInputStreamPtr in2 = fork.createInput(); in1 = std::make_shared<FilterBlockInputStream>(in1, expression, 1); in1 = std::make_shared<LimitBlockInputStream>(in1, 10, 0); in2 = std::make_shared<FilterBlockInputStream>(in2, expression, 1); in2 = std::make_shared<LimitBlockInputStream>(in2, 20, 5); Block out_sample = expression->getSampleBlock(); WriteBufferFromOStream ob1(std::cout); WriteBufferFromOStream ob2(std::cerr); BlockOutputStreamPtr out1 = context.getOutputFormat("TabSeparated", ob1, out_sample); BlockOutputStreamPtr out2 = context.getOutputFormat("TabSeparated", ob2, out_sample); std::thread thr1(std::bind(thread1, in1, out1, std::ref(ob1))); std::thread thr2(std::bind(thread2, in2, out2, std::ref(ob2))); fork.run(); thr1.join(); thr2.join(); return 0; } catch (const DB::Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; throw; }