Ejemplo n.º 1
0
int main(int argc, char ** argv)
try
{
	using namespace DB;

	size_t n = argc == 2 ? parse<UInt64>(argv[1]) : 10ULL;

	std::string input = "SELECT number, number / 3, number * number";

	ParserSelectQuery parser;
	ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "");

	Context context;

	ExpressionAnalyzer analyzer(ast, context, {}, {NameAndTypePair("number", std::make_shared<DataTypeUInt64>())});
	ExpressionActionsChain chain;
	analyzer.appendSelect(chain, false);
	analyzer.appendProjectResult(chain, false);
	chain.finalize();
	ExpressionActionsPtr expression = chain.getLastActions();

	StoragePtr table = StorageSystemNumbers::create("Numbers");

	Names column_names;
	column_names.push_back("number");

	QueryProcessingStage::Enum stage;

	BlockInputStreamPtr in;
	in = table->read(column_names, 0, context, Settings(), stage)[0];
	in = std::make_shared<ExpressionBlockInputStream>(in, expression);
	in = std::make_shared<LimitBlockInputStream>(in, 10, std::max(static_cast<Int64>(0), static_cast<Int64>(n) - 10));

	WriteBufferFromOStream out1(std::cout);
	RowOutputStreamPtr out2 = std::make_shared<TabSeparatedRowOutputStream>(out1, expression->getSampleBlock());
	BlockOutputStreamFromRowOutputStream out(out2);

	{
		Stopwatch stopwatch;
		stopwatch.start();

		copyData(*in, out);

		stopwatch.stop();
		std::cout << std::fixed << std::setprecision(2)
			<< "Elapsed " << stopwatch.elapsedSeconds() << " sec."
			<< ", " << n / stopwatch.elapsedSeconds() << " rows/sec."
			<< std::endl;
	}

	return 0;
}
catch (const DB::Exception & e)
{
	std::cerr << e.what() << ", " << e.displayText() << std::endl;
	throw;
}
Ejemplo n.º 2
0
bool filterBlockWithQuery(ASTPtr query, Block & block, const Context & context)
{
	query = query->clone();
	const ASTSelectQuery & select = typeid_cast<ASTSelectQuery & >(*query);
	if (!select.where_expression && !select.prewhere_expression)
		return false;

	NameSet columns;
	for (const auto & it : block.getColumnsList())
		columns.insert(it.name);

	/// Составим выражение, вычисляющее выражения в WHERE и PREWHERE, зависящие только от имеющихся столбцов.
	std::vector<ASTPtr> functions;
	if (select.where_expression)
		extractFunctions(select.where_expression, columns, functions);
	if (select.prewhere_expression)
		extractFunctions(select.prewhere_expression, columns, functions);
	ASTPtr expression_ast = buildWhereExpression(functions);
	if (!expression_ast)
		return false;

	/// Распарсим и вычислим выражение.
	ExpressionAnalyzer analyzer(expression_ast, context, {}, block.getColumnsList());
	ExpressionActionsPtr actions = analyzer.getActions(false);
	actions->execute(block);

	/// Отфильтруем блок.
	String filter_column_name = expression_ast->getColumnName();
	ColumnPtr filter_column = block.getByName(filter_column_name).column;
	if (auto converted = filter_column->convertToFullColumnIfConst())
		filter_column = converted;
	const IColumn::Filter & filter = dynamic_cast<ColumnUInt8 &>(*filter_column).getData();

	if (std::accumulate(filter.begin(), filter.end(), 0ul) == filter.size())
		return false;

	for (size_t i = 0; i < block.columns(); ++i)
	{
		ColumnPtr & column = block.safeGetByPosition(i).column;
		column = column->filter(filter, -1);
	}

	return true;
}
Ejemplo n.º 3
0
bool filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context)
{
    const ASTSelectQuery & select = typeid_cast<const ASTSelectQuery & >(*query);
    if (!select.where_expression && !select.prewhere_expression)
        return false;

    NameSet columns;
    for (const auto & it : block.getColumnsList())
        columns.insert(it.name);

    /// We will create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns.
    std::vector<ASTPtr> functions;
    if (select.where_expression)
        extractFunctions(select.where_expression, columns, functions);
    if (select.prewhere_expression)
        extractFunctions(select.prewhere_expression, columns, functions);
    ASTPtr expression_ast = buildWhereExpression(functions);
    if (!expression_ast)
        return false;

    /// Let's parse and calculate the expression.
    ExpressionAnalyzer analyzer(expression_ast, context, {}, block.getColumnsList());
    ExpressionActionsPtr actions = analyzer.getActions(false);
    actions->execute(block);

    /// Filter the block.
    String filter_column_name = expression_ast->getColumnName();
    ColumnPtr filter_column = block.getByName(filter_column_name).column;
    if (auto converted = filter_column->convertToFullColumnIfConst())
        filter_column = converted;
    const IColumn::Filter & filter = dynamic_cast<ColumnUInt8 &>(*filter_column).getData();

    if (countBytesInFilter(filter) == 0)
        return false;

    for (size_t i = 0; i < block.columns(); ++i)
    {
        ColumnPtr & column = block.safeGetByPosition(i).column;
        column = column->filter(filter, -1);
    }

    return true;
}
Ejemplo n.º 4
0
void StorageMerge::convertingSourceStream(const Block & header, const Context & context, ASTPtr & query,
                                          BlockInputStreamPtr & source_stream, QueryProcessingStage::Enum processed_stage)
{
    Block before_block_header = source_stream->getHeader();
    source_stream = std::make_shared<ConvertingBlockInputStream>(context, source_stream, header, ConvertingBlockInputStream::MatchColumnsMode::Name);

    ASTPtr where_expression = typeid_cast<ASTSelectQuery *>(query.get())->where_expression;

    if (!where_expression)
        return;

    for (size_t column_index : ext::range(0, header.columns()))
    {
        ColumnWithTypeAndName header_column = header.getByPosition(column_index);
        ColumnWithTypeAndName before_column = before_block_header.getByName(header_column.name);
        /// If the processed_stage greater than FetchColumns and the block structure between streams is different.
        /// the where expression maybe invalid because of convertingBlockInputStream.
        /// So we need to throw exception.
        if (!header_column.type->equals(*before_column.type.get()) && processed_stage > QueryProcessingStage::FetchColumns)
        {
            NamesAndTypesList source_columns = getSampleBlock().getNamesAndTypesList();
            NameAndTypePair virtual_column = getColumn("_table");
            source_columns.insert(source_columns.end(), virtual_column);
            auto syntax_result = SyntaxAnalyzer(context).analyze(where_expression, source_columns);
            ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, context}.getActions(false, false);
            Names required_columns = actions->getRequiredColumns();

            for (const auto & required_column : required_columns)
            {
                if (required_column == header_column.name)
                    throw Exception("Block structure mismatch in Merge Storage: different types:\n" + before_block_header.dumpStructure()
                                    + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE);
            }
        }

    }
}
Ejemplo n.º 5
0
std::vector<std::size_t> MergeTreeReadPool::fillPerPartInfo(
	RangesInDataParts & parts, const ExpressionActionsPtr & prewhere_actions, const String & prewhere_column_name,
	const bool check_columns)
{
	std::vector<std::size_t> per_part_sum_marks;

	for (const auto i : ext::range(0, parts.size()))
	{
		auto & part = parts[i];

		/// Read marks for every data part.
		size_t sum_marks = 0;
		/// Ranges are in right-to-left order, due to 'reverse' in MergeTreeDataSelectExecutor.
		for (const auto & range : part.ranges)
			sum_marks += range.end - range.begin;

		per_part_sum_marks.push_back(sum_marks);

		per_part_columns_lock.push_back(std::make_unique<Poco::ScopedReadRWLock>(
			part.data_part->columns_lock));

		/// inject column names required for DEFAULT evaluation in current part
		auto required_column_names = column_names;

		const auto injected_columns = injectRequiredColumns(part.data_part, required_column_names);
		auto should_reoder = !injected_columns.empty();

		Names required_pre_column_names;

		if (prewhere_actions)
		{
			/// collect columns required for PREWHERE evaluation
			required_pre_column_names = prewhere_actions->getRequiredColumns();

			/// there must be at least one column required for PREWHERE
			if (required_pre_column_names.empty())
				required_pre_column_names.push_back(required_column_names[0]);

			/// PREWHERE columns may require some additional columns for DEFAULT evaluation
			const auto injected_pre_columns = injectRequiredColumns(part.data_part, required_pre_column_names);
			if (!injected_pre_columns.empty())
				should_reoder = true;

			/// will be used to distinguish between PREWHERE and WHERE columns when applying filter
			const NameSet pre_name_set{
				std::begin(required_pre_column_names), std::end(required_pre_column_names)
			};
			/** If expression in PREWHERE is not table column, then no need to return column with it to caller
				*	(because storage is expected only to read table columns).
				*/
			per_part_remove_prewhere_column.push_back(0 == pre_name_set.count(prewhere_column_name));

			Names post_column_names;
			for (const auto & name : required_column_names)
				if (!pre_name_set.count(name))
					post_column_names.push_back(name);

			required_column_names = post_column_names;
		}
		else
			per_part_remove_prewhere_column.push_back(false);

		per_part_column_name_set.emplace_back(std::begin(required_column_names), std::end(required_column_names));

		if (check_columns)
		{
			/** Under part->columns_lock check that all requested columns in part are of same type that in table.
				*	This could be violated during ALTER MODIFY.
				*/
			if (!required_pre_column_names.empty())
				data.check(part.data_part->columns, required_pre_column_names);
			if (!required_column_names.empty())
				data.check(part.data_part->columns, required_column_names);

			per_part_pre_columns.push_back(data.getColumnsList().addTypes(required_pre_column_names));
			per_part_columns.push_back(data.getColumnsList().addTypes(required_column_names));
		}
		else
		{
			per_part_pre_columns.push_back(part.data_part->columns.addTypes(required_pre_column_names));
			per_part_columns.push_back(part.data_part->columns.addTypes(required_column_names));
		}

		per_part_should_reorder.push_back(should_reoder);

		this->parts.push_back({ part.data_part, part.part_index_in_query });
	}

	return per_part_sum_marks;
}
Ejemplo n.º 6
0
int main(int argc, char ** argv)
{
    using namespace DB;

    try
    {
        NamesAndTypesList names_and_types_list
        {
            {"WatchID",                std::make_shared<DataTypeUInt64>()},
            {"JavaEnable",            std::make_shared<DataTypeUInt8>()},
            {"Title",                std::make_shared<DataTypeString>()},
            {"EventTime",            std::make_shared<DataTypeDateTime>()},
            {"CounterID",            std::make_shared<DataTypeUInt32>()},
            {"ClientIP",            std::make_shared<DataTypeUInt32>()},
            {"RegionID",            std::make_shared<DataTypeUInt32>()},
            {"UniqID",                std::make_shared<DataTypeUInt64>()},
            {"CounterClass",        std::make_shared<DataTypeUInt8>()},
            {"OS",                    std::make_shared<DataTypeUInt8>()},
            {"UserAgent",            std::make_shared<DataTypeUInt8>()},
            {"URL",                    std::make_shared<DataTypeString>()},
            {"Referer",                std::make_shared<DataTypeString>()},
            {"ResolutionWidth",        std::make_shared<DataTypeUInt16>()},
            {"ResolutionHeight",    std::make_shared<DataTypeUInt16>()},
            {"ResolutionDepth",        std::make_shared<DataTypeUInt8>()},
            {"FlashMajor",            std::make_shared<DataTypeUInt8>()},
            {"FlashMinor",            std::make_shared<DataTypeUInt8>()},
            {"FlashMinor2",            std::make_shared<DataTypeString>()},
            {"NetMajor",            std::make_shared<DataTypeUInt8>()},
            {"NetMinor",            std::make_shared<DataTypeUInt8>()},
            {"UserAgentMajor",        std::make_shared<DataTypeUInt16>()},
            {"UserAgentMinor",        std::make_shared<DataTypeFixedString>(2)},
            {"CookieEnable",        std::make_shared<DataTypeUInt8>()},
            {"JavascriptEnable",    std::make_shared<DataTypeUInt8>()},
            {"IsMobile",            std::make_shared<DataTypeUInt8>()},
            {"MobilePhone",            std::make_shared<DataTypeUInt8>()},
            {"MobilePhoneModel",    std::make_shared<DataTypeString>()},
            {"Params",                std::make_shared<DataTypeString>()},
            {"IPNetworkID",            std::make_shared<DataTypeUInt32>()},
            {"TraficSourceID",        std::make_shared<DataTypeInt8>()},
            {"SearchEngineID",        std::make_shared<DataTypeUInt16>()},
            {"SearchPhrase",        std::make_shared<DataTypeString>()},
            {"AdvEngineID",            std::make_shared<DataTypeUInt8>()},
            {"IsArtifical",            std::make_shared<DataTypeUInt8>()},
            {"WindowClientWidth",    std::make_shared<DataTypeUInt16>()},
            {"WindowClientHeight",    std::make_shared<DataTypeUInt16>()},
            {"ClientTimeZone",        std::make_shared<DataTypeInt16>()},
            {"ClientEventTime",        std::make_shared<DataTypeDateTime>()},
            {"SilverlightVersion1",    std::make_shared<DataTypeUInt8>()},
            {"SilverlightVersion2",    std::make_shared<DataTypeUInt8>()},
            {"SilverlightVersion3",    std::make_shared<DataTypeUInt32>()},
            {"SilverlightVersion4",    std::make_shared<DataTypeUInt16>()},
            {"PageCharset",            std::make_shared<DataTypeString>()},
            {"CodeVersion",            std::make_shared<DataTypeUInt32>()},
            {"IsLink",                std::make_shared<DataTypeUInt8>()},
            {"IsDownload",            std::make_shared<DataTypeUInt8>()},
            {"IsNotBounce",            std::make_shared<DataTypeUInt8>()},
            {"FUniqID",                std::make_shared<DataTypeUInt64>()},
            {"OriginalURL",            std::make_shared<DataTypeString>()},
            {"HID",                    std::make_shared<DataTypeUInt32>()},
            {"IsOldCounter",        std::make_shared<DataTypeUInt8>()},
            {"IsEvent",                std::make_shared<DataTypeUInt8>()},
            {"IsParameter",            std::make_shared<DataTypeUInt8>()},
            {"DontCountHits",        std::make_shared<DataTypeUInt8>()},
            {"WithHash",            std::make_shared<DataTypeUInt8>()},
        };

        Context context;

        std::string input = "SELECT UniqID, URL, CounterID, IsLink WHERE URL = 'http://mail.yandex.ru/neo2/#inbox'";
        ParserSelectQuery parser;
        ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "");

        formatAST(*ast, std::cerr);
        std::cerr << std::endl;
        std::cerr << ast->getTreeID() << std::endl;

        /// create an object of an existing hit log table

        StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list));

        /// read from it, apply the expression, filter, and write in tsv form to the console

        ExpressionAnalyzer analyzer(ast, context, nullptr, names_and_types_list);
        ExpressionActionsChain chain;
        analyzer.appendSelect(chain, false);
        analyzer.appendWhere(chain, false);
        chain.finalize();
        ExpressionActionsPtr expression = chain.getLastActions();

        Names column_names
        {
            "UniqID",
            "URL",
            "CounterID",
            "IsLink",
        };

        QueryProcessingStage::Enum stage;

        BlockInputStreamPtr in = table->read(column_names, 0, context, stage)[0];
        in = std::make_shared<FilterBlockInputStream>(in, expression, 4);
        //in = std::make_shared<LimitBlockInputStream>(in, 10, 0);

        WriteBufferFromOStream ob(std::cout);
        RowOutputStreamPtr out_ = std::make_shared<TabSeparatedRowOutputStream>(ob, expression->getSampleBlock());
        BlockOutputStreamFromRowOutputStream out(out_);

        copyData(*in, out);
    }
    catch (const Exception & e)
    {
        std::cerr << e.what() << ", " << e.displayText() << std::endl;
        return 1;
    }

    return 0;
}
Ejemplo n.º 7
0
int main(int argc, char ** argv)
{
    using namespace DB;

    try
    {
        std::string input = "SELECT x, s1, s2, "
            "/*"
            "2 + x * 2, x * 2, x % 3 == 1, "
            "s1 == 'abc', s1 == s2, s1 != 'abc', s1 != s2, "
            "s1 <  'abc', s1 <  s2, s1 >  'abc', s1 >  s2, "
            "s1 <= 'abc', s1 <= s2, s1 >= 'abc', s1 >= s2, "
            "*/"
            "s1 < s2 AND x % 3 < x % 5";

        ParserSelectQuery parser;
        ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0);

        formatAST(*ast, std::cerr);
        std::cerr << std::endl;

        Context context = Context::createGlobal();
        NamesAndTypesList columns
        {
            {"x", std::make_shared<DataTypeInt16>()},
            {"s1", std::make_shared<DataTypeString>()},
            {"s2", std::make_shared<DataTypeString>()}
        };

        auto syntax_result = SyntaxAnalyzer(context, {}).analyze(ast, columns);
        ExpressionAnalyzer analyzer(ast, syntax_result, context);
        ExpressionActionsChain chain(context);
        analyzer.appendSelect(chain, false);
        analyzer.appendProjectResult(chain);
        chain.finalize();
        ExpressionActionsPtr expression = chain.getLastActions();

        size_t n = argc == 2 ? atoi(argv[1]) : 10;

        Block block;

        {
            ColumnWithTypeAndName column;
            column.name = "x";
            column.type = std::make_shared<DataTypeInt16>();
            auto col = ColumnInt16::create();
            auto & vec_x = col->getData();

            vec_x.resize(n);
            for (size_t i = 0; i < n; ++i)
                vec_x[i] = i % 9;

            column.column = std::move(col);
            block.insert(column);
        }

        const char * strings[] = {"abc", "def", "abcd", "defg", "ac"};

        {
            ColumnWithTypeAndName column;
            column.name = "s1";
            column.type = std::make_shared<DataTypeString>();
            auto col = ColumnString::create();

            for (size_t i = 0; i < n; ++i)
                col->insert(std::string(strings[i % 5]));

            column.column = std::move(col);
            block.insert(column);
        }

        {
            ColumnWithTypeAndName column;
            column.name = "s2";
            column.type = std::make_shared<DataTypeString>();
            auto col = ColumnString::create();

            for (size_t i = 0; i < n; ++i)
                col->insert(std::string(strings[i % 3]));

            column.column = std::move(col);
            block.insert(column);
        }

        {
            Stopwatch stopwatch;
            stopwatch.start();

            expression->execute(block);

            stopwatch.stop();
            std::cout << std::fixed << std::setprecision(2)
                << "Elapsed " << stopwatch.elapsedSeconds() << " sec."
                << ", " << n / stopwatch.elapsedSeconds() << " rows/sec."
                << std::endl;
        }

        auto is = std::make_shared<OneBlockInputStream>(block);
        LimitBlockInputStream lis(is, 20, std::max(0, static_cast<int>(n) - 20));
        WriteBufferFromOStream out_buf(std::cout);
        BlockOutputStreamPtr out = FormatFactory::instance().getOutput("TabSeparated", out_buf, block, context);

        copyData(lis, *out);
    }
    catch (const Exception & e)
    {
        std::cerr << e.displayText() << std::endl;
    }

    return 0;
}
Ejemplo n.º 8
0
int main(int argc, char ** argv)
try
{
    using namespace DB;

    std::string input = "SELECT number, number % 10000000 == 1";

    ParserSelectQuery parser;
    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "");

    formatAST(*ast, std::cerr);
    std::cerr << std::endl;

    Context context;

    ExpressionAnalyzer analyzer(ast, context, {}, {NameAndTypePair("number", std::make_shared<DataTypeUInt64>())});
    ExpressionActionsChain chain;
    analyzer.appendSelect(chain, false);
    analyzer.appendProjectResult(chain, false);
    chain.finalize();
    ExpressionActionsPtr expression = chain.getLastActions();

    StoragePtr table = StorageSystemNumbers::create("Numbers");

    Names column_names;
    column_names.push_back("number");

    QueryProcessingStage::Enum stage;

    BlockInputStreamPtr in = table->read(column_names, 0, context, stage)[0];

    ForkBlockInputStreams fork(in);

    BlockInputStreamPtr in1 = fork.createInput();
    BlockInputStreamPtr in2 = fork.createInput();

    in1 = std::make_shared<FilterBlockInputStream>(in1, expression, 1);
    in1 = std::make_shared<LimitBlockInputStream>(in1, 10, 0);

    in2 = std::make_shared<FilterBlockInputStream>(in2, expression, 1);
    in2 = std::make_shared<LimitBlockInputStream>(in2, 20, 5);

    Block out_sample = expression->getSampleBlock();

    WriteBufferFromOStream ob1(std::cout);
    WriteBufferFromOStream ob2(std::cerr);

    BlockOutputStreamPtr out1 = context.getOutputFormat("TabSeparated", ob1, out_sample);
    BlockOutputStreamPtr out2 = context.getOutputFormat("TabSeparated", ob2, out_sample);

    std::thread thr1(std::bind(thread1, in1, out1, std::ref(ob1)));
    std::thread thr2(std::bind(thread2, in2, out2, std::ref(ob2)));

    fork.run();

    thr1.join();
    thr2.join();

    return 0;
}
catch (const DB::Exception & e)
{
    std::cerr << e.what() << ", " << e.displayText() << std::endl;
    throw;
}