コード例 #1
0
/** Reads index entries from `istr` until the buffer is exhausted, appending one element to `blocks`
  * for every block described in the stream.
  *
  * For each block the number of columns and the number of rows are read, followed by
  * (name, type, offset in compressed file, offset in decompressed block) for every column.
  * Only columns whose name is in `required_columns` are kept; afterwards `num_columns`
  * is overwritten with the number of kept columns.
  *
  * Throws Exception with code INCORRECT_INDEX if a block does not contain every required column
  * or if it lists a required column more than once.
  */
void IndexForNativeFormat::read(ReadBuffer & istr, const NameSet & required_columns)
{
	while (!istr.eof())
	{
		blocks.emplace_back();
		IndexOfBlockForNativeFormat & block = blocks.back();

		readVarUInt(block.num_columns, istr);
		readVarUInt(block.num_rows, istr);

		/// Cheap early rejection: a block with fewer columns in total cannot contain all required ones.
		if (block.num_columns < required_columns.size())
			throw Exception("Index contain less than required columns", ErrorCodes::INCORRECT_INDEX);

		/// Required names already met in this block. Comparing only the totals is not enough:
		/// one required column listed twice while another one is absent gives a matching count.
		NameSet seen_columns;

		for (size_t i = 0; i < block.num_columns; ++i)
		{
			IndexOfOneColumnForNativeFormat column_index;

			readBinary(column_index.name, istr);
			readBinary(column_index.type, istr);
			readBinary(column_index.location.offset_in_compressed_file, istr);
			readBinary(column_index.location.offset_in_decompressed_block, istr);

			/// Columns that were not requested are read (to advance the stream) and dropped.
			if (!required_columns.count(column_index.name))
				continue;

			if (!seen_columns.insert(column_index.name).second)
				throw Exception("Index contain duplicate columns", ErrorCodes::INCORRECT_INDEX);

			block.columns.push_back(std::move(column_index));
		}

		/// Every kept column is distinct and required, so a smaller count means something is missing.
		if (block.columns.size() < required_columns.size())
			throw Exception("Index contain less than required columns", ErrorCodes::INCORRECT_INDEX);

		block.num_columns = block.columns.size();
	}
}
コード例 #2
0
/// Returns a new list that holds, in their original order, only those columns
/// whose name is present in `names`.
NamesAndTypesList NamesAndTypesList::filter(const NameSet & names) const
{
    NamesAndTypesList filtered;

    for (const NameAndTypePair & name_and_type : *this)
    {
        /// Skip everything that was not asked for.
        if (names.count(name_and_type.name) == 0)
            continue;

        filtered.push_back(name_and_type);
    }

    return filtered;
}
コード例 #3
0
/// Checks that the expression refers to no columns other than those listed in `columns`.
/// All child subtrees are checked recursively before the node itself.
static bool isValidFunction(ASTPtr expression, const NameSet & columns)
{
    for (const auto & child : expression->children)
    {
        if (!isValidFunction(child, columns))
            return false;
    }

    const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(&*expression);

    /// Nodes that are not column identifiers impose no restriction of their own.
    if (!identifier || identifier->kind != ASTIdentifier::Kind::Column)
        return true;

    return columns.count(identifier->name) != 0;
}
コード例 #4
0
/** Fills the per-part bookkeeping vectors of the pool (locks, column lists, name sets, flags)
  * with one entry for every element of `parts`, and registers each part in `this->parts`.
  *
  * parts                - data parts together with the mark ranges to read from each of them.
  * prewhere_actions     - actions of the PREWHERE expression; may be null when there is no PREWHERE.
  * prewhere_column_name - name of the column holding the PREWHERE result.
  * check_columns        - if true, the requested columns are validated via `data.check` and their types
  *                        are taken from the table; otherwise types are taken from the part itself.
  *
  * Returns the total number of marks to read for every part, in the order of `parts`.
  */
std::vector<std::size_t> MergeTreeReadPool::fillPerPartInfo(
	RangesInDataParts & parts, const ExpressionActionsPtr & prewhere_actions, const String & prewhere_column_name,
	const bool check_columns)
{
	std::vector<std::size_t> marks_per_part;

	for (auto & part : parts)
	{
		/// Count the marks covered by all ranges of this part.
		/// Ranges are in right-to-left order, due to 'reverse' in MergeTreeDataSelectExecutor.
		size_t marks_in_part = 0;
		for (const auto & range : part.ranges)
			marks_in_part += range.end - range.begin;

		marks_per_part.push_back(marks_in_part);

		/// Keep a read lock on the columns of the part; it is stored in the pool.
		per_part_columns_lock.push_back(
			std::make_unique<Poco::ScopedReadRWLock>(part.data_part->columns_lock));

		/// Start from the requested columns and add those needed to evaluate DEFAULTs in this part.
		auto names_to_read = column_names;
		bool needs_reordering = !injectRequiredColumns(part.data_part, names_to_read).empty();

		Names prewhere_names;

		if (!prewhere_actions)
		{
			per_part_remove_prewhere_column.push_back(false);
		}
		else
		{
			/// Columns needed to evaluate PREWHERE; at least one column must be read.
			prewhere_names = prewhere_actions->getRequiredColumns();
			if (prewhere_names.empty())
				prewhere_names.push_back(names_to_read[0]);

			/// PREWHERE columns may in turn need additional columns for DEFAULT evaluation.
			if (!injectRequiredColumns(part.data_part, prewhere_names).empty())
				needs_reordering = true;

			/// Used to tell PREWHERE columns from the remaining ones.
			const NameSet prewhere_name_set(prewhere_names.begin(), prewhere_names.end());

			/// If the PREWHERE expression is not a table column, there is no need to return it to the caller
			/// (the storage is expected to read table columns only).
			per_part_remove_prewhere_column.push_back(prewhere_name_set.count(prewhere_column_name) == 0);

			/// Whatever is already read at the PREWHERE stage is not read a second time.
			Names remaining_names;
			for (const auto & name : names_to_read)
			{
				if (prewhere_name_set.count(name) == 0)
					remaining_names.push_back(name);
			}

			names_to_read.swap(remaining_names);
		}

		per_part_column_name_set.emplace_back(names_to_read.begin(), names_to_read.end());

		if (!check_columns)
		{
			per_part_pre_columns.push_back(part.data_part->columns.addTypes(prewhere_names));
			per_part_columns.push_back(part.data_part->columns.addTypes(names_to_read));
		}
		else
		{
			/// Under part->columns_lock, check that all requested columns of the part have the same types
			/// as in the table. This could be violated during ALTER MODIFY.
			if (!prewhere_names.empty())
				data.check(part.data_part->columns, prewhere_names);
			if (!names_to_read.empty())
				data.check(part.data_part->columns, names_to_read);

			per_part_pre_columns.push_back(data.getColumnsList().addTypes(prewhere_names));
			per_part_columns.push_back(data.getColumnsList().addTypes(names_to_read));
		}

		per_part_should_reorder.push_back(needs_reordering);

		this->parts.push_back({ part.data_part, part.part_index_in_query });
	}

	return marks_per_part;
}
コード例 #5
0
bool MergeTreeBlockInputStream::getNewTask()
try
{
    /// Produce only one task
    if (!is_first_task)
    {
        finish();
        return false;
    }
    is_first_task = false;

    Names pre_column_names, column_names = ordered_names;
    bool remove_prewhere_column = false;

    /// inject columns required for defaults evaluation
    bool should_reorder = !injectRequiredColumns(storage, data_part, column_names).empty();

    if (prewhere_actions)
    {
        pre_column_names = prewhere_actions->getRequiredColumns();

        if (pre_column_names.empty())
            pre_column_names.push_back(column_names[0]);

        const auto injected_pre_columns = injectRequiredColumns(storage, data_part, pre_column_names);
        if (!injected_pre_columns.empty())
            should_reorder = true;

        const NameSet pre_name_set(pre_column_names.begin(), pre_column_names.end());
        /// If the expression in PREWHERE is not a column of the table, you do not need to output a column with it
        ///  (from storage expect to receive only the columns of the table).
        remove_prewhere_column = !pre_name_set.count(prewhere_column);

        Names post_column_names;
        for (const auto & name : column_names)
            if (!pre_name_set.count(name))
                post_column_names.push_back(name);

        column_names = post_column_names;
    }

    /// will be used to distinguish between PREWHERE and WHERE columns when applying filter
    column_name_set = NameSet{column_names.begin(), column_names.end()};

    if (check_columns)
    {
        /// Under owned_data_part->columns_lock we check that all requested columns are of the same type as in the table.
        /// This may be not true in case of ALTER MODIFY.
        if (!pre_column_names.empty())
            storage.check(data_part->columns, pre_column_names);
        if (!column_names.empty())
            storage.check(data_part->columns, column_names);

        pre_columns = storage.getColumnsList().addTypes(pre_column_names);
        columns = storage.getColumnsList().addTypes(column_names);
    }
    else
    {
        pre_columns = data_part->columns.addTypes(pre_column_names);
        columns = data_part->columns.addTypes(column_names);
    }

    /** @note you could simply swap `reverse` in if and else branches of MergeTreeDataSelectExecutor,
     * and remove this reverse. */
    MarkRanges remaining_mark_ranges = all_mark_ranges;
    std::reverse(remaining_mark_ranges.begin(), remaining_mark_ranges.end());

    auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr
                          : std::make_unique<MergeTreeBlockSizePredictor>(data_part, ordered_names, data_part->storage.getSampleBlock());

    task = std::make_unique<MergeTreeReadTask>(data_part, remaining_mark_ranges, part_index_in_query, ordered_names,
                                               column_name_set, columns, pre_columns, remove_prewhere_column, should_reorder,
                                               std::move(size_predictor));

    if (!reader)
    {
        if (use_uncompressed_cache)
            owned_uncompressed_cache = storage.context.getUncompressedCache();

        owned_mark_cache = storage.context.getMarkCache();

        reader = std::make_unique<MergeTreeReader>(
            path, data_part, columns, owned_uncompressed_cache.get(),
            owned_mark_cache.get(), save_marks_in_cache, storage,
            all_mark_ranges, min_bytes_to_use_direct_io, max_read_buffer_size);

        if (prewhere_actions)
            pre_reader = std::make_unique<MergeTreeReader>(
                path, data_part, pre_columns, owned_uncompressed_cache.get(),
                owned_mark_cache.get(), save_marks_in_cache, storage,
                all_mark_ranges, min_bytes_to_use_direct_io, max_read_buffer_size);
    }

    return true;
}
catch (...)
{
    /// Suspicion of the broken part. A part is added to the queue for verification.
    if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
        storage.reportBrokenPart(data_part->name);
    throw;
}
コード例 #6
0
ファイル: Join.cpp プロジェクト: chipitsine/ClickHouse
/** Joins the stored right-hand data to `block`, modifying `block` in place.
  *
  * Steps, in order:
  *  1. materialize the left key columns and extract their NULL map;
  *  2. for FULL/RIGHT joins (as reported by `getFullness(kind)`), materialize constant left columns
  *     and, if `use_nulls`, make non-key left columns Nullable;
  *  3. create empty columns for every column of `sample_block_with_columns_to_add` that is absent from
  *     `block` and present in `block_with_columns_to_add`;
  *  4. dispatch to `joinBlockImplType` according to `type` to fill the new columns, the filter and the offsets;
  *  5. append the new columns to `block`; filter or replicate the pre-existing columns where applicable;
  *  6. add right-named key columns requested through `needed_key_names_right`.
  *
  * block                     - left block; receives the added columns.
  * key_names_left            - names of the key columns in `block`.
  * needed_key_names_right    - right key names that must be present in the result.
  * block_with_columns_to_add - only columns present in this block are added.
  * maps_                     - hash maps with the right-hand data; the member matching `type` is used.
  *
  * Throws Exception with code UNKNOWN_SET_DATA_VARIANT if `type` matches none of the known variants.
  *
  * NOTE(review): `KIND` and `STRICTNESS` used in the dispatch macro, as well as `key_names_right` and
  * `key_sizes`, are not declared in the visible text. Presumably the first two are template parameters
  * (the `template <...>` header seems to be missing) and the others are class members — confirm.
  */
void Join::joinBlockImpl(
    Block & block,
    const Names & key_names_left,
    const NameSet & needed_key_names_right,
    const Block & block_with_columns_to_add,
    const Maps & maps_) const
{
    size_t keys_size = key_names_left.size();
    ColumnRawPtrs key_columns(keys_size);

    /// Rare case, when keys are constant. To avoid code bloat, simply materialize them.
    /// `materialized_columns` owns the converted columns; `key_columns` holds raw pointers into it.
    Columns materialized_columns;
    materialized_columns.reserve(keys_size);

    /// Memoize key columns to work with.
    for (size_t i = 0; i < keys_size; ++i)
    {
        materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column->convertToFullColumnIfConst()));
        key_columns[i] = materialized_columns.back().get();
    }

    /// Keys with NULL value in any column won't join to anything.
    ColumnPtr null_map_holder;
    ConstNullMapPtr null_map{};
    extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);

    /// Number of columns the block had before anything was added; used below to address only those columns.
    size_t existing_columns = block.columns();

    /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized.
      * Because if they are constants, then in the "not joined" rows, they may have different values
      *  - default values, which can differ from the values of these constants.
      */
    if (getFullness(kind))
    {
        for (size_t i = 0; i < existing_columns; ++i)
        {
            block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst();

            /// If use_nulls, convert left columns (except keys) to Nullable.
            if (use_nulls)
            {
                if (std::end(key_names_left) == std::find(key_names_left.begin(), key_names_left.end(), block.getByPosition(i).name))
                    convertColumnToNullable(block.getByPosition(i));
            }
        }
    }

    /** For LEFT/INNER JOIN, the saved blocks do not contain keys.
      * For FULL/RIGHT JOIN, the saved blocks contain keys;
      *  but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped.
      */
    size_t num_columns_to_skip = 0;
    if (getFullness(kind))
        num_columns_to_skip = keys_size;

    /// Add new columns to the block.
    size_t num_columns_to_add = sample_block_with_columns_to_add.columns();
    MutableColumns added_columns;
    added_columns.reserve(num_columns_to_add);

    /// Type and name of every added column, kept in the same order as `added_columns`.
    std::vector<std::pair<decltype(ColumnWithTypeAndName::type), decltype(ColumnWithTypeAndName::name)>> added_type_name;
    added_type_name.reserve(num_columns_to_add);

    /// For every added column, its position in the saved right-hand blocks (shifted by the skipped keys).
    std::vector<size_t> right_indexes;
    right_indexes.reserve(num_columns_to_add);

    for (size_t i = 0; i < num_columns_to_add; ++i)
    {
        const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i);

        /// Don't insert column if it's in left block or not explicitly required.
        if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name))
        {
            added_columns.push_back(src_column.column->cloneEmpty());
            added_columns.back()->reserve(src_column.column->size());
            added_type_name.emplace_back(src_column.type, src_column.name);
            right_indexes.push_back(num_columns_to_skip + i);
        }
    }

    size_t rows = block.rows();

    std::unique_ptr<IColumn::Filter> filter;

    /// The filter is always allocated; it is applied to the left columns only for ANY INNER | RIGHT JOIN,
    /// and is otherwise used below to build the index mapping for right-named key columns.
    bool filter_left_keys = (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any;
    filter = std::make_unique<IColumn::Filter>(rows);

    /// Used with ALL ... JOIN
    IColumn::Offset current_offset = 0;
    std::unique_ptr<IColumn::Offsets> offsets_to_replicate;

    if (strictness == ASTTableJoin::Strictness::All)
        offsets_to_replicate = std::make_unique<IColumn::Offsets>(rows);

    /// Dispatch on the key variant: one `case` is generated per variant listed by APPLY_FOR_JOIN_VARIANTS.
    switch (type)
    {
    #define M(TYPE) \
        case Join::Type::TYPE: \
            joinBlockImplType<KIND, STRICTNESS, typename KeyGetterForType<Join::Type::TYPE>::Type>(\
                *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, \
                filter, current_offset, offsets_to_replicate, right_indexes); \
            break;
        APPLY_FOR_JOIN_VARIANTS(M)
    #undef M

        default:
            throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
    }

    /// Move the filled columns into the block, after the pre-existing ones.
    const auto added_columns_size = added_columns.size();
    for (size_t i = 0; i < added_columns_size; ++i)
        block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), added_type_name[i].first, added_type_name[i].second));

    /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones.
    if (filter_left_keys)
        for (size_t i = 0; i < existing_columns; ++i)
            block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1);

    /// Built lazily on first use and shared by all right-named key columns added below.
    ColumnUInt64::Ptr mapping;

    /// Add join key columns from the right block if they have different names.
    for (size_t i = 0; i < key_names_right.size(); ++i)
    {
        auto & right_name = key_names_right[i];
        auto & left_name = key_names_left[i];

        if (needed_key_names_right.count(right_name) && !block.has(right_name))
        {
            const auto & col = block.getByName(left_name);
            auto column = col.column;
            if (!filter_left_keys)
            {
                if (!mapping)
                {
                    /// Rows with a non-zero filter value map to themselves; all other rows map to index `size`,
                    /// i.e. to the element appended by `insertDefault()` below.
                    auto mut_mapping = ColumnUInt64::create(column->size());
                    auto & data = mut_mapping->getData();
                    size_t size = column->size();
                    for (size_t j = 0; j < size; ++j)
                        data[j] = (*filter)[j] ? j : size;

                    mapping = std::move(mut_mapping);
                }

                /// Append one default value and pick rows through the mapping.
                auto mut_column = (*std::move(column)).mutate();
                mut_column->insertDefault();
                column = mut_column->index(*mapping, 0);
            }
            block.insert({column, col.type, right_name});
        }
    }

    /// If ALL ... JOIN - we replicate all the columns except the new ones.
    if (offsets_to_replicate)
    {
        for (size_t i = 0; i < existing_columns; ++i)
            block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate);
    }
}