コード例 #1
0
/// Collects the names of all databases returned by `context.getDatabases()` into a String column
/// called "database", passes the one-column block through VirtualColumnUtils::filterBlockWithQuery
/// together with `query`, and returns the column left in the block afterwards.
static ColumnPtr getFilteredDatabases(const ASTPtr & query, const Context & context)
{
    const auto databases = context.getDatabases();

    MutableColumnPtr database_names = ColumnString::create();
    for (const auto & name_and_database : databases)
        database_names->insert(name_and_database.first);

    Block databases_block;
    databases_block.insert(ColumnWithTypeAndName(std::move(database_names), std::make_shared<DataTypeString>(), "database"));

    VirtualColumnUtils::filterBlockWithQuery(query, databases_block, context);

    const auto & filtered = databases_block.getByPosition(0);
    return filtered.column;
}
コード例 #2
0
/** Reads the next block: for every entry of `columns` a fresh column is created and filled
  * through readData() with `block_size` as the row limit; columns that came back empty are not
  * put into the result. The result is returned after Nested::flatten().
  * An empty block is returned once the stream is finished or when the table directory has no files.
  */
Block TinyLogBlockInputStream::readImpl()
{
    if (finished || (!streams.empty() && streams.begin()->second->compressed.eof()))
    {
        /** Drop the streams right away (before the object is destroyed) so the files get closed.
          * With many sources created but only a few of them being read simultaneously,
          * the buffers then don't waste memory.
          */
        finished = true;
        streams.clear();
        return Block();
    }

    /// No files in the table folder means that the table is empty.
    if (Poco::DirectoryIterator(storage.full_path()) == Poco::DirectoryIterator())
        return Block();

    Block result;

    for (const auto & column_desc : columns)
    {
        const auto & column_name = column_desc.name;
        const auto & column_type = column_desc.type;

        MutableColumnPtr data = column_type->createColumn();

        try
        {
            readData(column_name, *column_type, *data, block_size);
        }
        catch (Exception & e)
        {
            /// Attach the column and the table path to the error for easier diagnostics.
            e.addMessage("while reading column " + column_name + " at " + storage.full_path());
            throw;
        }

        /// Columns for which nothing was read are left out of the result.
        if (data->size() == 0)
            continue;

        result.insert(ColumnWithTypeAndName(std::move(data), column_type, column_name));
    }

    /// Nothing was read at all, or the first stream is exhausted: this was the last block.
    if (!result || streams.begin()->second->compressed.eof())
    {
        finished = true;
        streams.clear();
    }

    return Nested::flatten(result);
}
コード例 #3
0
/** Returns the tables of `source_database` whose names match `table_name_regexp`, excluding this
  * storage itself, each paired with a structure lock (taken only when `get_lock` is true,
  * otherwise an empty lock pointer is stored).
  *
  * When `has_virtual_column` is true, the collected table names are put into a "_table" column,
  * filtered with `query` through VirtualColumnUtils::filterBlockWithQuery, and tables whose names
  * did not survive the filtering are removed from the result.
  *
  * Throws ILLEGAL_PREWHERE if the query has a PREWHERE expression and a matching table
  * does not support PREWHERE.
  */
StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const ASTPtr & query, bool has_virtual_column, bool get_lock) const
{
    StorageListWithLocks selected_tables;
    DatabasePtr database = global_context.getDatabase(source_database);
    DatabaseIteratorPtr iterator = database->getIterator(global_context);

    /// Whether the query has PREWHERE does not depend on the table, so it is computed once
    /// instead of on every matching table.
    /// The cast result is checked before use: previously it was dereferenced unconditionally,
    /// which is a null pointer dereference if `query` is not an ASTSelectQuery
    /// (assuming the pointer form of typeid_cast yields nullptr on mismatch — TODO confirm).
    const ASTSelectQuery * select_query = query ? typeid_cast<ASTSelectQuery *>(query.get()) : nullptr;
    const bool has_prewhere = select_query && select_query->prewhere_expression;

    auto virtual_column = ColumnString::create();

    for (; iterator->isValid(); iterator->next())
    {
        if (!table_name_regexp.match(iterator->name()))
            continue;

        StoragePtr storage = iterator->table();

        if (has_prewhere && !storage->supportsPrewhere())
            throw Exception("Storage " + storage->getName() + " doesn't support PREWHERE.", ErrorCodes::ILLEGAL_PREWHERE);

        /// The Merge table itself may match the regexp; it must not be selected.
        if (storage.get() != this)
        {
            virtual_column->insert(storage->getTableName());
            selected_tables.emplace_back(storage, get_lock ? storage->lockStructure(false) : TableStructureReadLockPtr{});
        }
    }

    if (has_virtual_column)
    {
        Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(virtual_column), std::make_shared<DataTypeString>(), "_table")};
        VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, global_context);
        auto values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_table");

        /// Remove unused tables from the list
        selected_tables.remove_if([&] (const auto & elem) { return values.find(elem.first->getTableName()) == values.end(); });
    }

    return selected_tables;
}
コード例 #4
0
void StorageCatBoostPool::createSampleBlockAndColumns()
{
    ColumnsDescription columns;
    NamesAndTypesList cat_columns;
    NamesAndTypesList num_columns;
    sample_block.clear();
    for (auto & desc : columns_description)
    {
        DataTypePtr type;
        if (desc.column_type == DatasetColumnType::Categ
            || desc.column_type == DatasetColumnType::Auxiliary
            || desc.column_type == DatasetColumnType::DocId)
            type = std::make_shared<DataTypeString>();
        else
            type = std::make_shared<DataTypeFloat64>();

        if (desc.column_type == DatasetColumnType::Categ)
            cat_columns.emplace_back(desc.column_name, type);
        else if (desc.column_type == DatasetColumnType::Num)
            num_columns.emplace_back(desc.column_name, type);
        else
            columns.materialized.emplace_back(desc.column_name, type);

        if (!desc.alias.empty())
        {
            auto alias = std::make_shared<ASTIdentifier>(desc.column_name);
            columns.defaults[desc.alias] = {ColumnDefaultKind::Alias, alias};
            columns.aliases.emplace_back(desc.alias, type);
        }

        sample_block.insert(ColumnWithTypeAndName(type, desc.column_name));
    }
    columns.ordinary.insert(columns.ordinary.end(), num_columns.begin(), num_columns.end());
    columns.ordinary.insert(columns.ordinary.end(), cat_columns.begin(), cat_columns.end());

    setColumns(columns);
}
コード例 #5
0
/** Asks the function attached to `function_info` whether it is injective for the given arguments.
  * Returns false when no function is attached. Otherwise a block is built with one entry per
  * argument of `ast_function`: constant arguments carry a one-row constant column with their value,
  * non-constant arguments carry a null column; the block is passed to isInjective().
  * Information about every argument is looked up in `all_info` by the argument's column name.
  */
static bool isInjectiveFunction(
    const ASTFunction * ast_function,
    const TypeAndConstantInference::ExpressionInfo & function_info,
    const TypeAndConstantInference::Info & all_info)
{
    const auto & function = function_info.function;
    if (!function)
        return false;

    Block arguments_block;

    for (const auto & argument : ast_function->arguments->children)
    {
        const String argument_name = argument->getColumnName();
        const TypeAndConstantInference::ExpressionInfo & argument_info = all_info.at(argument_name);

        /// Only constant expressions get a column; for the rest the column stays null.
        ColumnPtr constant_column;
        if (argument_info.is_constant_expression)
            constant_column = argument_info.data_type->createConstColumn(1, argument_info.value);

        arguments_block.insert(ColumnWithTypeAndName(constant_column, argument_info.data_type, argument_name));
    }

    return function->isInjective(arguments_block);
}
コード例 #6
0
/** Builds the single result block of the columns table: one row per column of every table,
  * with the fields database, table, name, type, default_type, default_expression and bytes.
  *
  * The set of (database, table) pairs is narrowed in two steps with
  * VirtualColumnUtils::filterBlockWithQuery: first on the `database` column alone,
  * then on `database` and `table` together. An empty result of either step yields no streams.
  *
  * `processed_stage` is set to FetchColumns. The `settings`, `max_block_size` and `threads`
  * parameters are not referenced in this body.
  * Returns exactly one OneBlockInputStream wrapping the composed block.
  */
BlockInputStreams StorageSystemColumns::read(
	const Names & column_names,
	ASTPtr query,
	const Context & context,
	const Settings & settings,
	QueryProcessingStage::Enum & processed_stage,
	const size_t max_block_size,
	const unsigned threads)
{
	check(column_names);
	processed_stage = QueryProcessingStage::FetchColumns;

	Block block;

	/// Storages found while enumerating tables, keyed by (database name, table name).
	std::map<std::pair<std::string, std::string>, StoragePtr> storages;

	{
		Databases databases = context.getDatabases();

		/// Add the `database` column.
		ColumnPtr database_column = std::make_shared<ColumnString>();
		for (const auto & database : databases)
			database_column->insert(database.first);
		block.insert(ColumnWithTypeAndName(database_column, std::make_shared<DataTypeString>(), "database"));

		/// Filter the block that contains only the `database` column.
		VirtualColumnUtils::filterBlockWithQuery(query, block, context);

		if (!block.rows())
			return BlockInputStreams();

		database_column = block.getByName("database").column;
		size_t rows = database_column->size();

		/// Add the `table` column.
		ColumnPtr table_column = std::make_shared<ColumnString>();
		/// offsets[i] is the cumulative number of tables in databases 0..i; it is used below
		/// to replicate each database row once per table of that database.
		IColumn::Offsets_t offsets(rows);
		for (size_t i = 0; i < rows; ++i)
		{
			const std::string database_name = (*database_column)[i].get<std::string>();
			const DatabasePtr database = databases.at(database_name);
			offsets[i] = i ? offsets[i - 1] : 0;

			for (auto iterator = database->getIterator(); iterator->isValid(); iterator->next())
			{
				const String & table_name = iterator->name();
				storages.emplace(std::piecewise_construct,
					std::forward_as_tuple(database_name, table_name),
					std::forward_as_tuple(iterator->table()));
				table_column->insert(table_name);
				offsets[i] += 1;
			}
		}

		/// Replicate the existing columns so that they line up with the rows of `table_column`.
		for (size_t i = 0; i < block.columns(); ++i)
		{
			ColumnPtr & column = block.getByPosition(i).column;
			column = column->replicate(offsets);
		}

		block.insert(ColumnWithTypeAndName(table_column, std::make_shared<DataTypeString>(), "table"));
	}

	/// Filter the block that contains the `database` and `table` columns.
	VirtualColumnUtils::filterBlockWithQuery(query, block, context);

	if (!block.rows())
		return BlockInputStreams();

	ColumnPtr filtered_database_column = block.getByName("database").column;
	ColumnPtr filtered_table_column = block.getByName("table").column;

	/// Compose the result.
	ColumnPtr database_column = std::make_shared<ColumnString>();
	ColumnPtr table_column = std::make_shared<ColumnString>();
	ColumnPtr name_column = std::make_shared<ColumnString>();
	ColumnPtr type_column = std::make_shared<ColumnString>();
	ColumnPtr default_type_column = std::make_shared<ColumnString>();
	ColumnPtr default_expression_column = std::make_shared<ColumnString>();
	ColumnPtr bytes_column = std::make_shared<ColumnUInt64>();

	size_t rows = filtered_database_column->size();
	for (size_t i = 0; i < rows; ++i)
	{
		const std::string database_name = (*filtered_database_column)[i].get<std::string>();
		const std::string table_name = (*filtered_table_column)[i].get<std::string>();

		NamesAndTypesList columns;
		ColumnDefaults column_defaults;
		std::unordered_map<String, size_t> column_sizes;

		/// The table metadata is copied out inside this scope; `storage` and `table_lock`
		/// are released when the scope ends.
		{
			StoragePtr storage = storages.at(std::make_pair(database_name, table_name));
			IStorage::TableStructureReadLockPtr table_lock;

			try
			{
				table_lock = storage->lockStructure(false);
			}
			catch (const Exception & e)
			{
				/** There are cases when IStorage::drop was called,
				  *  but we still own the object.
				  * Then the table will throw an exception at an attempt to lock it.
				  * Just skip the table.
				  */
				if (e.code() == ErrorCodes::TABLE_IS_DROPPED)
					continue;
				else
					throw;
			}

			columns = storage->getColumnsList();
			columns.insert(std::end(columns), std::begin(storage->alias_columns), std::end(storage->alias_columns));
			column_defaults = storage->column_defaults;

			/** Column size data for tables of the MergeTree family.
			  * NOTE: In the future an interface could be added that allows getting column sizes from IStorage.
			  */
			if (auto storage_concrete = dynamic_cast<StorageMergeTree *>(storage.get()))
			{
				column_sizes = storage_concrete->getData().getColumnSizes();
			}
			else if (auto storage_concrete = dynamic_cast<StorageReplicatedMergeTree *>(storage.get()))
			{
				column_sizes = storage_concrete->getData().getColumnSizes();

				/// Sizes from the unreplicated data, when present, are added on top.
				auto unreplicated_data = storage_concrete->getUnreplicatedData();
				if (unreplicated_data)
				{
					auto unreplicated_column_sizes = unreplicated_data->getColumnSizes();
					for (const auto & name_size : unreplicated_column_sizes)
						column_sizes[name_size.first] += name_size.second;
				}
			}
		}

		/// One result row per column of the table.
		for (const auto & column : columns)
		{
			database_column->insert(database_name);
			table_column->insert(table_name);
			name_column->insert(column.name);
			type_column->insert(column.type->getName());

			/// Default kind and expression; default (empty) values when the column has no default.
			{
				const auto it = column_defaults.find(column.name);
				if (it == std::end(column_defaults))
				{
					default_type_column->insertDefault();
					default_expression_column->insertDefault();
				}
				else
				{
					default_type_column->insert(toString(it->second.type));
					default_expression_column->insert(queryToString(it->second.expression));
				}
			}

			/// Column size; the default value when no size is known for the column.
			{
				const auto it = column_sizes.find(column.name);
				if (it == std::end(column_sizes))
					bytes_column->insertDefault();
				else
					bytes_column->insert(it->second);
			}
		}
	}

	/// The block used for filtering is discarded and refilled with the result columns.
	block.clear();

	block.insert(ColumnWithTypeAndName(database_column, std::make_shared<DataTypeString>(), "database"));
	block.insert(ColumnWithTypeAndName(table_column, std::make_shared<DataTypeString>(), "table"));
	block.insert(ColumnWithTypeAndName(name_column, std::make_shared<DataTypeString>(), "name"));
	block.insert(ColumnWithTypeAndName(type_column, std::make_shared<DataTypeString>(), "type"));
	block.insert(ColumnWithTypeAndName(default_type_column, std::make_shared<DataTypeString>(), "default_type"));
	block.insert(ColumnWithTypeAndName(default_expression_column, std::make_shared<DataTypeString>(), "default_expression"));
	block.insert(ColumnWithTypeAndName(bytes_column, std::make_shared<DataTypeUInt64>(), "bytes"));

	return BlockInputStreams{ 1, std::make_shared<OneBlockInputStream>(block) };
}
コード例 #7
0
/** Builds a block that contains the columns of `required_columns`:
  *  - columns present in `block` are taken as is;
  *  - columns that have an entry in `column_defaults` are skipped here and left to
  *    evaluateMissingDefaults(), which is called at the end;
  *  - the remaining missing columns are filled with default values of their type; if the block
  *    contains an array column of the same nested table, its offsets are reused so that
  *    the created arrays have matching lengths.
  */
Block addMissingDefaults(const Block & block,
                         const NamesAndTypesList & required_columns,
                         const ColumnDefaults & column_defaults,
                         const Context & context)
{
    /// Missing columns of a nested structure need arrays of correct lengths rather than empty arrays,
    /// so start by collecting the offsets of all array columns of the block, keyed by nested table name.
    std::map<String, ColumnPtr> offset_columns;

    const size_t input_columns = block.columns();
    for (size_t pos = 0; pos < input_columns; ++pos)
    {
        const auto & elem = block.getByPosition(pos);

        const ColumnArray * array = typeid_cast<const ColumnArray *>(&*elem.column);
        if (!array)
            continue;

        auto & known_offsets = offset_columns[Nested::extractTableName(elem.name)];

        /// Should one nested structure have different offset columns for some reason, prefer a nonempty one.
        if (!known_offsets || known_offsets->empty())
            known_offsets = array->getOffsetsPtr();
    }

    const size_t rows = block.rows();
    Block res;

    /// Take the columns that are present in the input block and create the missing columns that have
    /// no default expression (default and materialized columns are computed afterwards).
    for (const auto & required : required_columns)
    {
        const auto & name = required.name;
        const auto & type = required.type;

        if (block.has(name))
        {
            res.insert(block.getByName(name));
            continue;
        }

        if (column_defaults.count(name) != 0)
            continue;

        const auto offsets_it = offset_columns.find(Nested::extractTableName(name));
        if (offsets_it != offset_columns.end())
        {
            ColumnPtr offsets_column = offsets_it->second;
            DataTypePtr nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType();

            /// The last offset is the total number of nested elements.
            UInt64 nested_rows = 0;
            if (rows)
                nested_rows = get<UInt64>((*offsets_column)[rows - 1]);

            ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(nested_rows)->convertToFullColumnIfConst();
            auto array_column = ColumnArray::create(nested_column, offsets_column);
            res.insert(ColumnWithTypeAndName(std::move(array_column), type, name));
        }
        else
        {
            /** The constant column has to be turned into a full column, since in part of blocks (from other parts),
              *  it can be full (or the interpreter may decide that it is constant everywhere).
              */
            auto full_column = type->createColumnConstWithDefaultValue(rows)->convertToFullColumnIfConst();
            res.insert(ColumnWithTypeAndName(std::move(full_column), type, name));
        }
    }

    /// Compute the explicitly specified (in column_defaults) values of default and materialized columns.
    evaluateMissingDefaults(res, required_columns, column_defaults, context);
    return res;
}
コード例 #8
0
ファイル: Join.cpp プロジェクト: filimonov/ClickHouse
/** Joins `block` (the left side) against the hash maps in `maps`, modifying `block` in place:
  *  - columns of `sample_block_with_columns_to_add` that the block does not contain yet are appended;
  *  - the columns that existed before the call are filtered when `filter_left_keys` is set
  *    and replicated when `offsets_to_replicate` was allocated (ALL strictness);
  *  - right key columns listed in `needed_key_names_right` and absent from the block are added
  *    as copies of the corresponding left key columns under the right key names.
  *
  * Throws UNKNOWN_SET_DATA_VARIANT if `type` is not one of the variants handled by the dispatch below.
  *
  * NOTE(review): `KIND` and `STRICTNESS` used in the dispatch macro are not declared in the visible
  * signature — presumably a `template <...>` header precedes this definition; confirm.
  */
void Join::joinBlockImpl(Block & block, const Maps & maps) const
{
    size_t keys_size = key_names_left.size();
    ColumnRawPtrs key_columns(keys_size);

    /// Rare case, when keys are constant. To avoid code bloat, simply materialize them.
    /// This vector only keeps the materialized columns alive while raw pointers to them are used.
    Columns materialized_columns;

    /// Memoize key columns to work with.
    for (size_t i = 0; i < keys_size; ++i)
    {
        key_columns[i] = block.getByName(key_names_left[i]).column.get();

        if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst())
        {
            materialized_columns.emplace_back(converted);
            key_columns[i] = materialized_columns.back().get();
        }
    }

    /// Keys with NULL value in any column won't join to anything.
    ColumnPtr null_map_holder;
    ConstNullMapPtr null_map{};
    extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);

    /// Number of columns the block had on entry; only these are filtered/replicated at the end.
    size_t existing_columns = block.columns();

    /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized.
      * Because if they are constants, then in the "not joined" rows, they may have different values
      *  - default values, which can differ from the values of these constants.
      */
    if (getFullness(kind))
    {
        for (size_t i = 0; i < existing_columns; ++i)
        {
            auto & col = block.getByPosition(i).column;

            if (ColumnPtr converted = col->convertToFullColumnIfConst())
                col = converted;

            /// If use_nulls, convert left columns (except keys) to Nullable.
            if (use_nulls)
            {
                if (std::end(key_names_left) == std::find(key_names_left.begin(), key_names_left.end(), block.getByPosition(i).name))
                    convertColumnToNullable(block.getByPosition(i));
            }
        }
    }

    /** For LEFT/INNER JOIN, the saved blocks do not contain keys.
      * For FULL/RIGHT JOIN, the saved blocks contain keys;
      *  but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped.
      */
    size_t num_columns_to_skip = 0;
    if (getFullness(kind))
        num_columns_to_skip = keys_size;

    /// Add new columns to the block.
    size_t num_columns_to_add = sample_block_with_columns_to_add.columns();
    MutableColumns added_columns;
    added_columns.reserve(num_columns_to_add);

    /// Type and name of every column in `added_columns`, at the same index.
    std::vector<std::pair<decltype(ColumnWithTypeAndName::type), decltype(ColumnWithTypeAndName::name)>> added_type_name;
    added_type_name.reserve(num_columns_to_add);

    /// For every column in `added_columns`: its position in the sample block shifted by `num_columns_to_skip`.
    std::vector<size_t> right_indexes;
    right_indexes.reserve(num_columns_to_add);

    for (size_t i = 0; i < num_columns_to_add; ++i)
    {
        const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i);

        /// Don't insert column if it's in left block.
        if (!block.has(src_column.name))
        {
            added_columns.push_back(src_column.column->cloneEmpty());
            added_columns.back()->reserve(src_column.column->size());
            added_type_name.emplace_back(src_column.type, src_column.name);
            right_indexes.push_back(num_columns_to_skip + i);
        }
    }

    size_t rows = block.rows();

    /// Used with ANY INNER JOIN
    std::unique_ptr<IColumn::Filter> filter;

    bool filter_left_keys = (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any;
    /// NOTE: the filter is allocated unconditionally; it is also read below when building `mapping`.
    filter = std::make_unique<IColumn::Filter>(rows);

    /// Used with ALL ... JOIN
    IColumn::Offset current_offset = 0;
    std::unique_ptr<IColumn::Offsets> offsets_to_replicate;

    if (strictness == ASTTableJoin::Strictness::All)
        offsets_to_replicate = std::make_unique<IColumn::Offsets>(rows);

    /// Dispatch to the implementation that matches the key variant stored in `type`.
    switch (type)
    {
    #define M(TYPE) \
        case Join::Type::TYPE: \
            joinBlockImplType<KIND, STRICTNESS, typename KeyGetterForType<Join::Type::TYPE>::Type>(\
                *maps.TYPE, rows, key_columns, key_sizes, added_columns, null_map, \
                filter, current_offset, offsets_to_replicate, right_indexes); \
            break;
        APPLY_FOR_JOIN_VARIANTS(M)
    #undef M

        default:
            throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
    }

    const auto added_columns_size = added_columns.size();
    for (size_t i = 0; i < added_columns_size; ++i)
        block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), added_type_name[i].first, added_type_name[i].second));

    /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones.
    if (filter_left_keys)
        for (size_t i = 0; i < existing_columns; ++i)
            block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1);

    /// Built lazily on first use and shared by all right key columns added below.
    ColumnUInt64::Ptr mapping;

    /// Add join key columns from the right block if they have different names.
    for (size_t i = 0; i < key_names_right.size(); ++i)
    {
        auto & right_name = key_names_right[i];
        auto & left_name = key_names_left[i];

        if (needed_key_names_right.count(right_name) && !block.has(right_name))
        {
            const auto & col = block.getByName(left_name);
            auto column = col.column;
            if (!filter_left_keys)
            {
                if (!mapping)
                {
                    /// Rows with a set filter bit map to themselves; the others map to index `size`,
                    /// which is the default value appended to the column below.
                    auto mut_mapping = ColumnUInt64::create(column->size());
                    auto & data = mut_mapping->getData();
                    size_t size = column->size();
                    for (size_t j = 0; j < size; ++j)
                        data[j] = (*filter)[j] ? j : size;

                    mapping = std::move(mut_mapping);
                }

                auto mut_column = (*std::move(column)).mutate();
                mut_column->insertDefault();
                column = mut_column->index(*mapping, 0);
            }
            block.insert({column, col.type, right_name});
        }
    }

    /// If ALL ... JOIN - we replicate all the columns except the new ones.
    if (offsets_to_replicate)
    {
        for (size_t i = 0; i < existing_columns; ++i)
            block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate);
    }
}
コード例 #9
0
ファイル: MergeTreeReader.cpp プロジェクト: bamx23/ClickHouse
/** Reads data for every entry of `columns` into `res` through readData(), passing on
  * `from_mark`, `continue_reading` and `max_rows_to_read`.
  * A column that is already present in `res` is appended to; otherwise it is created first.
  * A column that is empty after reading is erased from `res`.
  *
  * Returns the largest growth in size observed among the columns that are nonempty after reading.
  *
  * Any exception is rethrown. Before that, the part is reported as broken through
  * storage.reportBrokenPart(), except for an Exception with the MEMORY_LIMIT_EXCEEDED code.
  */
size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res)
{
    size_t read_rows = 0;
    try
    {
        /// Pointers to offset columns that are common to the nested data structure columns.
        /// If append is true, then the value will be equal to nullptr and will be used only to
        /// check that the offsets column has been already read.
        OffsetColumns offset_columns;

        for (const NameAndTypePair & it : columns)
        {
            /// The column is already present in the block so we will append the values to the end.
            bool append = res.has(it.name);
            if (!append)
                res.insert(ColumnWithTypeAndName(it.type->createColumn(), it.type, it.name));

            /// To keep offsets shared. TODO Very dangerous. Get rid of this.
            MutableColumnPtr column = res.getByName(it.name).column->assumeMutable();

            bool read_offsets = true;

            /// For nested data structures collect pointers to offset columns.
            if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(it.type.get()))
            {
                /// Array columns are grouped by the nested table name extracted from the column name.
                String name = Nested::extractTableName(it.name);

                auto it_inserted = offset_columns.emplace(name, nullptr);

                /// offsets have already been read on the previous iteration and we don't need to read it again
                if (!it_inserted.second)
                    read_offsets = false;

                /// need to create new offsets
                if (it_inserted.second && !append)
                    it_inserted.first->second = ColumnArray::ColumnOffsets::create();

                /// share offsets in all elements of nested structure
                if (!append)
                    column = ColumnArray::create(type_arr->getNestedType()->createColumn(), it_inserted.first->second);
            }

            try
            {
                size_t column_size_before_reading = column->size();

                readData(it.name, *it.type, *column, from_mark, continue_reading, max_rows_to_read, read_offsets);

                /// For elements of Nested, column_size_before_reading may be greater than column size
                ///  if offsets are not empty and were already read, but elements are empty.
                if (column->size())
                    read_rows = std::max(read_rows, column->size() - column_size_before_reading);
            }
            catch (Exception & e)
            {
                /// Better diagnostics.
                e.addMessage("(while reading column " + it.name + ")");
                throw;
            }

            /// Put the (possibly newly created) column back into the block, or drop it if nothing is in it.
            if (column->size())
                res.getByName(it.name).column = std::move(column);
            else
                res.erase(it.name);
        }

        /// NOTE: positions for all streams must be kept in sync. In particular, even if for some streams there are no rows to be read,
        /// you must ensure that no seeks are skipped and at this point they all point to to_mark.
    }
    catch (Exception & e)
    {
        /// Running out of memory says nothing about the data, so the part is not reported in that case.
        if (e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
            storage.reportBrokenPart(data_part->name);

        /// Better diagnostics.
        e.addMessage("(while reading from part " + path + " from mark " + toString(from_mark) + " with max_rows_to_read = " + toString(max_rows_to_read) + ")");
        throw;
    }
    catch (...)
    {
        /// Exceptions of any other type: report the part and rethrow unchanged.
        storage.reportBrokenPart(data_part->name);

        throw;
    }

    return read_rows;
}