/** Writes `block`, preceded by the text of `query_ast`, into one file per directory in `dir_names`.
  *
  * The data is written only once, into the "tmp/" subdirectory of the first directory,
  *  and that file is then hardlinked into every directory from `dir_names`
  *  (each link gets its own name taken from storage.file_names_increment).
  * After all links are created, the temporary file is removed.
  *
  * If `dir_names` is empty, nothing is done.
  * If anything fails after the temporary file has been chosen, the temporary file is removed
  *  on a best-effort basis and the original exception is rethrown.
  *  Hardlinks that were already created are left in place.
  */
void DistributedBlockOutputStream::writeToShard(const Block & block, const std::vector<std::string> & dir_names)
{
    /// Nothing to write to. Without this guard the final remove() would be called on an empty path.
    if (dir_names.empty())
        return;

    /** tmp directory is used to ensure atomicity of transactions
      *  and keep monitor thread out from reading incomplete data
      */
    std::string first_file_tmp_path;

    const std::string query_string = queryToString(query_ast);

    try
    {
        /// write first file, hardlink the others
        for (const auto & dir_name : dir_names)
        {
            const std::string path = storage.getPath() + dir_name + '/';

            /// ensure shard subdirectory creation and notify storage
            if (Poco::File(path).createDirectory())
                storage.requireDirectoryMonitor(dir_name);

            const std::string file_name = toString(storage.file_names_increment.get()) + ".bin";
            const std::string block_file_path = path + file_name;

            /** on first iteration write block to a temporary directory for subsequent hardlinking to ensure
              *  the inode is not freed until we're done */
            if (first_file_tmp_path.empty())
            {
                const std::string tmp_path = path + "tmp/";
                Poco::File(tmp_path).createDirectory();

                first_file_tmp_path = tmp_path + file_name;

                WriteBufferFromFile out{first_file_tmp_path};
                CompressedWriteBuffer compress{out};
                NativeBlockOutputStream stream{compress, ClickHouseRevision::get()};

                /// The query text goes directly to the file buffer; the block goes through the compressing buffer.
                writeStringBinary(query_string, out);

                stream.writePrefix();
                stream.write(block);
                stream.writeSuffix();
            }

            /// c_str() rather than data(): link() expects NUL-terminated C strings.
            if (link(first_file_tmp_path.c_str(), block_file_path.c_str()))
                throwFromErrno("Could not link " + block_file_path + " to " + first_file_tmp_path);
        }
    }
    catch (...)
    {
        /// Do not leave the temporary file behind. Cleanup is best effort:
        ///  a failure here must not replace the exception that is being propagated.
        if (!first_file_tmp_path.empty())
        {
            try
            {
                Poco::File(first_file_tmp_path).remove();
            }
            catch (...)
            {
            }
        }

        throw;
    }

    /** remove the temporary file, enabling the OS to reclaim inode after all threads
      *  have removed their corresponding files */
    Poco::File(first_file_tmp_path).remove();
}
/** Returns the sample block of the given query, memoized in the sample block cache
  *  obtained from `context`.
  *
  * On a cache miss an InterpreterSelectWithUnionQuery is constructed for the query,
  *  its sample block is stored in the cache and returned.
  */
Block InterpreterSelectWithUnionQuery::getSampleBlock(
    const ASTPtr & query_ptr,
    const Context & context)
{
    auto & sample_block_cache = context.getSampleBlockCache();

    /// The query text is the cache key, because query_ptr changes for every internal SELECT.
    const auto cache_key = queryToString(query_ptr);

    const auto cached = sample_block_cache.find(cache_key);
    if (cached != sample_block_cache.end())
        return cached->second;

    Block sample_block
        = InterpreterSelectWithUnionQuery(query_ptr, context, {}, QueryProcessingStage::Complete, 0, true).getSampleBlock();

    sample_block_cache[cache_key] = sample_block;
    return sample_block;
}
// Example #3
// 0
/** Serializes the job into a binary string.
  *
  * Fields are written in this order:
  *  database_name, table_name, partition, text of sharding_key_expr, coordinator_id,
  *  block_number (writeVarUInt), do_copy,
  *  number of paths (writeVarUInt), then for every path: path.first and path.second (writeVarUInt).
  */
std::string ReshardingJob::toString() const
{
    std::string serialized_job;

    /// The write buffer lives in its own scope so that it is flushed and destroyed
    ///  before the string is returned; the result then does not depend on the order of
    ///  destruction of locals relative to construction of the return value.
    {
        WriteBufferFromString buf{serialized_job};

        writeBinary(database_name, buf);
        writeBinary(table_name, buf);
        writeBinary(partition, buf);
        writeBinary(queryToString(sharding_key_expr), buf);
        writeBinary(coordinator_id, buf);
        writeVarUInt(block_number, buf);
        writeBinary(do_copy, buf);

        writeVarUInt(paths.size(), buf);
        for (const auto & path : paths)
        {
            writeBinary(path.first, buf);
            writeVarUInt(path.second, buf);
        }

        buf.next();
    }

    return serialized_job;
}
/** Builds a single block that describes table columns, one row per column, with the columns
  *  database, table, name, type, default_type, default_expression, bytes.
  *
  * The set of (database, table) pairs is filtered with `query` in two steps
  *  (first by database only, then by database and table) via VirtualColumnUtils::filterBlockWithQuery.
  *
  * Sets `processed_stage` to QueryProcessingStage::FetchColumns.
  * Returns an empty list of streams if no rows are left after either filtering step,
  *  otherwise a list with one OneBlockInputStream holding the result.
  *
  * `settings`, `max_block_size` and `threads` are not used by this implementation.
  */
BlockInputStreams StorageSystemColumns::read(
	const Names & column_names,
	ASTPtr query,
	const Context & context,
	const Settings & settings,
	QueryProcessingStage::Enum & processed_stage,
	const size_t max_block_size,
	const unsigned threads)
{
	check(column_names);
	processed_stage = QueryProcessingStage::FetchColumns;

	Block block;

	/// Storages collected while enumerating tables, keyed by (database name, table name).
	std::map<std::pair<std::string, std::string>, StoragePtr> storages;

	{
		Databases databases = context.getDatabases();

		/// Add the `database` column.
		ColumnPtr database_column = std::make_shared<ColumnString>();
		for (const auto & database : databases)
			database_column->insert(database.first);
		block.insert(ColumnWithTypeAndName(database_column, std::make_shared<DataTypeString>(), "database"));

		/// Filter the block with the `database` column.
		VirtualColumnUtils::filterBlockWithQuery(query, block, context);

		if (!block.rows())
			return BlockInputStreams();

		database_column = block.getByName("database").column;
		size_t rows = database_column->size();

		/// Add the `table` column.
		ColumnPtr table_column = std::make_shared<ColumnString>();
		/// offsets[i] is the cumulative number of tables in databases 0..i;
		///  it is used below to replicate each database row once per table.
		IColumn::Offsets_t offsets(rows);
		for (size_t i = 0; i < rows; ++i)
		{
			const std::string database_name = (*database_column)[i].get<std::string>();
			const DatabasePtr database = databases.at(database_name);
			offsets[i] = i ? offsets[i - 1] : 0;

			for (auto iterator = database->getIterator(); iterator->isValid(); iterator->next())
			{
				const String & table_name = iterator->name();
				storages.emplace(std::piecewise_construct,
					std::forward_as_tuple(database_name, table_name),
					std::forward_as_tuple(iterator->table()));
				table_column->insert(table_name);
				offsets[i] += 1;
			}
		}

		/// Replicate the existing columns so that they have one row per table.
		for (size_t i = 0; i < block.columns(); ++i)
		{
			ColumnPtr & column = block.getByPosition(i).column;
			column = column->replicate(offsets);
		}

		block.insert(ColumnWithTypeAndName(table_column, std::make_shared<DataTypeString>(), "table"));
	}

	/// Filter the block with the `database` and `table` columns.
	VirtualColumnUtils::filterBlockWithQuery(query, block, context);

	if (!block.rows())
		return BlockInputStreams();

	ColumnPtr filtered_database_column = block.getByName("database").column;
	ColumnPtr filtered_table_column = block.getByName("table").column;

	/// Compose the result.
	ColumnPtr database_column = std::make_shared<ColumnString>();
	ColumnPtr table_column = std::make_shared<ColumnString>();
	ColumnPtr name_column = std::make_shared<ColumnString>();
	ColumnPtr type_column = std::make_shared<ColumnString>();
	ColumnPtr default_type_column = std::make_shared<ColumnString>();
	ColumnPtr default_expression_column = std::make_shared<ColumnString>();
	ColumnPtr bytes_column = std::make_shared<ColumnUInt64>();

	size_t rows = filtered_database_column->size();
	for (size_t i = 0; i < rows; ++i)
	{
		const std::string database_name = (*filtered_database_column)[i].get<std::string>();
		const std::string table_name = (*filtered_table_column)[i].get<std::string>();

		NamesAndTypesList columns;
		ColumnDefaults column_defaults;
		std::unordered_map<String, size_t> column_sizes;

		/// The table metadata is copied out inside this scope, while the structure lock is held.
		{
			StoragePtr storage = storages.at(std::make_pair(database_name, table_name));
			IStorage::TableStructureReadLockPtr table_lock;

			try
			{
				table_lock = storage->lockStructure(false);
			}
			catch (const Exception & e)
			{
				/** There are case when IStorage::drop was called,
				  *  but we still own the object.
				  * Then table will throw exception at attempt to lock it.
				  * Just skip the table.
				  */
				if (e.code() == ErrorCodes::TABLE_IS_DROPPED)
					continue;
				else
					throw;
			}

			columns = storage->getColumnsList();
			columns.insert(std::end(columns), std::begin(storage->alias_columns), std::end(storage->alias_columns));
			column_defaults = storage->column_defaults;

			/** Column size data for tables of the MergeTree family.
			  * NOTE: In the future an interface could be made that allows getting column sizes from IStorage.
			  */
			if (auto storage_concrete = dynamic_cast<StorageMergeTree *>(storage.get()))
			{
				column_sizes = storage_concrete->getData().getColumnSizes();
			}
			else if (auto storage_concrete = dynamic_cast<StorageReplicatedMergeTree *>(storage.get()))
			{
				column_sizes = storage_concrete->getData().getColumnSizes();

				/// Sizes of the unreplicated data, if there is any, are added to the replicated ones.
				auto unreplicated_data = storage_concrete->getUnreplicatedData();
				if (unreplicated_data)
				{
					auto unreplicated_column_sizes = unreplicated_data->getColumnSizes();
					for (const auto & name_size : unreplicated_column_sizes)
						column_sizes[name_size.first] += name_size.second;
				}
			}
		}

		/// One result row per column of the table.
		for (const auto & column : columns)
		{
			database_column->insert(database_name);
			table_column->insert(table_name);
			name_column->insert(column.name);
			type_column->insert(column.type->getName());

			/// Default kind and expression; default values are inserted if the column has no entry in column_defaults.
			{
				const auto it = column_defaults.find(column.name);
				if (it == std::end(column_defaults))
				{
					default_type_column->insertDefault();
					default_expression_column->insertDefault();
				}
				else
				{
					default_type_column->insert(toString(it->second.type));
					default_expression_column->insert(queryToString(it->second.expression));
				}
			}

			/// Column size; a default value is inserted if no size is known for the column.
			{
				const auto it = column_sizes.find(column.name);
				if (it == std::end(column_sizes))
					bytes_column->insertDefault();
				else
					bytes_column->insert(it->second);
			}
		}
	}

	/// The block used for filtering is discarded and refilled with the result columns.
	block.clear();

	block.insert(ColumnWithTypeAndName(database_column, std::make_shared<DataTypeString>(), "database"));
	block.insert(ColumnWithTypeAndName(table_column, std::make_shared<DataTypeString>(), "table"));
	block.insert(ColumnWithTypeAndName(name_column, std::make_shared<DataTypeString>(), "name"));
	block.insert(ColumnWithTypeAndName(type_column, std::make_shared<DataTypeString>(), "type"));
	block.insert(ColumnWithTypeAndName(default_type_column, std::make_shared<DataTypeString>(), "default_type"));
	block.insert(ColumnWithTypeAndName(default_expression_column, std::make_shared<DataTypeString>(), "default_expression"));
	block.insert(ColumnWithTypeAndName(bytes_column, std::make_shared<DataTypeUInt64>(), "bytes"));

	return BlockInputStreams{ 1, std::make_shared<OneBlockInputStream>(block) };
}
/** Builds an ALTER TABLE ... RESHARD [COPY] PARTITION ... COORDINATE WITH ... query for the remote table,
  *  executes it on the cluster through ClusterProxy and reads the resulting streams to the end.
  *
  * A coordinator is created through the resharding worker before the query is built,
  *  and its id is put into the query.
  *
  * Throws:
  *  - RESHARDING_NO_WORKER if the resharding worker is not started;
  *  - RESHARDING_INVALID_PARAMETERS if `coordinator` is not null;
  *  - LOGICAL_ERROR if the united stream is not an IProfilingBlockInputStream.
  *
  * If an exception is thrown after the coordinator has been created, the coordinator status is set to
  *  STATUS_ERROR (unless that has already been done by the stream exception callback),
  *  the coordinator state is dumped and logged, the coordinator is deleted, and the exception is rethrown.
  * The coordinator is not deleted by this function when no exception occurs.
  *
  * `query` and `database_name` are not used by this implementation.
  */
void StorageDistributed::reshardPartitions(ASTPtr query, const String & database_name,
	const Field & first_partition, const Field & last_partition,
	const WeightedZooKeeperPaths & weighted_zookeeper_paths,
	const ASTPtr & sharding_key_expr, bool do_copy, const Field & coordinator,
	const Settings & settings)
{
	auto & resharding_worker = context.getReshardingWorker();
	if (!resharding_worker.isStarted())
		throw Exception{"Resharding background thread is not running", ErrorCodes::RESHARDING_NO_WORKER};

	if (!coordinator.isNull())
		throw Exception{"Use of COORDINATE WITH is forbidden in ALTER TABLE ... RESHARD"
			" queries for distributed tables",
			ErrorCodes::RESHARDING_INVALID_PARAMETERS};

	std::string coordinator_id = resharding_worker.createCoordinator(cluster);

	/// Set by the stream exception callback once it has reported the error status to the coordinator.
	std::atomic<bool> has_notified_error{false};

	/// Filled in by handle_exception so that the callers in the catch blocks can log it.
	std::string dumped_coordinator_state;

	/// Error-path cleanup: report the error (if not reported yet), dump the coordinator state, delete the coordinator.
	/// Exceptions thrown by the cleanup itself are logged and not propagated.
	auto handle_exception = [&](const std::string & msg = "")
	{
		try
		{
			if (!has_notified_error)
				resharding_worker.setStatus(coordinator_id, ReshardingWorker::STATUS_ERROR, msg);
			dumped_coordinator_state = resharding_worker.dumpCoordinatorState(coordinator_id);
			resharding_worker.deleteCoordinator(coordinator_id);
		}
		catch (...)
		{
			tryLogCurrentException(__PRETTY_FUNCTION__);
		}
	};

	try
	{
		/// Build the query ALTER TABLE ... RESHARD [COPY] PARTITION ... COORDINATE WITH ...

		ASTPtr alter_query_ptr = std::make_shared<ASTAlterQuery>();
		auto & alter_query = static_cast<ASTAlterQuery &>(*alter_query_ptr);

		alter_query.database = remote_database;
		alter_query.table = remote_table;

		alter_query.parameters.emplace_back();
		ASTAlterQuery::Parameters & parameters = alter_query.parameters.back();

		parameters.type = ASTAlterQuery::RESHARD_PARTITION;
		/// The partition bounds are set only when they are not null.
		if (!first_partition.isNull())
			parameters.partition = std::make_shared<ASTLiteral>(StringRange(), first_partition);
		if (!last_partition.isNull())
			parameters.last_partition = std::make_shared<ASTLiteral>(StringRange(), last_partition);

		ASTPtr expr_list = std::make_shared<ASTExpressionList>();
		for (const auto & entry : weighted_zookeeper_paths)
		{
			ASTPtr weighted_path_ptr = std::make_shared<ASTWeightedZooKeeperPath>();
			auto & weighted_path = static_cast<ASTWeightedZooKeeperPath &>(*weighted_path_ptr);
			weighted_path.path = entry.first;
			weighted_path.weight = entry.second;
			expr_list->children.push_back(weighted_path_ptr);
		}

		parameters.weighted_zookeeper_paths = expr_list;
		parameters.sharding_key_expr = sharding_key_expr;
		parameters.do_copy = do_copy;
		parameters.coordinator = std::make_shared<ASTLiteral>(StringRange(), Field(coordinator_id));

		resharding_worker.registerQuery(coordinator_id, queryToString(alter_query_ptr));

		/** The shard_multiplexing functionality is not finished - turn it off.
		* (Because connections to different shards within one thread are not established in parallel.)
		* For details see https://███████████.yandex-team.ru/METR-18300
		*/
		bool enable_shard_multiplexing = false;

		ClusterProxy::AlterQueryConstructor alter_query_constructor;

		BlockInputStreams streams = ClusterProxy::Query{alter_query_constructor, cluster, alter_query_ptr,
			context, settings, enable_shard_multiplexing}.execute();

		/// This callback is called if an exception has occurred while attempting to read
		/// a block from a shard. This is to avoid a potential deadlock if other shards are
		/// waiting inside a barrier. Actually, even without this solution, we would avoid
		/// such a deadlock because we would eventually time out while trying to get remote
		/// blocks. Nevertheless this is not the ideal way of sorting out this issue since
		/// we would then not get to know the actual cause of the failure.
		auto exception_callback = [&resharding_worker, coordinator_id, &has_notified_error]()
		{
			try
			{
				resharding_worker.setStatus(coordinator_id, ReshardingWorker::STATUS_ERROR);
				has_notified_error = true;
			}
			catch (...)
			{
				tryLogCurrentException(__PRETTY_FUNCTION__);
			}
		};

		/// Unite all streams into one and keep only that one.
		/// NOTE(review): streams[0] assumes execute() returned at least one stream - confirm.
		streams[0] = std::make_shared<UnionBlockInputStream<>>(
			streams, nullptr, settings.max_distributed_connections, exception_callback);
		streams.resize(1);

		auto stream_ptr = dynamic_cast<IProfilingBlockInputStream *>(&*streams[0]);
		if (stream_ptr == nullptr)
			throw Exception{"StorageDistributed: Internal error", ErrorCodes::LOGICAL_ERROR};
		auto & stream = *stream_ptr;

		stream.readPrefix();

		/// Read everything; the blocks themselves are discarded.
		while (!stream.isCancelled() && stream.read())
			;

		if (!stream.isCancelled())
			stream.readSuffix();
	}
	catch (const Exception & ex)
	{
		handle_exception(ex.message());
		LOG_ERROR(log, dumped_coordinator_state);
		throw;
	}
	catch (const std::exception & ex)
	{
		handle_exception(ex.what());
		LOG_ERROR(log, dumped_coordinator_state);
		throw;
	}
	catch (...)
	{
		handle_exception();
		LOG_ERROR(log, dumped_coordinator_state);
		throw;
	}
}
    /** Produces the next block with one row per table.
      *
      * Databases are taken from `databases` starting at `database_idx`; tables of the current database
      *  are enumerated through `tables_it`. Both are kept between calls, so that enumeration
      *  continues where the previous block stopped. At most `max_block_size` rows are produced per call.
      *
      * Databases that no longer exist or that the user has no access rights to are skipped.
      *
      * When all databases are exhausted, the external tables of the session context (if there is one)
      *  are appended to the block being built, `done` is set and the block is returned;
      *  every later call returns an empty block.
      *
      * Only the values whose position is set in `columns_mask` are inserted. The positions are, in order:
      *  database name, table name, engine name, is_temporary flag, data path, metadata path,
      *  metadata modification time, dependencies (databases), dependencies (tables),
      *  create table query, full engine description, partition key, sorting key, primary key, sampling key.
      */
    Block readImpl() override
    {
        if (done)
            return {};

        Block res = header;
        MutableColumns res_columns = header.cloneEmptyColumns();

        size_t rows_count = 0;
        while (rows_count < max_block_size)
        {
            /// The tables of the current database are exhausted: move on to the next database.
            if (tables_it && !tables_it->isValid())
                ++database_idx;

            while (database_idx < databases->size() && (!tables_it || !tables_it->isValid()))
            {
                database_name = databases->getDataAt(database_idx).toString();
                database = context.tryGetDatabase(database_name);

                if (!database || !context.hasDatabaseAccessRights(database_name))
                {
                    /// Database was deleted just now or the user has no access.
                    ++database_idx;
                    continue;
                }

                break;
            }

            /// This is for temporary tables. They are output in single block regardless to max_block_size.
            if (database_idx >= databases->size())
            {
                if (context.hasSessionContext())
                {
                    Tables external_tables = context.getSessionContext().getExternalTables();

                    /// Iterate by reference: there is no need to copy each (name, storage) entry.
                    for (const auto & table : external_tables)
                    {
                        size_t src_index = 0;
                        size_t res_index = 0;

                        /// database name
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();

                        /// table name
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insert(table.first);

                        /// engine name
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insert(table.second->getName());

                        /// is_temporary
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insert(1u);

                        /// data path
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();

                        /// metadata path
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();

                        /// metadata modification time
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();

                        /// dependencies (databases)
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();

                        /// dependencies (tables)
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();

                        /// create table query
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();

                        /// full engine description: only the engine name is available here
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insert(table.second->getName());

                        /// partition key
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();

                        /// sorting key
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();

                        /// primary key
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();

                        /// sampling key
                        if (columns_mask[src_index++])
                            res_columns[res_index++]->insertDefault();
                    }
                }

                res.setColumns(std::move(res_columns));
                done = true;
                return res;
            }

            if (!tables_it || !tables_it->isValid())
                tables_it = database->getIterator(context);

            for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next())
            {
                ++rows_count;
                auto table_name = tables_it->name();

                /// The storage is taken from the iterator once and reused for every column.
                /// A second lookup by name through the context is redundant and presumably
                ///  can throw if the table has been dropped in the meantime.
                const auto & table = tables_it->table();

                size_t src_index = 0;
                size_t res_index = 0;

                if (columns_mask[src_index++])
                    res_columns[res_index++]->insert(database_name);

                if (columns_mask[src_index++])
                    res_columns[res_index++]->insert(table_name);

                if (columns_mask[src_index++])
                    res_columns[res_index++]->insert(table->getName());

                if (columns_mask[src_index++])
                    res_columns[res_index++]->insert(0u);  // is_temporary

                if (columns_mask[src_index++])
                    res_columns[res_index++]->insert(table->getDataPath());

                if (columns_mask[src_index++])
                    res_columns[res_index++]->insert(database->getTableMetadataPath(table_name));

                if (columns_mask[src_index++])
                    res_columns[res_index++]->insert(static_cast<UInt64>(database->getTableMetadataModificationTime(context, table_name)));

                {
                    Array dependencies_table_name_array;
                    Array dependencies_database_name_array;

                    /// The dependencies are requested only if at least one of the two columns is needed.
                    if (columns_mask[src_index] || columns_mask[src_index + 1])
                    {
                        const auto dependencies = context.getDependencies(database_name, table_name);

                        dependencies_table_name_array.reserve(dependencies.size());
                        dependencies_database_name_array.reserve(dependencies.size());
                        for (const auto & dependency : dependencies)
                        {
                            dependencies_table_name_array.push_back(dependency.second);
                            dependencies_database_name_array.push_back(dependency.first);
                        }
                    }

                    if (columns_mask[src_index++])
                        res_columns[res_index++]->insert(dependencies_database_name_array);

                    if (columns_mask[src_index++])
                        res_columns[res_index++]->insert(dependencies_table_name_array);
                }

                /// The CREATE query is requested only if at least one of the two columns derived from it is needed.
                if (columns_mask[src_index] || columns_mask[src_index + 1])
                {
                    ASTPtr ast = database->tryGetCreateTableQuery(context, table_name);

                    if (columns_mask[src_index++])
                        res_columns[res_index++]->insert(ast ? queryToString(ast) : "");

                    if (columns_mask[src_index++])
                    {
                        String engine_full;

                        if (ast)
                        {
                            const ASTCreateQuery & ast_create = typeid_cast<const ASTCreateQuery &>(*ast);
                            if (ast_create.storage)
                            {
                                engine_full = queryToString(*ast_create.storage);

                                /// Strip the leading " ENGINE = " from the formatted storage definition.
                                static const char * const extra_head = " ENGINE = ";
                                if (startsWith(engine_full, extra_head))
                                    engine_full = engine_full.substr(strlen(extra_head));
                            }
                        }

                        res_columns[res_index++]->insert(engine_full);
                    }
                }
                else
                    src_index += 2;

                ASTPtr expression_ptr;
                if (columns_mask[src_index++])
                {
                    if ((expression_ptr = table->getPartitionKeyAST()))
                        res_columns[res_index++]->insert(queryToString(expression_ptr));
                    else
                        res_columns[res_index++]->insertDefault();
                }

                if (columns_mask[src_index++])
                {
                    if ((expression_ptr = table->getSortingKeyAST()))
                        res_columns[res_index++]->insert(queryToString(expression_ptr));
                    else
                        res_columns[res_index++]->insertDefault();
                }

                if (columns_mask[src_index++])
                {
                    if ((expression_ptr = table->getPrimaryKeyAST()))
                        res_columns[res_index++]->insert(queryToString(expression_ptr));
                    else
                        res_columns[res_index++]->insertDefault();
                }

                if (columns_mask[src_index++])
                {
                    if ((expression_ptr = table->getSamplingKeyAST()))
                        res_columns[res_index++]->insert(queryToString(expression_ptr));
                    else
                        res_columns[res_index++]->insertDefault();
                }
            }
        }

        res.setColumns(std::move(res_columns));
        return res;
    }
/// Returns the text of the AST produced by getRewrittenASTWithoutOnCluster() for `new_database`.
std::string ASTQueryWithOnCluster::getRewrittenQueryWithoutOnCluster(const std::string & new_database) const
{
    const auto rewritten_ast = getRewrittenASTWithoutOnCluster(new_database);
    return queryToString(rewritten_ast);
}
 /// Overload for an AST held by pointer: dereferences `query` and forwards
 ///  to the queryToString overload that accepts the pointee.
 /// `query` is dereferenced unconditionally, so it must not be null.
 String queryToString(const ASTPtr & query)
 {
     auto & ast = *query;
     return queryToString(ast);
 }