void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body, WriteBuffer & out, Poco::Net::HTTPServerResponse & response)
{
    if (is_cancelled)
        throw Exception{"RemoteQueryExecutor service terminated", ErrorCodes::ABORTED};

    std::string query = params.get("query");

    bool flag = true;

    try
    {
        (void) executeQuery(query, context, true);
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
        flag = false;
    }

    writeBinary(flag, out);
    out.next();
}
void ThreadStatus::finalizePerformanceCounters()
{
    if (performance_counters_finalized)
        return;

    performance_counters_finalized = true;
    updatePerformanceCounters();

    try
    {
        if (global_context && query_context)
        {
            auto & settings = query_context->getSettingsRef();
            if (settings.log_queries && settings.log_query_threads)
                if (auto thread_log = global_context->getQueryThreadLog())
                    logToQueryThreadLog(*thread_log);
        }
    }
    catch (...)
    {
        tryLogCurrentException(log);
    }
}
void StorageDistributedDirectoryMonitor::run()
{
    setThreadName("DistrDirMonitor");

    std::unique_lock<std::mutex> lock{mutex};

    const auto quit_requested = [this] { return quit; };

    while (!quit_requested())
    {
        auto do_sleep = true;

        try
        {
            do_sleep = !findFiles();
        }
        catch (...)
        {
            do_sleep = true;
            ++error_count;
            sleep_time = std::min(
                std::chrono::milliseconds{Int64(default_sleep_time.count() * std::exp2(error_count))},
                std::chrono::milliseconds{max_sleep_time});
            tryLogCurrentException(getLoggerName().data());
        }

        if (do_sleep)
            cond.wait_for(lock, sleep_time, quit_requested);

        const auto now = std::chrono::system_clock::now();
        if (now - last_decrease_time > decrease_error_count_period)
        {
            error_count /= 2;
            last_decrease_time = now;
        }
    }
}
void StorageDistributed::reshardPartitions(
    ASTPtr query, const String & database_name,
    const Field & first_partition, const Field & last_partition,
    const WeightedZooKeeperPaths & weighted_zookeeper_paths,
    const ASTPtr & sharding_key_expr, bool do_copy, const Field & coordinator,
    const Settings & settings)
{
    auto & resharding_worker = context.getReshardingWorker();
    if (!resharding_worker.isStarted())
        throw Exception{"Resharding background thread is not running", ErrorCodes::RESHARDING_NO_WORKER};

    if (!coordinator.isNull())
        throw Exception{"Use of COORDINATE WITH is forbidden in ALTER TABLE ... RESHARD"
            " queries for distributed tables",
            ErrorCodes::RESHARDING_INVALID_PARAMETERS};

    std::string coordinator_id = resharding_worker.createCoordinator(cluster);

    std::atomic<bool> has_notified_error{false};

    std::string dumped_coordinator_state;

    auto handle_exception = [&](const std::string & msg = "")
    {
        try
        {
            if (!has_notified_error)
                resharding_worker.setStatus(coordinator_id, ReshardingWorker::STATUS_ERROR, msg);
            dumped_coordinator_state = resharding_worker.dumpCoordinatorState(coordinator_id);
            resharding_worker.deleteCoordinator(coordinator_id);
        }
        catch (...)
        {
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }
    };

    try
    {
        /// Create the query ALTER TABLE ... RESHARD [COPY] PARTITION ... COORDINATE WITH ...

        ASTPtr alter_query_ptr = std::make_shared<ASTAlterQuery>();
        auto & alter_query = static_cast<ASTAlterQuery &>(*alter_query_ptr);

        alter_query.database = remote_database;
        alter_query.table = remote_table;

        alter_query.parameters.emplace_back();
        ASTAlterQuery::Parameters & parameters = alter_query.parameters.back();

        parameters.type = ASTAlterQuery::RESHARD_PARTITION;
        if (!first_partition.isNull())
            parameters.partition = std::make_shared<ASTLiteral>(StringRange(), first_partition);
        if (!last_partition.isNull())
            parameters.last_partition = std::make_shared<ASTLiteral>(StringRange(), last_partition);

        ASTPtr expr_list = std::make_shared<ASTExpressionList>();
        for (const auto & entry : weighted_zookeeper_paths)
        {
            ASTPtr weighted_path_ptr = std::make_shared<ASTWeightedZooKeeperPath>();
            auto & weighted_path = static_cast<ASTWeightedZooKeeperPath &>(*weighted_path_ptr);
            weighted_path.path = entry.first;
            weighted_path.weight = entry.second;
            expr_list->children.push_back(weighted_path_ptr);
        }

        parameters.weighted_zookeeper_paths = expr_list;
        parameters.sharding_key_expr = sharding_key_expr;
        parameters.do_copy = do_copy;
        parameters.coordinator = std::make_shared<ASTLiteral>(StringRange(), Field(coordinator_id));

        resharding_worker.registerQuery(coordinator_id, queryToString(alter_query_ptr));

        /** The shard_multiplexing functionality is not finished - disable it.
          * (Because connections to different shards within a single thread are not established in parallel.)
          * For details, see https://███████████.yandex-team.ru/METR-18300
          */
        bool enable_shard_multiplexing = false;

        ClusterProxy::AlterQueryConstructor alter_query_constructor;

        BlockInputStreams streams = ClusterProxy::Query{alter_query_constructor, cluster, alter_query_ptr,
            context, settings, enable_shard_multiplexing}.execute();

        /// This callback is called if an exception has occurred while attempting to read
        /// a block from a shard. This is to avoid a potential deadlock if other shards are
        /// waiting inside a barrier. Actually, even without this solution, we would avoid
        /// such a deadlock because we would eventually time out while trying to get remote
        /// blocks. Nevertheless this is not the ideal way of sorting out this issue since
        /// we would then not get to know the actual cause of the failure.
        auto exception_callback = [&resharding_worker, coordinator_id, &has_notified_error]()
        {
            try
            {
                resharding_worker.setStatus(coordinator_id, ReshardingWorker::STATUS_ERROR);
                has_notified_error = true;
            }
            catch (...)
            {
                tryLogCurrentException(__PRETTY_FUNCTION__);
            }
        };

        streams[0] = std::make_shared<UnionBlockInputStream<>>(
            streams, nullptr, settings.max_distributed_connections, exception_callback);
        streams.resize(1);

        auto stream_ptr = dynamic_cast<IProfilingBlockInputStream *>(&*streams[0]);
        if (stream_ptr == nullptr)
            throw Exception{"StorageDistributed: Internal error", ErrorCodes::LOGICAL_ERROR};
        auto & stream = *stream_ptr;

        stream.readPrefix();

        while (!stream.isCancelled() && stream.read())
            ;

        if (!stream.isCancelled())
            stream.readSuffix();
    }
    catch (const Exception & ex)
    {
        handle_exception(ex.message());
        LOG_ERROR(log, dumped_coordinator_state);
        throw;
    }
    catch (const std::exception & ex)
    {
        handle_exception(ex.what());
        LOG_ERROR(log, dumped_coordinator_state);
        throw;
    }
    catch (...)
    {
        handle_exception();
        LOG_ERROR(log, dumped_coordinator_state);
        throw;
    }
}
void AsynchronousMetrics::update()
{
    {
        if (auto mark_cache = context.getMarkCache())
        {
            set("MarkCacheBytes", mark_cache->weight());
            set("MarkCacheFiles", mark_cache->count());
        }
    }

    {
        if (auto uncompressed_cache = context.getUncompressedCache())
        {
            set("UncompressedCacheBytes", uncompressed_cache->weight());
            set("UncompressedCacheCells", uncompressed_cache->count());
        }
    }

    {
        auto databases = context.getDatabases();

        size_t max_queue_size = 0;
        size_t max_inserts_in_queue = 0;
        size_t max_merges_in_queue = 0;

        size_t sum_queue_size = 0;
        size_t sum_inserts_in_queue = 0;
        size_t sum_merges_in_queue = 0;

        size_t max_absolute_delay = 0;
        size_t max_relative_delay = 0;

        size_t max_part_count_for_partition = 0;

        for (const auto & db : databases)
        {
            for (auto iterator = db.second->getIterator(); iterator->isValid(); iterator->next())
            {
                auto & table = iterator->table();
                StorageMergeTree * table_merge_tree = typeid_cast<StorageMergeTree *>(table.get());
                StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast<StorageReplicatedMergeTree *>(table.get());

                if (table_replicated_merge_tree)
                {
                    StorageReplicatedMergeTree::Status status;
                    table_replicated_merge_tree->getStatus(status, false);

                    calculateMaxAndSum(max_queue_size, sum_queue_size, status.queue.queue_size);
                    calculateMaxAndSum(max_inserts_in_queue, sum_inserts_in_queue, status.queue.inserts_in_queue);
                    calculateMaxAndSum(max_merges_in_queue, sum_merges_in_queue, status.queue.merges_in_queue);

                    try
                    {
                        time_t absolute_delay = 0;
                        time_t relative_delay = 0;
                        table_replicated_merge_tree->getReplicaDelays(absolute_delay, relative_delay);

                        calculateMax(max_absolute_delay, absolute_delay);
                        calculateMax(max_relative_delay, relative_delay);
                    }
                    catch (...)
                    {
                        tryLogCurrentException(__PRETTY_FUNCTION__,
                            "Cannot get replica delay for table: " + backQuoteIfNeed(db.first) + "." + backQuoteIfNeed(iterator->name()));
                    }

                    calculateMax(max_part_count_for_partition, table_replicated_merge_tree->getData().getMaxPartsCountForMonth());
                }

                if (table_merge_tree)
                {
                    calculateMax(max_part_count_for_partition, table_merge_tree->getData().getMaxPartsCountForMonth());
                }
            }
        }

        set("ReplicasMaxQueueSize", max_queue_size);
        set("ReplicasMaxInsertsInQueue", max_inserts_in_queue);
        set("ReplicasMaxMergesInQueue", max_merges_in_queue);

        set("ReplicasSumQueueSize", sum_queue_size);
        set("ReplicasSumInsertsInQueue", sum_inserts_in_queue);
        set("ReplicasSumMergesInQueue", sum_merges_in_queue);

        set("ReplicasMaxAbsoluteDelay", max_absolute_delay);
        set("ReplicasMaxRelativeDelay", max_relative_delay);

        set("MaxPartCountForPartition", max_part_count_for_partition);
    }

#if USE_TCMALLOC
    {
        /// tcmalloc related metrics. Remove if you switch to a different allocator.

        MallocExtension & malloc_extension = *MallocExtension::instance();

        auto malloc_metrics =
        {
            "generic.current_allocated_bytes",
            "generic.heap_size",
            "tcmalloc.current_total_thread_cache_bytes",
            "tcmalloc.central_cache_free_bytes",
            "tcmalloc.transfer_cache_free_bytes",
            "tcmalloc.thread_cache_free_bytes",
            "tcmalloc.pageheap_free_bytes",
            "tcmalloc.pageheap_unmapped_bytes",
        };

        for (auto malloc_metric : malloc_metrics)
        {
            size_t value = 0;
            if (malloc_extension.GetNumericProperty(malloc_metric, &value))
                set(malloc_metric, value);
        }
    }
#endif

    /// Add more metrics as you wish.
}
void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
{
    try
    {
        HTMLForm params(request);

        /// Print detailed information about the lag even when it is small.
        bool verbose = params.get("verbose", "") == "1";

        const MergeTreeSettings & settings = context.getMergeTreeSettings();

        bool ok = true;
        std::stringstream message;

        auto databases = context.getDatabases();

        /// Iterate over all replicated tables.
        for (const auto & db : databases)
        {
            for (auto iterator = db.second->getIterator(); iterator->isValid(); iterator->next())
            {
                auto & table = iterator->table();
                StorageReplicatedMergeTree * table_replicated = typeid_cast<StorageReplicatedMergeTree *>(table.get());

                if (!table_replicated)
                    continue;

                time_t absolute_delay = 0;
                time_t relative_delay = 0;

                table_replicated->getReplicaDelays(absolute_delay, relative_delay);

                if ((settings.min_absolute_delay_to_close && absolute_delay >= static_cast<time_t>(settings.min_absolute_delay_to_close))
                    || (settings.min_relative_delay_to_close && relative_delay >= static_cast<time_t>(settings.min_relative_delay_to_close)))
                    ok = false;

                message << backQuoteIfNeed(db.first) << "." << backQuoteIfNeed(iterator->name())
                    << ":\tAbsolute delay: " << absolute_delay << ". Relative delay: " << relative_delay << ".\n";
            }
        }

        setResponseDefaultHeaders(response);

        if (ok && !verbose)
        {
            const char * data = "Ok.\n";
            response.sendBuffer(data, strlen(data));
        }
        else
        {
            response.send() << message.rdbuf();
        }
    }
    catch (...)
    {
        tryLogCurrentException("ReplicasStatusHandler");

        try
        {
            response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);

            if (!response.sent())
            {
                /// We have not sent anything yet, and we don't even know whether the response needs to be compressed.
                response.send() << getCurrentExceptionMessage(false) << std::endl;
            }
        }
        catch (...)
        {
            LOG_ERROR((&Logger::get("ReplicasStatusHandler")), "Cannot send exception to client");
        }
    }
}
void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
{
    Poco::Net::HTMLForm params(request, request.stream());
    LOG_TRACE(log, "Request URI: " + request.getURI());

    auto process_error = [&response, this](const std::string & message)
    {
        response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
        if (!response.sent())
            response.send() << message << std::endl;
        LOG_WARNING(log, message);
    };

    if (!params.has("query"))
    {
        process_error("No 'query' in request body");
        return;
    }

    if (!params.has("columns"))
    {
        process_error("No 'columns' in request URL");
        return;
    }

    if (!params.has("connection_string"))
    {
        process_error("No 'connection_string' in request URL");
        return;
    }

    UInt64 max_block_size = DEFAULT_BLOCK_SIZE;
    if (params.has("max_block_size"))
    {
        std::string max_block_size_str = params.get("max_block_size", "");
        if (max_block_size_str.empty())
        {
            process_error("Empty max_block_size specified");
            return;
        }
        max_block_size = parse<size_t>(max_block_size_str);
    }

    std::string columns = params.get("columns");
    std::unique_ptr<Block> sample_block;
    try
    {
        sample_block = parseColumns(std::move(columns));
    }
    catch (const Exception & ex)
    {
        process_error("Invalid 'columns' parameter in request body '" + ex.message() + "'");
        LOG_WARNING(log, ex.getStackTrace().toString());
        return;
    }

    std::string format = params.get("format", "RowBinary");
    std::string query = params.get("query");
    LOG_TRACE(log, "Query: " << query);

    std::string connection_string = params.get("connection_string");
    LOG_TRACE(log, "Connection string: '" << connection_string << "'");

    WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout);
    try
    {
        BlockOutputStreamPtr writer = FormatFactory::instance().getOutput(format, out, *sample_block, *context);
        auto pool = getPool(connection_string);
        ODBCBlockInputStream inp(pool->get(), query, *sample_block, max_block_size);
        copyData(inp, *writer);
    }
    catch (...)
    {
        auto message = getCurrentExceptionMessage(true);
        response.setStatusAndReason(
            Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); // can't call process_error, because the response has already started being sent
        writeStringBinary(message, out);
        tryLogCurrentException(log);
    }
}
void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed)
{
    std::lock_guard lock(reload_mutex);

    FilesChangesTracker new_files = getNewFileList();
    if (force || need_reload_from_zk || new_files.isDifferOrNewerThan(files))
    {
        ConfigProcessor config_processor(path);
        ConfigProcessor::LoadedConfig loaded_config;
        try
        {
            LOG_DEBUG(log, "Loading config `" << path << "'");

            loaded_config = config_processor.loadConfig(/* allow_zk_includes = */ true);
            if (loaded_config.has_zk_includes)
                loaded_config = config_processor.loadConfigWithZooKeeperIncludes(
                    zk_node_cache, zk_changed_event, fallback_to_preprocessed);
        }
        catch (const Coordination::Exception & e)
        {
            if (Coordination::isHardwareError(e.code))
                need_reload_from_zk = true;

            if (throw_on_error)
                throw;

            tryLogCurrentException(log, "ZooKeeper error when loading config from `" + path + "'");
            return;
        }
        catch (...)
        {
            if (throw_on_error)
                throw;

            tryLogCurrentException(log, "Error loading config from `" + path + "'");
            return;
        }
        config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir);

        /** We should remember the last modification time if and only if the config was successfully loaded.
          * Otherwise a race condition could occur while the config files are being updated:
          * the file contains raw (and invalid) data, so the config is not applied;
          * once the file has been fully written (and contains valid data), we don't load the new data,
          * since the modification time remains the same.
          */
        if (!loaded_config.loaded_from_preprocessed)
        {
            files = std::move(new_files);
            need_reload_from_zk = false;
        }

        try
        {
            updater(loaded_config.configuration);
        }
        catch (...)
        {
            if (throw_on_error)
                throw;
            tryLogCurrentException(log, "Error updating configuration from `" + path + "' config.");
        }
    }
}
void BackgroundProcessingPool::threadFunction()
{
    setThreadName("BackgrProcPool");

    MemoryTracker memory_tracker;
    memory_tracker.setMetric(CurrentMetrics::MemoryTrackingInBackgroundProcessingPool);
    current_memory_tracker = &memory_tracker;

    std::mt19937 rng(reinterpret_cast<intptr_t>(&rng));
    std::this_thread::sleep_for(std::chrono::duration<double>(std::uniform_real_distribution<double>(0, sleep_seconds_random_part)(rng)));

    while (!shutdown)
    {
        bool done_work = false;
        TaskHandle task;

        try
        {
            Poco::Timestamp min_time;

            {
                std::unique_lock<std::mutex> lock(tasks_mutex);

                if (!tasks.empty())
                {
                    for (const auto & time_handle : tasks)
                    {
                        if (!time_handle.second->removed)
                        {
                            min_time = time_handle.first;
                            task = time_handle.second;
                            break;
                        }
                    }
                }
            }

            if (shutdown)
                break;

            if (!task)
            {
                std::unique_lock<std::mutex> lock(tasks_mutex);
                wake_event.wait_for(lock,
                    std::chrono::duration<double>(sleep_seconds
                        + std::uniform_real_distribution<double>(0, sleep_seconds_random_part)(rng)));
                continue;
            }

            /// No tasks ready for execution.
            Poco::Timestamp current_time;
            if (min_time > current_time)
            {
                std::unique_lock<std::mutex> lock(tasks_mutex);
                wake_event.wait_for(lock, std::chrono::microseconds(
                    min_time - current_time + std::uniform_int_distribution<uint64_t>(0, sleep_seconds_random_part * 1000000)(rng)));
            }

            Poco::ScopedReadRWLock rlock(task->rwlock);

            if (task->removed)
                continue;

            {
                CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundPoolTask};
                done_work = task->function();
            }
        }
        catch (...)
        {
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }

        if (shutdown)
            break;

        /// If task has done work, it could be executed again immediately.
        /// If not, add delay before next run.
        Poco::Timestamp next_time_to_execute = Poco::Timestamp() + (done_work ? 0 : sleep_seconds * 1000000);

        {
            std::unique_lock<std::mutex> lock(tasks_mutex);

            if (task->removed)
                continue;

            tasks.erase(task->iterator);
            task->iterator = tasks.emplace(next_time_to_execute, task);
        }
    }

    current_memory_tracker = nullptr;
}
SharedLibraryPtr Compiler::getOrCount(
    const std::string & key,
    UInt32 min_count_to_compile,
    const std::string & additional_compiler_flags,
    CodeGenerator get_code,
    ReadyCallback on_ready)
{
    HashedKey hashed_key = getHash(key);

    std::lock_guard<std::mutex> lock(mutex);

    UInt32 count = ++counts[hashed_key];

    /// Is there a ready open library? Or, if the library is in the process of compiling, there will be nullptr.
    Libraries::iterator it = libraries.find(hashed_key);
    if (libraries.end() != it)
    {
        if (!it->second)
            LOG_INFO(log, "Library " << hashedKeyToFileName(hashed_key) << " is already compiling or compilation failed.");

        /// TODO In this case, after the compilation is finished, the callback will not be called.

        return it->second;
    }

    /// Is there a file with the library left over from a previous launch?
    std::string file_name = hashedKeyToFileName(hashed_key);
    if (files.count(file_name))
    {
        std::string so_file_path = path + '/' + file_name + ".so";
        LOG_INFO(log, "Loading existing library " << so_file_path);

        SharedLibraryPtr lib(new SharedLibrary(so_file_path));
        libraries[hashed_key] = lib;
        return lib;
    }

    /// Has min_count_to_compile been reached?
    if (count >= min_count_to_compile)
    {
        /// A min_count_to_compile value of zero indicates that compilation must be synchronous.

        /// Are there any free threads?
        if (min_count_to_compile == 0 || pool.active() < pool.size())
        {
            /// Indicates that the library is in the process of compiling.
            libraries[hashed_key] = nullptr;

            LOG_INFO(log, "Compiling code " << file_name << ", key: " << key);

            if (min_count_to_compile == 0)
            {
                {
                    ext::unlock_guard<std::mutex> unlock(mutex);
                    compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready);
                }

                return libraries[hashed_key];
            }
            else
            {
                pool.schedule([=]
                {
                    try
                    {
                        compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready);
                    }
                    catch (...)
                    {
                        tryLogCurrentException("Compiler");
                    }
                });
            }
        }
        else
            LOG_INFO(log, "All threads are busy.");
    }

    return nullptr;
}
void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
{
    LOG_WARNING(log, "Checking part " << part_name);
    ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks);

    /// If the part is still in the PreCommitted -> Committed transition, it is not lost
    /// and there is no need to go searching for it on other replicas. To definitely find the needed part,
    /// if it exists (or a part containing it), we first search among the PreCommitted parts.
    auto part = storage.data.getPartIfExists(part_name, {MergeTreeDataPartState::PreCommitted});
    if (!part)
        part = storage.data.getActiveContainingPart(part_name);

    /// We do not have this or a covering part.
    if (!part)
    {
        searchForMissingPart(part_name);
    }
    /// We have this part, and it's active. We will check whether we need this part and whether it has the right data.
    else if (part->name == part_name)
    {
        auto zookeeper = storage.getZooKeeper();
        auto table_lock = storage.lockStructure(false, __PRETTY_FUNCTION__);

        /// If the part is in ZooKeeper, check its data against its checksums, and those against ZooKeeper.
        if (zookeeper->exists(storage.replica_path + "/parts/" + part_name))
        {
            LOG_WARNING(log, "Checking data of part " << part_name << ".");

            try
            {
                auto zk_checksums = MinimalisticDataPartChecksums::deserializeFrom(
                    zookeeper->get(storage.replica_path + "/parts/" + part_name + "/checksums"));
                zk_checksums.checkEqual(part->checksums, true);

                auto zk_columns = NamesAndTypesList::parse(
                    zookeeper->get(storage.replica_path + "/parts/" + part_name + "/columns"));
                if (part->columns != zk_columns)
                    throw Exception("Columns of local part " + part_name + " are different from ZooKeeper");

                checkDataPart(
                    storage.data.getFullPath() + part_name,
                    storage.data.index_granularity,
                    true,
                    storage.data.primary_key_data_types,
                    [this] { return need_stop.load(); });

                if (need_stop)
                {
                    LOG_INFO(log, "Checking part was cancelled.");
                    return;
                }

                LOG_INFO(log, "Part " << part_name << " looks good.");
            }
            catch (const Exception & e)
            {
                /// TODO Better to check the error code.

                tryLogCurrentException(log, __PRETTY_FUNCTION__);

                LOG_ERROR(log, "Part " << part_name << " looks broken. Removing it and queueing a fetch.");
                ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);

                storage.removePartAndEnqueueFetch(part_name);

                /// Delete the part locally.
                storage.data.forgetPartAndMoveToDetached(part, "broken_");
            }
        }
        else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < time(nullptr))
        {
            /// If the part is not in ZooKeeper, delete it locally.
            /// Probably, someone just wrote down the part, and has not yet added it to ZK.
            /// Therefore, delete only if the part is old (not very reliable).
            ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
            LOG_ERROR(log, "Unexpected part " << part_name << " in filesystem. Removing.");
            storage.data.forgetPartAndMoveToDetached(part, "unexpected_");
        }
        else
        {
            /// TODO You need to make sure that the part is still checked after a while.
            /// Otherwise, it's possible that the part was not added to ZK,
            /// but remained in the filesystem and in a number of active parts.
            /// And then for a long time (before restarting), the data on the replicas will be different.

            LOG_TRACE(log, "Young part " << part_name << " with age " << (time(nullptr) - part->modification_time)
                << " seconds hasn't been added to ZooKeeper yet. It's ok.");
        }
    }
    else
    {
        /// If we have a covering part, ignore all the problems with this part.
        /// In the worst case, errors will still appear in the error log for `old_parts_lifetime` seconds, until the part is removed as an old one.
        LOG_WARNING(log, "We have part " << part->name << " covering part " << part_name);
    }
}
void ReplicatedMergeTreeAlterThread::run()
{
    try
    {
        /** We have a description of columns in ZooKeeper, common for all replicas (Example: /clickhouse/tables/02-06/visits/columns),
          * as well as a description of columns in the local file with metadata (storage.data.getColumnsList()).
          *
          * If these descriptions are different - you need to do ALTER.
          *
          * If the stored version of the node (columns_version) differs from the version in ZK,
          * then the description of the columns in ZK does not necessarily differ from the local one
          * - this can happen with a loop of ALTER-s, which as a whole changes nothing.
          * In this case, you need to update the stored version number,
          * and also check the structure of parts, and, if necessary, make ALTER.
          *
          * The recorded version number needs to be updated after updating the metadata, under lock.
          * This version number is checked against the current one for INSERT.
          * That is, we make sure to insert blocks with the correct structure.
          *
          * When the server starts, a previous ALTER might not have been completed.
          * Therefore, for the first time, regardless of the changes, we check the structure of all parts,
          * (Example: /clickhouse/tables/02-06/visits/replicas/example02-06-1.yandex.ru/parts/20140806_20140831_131664_134988_3296/columns)
          * and do ALTER if necessary.
          *
          * TODO: Too complicated, rewrite everything.
          */

        auto zookeeper = storage.getZooKeeper();

        String columns_path = storage.zookeeper_path + "/columns";
        auto columns_znode = zk_node_cache.get(columns_path, task->getWatchCallback());
        if (!columns_znode.exists)
            throw Exception(columns_path + " doesn't exist", ErrorCodes::NOT_FOUND_NODE);
        int32_t columns_version = columns_znode.stat.version;

        String metadata_path = storage.zookeeper_path + "/metadata";
        auto metadata_znode = zk_node_cache.get(metadata_path, task->getWatchCallback());
        if (!metadata_znode.exists)
            throw Exception(metadata_path + " doesn't exist", ErrorCodes::NOT_FOUND_NODE);
        int32_t metadata_version = metadata_znode.stat.version;

        const bool changed_columns_version = (columns_version != storage.columns_version);
        const bool changed_metadata_version = (metadata_version != storage.metadata_version);

        if (!(changed_columns_version || changed_metadata_version || force_recheck_parts))
            return;

        const String & columns_str = columns_znode.contents;
        auto columns_in_zk = ColumnsDescription::parse(columns_str);

        const String & metadata_str = metadata_znode.contents;
        auto metadata_in_zk = ReplicatedMergeTreeTableMetadata::parse(metadata_str);
        auto metadata_diff = ReplicatedMergeTreeTableMetadata(storage.data).checkAndFindDiff(metadata_in_zk, /* allow_alter = */ true);

        /// If you need to lock the table structure, then suspend merges.
        ActionLock merge_blocker = storage.merger_mutator.actions_blocker.cancel();

        MergeTreeData::DataParts parts;

        /// If metadata nodes have changed, we will update the table structure locally.
        if (changed_columns_version || changed_metadata_version)
        {
            /// Temporarily cancel part checks to avoid locking for a long time.
            auto temporarily_stop_part_checks = storage.part_check_thread.temporarilyStop();

            /// Temporarily cancel parts sending
            ActionLock data_parts_exchange_blocker;
            if (storage.data_parts_exchange_endpoint_holder)
                data_parts_exchange_blocker = storage.data_parts_exchange_endpoint_holder->getBlocker().cancel();

            /// Temporarily cancel part fetches
            auto fetches_blocker = storage.fetcher.blocker.cancel();

            LOG_INFO(log, "Version of metadata nodes in ZooKeeper changed. Waiting for structure write lock.");

            auto table_lock = storage.lockExclusively(RWLockImpl::NO_QUERY);

            if (columns_in_zk == storage.getColumns() && metadata_diff.empty())
            {
                LOG_INFO(log, "Metadata nodes changed in ZooKeeper, but their contents didn't change. "
                    "Most probably it is a cyclic ALTER.");
            }
            else
            {
                LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally.");

                storage.setTableStructure(std::move(columns_in_zk), metadata_diff);

                LOG_INFO(log, "Applied changes to the metadata of the table.");
            }

            /// You need to get a list of parts under table lock to avoid race condition with merge.
            parts = storage.data.getDataParts();

            storage.columns_version = columns_version;
            storage.metadata_version = metadata_version;
        }

        /// Update parts.
        if (changed_columns_version || force_recheck_parts)
        {
            auto table_lock = storage.lockStructureForShare(false, RWLockImpl::NO_QUERY);

            if (changed_columns_version)
                LOG_INFO(log, "ALTER-ing parts");

            int changed_parts = 0;

            if (!changed_columns_version)
                parts = storage.data.getDataParts();

            const auto columns_for_parts = storage.getColumns().getAllPhysical();
            const auto indices_for_parts = storage.getIndicesDescription();

            for (const MergeTreeData::DataPartPtr & part : parts)
            {
                /// Update the part and write result to temporary files.
                /// TODO: You can skip checking for too large changes if ZooKeeper has, for example,
                /// the node /flags/force_alter.
                auto transaction = storage.data.alterDataPart(part, columns_for_parts, indices_for_parts.indices, false);

                if (!transaction)
                    continue;

                storage.updatePartHeaderInZooKeeperAndCommit(zookeeper, *transaction);

                ++changed_parts;
            }

            /// Columns sizes could be quietly changed in case of MODIFY/ADD COLUMN
            storage.data.recalculateColumnSizes();

            if (changed_columns_version)
            {
                if (changed_parts != 0)
                    LOG_INFO(log, "ALTER-ed " << changed_parts << " parts");
                else
                    LOG_INFO(log, "No parts ALTER-ed");
            }
        }

        /// Update metadata ZK nodes for a specific replica.
        if (changed_columns_version || force_recheck_parts)
            zookeeper->set(storage.replica_path + "/columns", columns_str);
        if (changed_metadata_version || force_recheck_parts)
            zookeeper->set(storage.replica_path + "/metadata", metadata_str);

        force_recheck_parts = false;
    }
    catch (const Coordination::Exception & e)
    {
        tryLogCurrentException(log, __PRETTY_FUNCTION__);

        if (e.code == Coordination::ZSESSIONEXPIRED)
            return;

        force_recheck_parts = true;
        task->scheduleAfter(ALTER_ERROR_SLEEP_MS);
    }
    catch (...)
    {
        tryLogCurrentException(log, __PRETTY_FUNCTION__);

        force_recheck_parts = true;
        task->scheduleAfter(ALTER_ERROR_SLEEP_MS);
    }
}
void ExternalLoader::reloadAndUpdate(bool throw_on_error)
{
    reloadFromConfigFiles(throw_on_error);

    /// list of recreated loadable objects to perform delayed removal from unordered_map
    std::list<std::string> recreated_failed_loadable_objects;

    std::unique_lock<std::mutex> all_lock(all_mutex);

    /// retry loading failed loadable objects
    for (auto & failed_loadable_object : failed_loadable_objects)
    {
        if (std::chrono::system_clock::now() < failed_loadable_object.second.next_attempt_time)
            continue;

        const auto & name = failed_loadable_object.first;

        try
        {
            auto loadable_ptr = failed_loadable_object.second.loadable->clone();
            if (const auto exception_ptr = loadable_ptr->getCreationException())
            {
                /// recalculate next attempt time
                std::uniform_int_distribution<UInt64> distribution(
                    0, static_cast<UInt64>(std::exp2(failed_loadable_object.second.error_count)));

                std::chrono::seconds delay(std::min<UInt64>(
                    update_settings.backoff_max_sec,
                    update_settings.backoff_initial_sec + distribution(rnd_engine)));
                failed_loadable_object.second.next_attempt_time = std::chrono::system_clock::now() + delay;

                ++failed_loadable_object.second.error_count;

                std::rethrow_exception(exception_ptr);
            }
            else
            {
                const std::lock_guard<std::mutex> lock{map_mutex};

                const auto & lifetime = loadable_ptr->getLifetime();
                std::uniform_int_distribution<UInt64> distribution{lifetime.min_sec, lifetime.max_sec};
                update_times[name] = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)};

                const auto dict_it = loadable_objects.find(name);

                dict_it->second.loadable.reset();
                dict_it->second.loadable = std::move(loadable_ptr);

                /// clear stored exception on success
                dict_it->second.exception = std::exception_ptr{};

                recreated_failed_loadable_objects.push_back(name);
            }
        }
        catch (...)
        {
            tryLogCurrentException(log, "Failed reloading '" + name + "' " + object_name);

            if (throw_on_error)
                throw;
        }
    }

    /// do not undertake further attempts to recreate these loadable objects
    for (const auto & name : recreated_failed_loadable_objects)
        failed_loadable_objects.erase(name);

    /// periodic update
    for (auto & loadable_object : loadable_objects)
    {
        const auto & name = loadable_object.first;

        try
        {
            /// If the loadable objects failed to load or even failed to initialize from the config.
            if (!loadable_object.second.loadable)
                continue;

            auto current = loadable_object.second.loadable;
            const auto & lifetime = current->getLifetime();

            /// do not update loadable objects with zero as lifetime
            if (lifetime.min_sec == 0 || lifetime.max_sec == 0)
                continue;

            if (current->supportUpdates())
            {
                auto & update_time = update_times[current->getName()];

                /// check that timeout has passed
                if (std::chrono::system_clock::now() < update_time)
                    continue;

                SCOPE_EXIT({
                    /// calculate next update time
                    std::uniform_int_distribution<UInt64> distribution{lifetime.min_sec, lifetime.max_sec};
                    update_time = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)};
                });

                /// check source modified
                if (current->isModified())
                {
                    /// create new version of loadable object
                    auto new_version = current->clone();

                    if (const auto exception_ptr = new_version->getCreationException())
                        std::rethrow_exception(exception_ptr);

                    loadable_object.second.loadable.reset();
                    loadable_object.second.loadable = std::move(new_version);
                }
            }

            /// erase stored exception on success
            loadable_object.second.exception = std::exception_ptr{};
        }
        catch (...)
        {
            /// Handler reconstructed from the retry loop above; the original snippet is truncated at this point.
            tryLogCurrentException(log, "Failed updating '" + name + "' " + object_name);

            if (throw_on_error)
                throw;
        }
    }
}
void tryLogCurrentException(const char * log_name, const std::string & start_of_message)
{
    tryLogCurrentException(&Logger::get(log_name), start_of_message);
}
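/// A minimal usage sketch (not from the original sources) of the overload above:
/// the typical call site is a catch-all handler on a background thread, where the
/// logging itself must never throw. runBackgroundTask and doWork are hypothetical names.
void doWork();    /// hypothetical task body, assumed declared elsewhere

void runBackgroundTask()
{
    try
    {
        doWork();
    }
    catch (...)
    {
        /// Resolves the logger by name via &Logger::get(log_name) and logs the
        /// in-flight exception with the given message prefix; it does not rethrow,
        /// which is what makes it safe inside catch-all blocks like this one.
        tryLogCurrentException("BackgroundTask", "Error while running background task");
    }
}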
void ExternalDictionaries::reloadImpl(const bool throw_on_error)
{
    const auto config_paths = getDictionariesConfigPaths(Poco::Util::Application::instance().config());

    for (const auto & config_path : config_paths)
    {
        try
        {
            reloadFromFile(config_path, throw_on_error);
        }
        catch (...)
        {
            tryLogCurrentException(log, "reloadFromFile has thrown while reading from " + config_path);

            if (throw_on_error)
                throw;
        }
    }

    /// list of recreated dictionaries to perform delayed removal from unordered_map
    std::list<std::string> recreated_failed_dictionaries;

    /// retry loading failed dictionaries
    for (auto & failed_dictionary : failed_dictionaries)
    {
        if (std::chrono::system_clock::now() < failed_dictionary.second.next_attempt_time)
            continue;

        const auto & name = failed_dictionary.first;

        try
        {
            auto dict_ptr = failed_dictionary.second.dict->clone();
            if (const auto exception_ptr = dict_ptr->getCreationException())
            {
                /// recalculate next attempt time
                std::uniform_int_distribution<UInt64> distribution(
                    0, std::exp2(failed_dictionary.second.error_count));

                failed_dictionary.second.next_attempt_time = std::chrono::system_clock::now()
                    + std::chrono::seconds{
                        std::min<UInt64>(backoff_max_sec, backoff_initial_sec + distribution(rnd_engine))};

                ++failed_dictionary.second.error_count;

                std::rethrow_exception(exception_ptr);
            }
            else
            {
                const std::lock_guard<std::mutex> lock{dictionaries_mutex};

                const auto & lifetime = dict_ptr->getLifetime();
                std::uniform_int_distribution<UInt64> distribution{lifetime.min_sec, lifetime.max_sec};
                update_times[name] = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)};

                const auto dict_it = dictionaries.find(name);
                if (dict_it->second.dict)
                    dict_it->second.dict->set(dict_ptr.release());
                else
                    dict_it->second.dict = std::make_shared<MultiVersion<IDictionaryBase>>(dict_ptr.release());

                /// erase stored exception on success
                dict_it->second.exception = std::exception_ptr{};

                recreated_failed_dictionaries.push_back(name);
            }
        }
        catch (...)
        {
            tryLogCurrentException(log, "Failed reloading '" + name + "' dictionary");

            if (throw_on_error)
                throw;
        }
    }

    /// do not undertake further attempts to recreate these dictionaries
    for (const auto & name : recreated_failed_dictionaries)
        failed_dictionaries.erase(name);

    /// periodic update
    for (auto & dictionary : dictionaries)
    {
        const auto & name = dictionary.first;

        try
        {
            /// If the dictionary failed to load or even failed to initialize from the config.
            if (!dictionary.second.dict)
                continue;

            auto current = dictionary.second.dict->get();
            const auto & lifetime = current->getLifetime();

            /// do not update dictionaries with zero as lifetime
            if (lifetime.min_sec == 0 || lifetime.max_sec == 0)
                continue;

            /// update only non-cached dictionaries
            if (!current->isCached())
            {
                auto & update_time = update_times[current->getName()];

                /// check that timeout has passed
                if (std::chrono::system_clock::now() < update_time)
                    continue;

                SCOPE_EXIT({
                    /// calculate next update time
                    std::uniform_int_distribution<UInt64> distribution{lifetime.min_sec, lifetime.max_sec};
                    update_time = std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)};
                });

                /// check source modified
                if (current->getSource()->isModified())
                {
                    /// create new version of dictionary
                    auto new_version = current->clone();

                    if (const auto exception_ptr = new_version->getCreationException())
                        std::rethrow_exception(exception_ptr);

                    dictionary.second.dict->set(new_version.release());
                }
            }

            /// erase stored exception on success
            dictionary.second.exception = std::exception_ptr{};
        }
        catch (...)
        {
            /// Handler reconstructed from the retry loop above; the original snippet is truncated at this point.
            tryLogCurrentException(log, "Failed updating '" + name + "' dictionary");

            if (throw_on_error)
                throw;
        }
    }
}
void ReplicatedMergeTreePartCheckThread::run()
{
    if (need_stop)
        return;

    try
    {
        time_t current_time = time(nullptr);

        /// Take a part from the queue for verification.
        PartsToCheckQueue::iterator selected = parts_queue.end();    /// end() of std::list does not get invalidated
        time_t min_check_time = std::numeric_limits<time_t>::max();

        {
            std::lock_guard<std::mutex> lock(parts_mutex);

            if (parts_queue.empty())
            {
                if (!parts_set.empty())
                {
                    LOG_ERROR(log, "Non-empty parts_set with empty parts_queue. This is a bug.");
                    parts_set.clear();
                }
            }
            else
            {
                for (auto it = parts_queue.begin(); it != parts_queue.end(); ++it)
                {
                    if (it->second <= current_time)
                    {
                        selected = it;
                        break;
                    }

                    if (it->second < min_check_time)
                        min_check_time = it->second;
                }
            }
        }

        if (selected == parts_queue.end())
            return;

        checkPart(selected->first);

        if (need_stop)
            return;

        /// Remove the part from the check queue.
        {
            std::lock_guard<std::mutex> lock(parts_mutex);

            if (parts_queue.empty())
            {
                LOG_ERROR(log, "Someone erased checking part from parts_queue. This is a bug.");
            }
            else
            {
                parts_set.erase(selected->first);
                parts_queue.erase(selected);
            }
        }

        task->schedule();
    }
    catch (const zkutil::KeeperException & e)
    {
        tryLogCurrentException(log, __PRETTY_FUNCTION__);

        if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED)
            return;

        task->scheduleAfter(PART_CHECK_ERROR_SLEEP_MS);
    }
    catch (...)
    {
        tryLogCurrentException(log, __PRETTY_FUNCTION__);
        task->scheduleAfter(PART_CHECK_ERROR_SLEEP_MS);
    }
}
void ReplicatedMergeTreeRestartingThread::run()
{
    constexpr auto retry_period_ms = 10 * 1000;

    /// How often we check whether the ZK session has expired.
    time_t check_period_ms = 60 * 1000;

    /// How often we check the replica's lag.
    if (check_period_ms > static_cast<time_t>(storage.data.settings.check_delay_period) * 1000)
        check_period_ms = storage.data.settings.check_delay_period * 1000;

    setThreadName("ReplMTRestart");

    try
    {
        bool first_time = true;                 /// Activating the replica for the first time.
        bool need_restart = false;              /// Restart on our own initiative, to give up leadership.
        time_t prev_time_of_check_delay = 0;

        /// Starting the replica on server startup / table creation. Restarting the replica when the ZK session expires.
        while (!need_stop)
        {
            if (first_time || need_restart || storage.getZooKeeper()->expired())
            {
                if (first_time)
                {
                    LOG_DEBUG(log, "Activating replica.");
                }
                else
                {
                    if (need_restart)
                        LOG_WARNING(log, "Will reactivate replica.");
                    else
                        LOG_WARNING(log, "ZooKeeper session has expired. Switching to a new session.");

                    if (!storage.is_readonly)
                        CurrentMetrics::add(CurrentMetrics::ReadonlyReplica);
                    storage.is_readonly = true;
                    partialShutdown();
                }

                while (true)
                {
                    try
                    {
                        storage.setZooKeeper(storage.context.getZooKeeper());
                    }
                    catch (const zkutil::KeeperException & e)
                    {
                        /// An exception during zookeeper_init usually means that DNS is not working. We will try again.
                        tryLogCurrentException(__PRETTY_FUNCTION__);

                        wakeup_event.tryWait(retry_period_ms);
                        continue;
                    }

                    if (!need_stop && !tryStartup())
                    {
                        wakeup_event.tryWait(retry_period_ms);
                        continue;
                    }

                    break;
                }

                if (storage.is_readonly)
                    CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica);
                storage.is_readonly = false;
                first_time = false;
                need_restart = false;
            }

            time_t current_time = time(0);
            if (current_time >= prev_time_of_check_delay + static_cast<time_t>(storage.data.settings.check_delay_period))
            {
                /// Find out the replica's lag.
                time_t absolute_delay = 0;
                time_t relative_delay = 0;

                bool error = false;
                try
                {
                    storage.getReplicaDelays(absolute_delay, relative_delay);
                    LOG_TRACE(log, "Absolute delay: " << absolute_delay << ". Relative delay: " << relative_delay << ".");
                }
                catch (...)
                {
                    tryLogCurrentException(__PRETTY_FUNCTION__, "Cannot get replica delays");
                    error = true;
                }

                prev_time_of_check_delay = current_time;

                /// Yield leadership if the relative lag is greater than the threshold.
                if (storage.is_leader_node
                    && (error || relative_delay > static_cast<time_t>(storage.data.settings.min_relative_delay_to_yield_leadership)))
                {
                    if (error)
                        LOG_INFO(log, "Will yield leadership.");
                    else
                        LOG_INFO(log, "Relative replica delay (" << relative_delay << " seconds) is bigger than threshold ("
                            << storage.data.settings.min_relative_delay_to_yield_leadership << "). Will yield leadership.");

                    ProfileEvents::increment(ProfileEvents::ReplicaYieldLeadership);

                    need_restart = true;
                    continue;
                }
            }

            wakeup_event.tryWait(check_period_ms);
        }
    }
    catch (...)
    {
        tryLogCurrentException("StorageReplicatedMergeTree::restartingThread");
        LOG_ERROR(log, "Unexpected exception in restartingThread. The storage will be readonly until server restart.");
        goReadOnlyPermanently();
        LOG_DEBUG(log, "Restarting thread finished");
        return;
    }

    try
    {
        storage.endpoint_holder->cancel();
        storage.endpoint_holder = nullptr;

        storage.disk_space_monitor_endpoint_holder->cancel();
        storage.disk_space_monitor_endpoint_holder = nullptr;

        storage.sharded_partition_uploader_endpoint_holder->cancel();
        storage.sharded_partition_uploader_endpoint_holder = nullptr;

        storage.remote_query_executor_endpoint_holder->cancel();
        storage.remote_query_executor_endpoint_holder = nullptr;

        storage.remote_part_checker_endpoint_holder->cancel();
        storage.remote_part_checker_endpoint_holder = nullptr;

        partialShutdown();
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }

    LOG_DEBUG(log, "Restarting thread finished");
}
void ReplicatedMergeTreeAlterThread::run()
{
    setThreadName("ReplMTAlter");

    bool force_recheck_parts = true;

    while (!need_stop)
    {
        try
        {
            /** We have a description of columns in ZooKeeper, common for all replicas (Example: /clickhouse/tables/02-06/visits/columns),
              * as well as a description of columns in the local file with metadata (storage.data.getColumnsList()).
              *
              * If these descriptions are different - an ALTER needs to be done.
              *
              * If the remembered version of the node (columns_version) differs from the version in ZK,
              * then the description of the columns in ZK does not necessarily differ from the local one
              * - this can happen with a loop of ALTER-s, which as a whole changes nothing.
              * In this case, you need to update the remembered version number,
              * and also check the structure of the parts anyway and, if necessary, do the ALTER.
              *
              * The remembered version number needs to be updated after updating the metadata, under lock.
              * This version number is checked against the current one for INSERT.
              * That is, this is how we make sure that blocks with the correct structure are inserted.
              *
              * When the server starts, a previous ALTER might not have been completed.
              * Therefore, for the first time, regardless of the changes, we check the structure of all parts,
              * (Example: /clickhouse/tables/02-06/visits/replicas/example02-06-1.yandex.ru/parts/20140806_20140831_131664_134988_3296/columns)
              * and do the ALTER if necessary.
              *
              * TODO: Too complicated, rewrite everything.
              */

            auto zookeeper = storage.getZooKeeper();

            zkutil::Stat stat;
            const String columns_str = zookeeper->get(storage.zookeeper_path + "/columns", &stat, wakeup_event);
            auto columns_desc = ColumnsDescription<true>::parse(columns_str);

            auto & columns = columns_desc.columns;
            auto & materialized_columns = columns_desc.materialized;
            auto & alias_columns = columns_desc.alias;
            auto & column_defaults = columns_desc.defaults;

            bool changed_version = (stat.version != storage.columns_version);

            {
                /// If the table structure will need to be locked, suspend merges.
                std::unique_ptr<MergeTreeMergeBlocker> merge_blocker;
                std::unique_ptr<MergeTreeMergeBlocker> unreplicated_merge_blocker;

                if (changed_version || force_recheck_parts)
                {
                    merge_blocker = std::make_unique<MergeTreeMergeBlocker>(storage.merger);
                    if (storage.unreplicated_merger)
                        unreplicated_merge_blocker = std::make_unique<MergeTreeMergeBlocker>(*storage.unreplicated_merger);
                }

                MergeTreeData::DataParts parts;

                /// If the column description changed, update the table structure locally.
                if (changed_version)
                {
                    LOG_INFO(log, "Changed version of 'columns' node in ZooKeeper. Waiting for structure write lock.");

                    auto table_lock = storage.lockStructureForAlter();

                    const auto columns_changed = columns != storage.data.getColumnsListNonMaterialized();
                    const auto materialized_columns_changed = materialized_columns != storage.data.materialized_columns;
                    const auto alias_columns_changed = alias_columns != storage.data.alias_columns;
                    const auto column_defaults_changed = column_defaults != storage.data.column_defaults;

                    if (columns_changed || materialized_columns_changed || alias_columns_changed || column_defaults_changed)
                    {
                        LOG_INFO(log, "Columns list changed in ZooKeeper. Applying changes locally.");

                        storage.context.getDatabase(storage.database_name)->alterTable(
                            storage.context, storage.table_name,
                            columns, materialized_columns, alias_columns, column_defaults, {});

                        if (columns_changed)
                        {
                            storage.data.setColumnsList(columns);

                            if (storage.unreplicated_data)
                                storage.unreplicated_data->setColumnsList(columns);
                        }

                        if (materialized_columns_changed)
                        {
                            storage.materialized_columns = materialized_columns;
                            storage.data.materialized_columns = std::move(materialized_columns);
                        }

                        if (alias_columns_changed)
                        {
                            storage.alias_columns = alias_columns;
                            storage.data.alias_columns = std::move(alias_columns);
                        }

                        if (column_defaults_changed)
                        {
                            storage.column_defaults = column_defaults;
                            storage.data.column_defaults = std::move(column_defaults);
                        }

                        LOG_INFO(log, "Applied changes to table.");
                    }
                    else
                    {
                        LOG_INFO(log, "Columns version changed in ZooKeeper, but data wasn't changed. It's like cyclic ALTERs.");
                    }

                    /// You need to get the list of parts under table lock to avoid a race condition with merge.
                    parts = storage.data.getDataParts();

                    storage.columns_version = stat.version;
                }

                /// Update the parts.
                if (changed_version || force_recheck_parts)
                {
                    auto table_lock = storage.lockStructure(false);

                    if (changed_version)
                        LOG_INFO(log, "ALTER-ing parts");

                    int changed_parts = 0;

                    if (!changed_version)
                        parts = storage.data.getDataParts();

                    const auto columns_plus_materialized = storage.data.getColumnsList();

                    for (const MergeTreeData::DataPartPtr & part : parts)
                    {
                        /// Update the part and write the result to temporary files.
                        /// TODO: You can skip checking for too large changes if ZooKeeper has, for example,
                        /// the node /flags/force_alter.
                        auto transaction = storage.data.alterDataPart(
                            part, columns_plus_materialized, storage.data.primary_expr_ast, false);

                        if (!transaction)
                            continue;

                        ++changed_parts;

                        /// Update the part's metadata in ZooKeeper.
                        zkutil::Ops ops;
                        ops.push_back(new zkutil::Op::SetData(
                            storage.replica_path + "/parts/" + part->name + "/columns", transaction->getNewColumns().toString(), -1));
                        ops.push_back(new zkutil::Op::SetData(
                            storage.replica_path + "/parts/" + part->name + "/checksums", transaction->getNewChecksums().toString(), -1));

                        try
                        {
                            zookeeper->multi(ops);
                        }
                        catch (const zkutil::KeeperException & e)
                        {
                            /// The part does not exist in ZK. Add it to the check queue - maybe the part is superfluous and should be removed locally.
                            if (e.code == ZNONODE)
                                storage.enqueuePartForCheck(part->name);

                            throw;
                        }

                        /// Apply the file changes.
                        transaction->commit();
                    }

                    /// The same for non-replicated data.
                    if (storage.unreplicated_data)
                    {
                        parts = storage.unreplicated_data->getDataParts();

                        for (const MergeTreeData::DataPartPtr & part : parts)
                        {
                            auto transaction = storage.unreplicated_data->alterDataPart(
                                part, columns_plus_materialized, storage.data.primary_expr_ast, false);

                            if (!transaction)
                                continue;

                            ++changed_parts;

                            transaction->commit();
                        }
                    }

                    /// The list of columns for a specific replica.
                    zookeeper->set(storage.replica_path + "/columns", columns_str);

                    if (changed_version)
                    {
                        if (changed_parts != 0)
                            LOG_INFO(log, "ALTER-ed " << changed_parts << " parts");
                        else
                            LOG_INFO(log, "No parts ALTER-ed");
                    }

                    force_recheck_parts = false;
                }

                /// It is important that parts and merge_blocker are destroyed before the wait.
            }

            wakeup_event->wait();
        }
        catch (...)
        {
            tryLogCurrentException(__PRETTY_FUNCTION__);

            force_recheck_parts = true;
            wakeup_event->tryWait(ALTER_ERROR_SLEEP_MS);
        }
    }

    LOG_DEBUG(log, "Alter thread finished");
}