/// Stream that reads a single data part sequentially from its first mark to its last.
/// Reads all marks of the part; column types are taken either from the table metadata
/// or from the part itself, depending on `take_column_types_from_storage`.
MergeTreeSequentialBlockInputStream::MergeTreeSequentialBlockInputStream(
    const MergeTreeData & storage_,
    const MergeTreeData::DataPartPtr & data_part_,
    Names columns_to_read_,
    bool read_with_direct_io_,
    bool take_column_types_from_storage,
    bool quiet)
    : storage(storage_)
    , data_part(data_part_)
    , part_columns_lock(data_part->columns_lock)
    , columns_to_read(columns_to_read_)
    , read_with_direct_io(read_with_direct_io_)
    , mark_cache(storage.global_context.getMarkCache())
{
    if (!quiet)
    {
        /// Trace what will be read, for diagnostics.
        std::stringstream log_message;
        log_message << "Reading " << data_part->marks_count << " marks from part " << data_part->name
            << ", total " << data_part->rows_count
            << " rows starting from the beginning of the part, columns: ";

        bool is_first_column = true;
        for (const auto & column_name : columns_to_read)
        {
            if (!is_first_column)
                log_message << ", ";
            log_message << column_name;
            is_first_column = false;
        }

        LOG_TRACE(log, log_message.rdbuf());
    }

    addTotalRowsApprox(data_part->rows_count);

    header = storage.getSampleBlockForColumns(columns_to_read);
    fixHeader(header);

    /// Add columns because we don't want to read empty blocks
    injectRequiredColumns(storage, data_part, columns_to_read);

    /// Types either come from the table definition or from the part on disk.
    NamesAndTypesList columns_for_reader = take_column_types_from_storage
        ? storage.getColumns().getAllPhysical().addTypes(columns_to_read)
        : data_part->columns.addTypes(columns_to_read);

    reader = std::make_unique<MergeTreeReader>(
        data_part->getFullPath(), data_part, columns_for_reader, /* uncompressed_cache = */ nullptr,
        mark_cache.get(), /* save_marks_in_cache = */ false, storage,
        MarkRanges{MarkRange(0, data_part->marks_count)},
        /* bytes to use AIO (this is hack) */
        read_with_direct_io ? 1UL : std::numeric_limits<size_t>::max(),
        DBMS_DEFAULT_BUFFER_SIZE);
}
std::vector<std::size_t> MergeTreeReadPool::fillPerPartInfo( RangesInDataParts & parts, const ExpressionActionsPtr & prewhere_actions, const String & prewhere_column_name, const bool check_columns) { std::vector<std::size_t> per_part_sum_marks; for (const auto i : ext::range(0, parts.size())) { auto & part = parts[i]; /// Read marks for every data part. size_t sum_marks = 0; /// Ranges are in right-to-left order, due to 'reverse' in MergeTreeDataSelectExecutor. for (const auto & range : part.ranges) sum_marks += range.end - range.begin; per_part_sum_marks.push_back(sum_marks); per_part_columns_lock.push_back(std::make_unique<Poco::ScopedReadRWLock>( part.data_part->columns_lock)); /// inject column names required for DEFAULT evaluation in current part auto required_column_names = column_names; const auto injected_columns = injectRequiredColumns(part.data_part, required_column_names); auto should_reoder = !injected_columns.empty(); Names required_pre_column_names; if (prewhere_actions) { /// collect columns required for PREWHERE evaluation required_pre_column_names = prewhere_actions->getRequiredColumns(); /// there must be at least one column required for PREWHERE if (required_pre_column_names.empty()) required_pre_column_names.push_back(required_column_names[0]); /// PREWHERE columns may require some additional columns for DEFAULT evaluation const auto injected_pre_columns = injectRequiredColumns(part.data_part, required_pre_column_names); if (!injected_pre_columns.empty()) should_reoder = true; /// will be used to distinguish between PREWHERE and WHERE columns when applying filter const NameSet pre_name_set{ std::begin(required_pre_column_names), std::end(required_pre_column_names) }; /** If expression in PREWHERE is not table column, then no need to return column with it to caller * (because storage is expected only to read table columns). 
*/ per_part_remove_prewhere_column.push_back(0 == pre_name_set.count(prewhere_column_name)); Names post_column_names; for (const auto & name : required_column_names) if (!pre_name_set.count(name)) post_column_names.push_back(name); required_column_names = post_column_names; } else per_part_remove_prewhere_column.push_back(false); per_part_column_name_set.emplace_back(std::begin(required_column_names), std::end(required_column_names)); if (check_columns) { /** Under part->columns_lock check that all requested columns in part are of same type that in table. * This could be violated during ALTER MODIFY. */ if (!required_pre_column_names.empty()) data.check(part.data_part->columns, required_pre_column_names); if (!required_column_names.empty()) data.check(part.data_part->columns, required_column_names); per_part_pre_columns.push_back(data.getColumnsList().addTypes(required_pre_column_names)); per_part_columns.push_back(data.getColumnsList().addTypes(required_column_names)); } else { per_part_pre_columns.push_back(part.data_part->columns.addTypes(required_pre_column_names)); per_part_columns.push_back(part.data_part->columns.addTypes(required_column_names)); } per_part_should_reorder.push_back(should_reoder); this->parts.push_back({ part.data_part, part.part_index_in_query }); } return per_part_sum_marks; }
/// Builds the single MergeTreeReadTask for this stream (this stream reads exactly one
/// part, so only the first call produces a task; subsequent calls finish the stream and
/// return false). On first call it also lazily creates the readers and caches.
/// Any exception marks the part as suspicious (except memory-limit errors) and rethrows.
bool MergeTreeBlockInputStream::getNewTask()
try
{
    /// Produce only one task
    if (!is_first_task)
    {
        finish();
        return false;
    }
    is_first_task = false;

    Names pre_column_names, column_names = ordered_names;
    bool remove_prewhere_column = false;

    /// inject columns required for defaults evaluation
    bool should_reorder = !injectRequiredColumns(storage, data_part, column_names).empty();

    if (prewhere_actions)
    {
        /// Columns needed to evaluate the PREWHERE expression are read in a first pass.
        pre_column_names = prewhere_actions->getRequiredColumns();

        /// At least one column must be read in the PREWHERE pass.
        if (pre_column_names.empty())
            pre_column_names.push_back(column_names[0]);

        /// PREWHERE columns may themselves require extra columns for DEFAULT evaluation.
        const auto injected_pre_columns = injectRequiredColumns(storage, data_part, pre_column_names);
        if (!injected_pre_columns.empty())
            should_reorder = true;

        const NameSet pre_name_set(pre_column_names.begin(), pre_column_names.end());

        /// If the expression in PREWHERE is not a column of the table, you do not need to output a column with it
        /// (from storage expect to receive only the columns of the table).
        remove_prewhere_column = !pre_name_set.count(prewhere_column);

        /// The second (WHERE-stage) pass reads only the columns not already read for PREWHERE.
        Names post_column_names;
        for (const auto & name : column_names)
            if (!pre_name_set.count(name))
                post_column_names.push_back(name);

        column_names = post_column_names;
    }

    /// will be used to distinguish between PREWHERE and WHERE columns when applying filter
    column_name_set = NameSet{column_names.begin(), column_names.end()};

    if (check_columns)
    {
        /// Under owned_data_part->columns_lock we check that all requested columns are of the same type as in the table.
        /// This may be not true in case of ALTER MODIFY.
        if (!pre_column_names.empty())
            storage.check(data_part->columns, pre_column_names);
        if (!column_names.empty())
            storage.check(data_part->columns, column_names);

        pre_columns = storage.getColumnsList().addTypes(pre_column_names);
        columns = storage.getColumnsList().addTypes(column_names);
    }
    else
    {
        /// Without the check, take the types as stored in the part itself.
        pre_columns = data_part->columns.addTypes(pre_column_names);
        columns = data_part->columns.addTypes(column_names);
    }

    /** @note you could simply swap `reverse` in if and else branches of MergeTreeDataSelectExecutor,
      * and remove this reverse.
      */
    MarkRanges remaining_mark_ranges = all_mark_ranges;
    std::reverse(remaining_mark_ranges.begin(), remaining_mark_ranges.end());

    /// preferred_block_size_bytes == 0 disables adaptive block-size prediction.
    auto size_predictor = (preferred_block_size_bytes == 0)
        ? nullptr
        : std::make_unique<MergeTreeBlockSizePredictor>(data_part, ordered_names, data_part->storage.getSampleBlock());

    task = std::make_unique<MergeTreeReadTask>(
        data_part, remaining_mark_ranges, part_index_in_query, ordered_names, column_name_set,
        columns, pre_columns, remove_prewhere_column, should_reorder, std::move(size_predictor));

    /// Readers (and caches they use) are created lazily on the first task.
    if (!reader)
    {
        if (use_uncompressed_cache)
            owned_uncompressed_cache = storage.context.getUncompressedCache();

        owned_mark_cache = storage.context.getMarkCache();

        reader = std::make_unique<MergeTreeReader>(
            path, data_part, columns, owned_uncompressed_cache.get(), owned_mark_cache.get(),
            save_marks_in_cache, storage, all_mark_ranges, min_bytes_to_use_direct_io, max_read_buffer_size);

        /// A separate reader handles the PREWHERE pass over its own column set.
        if (prewhere_actions)
            pre_reader = std::make_unique<MergeTreeReader>(
                path, data_part, pre_columns, owned_uncompressed_cache.get(), owned_mark_cache.get(),
                save_marks_in_cache, storage, all_mark_ranges, min_bytes_to_use_direct_io, max_read_buffer_size);
    }

    return true;
}
catch (...)
{
    /// Suspicion of the broken part. A part is added to the queue for verification.
    if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
        storage.reportBrokenPart(data_part->name);
    throw;
}