MergeTreeRangeReader MergeTreeReader::readRange(size_t from_mark, size_t to_mark) { return MergeTreeRangeReader(*this, from_mark, to_mark, storage.index_granularity); }
Block MergeTreeBaseSelectBlockInputStream::readFromPart() { if (task->size_predictor) task->size_predictor->startBlock(); const auto current_max_block_size_rows = max_block_size_rows; const auto current_preferred_block_size_bytes = preferred_block_size_bytes; const auto current_preferred_max_column_in_block_size_bytes = preferred_max_column_in_block_size_bytes; const auto & index_granularity = task->data_part->index_granularity; const double min_filtration_ratio = 0.00001; auto estimateNumRows = [current_preferred_block_size_bytes, current_max_block_size_rows, index_granularity, current_preferred_max_column_in_block_size_bytes, min_filtration_ratio]( MergeTreeReadTask & current_task, MergeTreeRangeReader & current_reader) { if (!current_task.size_predictor) return static_cast<size_t>(current_max_block_size_rows); /// Calculates number of rows will be read using preferred_block_size_bytes. /// Can't be less than avg_index_granularity. size_t rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes); if (!rows_to_read) return rows_to_read; auto total_row_in_current_granule = current_reader.numRowsInCurrentGranule(); rows_to_read = std::max(total_row_in_current_granule, rows_to_read); if (current_preferred_max_column_in_block_size_bytes) { /// Calculates number of rows will be read using preferred_max_column_in_block_size_bytes. auto rows_to_read_for_max_size_column = current_task.size_predictor->estimateNumRowsForMaxSizeColumn(current_preferred_max_column_in_block_size_bytes); double filtration_ratio = std::max(min_filtration_ratio, 1.0 - current_task.size_predictor->filtered_rows_ratio); auto rows_to_read_for_max_size_column_with_filtration = static_cast<size_t>(rows_to_read_for_max_size_column / filtration_ratio); /// If preferred_max_column_in_block_size_bytes is used, number of rows to read can be less than current_index_granularity. rows_to_read = std::min(rows_to_read, rows_to_read_for_max_size_column_with_filtration); } auto unread_rows_in_current_granule = current_reader.numPendingRowsInCurrentGranule(); if (unread_rows_in_current_granule >= rows_to_read) return rows_to_read; return index_granularity.countMarksForRows(current_reader.currentMark(), rows_to_read, current_reader.numReadRowsInCurrentGranule()); }; if (!task->range_reader.isInitialized()) { if (prewhere_info) { if (reader->getColumns().empty()) { task->range_reader = MergeTreeRangeReader( pre_reader.get(), nullptr, prewhere_info->alias_actions, prewhere_info->prewhere_actions, &prewhere_info->prewhere_column_name, &task->ordered_names, task->should_reorder, task->remove_prewhere_column, true); } else { MergeTreeRangeReader * pre_reader_ptr = nullptr; if (pre_reader != nullptr) { task->pre_range_reader = MergeTreeRangeReader( pre_reader.get(), nullptr, prewhere_info->alias_actions, prewhere_info->prewhere_actions, &prewhere_info->prewhere_column_name, &task->ordered_names, task->should_reorder, task->remove_prewhere_column, false); pre_reader_ptr = &task->pre_range_reader; } task->range_reader = MergeTreeRangeReader( reader.get(), pre_reader_ptr, nullptr, nullptr, nullptr, &task->ordered_names, true, false, true); } } else { task->range_reader = MergeTreeRangeReader( reader.get(), nullptr, nullptr, nullptr, nullptr, &task->ordered_names, task->should_reorder, false, true); } } UInt64 recommended_rows = estimateNumRows(*task, task->range_reader); UInt64 rows_to_read = std::max(UInt64(1), std::min(current_max_block_size_rows, recommended_rows)); auto read_result = task->range_reader.read(rows_to_read, task->mark_ranges); /// All rows were filtered. Repeat. if (read_result.block.rows() == 0) read_result.block.clear(); UInt64 num_filtered_rows = read_result.numReadRows() - read_result.block.rows(); progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); if (task->size_predictor) { task->size_predictor->updateFilteredRowsRation(read_result.numReadRows(), num_filtered_rows); if (read_result.block) task->size_predictor->update(read_result.block); } if (read_result.block && prewhere_info && !task->remove_prewhere_column) { /// Convert const column to full here because it's cheaper to filter const column than full. auto & column = read_result.block.getByName(prewhere_info->prewhere_column_name); column.column = column.column->convertToFullColumnIfConst(); } read_result.block.checkNumberOfRows(); return read_result.block; }