Block MergeTreeBaseBlockInputStream::readFromPart() { Block res; if (task->size_predictor) task->size_predictor->startBlock(); if (prewhere_actions) { do { /// Let's read the full block of columns needed to calculate the expression in PREWHERE. size_t space_left = std::max(1LU, max_block_size_marks); MarkRanges ranges_to_read; if (task->size_predictor) { /// FIXME: size prediction model is updated by filtered rows, but it predicts size of unfiltered rows also size_t recommended_marks = task->size_predictor->estimateNumMarks(preferred_block_size_bytes, storage.index_granularity); if (res && recommended_marks < 1) break; space_left = std::min(space_left, std::max(1LU, recommended_marks)); } while (!task->mark_ranges.empty() && space_left && !isCancelled()) { auto & range = task->mark_ranges.back(); size_t marks_to_read = std::min(range.end - range.begin, space_left); pre_reader->readRange(range.begin, range.begin + marks_to_read, res); ranges_to_read.emplace_back(range.begin, range.begin + marks_to_read); space_left -= marks_to_read; range.begin += marks_to_read; if (range.begin == range.end) task->mark_ranges.pop_back(); } /// In case of isCancelled. if (!res) return res; progressImpl({ res.rows(), res.bytes() }); pre_reader->fillMissingColumns(res, task->ordered_names, task->should_reorder); /// Compute the expression in PREWHERE. prewhere_actions->execute(res); ColumnPtr column = res.getByName(prewhere_column).column; if (task->remove_prewhere_column) res.erase(prewhere_column); const auto pre_bytes = res.bytes(); ColumnPtr observed_column; if (column->isNullable()) { ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*column); observed_column = nullable_col.getNestedColumn(); } else observed_column = column; /** If the filter is a constant (for example, it says PREWHERE 1), * then either return an empty block, or return the block unchanged. */ if (const auto column_const = typeid_cast<const ColumnConstUInt8 *>(observed_column.get())) { if (!column_const->getData()) { res.clear(); return res; } for (const auto & range : ranges_to_read) reader->readRange(range.begin, range.end, res); progressImpl({ 0, res.bytes() - pre_bytes }); } else if (const auto column_vec = typeid_cast<const ColumnUInt8 *>(observed_column.get())) { size_t index_granularity = storage.index_granularity; const auto & pre_filter = column_vec->getData(); IColumn::Filter post_filter(pre_filter.size()); /// Let's read the rest of the columns in the required segments and compose our own filter for them. size_t pre_filter_pos = 0; size_t post_filter_pos = 0; for (const auto & range : ranges_to_read) { auto begin = range.begin; auto pre_filter_begin_pos = pre_filter_pos; for (auto mark = range.begin; mark <= range.end; ++mark) { UInt8 nonzero = 0; if (mark != range.end) { const size_t limit = std::min(pre_filter.size(), pre_filter_pos + index_granularity); for (size_t row = pre_filter_pos; row < limit; ++row) nonzero |= pre_filter[row]; } if (!nonzero) { if (mark > begin) { memcpy( &post_filter[post_filter_pos], &pre_filter[pre_filter_begin_pos], pre_filter_pos - pre_filter_begin_pos); post_filter_pos += pre_filter_pos - pre_filter_begin_pos; reader->readRange(begin, mark, res); } begin = mark + 1; pre_filter_begin_pos = std::min(pre_filter_pos + index_granularity, pre_filter.size()); } if (mark < range.end) pre_filter_pos = std::min(pre_filter_pos + index_granularity, pre_filter.size()); } } if (!post_filter_pos) { res.clear(); continue; } progressImpl({ 0, res.bytes() - pre_bytes }); post_filter.resize(post_filter_pos); /// Filter the columns related to PREWHERE using pre_filter, /// other columns - using post_filter. size_t rows = 0; for (const auto i : ext::range(0, res.columns())) { auto & col = res.safeGetByPosition(i); if (col.name == prewhere_column && res.columns() > 1) continue; col.column = col.column->filter(task->column_name_set.count(col.name) ? post_filter : pre_filter, -1); rows = col.column->size(); } /// Replace column with condition value from PREWHERE to a constant. if (!task->remove_prewhere_column) res.getByName(prewhere_column).column = std::make_shared<ColumnConstUInt8>(rows, 1); } else throw Exception{ "Illegal type " + column->getName() + " of column for filter. Must be ColumnUInt8 or ColumnConstUInt8.", ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }; if (res) { if (task->size_predictor) task->size_predictor->update(res); reader->fillMissingColumnsAndReorder(res, task->ordered_names); } } while (!task->mark_ranges.empty() && !res && !isCancelled()); } else { size_t space_left = std::max(1LU, max_block_size_marks); while (!task->mark_ranges.empty() && space_left && !isCancelled()) { auto & range = task->mark_ranges.back(); size_t marks_to_read = std::min(range.end - range.begin, space_left); if (task->size_predictor) { size_t recommended_marks = task->size_predictor->estimateNumMarks(preferred_block_size_bytes, storage.index_granularity); if (res && recommended_marks < 1) break; marks_to_read = std::min(marks_to_read, std::max(1LU, recommended_marks)); } reader->readRange(range.begin, range.begin + marks_to_read, res); if (task->size_predictor) task->size_predictor->update(res); space_left -= marks_to_read; range.begin += marks_to_read; if (range.begin == range.end) task->mark_ranges.pop_back(); } /// In the case of isCancelled. if (!res) return res; progressImpl({ res.rows(), res.bytes() }); reader->fillMissingColumns(res, task->ordered_names, task->should_reorder); } return res; }
Block MergeTreeBaseSelectBlockInputStream::readFromPart() { if (task->size_predictor) task->size_predictor->startBlock(); const auto current_max_block_size_rows = max_block_size_rows; const auto current_preferred_block_size_bytes = preferred_block_size_bytes; const auto current_preferred_max_column_in_block_size_bytes = preferred_max_column_in_block_size_bytes; const auto & index_granularity = task->data_part->index_granularity; const double min_filtration_ratio = 0.00001; auto estimateNumRows = [current_preferred_block_size_bytes, current_max_block_size_rows, index_granularity, current_preferred_max_column_in_block_size_bytes, min_filtration_ratio]( MergeTreeReadTask & current_task, MergeTreeRangeReader & current_reader) { if (!current_task.size_predictor) return static_cast<size_t>(current_max_block_size_rows); /// Calculates number of rows will be read using preferred_block_size_bytes. /// Can't be less than avg_index_granularity. size_t rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes); if (!rows_to_read) return rows_to_read; auto total_row_in_current_granule = current_reader.numRowsInCurrentGranule(); rows_to_read = std::max(total_row_in_current_granule, rows_to_read); if (current_preferred_max_column_in_block_size_bytes) { /// Calculates number of rows will be read using preferred_max_column_in_block_size_bytes. auto rows_to_read_for_max_size_column = current_task.size_predictor->estimateNumRowsForMaxSizeColumn(current_preferred_max_column_in_block_size_bytes); double filtration_ratio = std::max(min_filtration_ratio, 1.0 - current_task.size_predictor->filtered_rows_ratio); auto rows_to_read_for_max_size_column_with_filtration = static_cast<size_t>(rows_to_read_for_max_size_column / filtration_ratio); /// If preferred_max_column_in_block_size_bytes is used, number of rows to read can be less than current_index_granularity. rows_to_read = std::min(rows_to_read, rows_to_read_for_max_size_column_with_filtration); } auto unread_rows_in_current_granule = current_reader.numPendingRowsInCurrentGranule(); if (unread_rows_in_current_granule >= rows_to_read) return rows_to_read; return index_granularity.countMarksForRows(current_reader.currentMark(), rows_to_read, current_reader.numReadRowsInCurrentGranule()); }; if (!task->range_reader.isInitialized()) { if (prewhere_info) { if (reader->getColumns().empty()) { task->range_reader = MergeTreeRangeReader( pre_reader.get(), nullptr, prewhere_info->alias_actions, prewhere_info->prewhere_actions, &prewhere_info->prewhere_column_name, &task->ordered_names, task->should_reorder, task->remove_prewhere_column, true); } else { MergeTreeRangeReader * pre_reader_ptr = nullptr; if (pre_reader != nullptr) { task->pre_range_reader = MergeTreeRangeReader( pre_reader.get(), nullptr, prewhere_info->alias_actions, prewhere_info->prewhere_actions, &prewhere_info->prewhere_column_name, &task->ordered_names, task->should_reorder, task->remove_prewhere_column, false); pre_reader_ptr = &task->pre_range_reader; } task->range_reader = MergeTreeRangeReader( reader.get(), pre_reader_ptr, nullptr, nullptr, nullptr, &task->ordered_names, true, false, true); } } else { task->range_reader = MergeTreeRangeReader( reader.get(), nullptr, nullptr, nullptr, nullptr, &task->ordered_names, task->should_reorder, false, true); } } UInt64 recommended_rows = estimateNumRows(*task, task->range_reader); UInt64 rows_to_read = std::max(UInt64(1), std::min(current_max_block_size_rows, recommended_rows)); auto read_result = task->range_reader.read(rows_to_read, task->mark_ranges); /// All rows were filtered. Repeat. if (read_result.block.rows() == 0) read_result.block.clear(); UInt64 num_filtered_rows = read_result.numReadRows() - read_result.block.rows(); progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); if (task->size_predictor) { task->size_predictor->updateFilteredRowsRation(read_result.numReadRows(), num_filtered_rows); if (read_result.block) task->size_predictor->update(read_result.block); } if (read_result.block && prewhere_info && !task->remove_prewhere_column) { /// Convert const column to full here because it's cheaper to filter const column than full. auto & column = read_result.block.getByName(prewhere_info->prewhere_column_name); column.column = column.column->convertToFullColumnIfConst(); } read_result.block.checkNumberOfRows(); return read_result.block; }