MergeSortingBlockInputStream::MergeSortingBlockInputStream( const BlockInputStreamPtr & input, SortDescription & description_, size_t max_merged_block_size_, size_t limit_, size_t max_bytes_before_remerge_, size_t max_bytes_before_external_sort_, const std::string & tmp_path_) : description(description_), max_merged_block_size(max_merged_block_size_), limit(limit_), max_bytes_before_remerge(max_bytes_before_remerge_), max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_path(tmp_path_) { children.push_back(input); header = children.at(0)->getHeader(); header_without_constants = header; removeConstantsFromBlock(header_without_constants); removeConstantsFromSortDescription(header, description); }
Block MergeSortingBlockInputStream::readImpl() { /** Algorithm: * - read to memory blocks from source stream; * - if too much of them and if external sorting is enabled, * - merge all blocks to sorted stream and write it to temporary file; * - at the end, merge all sorted streams from temporary files and also from rest of blocks in memory. */ /// If has not read source blocks. if (!impl) { while (Block block = children.back()->read()) { if (!sample_block) { sample_block = block.cloneEmpty(); removeConstantsFromSortDescription(sample_block, description); } /// If there were only const columns in sort description, then there is no need to sort. /// Return the blocks as is. if (description.empty()) return block; removeConstantsFromBlock(block); blocks.push_back(block); sum_bytes_in_blocks += block.bytes(); /** If too much of them and if external sorting is enabled, * will merge blocks that we have in memory at this moment and write merged stream to temporary (compressed) file. * NOTE. It's possible to check free space in filesystem. */ if (max_bytes_before_external_sort && sum_bytes_in_blocks > max_bytes_before_external_sort) { temporary_files.emplace_back(new Poco::TemporaryFile(tmp_path)); const std::string & path = temporary_files.back()->path(); WriteBufferFromFile file_buf(path); CompressedWriteBuffer compressed_buf(file_buf); NativeBlockOutputStream block_out(compressed_buf); MergeSortingBlocksBlockInputStream block_in(blocks, description, max_merged_block_size, limit); LOG_INFO(log, "Sorting and writing part of data into temporary file " + path); ProfileEvents::increment(ProfileEvents::ExternalSortWritePart); copyData(block_in, block_out, &is_cancelled); /// NOTE. Possibly limit disk usage. LOG_INFO(log, "Done writing part of data into temporary file " + path); blocks.clear(); sum_bytes_in_blocks = 0; } } if ((blocks.empty() && temporary_files.empty()) || isCancelled()) return Block(); if (temporary_files.empty()) { impl = std::make_unique<MergeSortingBlocksBlockInputStream>(blocks, description, max_merged_block_size, limit); } else { /// If there was temporary files. ProfileEvents::increment(ProfileEvents::ExternalSortMerge); LOG_INFO(log, "There are " << temporary_files.size() << " temporary sorted parts to merge."); /// Create sorted streams to merge. for (const auto & file : temporary_files) { temporary_inputs.emplace_back(std::make_unique<TemporaryFileStream>(file->path())); inputs_to_merge.emplace_back(temporary_inputs.back()->block_in); } /// Rest of blocks in memory. if (!blocks.empty()) inputs_to_merge.emplace_back(std::make_shared<MergeSortingBlocksBlockInputStream>(blocks, description, max_merged_block_size, limit)); /// Will merge that sorted streams. impl = std::make_unique<MergingSortedBlockInputStream>(inputs_to_merge, description, max_merged_block_size, limit); } } Block res = impl->read(); if (res) enrichBlockWithConstants(res, sample_block); return res; }