Block TinyLogBlockInputStream::readImpl()
{
    Block res;

    if (finished || (!streams.empty() && streams.begin()->second->compressed.eof()))
    {
        /** Close the files (before destroying the object).
          * When many sources are created but only a few of them are read simultaneously,
          * the buffers don't waste memory.
          */
        finished = true;
        streams.clear();
        return res;
    }

    {
        /// If there are no files in the directory, the table is empty.
        if (Poco::DirectoryIterator(storage.full_path()) == Poco::DirectoryIterator())
            return res;
    }

    for (const auto & name_type : columns)
    {
        MutableColumnPtr column = name_type.type->createColumn();

        try
        {
            readData(name_type.name, *name_type.type, *column, block_size);
        }
        catch (Exception & e)
        {
            e.addMessage("while reading column " + name_type.name + " at " + storage.full_path());
            throw;
        }

        if (column->size())
            res.insert(ColumnWithTypeAndName(std::move(column), name_type.type, name_type.name));
    }

    if (!res || streams.begin()->second->compressed.eof())
    {
        finished = true;
        streams.clear();
    }

    return Nested::flatten(res);
}
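A minimal, self-contained sketch of the pull-based contract this method implements: the caller repeatedly asks for blocks, and an empty Block signals the end of the data. The ToyBlock and ToyStream types below are hypothetical stand-ins, not part of ClickHouse; they only illustrate why readImpl() returns an empty res both when the streams hit EOF and when the table directory is empty.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

/// Hypothetical stand-in for DB::Block: an empty block means "no more data".
struct ToyBlock
{
    std::vector<int> rows;
    explicit operator bool() const { return !rows.empty(); }
};

/// Hypothetical stand-in for the input stream: hands out fixed-size blocks.
class ToyStream
{
public:
    ToyStream(size_t total, size_t block_size) : remaining(total), block_size(block_size) {}

    ToyBlock readImpl()
    {
        ToyBlock res;
        if (remaining == 0)
            return res;                         /// Empty block: the caller stops pulling.
        size_t n = std::min(remaining, block_size);
        res.rows.assign(n, 0);
        remaining -= n;
        return res;
    }

private:
    size_t remaining;
    size_t block_size;
};

int main()
{
    ToyStream stream(10, 4);
    while (ToyBlock block = stream.readImpl())  /// Pull until an empty block is returned.
        std::cout << "got block of " << block.rows.size() << " rows\n";
}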
size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res)
{
    size_t read_rows = 0;
    try
    {
        /// Pointers to offset columns that are common to the nested data structure columns.
        /// If append is true, then the value will be equal to nullptr and will be used only to
        /// check that the offsets column has already been read.
        OffsetColumns offset_columns;

        for (const NameAndTypePair & it : columns)
        {
            /// The column is already present in the block, so we will append the values to the end.
            bool append = res.has(it.name);
            if (!append)
                res.insert(ColumnWithTypeAndName(it.type->createColumn(), it.type, it.name));

            /// To keep offsets shared. TODO Very dangerous. Get rid of this.
            MutableColumnPtr column = res.getByName(it.name).column->assumeMutable();

            bool read_offsets = true;

            /// For nested data structures, collect pointers to offset columns.
            if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(it.type.get()))
            {
                String name = Nested::extractTableName(it.name);

                auto it_inserted = offset_columns.emplace(name, nullptr);

                /// The offsets have already been read on a previous iteration, so we don't need to read them again.
                if (!it_inserted.second)
                    read_offsets = false;

                /// We need to create new offsets.
                if (it_inserted.second && !append)
                    it_inserted.first->second = ColumnArray::ColumnOffsets::create();

                /// Share the offsets among all elements of the nested structure.
                if (!append)
                    column = ColumnArray::create(type_arr->getNestedType()->createColumn(), it_inserted.first->second);
            }

            try
            {
                size_t column_size_before_reading = column->size();

                readData(it.name, *it.type, *column, from_mark, continue_reading, max_rows_to_read, read_offsets);

                /// For elements of Nested, column_size_before_reading may be greater than the column size
                /// if the offsets are not empty and were already read, but the elements are empty.
                if (column->size())
                    read_rows = std::max(read_rows, column->size() - column_size_before_reading);
            }
            catch (Exception & e)
            {
                /// Better diagnostics.
                e.addMessage("(while reading column " + it.name + ")");
                throw;
            }

            if (column->size())
                res.getByName(it.name).column = std::move(column);
            else
                res.erase(it.name);
        }

        /// NOTE: positions for all streams must be kept in sync. In particular, even if for some streams there are no rows to be read,
        /// you must ensure that no seeks are skipped, and at this point they all point to to_mark.
    }
    catch (Exception & e)
    {
        if (e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
            storage.reportBrokenPart(data_part->name);

        /// Better diagnostics.
        e.addMessage("(while reading from part " + path + " from mark " + toString(from_mark)
            + " with max_rows_to_read = " + toString(max_rows_to_read) + ")");
        throw;
    }
    catch (...)
    {
        storage.reportBrokenPart(data_part->name);
        throw;
    }

    return read_rows;
}
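The trickiest part of readRows() is sharing a single offsets column among all Array columns of one Nested structure, so the offsets are read from disk only once and the sibling element columns stay aligned. Below is a minimal sketch of that idea using plain std::shared_ptr in place of ClickHouse's COW column machinery; ToyArrayColumn and make_column are hypothetical names introduced only for illustration.

#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

using Offsets = std::vector<uint64_t>;

/// Hypothetical stand-in for ColumnArray: elements plus shared cumulative offsets.
struct ToyArrayColumn
{
    std::vector<int> elements;
    std::shared_ptr<Offsets> offsets;   /// Shared among siblings of one Nested table.
};

int main()
{
    /// Analogue of OffsetColumns above: one offsets column per Nested table name.
    std::map<std::string, std::shared_ptr<Offsets>> offset_columns;

    auto make_column = [&](const std::string & nested_table)
    {
        auto it_inserted = offset_columns.emplace(nested_table, nullptr);
        if (it_inserted.second)
            it_inserted.first->second = std::make_shared<Offsets>();  /// First sibling creates the offsets.
        return ToyArrayColumn{{}, it_inserted.first->second};         /// Later siblings reuse them.
    };

    ToyArrayColumn a = make_column("n");   /// e.g. column n.a
    ToyArrayColumn b = make_column("n");   /// e.g. column n.b

    /// "Read" the offsets once, through either column: both observe the change,
    /// so the array sizes of n.a and n.b can never diverge.
    *a.offsets = {2, 5};

    std::cout << "a sees " << a.offsets->size() << " arrays, b sees " << b.offsets->size() << " arrays\n";
}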