void DataTypeArray::serializeOffsets(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const { const ColumnArray & column_array = typeid_cast<const ColumnArray &>(column); const ColumnArray::Offsets_t & offsets = column_array.getOffsets(); size_t size = offsets.size(); if (!size) return; size_t end = limit && (offset + limit < size) ? offset + limit : size; if (offset == 0) { writeIntBinary(offsets[0], ostr); ++offset; } for (size_t i = offset; i < end; ++i) writeIntBinary(offsets[i] - offsets[i - 1], ostr); }
void IMergedBlockOutputStream::writeData( const String & name, const IDataType & type, const IColumn & column, OffsetColumns & offset_columns, bool skip_offsets) { size_t size = column.size(); size_t prev_mark = 0; while (prev_mark < size) { size_t limit = 0; /// If there is `index_offset`, then the first mark goes not immediately, but after this number of rows. if (prev_mark == 0 && index_offset != 0) limit = index_offset; else { limit = storage.index_granularity; /// Write marks. type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes; if (is_offsets && skip_offsets) return; String stream_name = IDataType::getFileNameForStream(name, substream_path); /// Don't write offsets more than one time for Nested type. if (is_offsets && offset_columns.count(stream_name)) return; ColumnStream & stream = *column_streams[stream_name]; /// There could already be enough data to compress into the new block. if (stream.compressed.offset() >= min_compress_block_size) stream.compressed.next(); writeIntBinary(stream.plain_hashing.count(), stream.marks); writeIntBinary(stream.compressed.offset(), stream.marks); }, {}); } IDataType::OutputStreamGetter stream_getter = [&] (const IDataType::SubstreamPath & substream_path) -> WriteBuffer * { bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes; if (is_offsets && skip_offsets) return nullptr; String stream_name = IDataType::getFileNameForStream(name, substream_path); /// Don't write offsets more than one time for Nested type. if (is_offsets && offset_columns.count(stream_name)) return nullptr; return &column_streams[stream_name]->compressed; }; type.serializeBinaryBulkWithMultipleStreams(column, stream_getter, prev_mark, limit, true, {}); /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one. type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes; if (is_offsets && skip_offsets) return; String stream_name = IDataType::getFileNameForStream(name, substream_path); /// Don't write offsets more than one time for Nested type. if (is_offsets && offset_columns.count(stream_name)) return; column_streams[stream_name]->compressed.nextIfAtEnd(); }, {}); prev_mark += limit; } /// Memoize offsets for Nested types, that are already written. They will not be written again for next columns of Nested structure. type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes; if (is_offsets) { String stream_name = IDataType::getFileNameForStream(name, substream_path); offset_columns.insert(stream_name); } }, {}); }