void IMergedBlockOutputStream::addStreams( const String & path, const String & name, const IDataType & type, size_t estimated_size, bool skip_offsets) { IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path) { if (skip_offsets && !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes) return; String stream_name = IDataType::getFileNameForStream(name, substream_path); /// Shared offsets for Nested type. if (column_streams.count(stream_name)) return; column_streams[stream_name] = std::make_unique<ColumnStream>( stream_name, path + stream_name, DATA_FILE_EXTENSION, path + stream_name, MARKS_FILE_EXTENSION, max_compress_block_size, compression_settings, estimated_size, aio_threshold); }; type.enumerateStreams(callback, {}); }
void MergeTreeReader::addStreams(const String & name, const IDataType & type, const MarkRanges & all_mark_ranges, const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type) { IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path) { String stream_name = IDataType::getFileNameForStream(name, substream_path); if (streams.count(stream_name)) return; bool data_file_exists = Poco::File(path + stream_name + DATA_FILE_EXTENSION).exists(); /** If data file is missing then we will not try to open it. * It is necessary since it allows to add new column to structure of the table without creating new files for old parts. */ if (!data_file_exists) return; streams.emplace(stream_name, std::make_unique<Stream>( path + stream_name, DATA_FILE_EXTENSION, data_part->marks_count, all_mark_ranges, mark_cache, save_marks_in_cache, uncompressed_cache, aio_threshold, max_read_buffer_size, profile_callback, clock_type)); }; type.enumerateStreams(callback, {}); }
void StorageTinyLog::addFiles(const String & column_name, const IDataType & type) { if (files.end() != files.find(column_name)) throw Exception("Duplicate column with name " + column_name + " in constructor of StorageTinyLog.", ErrorCodes::DUPLICATE_COLUMN); IDataType::StreamCallback stream_callback = [&] (const IDataType::SubstreamPath & substream_path) { String stream_name = IDataType::getFileNameForStream(column_name, substream_path); if (!files.count(stream_name)) { ColumnData column_data; files.insert(std::make_pair(stream_name, column_data)); files[stream_name].data_file = Poco::File( path + escapeForFileName(name) + '/' + stream_name + DBMS_STORAGE_LOG_DATA_FILE_EXTENSION); } }; IDataType::SubstreamPath substream_path; type.enumerateStreams(stream_callback, substream_path); }
/// Writes one column to its ColumnStream(s), emitting a mark every `index_granularity` rows.
/// `offset_columns` accumulates the names of offsets streams already written, so that
/// subsequent columns of the same Nested structure do not write them again.
/// NOTE(review): assumes column_streams already contains an entry for every substream
/// of `type` (created by addStreams) — confirm with callers.
void IMergedBlockOutputStream::writeData(
    const String & name, const IDataType & type, const IColumn & column, OffsetColumns & offset_columns, bool skip_offsets)
{
    size_t size = column.size();
    size_t prev_mark = 0;
    /// Process the column in granules of up to `index_granularity` rows each.
    while (prev_mark < size)
    {
        size_t limit = 0;

        /// If there is `index_offset`, then the first mark goes not immediately, but after this number of rows.
        if (prev_mark == 0 && index_offset != 0)
            limit = index_offset;
        else
        {
            limit = storage.index_granularity;

            /// Write marks: for every substream, record (offset in plain file, offset in decompressed block).
            type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
            {
                bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
                if (is_offsets && skip_offsets)
                    return;

                String stream_name = IDataType::getFileNameForStream(name, substream_path);

                /// Don't write offsets more than one time for Nested type.
                if (is_offsets && offset_columns.count(stream_name))
                    return;

                ColumnStream & stream = *column_streams[stream_name];

                /// There could already be enough data to compress into the new block.
                if (stream.compressed.offset() >= min_compress_block_size)
                    stream.compressed.next();

                writeIntBinary(stream.plain_hashing.count(), stream.marks);
                writeIntBinary(stream.compressed.offset(), stream.marks);
            }, {});
        }

        /// Maps a substream path to the buffer it should be serialized into;
        /// returns nullptr to suppress streams that must not be (re)written.
        IDataType::OutputStreamGetter stream_getter = [&] (const IDataType::SubstreamPath & substream_path) -> WriteBuffer *
        {
            bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
            if (is_offsets && skip_offsets)
                return nullptr;

            String stream_name = IDataType::getFileNameForStream(name, substream_path);

            /// Don't write offsets more than one time for Nested type.
            if (is_offsets && offset_columns.count(stream_name))
                return nullptr;

            return &column_streams[stream_name]->compressed;
        };

        /// Serialize `limit` rows starting at `prev_mark` into the selected buffers.
        type.serializeBinaryBulkWithMultipleStreams(column, stream_getter, prev_mark, limit, true, {});

        /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one.
        type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
        {
            bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
            if (is_offsets && skip_offsets)
                return;

            String stream_name = IDataType::getFileNameForStream(name, substream_path);

            /// Don't write offsets more than one time for Nested type.
            if (is_offsets && offset_columns.count(stream_name))
                return;

            column_streams[stream_name]->compressed.nextIfAtEnd();
        }, {});

        prev_mark += limit;
    }

    /// Memoize offsets for Nested types, that are already written. They will not be written again for next columns of Nested structure.
    type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
    {
        bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
        if (is_offsets)
        {
            String stream_name = IDataType::getFileNameForStream(name, substream_path);
            offset_columns.insert(stream_name);
        }
    }, {});
}