void TinyLogBlockOutputStream::writeData(const String & name, const IDataType & type, const IColumn & column, WrittenStreams & written_streams)
{
    IDataType::SerializeBinaryBulkSettings settings;
    settings.getter = createStreamGetter(name, written_streams);

    /// The per-column state lives in the serialize_states map, so the stream prefix
    /// is written only once per column, even when several blocks are written.
    if (serialize_states.count(name) == 0)
        type.serializeBinaryBulkStatePrefix(settings, serialize_states[name]);

    type.serializeBinaryBulkWithMultipleStreams(column, 0, 0, settings, serialize_states[name]);
}

void IMergedBlockOutputStream::addStreams(
    const String & path,
    const String & name,
    const IDataType & type,
    size_t estimated_size,
    bool skip_offsets)
{
    IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path)
    {
        if (skip_offsets && !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes)
            return;

        String stream_name = IDataType::getFileNameForStream(name, substream_path);

        /// Shared offsets for Nested type.
        if (column_streams.count(stream_name))
            return;

        column_streams[stream_name] = std::make_unique<ColumnStream>(
            stream_name,
            path + stream_name, DATA_FILE_EXTENSION,
            path + stream_name, MARKS_FILE_EXTENSION,
            max_compress_block_size,
            compression_settings,
            estimated_size,
            aio_threshold);
    };

    type.enumerateStreams(callback, {});
}

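/* A self-contained sketch (not ClickHouse code) of the idea behind
 * IDataType::getFileNameForStream used above: every substream of a column maps
 * to a deterministic file name, and the array-sizes substream of a Nested
 * column is named after the common Nested prefix, so "n.x" and "n.y" map their
 * offsets to the same file and the `column_streams.count(stream_name)` check
 * deduplicates it. The ".size0" suffix mirrors the real naming scheme; treat
 * the details (escaping, deeper nesting levels) as an approximation.
 */
#include <iostream>
#include <string>

enum class Substream { Regular, ArraySizes };

std::string fileNameForStream(const std::string & column_name, Substream substream)
{
    if (substream == Substream::ArraySizes)
    {
        /// For "n.x", the sizes stream is named after the Nested prefix "n".
        auto dot = column_name.find('.');
        std::string prefix = dot == std::string::npos ? column_name : column_name.substr(0, dot);
        return prefix + ".size0";
    }
    return column_name;
}

int main()
{
    std::cout << fileNameForStream("n.x", Substream::ArraySizes) << '\n';  /// n.size0
    std::cout << fileNameForStream("n.y", Substream::ArraySizes) << '\n';  /// n.size0 -- shared, created once
    std::cout << fileNameForStream("n.x", Substream::Regular) << '\n';     /// n.x
}
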
void MergeTreeReader::readData(
    const String & name, const IDataType & type, IColumn & column,
    size_t from_mark, bool continue_reading, size_t max_rows_to_read,
    bool with_offsets)
{
    IDataType::InputStreamGetter stream_getter = [&] (const IDataType::SubstreamPath & path) -> ReadBuffer *
    {
        /// Skip the array sizes substream if the offsets have already been read.
        if (!with_offsets && !path.empty() && path.back().type == IDataType::Substream::ArraySizes)
            return nullptr;

        String stream_name = IDataType::getFileNameForStream(name, path);

        auto it = streams.find(stream_name);
        if (it == streams.end())
            return nullptr;

        Stream & stream = *it->second;

        if (!continue_reading)
            stream.seekToMark(from_mark);

        return stream.data_buffer;
    };

    double & avg_value_size_hint = avg_value_size_hints[name];
    type.deserializeBinaryBulkWithMultipleStreams(column, stream_getter, max_rows_to_read, avg_value_size_hint, true, {});
    IDataType::updateAvgValueSizeHint(column, avg_value_size_hint);
}

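/* The avg_value_size_hint above lets the reader pre-allocate variable-width
 * columns (e.g. String) close to their final size. The exact update rule in
 * IDataType::updateAvgValueSizeHint is not shown here; below is a hypothetical
 * moving-average sketch of the general technique, with the decay factor and
 * the zero-row guard chosen arbitrarily for illustration.
 */
#include <cstddef>
#include <iostream>

void updateAvgValueSizeHintSketch(size_t bytes, size_t rows, double & hint)
{
    if (rows == 0)
        return;
    double current = static_cast<double>(bytes) / rows;
    /// Blend the new observation into the running estimate (hypothetical weights).
    hint = hint == 0 ? current : 0.9 * hint + 0.1 * current;
}

int main()
{
    double hint = 0;
    updateAvgValueSizeHintSketch(1000, 10, hint);  /// first sample: 100 bytes/row
    updateAvgValueSizeHintSketch(2000, 10, hint);  /// estimate drifts toward 200
    std::cout << hint << '\n';
}
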
void MergeTreeReader::addStreams(const String & name, const IDataType & type, const MarkRanges & all_mark_ranges,
    const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type)
{
    IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path)
    {
        String stream_name = IDataType::getFileNameForStream(name, substream_path);

        if (streams.count(stream_name))
            return;

        bool data_file_exists = Poco::File(path + stream_name + DATA_FILE_EXTENSION).exists();

        /** If the data file is missing, do not try to open it.
          * This allows adding a new column to the table structure without creating files for old parts.
          */
        if (!data_file_exists)
            return;

        streams.emplace(stream_name, std::make_unique<Stream>(
            path + stream_name, DATA_FILE_EXTENSION, data_part->marks_count,
            all_mark_ranges, mark_cache, save_marks_in_cache,
            uncompressed_cache, aio_threshold, max_read_buffer_size, profile_callback, clock_type));
    };

    type.enumerateStreams(callback, {});
}

void NativeBlockInputStream::readData(const IDataType & type, IColumn & column, ReadBuffer & istr, size_t rows)
{
    /** For arrays, the offsets must be deserialized first, and then the values. */
    if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(&type))
    {
        IColumn & offsets_column = *typeid_cast<ColumnArray &>(column).getOffsetsColumn();
        type_arr->getOffsetsType()->deserializeBinary(offsets_column, istr, rows, 0);

        if (offsets_column.size() != rows)
            throw Exception("Cannot read all data in NativeBlockInputStream.", ErrorCodes::CANNOT_READ_ALL_DATA);

        if (rows)
            readData(
                *type_arr->getNestedType(),
                typeid_cast<ColumnArray &>(column).getData(),
                istr,
                typeid_cast<const ColumnArray &>(column).getOffsets()[rows - 1]);
    }
    else
        type.deserializeBinary(column, istr, rows, 0);    /// TODO Use avg_value_size_hint.

    if (column.size() != rows)
        throw Exception("Cannot read all data in NativeBlockInputStream.", ErrorCodes::CANNOT_READ_ALL_DATA);
}

void JSONEachRowRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    writeString(fields[field_number], ostr);
    writeChar(':', ostr);
    type.serializeTextJSON(column, row_num, ostr);
    ++field_number;
}

void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit)
{
    /** If the column is constant, materialize it first
      * (since the data type does not know how to serialize/deserialize constants).
      */
    ColumnPtr full_column = column->convertToFullColumnIfConst();

    IDataType::SerializeBinaryBulkSettings settings;
    settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; };
    settings.position_independent_encoding = false;
    settings.low_cardinality_max_dictionary_size = 0;

    IDataType::SerializeBinaryBulkStatePtr state;
    type.serializeBinaryBulkStatePrefix(settings, state);
    type.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
    type.serializeBinaryBulkStateSuffix(settings, state);
}

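/* A toy model (not the ClickHouse interface) of the prefix/bulk/suffix
 * protocol used above: the prefix runs once and may emit a header, the bulk
 * call may run many times over row ranges while threading the same state
 * through, and the suffix flushes whatever the state accumulated. All names
 * below are invented for the sketch; "limit == 0 means the rest" mirrors the
 * convention documented in the array serialization code further down.
 */
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

struct ToyState
{
    size_t rows_written = 0;
};

void statePrefix(std::ostream & out) { out << "HDR "; }

void bulk(const std::vector<int> & data, size_t offset, size_t limit, std::ostream & out, ToyState & state)
{
    size_t end = limit ? std::min(offset + limit, data.size()) : data.size();
    for (size_t i = offset; i < end; ++i)
        out << data[i] << ' ';
    state.rows_written += end - offset;
}

void stateSuffix(std::ostream & out, const ToyState & state) { out << "| total=" << state.rows_written << '\n'; }

int main()
{
    std::vector<int> data{1, 2, 3, 4, 5};
    ToyState state;
    statePrefix(std::cout);
    bulk(data, 0, 3, std::cout, state);   /// first granule
    bulk(data, 3, 0, std::cout, state);   /// limit == 0 means "the rest"
    stateSuffix(std::cout, state);        /// HDR 1 2 3 4 5 | total=5
}
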
void JSONRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    writeCString("\t\t\t", *ostr);
    writeString(fields[field_number].name, *ostr);
    writeCString(": ", *ostr);
    type.serializeTextJSON(column, row_num, *ostr, force_quoting_64bit_integers);
    ++field_number;
}

void TinyLogBlockInputStream::readData(const String & name, const IDataType & type, IColumn & column, UInt64 limit)
{
    IDataType::DeserializeBinaryBulkSettings settings;    /// TODO Use avg_value_size_hint.
    settings.getter = [&] (const IDataType::SubstreamPath & path) -> ReadBuffer *
    {
        String stream_name = IDataType::getFileNameForStream(name, path);

        if (!streams.count(stream_name))
            streams[stream_name] = std::make_unique<Stream>(storage.files[stream_name].data_file.path(), max_read_buffer_size);

        return &streams[stream_name]->compressed;
    };

    if (deserialize_states.count(name) == 0)
        type.deserializeBinaryBulkStatePrefix(settings, deserialize_states[name]);

    type.deserializeBinaryBulkWithMultipleStreams(column, limit, settings, deserialize_states[name]);
}

void XMLRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    writeCString("\t\t\t<", *ostr);
    writeString(field_tag_names[field_number], *ostr);
    writeCString(">", *ostr);
    type.serializeTextXML(column, row_num, *ostr);
    writeCString("</", *ostr);
    writeString(field_tag_names[field_number], *ostr);
    writeCString(">\n", *ostr);
    ++field_number;
}

void StorageTinyLog::addFiles(const String & column_name, const IDataType & type)
{
    if (files.end() != files.find(column_name))
        throw Exception("Duplicate column with name " + column_name + " in constructor of StorageTinyLog.",
            ErrorCodes::DUPLICATE_COLUMN);

    IDataType::StreamCallback stream_callback = [&] (const IDataType::SubstreamPath & substream_path)
    {
        String stream_name = IDataType::getFileNameForStream(column_name, substream_path);
        if (!files.count(stream_name))
        {
            ColumnData column_data;
            files.insert(std::make_pair(stream_name, column_data));
            files[stream_name].data_file = Poco::File(
                path + escapeForFileName(name) + '/' + stream_name + DBMS_STORAGE_LOG_DATA_FILE_EXTENSION);
        }
    };

    IDataType::SubstreamPath substream_path;
    type.enumerateStreams(stream_callback, substream_path);
}

void BinaryRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    type.serializeBinary(column, row_num, ostr);
}

void TSKVRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    writeString(fields[field_number].name, ostr);
    type.serializeTextEscaped(column, row_num, ostr, format_settings);
    ++field_number;
}

void VerticalRowOutputStream::writeValue(const IColumn & column, const IDataType & type, size_t row_num) const
{
    type.serializeText(column, row_num, ostr, format_settings);
}

void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, size_t offset, size_t limit)
{
    /** If the column is constant, materialize it first
      * (since the data type does not know how to serialize/deserialize constants).
      */
    ColumnPtr full_column;
    if (auto converted = column->convertToFullColumnIfConst())
        full_column = converted;
    else
        full_column = column;

    if (type.isNullable())
    {
        const DataTypeNullable & nullable_type = static_cast<const DataTypeNullable &>(type);
        const IDataType & nested_type = *nullable_type.getNestedType();

        const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*full_column.get());
        const ColumnPtr & nested_col = nullable_col.getNestedColumn();

        const IColumn & null_map = nullable_col.getNullMapConcreteColumn();
        DataTypeUInt8{}.serializeBinaryBulk(null_map, ostr, offset, limit);

        writeData(nested_type, nested_col, ostr, offset, limit);
    }
    else if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(&type))
    {
        /** For arrays, the offsets must be serialized first, and then the values. */
        const ColumnArray & column_array = typeid_cast<const ColumnArray &>(*full_column);
        type_arr->getOffsetsType()->serializeBinaryBulk(*column_array.getOffsetsColumn(), ostr, offset, limit);

        if (!typeid_cast<const ColumnArray &>(*full_column).getData().empty())
        {
            const ColumnArray::Offsets_t & offsets = column_array.getOffsets();

            if (offset > offsets.size())
                return;

            /** offset - from which array to write.
              * limit - how many arrays to write, or 0 to write everything there is.
              * end - up to which array the written part ends.
              *
              * nested_offset - from which nested element to write.
              * nested_limit - how many nested elements to write, or 0 to write everything there is.
              */
            size_t end = std::min(offset + limit, offsets.size());

            size_t nested_offset = offset ? offsets[offset - 1] : 0;
            size_t nested_limit = limit ? offsets[end - 1] - nested_offset : 0;

            const DataTypePtr & nested_type = type_arr->getNestedType();

            DataTypePtr actual_type;
            if (nested_type->isNull())
            {
                /// Special case: an array of Null is actually an array of Nullable(UInt8).
                actual_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
            }
            else
                actual_type = nested_type;

            if (limit == 0 || nested_limit)
                writeData(*actual_type, typeid_cast<const ColumnArray &>(*full_column).getDataPtr(), ostr, nested_offset, nested_limit);
        }
    }
    else
        type.serializeBinaryBulk(*full_column, ostr, offset, limit);
}

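/* A minimal, self-contained check of the nested_offset / nested_limit
 * arithmetic above. Array offsets are cumulative: offsets[i] is the total
 * number of nested elements in arrays 0..i, so a slice of arrays
 * [offset, end) covers the nested elements [offsets[offset - 1], offsets[end - 1]).
 */
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    /// Arrays: [a b] [c] [] [d e f]  ->  cumulative offsets:
    std::vector<size_t> offsets{2, 3, 3, 6};

    size_t offset = 1, limit = 2;  /// write arrays #1 and #2, i.e. [c] and []
    size_t end = std::min(offset + limit, offsets.size());
    size_t nested_offset = offset ? offsets[offset - 1] : 0;
    size_t nested_limit = limit ? offsets[end - 1] - nested_offset : 0;

    std::cout << nested_offset << ' ' << nested_limit << '\n';  /// 2 1 -- one nested element, "c"
}
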
void ValuesRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    type.serializeTextQuoted(column, row_num, ostr, format_settings);
}

void CSVRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    type.serializeTextCSV(column, row_num, ostr);
}

void IMergedBlockOutputStream::writeData(
    const String & name,
    const IDataType & type,
    const IColumn & column,
    OffsetColumns & offset_columns,
    bool skip_offsets)
{
    size_t size = column.size();
    size_t prev_mark = 0;
    while (prev_mark < size)
    {
        size_t limit = 0;

        /// If there is `index_offset`, the first mark goes not immediately, but after this number of rows.
        if (prev_mark == 0 && index_offset != 0)
            limit = index_offset;
        else
        {
            limit = storage.index_granularity;

            /// Write marks.
            type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
            {
                bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
                if (is_offsets && skip_offsets)
                    return;

                String stream_name = IDataType::getFileNameForStream(name, substream_path);

                /// Don't write offsets more than once for Nested type.
                if (is_offsets && offset_columns.count(stream_name))
                    return;

                ColumnStream & stream = *column_streams[stream_name];

                /// There could already be enough data to compress into the new block.
                if (stream.compressed.offset() >= min_compress_block_size)
                    stream.compressed.next();

                writeIntBinary(stream.plain_hashing.count(), stream.marks);
                writeIntBinary(stream.compressed.offset(), stream.marks);
            }, {});
        }

        IDataType::OutputStreamGetter stream_getter = [&] (const IDataType::SubstreamPath & substream_path) -> WriteBuffer *
        {
            bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
            if (is_offsets && skip_offsets)
                return nullptr;

            String stream_name = IDataType::getFileNameForStream(name, substream_path);

            /// Don't write offsets more than once for Nested type.
            if (is_offsets && offset_columns.count(stream_name))
                return nullptr;

            return &column_streams[stream_name]->compressed;
        };

        type.serializeBinaryBulkWithMultipleStreams(column, stream_getter, prev_mark, limit, true, {});

        /// Make marks point to the beginning of the next compressed block rather than to the end of the current one.
        type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
        {
            bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
            if (is_offsets && skip_offsets)
                return;

            String stream_name = IDataType::getFileNameForStream(name, substream_path);

            /// Don't write offsets more than once for Nested type.
            if (is_offsets && offset_columns.count(stream_name))
                return;

            column_streams[stream_name]->compressed.nextIfAtEnd();
        }, {});

        prev_mark += limit;
    }

    /// Remember which Nested offsets have already been written, so they are not written again for the remaining columns of the Nested structure.
    type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
    {
        bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
        if (is_offsets)
        {
            String stream_name = IDataType::getFileNameForStream(name, substream_path);
            offset_columns.insert(stream_name);
        }
    }, {});
}

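/* What a marks file written above contains: one pair of numbers per mark,
 * (offset of the compressed block in the data file, offset within the
 * decompressed block), as emitted by the two writeIntBinary calls. The dump
 * sketch below assumes fixed-width 8-byte little-endian values and a
 * little-endian host; verify the width against the actual format before
 * relying on it.
 */
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>

int main(int argc, char ** argv)
{
    if (argc != 2)
    {
        std::cerr << "usage: dump_marks file.mrk\n";
        return 1;
    }

    std::ifstream in(argv[1], std::ios::binary);
    uint64_t offset_in_file;
    uint64_t offset_in_block;
    size_t mark = 0;

    while (in.read(reinterpret_cast<char *>(&offset_in_file), sizeof(offset_in_file))
        && in.read(reinterpret_cast<char *>(&offset_in_block), sizeof(offset_in_block)))
        std::cout << "mark " << mark++ << ": " << offset_in_file << ", " << offset_in_block << '\n';
}
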
void JSONCompactRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    type.serializeTextJSON(column, row_num, *ostr, settings);
    ++field_number;
}

void TabSeparatedRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    type.serializeAsTextEscaped(column, row_num, ostr, format_settings);
}

bool DataTypeNullable::equals(const IDataType & rhs) const
{
    return rhs.isNullable() && nested_data_type->equals(*static_cast<const DataTypeNullable &>(rhs).nested_data_type);
}