Example #1
void TinyLogBlockOutputStream::writeData(const String & name, const IDataType & type, const IColumn & column, WrittenStreams & written_streams)
{
    IDataType::SerializeBinaryBulkSettings settings;
    settings.getter = createStreamGetter(name, written_streams);

    if (serialize_states.count(name) == 0)
        type.serializeBinaryBulkStatePrefix(settings, serialize_states[name]);

    type.serializeBinaryBulkWithMultipleStreams(column, 0, 0, settings, serialize_states[name]);
}
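A self-contained sketch of the pattern above, with illustrative names rather than the ClickHouse API: the per-column state map guarantees the prefix is written exactly once, and every later call for the same column appends only bulk data.

#include <iostream>
#include <map>
#include <sstream>
#include <string>

/// Hypothetical stand-in for the per-column state kept in serialize_states:
/// the prefix is written exactly once per column, later calls append only data.
struct DemoWriter
{
    std::map<std::string, bool> prefix_written;
    std::ostringstream out;

    void writeData(const std::string & name, const std::string & chunk)
    {
        if (!prefix_written[name])          /// analogue of serializeBinaryBulkStatePrefix
        {
            out << "[prefix:" << name << "]";
            prefix_written[name] = true;
        }
        out << chunk;                       /// analogue of serializeBinaryBulkWithMultipleStreams
    }
};

int main()
{
    DemoWriter writer;
    writer.writeData("x", "a");
    writer.writeData("x", "b");             /// no second prefix for the same column
    std::cout << writer.out.str() << '\n';  /// prints: [prefix:x]ab
}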
Example #2
void IMergedBlockOutputStream::addStreams(
    const String & path,
    const String & name,
    const IDataType & type,
    size_t estimated_size,
    bool skip_offsets)
{
    IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path)
    {
        if (skip_offsets && !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes)
            return;

        String stream_name = IDataType::getFileNameForStream(name, substream_path);

        /// Shared offsets for Nested type.
        if (column_streams.count(stream_name))
            return;

        column_streams[stream_name] = std::make_unique<ColumnStream>(
            stream_name,
            path + stream_name, DATA_FILE_EXTENSION,
            path + stream_name, MARKS_FILE_EXTENSION,
            max_compress_block_size,
            compression_settings,
            estimated_size,
            aio_threshold);
    };

    type.enumerateStreams(callback, {});
}
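The deduplication by stream_name works because substream file names are shared. Below is a simplified analogue of IDataType::getFileNameForStream (not the real implementation, which also escapes names): the array-sizes substream of every Nested subcolumn resolves to the same .size0 file, so the callback can simply skip names it has already seen.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

enum class Substream { Regular, ArraySizes };

/// Simplified analogue of getFileNameForStream: the sizes of a Nested
/// subcolumn "n.a" belong to the table-level column "n", so "n.a" and "n.b"
/// both resolve to the shared file "n.size0".
std::string fileNameForStream(std::string name, const std::vector<Substream> & path)
{
    size_t level = 0;
    for (Substream elem : path)
        if (elem == Substream::ArraySizes)
        {
            auto dot = name.find('.');
            if (dot != std::string::npos)
                name.resize(dot);
            name += ".size" + std::to_string(level);
            ++level;
        }
    return name;
}

int main()
{
    std::cout << fileNameForStream("n.a", {Substream::ArraySizes}) << '\n';  /// n.size0
    std::cout << fileNameForStream("n.b", {Substream::ArraySizes}) << '\n';  /// n.size0, deduplicated by the caller
}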
Example #3
void MergeTreeReader::readData(
    const String & name, const IDataType & type, IColumn & column,
    size_t from_mark, bool continue_reading, size_t max_rows_to_read,
    bool with_offsets)
{
    IDataType::InputStreamGetter stream_getter = [&] (const IDataType::SubstreamPath & path) -> ReadBuffer *
    {
        /// If offsets for arrays have already been read.
        if (!with_offsets && !path.empty() && path.back().type == IDataType::Substream::ArraySizes)
            return nullptr;

        String stream_name = IDataType::getFileNameForStream(name, path);

        auto it = streams.find(stream_name);
        if (it == streams.end())
            return nullptr;

        Stream & stream = *it->second;

        if (!continue_reading)
            stream.seekToMark(from_mark);

        return stream.data_buffer;
    };

    double & avg_value_size_hint = avg_value_size_hints[name];
    type.deserializeBinaryBulkWithMultipleStreams(column, stream_getter, max_rows_to_read, avg_value_size_hint, true, {});
    IDataType::updateAvgValueSizeHint(column, avg_value_size_hint);
}
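updateAvgValueSizeHint feeds the hint back for the next read. A sketch of the asymmetric update it is based on (the exact constants here are assumptions; the shape is the point): the hint grows eagerly but shrinks with damping, so one unusually small block does not ruin an estimate built from many large ones.

#include <algorithm>
#include <cstddef>
#include <iostream>

/// Sketch of the asymmetric hint update; constants are illustrative.
void updateHint(size_t bytes, size_t rows, double & hint)
{
    if (rows < 10)                          /// too few rows to trust the sample
        return;
    double current = static_cast<double>(bytes) / rows;
    if (current > hint)
        hint = std::min(1024.0, current);   /// grow eagerly, but cap the estimate
    else if (current * 2 < hint)
        hint = (current + hint * 3) / 4;    /// shrink with damping
}

int main()
{
    double hint = 0;
    updateHint(6400, 100, hint);            /// avg 64 bytes/value -> hint becomes 64
    updateHint(1000, 100, hint);            /// avg 10 bytes/value -> hint decays to 50.5
    std::cout << hint << '\n';
}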
Example #4
void MergeTreeReader::addStreams(const String & name, const IDataType & type, const MarkRanges & all_mark_ranges,
    const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type)
{
    IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path)
    {
        String stream_name = IDataType::getFileNameForStream(name, substream_path);

        if (streams.count(stream_name))
            return;

        bool data_file_exists = Poco::File(path + stream_name + DATA_FILE_EXTENSION).exists();

        /** If data file is missing then we will not try to open it.
          * It is necessary since it allows to add new column to structure of the table without creating new files for old parts.
          */
        if (!data_file_exists)
            return;

        streams.emplace(stream_name, std::make_unique<Stream>(
            path + stream_name, DATA_FILE_EXTENSION, data_part->marks_count,
            all_mark_ranges, mark_cache, save_marks_in_cache,
            uncompressed_cache, aio_threshold, max_read_buffer_size, profile_callback, clock_type));
    };

    type.enumerateStreams(callback, {});
}
Example #5
void NativeBlockInputStream::readData(const IDataType & type, IColumn & column, ReadBuffer & istr, size_t rows)
{
    /** For arrays, the offsets must be deserialized first, and then the values.
      */
    if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(&type))
    {
        IColumn & offsets_column = *typeid_cast<ColumnArray &>(column).getOffsetsColumn();
        type_arr->getOffsetsType()->deserializeBinary(offsets_column, istr, rows, 0);

        if (offsets_column.size() != rows)
            throw Exception("Cannot read all data in NativeBlockInputStream.", ErrorCodes::CANNOT_READ_ALL_DATA);

        if (rows)
            readData(
                *type_arr->getNestedType(),
                typeid_cast<ColumnArray &>(column).getData(),
                istr,
                typeid_cast<const ColumnArray &>(column).getOffsets()[rows - 1]);
    }
    else
        type.deserializeBinary(column, istr, rows, 0);    /// TODO Use avg_value_size_hint.

    if (column.size() != rows)
        throw Exception("Cannot read all data in NativeBlockInputStream.", ErrorCodes::CANNOT_READ_ALL_DATA);
}
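A worked example of the offsets convention this recursion relies on: offsets[i] is the cumulative number of nested elements up to and including row i, so the last offset is exactly how many nested values the recursive call must read.

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    /// Column of 3 arrays: [10, 20], [30, 40, 50], [60]
    std::vector<size_t> offsets = {2, 5, 6};

    size_t rows = offsets.size();
    size_t nested_rows = rows ? offsets[rows - 1] : 0;
    std::cout << "read " << nested_rows << " nested values\n";  /// read 6 nested values
}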
Example #6
void JSONEachRowRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    writeString(fields[field_number], ostr);
    writeChar(':', ostr);
    type.serializeTextJSON(column, row_num, ostr);
    ++field_number;
}
Example #7
void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit)
{
    /** If there are columns-constants - then we materialize them.
      * (Since the data type does not know how to serialize / deserialize constants.)
      */
    ColumnPtr full_column = column->convertToFullColumnIfConst();

    IDataType::SerializeBinaryBulkSettings settings;
    settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; };
    settings.position_independent_encoding = false;
    settings.low_cardinality_max_dictionary_size = 0;

    IDataType::SerializeBinaryBulkStatePtr state;
    type.serializeBinaryBulkStatePrefix(settings, state);
    type.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
    type.serializeBinaryBulkStateSuffix(settings, state);
}
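Unlike Example #1, which writes the prefix once per column and keeps the state across blocks, this version brackets every block with the full prefix / bulk / suffix sequence. A minimal sketch of that protocol, with illustrative names rather than the ClickHouse API:

#include <cstddef>
#include <iostream>
#include <memory>
#include <sstream>

struct State { size_t rows_written = 0; };  /// travels from prefix to suffix

void statePrefix(std::ostream & out, std::shared_ptr<State> & state)
{
    state = std::make_shared<State>();
    out << "[prefix]";
}

void writeBulk(std::ostream & out, const std::shared_ptr<State> & state, size_t rows)
{
    state->rows_written += rows;
    out << "[rows:" << rows << "]";
}

void stateSuffix(std::ostream & out, const std::shared_ptr<State> & state)
{
    out << "[total:" << state->rows_written << "]";
}

int main()
{
    std::ostringstream out;
    std::shared_ptr<State> state;
    statePrefix(out, state);
    writeBulk(out, state, 3);
    writeBulk(out, state, 2);
    stateSuffix(out, state);
    std::cout << out.str() << '\n';         /// [prefix][rows:3][rows:2][total:5]
}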
Example #8
void JSONRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    writeCString("\t\t\t", *ostr);
    writeString(fields[field_number].name, *ostr);
    writeCString(": ", *ostr);
    type.serializeTextJSON(column, row_num, *ostr, force_quoting_64bit_integers);
    ++field_number;
}
Example #9
void TinyLogBlockInputStream::readData(const String & name, const IDataType & type, IColumn & column, UInt64 limit)
{
    IDataType::DeserializeBinaryBulkSettings settings; /// TODO Use avg_value_size_hint.
    settings.getter = [&] (const IDataType::SubstreamPath & path) -> ReadBuffer *
    {
        String stream_name = IDataType::getFileNameForStream(name, path);

        if (!streams.count(stream_name))
            streams[stream_name] = std::make_unique<Stream>(storage.files[stream_name].data_file.path(), max_read_buffer_size);

        return &streams[stream_name]->compressed;
    };

    if (deserialize_states.count(name) == 0)
        type.deserializeBinaryBulkStatePrefix(settings, deserialize_states[name]);

    type.deserializeBinaryBulkWithMultipleStreams(column, limit, settings, deserialize_states[name]);
}
Example #10
void XMLRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
	writeCString("\t\t\t<", *ostr);
	writeString(field_tag_names[field_number], *ostr);
	writeCString(">", *ostr);
	type.serializeTextXML(column, row_num, *ostr);
	writeCString("</", *ostr);
	writeString(field_tag_names[field_number], *ostr);
	writeCString(">\n", *ostr);
	++field_number;
}
Example #11
void StorageTinyLog::addFiles(const String & column_name, const IDataType & type)
{
    if (files.end() != files.find(column_name))
        throw Exception("Duplicate column with name " + column_name + " in constructor of StorageTinyLog.",
            ErrorCodes::DUPLICATE_COLUMN);

    IDataType::StreamCallback stream_callback = [&] (const IDataType::SubstreamPath & substream_path)
    {
        String stream_name = IDataType::getFileNameForStream(column_name, substream_path);
        if (!files.count(stream_name))
        {
            ColumnData column_data;
            files.insert(std::make_pair(stream_name, column_data));
            files[stream_name].data_file = Poco::File(
                path + escapeForFileName(name) + '/' + stream_name + DBMS_STORAGE_LOG_DATA_FILE_EXTENSION);
        }
    };

    IDataType::SubstreamPath substream_path;
    type.enumerateStreams(stream_callback, substream_path);
}
Example #12
void BinaryRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    type.serializeBinary(column, row_num, ostr);
}
Example #13
void TSKVRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    writeString(fields[field_number].name, ostr);
    type.serializeTextEscaped(column, row_num, ostr, format_settings);
    ++field_number;
}
Example #14
void VerticalRowOutputStream::writeValue(const IColumn & column, const IDataType & type, size_t row_num) const
{
    type.serializeText(column, row_num, ostr, format_settings);
}
Example #15
void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, size_t offset, size_t limit)
{
    /** If there are columns-constants - then we materialize them.
      * (Since the data type does not know how to serialize / deserialize constants.)
      */
    ColumnPtr full_column;

    if (auto converted = column->convertToFullColumnIfConst())
        full_column = converted;
    else
        full_column = column;

    if (type.isNullable())
    {
        const DataTypeNullable & nullable_type = static_cast<const DataTypeNullable &>(type);
        const IDataType & nested_type = *nullable_type.getNestedType();

        const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*full_column.get());
        const ColumnPtr & nested_col = nullable_col.getNestedColumn();

        const IColumn & null_map = nullable_col.getNullMapConcreteColumn();
        DataTypeUInt8{}.serializeBinaryBulk(null_map, ostr, offset, limit);

        writeData(nested_type, nested_col, ostr, offset, limit);
    }
    else if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(&type))
    {
        /** For arrays, you first need to serialize the offsets, and then the values.
          */
        const ColumnArray & column_array = typeid_cast<const ColumnArray &>(*full_column);
        type_arr->getOffsetsType()->serializeBinaryBulk(*column_array.getOffsetsColumn(), ostr, offset, limit);

        if (!typeid_cast<const ColumnArray &>(*full_column).getData().empty())
        {
            const ColumnArray::Offsets_t & offsets = column_array.getOffsets();

            if (offset > offsets.size())
                return;

            /** offset - from which array to write.
              * limit - how many arrays should be written, or 0, if you write everything that is.
              * end - up to which array written part finishes.
              *
              * nested_offset - from which nested element to write.
              * nested_limit - how many nested elements to write, or 0, if you write everything that is.
              */

            size_t end = std::min(offset + limit, offsets.size());

            size_t nested_offset = offset ? offsets[offset - 1] : 0;
            size_t nested_limit = limit
                ? offsets[end - 1] - nested_offset
                : 0;

            const DataTypePtr & nested_type = type_arr->getNestedType();

            DataTypePtr actual_type;
            if (nested_type->isNull())
            {
                /// Special case: an array of Null is actually an array of Nullable(UInt8).
                actual_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
            }
            else
                actual_type = nested_type;

            if (limit == 0 || nested_limit)
                writeData(*actual_type, typeid_cast<const ColumnArray &>(*full_column).getDataPtr(), ostr, nested_offset, nested_limit);
        }
    }
    else
        type.serializeBinaryBulk(*full_column, ostr, offset, limit);
}
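The offset arithmetic in the array branch is easiest to verify on concrete numbers; a small standalone reproduction of it:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    /// 4 arrays: [a b], [c d e], [f], [g h] -> cumulative offsets
    std::vector<size_t> offsets = {2, 5, 6, 8};

    size_t offset = 1;                                      /// start from array #1
    size_t limit = 2;                                       /// write two arrays
    size_t end = std::min(offset + limit, offsets.size());  /// 3

    size_t nested_offset = offset ? offsets[offset - 1] : 0;            /// 2
    size_t nested_limit = limit ? offsets[end - 1] - nested_offset : 0; /// 6 - 2 = 4

    /// Nested elements [2, 6): c d e f, i.e. exactly arrays #1 and #2.
    std::cout << nested_offset << " " << nested_limit << '\n';
}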
Example #16
void ValuesRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    type.serializeTextQuoted(column, row_num, ostr, format_settings);
}
Example #17
void CSVRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    type.serializeTextCSV(column, row_num, ostr);
}
Example #18
void IMergedBlockOutputStream::writeData(
    const String & name,
    const IDataType & type,
    const IColumn & column,
    OffsetColumns & offset_columns,
    bool skip_offsets)
{
    size_t size = column.size();
    size_t prev_mark = 0;
    while (prev_mark < size)
    {
        size_t limit = 0;

        /// If there is `index_offset`, then the first mark goes not immediately, but after this number of rows.
        if (prev_mark == 0 && index_offset != 0)
            limit = index_offset;
        else
        {
            limit = storage.index_granularity;

            /// Write marks.
            type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
            {
                bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
                if (is_offsets && skip_offsets)
                    return;

                String stream_name = IDataType::getFileNameForStream(name, substream_path);

                /// Don't write offsets more than one time for Nested type.
                if (is_offsets && offset_columns.count(stream_name))
                    return;

                ColumnStream & stream = *column_streams[stream_name];

                /// There could already be enough data to compress into the new block.
                if (stream.compressed.offset() >= min_compress_block_size)
                    stream.compressed.next();

                writeIntBinary(stream.plain_hashing.count(), stream.marks);
                writeIntBinary(stream.compressed.offset(), stream.marks);
            }, {});
        }

        IDataType::OutputStreamGetter stream_getter = [&] (const IDataType::SubstreamPath & substream_path) -> WriteBuffer *
        {
            bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
            if (is_offsets && skip_offsets)
                return nullptr;

            String stream_name = IDataType::getFileNameForStream(name, substream_path);

            /// Don't write offsets more than one time for Nested type.
            if (is_offsets && offset_columns.count(stream_name))
                return nullptr;

            return &column_streams[stream_name]->compressed;
        };

        type.serializeBinaryBulkWithMultipleStreams(column, stream_getter, prev_mark, limit, true, {});

        /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one.
        type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
        {
            bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
            if (is_offsets && skip_offsets)
                return;

            String stream_name = IDataType::getFileNameForStream(name, substream_path);

            /// Don't write offsets more than one time for Nested type.
            if (is_offsets && offset_columns.count(stream_name))
                return;

            column_streams[stream_name]->compressed.nextIfAtEnd();
        }, {});

        prev_mark += limit;
    }

    /// Memoize offsets for Nested types, that are already written. They will not be written again for next columns of Nested structure.
    type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
    {
        bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
        if (is_offsets)
        {
            String stream_name = IDataType::getFileNameForStream(name, substream_path);
            offset_columns.insert(stream_name);
        }
    }, {});
}
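A standalone reproduction of the mark-boundary loop above: the first mark covers the index_offset rows left over from the previous block, and every subsequent mark covers index_granularity rows.

#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    size_t size = 25;               /// rows in the block
    size_t index_offset = 3;        /// rows left over from the previous block
    size_t index_granularity = 8;   /// rows per mark

    size_t prev_mark = 0;
    while (prev_mark < size)
    {
        size_t limit = (prev_mark == 0 && index_offset != 0) ? index_offset : index_granularity;
        std::cout << "mark covers rows [" << prev_mark << ", "
                  << std::min(prev_mark + limit, size) << ")\n";
        prev_mark += limit;
    }
    /// Prints: [0, 3) [3, 11) [11, 19) [19, 25)
}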
Example #19
void JSONCompactRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    type.serializeTextJSON(column, row_num, *ostr, settings);
    ++field_number;
}
Example #20
void TabSeparatedRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    type.serializeAsTextEscaped(column, row_num, ostr, format_settings);
}
Example #21
bool DataTypeNullable::equals(const IDataType & rhs) const
{
    return rhs.isNullable() && nested_data_type->equals(*static_cast<const DataTypeNullable &>(rhs).nested_data_type);
}