Block TinyLogBlockInputStream::readImpl()
{
    Block res;

    if (finished || (!streams.empty() && streams.begin()->second->compressed.eof()))
    {
        /** Close the files (before destroying the object).
          * When many sources are created, but simultaneously reading only a few of them,
          * buffers don't waste memory.
          */
        finished = true;
        streams.clear();
        return res;
    }

    {
        /// if there are no files in the folder, it means that the table is empty
        if (Poco::DirectoryIterator(storage.full_path()) == Poco::DirectoryIterator())
            return res;
    }

    for (const auto & name_type : columns)
    {
        MutableColumnPtr column = name_type.type->createColumn();

        try
        {
            readData(name_type.name, *name_type.type, *column, block_size);
        }
        catch (Exception & e)
        {
            e.addMessage("while reading column " + name_type.name + " at " + storage.full_path());
            throw;
        }

        if (column->size())
            res.insert(ColumnWithTypeAndName(std::move(column), name_type.type, name_type.name));
    }

    if (!res || streams.begin()->second->compressed.eof())
    {
        finished = true;
        streams.clear();
    }

    return Nested::flatten(res);
}
Beispiel #2
0
size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res)
{
    size_t read_rows = 0;
    try
    {
        /// Pointers to offset columns that are common to the nested data structure columns.
        /// If append is true, then the value will be equal to nullptr and will be used only to
        /// check that the offsets column has been already read.
        OffsetColumns offset_columns;

        for (const NameAndTypePair & it : columns)
        {
            /// The column is already present in the block so we will append the values to the end.
            bool append = res.has(it.name);
            if (!append)
                res.insert(ColumnWithTypeAndName(it.type->createColumn(), it.type, it.name));

            /// To keep offsets shared. TODO Very dangerous. Get rid of this.
            MutableColumnPtr column = res.getByName(it.name).column->assumeMutable();

            bool read_offsets = true;

            /// For nested data structures collect pointers to offset columns.
            if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(it.type.get()))
            {
                String name = Nested::extractTableName(it.name);

                auto it_inserted = offset_columns.emplace(name, nullptr);

                /// offsets have already been read on the previous iteration and we don't need to read it again
                if (!it_inserted.second)
                    read_offsets = false;

                /// need to create new offsets
                if (it_inserted.second && !append)
                    it_inserted.first->second = ColumnArray::ColumnOffsets::create();

                /// share offsets in all elements of nested structure
                if (!append)
                    column = ColumnArray::create(type_arr->getNestedType()->createColumn(), it_inserted.first->second);
            }

            try
            {
                size_t column_size_before_reading = column->size();

                readData(it.name, *it.type, *column, from_mark, continue_reading, max_rows_to_read, read_offsets);

                /// For elements of Nested, column_size_before_reading may be greater than column size
                ///  if offsets are not empty and were already read, but elements are empty.
                if (column->size())
                    read_rows = std::max(read_rows, column->size() - column_size_before_reading);
            }
            catch (Exception & e)
            {
                /// Better diagnostics.
                e.addMessage("(while reading column " + it.name + ")");
                throw;
            }

            if (column->size())
                res.getByName(it.name).column = std::move(column);
            else
                res.erase(it.name);
        }

        /// NOTE: positions for all streams must be kept in sync. In particular, even if for some streams there are no rows to be read,
        /// you must ensure that no seeks are skipped and at this point they all point to to_mark.
    }
    catch (Exception & e)
    {
        if (e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
            storage.reportBrokenPart(data_part->name);

        /// Better diagnostics.
        e.addMessage("(while reading from part " + path + " from mark " + toString(from_mark) + " with max_rows_to_read = " + toString(max_rows_to_read) + ")");
        throw;
    }
    catch (...)
    {
        storage.reportBrokenPart(data_part->name);

        throw;
    }

    return read_rows;
}