void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit)
{
    /** If there are constant columns, materialize them
      * (the data type does not know how to serialize / deserialize constants).
      */
    ColumnPtr full_column = column->convertToFullColumnIfConst();

    IDataType::SerializeBinaryBulkSettings settings;
    /// Route every substream of the type into the single output buffer.
    settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; };
    settings.position_independent_encoding = false;
    settings.low_cardinality_max_dictionary_size = 0;

    IDataType::SerializeBinaryBulkStatePtr state;
    type.serializeBinaryBulkStatePrefix(settings, state);
    type.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
    type.serializeBinaryBulkStateSuffix(settings, state);
}
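For orientation, here is a hedged sketch of how such a helper might be driven from the stream's write path. It is not the actual NativeBlockOutputStream::write(); the Block accessors (rows(), columns(), safeGetByPosition()) are the same ones used in the examples below, while writeBlockSketch is a hypothetical name, and it assumes writeData is reachable as a static helper (its signature takes the WriteBuffer explicitly, so it needs no stream state).

/// Hypothetical caller (sketch only): serialize every column of a block in full.
static void writeBlockSketch(const Block & block, WriteBuffer & ostr)
{
    size_t rows = block.rows();
    for (size_t i = 0; i < block.columns(); ++i)
    {
        const ColumnWithTypeAndName & column = block.safeGetByPosition(i);
        /// offset = 0 and limit = rows write the whole column in one call.
        NativeBlockOutputStream::writeData(*column.type, column.column, ostr, 0, rows);
    }
}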
Example #2
bool Join::insertFromBlock(const Block & block)
{
    std::unique_lock lock(rwlock);

    if (empty())
        throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR);

    size_t keys_size = key_names_right.size();
    ColumnRawPtrs key_columns(keys_size);

    /// Rare case: the keys are constant. To avoid code bloat, simply materialize them.
    Columns materialized_columns;

    /// Memoize key columns to work with.
    for (size_t i = 0; i < keys_size; ++i)
    {
        key_columns[i] = block.getByName(key_names_right[i]).column.get();

        if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst())
        {
            materialized_columns.emplace_back(converted);
            key_columns[i] = materialized_columns.back().get();
        }
    }

    /// We will insert into the map only those keys whose components are all non-NULL.
    ColumnPtr null_map_holder;
    ConstNullMapPtr null_map{};
    extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);

    size_t rows = block.rows();

    blocks.push_back(block);
    Block * stored_block = &blocks.back();

    if (getFullness(kind))
    {
        /** Move the key columns to the beginning of the block.
          * This is where NonJoinedBlockInputStream will expect to find them.
          */
        size_t key_num = 0;
        for (const auto & name : key_names_right)
        {
            size_t pos = stored_block->getPositionByName(name);
            ColumnWithTypeAndName col = stored_block->safeGetByPosition(pos);
            stored_block->erase(pos);
            stored_block->insert(key_num, std::move(col));
            ++key_num;
        }
    }
    else
    {
        /// Remove the key columns from stored_block, as they are not needed.
        for (const auto & name : key_names_right)
            stored_block->erase(stored_block->getPositionByName(name));
    }

    size_t size = stored_block->columns();

    /// Rare case: the joined columns are constant. To avoid code bloat, simply materialize them.
    for (size_t i = 0; i < size; ++i)
    {
        ColumnPtr col = stored_block->safeGetByPosition(i).column;
        if (ColumnPtr converted = col->convertToFullColumnIfConst())
            stored_block->safeGetByPosition(i).column = converted;
    }

    /// For LEFT and FULL joins, if use_nulls is set, convert the joined columns to Nullable.
    if (use_nulls && (kind == ASTTableJoin::Kind::Left || kind == ASTTableJoin::Kind::Full))
    {
        for (size_t i = getFullness(kind) ? keys_size : 0; i < size; ++i)
        {
            convertColumnToNullable(stored_block->getByPosition(i));
        }
    }

    if (kind != ASTTableJoin::Kind::Cross)
    {
        /// Fill the hash table.
        if (!getFullness(kind))
        {
            if (strictness == ASTTableJoin::Strictness::Any)
                insertFromBlockImpl<ASTTableJoin::Strictness::Any>(type, maps_any, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool);
            else
                insertFromBlockImpl<ASTTableJoin::Strictness::All>(type, maps_all, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool);
        }
        else
        {
            if (strictness == ASTTableJoin::Strictness::Any)
                insertFromBlockImpl<ASTTableJoin::Strictness::Any>(type, maps_any_full, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool);
            else
                insertFromBlockImpl<ASTTableJoin::Strictness::All>(type, maps_all_full, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool);
        }
    }

    return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
}
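Several members used above (type, maps_any, maps_all, key_sizes, pool, limits) are not shown in this example, and neither is getFullness. Below is a hedged sketch of what getFullness plausibly checks, inferred from its use above rather than quoted from the source: only RIGHT and FULL joins need the right-side key columns kept at the front of the stored block so that NonJoinedBlockInputStream can later emit the non-joined rows.

/// Hedged sketch, inferred from usage above: "full" join kinds are the ones
/// that must later produce non-joined right-side rows.
static bool getFullness(ASTTableJoin::Kind kind)
{
    return kind == ASTTableJoin::Kind::Right || kind == ASTTableJoin::Kind::Full;
}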
Example #3
void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, size_t offset, size_t limit)
{
    /** If there are constant columns, materialize them
      * (the data type does not know how to serialize / deserialize constants).
      */
    ColumnPtr full_column;

    if (auto converted = column->convertToFullColumnIfConst())
        full_column = converted;
    else
        full_column = column;

    if (type.isNullable())
    {
        const DataTypeNullable & nullable_type = static_cast<const DataTypeNullable &>(type);
        const IDataType & nested_type = *nullable_type.getNestedType();

        const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*full_column.get());
        const ColumnPtr & nested_col = nullable_col.getNestedColumn();

        const IColumn & null_map = nullable_col.getNullMapConcreteColumn();
        DataTypeUInt8{}.serializeBinaryBulk(null_map, ostr, offset, limit);

        writeData(nested_type, nested_col, ostr, offset, limit);
    }
    else if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(&type))
    {
        /** For arrays, the offsets are serialized first, then the values.
          */
        const ColumnArray & column_array = typeid_cast<const ColumnArray &>(*full_column);
        type_arr->getOffsetsType()->serializeBinaryBulk(*column_array.getOffsetsColumn(), ostr, offset, limit);

        if (!typeid_cast<const ColumnArray &>(*full_column).getData().empty())
        {
            const ColumnArray::Offsets_t & offsets = column_array.getOffsets();

            if (offset > offsets.size())
                return;

            /** offset - index of the first array to write.
              * limit - how many arrays to write, or 0 to write everything there is.
              * end - index one past the last array to be written.
              *
              * nested_offset - index of the first nested element to write.
              * nested_limit - how many nested elements to write, or 0 to write everything there is.
              */

            size_t end = std::min(offset + limit, offsets.size());

            size_t nested_offset = offset ? offsets[offset - 1] : 0;
            size_t nested_limit = limit
                ? offsets[end - 1] - nested_offset
                : 0;

            const DataTypePtr & nested_type = type_arr->getNestedType();

            DataTypePtr actual_type;
            if (nested_type->isNull())
            {
                /// Special case: an array of Null is actually an array of Nullable(UInt8).
                actual_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
            }
            else
                actual_type = nested_type;

            if (limit == 0 || nested_limit)
                writeData(*actual_type, typeid_cast<const ColumnArray &>(*full_column).getDataPtr(), ostr, nested_offset, nested_limit);
        }
    }
    else
        type.serializeBinaryBulk(*full_column, ostr, offset, limit);
}
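The nested_offset / nested_limit arithmetic in the Array branch is the easiest part to get wrong, so here is a standalone sketch with concrete numbers. It uses plain C++ containers rather than ClickHouse types, but assumes the same cumulative-offsets representation as above (offsets[i] is the end position of the i-th array in the flattened nested data).

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    /// Three arrays of sizes 2, 3 and 4; each offset is the cumulative end position.
    std::vector<size_t> offsets{2, 5, 9};

    size_t offset = 1;  /// start writing from the second array
    size_t limit = 2;   /// write two arrays

    size_t end = std::min(offset + limit, offsets.size());               /// 3
    size_t nested_offset = offset ? offsets[offset - 1] : 0;             /// 2
    size_t nested_limit = limit ? offsets[end - 1] - nested_offset : 0;  /// 9 - 2 = 7

    /// The second and third arrays occupy flattened elements [2, 9): 3 + 4 = 7 values.
    assert(end == 3 && nested_offset == 2 && nested_limit == 7);
    return 0;
}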
Example #4
/// Rebuild the array column from the mapped nested data: materialize `mapped`
/// in case it is constant and reuse the original array's offsets.
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
{
    return ColumnArray::create(mapped->convertToFullColumnIfConst(), array.getOffsetsPtr());
}